diff mbox

[v2] SH FDPIC backend support

Message ID 20151006031512.GM8645@brightrain.aerifal.cx
State New
Headers show

Commit Message

Rich Felker Oct. 6, 2015, 3:15 a.m. UTC
On Fri, Oct 02, 2015 at 07:36:27AM +0900, Oleg Endo wrote:
> On Thu, 2015-10-01 at 17:35 -0400, Rich Felker wrote:
> > This is a forward-port of the abandoned SH FDPIC patch from 2010:
> > 
> > https://gcc.gnu.org/ml/gcc-patches/2010-08/msg01536.html
> > 
> > I'm submitting it at this point for initial review, not to be applied
> > right away; I would not be surprised if some changes are needed. It
> > applies on top of gcc 5.2.0 with the patch for pr 66609 applied. With
> > one trivial change it also applies to the current development version
> > of gcc, but I have not tested that setup.
> 
> Thanks for working on this.  Please submit a patch against trunk.

Attached is the initial version of the patch against trunk. I've fixed
the functional issues I'm aware of from the previous version: ICE in
generating the plain-SH2 libgcc-based shifts, missing
sh_legitimate_constant_p changes, and bad asm spec that broke
non-FDPIC. Cosmetic/style changes have not been made yet.

A couple specific questions I have:

- Is the use of self specs (see DRIVER_SELF_SPECS in sh.h) an
  acceptable way to set the default? I brought this up before but
  don't think anyone answered. I find this method more clear and less
  invasive (doesn't require #ifdef FDPIC_DEFAULT all over the place)
  but if there's a policy reason this can't be done I can rework it.

- For the udiv_qrnnd inline asm, the current patch duplicates the asm
  with a minor change to dereference the function descriptor and get a
  code address. This could be done outside the asm (via type punning
  the function pointer) to slightly improve the resulting code and
  avoid duplicating the asm (a macro would be used to load the code
  address from the function pointer; this is an identity macro on
  non-FDPIC and would do the type punning on FDPIC) but if this
  approach would be preferable I need some advice on the form of type
  punning that would be acceptable in GCC.

- For the Changelog, should I just edit the one from the original
  patch (https://gcc.gnu.org/ml/gcc-patches/2010-08/txt00148.txt)
  submitted against 4.5 and add myself to the list of patch authors?

If there are no other functional issues to address I'll go ahead and
switch to the cosmetics and try to make a version that's closer to
ready for commit.

Rich

Comments

Oleg Endo Oct. 6, 2015, 12:39 p.m. UTC | #1
On Mon, 2015-10-05 at 23:15 -0400, Rich Felker wrote:
> Attached is the initial version of the patch against trunk. I've fixed
> the functional issues I'm aware of from the previous version: ICE in
> generating the plain-SH2 libgcc-based shifts, missing
> sh_legitimate_constant_p changes, and bad asm spec that broke
> non-FDPIC. Cosmetic/style changes have not been made yet.

OK, I've got a few other points.

> +  if (TARGET_FDPIC)
> +    {
> +      rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
> +      emit_move_insn (adjust_address (tramp_mem, SImode, 0), a);
> +      emit_move_insn (adjust_address (tramp_mem, SImode, 4), OUR_FDPIC_REG);
> +      emit_move_insn (adjust_address (tramp_mem, SImode, 8),
> +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd203d302 : 0xd302d203,
> +				    SImode));
> +      emit_move_insn (adjust_address (tramp_mem, SImode, 12),
> +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x5c216122 : 0x61225c21,
> +				    SImode));
> +      emit_move_insn (adjust_address (tramp_mem, SImode, 16),
> +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009412b : 0x412b0009,
> +				    SImode));
> +      emit_move_insn (adjust_address (tramp_mem, SImode, 20), cxt);
> +      emit_move_insn (adjust_address (tramp_mem, SImode, 24), fnaddr);
> +    }
> +  else
> +    {
> +      emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
> +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
> +				    SImode));
> +      emit_move_insn (adjust_address (tramp_mem, SImode, 4),
> +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
> +				    SImode));
> +      emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
> +      emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
> +    }

I think this hunk really needs a comment.  It copies machine code from
somewhere to somewhere via constant loads... but what exactly are the
instructions ...

In the multiple alternative insn patterns ...

> -  "jsr	@%1%#"
> +  "@
> +   jsr	@%1%#
> +   bsrf	%1\\n%O2:%#"

Please use the formatting as in the other parts of sh.md:
"@
	jsr	@%1%#
	bsrf	%1\n%O2:%#"

(use tabs and I don't think the embedded newline needs double backslash,
but please check)


> +@item --enable-fdpic
> +On SH Linux systems, generate ELF FDPIC code.

It should be "GNU/Linux" as far as I know.

> 
> A couple specific questions I have:
> 
> - Is the use of self specs (see DRIVER_SELF_SPECS in sh.h) an
>   acceptable way to set the default? I brought this up before but
>   don't think anyone answered. I find this method more clear and less
>   invasive (doesn't require #ifdef FDPIC_DEFAULT all over the place)
>   but if there's a policy reason this can't be done I can rework it.

This should be fine.  If there's a problem with that it can be changed
later.  Kaz, do you have any opinion?

Still, maybe it's better ...

> mfdpic
> Target Report Var(TARGET_FDPIC)
> Generate ELF FDPIC code

... to add an Init(0) to this new option.  Just in case.

> - For the udiv_qrnnd inline asm, the current patch duplicates the asm
>   with a minor change to dereference the function descriptor and get a
>   code address. This could be done outside the asm (via type punning
>   the function pointer) to slightly improve the resulting code and
>   avoid duplicating the asm (a macro would be used to load the code
>   address from the function pointer; this is an identity macro on
>   non-FDPIC and would do the type punning on FDPIC) but if this
>   approach would be preferable I need some advice on the form of type
>   punning that would be acceptable in GCC.

I think this is fine as it is.  udiv_qrnnd is probably not used very
often, so most likely the compiler will generate the same code to do a
constant pool load before the asm block.  If more such functions are
introduced it might be worth doing it.

> - For the Changelog, should I just edit the one from the original
>   patch (https://gcc.gnu.org/ml/gcc-patches/2010-08/txt00148.txt)
>   submitted against 4.5 and add myself to the list of patch authors?

I think a new ChangeLog entry is better, since the patch most likely
will look quite different from the original.  You can use the original
text as a source for inspiration.  Giving credit to the original authors
would be nice I think.

Cheers,
Oleg
Kaz Kojima Oct. 6, 2015, 1:26 p.m. UTC | #2
Oleg Endo <oleg.endo@t-online.de> wrote:
> On Mon, 2015-10-05 at 23:15 -0400, Rich Felker wrote:
>> - Is the use of self specs (see DRIVER_SELF_SPECS in sh.h) an
>>   acceptable way to set the default? I brought this up before but
>>   don't think anyone answered. I find this method more clear and less
>>   invasive (doesn't require #ifdef FDPIC_DEFAULT all over the place)
>>   but if there's a policy reason this can't be done I can rework it.
> 
> This should be fine.  If there's a problem with that it can be changed
> later.  Kaz, do you have any opinion?

I'm OK with it.

I've started to build and test with the patch for sh4-unknown-linux-gnu.
Build completed successfully.  The testsuite is still running, though
it passes C & C++ tests with no new failures.

Regards,
	kaz
Rich Felker Oct. 6, 2015, 4:52 p.m. UTC | #3
On Tue, Oct 06, 2015 at 09:39:20PM +0900, Oleg Endo wrote:
> On Mon, 2015-10-05 at 23:15 -0400, Rich Felker wrote:
> > Attached is the initial version of the patch against trunk. I've fixed
> > the functional issues I'm aware of from the previous version: ICE in
> > generating the plain-SH2 libgcc-based shifts, missing
> > sh_legitimate_constant_p changes, and bad asm spec that broke
> > non-FDPIC. Cosmetic/style changes have not been made yet.
> 
> OK, I've got a few other points.

Thanks!

> > +  if (TARGET_FDPIC)
> > +    {
> > +      rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
> > +      emit_move_insn (adjust_address (tramp_mem, SImode, 0), a);
> > +      emit_move_insn (adjust_address (tramp_mem, SImode, 4), OUR_FDPIC_REG);
> > +      emit_move_insn (adjust_address (tramp_mem, SImode, 8),
> > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd203d302 : 0xd302d203,
> > +				    SImode));
> > +      emit_move_insn (adjust_address (tramp_mem, SImode, 12),
> > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x5c216122 : 0x61225c21,
> > +				    SImode));
> > +      emit_move_insn (adjust_address (tramp_mem, SImode, 16),
> > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009412b : 0x412b0009,
> > +				    SImode));
> > +      emit_move_insn (adjust_address (tramp_mem, SImode, 20), cxt);
> > +      emit_move_insn (adjust_address (tramp_mem, SImode, 24), fnaddr);
> > +    }
> > +  else
> > +    {
> > +      emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
> > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
> > +				    SImode));
> > +      emit_move_insn (adjust_address (tramp_mem, SImode, 4),
> > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
> > +				    SImode));
> > +      emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
> > +      emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
> > +    }
> 
> I think this hunk really needs a comment.  It copies machine code from
> somewhere to somewhere via constant loads... but what exactly are the
> instructions ...

This is generating trampolines for nested functions. This portion of
the patch applied without modification from the old patch, so I didn't
read into it in any more detail; it seems to be the following, which
makes sense:

0:	.long 1f
	.long gotval
1:	mov.l 3f,r3
	mov.l 2f,r2
	mov.l @r2,r1
	mov.l @(4,r2),r12
	jmp @r1
	nop
3:	.long cxt
2:	.long fnaddr

The corresponding non-FDPIC version is:

	mov.l 2f,r2
	mov.l 3f,r3
	jmp @r2
	nop
3:	.long cxt
2:	.long fnaddr

Should these go into the source as comments?
	
> In the multiple alternative insn patterns ...
> 
> > -  "jsr	@%1%#"
> > +  "@
> > +   jsr	@%1%#
> > +   bsrf	%1\\n%O2:%#"
> 
> Please use the formatting as in the other parts of sh.md:
> "@
> 	jsr	@%1%#
> 	bsrf	%1\n%O2:%#"
> 
> (use tabs and I don't think the embedded newline needs double backslash,
> but please check)

I would think it does, but I've found in the RTL files sometimes extra
escaping is silently accepted, and I'm not sure if omitting it would
visibly break. Can I rely on it producing a visible error right away
if removing it is wrong, or do I need to search the gccint
documentation to figure out what the right way is?

> > +@item --enable-fdpic
> > +On SH Linux systems, generate ELF FDPIC code.
> 
> It should be "GNU/Linux" as far as I know.

I don't want to turn this into a political battle so we can go with
whatever is appropriate for upstream gcc. Note however that, at
present, the only targets this code is useful on are completely
non-GNU Linux (musl-based and not using any GNU userspace on the
target). uClibc may also work if someone digs up the old (untouched
since 2011) superh_fdpic branch.

The original patch said "On SH uClinux systems"; I just changed it to
"Linux" because there's no longer anything uClinux-specific about it.

> > A couple specific questions I have:
> > 
> > - Is the use of self specs (see DRIVER_SELF_SPECS in sh.h) an
> >   acceptable way to set the default? I brought this up before but
> >   don't think anyone answered. I find this method more clear and less
> >   invasive (doesn't require #ifdef FDPIC_DEFAULT all over the place)
> >   but if there's a policy reason this can't be done I can rework it.
> 
> This should be fine.  If there's a problem with that it can be changed
> later.  Kaz, do you have any opinion?
> 
> Still, maybe it's better ...
> 
> > mfdpic
> > Target Report Var(TARGET_FDPIC)
> > Generate ELF FDPIC code
> 
> ... to add an Init(0) to this new option.  Just in case.

By "better" you mean leaving the self-specs approach in-place but
explicitly initializing it to 0 with Init(0)? That sounds good to me.

> > - For the udiv_qrnnd inline asm, the current patch duplicates the asm
> >   with a minor change to dereference the function descriptor and get a
> >   code address. This could be done outside the asm (via type punning
> >   the function pointer) to slightly improve the resulting code and
> >   avoid duplicating the asm (a macro would be used to load the code
> >   address from the function pointer; this is an identity macro on
> >   non-FDPIC and would do the type punning on FDPIC) but if this
> >   approach would be preferable I need some advice on the form of type
> >   punning that would be acceptable in GCC.
> 
> I think this is fine as it is.  udiv_qrnnd is probably not used very
> often, so most likely the compiler will generate the same code to do a
> constant pool load before the asm block.  If more such functions are
> introduced it might be worth doing it.

It can't generate the same code either way because, with the patch as
submitted, there's an extra load inside the asm. I would prefer
switching to an approach that avoids that (mainly to avoid the ugly
near-duplication of the asm block, but also to save a couple
instructions) but short of feedback on acceptable ways to do the
punning in the C++ I'll just leave it in the asm for now.

> > - For the Changelog, should I just edit the one from the original
> >   patch (https://gcc.gnu.org/ml/gcc-patches/2010-08/txt00148.txt)
> >   submitted against 4.5 and add myself to the list of patch authors?
> 
> I think a new ChangeLog entry is better, since the patch most likely
> will look quite different from the original.  You can use the original
> text as a source for inspiration.  Giving credit to the original authors
> would be nice I think.

OK. Actually I think the patch is a lot closer to the old one than you
think, or at least that's how it feels to me. But I don't mind working
through the whole new patch to make a better ChangeLog entry and just
using the old one as "source for inspiration".

Rich
Oleg Endo Oct. 6, 2015, 10:22 p.m. UTC | #4
On Tue, 2015-10-06 at 12:52 -0400, Rich Felker wrote:
> > > +  if (TARGET_FDPIC)
> > > +    {
> > > +      rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
> > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 0), a);
> > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 4), OUR_FDPIC_REG);
> > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 8),
> > > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd203d302 : 0xd302d203,
> > > +				    SImode));
> > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 12),
> > > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x5c216122 : 0x61225c21,
> > > +				    SImode));
> > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 16),
> > > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009412b : 0x412b0009,
> > > +				    SImode));
> > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 20), cxt);
> > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 24), fnaddr);
> > > +    }
> > > +  else
> > > +    {
> > > +      emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
> > > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
> > > +				    SImode));
> > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 4),
> > > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
> > > +				    SImode));
> > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
> > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
> > > +    }
> > 
> > I think this hunk really needs a comment.  It copies machine code from
> > somewhere to somewhere via constant loads... but what exactly are the
> > instructions ...
> 
> This is generating trampolines for nested functions. This portion of
> the patch applied without modification from the old patch, so I didn't
> read into it in any more detail; it seems to be the following, which
> makes sense:
> 
> 0:	.long 1f
> 	.long gotval
> 1:	mov.l 3f,r3
> 	mov.l 2f,r2
> 	mov.l @r2,r1
> 	mov.l @(4,r2),r12
> 	jmp @r1
> 	nop
> 3:	.long cxt
> 2:	.long fnaddr
> 
> The corresponding non-FDPIC version is:
> 
> 	mov.l 2f,r2
> 	mov.l 3f,r3
> 	jmp @r2
> 	nop
> 3:	.long cxt
> 2:	.long fnaddr
> 
> Should these go into the source as comments?

Yes, please.  And of course some of the descriptive text as above.

> I would think it does, but I've found in the RTL files sometimes extra
> escaping is silently accepted, and I'm not sure if omitting it would
> visibly break. Can I rely on it producing a visible error right away
> if removing it is wrong, or do I need to search the gccint
> documentation to figure out what the right way is?

Just compile some code and look at the generated asm.

> I don't want to turn this into a political battle so we can go with
> whatever is appropriate for upstream gcc. Note however that, at
> present, the only targets this code is useful on are completely
> non-GNU Linux (musl-based and not using any GNU userspace on the
> target). uClibc may also work if someone digs up the old (untouched
> since 2011) superh_fdpic branch.

In this case leave as just "Linux".

> By "better" you mean leaving the self-specs approach in-place but
> explicitly initializing it to 0 with Init(0)? That sounds good to me.

Yes.

> It can't generate the same code either way because, with the patch as
> submitted, there's an extra load inside the asm. I would prefer
> switching to an approach that avoids that (mainly to avoid the ugly
> near-duplication of the asm block, but also to save a couple
> instructions) but short of feedback on acceptable ways to do the
> punning in the C++ I'll just leave it in the asm for now.

Do you have some alternatives to what's currently in the patch?  It's
difficult to judge without seeing them...

Cheers,
Oleg
Rich Felker Oct. 6, 2015, 11:36 p.m. UTC | #5
On Wed, Oct 07, 2015 at 07:22:59AM +0900, Oleg Endo wrote:
> On Tue, 2015-10-06 at 12:52 -0400, Rich Felker wrote:
> > > > +  if (TARGET_FDPIC)
> > > > +    {
> > > > +      rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
> > > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 0), a);
> > > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 4), OUR_FDPIC_REG);
> > > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 8),
> > > > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd203d302 : 0xd302d203,
> > > > +				    SImode));
> > > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 12),
> > > > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x5c216122 : 0x61225c21,
> > > > +				    SImode));
> > > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 16),
> > > > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009412b : 0x412b0009,
> > > > +				    SImode));
> > > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 20), cxt);
> > > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 24), fnaddr);
> > > > +    }
> > > > +  else
> > > > +    {
> > > > +      emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
> > > > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
> > > > +				    SImode));
> > > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 4),
> > > > +		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
> > > > +				    SImode));
> > > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
> > > > +      emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
> > > > +    }
> > > 
> > > I think this hunk really needs a comment.  It copies machine code from
> > > somewhere to somewhere via constant loads... but what exactly are the
> > > instructions ...
> > 
> > This is generating trampolines for nested functions. This portion of
> > the patch applied without modification from the old patch, so I didn't
> > read into it in any more detail; it seems to be the following, which
> > makes sense:
> > 
> > 0:	.long 1f
> > 	.long gotval
> > 1:	mov.l 3f,r3
> > 	mov.l 2f,r2
> > 	mov.l @r2,r1
> > 	mov.l @(4,r2),r12
> > 	jmp @r1
> > 	nop
> > 3:	.long cxt
> > 2:	.long fnaddr
> > 
> > The corresponding non-FDPIC version is:
> > 
> > 	mov.l 2f,r2
> > 	mov.l 3f,r3
> > 	jmp @r2
> > 	nop
> > 3:	.long cxt
> > 2:	.long fnaddr
> > 
> > Should these go into the source as comments?
> 
> Yes, please.  And of course some of the descriptive text as above.

OK.

> > I would think it does, but I've found in the RTL files sometimes extra
> > escaping is silently accepted, and I'm not sure if omitting it would
> > visibly break. Can I rely on it producing a visible error right away
> > if removing it is wrong, or do I need to search the gccint
> > documentation to figure out what the right way is?
> 
> Just compile some code and look at the generated asm.

OK, I'll try this.

> > It can't generate the same code either way because, with the patch as
> > submitted, there's an extra load inside the asm. I would prefer
> > switching to an approach that avoids that (mainly to avoid the ugly
> > near-duplication of the asm block, but also to save a couple
> > instructions) but short of feedback on acceptable ways to do the
> > punning in the C++ I'll just leave it in the asm for now.
> 
> Do you have some alternatives to what's currently in the patch?  It's
> difficult to judge without seeing them...

Perhaps something like the following:

#ifdef __SH_FDPIC__
typedef __attribute__((__may_alias__)) uintptr_t sh_aliased_uintptr_t;
#define SH_CODE_ADDR(x) (*(sh_aliased_uintptr_t *)(x))
#else
#define SH_CODE_ADDR(x) x
#endif

And then just passing SH_CODE_ADDR(__udiv_qrnnd_16) rather than just
__udiv_qrnnd_16 as the input to the asm.

Rich
Oleg Endo Oct. 7, 2015, 1:56 p.m. UTC | #6
On Tue, 2015-10-06 at 19:36 -0400, Rich Felker wrote:
> > 
> > Do you have some alternatives to what's currently in the patch?  It's
> > difficult to judge without seeing them...
> 
> Perhaps something like the following:
> 
> #ifdef __SH_FDPIC__
> typedef __attribute__((__may_alias__)) uintptr_t sh_aliased_uintptr_t;
> #define SH_CODE_ADDR(x) (*(sh_aliased_uintptr_t *)(x))
> #else
> #define SH_CODE_ADDR(x) x
> #endif
> 
> And then just passing SH_CODE_ADDR(__udiv_qrnnd_16) rather than just
> __udiv_qrnnd_16 as the input to the asm.

Looks OK to me.  Have you tried it?  I'm not sure whether uintptr_t is
always available in this case.  It seems longlong.h is used in a couple
of places, for the host and for the target.  To be on the safe side,
it'd be better to do a native GCC build with FDPIC enabled and some
cross builds.  I'm not sure if this is feasible/possible at the moment.
Maybe defer this and make a follow up patch?

Cheers,
Oleg
diff mbox

Patch

diff --git a/gcc/config.gcc b/gcc/config.gcc
index bf26776..ed118f3 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -2621,6 +2621,9 @@  sh-*-elf* | sh[12346l]*-*-elf* | \
 	tm_file="${tm_file} dbxelf.h elfos.h sh/elf.h"
 	case ${target} in
 	sh*-*-linux*)	tmake_file="${tmake_file} sh/t-linux"
+			if test x$enable_fdpic = xyes; then
+				tm_defines="$tm_defines FDPIC_DEFAULT=1"
+			fi
 			tm_file="${tm_file} gnu-user.h linux.h glibc-stdint.h sh/linux.h" ;;
 	sh*-*-netbsd*)
 			tm_file="${tm_file} netbsd.h netbsd-elf.h sh/netbsd-elf.h"
diff --git a/gcc/config/sh/constraints.md b/gcc/config/sh/constraints.md
index 4d1eb2d..41c88a2 100644
--- a/gcc/config/sh/constraints.md
+++ b/gcc/config/sh/constraints.md
@@ -25,6 +25,7 @@ 
 ;;  Bsc: SCRATCH - for the scratch register in movsi_ie in the
 ;;       fldi0 / fldi0 cases
 ;; Cxx: Constants other than only CONST_INT
+;;  Ccl: call site label
 ;;  Css: signed 16-bit constant, literal or symbolic
 ;;  Csu: unsigned 16-bit constant, literal or symbolic
 ;;  Csy: label or symbol
@@ -233,6 +234,11 @@ 
    hence mova is being used, hence do not select this pattern."
   (match_code "scratch"))
 
+(define_constraint "Ccl"
+  "A call site label, for bsrf."
+  (and (match_code "unspec")
+       (match_test "XINT (op, 1) == UNSPEC_CALLER")))
+
 (define_constraint "Css"
   "A signed 16-bit constant, literal or symbolic."
   (and (match_code "const")
diff --git a/gcc/config/sh/linux.h b/gcc/config/sh/linux.h
index a9dd43a..5d4dd1f 100644
--- a/gcc/config/sh/linux.h
+++ b/gcc/config/sh/linux.h
@@ -69,7 +69,8 @@  along with GCC; see the file COPYING3.  If not see
 #define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
 
 #undef SUBTARGET_LINK_EMUL_SUFFIX
-#define SUBTARGET_LINK_EMUL_SUFFIX "_linux"
+#define SUBTARGET_LINK_EMUL_SUFFIX "%{mfdpic:_fd;:_linux}"
+
 #undef SUBTARGET_LINK_SPEC
 #define SUBTARGET_LINK_SPEC \
   "%{shared:-shared} \
diff --git a/gcc/config/sh/sh-c.c b/gcc/config/sh/sh-c.c
index a98c148..01a12e6 100644
--- a/gcc/config/sh/sh-c.c
+++ b/gcc/config/sh/sh-c.c
@@ -141,6 +141,11 @@  sh_cpu_cpp_builtins (cpp_reader* pfile)
     builtin_define ("__HITACHI__");
   if (TARGET_FMOVD)
     builtin_define ("__FMOVD_ENABLED__");
+  if (TARGET_FDPIC)
+    {
+      builtin_define ("__SH_FDPIC__");
+      builtin_define ("__FDPIC__");
+    }
   builtin_define (TARGET_LITTLE_ENDIAN
 		  ? "__LITTLE_ENDIAN__" : "__BIG_ENDIAN__");
 
diff --git a/gcc/config/sh/sh-mem.cc b/gcc/config/sh/sh-mem.cc
index 23a7287..58a5fd0 100644
--- a/gcc/config/sh/sh-mem.cc
+++ b/gcc/config/sh/sh-mem.cc
@@ -122,11 +122,13 @@  expand_block_move (rtx *operands)
 	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
 	  rtx r4 = gen_rtx_REG (SImode, 4);
 	  rtx r5 = gen_rtx_REG (SImode, 5);
+	  rtx lab;
 
-	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
+	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC,
+			   &lab);
 	  force_into (XEXP (operands[0], 0), r4);
 	  force_into (XEXP (operands[1], 0), r5);
-	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
+	  emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
 	  return true;
 	}
       else if (! optimize_size)
@@ -137,15 +139,16 @@  expand_block_move (rtx *operands)
 	  rtx r4 = gen_rtx_REG (SImode, 4);
 	  rtx r5 = gen_rtx_REG (SImode, 5);
 	  rtx r6 = gen_rtx_REG (SImode, 6);
+	  rtx lab;
 
 	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
-	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
+	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC, &lab);
 	  force_into (XEXP (operands[0], 0), r4);
 	  force_into (XEXP (operands[1], 0), r5);
 
 	  dwords = bytes >> 3;
 	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
-	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
+	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
 	  return true;
 	}
       else
@@ -157,12 +160,13 @@  expand_block_move (rtx *operands)
       rtx func_addr_rtx = gen_reg_rtx (Pmode);
       rtx r4 = gen_rtx_REG (SImode, 4);
       rtx r5 = gen_rtx_REG (SImode, 5);
+      rtx lab;
 
       sprintf (entry, "__movmemSI%d", bytes);
-      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
+      function_symbol (func_addr_rtx, entry, SFUNC_STATIC, &lab);
       force_into (XEXP (operands[0], 0), r4);
       force_into (XEXP (operands[1], 0), r5);
-      emit_insn (gen_block_move_real (func_addr_rtx));
+      emit_insn (gen_block_move_real (func_addr_rtx, lab));
       return true;
     }
 
@@ -175,8 +179,9 @@  expand_block_move (rtx *operands)
       rtx r4 = gen_rtx_REG (SImode, 4);
       rtx r5 = gen_rtx_REG (SImode, 5);
       rtx r6 = gen_rtx_REG (SImode, 6);
+      rtx lab;
 
-      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
+      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC, &lab);
       force_into (XEXP (operands[0], 0), r4);
       force_into (XEXP (operands[1], 0), r5);
 
@@ -189,7 +194,7 @@  expand_block_move (rtx *operands)
       final_switch = 16 - ((bytes / 4) % 16);
       while_loop = ((bytes / 4) / 16 - 1) * 16;
       emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
-      emit_insn (gen_block_lump_real (func_addr_rtx));
+      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
       return true;
     }
 
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index f94459f..222f4d5 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -377,7 +377,7 @@  extern void fpscr_set_from_mem (int, HARD_REG_SET);
 extern void sh_pr_interrupt (struct cpp_reader *);
 extern void sh_pr_trapa (struct cpp_reader *);
 extern void sh_pr_nosave_low_regs (struct cpp_reader *);
-extern rtx function_symbol (rtx, const char *, enum sh_function_kind);
+extern rtx function_symbol (rtx, const char *, enum sh_function_kind, rtx *);
 extern rtx sh_get_pr_initial_val (void);
 
 extern void sh_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree,
@@ -396,4 +396,6 @@  extern bool sh_hard_regno_mode_ok (unsigned int, machine_mode);
 extern machine_mode sh_hard_regno_caller_save_mode (unsigned int, unsigned int,
 						    machine_mode);
 extern bool sh_can_use_simple_return_p (void);
+extern rtx sh_load_function_descriptor (rtx);
+extern rtx sh_our_fdpic_reg (void);
 #endif /* ! GCC_SH_PROTOS_H */
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 904201b..b468d69 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -268,6 +268,7 @@  static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
 				HOST_WIDE_INT, tree);
 static void sh_file_start (void);
+static bool sh_assemble_integer (rtx, unsigned, int);
 static bool flow_dependent_p (rtx, rtx);
 static void flow_dependent_p_1 (rtx, const_rtx, void *);
 static int shiftcosts (rtx);
@@ -276,6 +277,7 @@  static int addsubcosts (rtx);
 static int multcosts (rtx);
 static bool unspec_caller_rtx_p (rtx);
 static bool sh_cannot_copy_insn_p (rtx_insn *);
+static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
 static int sh_pr_n_sets (void);
@@ -333,6 +335,7 @@  static void sh_encode_section_info (tree, rtx, int);
 static bool sh2a_function_vector_p (tree);
 static void sh_trampoline_init (rtx, tree, rtx);
 static rtx sh_trampoline_adjust_address (rtx);
+static int sh_reloc_rw_mask (void);
 static void sh_conditional_register_usage (void);
 static bool sh_legitimate_constant_p (machine_mode, rtx);
 static int mov_insn_size (machine_mode, bool);
@@ -421,6 +424,9 @@  static const struct attribute_spec sh_attribute_table[] =
 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
 
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER sh_assemble_integer
+
 #undef TARGET_REGISTER_MOVE_COST
 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
 
@@ -679,6 +685,12 @@  static const struct attribute_spec sh_attribute_table[] =
 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
 
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
+
+#undef TARGET_ASM_RELOC_RW_MASK
+#define TARGET_ASM_RELOC_RW_MASK sh_reloc_rw_mask
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 
@@ -996,6 +1008,13 @@  sh_option_override (void)
   if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
     TARGET_ZDCBRANCH = 1;
 
+  if (TARGET_FDPIC && !flag_pic)
+    flag_pic = 2;
+
+  if (TARGET_FDPIC
+      && (TARGET_SHMEDIA || TARGET_SHCOMPACT || !TARGET_SH2))
+    sorry ("non-SH2 FDPIC");
+
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (! VALID_REGISTER_P (regno))
       sh_register_names[regno][0] = '\0';
@@ -1004,7 +1023,7 @@  sh_option_override (void)
     if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
       sh_additional_register_names[regno][0] = '\0';
 
-  if ((flag_pic && ! TARGET_PREFERGOT)
+  if (((flag_pic || TARGET_FDPIC) && ! TARGET_PREFERGOT)
       || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
     flag_no_function_cse = 1;
 
@@ -1687,6 +1706,14 @@  sh_asm_output_addr_const_extra (FILE *file, rtx x)
 	  output_addr_const (file, XVECEXP (x, 0, 1));
 	  fputs ("-.)", file);
 	  break;
+	case UNSPEC_GOTFUNCDESC:
+	  output_addr_const (file, XVECEXP (x, 0, 0));
+	  fputs ("@GOTFUNCDESC", file);
+	  break;
+	case UNSPEC_GOTOFFFUNCDESC:
+	  output_addr_const (file, XVECEXP (x, 0, 0));
+	  fputs ("@GOTOFFFUNCDESC", file);
+	  break;
 	default:
 	  return false;
 	}
@@ -1713,8 +1740,10 @@  sh_encode_section_info (tree decl, rtx rtl, int first)
 void
 prepare_move_operands (rtx operands[], machine_mode mode)
 {
+  rtx tmp, base, offset;
+
   if ((mode == SImode || mode == DImode)
-      && flag_pic
+      && (flag_pic || TARGET_FDPIC)
       && ! ((mode == Pmode || mode == ptr_mode)
 	    && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
     {
@@ -1850,7 +1879,7 @@  prepare_move_operands (rtx operands[], machine_mode mode)
 	{
 	  rtx tga_op1, tga_ret, tmp, tmp2;
 
-	  if (! flag_pic
+	  if (! flag_pic && ! TARGET_FDPIC
 	      && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
 		  || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
 		  || tls_kind == TLS_MODEL_INITIAL_EXEC))
@@ -1871,6 +1900,11 @@  prepare_move_operands (rtx operands[], machine_mode mode)
 	    {
 	    case TLS_MODEL_GLOBAL_DYNAMIC:
 	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
+	      if (TARGET_FDPIC)
+		{
+		  rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+		  emit_move_insn (pic_reg, OUR_FDPIC_REG);
+		}
 	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
 	      tmp = gen_reg_rtx (Pmode);
 	      emit_move_insn (tmp, tga_ret);
@@ -1879,6 +1913,11 @@  prepare_move_operands (rtx operands[], machine_mode mode)
 
 	    case TLS_MODEL_LOCAL_DYNAMIC:
 	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
+	      if (TARGET_FDPIC)
+		{
+		  rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+		  emit_move_insn (pic_reg, OUR_FDPIC_REG);
+		}
 	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
 
 	      tmp = gen_reg_rtx (Pmode);
@@ -1896,6 +1935,11 @@  prepare_move_operands (rtx operands[], machine_mode mode)
 	    case TLS_MODEL_INITIAL_EXEC:
 	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
 	      tmp = gen_sym2GOTTPOFF (op1);
+	      if (TARGET_FDPIC)
+		{
+		  rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+		  emit_move_insn (pic_reg, OUR_FDPIC_REG);
+		}
 	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
 	      op1 = tga_op1;
 	      break;
@@ -1922,6 +1966,20 @@  prepare_move_operands (rtx operands[], machine_mode mode)
 	  operands[1] = op1;
 	}
     }
+
+  if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+    {
+      split_const (operands[1], &base, &offset);
+      if (GET_CODE (base) == SYMBOL_REF
+	  && !offset_within_block_p (base, INTVAL (offset)))
+	{
+	  tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
+	  emit_move_insn (tmp, base);
+	  if (!arith_operand (offset, mode))
+	    offset = force_reg (mode, offset);
+	  emit_insn (gen_add3_insn (operands[0], tmp, offset));
+	}
+    }
 }
 
 /* Implement the canonicalize_comparison target hook for the combine
@@ -3026,6 +3084,26 @@  sh_file_start (void)
     }
 }
 
+/* Implement TARGET_ASM_INTEGER for SH.  With FDPIC, a word-sized
+   function symbol is written as ".long SYM@FUNCDESC" so that the data
+   refers to a function descriptor; all else uses the default hook.  */
+static bool
+sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
+{
+  const bool want_funcdesc = (TARGET_FDPIC
+			      && size == UNITS_PER_WORD
+			      && GET_CODE (value) == SYMBOL_REF
+			      && SYMBOL_REF_FUNCTION_P (value));
+
+  if (!want_funcdesc)
+    return default_assemble_integer (value, size, aligned_p);
+
+  fputs ("\t.long\t", asm_out_file);
+  output_addr_const (asm_out_file, value);
+  fputs ("@FUNCDESC\n", asm_out_file);
+  return true;
+}
+
 /* Check if PAT includes UNSPEC_CALLER unspec pattern.  */
 static bool
 unspec_caller_rtx_p (rtx pat)
@@ -3052,7 +3130,7 @@  sh_cannot_copy_insn_p (rtx_insn *insn)
 {
   rtx pat;
 
-  if (!reload_completed || !flag_pic)
+  if (!reload_completed || (!flag_pic && !TARGET_FDPIC))
     return false;
 
   if (!NONJUMP_INSN_P (insn))
@@ -3061,6 +3139,19 @@  sh_cannot_copy_insn_p (rtx_insn *insn)
     return false;
 
   pat = PATTERN (insn);
+
+  if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
+    return false;
+
+  if (TARGET_FDPIC
+      && GET_CODE (pat) == PARALLEL)
+    {
+      rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
+      if (GET_CODE (t) == USE
+	  && unspec_caller_rtx_p (XEXP (t, 0)))
+	return true;
+    }
+
   if (GET_CODE (pat) != SET)
     return false;
   pat = SET_SRC (pat);
@@ -4037,6 +4128,7 @@  expand_ashiftrt (rtx *operands)
   rtx wrk;
   char func[18];
   int value;
+  rtx lab;
 
   if (TARGET_DYNSHIFT)
     {
@@ -4102,8 +4194,8 @@  expand_ashiftrt (rtx *operands)
   /* Load the value into an arg reg and call a helper.  */
   emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
   sprintf (func, "__ashiftrt_r4_%d", value);
-  function_symbol (wrk, func, SFUNC_STATIC);
-  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
+  function_symbol (wrk, func, SFUNC_STATIC, &lab);
+  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
   emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
   return true;
 }
@@ -7954,7 +8046,9 @@  sh_expand_prologue (void)
       stack_usage += d;
     }
 
-  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+  if (flag_pic
+      && !TARGET_FDPIC
+      && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
     emit_insn (gen_GOTaddr2picreg (const0_rtx));
 
   if (SHMEDIA_REGS_STACK_ADJUST ())
@@ -7964,7 +8058,7 @@  sh_expand_prologue (void)
       function_symbol (gen_rtx_REG (Pmode, R0_REG),
 		       (TARGET_FPU_ANY
 			? "__GCC_push_shmedia_regs"
-			: "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
+			: "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT, NULL);
       emit_insn (gen_shmedia_save_restore_regs_compact
 		 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
     }
@@ -7987,7 +8081,7 @@  sh_expand_prologue (void)
       /* This must NOT go through the PLT, otherwise mach and macl
 	 may be clobbered.  */
       function_symbol (gen_rtx_REG (Pmode, R0_REG),
-		      "__GCC_shcompact_incoming_args", SFUNC_GOT);
+		      "__GCC_shcompact_incoming_args", SFUNC_GOT, NULL);
       emit_insn (gen_shcompact_incoming_args ());
     }
 
@@ -8077,7 +8171,7 @@  sh_expand_epilogue (bool sibcall_p)
       function_symbol (gen_rtx_REG (Pmode, R0_REG),
 		       (TARGET_FPU_ANY
 			? "__GCC_pop_shmedia_regs"
-			: "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
+			: "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT, NULL);
       /* This must NOT go through the PLT, otherwise mach and macl
 	 may be clobbered.  */
       emit_insn (gen_shmedia_save_restore_regs_compact
@@ -10458,7 +10552,9 @@  nonpic_symbol_mentioned_p (rtx x)
 	  || XINT (x, 1) == UNSPEC_PLT
 	  || XINT (x, 1) == UNSPEC_PCREL
 	  || XINT (x, 1) == UNSPEC_SYMOFF
-	  || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
+	  || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
+	  || XINT (x, 1) == UNSPEC_GOTFUNCDESC
+	  || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
     return false;
 
   fmt = GET_RTX_FORMAT (GET_CODE (x));
@@ -10493,7 +10589,28 @@  legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
       if (reg == NULL_RTX)
 	reg = gen_reg_rtx (Pmode);
 
-      emit_insn (gen_symGOTOFF2reg (reg, orig));
+      if (TARGET_FDPIC
+	  && GET_CODE (orig) == SYMBOL_REF
+	  && SYMBOL_REF_FUNCTION_P (orig))
+	{
+	  /* Weak functions may be NULL which doesn't work with
+	     GOTOFFFUNCDESC because the runtime offset is not known.  */
+	  if (SYMBOL_REF_WEAK (orig))
+	    emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
+	  else
+	    emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
+	}
+      else if (TARGET_FDPIC
+	       && (GET_CODE (orig) == LABEL_REF
+		   || (GET_CODE (orig) == SYMBOL_REF
+		       && SYMBOL_REF_DECL (orig)
+		       && (TREE_READONLY (SYMBOL_REF_DECL (orig))
+		           || SYMBOL_REF_EXTERNAL_P (orig)
+		           || DECL_SECTION_NAME(SYMBOL_REF_DECL(orig))) )))
+	/* In FDPIC, GOTOFF can only be used for writable data.  */
+	emit_insn (gen_symGOT2reg (reg, orig));
+      else
+	emit_insn (gen_symGOTOFF2reg (reg, orig));
       return reg;
     }
   else if (GET_CODE (orig) == SYMBOL_REF)
@@ -10501,7 +10618,10 @@  legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
       if (reg == NULL_RTX)
 	reg = gen_reg_rtx (Pmode);
 
-      emit_insn (gen_symGOT2reg (reg, orig));
+      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
+	emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
+      else
+	emit_insn (gen_symGOT2reg (reg, orig));
       return reg;
     }
   return orig;
@@ -11675,20 +11795,40 @@  sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
       emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
       return;
     }
-  emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
-		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
-				SImode));
-  emit_move_insn (adjust_address (tramp_mem, SImode, 4),
-		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
-				SImode));
-  emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
-  emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
+  if (TARGET_FDPIC)
+    {
+      rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
+      emit_move_insn (adjust_address (tramp_mem, SImode, 0), a);
+      emit_move_insn (adjust_address (tramp_mem, SImode, 4), OUR_FDPIC_REG);
+      emit_move_insn (adjust_address (tramp_mem, SImode, 8),
+		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd203d302 : 0xd302d203,
+				    SImode));
+      emit_move_insn (adjust_address (tramp_mem, SImode, 12),
+		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x5c216122 : 0x61225c21,
+				    SImode));
+      emit_move_insn (adjust_address (tramp_mem, SImode, 16),
+		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009412b : 0x412b0009,
+				    SImode));
+      emit_move_insn (adjust_address (tramp_mem, SImode, 20), cxt);
+      emit_move_insn (adjust_address (tramp_mem, SImode, 24), fnaddr);
+    }
+  else
+    {
+      emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
+		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
+				    SImode));
+      emit_move_insn (adjust_address (tramp_mem, SImode, 4),
+		      gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
+				    SImode));
+      emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
+      emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
+    }
   if (TARGET_HARD_SH4 || TARGET_SH5)
     {
       if (!TARGET_INLINE_IC_INVALIDATE
 	  || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
 	emit_library_call (function_symbol (NULL, "__ic_invalidate",
-					    FUNCTION_ORDINARY),
+					    FUNCTION_ORDINARY, NULL),
 			   LCT_NORMAL, VOIDmode, 1, tramp, SImode);
       else
 	emit_insn (gen_ic_invalidate_line (tramp));
@@ -11718,7 +11858,7 @@  sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
 	  && (! TARGET_SHCOMPACT
 	      || crtl->args.info.stack_regs == 0)
 	  && ! sh_cfun_interrupt_handler_p ()
-	  && (! flag_pic
+	  && (! flag_pic || TARGET_FDPIC
 	      || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
 	      || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
 }
@@ -11732,7 +11872,7 @@  sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
 
   if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
     emit_insn (gen_sym_label2reg (reg, sym, lab));
-  else if (sibcall_p)
+  else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
     emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
   else
     emit_insn (gen_symPLT_label2reg (reg, sym, lab));
@@ -12731,10 +12871,18 @@  sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
     sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
   else
 #endif
-  if (TARGET_SH2 && flag_pic)
+  if (TARGET_SH2 && (flag_pic || TARGET_FDPIC))
     {
-      sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
-      XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
+      if (TARGET_FDPIC)
+        {
+	  sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
+          XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
+        }
+      else
+        {
+	  sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
+          XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
+        }
     }
   else
     {
@@ -12775,11 +12923,24 @@  sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
   epilogue_completed = 0;
 }
 
+/* Return an RTX for the address of a function NAME of kind KIND,
+   placing the result in TARGET if not NULL.  LAB should be non-NULL
+   for SFUNC_STATIC, if FDPIC; it will be set to (const_int 0) if jsr
+   should be used, or a label_ref if bsrf should be used.  For FDPIC,
+   both SFUNC_GOT and SFUNC_STATIC will return the address of the
+   function itself, not a function descriptor, so they can only be
+   used with functions not using the FDPIC register that are known to
+   be called directly without a PLT entry.  */
+
 rtx
-function_symbol (rtx target, const char *name, enum sh_function_kind kind)
+function_symbol (rtx target, const char *name, enum sh_function_kind kind,
+		 rtx *lab)
 {
   rtx sym;
 
+  if (lab)
+    *lab = const0_rtx;
+
   /* If this is not an ordinary function, the name usually comes from a
      string literal or an sprintf buffer.  Make sure we use the same
      string consistently, so that cse will be able to unify address loads.  */
@@ -12787,7 +12948,7 @@  function_symbol (rtx target, const char *name, enum sh_function_kind kind)
     name = IDENTIFIER_POINTER (get_identifier (name));
   sym = gen_rtx_SYMBOL_REF (Pmode, name);
   SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
-  if (flag_pic)
+  if (flag_pic || TARGET_FDPIC)
     switch (kind)
       {
       case FUNCTION_ORDINARY:
@@ -12802,14 +12963,27 @@  function_symbol (rtx target, const char *name, enum sh_function_kind kind)
 	}
       case SFUNC_STATIC:
 	{
-	  /* ??? To allow cse to work, we use GOTOFF relocations.
-	     We could add combiner patterns to transform this into
-	     straight pc-relative calls with sym2PIC / bsrf when
-	     label load and function call are still 1:1 and in the
-	     same basic block during combine.  */
 	  rtx reg = target ? target : gen_reg_rtx (Pmode);
 
-	  emit_insn (gen_symGOTOFF2reg (reg, sym));
+	  if (TARGET_FDPIC)
+	    {
+	      /* We use PC-relative calls, since GOTOFF can only refer
+		 to writable data.  This works along with
+		 sh_sfunc_call.  */
+	      gcc_assert (lab != NULL);
+	      *lab = PATTERN (gen_call_site ());
+	      emit_insn (gen_sym_label2reg (reg, sym, *lab));
+	    }
+	  else
+	    {
+	      /* ??? To allow cse to work, we use GOTOFF relocations.
+		 we could add combiner patterns to transform this into
+		 straight pc-relative calls with sym2PIC / bsrf when
+		 label load and function call are still 1:1 and in the
+		 same basic block during combine.  */
+	      emit_insn (gen_symGOTOFF2reg (reg, sym));
+	    }
+
 	  sym = reg;
 	  break;
 	}
@@ -13432,6 +13606,12 @@  sh_conditional_register_usage (void)
       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
     }
+  if (TARGET_FDPIC)
+    {
+      fixed_regs[PIC_REG] = 1;
+      call_used_regs[PIC_REG] = 1;
+      call_really_used_regs[PIC_REG] = 1;
+    }
   /* Renesas saves and restores mac registers on call.  */
   if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
     {
@@ -13460,14 +13640,32 @@  sh_conditional_register_usage (void)
 static bool
 sh_legitimate_constant_p (machine_mode mode, rtx x)
 {
-  return (TARGET_SHMEDIA
-	  ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
-	     || x == CONST0_RTX (mode)
-	     || !TARGET_SHMEDIA_FPU
-	     || TARGET_SHMEDIA64)
-	  : (GET_CODE (x) != CONST_DOUBLE
-	     || mode == DFmode || mode == SFmode
-	     || mode == DImode || GET_MODE (x) == VOIDmode));
+  /* Reject SYMBOL + OFFSET when OFFSET leaves SYMBOL's block.  */
+  if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+    {
+      rtx base, offset;
+      split_const (x, &base, &offset);
+      if (GET_CODE (base) == SYMBOL_REF
+	  && !offset_within_block_p (base, INTVAL (offset)))
+	return false;
+    }
+
+  /* Under FDPIC, no symbolic constant or symbol + addend is legitimate.  */
+  if (TARGET_FDPIC
+      && (SYMBOLIC_CONST_P (x)
+	  || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
+	      && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
+    return false;
+
+  /* SHmedia keeps its pre-existing rule; its result is the answer.  */
+  if (TARGET_SHMEDIA)
+    return ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
+	    || x == CONST0_RTX (mode)
+	    || !TARGET_SHMEDIA_FPU || TARGET_SHMEDIA64);
+
+  return (GET_CODE (x) != CONST_DOUBLE
+	  || mode == DFmode || mode == SFmode
+	  || mode == DImode || GET_MODE (x) == VOIDmode);
 }
 
 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
@@ -14558,4 +14756,53 @@  sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
     }
 }
 
+/* Implement TARGET_CANNOT_FORCE_CONST_MEM: return true (for every MODE
+   and X) when compiling for FDPIC, false otherwise.  */
+
+static bool
+sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
+			     rtx x ATTRIBUTE_UNUSED)
+{
+  return TARGET_FDPIC;
+}
+
+/* FUNCDESC is the address of an FDPIC function descriptor.  Emit insns
+   that load the function address found there into r1 and the GOT
+   address stored at offset 4 into PIC_REG; return the rtx for r1.  */
+
+rtx
+sh_load_function_descriptor (rtx funcdesc)
+{
+  rtx entry_reg = gen_rtx_REG (Pmode, R1_REG);
+  rtx got_reg = gen_rtx_REG (Pmode, PIC_REG);
+
+  /* First the code address.  */
+  emit_move_insn (entry_reg, gen_rtx_MEM (Pmode, funcdesc));
+  /* The ABI wants the entry point read before the GOT address, so a
+     blockage keeps the two loads in this order.  */
+  emit_insn (gen_blockage ());
+  /* Then the GOT address.  */
+  emit_move_insn (got_reg,
+		  gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4)));
+  return entry_reg;
+}
+
+/* Return an rtx for the value the FDPIC register (PIC_REG) held on
+   entry to the current function, i.e. the caller-supplied FDPIC pointer.  */
+
+rtx
+sh_our_fdpic_reg (void)
+{
+  return get_hard_reg_initial_val (Pmode, PIC_REG);
+}
+
+/* Implement TARGET_ASM_RELOC_RW_MASK.  Data needing relocations may not
+   sit in read-only segments for PIC or FDPIC: return 3 then, else 0.  */
+
+static int
+sh_reloc_rw_mask (void)
+{
+  return (!flag_pic && !TARGET_FDPIC) ? 0 : 3;
+}
+
 #include "gt-sh.h"
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index aafcf28..6abfb00 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -321,7 +321,7 @@  extern int code_for_indirect_jump_scratch;
 #endif
 
 #ifndef SUBTARGET_ASM_SPEC
-#define SUBTARGET_ASM_SPEC ""
+#define SUBTARGET_ASM_SPEC "%{mfdpic:--fdpic}"
 #endif
 
 #if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
@@ -349,7 +349,7 @@  extern int code_for_indirect_jump_scratch;
 #define ASM_ISA_DEFAULT_SPEC ""
 #endif /* MASK_SH5 */
 
-#define SUBTARGET_LINK_EMUL_SUFFIX ""
+#define SUBTARGET_LINK_EMUL_SUFFIX "%{mfdpic:_fd}"
 #define SUBTARGET_LINK_SPEC ""
 
 /* Go via SH_LINK_SPEC to avoid code replication.  */
@@ -383,8 +383,18 @@  extern int code_for_indirect_jump_scratch;
 "%{m2a*:%eSH2a does not support little-endian}}"
 #endif
 
+#ifdef FDPIC_DEFAULT
+#define FDPIC_SELF_SPECS "%{!mno-fdpic:-mfdpic}"
+#else
+#define FDPIC_SELF_SPECS
+#endif
+
 #undef DRIVER_SELF_SPECS
-#define DRIVER_SELF_SPECS UNSUPPORTED_SH2A
+#define DRIVER_SELF_SPECS UNSUPPORTED_SH2A SUBTARGET_DRIVER_SELF_SPECS \
+  FDPIC_SELF_SPECS
+
+#undef SUBTARGET_DRIVER_SELF_SPECS
+#define SUBTARGET_DRIVER_SELF_SPECS
 
 #define ASSEMBLER_DIALECT assembler_dialect
 
@@ -942,6 +952,14 @@  extern char sh_additional_register_names[ADDREGNAMES_SIZE] \
    code access to data items.  */
 #define PIC_OFFSET_TABLE_REGNUM	(flag_pic ? PIC_REG : INVALID_REGNUM)
 
+/* For FDPIC, the FDPIC register is call-clobbered (otherwise PLT
+   entries would need to handle saving and restoring it).  */
+#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED TARGET_FDPIC
+
+/* An rtx holding the initial value of the FDPIC register (the FDPIC
+   pointer passed in from the caller).  */
+#define OUR_FDPIC_REG		sh_our_fdpic_reg ()
+
 #define GOT_SYMBOL_NAME "*_GLOBAL_OFFSET_TABLE_"
 
 /* Definitions for register eliminations.
@@ -1566,7 +1584,9 @@  struct sh_args {
    6 000c 00000000 	l2:	.long   function  */
 
 /* Length in units of the trampoline for entering a nested function.  */
-#define TRAMPOLINE_SIZE  (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : 16)
+/* FIXME: what happens if someone tries fdpic on SH5?  */
+#define TRAMPOLINE_SIZE \
+  (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : TARGET_FDPIC ? 32 : 16)
 
 /* Alignment required for a trampoline in bits.  */
 #define TRAMPOLINE_ALIGNMENT \
@@ -1622,6 +1642,11 @@  struct sh_args {
       || GENERAL_REGISTER_P ((unsigned) reg_renumber[(REGNO)])) \
    : (REGNO) == R0_REG || (unsigned) reg_renumber[(REGNO)] == R0_REG)
 
+/* True if SYMBOL + OFFSET constants must refer to something within
+   SYMBOL's section.  */
+/* FIXME: is this correct?  */
+#define SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P TARGET_FDPIC
+
 /* Maximum number of registers that can appear in a valid memory
    address.  */
 #define MAX_REGS_PER_ADDRESS 2
@@ -2262,9 +2287,12 @@  extern int current_function_interrupt;
 /* We have to distinguish between code and data, so that we apply
    datalabel where and only where appropriate.  Use sdataN for data.  */
 #define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
- ((flag_pic && (GLOBAL) ? DW_EH_PE_indirect : 0) \
-  | (flag_pic ? DW_EH_PE_pcrel : DW_EH_PE_absptr) \
-  | ((CODE) ? 0 : (TARGET_SHMEDIA64 ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)))
+  ((TARGET_FDPIC \
+    ? ((GLOBAL) ? DW_EH_PE_indirect | DW_EH_PE_datarel \
+       : DW_EH_PE_pcrel) \
+    : ((flag_pic && (GLOBAL) ? DW_EH_PE_indirect : 0) \
+       | (flag_pic ? DW_EH_PE_pcrel : DW_EH_PE_absptr))) \
+   | ((CODE) ? 0 : (TARGET_SHMEDIA64 ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)))
 
 /* Handle special EH pointer encodings.  Absolute, pc-relative, and
    indirect are handled automatically.  */
@@ -2277,6 +2305,17 @@  extern int current_function_interrupt;
 	SYMBOL_REF_FLAGS (ADDR) |= SYMBOL_FLAG_FUNCTION; \
 	if (0) goto DONE; \
       } \
+    if (TARGET_FDPIC \
+        && ((ENCODING) & 0xf0) == (DW_EH_PE_indirect | DW_EH_PE_datarel)) \
+      { \
+        fputs ("\t.ualong ", FILE); \
+        output_addr_const (FILE, ADDR); \
+        if (GET_CODE (ADDR) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (ADDR)) \
+          fputs ("@GOTFUNCDESC", FILE); \
+        else \
+          fputs ("@GOT", FILE); \
+        goto DONE; \
+      } \
   } while (0)
 
 #if (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index d758e3b..0b2acec 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -100,6 +100,7 @@ 
   (R8_REG	8)
   (R9_REG	9)
   (R10_REG	10)
+  (R12_REG	12)
   (R20_REG	20)
   (R21_REG	21)
   (R22_REG	22)
@@ -170,6 +171,9 @@ 
   UNSPEC_SYMOFF
   ;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .).
   UNSPEC_PCREL_SYMOFF
+  ;; For FDPIC
+  UNSPEC_GOTFUNCDESC
+  UNSPEC_GOTOFFFUNCDESC
   ;; Misc builtins
   UNSPEC_BUILTIN_STRLEN
 ])
@@ -2591,15 +2595,18 @@ 
 ;; This reload would clobber the value in r0 we are trying to store.
 ;; If we let reload allocate r0, then this problem can never happen.
 (define_insn "udivsi3_i1"
-  [(set (match_operand:SI 0 "register_operand" "=z")
+  [(set (match_operand:SI 0 "register_operand" "=z,z")
 	(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
    (clobber (reg:SI R1_REG))
    (clobber (reg:SI R4_REG))
-   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+   (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+   (use (match_operand 2 "" "Z,Ccl"))]
   "TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1"
-  "jsr	@%1%#"
+  "@
+   jsr	@%1%#
+   bsrf	%1\\n%O2:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -2648,7 +2655,7 @@ 
 })
 
 (define_insn "udivsi3_i4"
-  [(set (match_operand:SI 0 "register_operand" "=y")
+  [(set (match_operand:SI 0 "register_operand" "=y,y")
 	(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
@@ -2660,16 +2667,19 @@ 
    (clobber (reg:SI R4_REG))
    (clobber (reg:SI R5_REG))
    (clobber (reg:SI FPSCR_STAT_REG))
-   (use (match_operand:SI 1 "arith_reg_operand" "r"))
+   (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+   (use (match_operand 2 "" "Z,Ccl"))
    (use (reg:SI FPSCR_MODES_REG))]
   "TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE"
-  "jsr	@%1%#"
+  "@
+   jsr	@%1%#
+   bsrf	%1\\n%O2:%#"
   [(set_attr "type" "sfunc")
    (set_attr "fp_mode" "double")
    (set_attr "needs_delay_slot" "yes")])
 
 (define_insn "udivsi3_i4_single"
-  [(set (match_operand:SI 0 "register_operand" "=y")
+  [(set (match_operand:SI 0 "register_operand" "=y,y")
 	(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
@@ -2680,10 +2690,13 @@ 
    (clobber (reg:SI R1_REG))
    (clobber (reg:SI R4_REG))
    (clobber (reg:SI R5_REG))
-   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+   (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+   (use (match_operand 2 "" "Z,Ccl"))]
   "(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT)
    && TARGET_FPU_SINGLE"
-  "jsr	@%1%#"
+  "@
+   jsr	@%1%#
+   bsrf	%1\\n%O2:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -2737,16 +2750,17 @@ 
 	  emit_move_insn (operands[0], operands[2]);
 	  DONE;
 	}
-      function_symbol (operands[3], "__udivsi3_i4i", SFUNC_GOT);
+      function_symbol (operands[3], "__udivsi3_i4i", SFUNC_GOT, NULL);
       last = gen_udivsi3_i4_int (operands[0], operands[3]);
     }
   else if (TARGET_DIVIDE_CALL_FP)
     {
-      function_symbol (operands[3], "__udivsi3_i4", SFUNC_STATIC);
+      rtx lab;
+      function_symbol (operands[3], "__udivsi3_i4", SFUNC_STATIC, &lab);
       if (TARGET_FPU_SINGLE)
-	last = gen_udivsi3_i4_single (operands[0], operands[3]);
+	last = gen_udivsi3_i4_single (operands[0], operands[3], lab);
       else
-	last = gen_udivsi3_i4 (operands[0], operands[3]);
+	last = gen_udivsi3_i4 (operands[0], operands[3], lab);
     }
   else if (TARGET_SHMEDIA_FPU)
     {
@@ -2766,19 +2780,20 @@ 
     {
       function_symbol (operands[3],
 		       TARGET_FPU_ANY ? "__udivsi3_i4" : "__udivsi3",
-		       SFUNC_STATIC);
+		       SFUNC_STATIC, NULL);
 
       if (TARGET_SHMEDIA)
 	last = gen_udivsi3_i1_media (operands[0], operands[3]);
       else if (TARGET_FPU_ANY)
-	last = gen_udivsi3_i4_single (operands[0], operands[3]);
+	last = gen_udivsi3_i4_single (operands[0], operands[3], const0_rtx);
       else
-	last = gen_udivsi3_i1 (operands[0], operands[3]);
+	last = gen_udivsi3_i1 (operands[0], operands[3], const0_rtx);
     }
   else
     {
-      function_symbol (operands[3], "__udivsi3", SFUNC_STATIC);
-      last = gen_udivsi3_i1 (operands[0], operands[3]);
+      rtx lab;
+      function_symbol (operands[3], "__udivsi3", SFUNC_STATIC, &lab);
+      last = gen_udivsi3_i1 (operands[0], operands[3], lab);
     }
   emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
   emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
@@ -2906,7 +2921,7 @@ 
       emit_move_insn (gen_rtx_REG (DImode, R20_REG), x);
       break;
     }
-  sym = function_symbol (NULL, name, kind);
+  sym = function_symbol (NULL, name, kind, NULL);
   emit_insn (gen_divsi3_media_2 (operands[0], sym));
   DONE;
 }
@@ -2926,31 +2941,37 @@ 
 })
 
 (define_insn "divsi3_i4"
-  [(set (match_operand:SI 0 "register_operand" "=y")
+  [(set (match_operand:SI 0 "register_operand" "=y,y")
 	(div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
    (clobber (reg:SI PR_REG))
    (clobber (reg:DF DR0_REG))
    (clobber (reg:DF DR2_REG))
    (clobber (reg:SI FPSCR_STAT_REG))
-   (use (match_operand:SI 1 "arith_reg_operand" "r"))
+   (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+   (use (match_operand 2 "" "Z,Ccl"))
    (use (reg:SI FPSCR_MODES_REG))]
   "TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE"
-  "jsr	@%1%#"
+  "@
+   jsr	@%1%#
+   bsrf	%1\\n%O2:%#"
   [(set_attr "type" "sfunc")
    (set_attr "fp_mode" "double")
    (set_attr "needs_delay_slot" "yes")])
 
 (define_insn "divsi3_i4_single"
-  [(set (match_operand:SI 0 "register_operand" "=y")
+  [(set (match_operand:SI 0 "register_operand" "=y,y")
 	(div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
    (clobber (reg:SI PR_REG))
    (clobber (reg:DF DR0_REG))
    (clobber (reg:DF DR2_REG))
    (clobber (reg:SI R2_REG))
-   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+   (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+   (use (match_operand 2 "" "Z,Ccl"))]
   "(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT)
    && TARGET_FPU_SINGLE"
-  "jsr	@%1%#"
+  "@
+   jsr	@%1%#
+   bsrf	%1\\n%O2:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -2989,16 +3010,17 @@ 
   /* Emit the move of the address to a pseudo outside of the libcall.  */
   if (TARGET_DIVIDE_CALL_TABLE)
     {
-      function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT);
+      function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT, NULL);
       last = gen_divsi3_i4_int (operands[0], operands[3]);
     }
   else if (TARGET_DIVIDE_CALL_FP)
     {
-      function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC);
+      rtx lab;
+      function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC, &lab);
       if (TARGET_FPU_SINGLE)
-	last = gen_divsi3_i4_single (operands[0], operands[3]);
+	last = gen_divsi3_i4_single (operands[0], operands[3], lab);
       else
-	last = gen_divsi3_i4 (operands[0], operands[3]);
+	last = gen_divsi3_i4 (operands[0], operands[3], lab);
     }
   else if (TARGET_SH2A)
     {
@@ -3103,23 +3125,23 @@ 
 	  emit_move_insn (gen_rtx_REG (Pmode, R20_REG), tab_base);
 	}
       if (TARGET_FPU_ANY && TARGET_SH1)
-	function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC);
+	function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC, NULL);
       else if (TARGET_DIVIDE_CALL2)
-	function_symbol (operands[3], "__sdivsi3_2", SFUNC_STATIC);
+	function_symbol (operands[3], "__sdivsi3_2", SFUNC_STATIC, NULL);
       else
-	function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT);
+	function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT, NULL);
 
       if (TARGET_SHMEDIA)
 	last = ((TARGET_DIVIDE_CALL2 ? gen_divsi3_media_2 : gen_divsi3_i1_media)
 		(operands[0], operands[3]));
       else if (TARGET_FPU_ANY)
-	last = gen_divsi3_i4_single (operands[0], operands[3]);
+	last = gen_divsi3_i4_single (operands[0], operands[3], const0_rtx);
       else
 	last = gen_divsi3_i1 (operands[0], operands[3]);
     }
   else
     {
-      function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT);
+      function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT, NULL);
       last = gen_divsi3_i1 (operands[0], operands[3]);
     }
   emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
@@ -3713,7 +3735,7 @@  label:
     {
       /* The address must be set outside the libcall,
 	 since it goes into a pseudo.  */
-      rtx sym = function_symbol (NULL, "__mulsi3", SFUNC_STATIC);
+      rtx sym = function_symbol (NULL, "__mulsi3", SFUNC_STATIC, NULL);
       rtx addr = force_reg (SImode, sym);
       rtx insns = gen_mulsi3_call (operands[0], operands[1],
 				   operands[2], addr);
@@ -4970,8 +4992,9 @@  label:
     {
       emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
       rtx funcaddr = gen_reg_rtx (Pmode);
-      function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC);
-      emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr));
+      rtx lab;
+      function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC, &lab);
+      emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr, lab));
 
       DONE;
     }
@@ -5024,15 +5047,18 @@  label:
 ;; In order to make combine understand the truncation of the shift amount
 ;; operand we have to allow it to use pseudo regs for the shift operands.
 (define_insn "ashlsi3_d_call"
-  [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+  [(set (match_operand:SI 0 "arith_reg_dest" "=z,z")
 	(ashift:SI (reg:SI R4_REG)
-		   (and:SI (match_operand:SI 1 "arith_reg_operand" "z")
+		   (and:SI (match_operand:SI 1 "arith_reg_operand" "z,z")
 			   (const_int 31))))
-   (use (match_operand:SI 2 "arith_reg_operand" "r"))
+   (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+   (use (match_operand 3 "" "Z,Ccl"))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))]
   "TARGET_SH1 && !TARGET_DYNSHIFT"
-  "jsr	@%2%#"
+  "@
+   jsr	@%2%#
+   bsrf	%2\\n%O3:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -5374,12 +5400,15 @@  label:
 (define_insn "ashrsi3_n"
   [(set (reg:SI R4_REG)
 	(ashiftrt:SI (reg:SI R4_REG)
-		     (match_operand:SI 0 "const_int_operand" "i")))
+		     (match_operand:SI 0 "const_int_operand" "i,i")))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
-   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+   (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+   (use (match_operand 2 "" "Z,Ccl"))]
   "TARGET_SH1"
-  "jsr	@%1%#"
+  "@
+   jsr	@%1%#
+   bsrf	%1\\n%O2:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -5532,8 +5561,9 @@  label:
     {
       emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
       rtx funcaddr = gen_reg_rtx (Pmode);
-      function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC);
-      emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr));
+      rtx lab;
+      function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC, &lab);
+      emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr, lab));
       DONE;
     }
 })
@@ -5585,15 +5615,18 @@  label:
 ;; In order to make combine understand the truncation of the shift amount
 ;; operand we have to allow it to use pseudo regs for the shift operands.
 (define_insn "lshrsi3_d_call"
-  [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+  [(set (match_operand:SI 0 "arith_reg_dest" "=z,z")
 	(lshiftrt:SI (reg:SI R4_REG)
-		     (and:SI (match_operand:SI 1 "arith_reg_operand" "z")
+		     (and:SI (match_operand:SI 1 "arith_reg_operand" "z,z")
 			     (const_int 31))))
-   (use (match_operand:SI 2 "arith_reg_operand" "r"))
+   (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+   (use (match_operand 3 "" "Z,Ccl"))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))]
   "TARGET_SH1 && !TARGET_DYNSHIFT"
-  "jsr	@%2%#"
+  "@
+   jsr	@%2%#
+   bsrf	%2\\n%O3:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -7315,7 +7348,8 @@  label:
     }
   else if (TARGET_SHCOMPACT)
     {
-      operands[1] = function_symbol (NULL, "__ic_invalidate", SFUNC_STATIC);
+      operands[1] = function_symbol (NULL, "__ic_invalidate", SFUNC_STATIC,
+				     NULL);
       operands[1] = force_reg (Pmode, operands[1]);
       emit_insn (gen_ic_invalidate_line_compact (operands[0], operands[1]));
       DONE;
@@ -7397,7 +7431,7 @@  label:
 
   tramp = force_reg (Pmode, operands[0]);
   sfun = force_reg (Pmode, function_symbol (NULL, "__init_trampoline",
-					    SFUNC_STATIC));
+					    SFUNC_STATIC, NULL));
   emit_move_insn (gen_rtx_REG (SImode, R2_REG), operands[1]);
   emit_move_insn (gen_rtx_REG (SImode, R3_REG), operands[2]);
 
@@ -9459,7 +9493,27 @@  label:
 	 (match_operand 1 "" ""))
    (use (reg:SI FPSCR_MODES_REG))
    (clobber (reg:SI PR_REG))]
-  "TARGET_SH1"
+  "TARGET_SH1 && !TARGET_FDPIC"
+{
+  if (TARGET_SH2A && (dbr_sequence_length () == 0))
+    return "jsr/n	@%0";
+  else
+    return "jsr	@%0%#";
+}
+  [(set_attr "type" "call")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "needs_delay_slot" "yes")
+   (set_attr "fp_set" "unknown")])
+
+(define_insn "calli_fdpic"
+  [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+	 (match_operand 1 "" ""))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (clobber (reg:SI PR_REG))]
+  "TARGET_SH1 && TARGET_FDPIC"
 {
   if (TARGET_SH2A && (dbr_sequence_length () == 0))
     return "jsr/n	@%0";
@@ -9588,7 +9642,28 @@  label:
 	      (match_operand 2 "" "")))
    (use (reg:SI FPSCR_MODES_REG))
    (clobber (reg:SI PR_REG))]
-  "TARGET_SH1"
+  "TARGET_SH1 && !TARGET_FDPIC"
+{
+  if (TARGET_SH2A && (dbr_sequence_length () == 0))
+    return "jsr/n	@%1";
+  else
+    return "jsr	@%1%#";
+}
+  [(set_attr "type" "call")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "needs_delay_slot" "yes")
+   (set_attr "fp_set" "unknown")])
+
+(define_insn "call_valuei_fdpic"
+  [(set (match_operand 0 "" "=rf")
+	(call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+	      (match_operand 2 "" "")))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (clobber (reg:SI PR_REG))]
+  "TARGET_SH1 && TARGET_FDPIC"
 {
   if (TARGET_SH2A && (dbr_sequence_length () == 0))
     return "jsr/n	@%1";
@@ -9725,6 +9800,12 @@  label:
 	      (clobber (reg:SI PR_REG))])]
   ""
 {
+  if (TARGET_FDPIC)
+    {
+      rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+      emit_move_insn (pic_reg, OUR_FDPIC_REG);
+    }
+
   if (TARGET_SHMEDIA)
     {
       operands[0] = shmedia_prepare_call_address (operands[0], 0);
@@ -9760,7 +9841,8 @@  label:
       emit_insn (gen_force_mode_for_call ());
 
       operands[0]
-	= function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+	= function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+			   SFUNC_GOT, NULL);
       operands[0] = force_reg (SImode, operands[0]);
 
       emit_move_insn (r0, func);
@@ -9784,7 +9866,7 @@  label:
       emit_insn (gen_symGOTPLT2reg (reg, XEXP (operands[0], 0)));
       XEXP (operands[0], 0) = reg;
     }
-  if (!flag_pic && TARGET_SH2A
+  if (!flag_pic && !TARGET_FDPIC && TARGET_SH2A
       && MEM_P (operands[0])
       && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF)
     {
@@ -9795,7 +9877,7 @@  label:
 	  DONE;
 	}
     }
-  if (flag_pic && TARGET_SH2
+  if ((flag_pic || TARGET_FDPIC) && TARGET_SH2
       && MEM_P (operands[0])
       && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF)
     {
@@ -9808,7 +9890,13 @@  label:
     operands[1] = operands[2];
   }
 
-  emit_call_insn (gen_calli (operands[0], operands[1]));
+  if (TARGET_FDPIC)
+    {
+      operands[0] = sh_load_function_descriptor (operands[0]);
+      emit_call_insn (gen_calli_fdpic (operands[0], operands[1]));
+    }
+  else
+    emit_call_insn (gen_calli (operands[0], operands[1]));
   DONE;
 })
 
@@ -9888,7 +9976,7 @@  label:
   emit_insn (gen_force_mode_for_call ());
 
   operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
-				 SFUNC_GOT);
+				 SFUNC_GOT, NULL);
   operands[0] = force_reg (SImode, operands[0]);
 
   emit_move_insn (r0, func);
@@ -9913,6 +10001,12 @@  label:
 	      (clobber (reg:SI PR_REG))])]
   ""
 {
+  if (TARGET_FDPIC)
+    {
+      rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+      emit_move_insn (pic_reg, OUR_FDPIC_REG);
+    }
+
   if (TARGET_SHMEDIA)
     {
       operands[1] = shmedia_prepare_call_address (operands[1], 0);
@@ -9949,7 +10043,8 @@  label:
       emit_insn (gen_force_mode_for_call ());
 
       operands[1]
-	= function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+	= function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+			   SFUNC_GOT, NULL);
       operands[1] = force_reg (SImode, operands[1]);
 
       emit_move_insn (r0, func);
@@ -9975,7 +10070,7 @@  label:
       emit_insn (gen_symGOTPLT2reg (reg, XEXP (operands[1], 0)));
       XEXP (operands[1], 0) = reg;
     }
-  if (!flag_pic && TARGET_SH2A
+  if (!flag_pic && !TARGET_FDPIC && TARGET_SH2A
       && MEM_P (operands[1])
       && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF)
     {
@@ -9986,7 +10081,7 @@  label:
 	  DONE;
 	}
     }
-  if (flag_pic && TARGET_SH2
+  if ((flag_pic || TARGET_FDPIC) && TARGET_SH2
       && MEM_P (operands[1])
       && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF)
     {
@@ -9997,7 +10092,14 @@  label:
   else
     operands[1] = force_reg (SImode, XEXP (operands[1], 0));
 
-  emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2]));
+  if (TARGET_FDPIC)
+    {
+      operands[1] = sh_load_function_descriptor (operands[1]);
+      emit_call_insn (gen_call_valuei_fdpic (operands[0], operands[1],
+					     operands[2]));
+    }
+  else
+    emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2]));
   DONE;
 })
 
@@ -10006,7 +10108,21 @@  label:
 	 (match_operand 1 "" ""))
    (use (reg:SI FPSCR_MODES_REG))
    (return)]
-  "TARGET_SH1"
+  "TARGET_SH1 && !TARGET_FDPIC"
+  "jmp	@%0%#"
+  [(set_attr "needs_delay_slot" "yes")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "type" "jump_ind")])
+
+(define_insn "sibcalli_fdpic"
+  [(call (mem:SI (match_operand:SI 0 "register_operand" "k"))
+	 (match_operand 1 "" ""))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (return)]
+  "TARGET_SH1 && TARGET_FDPIC"
   "jmp	@%0%#"
   [(set_attr "needs_delay_slot" "yes")
    (set (attr "fp_mode")
@@ -10020,7 +10136,25 @@  label:
    (use (match_operand 2 "" ""))
    (use (reg:SI FPSCR_MODES_REG))
    (return)]
-  "TARGET_SH2"
+  "TARGET_SH2 && !TARGET_FDPIC"
+{
+  return       "braf	%0"	"\n"
+	 "%O2:%#";
+}
+  [(set_attr "needs_delay_slot" "yes")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "type" "jump_ind")])
+
+(define_insn "sibcalli_pcrel_fdpic"
+  [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "k"))
+	 (match_operand 1 "" ""))
+   (use (match_operand 2 "" ""))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (return)]
+  "TARGET_SH2 && TARGET_FDPIC"
 {
   return       "braf	%0"	"\n"
 	 "%O2:%#";
@@ -10053,7 +10187,7 @@  label:
    (use (reg:SI FPSCR_MODES_REG))
    (clobber (match_scratch:SI 2 "=&k"))
    (return)]
-  "TARGET_SH2"
+  "TARGET_SH2 && !TARGET_FDPIC"
   "#"
   "reload_completed"
   [(const_int 0)]
@@ -10073,6 +10207,33 @@  label:
 		      (const_string "single") (const_string "double")))
    (set_attr "type" "jump_ind")])
 
+(define_insn_and_split "sibcall_pcrel_fdpic"
+  [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" ""))
+	 (match_operand 1 "" ""))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (clobber (match_scratch:SI 2 "=k"))
+   (return)]
+  "TARGET_SH2 && TARGET_FDPIC"
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  rtx lab = PATTERN (gen_call_site ());
+  rtx call_insn;
+
+  sh_expand_sym_label2reg (operands[2], operands[0], lab, true);
+  call_insn = emit_call_insn (gen_sibcalli_pcrel_fdpic (operands[2], operands[1],
+						  copy_rtx (lab)));
+  SIBLING_CALL_P (call_insn) = 1;
+  DONE;
+}
+  [(set_attr "needs_delay_slot" "yes")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "type" "jump_ind")])
+
 (define_insn "sibcall_compact"
   [(call (mem:SI (match_operand:SI 0 "register_operand" "k,k"))
 	 (match_operand 1 "" ""))
@@ -10117,6 +10278,12 @@  label:
      (return)])]
   ""
 {
+  if (TARGET_FDPIC)
+    {
+      rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+      emit_move_insn (pic_reg, OUR_FDPIC_REG);
+    }
+
   if (TARGET_SHMEDIA)
     {
       operands[0] = shmedia_prepare_call_address (operands[0], 1);
@@ -10162,7 +10329,8 @@  label:
       emit_insn (gen_force_mode_for_call ());
 
       operands[0]
-	= function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+	= function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+			   SFUNC_GOT, NULL);
       operands[0] = force_reg (SImode, operands[0]);
 
       /* We don't need a return trampoline, since the callee will
@@ -10188,7 +10356,7 @@  label:
       emit_insn (gen_symGOT2reg (reg, XEXP (operands[0], 0)));
       XEXP (operands[0], 0) = reg;
     }
-  if (flag_pic && TARGET_SH2
+  if ((flag_pic || TARGET_FDPIC) && TARGET_SH2
       && MEM_P (operands[0])
       && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
       /* The PLT needs the PIC register, but the epilogue would have
@@ -10196,13 +10364,24 @@  label:
 	 static functions.  */
       && SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0)))
     {
-      emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0), operands[1]));
+      if (TARGET_FDPIC)
+	emit_call_insn (gen_sibcall_pcrel_fdpic (XEXP (operands[0], 0),
+						 operands[1]));
+      else
+	emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0),
+					   operands[1]));
       DONE;
     }
   else
     operands[0] = force_reg (SImode, XEXP (operands[0], 0));
 
-  emit_call_insn (gen_sibcalli (operands[0], operands[1]));
+  if (TARGET_FDPIC)
+    {
+      operands[0] = sh_load_function_descriptor (operands[0]);
+      emit_call_insn (gen_sibcalli_fdpic (operands[0], operands[1]));
+    }
+  else
+    emit_call_insn (gen_sibcalli (operands[0], operands[1]));
   DONE;
 })
 
@@ -10212,7 +10391,22 @@  label:
 	      (match_operand 2 "" "")))
    (use (reg:SI FPSCR_MODES_REG))
    (return)]
-  "TARGET_SH1"
+  "TARGET_SH1 && !TARGET_FDPIC"
+  "jmp	@%1%#"
+  [(set_attr "needs_delay_slot" "yes")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_valuei_fdpic"
+  [(set (match_operand 0 "" "=rf")
+	(call (mem:SI (match_operand:SI 1 "register_operand" "k"))
+	      (match_operand 2 "" "")))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (return)]
+  "TARGET_SH1 && TARGET_FDPIC"
   "jmp	@%1%#"
   [(set_attr "needs_delay_slot" "yes")
    (set (attr "fp_mode")
@@ -10227,7 +10421,26 @@  label:
    (use (match_operand 3 "" ""))
    (use (reg:SI FPSCR_MODES_REG))
    (return)]
-  "TARGET_SH2"
+  "TARGET_SH2 && !TARGET_FDPIC"
+{
+  return       "braf	%1"	"\n"
+	 "%O3:%#";
+}
+  [(set_attr "needs_delay_slot" "yes")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_valuei_pcrel_fdpic"
+  [(set (match_operand 0 "" "=rf")
+	(call (mem:SI (match_operand:SI 1 "arith_reg_operand" "k"))
+	      (match_operand 2 "" "")))
+   (use (match_operand 3 "" ""))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (return)]
+  "TARGET_SH2 && TARGET_FDPIC"
 {
   return       "braf	%1"	"\n"
 	 "%O3:%#";
@@ -10245,7 +10458,7 @@  label:
    (use (reg:SI FPSCR_MODES_REG))
    (clobber (match_scratch:SI 3 "=&k"))
    (return)]
-  "TARGET_SH2"
+  "TARGET_SH2 && !TARGET_FDPIC"
   "#"
   "reload_completed"
   [(const_int 0)]
@@ -10258,6 +10471,38 @@  label:
 							operands[3],
 							operands[2],
 							copy_rtx (lab)));
+
+  SIBLING_CALL_P (call_insn) = 1;
+  DONE;
+}
+  [(set_attr "needs_delay_slot" "yes")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "type" "jump_ind")])
+
+(define_insn_and_split "sibcall_value_pcrel_fdpic"
+  [(set (match_operand 0 "" "=rf")
+	(call (mem:SI (match_operand:SI 1 "symbol_ref_operand" ""))
+	      (match_operand 2 "" "")))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (clobber (match_scratch:SI 3 "=k"))
+   (return)]
+  "TARGET_SH2 && TARGET_FDPIC"
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  rtx lab = PATTERN (gen_call_site ());
+  rtx call_insn;
+
+  sh_expand_sym_label2reg (operands[3], operands[1], lab, true);
+  call_insn = emit_call_insn (gen_sibcall_valuei_pcrel_fdpic (operands[0],
+							      operands[3],
+							      operands[2],
+							      copy_rtx (lab)));
+
   SIBLING_CALL_P (call_insn) = 1;
   DONE;
 }
@@ -10314,6 +10559,12 @@  label:
      (return)])]
   ""
 {
+  if (TARGET_FDPIC)
+    {
+      rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+      emit_move_insn (pic_reg, OUR_FDPIC_REG);
+    }
+
   if (TARGET_SHMEDIA)
     {
       operands[1] = shmedia_prepare_call_address (operands[1], 1);
@@ -10360,7 +10611,8 @@  label:
       emit_insn (gen_force_mode_for_call ());
 
       operands[1]
-	= function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+	= function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+			   SFUNC_GOT, NULL);
       operands[1] = force_reg (SImode, operands[1]);
 
       /* We don't need a return trampoline, since the callee will
@@ -10387,7 +10639,7 @@  label:
       emit_insn (gen_symGOT2reg (reg, XEXP (operands[1], 0)));
       XEXP (operands[1], 0) = reg;
     }
-  if (flag_pic && TARGET_SH2
+  if ((flag_pic || TARGET_FDPIC) && TARGET_SH2
       && MEM_P (operands[1])
       && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
       /* The PLT needs the PIC register, but the epilogue would have
@@ -10395,15 +10647,28 @@  label:
 	 static functions.  */
       && SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0)))
     {
-      emit_call_insn (gen_sibcall_value_pcrel (operands[0],
-					       XEXP (operands[1], 0),
-					       operands[2]));
+      if (TARGET_FDPIC)
+	emit_call_insn (gen_sibcall_value_pcrel_fdpic (operands[0],
+						       XEXP (operands[1], 0),
+						       operands[2]));
+      else
+	emit_call_insn (gen_sibcall_value_pcrel (operands[0],
+						 XEXP (operands[1], 0),
+						 operands[2]));
       DONE;
     }
   else
     operands[1] = force_reg (SImode, XEXP (operands[1], 0));
 
-  emit_call_insn (gen_sibcall_valuei (operands[0], operands[1], operands[2]));
+  if (TARGET_FDPIC)
+    {
+      operands[1] = sh_load_function_descriptor (operands[1]);
+      emit_call_insn (gen_sibcall_valuei_fdpic (operands[0], operands[1],
+						operands[2]));
+    }
+  else
+    emit_call_insn (gen_sibcall_valuei (operands[0], operands[1],
+					operands[2]));
   DONE;
 })
 
@@ -10487,7 +10752,7 @@  label:
   emit_insn (gen_force_mode_for_call ());
 
   operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
-				 SFUNC_GOT);
+				 SFUNC_GOT, NULL);
   operands[1] = force_reg (SImode, operands[1]);
 
   emit_move_insn (r0, func);
@@ -10685,6 +10950,13 @@  label:
       DONE;
     }
 
+  if (TARGET_FDPIC)
+    {
+      rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+      emit_move_insn (pic_reg, OUR_FDPIC_REG);
+      DONE;
+    }
+
   operands[1] = gen_rtx_REG (Pmode, PIC_REG);
   operands[2] = gen_rtx_SYMBOL_REF (VOIDmode, GOT_SYMBOL_NAME);
 
@@ -10817,9 +11089,15 @@  label:
    (set (match_operand 0 "" "") (mem (match_dup 3)))]
   ""
 {
+  rtx picreg;
   rtx mem;
   bool stack_chk_guard_p = false;
 
+  if (TARGET_FDPIC)
+    picreg = OUR_FDPIC_REG;
+  else
+    picreg = gen_rtx_REG (Pmode, PIC_REG);
+
   operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
   operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
 
@@ -10859,11 +11137,11 @@  label:
      insn to avoid combining (set A (plus rX r12)) and (set op0 (mem A))
      when rX is a GOT address for the guard symbol.  Ugly but doesn't
      matter because this is a rare situation.  */
+  /* FIXME: The original FDPIC patch did not have the SSP case here; verify.  */
   if (stack_chk_guard_p)
     emit_insn (gen_chk_guard_add (operands[3], operands[2]));
   else
-    emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2],
-					       gen_rtx_REG (Pmode, PIC_REG)));
+    emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2], picreg));
 
   /* N.B. This is not constant for a GOTPLT relocation.  */
   mem = gen_rtx_MEM (Pmode, operands[3]);
@@ -10894,6 +11172,26 @@  label:
   DONE;
 })
 
+(define_expand "sym2GOTFUNCDESC"
+  [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOTFUNCDESC))]
+  "TARGET_FDPIC"
+  "")
+
+(define_expand "symGOTFUNCDESC2reg"
+  [(match_operand 0 "" "") (match_operand 1 "" "")]
+  "TARGET_FDPIC"
+{
+  rtx gotsym, insn;
+
+  gotsym = gen_sym2GOTFUNCDESC (operands[1]);
+  PUT_MODE (gotsym, Pmode);
+  insn = emit_insn (gen_symGOT_load (operands[0], gotsym));
+
+  MEM_READONLY_P (SET_SRC (PATTERN (insn))) = 1;
+
+  DONE;
+})
+
 (define_expand "symGOTPLT2reg"
   [(match_operand 0 "" "") (match_operand 1 "" "")]
   ""
@@ -10915,23 +11213,49 @@  label:
   [(match_operand 0 "" "") (match_operand 1 "" "")]
   ""
 {
+  rtx picreg;
   rtx gotoffsym, insn;
   rtx t = (!can_create_pseudo_p ()
 	   ? operands[0]
 	   : gen_reg_rtx (GET_MODE (operands[0])));
 
+  if (TARGET_FDPIC)
+    picreg = OUR_FDPIC_REG;
+  else
+    picreg = gen_rtx_REG (Pmode, PIC_REG);
+
   gotoffsym = gen_sym2GOTOFF (operands[1]);
   PUT_MODE (gotoffsym, Pmode);
   emit_move_insn (t, gotoffsym);
-  insn = emit_move_insn (operands[0],
-			 gen_rtx_PLUS (Pmode, t,
-				       gen_rtx_REG (Pmode, PIC_REG)));
+  insn = emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, picreg));
 
   set_unique_reg_note (insn, REG_EQUAL, operands[1]);
 
   DONE;
 })
 
+(define_expand "sym2GOTOFFFUNCDESC"
+  [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOTOFFFUNCDESC))]
+  "TARGET_FDPIC"
+  "")
+
+(define_expand "symGOTOFFFUNCDESC2reg"
+  [(match_operand 0 "" "") (match_operand 1 "" "")]
+  "TARGET_FDPIC"
+{
+  rtx picreg = OUR_FDPIC_REG;
+  rtx gotoffsym;
+  rtx t = (!can_create_pseudo_p ()
+	   ? operands[0]
+	   : gen_reg_rtx (GET_MODE (operands[0])));
+
+  gotoffsym = gen_sym2GOTOFFFUNCDESC (operands[1]);
+  PUT_MODE (gotoffsym, Pmode);
+  emit_move_insn (t, gotoffsym);
+  emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, picreg));
+  DONE;
+})
+
 (define_expand "symPLT_label2reg"
   [(set (match_operand:SI 0 "" "")
 	(const:SI
@@ -11608,7 +11932,8 @@  label:
 {
   rtx reg = gen_rtx_REG (Pmode, R0_REG);
 
-  function_symbol (reg, "__GCC_shcompact_return_trampoline", SFUNC_STATIC);
+  function_symbol (reg, "__GCC_shcompact_return_trampoline", SFUNC_STATIC,
+		   NULL);
   emit_jump_insn (gen_shcompact_return_tramp_i ());
   DONE;
 })
@@ -12688,18 +13013,22 @@  label:
 (define_insn "block_move_real"
   [(parallel [(set (mem:BLK (reg:SI R4_REG))
 		   (mem:BLK (reg:SI R5_REG)))
-	      (use (match_operand:SI 0 "arith_reg_operand" "r"))
+	      (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+	      (use (match_operand 1 "" "Z,Ccl"))
 	      (clobber (reg:SI PR_REG))
 	      (clobber (reg:SI R0_REG))])]
   "TARGET_SH1 && ! TARGET_HARD_SH4"
-  "jsr	@%0%#"
+  "@
+   jsr	@%0%#
+   bsrf	%0\\n%O1:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
 (define_insn "block_lump_real"
   [(parallel [(set (mem:BLK (reg:SI R4_REG))
 		   (mem:BLK (reg:SI R5_REG)))
-	      (use (match_operand:SI 0 "arith_reg_operand" "r"))
+	      (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+	      (use (match_operand 1 "" "Z,Ccl"))
 	      (use (reg:SI R6_REG))
 	      (clobber (reg:SI PR_REG))
 	      (clobber (reg:SI T_REG))
@@ -12708,27 +13037,33 @@  label:
 	      (clobber (reg:SI R6_REG))
 	      (clobber (reg:SI R0_REG))])]
   "TARGET_SH1 && ! TARGET_HARD_SH4"
-  "jsr	@%0%#"
+  "@
+   jsr	@%0%#
+   bsrf	%0\\n%O1:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
 (define_insn "block_move_real_i4"
   [(parallel [(set (mem:BLK (reg:SI R4_REG))
 		   (mem:BLK (reg:SI R5_REG)))
-	      (use (match_operand:SI 0 "arith_reg_operand" "r"))
+	      (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+	      (use (match_operand 1 "" "Z,Ccl"))
 	      (clobber (reg:SI PR_REG))
 	      (clobber (reg:SI R0_REG))
 	      (clobber (reg:SI R1_REG))
 	      (clobber (reg:SI R2_REG))])]
   "TARGET_HARD_SH4"
-  "jsr	@%0%#"
+  "@
+   jsr	@%0%#
+   bsrf	%0\\n%O1:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
 (define_insn "block_lump_real_i4"
   [(parallel [(set (mem:BLK (reg:SI R4_REG))
 		   (mem:BLK (reg:SI R5_REG)))
-	      (use (match_operand:SI 0 "arith_reg_operand" "r"))
+	      (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+	      (use (match_operand 1 "" "Z,Ccl"))
 	      (use (reg:SI R6_REG))
 	      (clobber (reg:SI PR_REG))
 	      (clobber (reg:SI T_REG))
@@ -12740,7 +13075,9 @@  label:
 	      (clobber (reg:SI R2_REG))
 	      (clobber (reg:SI R3_REG))])]
   "TARGET_HARD_SH4"
-  "jsr	@%0%#"
+  "@
+   jsr	@%0%#
+   bsrf	%0\\n%O1:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
diff --git a/gcc/config/sh/sh.opt b/gcc/config/sh/sh.opt
index 8875b5d..7a50ca0 100644
--- a/gcc/config/sh/sh.opt
+++ b/gcc/config/sh/sh.opt
@@ -264,6 +264,10 @@  mdivsi3_libfunc=
 Target RejectNegative Joined Var(sh_divsi3_libfunc) Init("")
 Specify name for 32 bit signed division function
 
+mfdpic
+Target Report Var(TARGET_FDPIC)
+Generate ELF FDPIC code
+
 mfmovd
 Target RejectNegative Mask(FMOVD)
 Enable the use of 64-bit floating point registers in fmov instructions.  See -mdalign if 64-bit alignment is required.
diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 1fd773e..fe57b97 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -1810,6 +1810,9 @@  When neither of these configure options are used, the default will be
 128-bit @code{long double} when built against GNU C Library 2.4 and later,
 64-bit @code{long double} otherwise.
 
+@item --enable-fdpic
+On SH Linux systems, generate ELF FDPIC code.
+
 @item --with-gmp=@var{pathname}
 @itemx --with-gmp-include=@var{pathname}
 @itemx --with-gmp-lib=@var{pathname}
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ebfaaa1..8b26eac 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21178,6 +21178,10 @@  in effect.
 Prefer zero-displacement conditional branches for conditional move instruction
 patterns.  This can result in faster code on the SH4 processor.
 
+@item -mfdpic
+@opindex mfdpic
+Generate code using the FDPIC ABI.
+
 @end table
 
 @node Solaris 2 Options
diff --git a/include/longlong.h b/include/longlong.h
index a0b2ce1..19164ed 100644
--- a/include/longlong.h
+++ b/include/longlong.h
@@ -1102,6 +1102,29 @@  extern UDItype __umulsidi3 (USItype, USItype);
 /* This is the same algorithm as __udiv_qrnnd_c.  */
 #define UDIV_NEEDS_NORMALIZATION 1
 
+#ifdef __FDPIC__
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)			\
+			__attribute__ ((visibility ("hidden")));	\
+    /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */	\
+    __asm__ (								\
+	"mov%M4 %4,r5\n"						\
+"	swap.w %3,r4\n"							\
+"	swap.w r5,r6\n"							\
+"	mov.l @%5,r2\n"							\
+"	jsr @r2\n"							\
+"	shll16 r6\n"							\
+"	swap.w r4,r4\n"							\
+"	mov.l @%5,r2\n"							\
+"	jsr @r2\n"							\
+"	swap.w r1,%0\n"							\
+"	or r1,%0"							\
+	: "=r" (q), "=&z" (r)						\
+	: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)		\
+	: "r1", "r2", "r4", "r5", "r6", "pr", "t");			\
+  } while (0)
+#else
 #define udiv_qrnnd(q, r, n1, n0, d) \
   do {									\
     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)			\
@@ -1121,6 +1144,7 @@  extern UDItype __umulsidi3 (USItype, USItype);
 	: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)		\
 	: "r1", "r2", "r4", "r5", "r6", "pr", "t");			\
   } while (0)
+#endif
 
 #define UDIV_TIME 80
 
diff --git a/libitm/config/sh/sjlj.S b/libitm/config/sh/sjlj.S
index 410cef6..76ec6df 100644
--- a/libitm/config/sh/sjlj.S
+++ b/libitm/config/sh/sjlj.S
@@ -58,9 +58,6 @@  _ITM_beginTransaction:
 	jsr	@r1
 	 mov	r15, r5
 #else
-	mova	.Lgot, r0
-	mov.l	.Lgot, r12
-	add	r0, r12
 	mov.l	.Lbegin, r1
 	bsrf	r1
 	 mov	r15, r5
@@ -80,13 +77,11 @@  _ITM_beginTransaction:
 	cfi_endproc
 
         .align  2
-.Lgot:
-	.long	_GLOBAL_OFFSET_TABLE_
 .Lbegin:
 #if defined HAVE_ATTRIBUTE_VISIBILITY || !defined __PIC__
 	.long	GTM_begin_transaction
 #else
-	.long	GTM_begin_transaction@PLT-(.Lbegin0-.)
+	.long	GTM_begin_transaction@PCREL-(.Lbegin0-.)
 #endif
 	.size	_ITM_beginTransaction, . - _ITM_beginTransaction