diff mbox series

[2/4] x86: Add -mfunction-return=

Message ID 20180112131549.18143-3-hjl.tools@gmail.com
State New
Headers show
Series x86: CVE-2017-5715, aka Spectre | expand

Commit Message

H.J. Lu Jan. 12, 2018, 1:15 p.m. UTC
Add -mfunction-return= option to convert function return to call and
return thunks.  The default is 'keep', which keeps function return
unmodified.  'thunk' converts function return to call and return thunk.
'thunk-inline' converts function return to inlined call and return thunk.
'thunk-extern' converts function return to external call and return
thunk provided in a separate object file.  You can control this behavior
for a specific function by using the function attribute function_return.

Function return thunk is the same as memory thunk for -mindirect-branch=
where the return address is at the top of the stack:

__x86_return_thunk:
	call L2
L1:
	pause
	jmp L1
L2:
	lea 8(%rsp), %rsp|lea 4(%esp), %esp
	ret

and function return becomes

	jmp __x86_return_thunk

-mindirect-branch= tests are updated with -mfunction-return=keep to
avoid false test failures when -mfunction-return=thunk is added to
RUNTESTFLAGS for "make check".

gcc/

	* config/i386/i386-protos.h (ix86_output_function_return): New.
	* config/i386/i386.c (ix86_set_indirect_branch_type): Also
	set function_return_type.
	(indirect_thunk_name): Add ret_p to indicate thunk for function
	return.
	(output_indirect_thunk_function): Pass false to
	indirect_thunk_name.
	(ix86_output_indirect_branch): Likewise.
	(output_indirect_thunk_function): Create alias for function
	return thunk if regno < 0.
	(ix86_output_function_return): New function.
	(ix86_handle_fndecl_attribute): Handle function_return.
	(ix86_attribute_table): Add function_return.
	* config/i386/i386.h (machine_function): Add
	function_return_type.
	* config/i386/i386.md (simple_return_internal): Use
	ix86_output_function_return.
	(simple_return_internal_long): Likewise.
	* config/i386/i386.opt (mfunction-return=): New option.
	(indirect_branch): Mention -mfunction-return=.
	* doc/extend.texi: Document function_return function attribute.
	* doc/invoke.texi: Document -mfunction-return= option.

gcc/testsuite/

	* gcc.target/i386/indirect-thunk-1.c (dg-options): Add
	-mfunction-return=keep.
	* gcc.target/i386/indirect-thunk-2.c: Likewise.
	* gcc.target/i386/indirect-thunk-3.c: Likewise.
	* gcc.target/i386/indirect-thunk-4.c: Likewise.
	* gcc.target/i386/indirect-thunk-5.c: Likewise.
	* gcc.target/i386/indirect-thunk-6.c: Likewise.
	* gcc.target/i386/indirect-thunk-7.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-1.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-2.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-3.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-4.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-5.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-6.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-7.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-8.c: Likewise.
	* gcc.target/i386/indirect-thunk-bnd-1.c: Likewise.
	* gcc.target/i386/indirect-thunk-bnd-2.c: Likewise.
	* gcc.target/i386/indirect-thunk-bnd-3.c: Likewise.
	* gcc.target/i386/indirect-thunk-bnd-4.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-1.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-2.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-3.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-4.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-5.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-6.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-7.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-1.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-2.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-3.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-4.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-5.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-6.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-7.c: Likewise.
	* gcc.target/i386/ret-thunk-1.c: New test.
	* gcc.target/i386/ret-thunk-10.c: Likewise.
	* gcc.target/i386/ret-thunk-11.c: Likewise.
	* gcc.target/i386/ret-thunk-12.c: Likewise.
	* gcc.target/i386/ret-thunk-13.c: Likewise.
	* gcc.target/i386/ret-thunk-14.c: Likewise.
	* gcc.target/i386/ret-thunk-15.c: Likewise.
	* gcc.target/i386/ret-thunk-16.c: Likewise.
	* gcc.target/i386/ret-thunk-2.c: Likewise.
	* gcc.target/i386/ret-thunk-3.c: Likewise.
	* gcc.target/i386/ret-thunk-4.c: Likewise.
	* gcc.target/i386/ret-thunk-5.c: Likewise.
	* gcc.target/i386/ret-thunk-6.c: Likewise.
	* gcc.target/i386/ret-thunk-7.c: Likewise.
	* gcc.target/i386/ret-thunk-8.c: Likewise.
	* gcc.target/i386/ret-thunk-9.c: Likewise.
---
 gcc/config/i386/i386-protos.h                      |   1 +
 gcc/config/i386/i386.c                             | 149 ++++++++++++++++++++-
 gcc/config/i386/i386.h                             |   3 +
 gcc/config/i386/i386.md                            |   9 +-
 gcc/config/i386/i386.opt                           |   6 +-
 gcc/doc/extend.texi                                |   9 ++
 gcc/doc/invoke.texi                                |  13 +-
 gcc/testsuite/gcc.target/i386/indirect-thunk-1.c   |   2 +-
 gcc/testsuite/gcc.target/i386/indirect-thunk-2.c   |   2 +-
 gcc/testsuite/gcc.target/i386/indirect-thunk-3.c   |   2 +-
 gcc/testsuite/gcc.target/i386/indirect-thunk-4.c   |   2 +-
 gcc/testsuite/gcc.target/i386/indirect-thunk-5.c   |   2 +-
 gcc/testsuite/gcc.target/i386/indirect-thunk-6.c   |   2 +-
 gcc/testsuite/gcc.target/i386/indirect-thunk-7.c   |   2 +-
 .../gcc.target/i386/indirect-thunk-attr-1.c        |   2 +-
 .../gcc.target/i386/indirect-thunk-attr-2.c        |   2 +-
 .../gcc.target/i386/indirect-thunk-attr-3.c        |   2 +-
 .../gcc.target/i386/indirect-thunk-attr-4.c        |   2 +-
 .../gcc.target/i386/indirect-thunk-attr-5.c        |   2 +-
 .../gcc.target/i386/indirect-thunk-attr-6.c        |   2 +-
 .../gcc.target/i386/indirect-thunk-attr-7.c        |   2 +-
 .../gcc.target/i386/indirect-thunk-attr-8.c        |   2 +-
 .../gcc.target/i386/indirect-thunk-bnd-1.c         |   2 +-
 .../gcc.target/i386/indirect-thunk-bnd-2.c         |   2 +-
 .../gcc.target/i386/indirect-thunk-bnd-3.c         |   2 +-
 .../gcc.target/i386/indirect-thunk-bnd-4.c         |   2 +-
 .../gcc.target/i386/indirect-thunk-extern-1.c      |   2 +-
 .../gcc.target/i386/indirect-thunk-extern-2.c      |   2 +-
 .../gcc.target/i386/indirect-thunk-extern-3.c      |   2 +-
 .../gcc.target/i386/indirect-thunk-extern-4.c      |   2 +-
 .../gcc.target/i386/indirect-thunk-extern-5.c      |   2 +-
 .../gcc.target/i386/indirect-thunk-extern-6.c      |   2 +-
 .../gcc.target/i386/indirect-thunk-extern-7.c      |   2 +-
 .../gcc.target/i386/indirect-thunk-inline-1.c      |   2 +-
 .../gcc.target/i386/indirect-thunk-inline-2.c      |   2 +-
 .../gcc.target/i386/indirect-thunk-inline-3.c      |   2 +-
 .../gcc.target/i386/indirect-thunk-inline-4.c      |   2 +-
 .../gcc.target/i386/indirect-thunk-inline-5.c      |   2 +-
 .../gcc.target/i386/indirect-thunk-inline-6.c      |   2 +-
 .../gcc.target/i386/indirect-thunk-inline-7.c      |   2 +-
 gcc/testsuite/gcc.target/i386/ret-thunk-1.c        |  12 ++
 gcc/testsuite/gcc.target/i386/ret-thunk-10.c       |  22 +++
 gcc/testsuite/gcc.target/i386/ret-thunk-11.c       |  22 +++
 gcc/testsuite/gcc.target/i386/ret-thunk-12.c       |  21 +++
 gcc/testsuite/gcc.target/i386/ret-thunk-13.c       |  21 +++
 gcc/testsuite/gcc.target/i386/ret-thunk-14.c       |  21 +++
 gcc/testsuite/gcc.target/i386/ret-thunk-15.c       |  21 +++
 gcc/testsuite/gcc.target/i386/ret-thunk-16.c       |  18 +++
 gcc/testsuite/gcc.target/i386/ret-thunk-2.c        |  12 ++
 gcc/testsuite/gcc.target/i386/ret-thunk-3.c        |  12 ++
 gcc/testsuite/gcc.target/i386/ret-thunk-4.c        |  12 ++
 gcc/testsuite/gcc.target/i386/ret-thunk-5.c        |  14 ++
 gcc/testsuite/gcc.target/i386/ret-thunk-6.c        |  13 ++
 gcc/testsuite/gcc.target/i386/ret-thunk-7.c        |  13 ++
 gcc/testsuite/gcc.target/i386/ret-thunk-8.c        |  14 ++
 gcc/testsuite/gcc.target/i386/ret-thunk-9.c        |  23 ++++
 56 files changed, 478 insertions(+), 49 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ret-thunk-9.c

Comments

Jan Hubicka Jan. 12, 2018, 5:55 p.m. UTC | #1
> Add -mfunction-return= option to convert function return to call and
> return thunks.  The default is 'keep', which keeps function return
> unmodified.  'thunk' converts function return to call and return thunk.
> 'thunk-inline' converts function return to inlined call and return thunk.  'thunk-extern' converts function return to external call and return
> thunk provided in a separate object file.  You can control this behavior
> for a specific function by using the function attribute function_return.
> 
> Function return thunk is the same as memory thunk for -mindirect-branch=
> where the return address is at the top of the stack:
> 
> __x86_return_thunk:
> 	call L2
> L1:
> 	pause
> 	jmp L1
> L2:
> 	lea 8(%rsp), %rsp|lea 4(%esp), %esp
> 	ret
> 
> and function return becomes
> 
> 	jmp __x86_return_thunk
> 
> -mindirect-branch= tests are updated with -mfunction-return=keep to
> avoid false test failures when -mfunction-return=thunk is added to
> RUNTESTFLAGS for "make check".
> 
> gcc/
> 
> 	* config/i386/i386-protos.h (ix86_output_function_return): New.
> 	* config/i386/i386.c (ix86_set_indirect_branch_type): Also
> 	set function_return_type.
> 	(indirect_thunk_name): Add ret_p to indicate thunk for function
> 	return.
> 	(output_indirect_thunk_function): Pass false to
> 	indirect_thunk_name.
> 	(ix86_output_indirect_branch): Likewise.
> 	(output_indirect_thunk_function): Create alias for function
> 	return thunk if regno < 0.
> 	(ix86_output_function_return): New function.
> 	(ix86_handle_fndecl_attribute): Handle function_return.
> 	(ix86_attribute_table): Add function_return.
> 	* config/i386/i386.h (machine_function): Add
> 	function_return_type.
> 	* config/i386/i386.md (simple_return_internal): Use
> 	ix86_output_function_return.
> 	(simple_return_internal_long): Likewise.
> 	* config/i386/i386.opt (mfunction-return=): New option.
> 	(indirect_branch): Mention -mfunction-return=.
> 	* doc/extend.texi: Document function_return function attribute.
> 	* doc/invoke.texi: Document -mfunction-return= option.
> 
> gcc/testsuite/
> 
> 	* gcc.target/i386/indirect-thunk-1.c (dg-options): Add
> 	-mfunction-return=keep.
> 	* gcc.target/i386/indirect-thunk-2.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-3.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-4.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-5.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-6.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-7.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-attr-1.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-attr-2.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-attr-3.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-attr-4.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-attr-5.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-attr-6.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-attr-7.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-attr-8.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-bnd-1.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-bnd-2.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-bnd-3.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-bnd-4.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-extern-1.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-extern-2.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-extern-3.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-extern-4.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-extern-5.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-extern-6.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-extern-7.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-inline-1.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-inline-2.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-inline-3.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-inline-4.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-inline-5.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-inline-6.c: Likewise.
> 	* gcc.target/i386/indirect-thunk-inline-7.c: Likewise.
> 	* gcc.target/i386/ret-thunk-1.c: New test.
> 	* gcc.target/i386/ret-thunk-10.c: Likewise.
> 	* gcc.target/i386/ret-thunk-11.c: Likewise.
> 	* gcc.target/i386/ret-thunk-12.c: Likewise.
> 	* gcc.target/i386/ret-thunk-13.c: Likewise.
> 	* gcc.target/i386/ret-thunk-14.c: Likewise.
> 	* gcc.target/i386/ret-thunk-15.c: Likewise.
> 	* gcc.target/i386/ret-thunk-16.c: Likewise.
> 	* gcc.target/i386/ret-thunk-2.c: Likewise.
> 	* gcc.target/i386/ret-thunk-3.c: Likewise.
> 	* gcc.target/i386/ret-thunk-4.c: Likewise.
> 	* gcc.target/i386/ret-thunk-5.c: Likewise.
> 	* gcc.target/i386/ret-thunk-6.c: Likewise.
> 	* gcc.target/i386/ret-thunk-7.c: Likewise.
> 	* gcc.target/i386/ret-thunk-8.c: Likewise.
> 	* gcc.target/i386/ret-thunk-9.c: Likewise.
> 
> @@ -10846,6 +10888,35 @@ output_indirect_thunk_function (bool need_bnd_p, int regno)
>  	ASM_OUTPUT_LABEL (asm_out_file, name);
>        }
>  
> +  if (regno < 0)
> +    {
> +      /* Create alias for __x86.return_thunk/__x86.return_thunk_bnd.  */
> +      char alias[32];
> +
> +      indirect_thunk_name (alias, regno, need_bnd_p, true);
> +      ASM_OUTPUT_DEF (asm_out_file, alias, name);
> +#if TARGET_MACHO
> +      if (TARGET_MACHO)
> +	{
> +	  fputs ("\t.weak_definition\t", asm_out_file);
> +	  assemble_name (asm_out_file, alias);
> +	  fputs ("\n\t.private_extern\t", asm_out_file);
> +	  assemble_name (asm_out_file, alias);
> +	  putc ('\n', asm_out_file);
> +	}
> +#else
> +      if (USE_HIDDEN_LINKONCE)
> +	{
> +	  fputs ("\t.globl\t", asm_out_file);
> +	  assemble_name (asm_out_file, alias);
> +	  putc ('\n', asm_out_file);
> +	  fputs ("\t.hidden\t", asm_out_file);
> +	  assemble_name (asm_out_file, alias);
> +	  putc ('\n', asm_out_file);
> +	}
> +#endif

Again, I would hope that using the generic function output machinery will
avoid a dependency on the assembler in use here.

> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 59e5cc8e7e4..7b17773592b 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1026,9 +1026,13 @@ mindirect-branch=
>  Target Report RejectNegative Joined Enum(indirect_branch) Var(ix86_indirect_branch) Init(indirect_branch_keep)
>  Convert indirect call and jump to call and return thunks.
>  
> +mfunction-return=
> +Target Report RejectNegative Joined Enum(indirect_branch) Var(ix86_function_return) Init(indirect_branch_keep)
> +Convert function return to call and return thunk.

I suppose you also want ix86_can_inline_p to check that caller and callee agree about thunk usage
for calls/indirect jumps.  For returns we do not care, as an optimized-out return is safe?

>  
> +@item function_return("@var{choice}")
> +@cindex @code{function_return} function attribute, x86
> +On x86 targets, the @code{function_return} attribute causes the compiler
> +to convert function return with @var{choice}.  @samp{keep} keeps function
> +return unmodified.  @samp{thunk} converts function return to call and
> +return thunk.  @samp{thunk-inline} converts function return to inlined
> +call and return thunk.  @samp{thunk-extern} converts function return to
> +external call and return thunk provided in a separate object file.

Again, please extend both documentation hunks so it is clear what the purpose
of this hack is.

Honza
H.J. Lu Jan. 13, 2018, 4:09 p.m. UTC | #2
On Fri, Jan 12, 2018 at 9:55 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> Add -mfunction-return= option to convert function return to call and
>> return thunks.  The default is 'keep', which keeps function return
>> unmodified.  'thunk' converts function return to call and return thunk.
>> 'thunk-inline' converts function return to inlined call and return thunk.  'thunk-extern' converts function return to external call and return
>> thunk provided in a separate object file.  You can control this behavior
>> for a specific function by using the function attribute function_return.
>>
>> Function return thunk is the same as memory thunk for -mindirect-branch=
>> where the return address is at the top of the stack:
>>
>> __x86_return_thunk:
>>       call L2
>> L1:
>>       pause
>>       jmp L1
>> L2:
>>       lea 8(%rsp), %rsp|lea 4(%esp), %esp
>>       ret
>>
>> and function return becomes
>>
>>       jmp __x86_return_thunk
>>
>> -mindirect-branch= tests are updated with -mfunction-return=keep to
>> avoid false test failures when -mfunction-return=thunk is added to
>> RUNTESTFLAGS for "make check".
>>
>> gcc/
>>
>>       * config/i386/i386-protos.h (ix86_output_function_return): New.
>>       * config/i386/i386.c (ix86_set_indirect_branch_type): Also
>>       set function_return_type.
>>       (indirect_thunk_name): Add ret_p to indicate thunk for function
>>       return.
>>       (output_indirect_thunk_function): Pass false to
>>       indirect_thunk_name.
>>       (ix86_output_indirect_branch): Likewise.
>>       (output_indirect_thunk_function): Create alias for function
>>       return thunk if regno < 0.
>>       (ix86_output_function_return): New function.
>>       (ix86_handle_fndecl_attribute): Handle function_return.
>>       (ix86_attribute_table): Add function_return.
>>       * config/i386/i386.h (machine_function): Add
>>       function_return_type.
>>       * config/i386/i386.md (simple_return_internal): Use
>>       ix86_output_function_return.
>>       (simple_return_internal_long): Likewise.
>>       * config/i386/i386.opt (mfunction-return=): New option.
>>       (indirect_branch): Mention -mfunction-return=.
>>       * doc/extend.texi: Document function_return function attribute.
>>       * doc/invoke.texi: Document -mfunction-return= option.
>>
>> gcc/testsuite/
>>
>>       * gcc.target/i386/indirect-thunk-1.c (dg-options): Add
>>       -mfunction-return=keep.
>>       * gcc.target/i386/indirect-thunk-2.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-3.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-4.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-5.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-6.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-7.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-attr-1.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-attr-2.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-attr-3.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-attr-4.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-attr-5.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-attr-6.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-attr-7.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-attr-8.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-bnd-1.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-bnd-2.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-bnd-3.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-bnd-4.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-extern-1.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-extern-2.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-extern-3.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-extern-4.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-extern-5.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-extern-6.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-extern-7.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-inline-1.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-inline-2.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-inline-3.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-inline-4.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-inline-5.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-inline-6.c: Likewise.
>>       * gcc.target/i386/indirect-thunk-inline-7.c: Likewise.
>>       * gcc.target/i386/ret-thunk-1.c: New test.
>>       * gcc.target/i386/ret-thunk-10.c: Likewise.
>>       * gcc.target/i386/ret-thunk-11.c: Likewise.
>>       * gcc.target/i386/ret-thunk-12.c: Likewise.
>>       * gcc.target/i386/ret-thunk-13.c: Likewise.
>>       * gcc.target/i386/ret-thunk-14.c: Likewise.
>>       * gcc.target/i386/ret-thunk-15.c: Likewise.
>>       * gcc.target/i386/ret-thunk-16.c: Likewise.
>>       * gcc.target/i386/ret-thunk-2.c: Likewise.
>>       * gcc.target/i386/ret-thunk-3.c: Likewise.
>>       * gcc.target/i386/ret-thunk-4.c: Likewise.
>>       * gcc.target/i386/ret-thunk-5.c: Likewise.
>>       * gcc.target/i386/ret-thunk-6.c: Likewise.
>>       * gcc.target/i386/ret-thunk-7.c: Likewise.
>>       * gcc.target/i386/ret-thunk-8.c: Likewise.
>>       * gcc.target/i386/ret-thunk-9.c: Likewise.
>>
>> @@ -10846,6 +10888,35 @@ output_indirect_thunk_function (bool need_bnd_p, int regno)
>>       ASM_OUTPUT_LABEL (asm_out_file, name);
>>        }
>>
>> +  if (regno < 0)
>> +    {
>> +      /* Create alias for __x86.return_thunk/__x86.return_thunk_bnd.  */
>> +      char alias[32];
>> +
>> +      indirect_thunk_name (alias, regno, need_bnd_p, true);
>> +      ASM_OUTPUT_DEF (asm_out_file, alias, name);
>> +#if TARGET_MACHO
>> +      if (TARGET_MACHO)
>> +     {
>> +       fputs ("\t.weak_definition\t", asm_out_file);
>> +       assemble_name (asm_out_file, alias);
>> +       fputs ("\n\t.private_extern\t", asm_out_file);
>> +       assemble_name (asm_out_file, alias);
>> +       putc ('\n', asm_out_file);
>> +     }
>> +#else
>> +      if (USE_HIDDEN_LINKONCE)
>> +     {
>> +       fputs ("\t.globl\t", asm_out_file);
>> +       assemble_name (asm_out_file, alias);
>> +       putc ('\n', asm_out_file);
>> +       fputs ("\t.hidden\t", asm_out_file);
>> +       assemble_name (asm_out_file, alias);
>> +       putc ('\n', asm_out_file);
>> +     }
>> +#endif
>
> Again, I would hope that using generic function output machinery will avoid
> dependency on used assembler here.

I copied it from PIC thunk.  We can clean them up for GCC 9.

>> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
>> index 59e5cc8e7e4..7b17773592b 100644
>> --- a/gcc/config/i386/i386.opt
>> +++ b/gcc/config/i386/i386.opt
>> @@ -1026,9 +1026,13 @@ mindirect-branch=
>>  Target Report RejectNegative Joined Enum(indirect_branch) Var(ix86_indirect_branch) Init(indirect_branch_keep)
>>  Convert indirect call and jump to call and return thunks.
>>
>> +mfunction-return=
>> +Target Report RejectNegative Joined Enum(indirect_branch) Var(ix86_function_return) Init(indirect_branch_keep)
>> +Convert function return to call and return thunk.
>
> I suppose you also want ix86_can_inline_p to check that caller&callee agree about thunk usage
> for calls/indirect jumps. For returns we do not care as optimized out return is safe?

Thunk usage should be opaque to the rest of the compiler.

>>
>> +@item function_return("@var{choice}")
>> +@cindex @code{function_return} function attribute, x86
>> +On x86 targets, the @code{function_return} attribute causes the compiler
>> +to convert function return with @var{choice}.  @samp{keep} keeps function
>> +return unmodified.  @samp{thunk} converts function return to call and
>> +return thunk.  @samp{thunk-inline} converts function return to inlined
>> +call and return thunk.  @samp{thunk-extern} converts function return to
>> +external call and return thunk provided in a separate object file.
>
> Again please extend both documentation hunks so it is clear what is purpose
> of this hack.

David, can you help here?
Woodhouse, David Jan. 13, 2018, 4:51 p.m. UTC | #3
On Sat, 2018-01-13 at 08:09 -0800, H.J. Lu wrote:
> 
> > Again please extend both documentation hunks so it is clear what is purpose
> > of this hack.
> 
> David, can you help here?

On most older CPUs the indirect branch issue is limited to actual
indirect branches.

On Skylake-era CPUs, however, an underflow of the RSB (return stack
buffer) caused by a call/ret imbalance (such as on context switch) will
cause predictions to come from the same problematic branch predictor —
essentially, allowing 'ret' instructions to be targeted by an attacker
in precisely the same way as indirect branches.

Note that there are plenty of other causes for RSB underflow. Like
taking an SMI, which clears the RSB completely. Or various other
things. Including a call stack deeper than 16 function calls.

The -mfunction-return option was an experiment to use the retpoline
approach for 'ret' too. I forget the implementation (I could look
upthread), but essentially it was equivalent to replacing ret with
'pop %r12; jmp __x86_indirect_thunk_r12' so that you *never* deplete
the RSB because of the 'call;ret' trick in the retpoline itself. Hence
your exposure on Skylake was reduced to the possibility of taking an
SMI while *in* the retpoline.

This would, of course, be forcing a mispredict/pipeline stall on every
'ret', rather than only on every indirect branch as in the original
retpoline idea. HJ added the code, but I'm not sure anyone at Intel
ever did actually do the *testing* to establish the performance
characteristics. Dave/Arjan?

For my part, right *now* the kernel doesn't use this option. But then,
we don't have a comprehensive answer for Skylake yet other than "use
the new microcode features". Which are slower than retpoline, but not
as *much* slower on Skylake as they are on other CPUs.
H.J. Lu Jan. 13, 2018, 5:25 p.m. UTC | #4
On Sat, Jan 13, 2018 at 8:51 AM, Woodhouse, David <dwmw@amazon.co.uk> wrote:
> On Sat, 2018-01-13 at 08:09 -0800, H.J. Lu wrote:
>>
>> > Again please extend both documentation hunks so it is clear what is purpose
>> > of this hack.
>>
>> David, can you help here?
>
> On most older CPUs the indirect branch issue is limited to actual
> indirect branches.
>
> On Skylake-era CPUs, however, an underflow of the RSB (return stack
> buffer) caused by a call/ret imbalance (such as on context switch) will
> cause predictions to come from the same problematic branch predictor —
> essentially, allowing 'ret' instructions to be targeted by an attacker
> in precisely the same way as indirect branches.
>
> Note that there are plenty of other causes for RSB underflow. Like
> taking an SMI, which clears the RSB completely. Or various other
> things. Including a call stack deeper than 16 function calls.
>
> The -mfunction-return option was an experiment to use the retpoline
> approach for 'ret' too. I forget the implementation (I could look
> upthread), but essentially it was equivalent to replacing ret with
> 'pop %r12; jmp __x86_indirect_thunk_r12' so that you *never* deplete
> the RSB because of the 'call;ret' trick in the retpoline itself. Hence
> your exposure on Skylake was reduced to the possibility of taking an
> SMI while *in* the retpoline.

RCX/ECX is a scratch register for both 32-bit and 64-bit.  Is it OK
to use it for "ret":

pop %rcx
jmp __x86_indirect_thunk_rcx

> This would, of course, be forcing a mispredict/pipeline stall on every
> 'ret', rather than only on every indirect branch as in the original
> retpoline idea. HJ added the code, but I'm not sure anyone at Intel
> ever did actually do the *testing* to establish the performance
> characteristics. Dave/Arjan?
>
> For my part, right *now* the kernel doesn't use this option. But then,
> we don't have a comprehensive answer for Skylake yet other than "use
> the new microcode features". Which are slower than retpoline, but not
> as *much* slower on Skylake as they are on other CPUs.
>
> Amazon Web Services UK Limited. Registered in England and Wales with registration number 08650665 and which has its registered office at 60 Holborn Viaduct, London EC1A 2FD, United Kingdom.
Jan Hubicka Jan. 13, 2018, 5:31 p.m. UTC | #5
> On Sat, Jan 13, 2018 at 8:51 AM, Woodhouse, David <dwmw@amazon.co.uk> wrote:
> > On Sat, 2018-01-13 at 08:09 -0800, H.J. Lu wrote:
> >>
> >> > Again please extend both documentation hunks so it is clear what is purpose
> >> > of this hack.
> >>
> >> David, can you help here?
> >
> > On most older CPUs the indirect branch issue is limited to actual
> > indirect branches.
> >
> > On Skylake-era CPUs, however, an underflow of the RSB (return stack
> > buffer) caused by a call/ret imbalance (such as on context switch) will
> > cause predictions to come from the same problematic branch predictor —
> > essentially, allowing 'ret' instructions to be targeted by an attacker
> > in precisely the same way as indirect branches.
> >
> > Note that there are plenty of other causes for RSB underflow. Like
> > taking an SMI, which clears the RSB completely. Or various other
> > things. Including a call stack deeper than 16 function calls.
> >
> > The -mfunction-return option was an experiment to use the retpoline
> > approach for 'ret' too. I forget the implementation (I could look
> > upthread), but essentially it was equivalent to replacing ret with
> > 'pop %r12; jmp __x86_indirect_thunk_r12' so that you *never* deplete
> > the RSB because of the 'call;ret' trick in the retpoline itself. Hence
> > your exposure on Skylake was reduced to the possibility of taking an
> > SMI while *in* the retpoline.
> 
> RCX/ECX is a scratch register for both 32-bit and 64-bit.  Is it OK
> to use it for "ret":
> 
> pop %rcx
> jmp __x86_indirect_thunk_rcx

Is it also safe for local functions and IPA-ra?
Also, what will your patchset do with the large code model?  Perhaps we want
to sorry there that it is not supported.

Honza
> 
> > This would, of course, be forcing a mispredict/pipeline stall on every
> > 'ret', rather than only on every indirect branch as in the original
> > retpoline idea. HJ added the code, but I'm not sure anyone at Intel
> > ever did actually do the *testing* to establish the performance
> > characteristics. Dave/Arjan?
> >
> > For my part, right *now* the kernel doesn't use this option. But then,
> > we don't have a comprehensive answer for Skylake yet other than "use
> > the new microcode features". Which are slower than retpoline, but not
> > as *much* slower on Skylake as they are on other CPUs.
> >
> > Amazon Web Services UK Limited. Registered in England and Wales with registration number 08650665 and which has its registered office at 60 Holborn Viaduct, London EC1A 2FD, United Kingdom.
> 
> 
> 
> -- 
> H.J.
H.J. Lu Jan. 13, 2018, 7:09 p.m. UTC | #6
On Sat, Jan 13, 2018 at 9:31 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> On Sat, Jan 13, 2018 at 8:51 AM, Woodhouse, David <dwmw@amazon.co.uk> wrote:
>> > On Sat, 2018-01-13 at 08:09 -0800, H.J. Lu wrote:
>> >>
>> >> > Again please extend both documentation hunks so it is clear what is purpose
>> >> > of this hack.
>> >>
>> >> David, can you help here?
>> >
>> > On most older CPUs the indirect branch issue is limited to actual
>> > indirect branches.
>> >
>> > On Skylake-era CPUs, however, an underflow of the RSB (return stack
>> > buffer) caused by a call/ret imbalance (such as on context switch) will
>> > cause predictions to come from the same problematic branch predictor —
>> > essentially, allowing 'ret' instructions to be targeted by an attacker
>> > in precisely the same way as indirect branches.
>> >
>> > Note that there are plenty of other causes for RSB underflow. Like
>> > taking an SMI, which clears the RSB completely. Or various other
>> > things. Including a call stack deeper than 16 function calls.
>> >
>> > The -mfunction-return option was an experiment to use the retpoline
>> > approach for 'ret' too. I forget the implementation (I could look
>> > upthread), but essentially it was equivalent to replacing ret with
>> > 'pop %r12; jmp __x86_indirect_thunk_r12' so that you *never* deplete
>> > the RSB because of the 'call;ret' trick in the retpoline itself. Hence
>> > your exposure on Skylake was reduced to the possibility of taking an
>> > SMI while *in* the retpoline.
>>
>> RCX/ECX is a scratch register for both 32-bit and 64-bit.  Is it OK
>> to use it for "ret":
>>
>> pop %rcx
>> jmp __x86_indirect_thunk_rcx

There is no need for that since the return address is already on top
of stack.  We can just do

   __x86_return_thunk:
            call L2
    L1:
            pause
            jmp L1
    L2:
            lea 8(%rsp), %rsp|lea 4(%esp), %esp
            ret

ret becomes: jmp __x86_return_thunk as my current patch does.

> Is it also safe for local functions and IPA-ra?

Yes.

> Also what will your patchset do with large code model? Perhaps we want
> to sorry there that it is not supported.

True.  I will prepare a separate patch for it.

> Honza
>>
>> > This would, of course, be forcing a mispredict/pipeline stall on every
>> > 'ret', rather than only on every indirect branch as in the original
>> > retpoline idea. HJ added the code, but I'm not sure anyone at Intel
>> > ever did actually do the *testing* to establish the performance
>> > characteristics. Dave/Arjan?
>> >
>> > For my part, right *now* the kernel doesn't use this option. But then,
>> > we don't have a comprehensive answer for Skylake yet other than "use
>> > the new microcode features". Which are slower than retpoline, but not
>> > as *much* slower on Skylake as they are on other CPUs.
>> >
>> > Amazon Web Services UK Limited. Registered in England and Wales with registration number 08650665 and which has its registered office at 60 Holborn Viaduct, London EC1A 2FD, United Kingdom.
>>
>>
>>
>> --
>> H.J.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index bf11cc426f9..fb86f00b3a6 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -306,6 +306,7 @@  extern enum attr_cpu ix86_schedule;
 
 extern const char * ix86_output_call_insn (rtx_insn *insn, rtx call_op);
 extern const char * ix86_output_indirect_jmp (rtx call_op, bool ret_p);
+extern const char * ix86_output_function_return (bool long_p);
 extern bool ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
 						machine_mode mode);
 extern int ix86_min_insn_size (rtx_insn *);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cead7a61a91..9ffcb69d6d7 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5833,6 +5833,31 @@  ix86_set_indirect_branch_type (tree fndecl)
       else
 	cfun->machine->indirect_branch_type = ix86_indirect_branch;
     }
+
+  if (cfun->machine->function_return_type == indirect_branch_unset)
+    {
+      tree attr = lookup_attribute ("function_return",
+				    DECL_ATTRIBUTES (fndecl));
+      if (attr != NULL)
+	{
+	  tree args = TREE_VALUE (attr);
+	  if (args == NULL)
+	    gcc_unreachable ();
+	  tree cst = TREE_VALUE (args);
+	  if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0)
+	    cfun->machine->function_return_type = indirect_branch_keep;
+	  else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0)
+	    cfun->machine->function_return_type = indirect_branch_thunk;
+	  else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0)
+	    cfun->machine->function_return_type = indirect_branch_thunk_inline;
+	  else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0)
+	    cfun->machine->function_return_type = indirect_branch_thunk_extern;
+	  else
+	    gcc_unreachable ();
+	}
+      else
+	cfun->machine->function_return_type = ix86_function_return;
+    }
 }
 
 /* Establish appropriate back-end context for processing the function
@@ -10695,8 +10720,12 @@  static int indirect_thunks_bnd_used;
 /* Fills in the label name that should be used for the indirect thunk.  */
 
 static void
-indirect_thunk_name (char name[32], int regno, bool need_bnd_p)
+indirect_thunk_name (char name[32], int regno, bool need_bnd_p,
+		     bool ret_p)
 {
+  if (regno >= 0 && ret_p)
+    gcc_unreachable ();
+
   if (USE_HIDDEN_LINKONCE)
     {
       const char *bnd = need_bnd_p ? "_bnd" : "";
@@ -10711,7 +10740,10 @@  indirect_thunk_name (char name[32], int regno, bool need_bnd_p)
 		   bnd, reg_prefix, reg_names[regno]);
 	}
       else
-	sprintf (name, "__x86_indirect_thunk%s", bnd);
+	{
+	  const char *ret = ret_p ? "return" : "indirect";
+	  sprintf (name, "__x86_%s_thunk%s", ret, bnd);
+	}
     }
   else
     {
@@ -10724,10 +10756,20 @@  indirect_thunk_name (char name[32], int regno, bool need_bnd_p)
 	}
       else
 	{
-	  if (need_bnd_p)
-	    ASM_GENERATE_INTERNAL_LABEL (name, "LITB", 0);
+	  if (ret_p)
+	    {
+	      if (need_bnd_p)
+		ASM_GENERATE_INTERNAL_LABEL (name, "LRTB", 0);
+	      else
+		ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
+	    }
 	  else
-	    ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
+	    {
+	      if (need_bnd_p)
+		ASM_GENERATE_INTERNAL_LABEL (name, "LITB", 0);
+	      else
+		ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
+	    }
 	}
     }
 }
@@ -10803,7 +10845,7 @@  output_indirect_thunk_function (bool need_bnd_p, int regno)
   tree decl;
 
   /* Create __x86_indirect_thunk/__x86_indirect_thunk_bnd.  */
-  indirect_thunk_name (name, regno, need_bnd_p);
+  indirect_thunk_name (name, regno, need_bnd_p, false);
   decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
 		     get_identifier (name),
 		     build_function_type_list (void_type_node, NULL_TREE));
@@ -10846,6 +10888,35 @@  output_indirect_thunk_function (bool need_bnd_p, int regno)
 	ASM_OUTPUT_LABEL (asm_out_file, name);
       }
 
+  if (regno < 0)
+    {
+      /* Create alias for __x86_return_thunk/__x86_return_thunk_bnd.  */
+      char alias[32];
+
+      indirect_thunk_name (alias, regno, need_bnd_p, true);
+      ASM_OUTPUT_DEF (asm_out_file, alias, name);
+#if TARGET_MACHO
+      if (TARGET_MACHO)
+	{
+	  fputs ("\t.weak_definition\t", asm_out_file);
+	  assemble_name (asm_out_file, alias);
+	  fputs ("\n\t.private_extern\t", asm_out_file);
+	  assemble_name (asm_out_file, alias);
+	  putc ('\n', asm_out_file);
+	}
+#else
+      if (USE_HIDDEN_LINKONCE)
+	{
+	  fputs ("\t.globl\t", asm_out_file);
+	  assemble_name (asm_out_file, alias);
+	  putc ('\n', asm_out_file);
+	  fputs ("\t.hidden\t", asm_out_file);
+	  assemble_name (asm_out_file, alias);
+	  putc ('\n', asm_out_file);
+	}
+#endif
+    }
+
   DECL_INITIAL (decl) = make_node (BLOCK);
   current_function_decl = decl;
   allocate_struct_function (decl, false);
@@ -28440,7 +28511,7 @@  ix86_output_indirect_branch (rtx call_op, const char *xasm,
 		indirect_thunk_needed = true;
 	    }
 	}
-      indirect_thunk_name (thunk_name_buf, regno, need_bnd_p);
+      indirect_thunk_name (thunk_name_buf, regno, need_bnd_p, false);
       thunk_name = thunk_name_buf;
     }
   else
@@ -28572,6 +28643,46 @@  ix86_output_indirect_jmp (rtx call_op, bool ret_p)
     return "%!jmp\t%A0";
 }
 
+/* Output function return.  Add a REP prefix to RET if LONG_P is true
+   and function return is kept.  */
+
+const char *
+ix86_output_function_return (bool long_p)
+{
+  if (cfun->machine->function_return_type != indirect_branch_keep)
+    {
+      char thunk_name[32];
+      bool need_bnd_p = ix86_bnd_prefixed_insn_p (current_output_insn);
+
+      if (cfun->machine->function_return_type
+	  != indirect_branch_thunk_inline)
+	{
+	  bool need_thunk = (cfun->machine->function_return_type
+			     == indirect_branch_thunk);
+	  indirect_thunk_name (thunk_name, -1, need_bnd_p, true);
+	  if (need_bnd_p)
+	    {
+	      indirect_thunk_bnd_needed |= need_thunk;
+	      fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
+	    }
+	  else
+	    {
+	      indirect_thunk_needed |= need_thunk;
+	      fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
+	    }
+	}
+      else
+	output_indirect_thunk (need_bnd_p, -1);
+
+      return "";
+    }
+
+  if (!long_p || ix86_bnd_prefixed_insn_p (current_output_insn))
+    return "%!ret";
+
+  return "rep%; ret";
+}
+
 /* Output the assembly for a call instruction.  */
 
 const char *
@@ -40882,6 +40993,28 @@  ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int,
 	}
     }
 
+  if (is_attribute_p ("function_return", name))
+    {
+      tree cst = TREE_VALUE (args);
+      if (TREE_CODE (cst) != STRING_CST)
+	{
+	  warning (OPT_Wattributes,
+		   "%qE attribute requires a string constant argument",
+		   name);
+	  *no_add_attrs = true;
+	}
+      else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
+	       && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
+	       && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
+	       && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
+	{
+	  warning (OPT_Wattributes,
+		   "argument to %qE attribute is not "
+		   "(keep|thunk|thunk-inline|thunk-extern)", name);
+	  *no_add_attrs = true;
+	}
+    }
+
   return NULL_TREE;
 }
 
@@ -45330,6 +45463,8 @@  static const struct attribute_spec ix86_attribute_table[] =
     ix86_handle_fndecl_attribute, NULL },
   { "indirect_branch", 1, 1, true, false, false, false,
     ix86_handle_fndecl_attribute, NULL },
+  { "function_return", 1, 1, true, false, false, false,
+    ix86_handle_fndecl_attribute, NULL },
 
   /* End element.  */
   { NULL, 0, 0, false, false, false, false, NULL, NULL }
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 3b939086112..bc4bc9a7a48 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2577,6 +2577,9 @@  struct GTY(()) machine_function {
      "indirect_jump" or "tablejump".  */
   BOOL_BITFIELD has_local_indirect_jump : 1;
 
+  /* How to generate function return.  */
+  ENUM_BITFIELD(indirect_branch) function_return_type : 3;
+
   /* If true, the current function is a function specified with
      the "interrupt" or "no_caller_saved_registers" attribute.  */
   BOOL_BITFIELD no_caller_saved_registers : 1;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a7573c468ae..6c832a867c8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -13050,7 +13050,7 @@ 
 (define_insn "simple_return_internal"
   [(simple_return)]
   "reload_completed"
-  "%!ret"
+  "* return ix86_output_function_return (false);"
   [(set_attr "length" "1")
    (set_attr "atom_unit" "jeu")
    (set_attr "length_immediate" "0")
@@ -13072,12 +13072,7 @@ 
   [(simple_return)
    (unspec [(const_int 0)] UNSPEC_REP)]
   "reload_completed"
-{
-  if (ix86_bnd_prefixed_insn_p (insn))
-    return "%!ret";
-
-  return "rep%; ret";
-}
+  "* return ix86_output_function_return (true);"
   [(set_attr "length" "2")
    (set_attr "atom_unit" "jeu")
    (set_attr "length_immediate" "0")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 59e5cc8e7e4..7b17773592b 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1026,9 +1026,13 @@  mindirect-branch=
 Target Report RejectNegative Joined Enum(indirect_branch) Var(ix86_indirect_branch) Init(indirect_branch_keep)
 Convert indirect call and jump to call and return thunks.
 
+mfunction-return=
+Target Report RejectNegative Joined Enum(indirect_branch) Var(ix86_function_return) Init(indirect_branch_keep)
+Convert function return to call and return thunk.
+
 Enum
 Name(indirect_branch) Type(enum indirect_branch)
-Known indirect branch choices (for use with the -mindirect-branch= option):
+Known indirect branch choices (for use with the -mindirect-branch=/-mfunction-return= options):
 
 EnumValue
 Enum(indirect_branch) String(keep) Value(indirect_branch_keep)
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index ddb6035be96..f120b2a1429 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -5764,6 +5764,15 @@  indirect call and jump to inlined call and return thunk.
 @samp{thunk-extern} converts indirect call and jump to external call
 and return thunk provided in a separate object file.
 
+@item function_return("@var{choice}")
+@cindex @code{function_return} function attribute, x86
+On x86 targets, the @code{function_return} attribute causes the compiler
+to convert function return with @var{choice}.  @samp{keep} keeps function
+return unmodified.  @samp{thunk} converts function return to call and
+return thunk.  @samp{thunk-inline} converts function return to inlined
+call and return thunk.  @samp{thunk-extern} converts function return to
+external call and return thunk provided in a separate object file.
+
 @item nocf_check
 @cindex @code{nocf_check} function attribute
 The @code{nocf_check} attribute on a function is used to inform the
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0d685c3576b..df945989fe8 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1230,7 +1230,7 @@  See RS/6000 and PowerPC Options.
 -mstack-protector-guard-offset=@var{offset} @gol
 -mstack-protector-guard-symbol=@var{symbol} -mmitigate-rop @gol
 -mgeneral-regs-only -mcall-ms2sysv-xlogues @gol
--mindirect-branch=@var{choice}}
+-mindirect-branch=@var{choice} -mfunction-return=@var{choice}}
 
 @emph{x86 Windows Options}
 @gccoptlist{-mconsole  -mcygwin  -mno-cygwin  -mdll @gol
@@ -26850,6 +26850,17 @@  to external call and return thunk provided in a separate object file.
 You can control this behavior for a specific function by using the
 function attribute @code{indirect_branch}.  @xref{Function Attributes}.
 
+@item -mfunction-return=@var{choice}
+@opindex -mfunction-return
+Convert function return with @var{choice}.  The default is @samp{keep},
+which keeps function return unmodified.  @samp{thunk} converts function
+return to call and return thunk.  @samp{thunk-inline} converts function
+return to inlined call and return thunk.  @samp{thunk-extern} converts
+function return to external call and return thunk provided in a separate
+object file.  You can control this behavior for a specific function by
+using the function attribute @code{function_return}.
+@xref{Function Attributes}.
+
 @end table
 
 These @samp{-m} switches are supported in addition to the above
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c
index d1d2ee78797..527e447aea5 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c
index 08646c6b823..7dbc7607e2e 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c
index af244de2238..c085b21582c 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c
index b8aedd5a4e6..f92968bf616 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-5.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-5.c
index 6ffb9235f94..4d19fac21d8 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-5.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-5.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target *-*-linux* } } */
-/* { dg-options "-O2 -fpic -fno-plt -mindirect-branch=thunk" } */
+/* { dg-options "-O2 -mfunction-return=keep -fpic -fno-plt -mindirect-branch=thunk" } */
 
 extern void bar (void);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-6.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-6.c
index e6d9d148cd2..5cbdd85303e 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-6.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-6.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target *-*-linux* } } */
-/* { dg-options "-O2 -fpic -fno-plt -mindirect-branch=thunk" } */
+/* { dg-options "-O2 -mfunction-return=keep -fpic -fno-plt -mindirect-branch=thunk" } */
 
 extern void bar (void);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c
index d892d8f5992..c59dd049883 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */
 
 void func0 (void);
 void func1 (void);
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c
index 24188d0b62d..61b9c80de33 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c
index 03184b90cda..dcd2381c514 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c
index af167840b81..21b69728796 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c
index 146124894a0..0bd6aab2fd6 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c
index 568327cd8e7..99226dbdd1f 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c
index bd8a99e7828..aceb4041275 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c
index 356015c9799..43d19bbe876 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -fno-pic" } */
 
 void func0 (void);
 void func1 (void);
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c
index 6960fa0bbfb..c246f974610 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */
 
 void func0 (void);
 void func1 (void);
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-1.c
index febf32d76ea..bbfaf6ba7e7 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-1.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-1.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target { ! x32 } } } */
-/* { dg-options "-O2 -mindirect-branch=thunk -fcheck-pointer-bounds -mmpx -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fcheck-pointer-bounds -mmpx -fno-pic" } */
 
 void (*dispatch) (char *);
 char buf[10];
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-2.c
index 319ba30b78b..6c82a236c1b 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-2.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-2.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target { ! x32 } } } */
-/* { dg-options "-O2 -mindirect-branch=thunk -fcheck-pointer-bounds -mmpx -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fcheck-pointer-bounds -mmpx -fno-pic" } */
 
 void (*dispatch) (char *);
 char buf[10];
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-3.c
index 9168b3146f5..299940de399 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-3.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-3.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target { *-*-linux* && { ! x32 } } } } */
-/* { dg-options "-O2 -mindirect-branch=thunk -fcheck-pointer-bounds -mmpx -fpic -fno-plt" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fcheck-pointer-bounds -mmpx -fpic -fno-plt" } */
 
 void bar (char *);
 char buf[10];
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-4.c
index d3b36d44c7c..77ee84b938b 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-4.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-4.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target { *-*-linux* && { ! x32 } } } } */
-/* { dg-options "-O2 -mindirect-branch=thunk -fcheck-pointer-bounds -mmpx -fpic -fno-plt" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fcheck-pointer-bounds -mmpx -fpic -fno-plt" } */
 
 void bar (char *);
 char buf[10];
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c
index 9e50b282f77..782960375af 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c
index f897d1c0497..240c15be8a6 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c
index 25905cd0016..6e49707875e 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c
index a7fa12183af..e1d8891380c 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-5.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-5.c
index 48a49760be6..6ad05b70604 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-5.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-5.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target *-*-linux* } } */
-/* { dg-options "-O2 -fpic -fno-plt -mindirect-branch=thunk-extern" } */
+/* { dg-options "-O2 -mfunction-return=keep -fpic -fno-plt -mindirect-branch=thunk-extern" } */
 
 extern void bar (void);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-6.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-6.c
index a1c662f7d23..cfb0894ae49 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-6.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-6.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target *-*-linux* } } */
-/* { dg-options "-O2 -fpic -fno-plt -mindirect-branch=thunk-extern" } */
+/* { dg-options "-O2 -mfunction-return=keep -fpic -fno-plt -mindirect-branch=thunk-extern" } */
 
 extern void bar (void);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c
index 40a665ea640..205b9b405bf 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */
 
 void func0 (void);
 void func1 (void);
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c
index 3ace8d1b031..efa0096e1e0 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c
index 6c97b96f1f2..775d0b8c53e 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c
index 8f6759cbf06..788271f049f 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c
index b07d08cab0f..ef8a2c746a7 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */
 
 typedef void (*dispatch_t)(long offset);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-5.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-5.c
index 10794886b1b..848ceefca02 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-5.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-5.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target *-*-linux* } } */
-/* { dg-options "-O2 -fpic -fno-plt -mindirect-branch=thunk-inline" } */
+/* { dg-options "-O2 -mfunction-return=keep -fpic -fno-plt -mindirect-branch=thunk-inline" } */
 
 extern void bar (void);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-6.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-6.c
index a26ec4b06ed..64608100782 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-6.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-6.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target *-*-linux* } } */
-/* { dg-options "-O2 -fpic -fno-plt -mindirect-branch=thunk-inline" } */
+/* { dg-options "-O2 -mfunction-return=keep -fpic -fno-plt -mindirect-branch=thunk-inline" } */
 
 extern void bar (void);
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c
index 77253af17c6..3c2758360f5 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */
 
 void func0 (void);
 void func1 (void);
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-1.c b/gcc/testsuite/gcc.target/i386/ret-thunk-1.c
new file mode 100644
index 00000000000..42919722e13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-1.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=thunk" } */
+
+void
+foo (void)
+{
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler {\tpause} } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-10.c b/gcc/testsuite/gcc.target/i386/ret-thunk-10.c
new file mode 100644
index 00000000000..b5164bfc5ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-10.c
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=thunk-inline -mindirect-branch=thunk -fno-pic" } */
+
+extern void (*bar) (void);
+
+int
+foo (void)
+{
+  bar ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */
+/* { dg-final { scan-assembler-times {\tpause} 2 } } */
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } }  } } */
+/* { dg-final { scan-assembler "__x86_indirect_thunk:" { target { ! x32 } }  } } */
+/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } }  } } */
+/* { dg-final { scan-assembler "__x86_indirect_thunk_(r|e)ax:" { target { x32 } }  } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-11.c b/gcc/testsuite/gcc.target/i386/ret-thunk-11.c
new file mode 100644
index 00000000000..a26ac963ea5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-11.c
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=thunk-extern -mindirect-branch=thunk -fno-pic" } */
+
+extern void (*bar) (void);
+
+int
+foo (void)
+{
+  bar ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */
+/* { dg-final { scan-assembler-times {\tpause} 1 } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "__x86_indirect_thunk:" { target { ! x32 } }  } } */
+/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } }  } } */
+/* { dg-final { scan-assembler "__x86_indirect_thunk_(r|e)ax:" { target { x32 } }  } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-12.c b/gcc/testsuite/gcc.target/i386/ret-thunk-12.c
new file mode 100644
index 00000000000..d0106da38ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-12.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */
+
+extern void (*bar) (void); /* Only declared here; called indirectly.  */
+
+int
+foo (void)
+{
+  bar (); /* Indirect call, subject to -mindirect-branch=thunk.  */
+  return 0; /* Function return; -mfunction-return=keep is in effect.  */
+}
+
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */
+/* { dg-final { scan-assembler-times {\tpause} 1 } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "__x86_indirect_thunk:" { target { ! x32 } }  } } */
+/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } }  } } */
+/* { dg-final { scan-assembler "__x86_indirect_thunk_(r|e)ax:" { target { x32 } }  } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-13.c b/gcc/testsuite/gcc.target/i386/ret-thunk-13.c
new file mode 100644
index 00000000000..185ad366190
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-13.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */
+
+extern void (*bar) (void); /* Only declared here; called indirectly.  */
+extern int foo (void) __attribute__ ((function_return("thunk")));
+
+int
+foo (void)
+{
+  bar (); /* Indirect call, subject to -mindirect-branch=thunk-inline.  */
+  return 0; /* Function return; foo is declared function_return("thunk").  */
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */
+/* { dg-final { scan-assembler-times {\tpause} 2 } } */
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 3 } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 3 } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } }  } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-14.c b/gcc/testsuite/gcc.target/i386/ret-thunk-14.c
new file mode 100644
index 00000000000..cce8b20b5f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-14.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */
+
+extern void (*bar) (void); /* Only declared here; called indirectly.  */
+
+__attribute__ ((function_return("thunk-inline")))
+int
+foo (void)
+{
+  bar (); /* Indirect call, subject to -mindirect-branch=thunk-extern.  */
+  return 0; /* Function return; foo has function_return("thunk-inline").  */
+}
+
+/* { dg-final { scan-assembler-times {\tpause} 1 } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } }  } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-15.c b/gcc/testsuite/gcc.target/i386/ret-thunk-15.c
new file mode 100644
index 00000000000..0316d301d9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-15.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=keep -mindirect-branch=keep -fno-pic" } */
+
+extern void (*bar) (void); /* Only declared here; called indirectly.  */
+
+__attribute__ ((function_return("thunk-extern"), indirect_branch("thunk")))
+int
+foo (void)
+{
+  bar (); /* Indirect call; foo has indirect_branch("thunk").  */
+  return 0; /* Function return; foo has function_return("thunk-extern").  */
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-times {\tpause} 1 } } */
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-16.c b/gcc/testsuite/gcc.target/i386/ret-thunk-16.c
new file mode 100644
index 00000000000..9de35bcbd27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-16.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=thunk-inline -mindirect-branch=thunk-extern -fno-pic" } */
+
+extern void (*bar) (void); /* Only declared here; called indirectly.  */
+
+__attribute__ ((function_return("keep"), indirect_branch("keep")))
+int
+foo (void)
+{
+  bar (); /* Indirect call; foo has indirect_branch("keep").  */
+  return 0; /* Function return; foo has function_return("keep").  */
+}
+
+/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler-not "__x86_return_thunk" } } */
+/* { dg-final { scan-assembler-not {\tpause} } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-2.c b/gcc/testsuite/gcc.target/i386/ret-thunk-2.c
new file mode 100644
index 00000000000..a35bedc425d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-2.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=thunk-inline" } */
+
+void
+foo (void) /* Empty body: the function does nothing but return.  */
+{
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler {\tpause} } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-3.c b/gcc/testsuite/gcc.target/i386/ret-thunk-3.c
new file mode 100644
index 00000000000..b4bbf27f7c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-3.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=thunk-extern" } */
+
+void
+foo (void) /* Empty body: the function does nothing but return.  */
+{
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */
+/* { dg-final { scan-assembler-not {\tpause} } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-4.c b/gcc/testsuite/gcc.target/i386/ret-thunk-4.c
new file mode 100644
index 00000000000..55223d0a41c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-4.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=keep" } */
+
+void
+foo (void) /* Empty body: the function does nothing but return.  */
+{
+}
+
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */
+/* { dg-final { scan-assembler-not {\tpause} } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-5.c b/gcc/testsuite/gcc.target/i386/ret-thunk-5.c
new file mode 100644
index 00000000000..036787b2e01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-5.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=keep" } */
+
+extern void foo (void) __attribute__ ((function_return("thunk")));
+
+void
+foo (void) /* Empty body; the declaration above adds function_return.  */
+{
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler {\tpause} } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-6.c b/gcc/testsuite/gcc.target/i386/ret-thunk-6.c
new file mode 100644
index 00000000000..29bb20b7f18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-6.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=keep" } */
+
+__attribute__ ((function_return("thunk-inline")))
+void
+foo (void) /* Empty body: the function does nothing but return.  */
+{
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler {\tpause} } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-7.c b/gcc/testsuite/gcc.target/i386/ret-thunk-7.c
new file mode 100644
index 00000000000..39925c6201b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-7.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=keep" } */
+
+__attribute__ ((function_return("thunk-extern")))
+void
+foo (void) /* Empty body: the function does nothing but return.  */
+{
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */
+/* { dg-final { scan-assembler-not {\tpause} } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-8.c b/gcc/testsuite/gcc.target/i386/ret-thunk-8.c
new file mode 100644
index 00000000000..1e2e1e047a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-8.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=thunk-inline" } */
+
+extern void foo (void) __attribute__ ((function_return("keep")));
+
+void
+foo (void) /* Empty body; the declaration above adds function_return.  */
+{
+}
+
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*__x86_return_thunk" } } */
+/* { dg-final { scan-assembler-not {\tpause} } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */
diff --git a/gcc/testsuite/gcc.target/i386/ret-thunk-9.c b/gcc/testsuite/gcc.target/i386/ret-thunk-9.c
new file mode 100644
index 00000000000..92298c362ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ret-thunk-9.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfunction-return=thunk -mindirect-branch=thunk -fno-pic" } */
+
+extern void (*bar) (void); /* Only declared here; called indirectly.  */
+
+int
+foo (void)
+{
+  bar (); /* Indirect call, subject to -mindirect-branch=thunk.  */
+  return 0; /* Function return, subject to -mfunction-return=thunk.  */
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_return_thunk" } } */
+/* { dg-final { scan-assembler-not "__x86_return_thunk:" } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "__x86_indirect_thunk:" } } */
+/* { dg-final { scan-assembler-times {\tpause} 1 { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?bar" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times {\tpause} 2 { target { x32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target { x32 } } } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */