diff mbox series

[1/5] x86: Add -mindirect-branch=

Message ID 20180107225904.11535-2-hjl.tools@gmail.com
State New
Headers show
Series x86: CVE-2017-5715, aka Spectre | expand

Commit Message

H.J. Lu Jan. 7, 2018, 10:59 p.m. UTC
Add -mindirect-branch= option to convert indirect call and jump to call
and return thunks.  The default is 'keep', which keeps indirect call and
jump unmodified.  'thunk' converts indirect call and jump to call and
return thunk.  'thunk-inline' converts indirect call and jump to inlined
call and return thunk.  'thunk-extern' converts indirect call and jump to
external call and return thunk provided in a separate object file.  You
can control this behavior for a specific function by using the function
attribute indirect_branch.

2 kinds of thunks are generated.  Memory thunk where the function address
is at the top of the stack:

__x86_indirect_thunk:
	call L2
L1:
	lfence
	jmp L1
L2:
	lea 8(%rsp), %rsp|lea 4(%esp), %esp
	ret

Indirect jmp via memory, "jmp mem", is converted to

	push memory
	jmp __x86_indirect_thunk

Indirect call via memory, "call mem", is converted to

	jmp L2
L1:
	push [mem]
	jmp __x86_indirect_thunk
L2:
	call L1

Register thunk where the function address is in a register, reg:

__x86_indirect_thunk_reg:
	call	L2
L1:
	lfence
	jmp	L1
L2:
	movq	%reg, (%rsp)|movl    %reg, (%esp)
	ret

where reg is one of (r|e)ax, (r|e)dx, (r|e)cx, (r|e)bx, (r|e)si, (r|e)di,
(r|e)bp, r8, r9, r10, r11, r12, r13, r14 and r15.

Indirect jmp via register, "jmp reg", is converted to

	jmp __x86_indirect_thunk_reg

Indirect call via register, "call reg", is converted to

	call __x86_indirect_thunk_reg

gcc/

	* config/i386/i386-opts.h (indirect_branch): New.
	* config/i386/i386-protos.h (ix86_output_indirect_jmp): Likewise.
	* config/i386/i386.c (ix86_using_red_zone): Disallow red-zone
	with local indirect jump when converting indirect call and jump.
	(ix86_set_indirect_branch_type): New.
	(ix86_set_current_function): Call ix86_set_indirect_branch_type.
	(indirectlabelno): New.
	(indirect_thunk_needed): Likewise.
	(indirect_thunk_bnd_needed): Likewise.
	(indirect_thunks_used): Likewise.
	(indirect_thunks_bnd_used): Likewise.
	(INDIRECT_LABEL): Likewise.
	(indirect_thunk_name): Likewise.
	(output_indirect_thunk): Likewise.
	(output_indirect_thunk_function): Likewise.
	(ix86_output_indirect_branch): Likewise.
	(ix86_output_indirect_jmp): Likewise.
	(ix86_code_end): Call output_indirect_thunk_function if needed.
	(ix86_output_call_insn): Call ix86_output_indirect_branch if
	needed.
	(ix86_handle_fndecl_attribute): Handle indirect_branch.
	(ix86_attribute_table): Add indirect_branch.
	* config/i386/i386.h (machine_function): Add indirect_branch_type
	and has_local_indirect_jump.
	* config/i386/i386.md (indirect_jump): Set has_local_indirect_jump
	to true.
	(tablejump): Likewise.
	(*indirect_jump): Use ix86_output_indirect_jmp.
	(*tablejump_1): Likewise.
	(simple_return_indirect_internal): Likewise.
	* config/i386/i386.opt (mindirect-branch=): New option.
	(indirect_branch): New.
	(keep): Likewise.
	(thunk): Likewise.
	(thunk-inline): Likewise.
	(thunk-extern): Likewise.
	* doc/extend.texi: Document indirect_branch function attribute.
	* doc/invoke.texi: Document -mindirect-branch= option.

gcc/testsuite/

	* gcc.target/i386/indirect-thunk-1.c: New test.
	* gcc.target/i386/indirect-thunk-2.c: Likewise.
	* gcc.target/i386/indirect-thunk-3.c: Likewise.
	* gcc.target/i386/indirect-thunk-4.c: Likewise.
	* gcc.target/i386/indirect-thunk-5.c: Likewise.
	* gcc.target/i386/indirect-thunk-6.c: Likewise.
	* gcc.target/i386/indirect-thunk-7.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-1.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-2.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-3.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-4.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-5.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-6.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-7.c: Likewise.
	* gcc.target/i386/indirect-thunk-attr-8.c: Likewise.
	* gcc.target/i386/indirect-thunk-bnd-1.c: Likewise.
	* gcc.target/i386/indirect-thunk-bnd-2.c: Likewise.
	* gcc.target/i386/indirect-thunk-bnd-3.c: Likewise.
	* gcc.target/i386/indirect-thunk-bnd-4.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-1.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-2.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-3.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-4.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-5.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-6.c: Likewise.
	* gcc.target/i386/indirect-thunk-extern-7.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-1.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-2.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-3.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-4.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-5.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-6.c: Likewise.
	* gcc.target/i386/indirect-thunk-inline-7.c: Likewise.
---
 gcc/config/i386/i386-opts.h                        |   8 +
 gcc/config/i386/i386-protos.h                      |   1 +
 gcc/config/i386/i386.c                             | 495 ++++++++++++++++++++-
 gcc/config/i386/i386.h                             |   7 +
 gcc/config/i386/i386.md                            |   8 +-
 gcc/config/i386/i386.opt                           |  20 +
 gcc/doc/extend.texi                                |  10 +
 gcc/doc/invoke.texi                                |  14 +-
 gcc/testsuite/gcc.target/i386/indirect-thunk-1.c   |  19 +
 gcc/testsuite/gcc.target/i386/indirect-thunk-2.c   |  19 +
 gcc/testsuite/gcc.target/i386/indirect-thunk-3.c   |  20 +
 gcc/testsuite/gcc.target/i386/indirect-thunk-4.c   |  20 +
 gcc/testsuite/gcc.target/i386/indirect-thunk-5.c   |  16 +
 gcc/testsuite/gcc.target/i386/indirect-thunk-6.c   |  17 +
 gcc/testsuite/gcc.target/i386/indirect-thunk-7.c   |  43 ++
 .../gcc.target/i386/indirect-thunk-attr-1.c        |  22 +
 .../gcc.target/i386/indirect-thunk-attr-2.c        |  20 +
 .../gcc.target/i386/indirect-thunk-attr-3.c        |  21 +
 .../gcc.target/i386/indirect-thunk-attr-4.c        |  20 +
 .../gcc.target/i386/indirect-thunk-attr-5.c        |  22 +
 .../gcc.target/i386/indirect-thunk-attr-6.c        |  21 +
 .../gcc.target/i386/indirect-thunk-attr-7.c        |  44 ++
 .../gcc.target/i386/indirect-thunk-attr-8.c        |  41 ++
 .../gcc.target/i386/indirect-thunk-bnd-1.c         |  19 +
 .../gcc.target/i386/indirect-thunk-bnd-2.c         |  20 +
 .../gcc.target/i386/indirect-thunk-bnd-3.c         |  18 +
 .../gcc.target/i386/indirect-thunk-bnd-4.c         |  19 +
 .../gcc.target/i386/indirect-thunk-extern-1.c      |  19 +
 .../gcc.target/i386/indirect-thunk-extern-2.c      |  19 +
 .../gcc.target/i386/indirect-thunk-extern-3.c      |  20 +
 .../gcc.target/i386/indirect-thunk-extern-4.c      |  20 +
 .../gcc.target/i386/indirect-thunk-extern-5.c      |  16 +
 .../gcc.target/i386/indirect-thunk-extern-6.c      |  17 +
 .../gcc.target/i386/indirect-thunk-extern-7.c      |  43 ++
 .../gcc.target/i386/indirect-thunk-inline-1.c      |  18 +
 .../gcc.target/i386/indirect-thunk-inline-2.c      |  18 +
 .../gcc.target/i386/indirect-thunk-inline-3.c      |  19 +
 .../gcc.target/i386/indirect-thunk-inline-4.c      |  19 +
 .../gcc.target/i386/indirect-thunk-inline-5.c      |  15 +
 .../gcc.target/i386/indirect-thunk-inline-6.c      |  16 +
 .../gcc.target/i386/indirect-thunk-inline-7.c      |  42 ++
 41 files changed, 1289 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-extern-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-extern-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-inline-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-inline-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c

Comments

Martin Liška Jan. 8, 2018, 10:10 a.m. UTC | #1
On 01/07/2018 11:59 PM, H.J. Lu wrote:
> +static void
> +output_indirect_thunk_function (bool need_bnd_p, int regno)
> +{
> +  char name[32];
> +  tree decl;
> +
> +  /* Create __x86_indirect_thunk/__x86_indirect_thunk_bnd.  */
> +  indirect_thunk_name (name, regno, need_bnd_p);
> +  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
> +		     get_identifier (name),
> +		     build_function_type_list (void_type_node, NULL_TREE));
> +  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
> +				   NULL_TREE, void_type_node);
> +  TREE_PUBLIC (decl) = 1;
> +  TREE_STATIC (decl) = 1;
> +  DECL_IGNORED_P (decl) = 1;
> +
> +#if TARGET_MACHO
> +  if (TARGET_MACHO)
> +    {
> +      switch_to_section (darwin_sections[picbase_thunk_section]);
> +      fputs ("\t.weak_definition\t", asm_out_file);
> +      assemble_name (asm_out_file, name);
> +      fputs ("\n\t.private_extern\t", asm_out_file);
> +      assemble_name (asm_out_file, name);
> +      putc ('\n', asm_out_file);
> +      ASM_OUTPUT_LABEL (asm_out_file, name);
> +      DECL_WEAK (decl) = 1;
> +    }
> +  else
> +#endif
> +    if (USE_HIDDEN_LINKONCE)
> +      {
> +	cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
> +
> +	targetm.asm_out.unique_section (decl, 0);
> +	switch_to_section (get_named_section (decl, NULL, 0));
> +
> +	targetm.asm_out.globalize_label (asm_out_file, name);
> +	fputs ("\t.hidden\t", asm_out_file);
> +	assemble_name (asm_out_file, name);
> +	putc ('\n', asm_out_file);
> +	ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
> +      }
> +    else
> +      {
> +	switch_to_section (text_section);
> +	ASM_OUTPUT_LABEL (asm_out_file, name);
> +      }
> +
> +  DECL_INITIAL (decl) = make_node (BLOCK);
> +  current_function_decl = decl;
> +  allocate_struct_function (decl, false);
> +  init_function_start (decl);
> +  /* We're about to hide the function body from callees of final_* by
> +     emitting it directly; tell them we're a thunk, if they care.  */
> +  cfun->is_thunk = true;
> +  first_function_block_is_cold = false;
> +  /* Make sure unwind info is emitted for the thunk if needed.  */
> +  final_start_function (emit_barrier (), asm_out_file, 1);
> +
> +  output_indirect_thunk (need_bnd_p, regno);
> +
> +  final_end_function ();
> +  init_insn_lengths ();
> +  free_after_compilation (cfun);
> +  set_cfun (NULL);
> +  current_function_decl = NULL;
> +}
> +

I'm wondering whether thunk creation can be a good target-independent generalization? I guess
we can emit the function declaration without direct writes to asm_out_file? And the emission
of function body can be potentially a target hook?

What about emitting body of the function with RTL instructions instead of direct assembly write?
My knowledge of RTL is quite small, but maybe it can bring some generalization and reusability
for other targets?

Thank you,
Martin
H.J. Lu Jan. 8, 2018, 11:55 a.m. UTC | #2
On Mon, Jan 8, 2018 at 2:10 AM, Martin Liška <mliska@suse.cz> wrote:
> On 01/07/2018 11:59 PM, H.J. Lu wrote:
>> +static void
>> +output_indirect_thunk_function (bool need_bnd_p, int regno)
>> +{
>> +  char name[32];
>> +  tree decl;
>> +
>> +  /* Create __x86_indirect_thunk/__x86_indirect_thunk_bnd.  */
>> +  indirect_thunk_name (name, regno, need_bnd_p);
>> +  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
>> +                  get_identifier (name),
>> +                  build_function_type_list (void_type_node, NULL_TREE));
>> +  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
>> +                                NULL_TREE, void_type_node);
>> +  TREE_PUBLIC (decl) = 1;
>> +  TREE_STATIC (decl) = 1;
>> +  DECL_IGNORED_P (decl) = 1;
>> +
>> +#if TARGET_MACHO
>> +  if (TARGET_MACHO)
>> +    {
>> +      switch_to_section (darwin_sections[picbase_thunk_section]);
>> +      fputs ("\t.weak_definition\t", asm_out_file);
>> +      assemble_name (asm_out_file, name);
>> +      fputs ("\n\t.private_extern\t", asm_out_file);
>> +      assemble_name (asm_out_file, name);
>> +      putc ('\n', asm_out_file);
>> +      ASM_OUTPUT_LABEL (asm_out_file, name);
>> +      DECL_WEAK (decl) = 1;
>> +    }
>> +  else
>> +#endif
>> +    if (USE_HIDDEN_LINKONCE)
>> +      {
>> +     cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
>> +
>> +     targetm.asm_out.unique_section (decl, 0);
>> +     switch_to_section (get_named_section (decl, NULL, 0));
>> +
>> +     targetm.asm_out.globalize_label (asm_out_file, name);
>> +     fputs ("\t.hidden\t", asm_out_file);
>> +     assemble_name (asm_out_file, name);
>> +     putc ('\n', asm_out_file);
>> +     ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
>> +      }
>> +    else
>> +      {
>> +     switch_to_section (text_section);
>> +     ASM_OUTPUT_LABEL (asm_out_file, name);
>> +      }
>> +
>> +  DECL_INITIAL (decl) = make_node (BLOCK);
>> +  current_function_decl = decl;
>> +  allocate_struct_function (decl, false);
>> +  init_function_start (decl);
>> +  /* We're about to hide the function body from callees of final_* by
>> +     emitting it directly; tell them we're a thunk, if they care.  */
>> +  cfun->is_thunk = true;
>> +  first_function_block_is_cold = false;
>> +  /* Make sure unwind info is emitted for the thunk if needed.  */
>> +  final_start_function (emit_barrier (), asm_out_file, 1);
>> +
>> +  output_indirect_thunk (need_bnd_p, regno);
>> +
>> +  final_end_function ();
>> +  init_insn_lengths ();
>> +  free_after_compilation (cfun);
>> +  set_cfun (NULL);
>> +  current_function_decl = NULL;
>> +}
>> +
>
> I'm wondering whether thunk creation can be a good target-independent generalization? I guess
> we can emit the function declaration without direct writes to asm_out_file? And the emission
> of function body can be potentially a target hook?
>
> What about emitting body of the function with RTL instructions instead of direct assembly write?
> My knowledge of RTL is quite small, but maybe it can bring some generalization and reusability
> for other targets?

Thunks are x86 specific and they are created the same way as 32-bit PIC thunks.
I don't see how a target hook is used.
Jakub Jelinek Jan. 8, 2018, noon UTC | #3
On Mon, Jan 08, 2018 at 03:55:52AM -0800, H.J. Lu wrote:
> > I'm wondering whether thunk creation can be a good target-independent generalization? I guess
> > we can emit the function declaration without direct writes to asm_out_file? And the emission
> > of function body can be potentially a target hook?
> >
> > What about emitting body of the function with RTL instructions instead of direct assembly write?
> > My knowledge of RTL is quite small, but maybe it can bring some generalization and reusability
> > for other targets?
> 
> Thunks are x86 specific and they are created the same way as 32-bit PIC thunks.
> I don't see how a target hook is used.

Talking about PIC thunks, those have I believe . character in their symbols,
so that they can't be confused with user functions.  Any reason these
retpoline thunks aren't?

	Jakub
H.J. Lu Jan. 8, 2018, 12:07 p.m. UTC | #4
On Mon, Jan 8, 2018 at 4:00 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, Jan 08, 2018 at 03:55:52AM -0800, H.J. Lu wrote:
>> > I'm wondering whether thunk creation can be a good target-independent generalization? I guess
>> > we can emit the function declaration without direct writes to asm_out_file? And the emission
>> > of function body can be potentially a target hook?
>> >
>> > What about emitting body of the function with RTL instructions instead of direct assembly write?
>> > My knowledge of RTL is quite small, but maybe it can bring some generalization and reusability
>> > for other targets?
>>
>> Thunks are x86 specific and they are created the same way as 32-bit PIC thunks.
>> I don't see how a target hook is used.
>
> Talking about PIC thunks, those have I believe . character in their symbols,
> so that they can't be confused with user functions.  Any reason these
> retpoline thunks aren't?
>

They used to have '.'.  It was changed at the last minute since kernel needs to
export them as regular symbols.
Andi Kleen Jan. 8, 2018, 4:46 p.m. UTC | #5
"H.J. Lu" <hjl.tools@gmail.com> writes:
>>
>> Talking about PIC thunks, those have I believe . character in their symbols,
>> so that they can't be confused with user functions.  Any reason these
>> retpoline thunks aren't?
>>
>
> They used to have '.'.  It was changed at the last minute since kernel needs to
> export them as regular symbols.

The kernel doesn't actually need that to export the symbols.

While symbol CRCs cannot be generated for symbols with '.', CRCs are not
needed and there were already patches to hide the resulting warnings.

-Andi
Michael Matz Jan. 8, 2018, 5:18 p.m. UTC | #6
Hi,

On Mon, 8 Jan 2018, H.J. Lu wrote:

> On Mon, Jan 8, 2018 at 4:00 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> > On Mon, Jan 08, 2018 at 03:55:52AM -0800, H.J. Lu wrote:
> >> > I'm wondering whether thunk creation can be a good target-independent generalization? I guess
> >> > we can emit the function declaration without direct writes to asm_out_file? And the emission
> >> > of function body can be potentially a target hook?
> >> >
> >> > What about emitting body of the function with RTL instructions instead of direct assembly write?
> >> > My knowledge of RTL is quite small, but maybe it can bring some generalization and reusability
> >> > for other targets?
> >>
> >> Thunks are x86 specific and they are created the same way as 32-bit PIC thunks.
> >> I don't see how a target hook is used.
> >
> > Talking about PIC thunks, those have I believe . character in their symbols,
> > so that they can't be confused with user functions.  Any reason these
> > retpoline thunks aren't?
> >
> 
> They used to have '.'.  It was changed at the last minute since kernel 
> needs to export them as regular symbols.

That can be done via asm aliases or direct assembler use; the kernel 
doesn't absolutely have to access them via C compatible symbol names.


Ciao,
Michael.
H.J. Lu Jan. 8, 2018, 5:25 p.m. UTC | #7
On Mon, Jan 8, 2018 at 9:18 AM, Michael Matz <matz@suse.de> wrote:
> Hi,
>
> On Mon, 8 Jan 2018, H.J. Lu wrote:
>
>> On Mon, Jan 8, 2018 at 4:00 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>> > On Mon, Jan 08, 2018 at 03:55:52AM -0800, H.J. Lu wrote:
>> >> > I'm wondering whether thunk creation can be a good target-independent generalization? I guess
>> >> > we can emit the function declaration without direct writes to asm_out_file? And the emission
>> >> > of function body can be potentially a target hook?
>> >> >
>> >> > What about emitting body of the function with RTL instructions instead of direct assembly write?
>> >> > My knowledge of RTL is quite small, but maybe it can bring some generalization and reusability
>> >> > for other targets?
>> >>
>> >> Thunks are x86 specific and they are created the same way as 32-bit PIC thunks.
>> >> I don't see how a target hook is used.
>> >
>> > Talking about PIC thunks, those have I believe . character in their symbols,
>> > so that they can't be confused with user functions.  Any reason these
>> > retpoline thunks aren't?
>> >
>>
>> They used to have '.'.  It was changed at the last minute since kernel
>> needs to export them as regular symbols.
>
> That can be done via asm aliases or direct assembler use; the kernel
> doesn't absolutely have to access them via C compatible symbol names.
>

Hi David,

Can you comment on this?
David Woodhouse Jan. 8, 2018, 5:39 p.m. UTC | #8
On Mon, 2018-01-08 at 09:25 -0800, H.J. Lu wrote:
> On Mon, Jan 8, 2018 at 9:18 AM, Michael Matz <matz@suse.de> wrote:
> > 
> > Hi,
> > 
> > On Mon, 8 Jan 2018, H.J. Lu wrote:
> > 
> > > 
> > > On Mon, Jan 8, 2018 at 4:00 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> > > > 
> > > > On Mon, Jan 08, 2018 at 03:55:52AM -0800, H.J. Lu wrote:
> > > > > 
> > > > > > 
> > > > > > I'm wondering whether thunk creation can be a good target-independent generalization? I guess
> > > > > > we can emit the function declaration without direct writes to asm_out_file? And the emission
> > > > > > of function body can be potentially a target hook?
> > > > > > 
> > > > > > What about emitting body of the function with RTL instructions instead of direct assembly write?
> > > > > > My knowledge of RTL is quite small, but maybe it can bring some generalization and reusability
> > > > > > for other targets?
> > > > > Thunks are x86 specific and they are created the same way as 32-bit PIC thunks.
> > > > > I don't see how a target hook is used.
> > > > Talking about PIC thunks, those have I believe . character in their symbols,
> > > > so that they can't be confused with user functions.  Any reason these
> > > > retpoline thunks aren't?
> > > > 
> > > They used to have '.'.  It was changed at the last minute since kernel
> > > needs to export them as regular symbols.
> > That can be done via asm aliases or direct assembler use; the kernel
> > doesn't absolutely have to access them via C compatible symbol names.
> > 
> Hi David,
> 
> Can you comment on this?

It ends up being a real pain for the CONFIG_TRIM_UNUSED_SYMBOLS
mechanism in the kernel, which really doesn't cope well with the dots.
It *does* assume that exported symbols have C-compatible names.
MODVERSIONS too, although we had a simpler "just shut up the warnings"
solution for that. It was CONFIG_TRIM_UNUSED_SYMBOLS which was the
really horrid one.

I went a little way down the rabbit-hole of trying to make it cope, but
it was far from pretty:

https://patchwork.kernel.org/patch/10148081/

If there's a way to make it work sanely, I'm up for that. But if the
counter-argument is "But someone might genuinely want to make their own
C function called __x86_indirect_thunk_rax"... I'm not so receptive to
that argument :)
H.J. Lu Jan. 8, 2018, 9:32 p.m. UTC | #9
On Mon, Jan 8, 2018 at 8:46 AM, Andi Kleen <ak@linux.intel.com> wrote:
> "H.J. Lu" <hjl.tools@gmail.com> writes:
>>>
>>> Talking about PIC thunks, those have I believe . character in their symbols,
>>> so that they can't be confused with user functions.  Any reason these
>>> retpoline thunks aren't?
>>>
>>
>> They used to have '.'.  It was changed at the last minute since kernel needs to
>> export them as regular symbols.
>
> The kernel doesn't actually need that to export the symbols.
>
> While symbol CRCs cannot be generated for symbols with '.', CRCs are not
> needed and there were already patches to hide the resulting warnings.
>

Andi, can you work it out with David?
David Woodhouse Jan. 8, 2018, 9:35 p.m. UTC | #10
On Mon, 2018-01-08 at 13:32 -0800, H.J. Lu wrote:
> On Mon, Jan 8, 2018 at 8:46 AM, Andi Kleen <ak@linux.intel.com> wrote:
> > 
> > "H.J. Lu" <hjl.tools@gmail.com> writes:
> > > 
> > > > 
> > > > 
> > > > Talking about PIC thunks, those have I believe . character in their symbols,
> > > > so that they can't be confused with user functions.  Any reason these
> > > > retpoline thunks aren't?
> > > > 
> > > They used to have '.'.  It was changed at the last minute since kernel needs to
> > > export them as regular symbols.
> > The kernel doesn't actually need that to export the symbols.
> > 
> > While symbol CRCs cannot be generated for symbols with '.', CRCs are not
> > needed and there were already patches to hide the resulting warnings.
> > 
> Andi, can you work it out with David?

It wasn't CONFIG_MODVERSIONS but CONFIG_TRIM_UNUSED_SYMBOLS which was
the straw that broke the camel's back on that one. I'm open to a
solution for that one, but I couldn't see one that didn't make my eyes
bleed. Except for making the symbols not have dots in.

https://patchwork.kernel.org/patch/10148081/
Andi Kleen Jan. 8, 2018, 10:27 p.m. UTC | #11
On Mon, Jan 08, 2018 at 09:35:26PM +0000, David Woodhouse wrote:
> On Mon, 2018-01-08 at 13:32 -0800, H.J. Lu wrote:
> > On Mon, Jan 8, 2018 at 8:46 AM, Andi Kleen <ak@linux.intel.com> wrote:
> > > 
> > > "H.J. Lu" <hjl.tools@gmail.com> writes:
> > > > 
> > > > > 
> > > > > 
> > > > > Talking about PIC thunks, those have I believe . character in their symbols,
> > > > > so that they can't be confused with user functions.  Any reason these
> > > > > retpoline thunks aren't?
> > > > > 
> > > > They used to have '.'.  It was changed at the last minute since kernel needs to
> > > > export them as regular symbols.
> > > The kernel doesn't actually need that to export the symbols.
> > > 
> > > While symbol CRCs cannot be generated for symbols with '.', CRCs are not
> > > needed and there were already patches to hide the resulting warnings.
> > > 
> > Andi, can you work it out with David?
> 
> It wasn't CONFIG_MODVERSIONS but CONFIG_TRIM_UNUSED_SYMBOLS which was
> the straw that broke the camel's back on that one. I'm open to a
> solution for that one, but I couldn't see one that didn't make my eyes
> bleed. Except for making the symbols not have dots in.
> 
> https://patchwork.kernel.org/patch/10148081/

I guess we can stay with the underscore version in the compiler now.

In theory it could conflict with something used in C, but the risk
is probably low.

-Andi
David Woodhouse Jan. 8, 2018, 10:36 p.m. UTC | #12
On Mon, 2018-01-08 at 14:27 -0800, Andi Kleen wrote:
> On Mon, Jan 08, 2018 at 09:35:26PM +0000, David Woodhouse wrote:
> > On Mon, 2018-01-08 at 13:32 -0800, H.J. Lu wrote:
> > > On Mon, Jan 8, 2018 at 8:46 AM, Andi Kleen <ak@linux.intel.com> wrote:
> > > > 
> > > > "H.J. Lu" <hjl.tools@gmail.com> writes:
> > > > > 
> > > > > > 
> > > > > > 
> > > > > > Talking about PIC thunks, those have I believe . character in their symbols,
> > > > > > so that they can't be confused with user functions.  Any reason these
> > > > > > retpoline thunks aren't?
> > > > > > 
> > > > > They used to have '.'.  It was changed at the last minute since kernel needs to
> > > > > export them as regular symbols.
> > > > The kernel doesn't actually need that to export the symbols.
> > > > 
> > > > While symbol CRCs cannot be generated for symbols with '.', CRCs are not
> > > > needed and there were already patches to hide the resulting warnings.
> > > > 
> > > Andi, can you work it out with David?
> > 
> > It wasn't CONFIG_MODVERSIONS but CONFIG_TRIM_UNUSED_SYMBOLS which was
> > the straw that broke the camel's back on that one. I'm open to a
> > solution for that one, but I couldn't see one that didn't make my eyes
> > bleed. Except for making the symbols not have dots in.
> > 
> > https://patchwork.kernel.org/patch/10148081/
> 
> I guess we can stay with the underscore version in the compiler now.
> 
> In theory it could conflict with something used in C, but the risk
> is probably low.

If it makes anyone happier, we could perhaps stick with the dot version
for the *inline* thunks but only use underscores for the external one?

But really, any "innocent user" claiming to be *surprised* after
writing their own C function and calling it __x86_indirect_thunk_rax is
surely taking the piss.
Michael Matz Jan. 8, 2018, 11:02 p.m. UTC | #13
Hi,

On Mon, 8 Jan 2018, Woodhouse, David wrote:

> > > That can be done via asm aliases or direct assembler use; the kernel
> > > doesn't absolutely have to access them via C compatible symbol names.
> > > 
> > Hi David,
> > 
> > Can you comment on this?
> 
> It ends up being a real pain for the CONFIG_TRIM_UNUSED_SYMBOLS
> mechanism in the kernel, which really doesn't cope well with the dots.
> It *does* assume that exported symbols have C-compatible names.
> MODVERSIONS too, although we had a simpler "just shut up the warnings"
> solution for that. It was CONFIG_TRIM_UNUSED_SYMBOLS which was the
> really horrid one.
> 
> I went a little way down the rabbit-hole of trying to make it cope, but
> it was far from pretty:
> 
> https://patchwork.kernel.org/patch/10148081/
> 
> If there's a way to make it work sanely, I'm up for that. But if the
> counter-argument is "But someone might genuinely want to make their own
> C function called __x86_indirect_thunk_rax"... I'm not so receptive to
> that argument :)

Well, the naming of the extern thunk isn't so important that the above 
might not be a reason to just go with underscores.  I'll certainly not 
object to the patch on that basis.  But do keep in mind that GCC already 
uses '.' for other compiler generated symbols, and we're likely to 
continue doing this.  So eventually you'll want to fix your trim_unused 
infrastructure to cope with that.  (Perhaps by just ignoring those 
symbols?  It's not that they must be trimmed if unused, as the user didn't 
write them on his own to start with, right?)


Ciao,
Michael.
David Woodhouse Jan. 8, 2018, 11:59 p.m. UTC | #14
On Tue, 2018-01-09 at 00:02 +0100, Michael Matz wrote:
> Hi,
> 
> On Mon, 8 Jan 2018, Woodhouse, David wrote:
> 
> > 
> > > 
> > > > 
> > > > That can be done via asm aliases or direct assembler use; the kernel
> > > > doesn't absolutely have to access them via C compatible symbol names.
> > > > 
> > > Hi David,
> > > 
> > > Can you comment on this?
> >
> > It ends up being a real pain for the CONFIG_TRIM_UNUSED_SYMBOLS
> > mechanism in the kernel, which really doesn't cope well with the dots.
> > It *does* assume that exported symbols have C-compatible names.
> > MODVERSIONS too, although we had a simpler "just shut up the warnings"
> > solution for that. It was CONFIG_TRIM_UNUSED_SYMBOLS which was the
> > really horrid one.
> > 
> > I went a little way down the rabbit-hole of trying to make it cope, but
> > it was far from pretty:
> > 
> > https://patchwork.kernel.org/patch/10148081/
> > 
> > If there's a way to make it work sanely, I'm up for that. But if the
> > counter-argument is "But someone might genuinely want to make their own
> > C function called __x86_indirect_thunk_rax"... I'm not so receptive to
> > that argument :)
>
> Well, the naming of the extern thunk isn't so important that the above 
> might not be a reason to just go with underscores.  I'll certainly not 
> object to the patch on that basis.  But do keep in mind that GCC already 
> uses '.' for other compiler generated symbols, and we're likely to 
> continue doing this.  So eventually you'll want to fix your trim_unused 
> infrastructure to cope with that.  (Perhaps by just ignoring those 
> symbols?  It's not that they must be trimmed if unused, as the user didn't 
> write them on his own to start with, right?)

This is only for the symbols which are exported to loadable kernel modules.

When the compiler-generated symbols are emitted inline in a COMDAT
section, we basically never even notice them. The module probably has
its own copy, and that's fine.

These indirect thunks are special because we really care about
modifying them at run time according to which CPU we happen to be
running on and which other mitigations for the Spectre problem are in
use. That's why we asked for the -mindirect-branch=thunk-extern option
and provided our own copy of the thunks — and why we're exporting *our*
version to loadable modules.

I don't think it's hugely likely that we'll need to cope with other
compiler-generated symbols in quite the same way, in the near future.

The CONFIG_TRIM_UNUSED_SYMBOLS option is an optimisation to avoid
exporting functions which aren't actually used by any of the loadable
modules that were built in the currently-active kernel configuration.
Lots of in-kernel text can thus be dropped from the image once it's
known that it will never be used. And that's the code that has
difficulty with the dots.

And yes, it's perfectly OK to drop, for example, the %rsp-based thunk
completely if nobody happens to use it today (actually, why am I
creating that one in the first place? :)

There are various ways to address the problem, none of them very
pretty. The nicest was to just *not* have dots in the symbols.
Jeff Law Jan. 9, 2018, 6:55 p.m. UTC | #15
On 01/08/2018 03:10 AM, Martin Liška wrote:
> On 01/07/2018 11:59 PM, H.J. Lu wrote:
>> +static void
>> +output_indirect_thunk_function (bool need_bnd_p, int regno)
>> +{
>> +  char name[32];
>> +  tree decl;
>> +
>> +  /* Create __x86_indirect_thunk/__x86_indirect_thunk_bnd.  */
>> +  indirect_thunk_name (name, regno, need_bnd_p);
>> +  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
>> +		     get_identifier (name),
>> +		     build_function_type_list (void_type_node, NULL_TREE));
>> +  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
>> +				   NULL_TREE, void_type_node);
>> +  TREE_PUBLIC (decl) = 1;
>> +  TREE_STATIC (decl) = 1;
>> +  DECL_IGNORED_P (decl) = 1;
>> +
>> +#if TARGET_MACHO
>> +  if (TARGET_MACHO)
>> +    {
>> +      switch_to_section (darwin_sections[picbase_thunk_section]);
>> +      fputs ("\t.weak_definition\t", asm_out_file);
>> +      assemble_name (asm_out_file, name);
>> +      fputs ("\n\t.private_extern\t", asm_out_file);
>> +      assemble_name (asm_out_file, name);
>> +      putc ('\n', asm_out_file);
>> +      ASM_OUTPUT_LABEL (asm_out_file, name);
>> +      DECL_WEAK (decl) = 1;
>> +    }
>> +  else
>> +#endif
>> +    if (USE_HIDDEN_LINKONCE)
>> +      {
>> +	cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
>> +
>> +	targetm.asm_out.unique_section (decl, 0);
>> +	switch_to_section (get_named_section (decl, NULL, 0));
>> +
>> +	targetm.asm_out.globalize_label (asm_out_file, name);
>> +	fputs ("\t.hidden\t", asm_out_file);
>> +	assemble_name (asm_out_file, name);
>> +	putc ('\n', asm_out_file);
>> +	ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
>> +      }
>> +    else
>> +      {
>> +	switch_to_section (text_section);
>> +	ASM_OUTPUT_LABEL (asm_out_file, name);
>> +      }
>> +
>> +  DECL_INITIAL (decl) = make_node (BLOCK);
>> +  current_function_decl = decl;
>> +  allocate_struct_function (decl, false);
>> +  init_function_start (decl);
>> +  /* We're about to hide the function body from callees of final_* by
>> +     emitting it directly; tell them we're a thunk, if they care.  */
>> +  cfun->is_thunk = true;
>> +  first_function_block_is_cold = false;
>> +  /* Make sure unwind info is emitted for the thunk if needed.  */
>> +  final_start_function (emit_barrier (), asm_out_file, 1);
>> +
>> +  output_indirect_thunk (need_bnd_p, regno);
>> +
>> +  final_end_function ();
>> +  init_insn_lengths ();
>> +  free_after_compilation (cfun);
>> +  set_cfun (NULL);
>> +  current_function_decl = NULL;
>> +}
>> +
> 
> I'm wondering whether thunk creation can be a good target-independent generalization? I guess
> we can emit the function declaration without direct writes to asm_out_file? And the emission
> of function body can be potentially a target hook?
> 
> What about emitting body of the function with RTL instructions instead of direct assembly write?
> My knowledge of RTL is quite small, but maybe it can bring some generalization and reusability
> for other targets?
That's the key point I'm trying to make.  We should be looking at
generalizing this stuff where it makes sense.

jeff
H.J. Lu Jan. 9, 2018, 7:05 p.m. UTC | #16
On Tue, Jan 9, 2018 at 10:55 AM, Jeff Law <law@redhat.com> wrote:
> On 01/08/2018 03:10 AM, Martin Liška wrote:
>> On 01/07/2018 11:59 PM, H.J. Lu wrote:
>>> +static void
>>> +output_indirect_thunk_function (bool need_bnd_p, int regno)
>>> +{
>>> +  char name[32];
>>> +  tree decl;
>>> +
>>> +  /* Create __x86_indirect_thunk/__x86_indirect_thunk_bnd.  */
>>> +  indirect_thunk_name (name, regno, need_bnd_p);
>>> +  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
>>> +                 get_identifier (name),
>>> +                 build_function_type_list (void_type_node, NULL_TREE));
>>> +  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
>>> +                               NULL_TREE, void_type_node);
>>> +  TREE_PUBLIC (decl) = 1;
>>> +  TREE_STATIC (decl) = 1;
>>> +  DECL_IGNORED_P (decl) = 1;
>>> +
>>> +#if TARGET_MACHO
>>> +  if (TARGET_MACHO)
>>> +    {
>>> +      switch_to_section (darwin_sections[picbase_thunk_section]);
>>> +      fputs ("\t.weak_definition\t", asm_out_file);
>>> +      assemble_name (asm_out_file, name);
>>> +      fputs ("\n\t.private_extern\t", asm_out_file);
>>> +      assemble_name (asm_out_file, name);
>>> +      putc ('\n', asm_out_file);
>>> +      ASM_OUTPUT_LABEL (asm_out_file, name);
>>> +      DECL_WEAK (decl) = 1;
>>> +    }
>>> +  else
>>> +#endif
>>> +    if (USE_HIDDEN_LINKONCE)
>>> +      {
>>> +    cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
>>> +
>>> +    targetm.asm_out.unique_section (decl, 0);
>>> +    switch_to_section (get_named_section (decl, NULL, 0));
>>> +
>>> +    targetm.asm_out.globalize_label (asm_out_file, name);
>>> +    fputs ("\t.hidden\t", asm_out_file);
>>> +    assemble_name (asm_out_file, name);
>>> +    putc ('\n', asm_out_file);
>>> +    ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
>>> +      }
>>> +    else
>>> +      {
>>> +    switch_to_section (text_section);
>>> +    ASM_OUTPUT_LABEL (asm_out_file, name);
>>> +      }
>>> +
>>> +  DECL_INITIAL (decl) = make_node (BLOCK);
>>> +  current_function_decl = decl;
>>> +  allocate_struct_function (decl, false);
>>> +  init_function_start (decl);
>>> +  /* We're about to hide the function body from callees of final_* by
>>> +     emitting it directly; tell them we're a thunk, if they care.  */
>>> +  cfun->is_thunk = true;
>>> +  first_function_block_is_cold = false;
>>> +  /* Make sure unwind info is emitted for the thunk if needed.  */
>>> +  final_start_function (emit_barrier (), asm_out_file, 1);
>>> +
>>> +  output_indirect_thunk (need_bnd_p, regno);
>>> +
>>> +  final_end_function ();
>>> +  init_insn_lengths ();
>>> +  free_after_compilation (cfun);
>>> +  set_cfun (NULL);
>>> +  current_function_decl = NULL;
>>> +}
>>> +
>>
>> I'm wondering whether thunk creation can be a good target-independent generalization? I guess
>> we can emit the function declaration without direct writes to asm_out_file? And the emission
>> of function body can be potentially a target hook?
>>
>> What about emitting body of the function with RTL instructions instead of direct assembly write?
>> My knowledge of RTL is quite small, but maybe it can bring some generalization and reusability
>> for other targets?
> That's the key point I'm trying to make.  We should be looking at
> generalizing this stuff where it makes sense.
>

Thunks are x86-specific and they are created the same way as 32-bit PIC thunks.
I don't see how a target hook would be used.
Jeff Law Jan. 11, 2018, 10:46 p.m. UTC | #17
On 01/07/2018 03:59 PM, H.J. Lu wrote:
> Add -mindirect-branch= option to convert indirect call and jump to call
> and return thunks.  The default is 'keep', which keeps indirect call and
> jump unmodified.  'thunk' converts indirect call and jump to call and
> return thunk.  'thunk-inline' converts indirect call and jump to inlined
> call and return thunk.  'thunk-extern' converts indirect call and jump to
> external call and return thunk provided in a separate object file.  You
> can control this behavior for a specific function by using the function
> attribute indirect_branch.
> 
> 2 kinds of thunks are generated.  Memory thunk where the function address
> is at the top of the stack:
> 
> __x86_indirect_thunk:
> 	call L2
> L1:
> 	lfence
> 	jmp L1
> L2:
> 	lea 8(%rsp), %rsp|lea 4(%esp), %esp
> 	ret
> 
> Indirect jmp via memory, "jmp mem", is converted to
> 
> 	push memory
> 	jmp __x86_indirect_thunk
> 
> Indirect call via memory, "call mem", is converted to
> 
> 	jmp L2
> L1:
> 	push [mem]
> 	jmp __x86_indirect_thunk
> L2:
> 	call L1
> 
> Register thunk where the function address is in a register, reg:
> 
> __x86_indirect_thunk_reg:
> 	call	L2
> L1:
> 	lfence
> 	jmp	L1
> L2:
> 	movq	%reg, (%rsp)|movl    %reg, (%esp)
> 	ret
> 
> where reg is one of (r|e)ax, (r|e)dx, (r|e)cx, (r|e)bx, (r|e)si, (r|e)di,
> (r|e)bp, r8, r9, r10, r11, r12, r13, r14 and r15.
> 
> Indirect jmp via register, "jmp reg", is converted to
> 
> 	jmp __x86_indirect_thunk_reg
> 
> Indirect call via register, "call reg", is converted to
> 
> 	call __x86_indirect_thunk_reg
> 
> gcc/
> 
> 	* config/i386/i386-opts.h (indirect_branch): New.
> 	* config/i386/i386-protos.h (ix86_output_indirect_jmp): Likewise.
> 	* config/i386/i386.c (ix86_using_red_zone): Disallow red-zone
> 	with local indirect jump when converting indirect call and jump.
> 	(ix86_set_indirect_branch_type): New.
> 	(ix86_set_current_function): Call ix86_set_indirect_branch_type.
> 	(indirectlabelno): New.
> 	(indirect_thunk_needed): Likewise.
> 	(indirect_thunk_bnd_needed): Likewise.
> 	(indirect_thunks_used): Likewise.
> 	(indirect_thunks_bnd_used): Likewise.
> 	(INDIRECT_LABEL): Likewise.
> 	(indirect_thunk_name): Likewise.
> 	(output_indirect_thunk): Likewise.
> 	(output_indirect_thunk_function): Likewise.
> 	(ix86_output_indirect_branch): Likewise.
> 	(ix86_output_indirect_jmp): Likewise.
> 	(ix86_code_end): Call output_indirect_thunk_function if needed.
> 	(ix86_output_call_insn): Call ix86_output_indirect_branch if
> 	needed.
> 	(ix86_handle_fndecl_attribute): Handle indirect_branch.
> 	(ix86_attribute_table): Add indirect_branch.
> 	* config/i386/i386.h (machine_function): Add indirect_branch_type
> 	and has_local_indirect_jump.
> 	* config/i386/i386.md (indirect_jump): Set has_local_indirect_jump
> 	to true.
> 	(tablejump): Likewise.
> 	(*indirect_jump): Use ix86_output_indirect_jmp.
> 	(*tablejump_1): Likewise.
> 	(simple_return_indirect_internal): Likewise.
> 	* config/i386/i386.opt (mindirect-branch=): New option.
> 	(indirect_branch): New.
> 	(keep): Likewise.
> 	(thunk): Likewise.
> 	(thunk-inline): Likewise.
> 	(thunk-extern): Likewise.
> 	* doc/extend.texi: Document indirect_branch function attribute.
> 	* doc/invoke.texi: Document -mindirect-branch= option.
Note I'm expecting Uros to chime in.  So please do not consider this
ack'd until you hear from Uros.

At a high level is there really that much value in having thunks in the
object file?  Why not put the full set of thunks into libgcc and just
allow selection between inline sequences and external thunks
(thunk-inline and thunk-extern)?  It's not a huge simplification, but
if there isn't a compelling reason, let's drop the in-object-file thunks.

> +
> +/* Fills in the label name that should be used for the indirect thunk.  */
> +
> +static void
> +indirect_thunk_name (char name[32], int regno, bool need_bnd_p)
Please document each argument in the function's comment.



> +
> +static void
> +output_indirect_thunk (bool need_bnd_p, int regno)
Needs a function comment.



> +
> +static void
> +output_indirect_thunk_function (bool need_bnd_p, int regno)
Needs a function comment.



> @@ -28119,12 +28357,182 @@ ix86_nopic_noplt_attribute_p (rtx call_op)
>    return false;
>  }
>  
> +static void
> +ix86_output_indirect_branch (rtx call_op, const char *xasm,
> +			     bool sibcall_p)
Needs a function comment.


I'd probably break this into a few smaller functions.  It's a lot of
inlined code.







> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 3f587806407..a7573c468ae 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -12313,12 +12313,13 @@
>  {
>    if (TARGET_X32)
>      operands[0] = convert_memory_address (word_mode, operands[0]);
> +  cfun->machine->has_local_indirect_jump = true;
Note this is not ideal in that it's set at expansion time and thus would
not be accurate if the RTL optimizers were able to simplify things enough
such that the indirect jump has a known target.

But I wouldn't expect that to happen much in the RTL optimizers, as
the gimple optimizers are likely much better at doing that kind of
thing.  So I won't object to doing things this way as long as they
gracefully handle this case.


> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 09aaa97c2fc..22c806206e4 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1021,3 +1021,23 @@ indirect jump.
>  mforce-indirect-call
>  Target Report Var(flag_force_indirect_call) Init(0)
>  Make all function calls indirect.
> +
> +mindirect-branch=
> +Target Report RejectNegative Joined Enum(indirect_branch) Var(ix86_indirect_branch) Init(indirect_branch_keep)
> +Convert indirect call and jump.
Convert to what?  I realize there's an enum of the choices below, but
this doesn't read well.

Do you want to mention that CET and retpolines are inherently
incompatible?  Should an attempt to use them together generate a
compile-time error?

Jeff
H.J. Lu Jan. 11, 2018, 11 p.m. UTC | #18
On Thu, Jan 11, 2018 at 2:46 PM, Jeff Law <law@redhat.com> wrote:
> On 01/07/2018 03:59 PM, H.J. Lu wrote:
>> Add -mindirect-branch= option to convert indirect call and jump to call
>> and return thunks.  The default is 'keep', which keeps indirect call and
>> jump unmodified.  'thunk' converts indirect call and jump to call and
>> return thunk.  'thunk-inline' converts indirect call and jump to inlined
>> call and return thunk.  'thunk-extern' converts indirect call and jump to
>> external call and return thunk provided in a separate object file.  You
>> can control this behavior for a specific function by using the function
>> attribute indirect_branch.
>>
>> 2 kinds of thunks are generated.  Memory thunk where the function address
>> is at the top of the stack:
>>
>> __x86_indirect_thunk:
>>       call L2
>> L1:
>>       lfence
>>       jmp L1
>> L2:
>>       lea 8(%rsp), %rsp|lea 4(%esp), %esp
>>       ret
>>
>> Indirect jmp via memory, "jmp mem", is converted to
>>
>>       push memory
>>       jmp __x86_indirect_thunk
>>
>> Indirect call via memory, "call mem", is converted to
>>
>>       jmp L2
>> L1:
>>       push [mem]
>>       jmp __x86_indirect_thunk
>> L2:
>>       call L1
>>
>> Register thunk where the function address is in a register, reg:
>>
>> __x86_indirect_thunk_reg:
>>       call    L2
>> L1:
>>       lfence
>>       jmp     L1
>> L2:
>>       movq    %reg, (%rsp)|movl    %reg, (%esp)
>>       ret
>>
>> where reg is one of (r|e)ax, (r|e)dx, (r|e)cx, (r|e)bx, (r|e)si, (r|e)di,
>> (r|e)bp, r8, r9, r10, r11, r12, r13, r14 and r15.
>>
>> Indirect jmp via register, "jmp reg", is converted to
>>
>>       jmp __x86_indirect_thunk_reg
>>
>> Indirect call via register, "call reg", is converted to
>>
>>       call __x86_indirect_thunk_reg
>>
>> gcc/
>>
>>       * config/i386/i386-opts.h (indirect_branch): New.
>>       * config/i386/i386-protos.h (ix86_output_indirect_jmp): Likewise.
>>       * config/i386/i386.c (ix86_using_red_zone): Disallow red-zone
>>       with local indirect jump when converting indirect call and jump.
>>       (ix86_set_indirect_branch_type): New.
>>       (ix86_set_current_function): Call ix86_set_indirect_branch_type.
>>       (indirectlabelno): New.
>>       (indirect_thunk_needed): Likewise.
>>       (indirect_thunk_bnd_needed): Likewise.
>>       (indirect_thunks_used): Likewise.
>>       (indirect_thunks_bnd_used): Likewise.
>>       (INDIRECT_LABEL): Likewise.
>>       (indirect_thunk_name): Likewise.
>>       (output_indirect_thunk): Likewise.
>>       (output_indirect_thunk_function): Likewise.
>>       (ix86_output_indirect_branch): Likewise.
>>       (ix86_output_indirect_jmp): Likewise.
>>       (ix86_code_end): Call output_indirect_thunk_function if needed.
>>       (ix86_output_call_insn): Call ix86_output_indirect_branch if
>>       needed.
>>       (ix86_handle_fndecl_attribute): Handle indirect_branch.
>>       (ix86_attribute_table): Add indirect_branch.
>>       * config/i386/i386.h (machine_function): Add indirect_branch_type
>>       and has_local_indirect_jump.
>>       * config/i386/i386.md (indirect_jump): Set has_local_indirect_jump
>>       to true.
>>       (tablejump): Likewise.
>>       (*indirect_jump): Use ix86_output_indirect_jmp.
>>       (*tablejump_1): Likewise.
>>       (simple_return_indirect_internal): Likewise.
>>       * config/i386/i386.opt (mindirect-branch=): New option.
>>       (indirect_branch): New.
>>       (keep): Likewise.
>>       (thunk): Likewise.
>>       (thunk-inline): Likewise.
>>       (thunk-extern): Likewise.
>>       * doc/extend.texi: Document indirect_branch function attribute.
>>       * doc/invoke.texi: Document -mindirect-branch= option.
> Note I'm expecting Uros to chime in.  So please do not consider this
> ack'd until you hear from Uros.
>
> At a high level is there really that much value in having thunks in the
> object file?  Why not put the full set of thunks into libgcc and just
> allow selection between inline sequences and external thunks
> (thunk-inline and thunk-external)?  It's not a huge simplification, but
> if there isn't a compelling reason, let's drop the in-object-file thunks.

I prefer to leave it in the object file just in case that
-mindirect-branch-loop=
is needed in the future.

>> +
>> +/* Fills in the label name that should be used for the indirect thunk.  */
>> +
>> +static void
>> +indirect_thunk_name (char name[32], int regno, bool need_bnd_p)
> Please document each argument in the function's comment.

Will do.

>
>
>> +
>> +static void
>> +output_indirect_thunk (bool need_bnd_p, int regno)
> Needs a function comment.

Will do.

>
>
>> +
>> +static void
>> +output_indirect_thunk_function (bool need_bnd_p, int regno)
> Needs a function comment.
>

Will do.

>
>> @@ -28119,12 +28357,182 @@ ix86_nopic_noplt_attribute_p (rtx call_op)
>>    return false;
>>  }
>>
>> +static void
>> +ix86_output_indirect_branch (rtx call_op, const char *xasm,
>> +                          bool sibcall_p)
> Needs a function comment.
>

Will do.

> I'd probably break this into a few smaller functions.  It's a lot of
> inlined code.
>

That function has 142 lines.  Unless there is a compelling need,
I prefer to leave it as is.

>
>
>
>
>
>> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
>> index 3f587806407..a7573c468ae 100644
>> --- a/gcc/config/i386/i386.md
>> +++ b/gcc/config/i386/i386.md
>> @@ -12313,12 +12313,13 @@
>>  {
>>    if (TARGET_X32)
>>      operands[0] = convert_memory_address (word_mode, operands[0]);
>> +  cfun->machine->has_local_indirect_jump = true;
> Note this is not ideal in that it's set at expansion time and thus would
> not be accurate if the RTL optimizers were able to simplify things enough
> such that the indirect jump has a known target.
>
> But I wouldn't expect that to happen much in the RTL optimizers, as
> the gimple optimizers are likely much better at doing that kind of
> thing.  So I won't object to doing things this way as long as they
> gracefully handle this case.
>
>
>> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
>> index 09aaa97c2fc..22c806206e4 100644
>> --- a/gcc/config/i386/i386.opt
>> +++ b/gcc/config/i386/i386.opt
>> @@ -1021,3 +1021,23 @@ indirect jump.
>>  mforce-indirect-call
>>  Target Report Var(flag_force_indirect_call) Init(0)
>>  Make all function calls indirect.
>> +
>> +mindirect-branch=
>> +Target Report RejectNegative Joined Enum(indirect_branch) Var(ix86_indirect_branch) Init(indirect_branch_keep)
>> +Convert indirect call and jump.
> Convert to what?  I realize there's an enum of the choices below, but
> this doesn't read well.

I will update.

> Do you want to mention that CET and retpolines are inherently

I will document it.

> incompatible?  Should an attempt to use them together generate a
> compile-time error?
>

A compile-time error sounds like a good idea.

Thanks.
Jakub Jelinek Jan. 11, 2018, 11:07 p.m. UTC | #19
On Thu, Jan 11, 2018 at 03:46:51PM -0700, Jeff Law wrote:
> Note I'm expecting Uros to chime in.  So please do not consider this
> ack'd until you hear from Uros.
> 
> At a high level is there really that much value in having thunks in the
> object file?  Why not put the full set of thunks into libgcc and just
> allow selection between inline sequences and external thunks
> (thunk-inline and thunk-external)?  It's not a huge simplification, but
> if there isn't a compelling reason, let's drop the in-object-file thunks.

Not everything is linked against libgcc.a; some things are linked against just
libgcc_s.so.1, other stuff against both, some libraries against none of that.
Probably it is undesirable to have the thunks at non-constant offsets from
the uses, that would need text relocations.  Thunks emitted in the object
files, hidden and comdat merged between .o files like what we have say for
i686 PIC thunks seems like the best default to me and a way for the kernel
to override that.

	Jakub
Jeff Law Jan. 12, 2018, 5:57 p.m. UTC | #20
On 01/11/2018 04:07 PM, Jakub Jelinek wrote:
> On Thu, Jan 11, 2018 at 03:46:51PM -0700, Jeff Law wrote:
>> Note I'm expecting Uros to chime in.  So please do not consider this
>> ack'd until you hear from Uros.
>>
>> At a high level is there really that much value in having thunks in the
>> object file?  Why not put the full set of thunks into libgcc and just
>> allow selection between inline sequences and external thunks
>> (thunk-inline and thunk-external)?  It's not a huge simplification, but
>> if there isn't a compelling reason, let's drop the in-object-file thunks.
> 
> Not everything is linked against libgcc.a, something is linked against just
> libgcc_s.so.1, other stuff against both, some libraries against none of that.
> Probably it is undesirable to have the thunks at non-constant offsets from
> the uses, that would need text relocations.  Thunks emitted in the object
> files, hidden and comdat merged between .o files like what we have say for
> i686 PIC thunks seems like the best default to me and a way for the kernel
> to override that.
For things that don't link against libgcc, they would (of course) be
expected to provide the necessary thunks.  The kernel would be the
obvious example and that's precisely what they're going to do.

WRT text relocs, yea that sucks, but if we're going to have user space
mitigations, then we're likely going to need those relocs so that the
thunks can be patched out.  I'm actually hoping we're not going to need
user space mitigations for spectre v2 and we can avoid this problem..



I'm just not convinced there's a lot of value there, but I'm not going
to hold things up on that.  So I won't object on this basis.

jeff
Jakub Jelinek Jan. 12, 2018, 5:59 p.m. UTC | #21
On Fri, Jan 12, 2018 at 10:57:08AM -0700, Jeff Law wrote:
> On 01/11/2018 04:07 PM, Jakub Jelinek wrote:
> > On Thu, Jan 11, 2018 at 03:46:51PM -0700, Jeff Law wrote:
> >> Note I'm expecting Uros to chime in.  So please do not consider this
> >> ack'd until you hear from Uros.
> >>
> >> At a high level is there really that much value in having thunks in the
> >> object file?  Why not put the full set of thunks into libgcc and just
> >> allow selection between inline sequences and external thunks
> >> (thunk-inline and thunk-external)?  It's not a huge simplification, but
> >> if there isn't a compelling reason, let's drop the in-object-file thunks.
> > 
> > Not everything is linked against libgcc.a, something is linked against just
> > libgcc_s.so.1, other stuff against both, some libraries against none of that.
> > Probably it is undesirable to have the thunks at non-constant offsets from
> > the uses, that would need text relocations.  Thunks emitted in the object
> > files, hidden and comdat merged between .o files like what we have say for
> > i686 PIC thunks seems like the best default to me and a way for the kernel
> > to override that.
> For things that don't link against libgcc, they would (of course) be
> expected to provide the necessary thunks.  The kernel would be the
> obvious example and that's precisely what they're going to do.
> 
> WRT text relocs, yea that sucks, but if we're going to have user space
> mitigations, then we're likely going to need those relocs so that the
> thunks can be patched out.  I'm actually hoping we're not going to need
> user space mitigations for spectre v2 and we can avoid this problem..

Some architectures don't allow text relocations at all, including x86_64.
So any kind of runtime patching isn't that easy, and is generally a security
hole that e.g. SELinux prevents as well.

	Jakub
David Woodhouse Jan. 13, 2018, 9:03 a.m. UTC | #22
On Fri, 2018-01-12 at 10:57 -0700, Jeff Law wrote:
> 
> WRT text relocs, yea that sucks, but if we're going to have user space
> mitigations, then we're likely going to need those relocs so that the
> thunks can be patched out.  I'm actually hoping we're not going to need
> user space mitigations for spectre v2 and we can avoid this problem..

As things stand with retpoline in the kernel, userspace processes
aren't protected from each other. The attack mode is complex and
probably fairly unlikely, and we need to get the new microcode support
into the kernel, with the IBPB (flush branch predictor) MSR. And for
the kernel to use it, of course.

In the meantime, it does potentially make sense for sensitive userspace
processes to be compiled this way. Especially if they're going to run
external code (like JavaScript) and attempt to sandbox it — which is
something that IBPB isn't going to solve either.
Jeff Law Jan. 13, 2018, 4:17 p.m. UTC | #23
On 01/13/2018 02:03 AM, David Woodhouse wrote:
> On Fri, 2018-01-12 at 10:57 -0700, Jeff Law wrote:
>>
>> WRT text relocs, yea that sucks, but if we're going to have user space
>> mitigations, then we're likely going to need those relocs so that the
>> thunks can be patched out.  I'm actually hoping we're not going to need
>> user space mitigations for spectre v2 and we can avoid this problem..
> 
> As things stand with retpoline in the kernel, userspace  processes
> aren't protected from each other. The attack mode is complex and
> probably fairly unlikely, and we need to get the new microcode support
> into the kernel, with the IBPB (flush branch predictor) MSR. And for
> the kernel to use it, of course.
Correct, but for a user<->user exploit don't you have to at some point
run through a context switch?  That seems to be a point where we should
seriously think about flushing the predictor.

That wouldn't help code using user space threading packages such as npth or
goroutines that multiplex on top of pthreads, but I'm happy to punt
those in the immediate term.

> 
> In the meantime, it does potentially make sense for sensitive userspace
> processes to be compiled this way. Especially if they're going to run
> external code (like JavaScript) and attempt to sandbox it — which is
> something that IBPB isn't going to solve either.
I've suspected that in the immediate term there will likely be some
sensitive packages compiled with -mretpoline to at least cut down the
attack surface while the hardware side sorts itself out.  But to totally
address the problem you have to build the entire system with -mretpoline
-- and then 18 months out we're unable to turn on something like CET
because retpolines and CET are fundamentally incompatible.  That seems
like a losing proposition.

jeff
David Woodhouse Jan. 13, 2018, 4:29 p.m. UTC | #24
On Sat, 2018-01-13 at 09:17 -0700, Jeff Law wrote:
> On 01/13/2018 02:03 AM, David Woodhouse wrote:
> > On Fri, 2018-01-12 at 10:57 -0700, Jeff Law wrote:
> > As things stand with retpoline in the kernel, userspace  processes
> > aren't protected from each other. The attack mode is complex and
> > probably fairly unlikely, and we need to get the new microcode support
> > into the kernel, with the IBPB (flush branch predictor) MSR. And for
> > the kernel to use it, of course.
>
> Correct, but for a user<->user exploit don't you have to at some point
> run through a context switch?  That seems to be a point where we should
> seriously think about flushing the predictor.

Yes, that is the very next thing on our TODO list. It requires the new
CPU microcode, and the kernel patches which are being polished now.

> That wouldn't help code user space threading packages such as npth or
> goroutines that multiplex on top of pthreads, but I'm happy to punt
> those in the immediate term.

Agreed.

> > In the meantime, it does potentially make sense for sensitive userspace
> > processes to be compiled this way. Especially if they're going to run
> > external code (like JavaScript) and attempt to sandbox it — which is
> > something that IBPB isn't going to solve either.
>
> I've suspected that in the immediate term there will likely be some
> sensitive packages compiled with -mretpoline to at least cut down the
> attack surface while the hardware side sorts itself out.  But to totally
> address the problem you have to build the entire system with -mretpoline
> -- and then 18 months out we're unable to turn on something like CET
> because retpolines and CET are fundamentally incompatible.  That seems
> like a losing proposition.

This is one of the reasons I asked HJ for -mindirect-branch-register.
It means that we can runtime patch the thunk into a simple 'jmp *\reg',
which wasn't possible with the original ret-equivalent version (we
didn't have a clobberable register).

Any future CPU with CET is also going to have the new IBRS_ALL feature
which you can turn on and forget, and patch out your retpolines.

https://software.intel.com/sites/default/files/managed/c5/63/336996-Speculative-Execution-Side-Channel-Mitigations.pdf

Of course, that's in the kernel. For runtime patching to work in
userspace, you really do need to put it somewhere other than inline,
and patch/select it once. But I don't care about *that* discussion,
because all I care about right now is reaching agreement on the command
line option and the thunk symbol name. Did I mention that before? :)
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index f245c1573cf..f14cbeee7a1 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -106,4 +106,12 @@  enum prefer_vector_width {
     PVW_AVX512
 };
 
+enum indirect_branch {
+  indirect_branch_unset = 0,
+  indirect_branch_keep,
+  indirect_branch_thunk,
+  indirect_branch_thunk_inline,
+  indirect_branch_thunk_extern
+};
+
 #endif
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 0e49652898c..bf11cc426f9 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -305,6 +305,7 @@  extern enum attr_cpu ix86_schedule;
 #endif
 
 extern const char * ix86_output_call_insn (rtx_insn *insn, rtx call_op);
+extern const char * ix86_output_indirect_jmp (rtx call_op, bool ret_p);
 extern bool ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
 						machine_mode mode);
 extern int ix86_min_insn_size (rtx_insn *);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8696f931806..ac4d1f62f50 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2724,12 +2724,19 @@  make_pass_insert_endbranch (gcc::context *ctxt)
   return new pass_insert_endbranch (ctxt);
 }
 
-/* Return true if a red-zone is in use.  */
+/* Return true if a red-zone is in use.  We can't use the red-zone when
+   there are local indirect jumps, like "indirect_jump" or "tablejump",
+   which jump to another place in the function, since the "call" in the
+   indirect thunk pushes the return address onto the stack, destroying
+   the red-zone.  */
 
 bool
 ix86_using_red_zone (void)
 {
-  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
+  return (TARGET_RED_ZONE
+	  && !TARGET_64BIT_MS_ABI
+	  && (!cfun->machine->has_local_indirect_jump
+	      || cfun->machine->indirect_branch_type == indirect_branch_keep));
 }
 
 /* Return a string that documents the current -m options.  The caller is
@@ -5797,6 +5804,37 @@  ix86_set_func_type (tree fndecl)
     }
 }
 
+/* Set the indirect_branch_type field from the function FNDECL.  */
+
+static void
+ix86_set_indirect_branch_type (tree fndecl)
+{
+  if (cfun->machine->indirect_branch_type == indirect_branch_unset)
+    {
+      tree attr = lookup_attribute ("indirect_branch",
+				    DECL_ATTRIBUTES (fndecl));
+      if (attr != NULL)
+	{
+	  tree args = TREE_VALUE (attr);
+	  if (args == NULL)
+	    gcc_unreachable ();
+	  tree cst = TREE_VALUE (args);
+	  if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0)
+	    cfun->machine->indirect_branch_type = indirect_branch_keep;
+	  else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0)
+	    cfun->machine->indirect_branch_type = indirect_branch_thunk;
+	  else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0)
+	    cfun->machine->indirect_branch_type = indirect_branch_thunk_inline;
+	  else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0)
+	    cfun->machine->indirect_branch_type = indirect_branch_thunk_extern;
+	  else
+	    gcc_unreachable ();
+	}
+      else
+	cfun->machine->indirect_branch_type = ix86_indirect_branch;
+    }
+}
+
 /* Establish appropriate back-end context for processing the function
    FNDECL.  The argument might be NULL to indicate processing at top
    level, outside of any function scope.  */
@@ -5812,7 +5850,10 @@  ix86_set_current_function (tree fndecl)
 	 one is extern inline and one isn't.  Call ix86_set_func_type
 	 to set the func_type field.  */
       if (fndecl != NULL_TREE)
-	ix86_set_func_type (fndecl);
+	{
+	  ix86_set_func_type (fndecl);
+	  ix86_set_indirect_branch_type (fndecl);
+	}
       return;
     }
 
@@ -5832,6 +5873,7 @@  ix86_set_current_function (tree fndecl)
     }
 
   ix86_set_func_type (fndecl);
+  ix86_set_indirect_branch_type (fndecl);
 
   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
   if (new_tree == NULL_TREE)
@@ -10639,6 +10681,181 @@  ix86_setup_frame_addresses (void)
 # endif
 #endif
 
+static int indirectlabelno;
+static bool indirect_thunk_needed = false;
+static bool indirect_thunk_bnd_needed = false;
+
+static int indirect_thunks_used;
+static int indirect_thunks_bnd_used;
+
+#ifndef INDIRECT_LABEL
+# define INDIRECT_LABEL "LIND"
+#endif
+
+/* Fills in the label name that should be used for the indirect thunk.  */
+
+static void
+indirect_thunk_name (char name[32], int regno, bool need_bnd_p)
+{
+  if (USE_HIDDEN_LINKONCE)
+    {
+      const char *bnd = need_bnd_p ? "_bnd" : "";
+      if (regno >= 0)
+	{
+	  const char *reg_prefix;
+	  if (LEGACY_INT_REGNO_P (regno))
+	    reg_prefix = TARGET_64BIT ? "r" : "e";
+	  else
+	    reg_prefix = "";
+	  sprintf (name, "__x86_indirect_thunk%s_%s%s",
+		   bnd, reg_prefix, reg_names[regno]);
+	}
+      else
+	sprintf (name, "__x86_indirect_thunk%s", bnd);
+    }
+  else
+    {
+      if (regno >= 0)
+	{
+	  if (need_bnd_p)
+	    ASM_GENERATE_INTERNAL_LABEL (name, "LITBR", regno);
+	  else
+	    ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
+	}
+      else
+	{
+	  if (need_bnd_p)
+	    ASM_GENERATE_INTERNAL_LABEL (name, "LITB", 0);
+	  else
+	    ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
+	}
+    }
+}
+
+static void
+output_indirect_thunk (bool need_bnd_p, int regno)
+{
+  char indirectlabel1[32];
+  char indirectlabel2[32];
+
+  ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
+			       indirectlabelno++);
+  ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
+			       indirectlabelno++);
+
+  /* Call */
+  if (need_bnd_p)
+    fputs ("\tbnd call\t", asm_out_file);
+  else
+    fputs ("\tcall\t", asm_out_file);
+  assemble_name_raw (asm_out_file, indirectlabel2);
+  fputc ('\n', asm_out_file);
+
+  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
+
+  /* lfence.  */
+  fprintf (asm_out_file, "\tlfence\n");
+
+  /* Jump.  */
+  fputs ("\tjmp\t", asm_out_file);
+  assemble_name_raw (asm_out_file, indirectlabel1);
+  fputc ('\n', asm_out_file);
+
+  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
+
+  if (regno >= 0)
+    {
+      /* MOV.  */
+      rtx xops[2];
+      xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
+      xops[1] = gen_rtx_REG (word_mode, regno);
+      output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
+    }
+  else
+    {
+      /* LEA.  */
+      rtx xops[2];
+      xops[0] = stack_pointer_rtx;
+      xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
+      output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
+    }
+
+  if (need_bnd_p)
+    fputs ("\tbnd ret\n", asm_out_file);
+  else
+    fputs ("\tret\n", asm_out_file);
+}
+
+static void
+output_indirect_thunk_function (bool need_bnd_p, int regno)
+{
+  char name[32];
+  tree decl;
+
+  /* Create __x86_indirect_thunk/__x86_indirect_thunk_bnd.  */
+  indirect_thunk_name (name, regno, need_bnd_p);
+  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
+		     get_identifier (name),
+		     build_function_type_list (void_type_node, NULL_TREE));
+  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
+				   NULL_TREE, void_type_node);
+  TREE_PUBLIC (decl) = 1;
+  TREE_STATIC (decl) = 1;
+  DECL_IGNORED_P (decl) = 1;
+
+#if TARGET_MACHO
+  if (TARGET_MACHO)
+    {
+      switch_to_section (darwin_sections[picbase_thunk_section]);
+      fputs ("\t.weak_definition\t", asm_out_file);
+      assemble_name (asm_out_file, name);
+      fputs ("\n\t.private_extern\t", asm_out_file);
+      assemble_name (asm_out_file, name);
+      putc ('\n', asm_out_file);
+      ASM_OUTPUT_LABEL (asm_out_file, name);
+      DECL_WEAK (decl) = 1;
+    }
+  else
+#endif
+    if (USE_HIDDEN_LINKONCE)
+      {
+	cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
+
+	targetm.asm_out.unique_section (decl, 0);
+	switch_to_section (get_named_section (decl, NULL, 0));
+
+	targetm.asm_out.globalize_label (asm_out_file, name);
+	fputs ("\t.hidden\t", asm_out_file);
+	assemble_name (asm_out_file, name);
+	putc ('\n', asm_out_file);
+	ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
+      }
+    else
+      {
+	switch_to_section (text_section);
+	ASM_OUTPUT_LABEL (asm_out_file, name);
+      }
+
+  DECL_INITIAL (decl) = make_node (BLOCK);
+  current_function_decl = decl;
+  allocate_struct_function (decl, false);
+  init_function_start (decl);
+  /* We're about to hide the function body from callees of final_* by
+     emitting it directly; tell them we're a thunk, if they care.  */
+  cfun->is_thunk = true;
+  first_function_block_is_cold = false;
+  /* Make sure unwind info is emitted for the thunk if needed.  */
+  final_start_function (emit_barrier (), asm_out_file, 1);
+
+  output_indirect_thunk (need_bnd_p, regno);
+
+  final_end_function ();
+  init_insn_lengths ();
+  free_after_compilation (cfun);
+  set_cfun (NULL);
+  current_function_decl = NULL;
+}
+
 static int pic_labels_used;
 
 /* Fills in the label name that should be used for a pc thunk for
@@ -10665,11 +10882,32 @@  ix86_code_end (void)
   rtx xops[2];
   int regno;
 
+  if (indirect_thunk_needed)
+    output_indirect_thunk_function (false, -1);
+  if (indirect_thunk_bnd_needed)
+    output_indirect_thunk_function (true, -1);
+
+  for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
+    {
+      int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
+      if ((indirect_thunks_used & (1 << i)))
+	output_indirect_thunk_function (false, regno);
+
+      if ((indirect_thunks_bnd_used & (1 << i)))
+	output_indirect_thunk_function (true, regno);
+    }
+
   for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
     {
       char name[32];
       tree decl;
 
+      if ((indirect_thunks_used & (1 << regno)))
+	output_indirect_thunk_function (false, regno);
+
+      if ((indirect_thunks_bnd_used & (1 << regno)))
+	output_indirect_thunk_function (true, regno);
+
       if (!(pic_labels_used & (1 << regno)))
 	continue;
 
@@ -28119,12 +28357,182 @@  ix86_nopic_noplt_attribute_p (rtx call_op)
   return false;
 }
 
+static void
+ix86_output_indirect_branch (rtx call_op, const char *xasm,
+			     bool sibcall_p)
+{
+  char thunk_name_buf[32];
+  char *thunk_name;
+  char push_buf[64];
+  bool need_bnd_p = ix86_bnd_prefixed_insn_p (current_output_insn);
+  int regno;
+
+  if (REG_P (call_op))
+    regno = REGNO (call_op);
+  else
+    regno = -1;
+
+  if (cfun->machine->indirect_branch_type
+      != indirect_branch_thunk_inline)
+    {
+      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
+	{
+	  if (regno >= 0)
+	    {
+	      int i = regno;
+	      if (i >= FIRST_REX_INT_REG)
+		i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
+	      if (need_bnd_p)
+		indirect_thunks_bnd_used |= 1 << i;
+	      else
+		indirect_thunks_used |= 1 << i;
+	    }
+	  else
+	    {
+	      if (need_bnd_p)
+		indirect_thunk_bnd_needed = true;
+	      else
+		indirect_thunk_needed = true;
+	    }
+	}
+      indirect_thunk_name (thunk_name_buf, regno, need_bnd_p);
+      thunk_name = thunk_name_buf;
+    }
+  else
+    thunk_name = NULL;
+
+  snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
+	    TARGET_64BIT ? 'q' : 'l', xasm);
+
+  if (sibcall_p)
+    {
+      if (regno < 0)
+	output_asm_insn (push_buf, &call_op);
+      if (thunk_name != NULL)
+	{
+	  if (need_bnd_p)
+	    fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
+	  else
+	    fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
+	}
+      else
+	output_indirect_thunk (need_bnd_p, regno);
+    }
+  else
+    {
+      if (regno >= 0 && thunk_name != NULL)
+	{
+	  if (need_bnd_p)
+	    fprintf (asm_out_file, "\tbnd call\t%s\n", thunk_name);
+	  else
+	    fprintf (asm_out_file, "\tcall\t%s\n", thunk_name);
+	  return;
+	}
+
+      char indirectlabel1[32];
+      char indirectlabel2[32];
+
+      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
+				   INDIRECT_LABEL,
+				   indirectlabelno++);
+      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
+				   INDIRECT_LABEL,
+				   indirectlabelno++);
+
+      /* Jump.  */
+      if (need_bnd_p)
+	fputs ("\tbnd jmp\t", asm_out_file);
+      else
+	fputs ("\tjmp\t", asm_out_file);
+      assemble_name_raw (asm_out_file, indirectlabel2);
+      fputc ('\n', asm_out_file);
+
+      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
+
+      if (MEM_P (call_op))
+	{
+	  struct ix86_address parts;
+	  rtx addr = XEXP (call_op, 0);
+	  if (ix86_decompose_address (addr, &parts)
+	      && parts.base == stack_pointer_rtx)
+	    {
+	      /* Since call will adjust stack by -UNITS_PER_WORD,
+		 we must convert "disp(stack, index, scale)" to
+		 "disp+UNITS_PER_WORD(stack, index, scale)".  */
+	      if (parts.index)
+		{
+		  addr = gen_rtx_MULT (Pmode, parts.index,
+				       GEN_INT (parts.scale));
+		  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+				       addr);
+		}
+	      else
+		addr = stack_pointer_rtx;
+
+	      rtx disp;
+	      if (parts.disp != NULL_RTX)
+		disp = plus_constant (Pmode, parts.disp,
+				      UNITS_PER_WORD);
+	      else
+		disp = GEN_INT (UNITS_PER_WORD);
+
+	      addr = gen_rtx_PLUS (Pmode, addr, disp);
+	      call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
+	    }
+	}
+
+      if (regno < 0)
+	output_asm_insn (push_buf, &call_op);
+
+      if (thunk_name != NULL)
+	{
+	  if (need_bnd_p)
+	    fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
+	  else
+	    fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
+	}
+      else
+	output_indirect_thunk (need_bnd_p, regno);
+
+      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
+
+      /* Call.  */
+      if (need_bnd_p)
+	fputs ("\tbnd call\t", asm_out_file);
+      else
+	fputs ("\tcall\t", asm_out_file);
+      assemble_name_raw (asm_out_file, indirectlabel1);
+      fputc ('\n', asm_out_file);
+    }
+}
+
+const char *
+ix86_output_indirect_jmp (rtx call_op, bool ret_p)
+{
+  if (cfun->machine->indirect_branch_type != indirect_branch_keep)
+    {
+      /* We can't have red-zone if this isn't a function return since
+	 "call" in the indirect thunk pushes the return address onto
+	 stack, destroying red-zone.  */
+      if (!ret_p && ix86_red_zone_size != 0)
+	gcc_unreachable ();
+
+      ix86_output_indirect_branch (call_op, "%0", true);
+      return "";
+    }
+  else
+    return "%!jmp\t%A0";
+}
+
 /* Output the assembly for a call instruction.  */
 
 const char *
 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
 {
   bool direct_p = constant_call_address_operand (call_op, VOIDmode);
+  bool output_indirect_p
+    = (!TARGET_SEH
+       && cfun->machine->indirect_branch_type != indirect_branch_keep);
   bool seh_nop_p = false;
   const char *xasm;
 
@@ -28134,10 +28542,21 @@  ix86_output_call_insn (rtx_insn *insn, rtx call_op)
 	{
 	  if (ix86_nopic_noplt_attribute_p (call_op))
 	    {
+	      direct_p = false;
 	      if (TARGET_64BIT)
-		xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
+		{
+		  if (output_indirect_p)
+		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
+		  else
+		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
+		}
 	      else
-		xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
+		{
+		  if (output_indirect_p)
+		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
+		  else
+		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
+		}
 	    }
 	  else
 	    xasm = "%!jmp\t%P0";
@@ -28147,9 +28566,17 @@  ix86_output_call_insn (rtx_insn *insn, rtx call_op)
       else if (TARGET_SEH)
 	xasm = "%!rex.W jmp\t%A0";
       else
-	xasm = "%!jmp\t%A0";
+	{
+	  if (output_indirect_p)
+	    xasm = "%0";
+	  else
+	    xasm = "%!jmp\t%A0";
+	}
 
-      output_asm_insn (xasm, &call_op);
+      if (output_indirect_p && !direct_p)
+	ix86_output_indirect_branch (call_op, xasm, true);
+      else
+	output_asm_insn (xasm, &call_op);
       return "";
     }
 
@@ -28187,18 +28614,37 @@  ix86_output_call_insn (rtx_insn *insn, rtx call_op)
     {
       if (ix86_nopic_noplt_attribute_p (call_op))
 	{
+	  direct_p = false;
 	  if (TARGET_64BIT)
-	    xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
+	    {
+	      if (output_indirect_p)
+		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
+	      else
+		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
+	    }
 	  else
-	    xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
+	    {
+	      if (output_indirect_p)
+		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
+	      else
+		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
+	    }
 	}
       else
 	xasm = "%!call\t%P0";
     }
   else
-    xasm = "%!call\t%A0";
+    {
+      if (output_indirect_p)
+	xasm = "%0";
+      else
+	xasm = "%!call\t%A0";
+    }
 
-  output_asm_insn (xasm, &call_op);
+  if (output_indirect_p && !direct_p)
+    ix86_output_indirect_branch (call_op, xasm, false);
+  else
+    output_asm_insn (xasm, &call_op);
 
   if (seh_nop_p)
     return "nop";
@@ -40356,7 +40802,7 @@  ix86_handle_struct_attribute (tree *node, tree name, tree, int,
 }
 
 static tree
-ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
+ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int,
 			      bool *no_add_attrs)
 {
   if (TREE_CODE (*node) != FUNCTION_DECL)
@@ -40365,6 +40811,29 @@  ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
                name);
       *no_add_attrs = true;
     }
+
+  if (is_attribute_p ("indirect_branch", name))
+    {
+      tree cst = TREE_VALUE (args);
+      if (TREE_CODE (cst) != STRING_CST)
+	{
+	  warning (OPT_Wattributes,
+		   "%qE attribute requires a string constant argument",
+		   name);
+	  *no_add_attrs = true;
+	}
+      else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
+	       && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
+	       && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
+	       && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
+	{
+	  warning (OPT_Wattributes,
+		   "argument to %qE attribute is not "
+		   "(keep|thunk|thunk-inline|thunk-extern)", name);
+	  *no_add_attrs = true;
+	}
+    }
+
   return NULL_TREE;
 }
 
@@ -44777,6 +45246,8 @@  static const struct attribute_spec ix86_attribute_table[] =
     ix86_handle_no_caller_saved_registers_attribute, NULL },
   { "naked", 0, 0, true, false, false, false,
     ix86_handle_fndecl_attribute, NULL },
+  { "indirect_branch", 1, 1, true, false, false, false,
+    ix86_handle_fndecl_attribute, NULL },
 
   /* End element.  */
   { NULL, 0, 0, false, false, false, false, NULL, NULL }
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 93b7a2c5915..51a920298a4 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2568,6 +2568,13 @@  struct GTY(()) machine_function {
   /* Function type.  */
   ENUM_BITFIELD(function_type) func_type : 2;
 
+  /* How to generate indirect branch.  */
+  ENUM_BITFIELD(indirect_branch) indirect_branch_type : 3;
+
+  /* If true, the current function has local indirect jumps, like
+     "indirect_jump" or "tablejump".  */
+  BOOL_BITFIELD has_local_indirect_jump : 1;
+
   /* If true, the current function is a function specified with
      the "interrupt" or "no_caller_saved_registers" attribute.  */
   BOOL_BITFIELD no_caller_saved_registers : 1;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3f587806407..a7573c468ae 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -12313,12 +12313,13 @@ 
 {
   if (TARGET_X32)
     operands[0] = convert_memory_address (word_mode, operands[0]);
+  cfun->machine->has_local_indirect_jump = true;
 })
 
 (define_insn "*indirect_jump"
   [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))]
   ""
-  "%!jmp\t%A0"
+  "* return ix86_output_indirect_jmp (operands[0], false);"
   [(set_attr "type" "ibr")
    (set_attr "length_immediate" "0")
    (set_attr "maybe_prefix_bnd" "1")])
@@ -12362,13 +12363,14 @@ 
 
   if (TARGET_X32)
     operands[0] = convert_memory_address (word_mode, operands[0]);
+  cfun->machine->has_local_indirect_jump = true;
 })
 
 (define_insn "*tablejump_1"
   [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))
    (use (label_ref (match_operand 1)))]
   ""
-  "%!jmp\t%A0"
+  "* return ix86_output_indirect_jmp (operands[0], false);"
   [(set_attr "type" "ibr")
    (set_attr "length_immediate" "0")
    (set_attr "maybe_prefix_bnd" "1")])
@@ -13097,7 +13099,7 @@ 
   [(simple_return)
    (use (match_operand 0 "register_operand" "r"))]
   "reload_completed"
-  "%!jmp\t%A0"
+  "* return ix86_output_indirect_jmp (operands[0], true);"
   [(set_attr "type" "ibr")
    (set_attr "length_immediate" "0")
    (set_attr "maybe_prefix_bnd" "1")])
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 09aaa97c2fc..22c806206e4 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1021,3 +1021,23 @@  indirect jump.
 mforce-indirect-call
 Target Report Var(flag_force_indirect_call) Init(0)
 Make all function calls indirect.
+
+mindirect-branch=
+Target Report RejectNegative Joined Enum(indirect_branch) Var(ix86_indirect_branch) Init(indirect_branch_keep)
+Convert indirect call and jump.
+
+Enum
+Name(indirect_branch) Type(enum indirect_branch)
+Known indirect branch choices (for use with the -mindirect-branch= option):
+
+EnumValue
+Enum(indirect_branch) String(keep) Value(indirect_branch_keep)
+
+EnumValue
+Enum(indirect_branch) String(thunk) Value(indirect_branch_thunk)
+
+EnumValue
+Enum(indirect_branch) String(thunk-inline) Value(indirect_branch_thunk_inline)
+
+EnumValue
+Enum(indirect_branch) String(thunk-extern) Value(indirect_branch_thunk_extern)
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 5f0f4b86cb2..6e48d4108a2 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -5735,6 +5735,16 @@  Specify which floating-point unit to use.  You must specify the
 @code{target("fpmath=sse+387")} because the comma would separate
 different options.
 
+@item indirect_branch("@var{choice}")
+@cindex @code{indirect_branch} function attribute, x86
+On x86 targets, the @code{indirect_branch} attribute causes the compiler
+to convert indirect call and jump with @var{choice}.  @samp{keep}
+keeps indirect call and jump unmodified.  @samp{thunk} converts indirect
+call and jump to call and return thunk.  @samp{thunk-inline} converts
+indirect call and jump to inlined call and return thunk.
+@samp{thunk-extern} converts indirect call and jump to external call
+and return thunk provided in a separate object file.
+
 @item nocf_check
 @cindex @code{nocf_check} function attribute
 The @code{nocf_check} attribute on a function is used to inform the
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c6025382dbb..46461d1ada3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1226,7 +1226,8 @@  See RS/6000 and PowerPC Options.
 -mstack-protector-guard-reg=@var{reg} @gol
 -mstack-protector-guard-offset=@var{offset} @gol
 -mstack-protector-guard-symbol=@var{symbol} -mmitigate-rop @gol
--mgeneral-regs-only  -mcall-ms2sysv-xlogues}
+-mgeneral-regs-only -mcall-ms2sysv-xlogues @gol
+-mindirect-branch=@var{choice}}
 
 @emph{x86 Windows Options}
 @gccoptlist{-mconsole  -mcygwin  -mno-cygwin  -mdll @gol
@@ -26764,6 +26765,17 @@  Generate code that uses only the general-purpose registers.  This
 prevents the compiler from using floating-point, vector, mask and bound
 registers.
 
+@item -mindirect-branch=@var{choice}
+@opindex mindirect-branch
+Convert indirect call and jump with @var{choice}.  The default is
+@samp{keep}, which keeps indirect call and jump unmodified.
+@samp{thunk} converts indirect call and jump to call and return thunk.
+@samp{thunk-inline} converts indirect call and jump to inlined call
+and return thunk.  @samp{thunk-extern} converts indirect call and jump
+to external call and return thunk provided in a separate object file.
+You can control this behavior for a specific function by using the
+function attribute @code{indirect_branch}.  @xref{Function Attributes}.
+
 @end table
 
 These @samp{-m} switches are supported in addition to the above
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c
new file mode 100644
index 00000000000..08827448325
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-1.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch;
+
+void
+male_indirect_jump (long offset)
+{
+  dispatch(offset);
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler {\tlfence} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c
new file mode 100644
index 00000000000..1344b6bc0e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-2.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch[256];
+
+void
+male_indirect_jump (long offset)
+{
+  dispatch[offset](offset);
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler {\tlfence} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c
new file mode 100644
index 00000000000..dcc9ef75df6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-3.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch;
+
+int
+male_indirect_jump (long offset)
+{
+  dispatch(offset);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler {\tlfence} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c
new file mode 100644
index 00000000000..2502860b6e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-4.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch[256];
+
+int
+male_indirect_jump (long offset)
+{
+  dispatch[offset](offset);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler {\tlfence} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-5.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-5.c
new file mode 100644
index 00000000000..58c81d16316
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-5.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt -mindirect-branch=thunk" } */
+
+extern void bar (void);
+
+void
+foo (void)
+{
+  bar ();
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*bar@GOT" } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler {\tlfence} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-6.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-6.c
new file mode 100644
index 00000000000..b4c528a5b30
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-6.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt -mindirect-branch=thunk" } */
+
+extern void bar (void);
+
+int
+foo (void)
+{
+  bar ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*bar@GOT" } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */
+/* { dg-final { scan-assembler {\tlfence} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c
new file mode 100644
index 00000000000..4553ef0b622
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
+
+void func0 (void);
+void func1 (void);
+void func2 (void);
+void func3 (void);
+void func4 (void);
+void func4 (void);
+void func5 (void);
+
+void
+bar (int i)
+{
+  switch (i)
+    {
+    default:
+      func0 ();
+      break;
+    case 1:
+      func1 ();
+      break;
+    case 2:
+      func2 ();
+      break;
+    case 3:
+      func3 ();
+      break;
+    case 4:
+      func4 ();
+      break;
+    case 5:
+      func5 ();
+      break;
+    }
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler {\tlfence} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c
new file mode 100644
index 00000000000..b8e9851c76f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-1.c
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch;
+
+extern void male_indirect_jump (long)
+  __attribute__ ((indirect_branch("thunk")));
+
+void
+male_indirect_jump (long offset)
+{
+  dispatch(offset);
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler {\tlfence} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c
new file mode 100644
index 00000000000..1d6d18c2aba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-2.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch[256];
+
+__attribute__ ((indirect_branch("thunk")))
+void
+male_indirect_jump (long offset)
+{
+  dispatch[offset](offset);
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler {\tlfence} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c
new file mode 100644
index 00000000000..af167840b81
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-3.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch;
+extern int male_indirect_jump (long)
+  __attribute__ ((indirect_branch("thunk-inline")));
+/* Indirect call via DISPATCH, then return 0, under "thunk-inline".  */
+int
+male_indirect_jump (long offset)
+{
+  dispatch(offset);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */
+/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c
new file mode 100644
index 00000000000..146124894a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-4.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch[256];
+/* Calls DISPATCH[OFFSET], then returns 0, under "thunk-inline".  */
+__attribute__ ((indirect_branch("thunk-inline")))
+int
+male_indirect_jump (long offset)
+{
+  dispatch[offset](offset);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */
+/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c
new file mode 100644
index 00000000000..0833606046b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-5.c
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch;
+extern int male_indirect_jump (long)
+  __attribute__ ((indirect_branch("thunk-extern")));
+/* Indirect call via DISPATCH, then return 0, under "thunk-extern".  */
+int
+male_indirect_jump (long offset)
+{
+  dispatch(offset);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c
new file mode 100644
index 00000000000..2eba0fbd9b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-6.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch[256];
+/* Calls DISPATCH[OFFSET], then returns 0, under "thunk-extern".  */
+__attribute__ ((indirect_branch("thunk-extern")))
+int
+male_indirect_jump (long offset)
+{
+  dispatch[offset](offset);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c
new file mode 100644
index 00000000000..f58427eae11
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-7.c
@@ -0,0 +1,44 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-pic" } */
+
+/* External callees: one per arm of the switch in bar below.  */
+void func0 (void);
+void func1 (void);
+void func2 (void);
+void func3 (void);
+void func4 (void);
+void func5 (void);
+/* Dense switch on I; bar itself is marked indirect_branch("thunk-extern").  */
+__attribute__ ((indirect_branch("thunk-extern")))
+void
+bar (int i)
+{
+  switch (i)
+    {
+    default:
+      func0 ();
+      break;
+    case 1:
+      func1 ();
+      break;
+    case 2:
+      func2 ();
+      break;
+    case 3:
+      func3 ();
+      break;
+    case 4:
+      func4 ();
+      break;
+    case 5:
+      func5 ();
+      break;
+    }
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c
new file mode 100644
index 00000000000..6960fa0bbfb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-attr-8.c
@@ -0,0 +1,41 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk -fno-pic" } */
+
+/* External callees: one per arm of the switch in bar below.  */
+void func0 (void);
+void func1 (void);
+void func2 (void);
+void func3 (void);
+void func4 (void);
+void func5 (void);
+/* Dense switch on I; "keep" is set here while dg-options asks for thunk.  */
+__attribute__ ((indirect_branch("keep")))
+void
+bar (int i)
+{
+  switch (i)
+    {
+    default:
+      func0 ();
+      break;
+    case 1:
+      func1 ();
+      break;
+    case 2:
+      func2 ();
+      break;
+    case 3:
+      func3 ();
+      break;
+    case 4:
+      func4 ();
+      break;
+    case 5:
+      func5 ();
+      break;
+    }
+}
+
+/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-1.c
new file mode 100644
index 00000000000..21b25ec5bbf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-1.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile { target { ! x32 } } } */
+/* { dg-options "-O2 -mindirect-branch=thunk -fcheck-pointer-bounds -mmpx -fno-pic" } */
+
+void (*dispatch) (char *);
+char buf[10];
+/* Indirect call via DISPATCH, passing the array BUF as the argument.  */
+void
+foo (void)
+{
+  dispatch (buf);
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "pushq\[ \t\]%rax" { target x32 } } } */
+/* { dg-final { scan-assembler "bnd jmp\[ \t\]*__x86_indirect_thunk_bnd" } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "bnd call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "bnd ret" } } */
+/* { dg-final { scan-assembler {\tlfence} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-2.c
new file mode 100644
index 00000000000..7bf7e6a1095
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-2.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile { target { ! x32 } } } */
+/* { dg-options "-O2 -mindirect-branch=thunk -fcheck-pointer-bounds -mmpx -fno-pic" } */
+
+void (*dispatch) (char *);
+char buf[10];
+/* Indirect call via DISPATCH with BUF as argument, then return 0.  */
+int
+foo (void)
+{
+  dispatch (buf);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "pushq\[ \t\]%rax" { target x32 } } } */
+/* { dg-final { scan-assembler "bnd jmp\[ \t\]*__x86_indirect_thunk_bnd" } } */
+/* { dg-final { scan-assembler "bnd jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "bnd call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "bnd ret" } } */
+/* { dg-final { scan-assembler {\tlfence} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-3.c
new file mode 100644
index 00000000000..14c60f232db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-3.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile { target { *-*-linux* && { ! x32 } } } } */
+/* { dg-options "-O2 -mindirect-branch=thunk -fcheck-pointer-bounds -mmpx -fpic -fno-plt" } */
+
+void bar (char *);
+char buf[10];
+/* Calls external BAR directly in the source, passing the array BUF.  */
+void
+foo (void)
+{
+  bar (buf);
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*bar@GOT" } } */
+/* { dg-final { scan-assembler "bnd jmp\[ \t\]*__x86_indirect_thunk_bnd" } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "bnd call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "bnd ret" } } */
+/* { dg-final { scan-assembler {\tlfence} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-4.c
new file mode 100644
index 00000000000..4fd6f360801
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-bnd-4.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile { target { *-*-linux* && { ! x32 } } } } */
+/* { dg-options "-O2 -mindirect-branch=thunk -fcheck-pointer-bounds -mmpx -fpic -fno-plt" } */
+
+void bar (char *);
+char buf[10];
+/* Calls external BAR with the array BUF, then returns 0.  */
+int
+foo (void)
+{
+  bar (buf);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*bar@GOT" } } */
+/* { dg-final { scan-assembler "bnd jmp\[ \t\]*__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler "bnd jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-times "bnd call\[ \t\]*\.LIND" 2 } } */
+/* { dg-final { scan-assembler "bnd ret" } } */
+/* { dg-final { scan-assembler {\tlfence} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c
new file mode 100644
index 00000000000..49f27b49465
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-1.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch;
+/* Sole statement is an indirect call through the global pointer DISPATCH.  */
+void
+male_indirect_jump (long offset)
+{
+  dispatch(offset);
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c
new file mode 100644
index 00000000000..a1e3eb6fc74
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-2.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch[256];
+/* Sole statement calls the DISPATCH table entry selected by OFFSET.  */
+void
+male_indirect_jump (long offset)
+{
+  dispatch[offset](offset);
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c
new file mode 100644
index 00000000000..395634e7e5c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-3.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch;
+/* Indirect call through DISPATCH, followed by "return 0".  */
+int
+male_indirect_jump (long offset)
+{
+  dispatch(offset);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c
new file mode 100644
index 00000000000..fd3f63379a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-4.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch[256];
+/* Calls the DISPATCH entry selected by OFFSET, followed by "return 0".  */
+int
+male_indirect_jump (long offset)
+{
+  dispatch[offset](offset);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-5.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-5.c
new file mode 100644
index 00000000000..ba2f92b6f34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-5.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt -mindirect-branch=thunk-extern" } */
+
+extern void bar (void);
+/* Sole statement is a call to the external function BAR.  */
+void
+foo (void)
+{
+  bar ();
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*bar@GOT" } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-6.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-6.c
new file mode 100644
index 00000000000..0c5a2d472c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-6.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt -mindirect-branch=thunk-extern" } */
+
+extern void bar (void);
+/* Calls the external function BAR, then returns 0.  */
+int
+foo (void)
+{
+  bar ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*bar@GOT" } } */
+/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 1 } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 1 } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c
new file mode 100644
index 00000000000..665252327aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk-extern -fno-pic" } */
+
+/* External callees: one per arm of the switch in bar below.  */
+void func0 (void);
+void func1 (void);
+void func2 (void);
+void func3 (void);
+void func4 (void);
+void func5 (void);
+/* Dense switch on I: cases 1-5 plus default, each calling one callee.  */
+void
+bar (int i)
+{
+  switch (i)
+    {
+    default:
+      func0 ();
+      break;
+    case 1:
+      func1 ();
+      break;
+    case 2:
+      func2 ();
+      break;
+    case 3:
+      func3 ();
+      break;
+    case 4:
+      func4 ();
+      break;
+    case 5:
+      func5 ();
+      break;
+    }
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*__x86_indirect_thunk_(r|e)ax" { target x32 } } } */
+/* { dg-final { scan-assembler-not {\t(lfence|pause)} } } */
+/* { dg-final { scan-assembler-not "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*\.LIND" } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c
new file mode 100644
index 00000000000..3ace8d1b031
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-1.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch;
+/* Sole statement is an indirect call through the global pointer DISPATCH.  */
+void
+male_indirect_jump (long offset)
+{
+  dispatch(offset);
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c
new file mode 100644
index 00000000000..6c97b96f1f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-2.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch[256];
+/* Sole statement calls the DISPATCH table entry selected by OFFSET.  */
+void
+male_indirect_jump (long offset)
+{
+  dispatch[offset](offset);
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c
new file mode 100644
index 00000000000..8f6759cbf06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-3.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch;
+/* Indirect call through DISPATCH, followed by "return 0".  */
+int
+male_indirect_jump (long offset)
+{
+  dispatch(offset);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */
+/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c
new file mode 100644
index 00000000000..b07d08cab0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-4.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */
+
+typedef void (*dispatch_t)(long offset);
+
+dispatch_t dispatch[256];
+/* Calls the DISPATCH entry selected by OFFSET, followed by "return 0".  */
+int
+male_indirect_jump (long offset)
+{
+  dispatch[offset](offset);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*_?dispatch" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */
+/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-5.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-5.c
new file mode 100644
index 00000000000..10794886b1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-5.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt -mindirect-branch=thunk-inline" } */
+
+extern void bar (void);
+/* Sole statement is a call to the external function BAR.  */
+void
+foo (void)
+{
+  bar ();
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*bar@GOT" } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-6.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-6.c
new file mode 100644
index 00000000000..a26ec4b06ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-6.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt -mindirect-branch=thunk-inline" } */
+
+extern void bar (void);
+/* Calls the external function BAR, then returns 0.  */
+int
+foo (void)
+{
+  bar ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*bar@GOT" } } */
+/* { dg-final { scan-assembler-times "jmp\[ \t\]*\.LIND" 2 } } */
+/* { dg-final { scan-assembler-times "call\[ \t\]*\.LIND" 2 } } */
+/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c
new file mode 100644
index 00000000000..77253af17c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c
@@ -0,0 +1,42 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk-inline -fno-pic" } */
+
+/* External callees: one per arm of the switch in bar below.  */
+void func0 (void);
+void func1 (void);
+void func2 (void);
+void func3 (void);
+void func4 (void);
+void func5 (void);
+/* Dense switch on I: cases 1-5 plus default, each calling one callee.  */
+void
+bar (int i)
+{
+  switch (i)
+    {
+    default:
+      func0 ();
+      break;
+    case 1:
+      func1 ();
+      break;
+    case 2:
+      func2 ();
+      break;
+    case 3:
+      func3 ();
+      break;
+    case 4:
+      func4 ();
+      break;
+    case 5:
+      func5 ();
+      break;
+    }
+}
+
+/* { dg-final { scan-assembler "push(?:l|q)\[ \t\]*\.L\[0-9\]+\\(,%" { target { ! x32 } } } } */
+/* { dg-final { scan-assembler-not "pushq\[ \t\]%rax" { target x32 } } } */
+/* { dg-final { scan-assembler "jmp\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler "call\[ \t\]*\.LIND" } } */
+/* { dg-final { scan-assembler-not "__x86_indirect_thunk" } } */