
[bpf-next] bpf: introduce BPF_JIT_ALWAYS_ON config

Message ID 20180108033519.3232547-1-ast@kernel.org
State Changes Requested, archived
Delegated to: BPF Maintainers
Series [bpf-next] bpf: introduce BPF_JIT_ALWAYS_ON config

Commit Message

Alexei Starovoitov Jan. 8, 2018, 3:35 a.m. UTC
The BPF interpreter has been used as part of the Spectre v2 attack (CVE-2017-5715).

A quote from the Google Project Zero blog:
"At this point, it would normally be necessary to locate gadgets in
the host kernel code that can be used to actually leak data by reading
from an attacker-controlled location, shifting and masking the result
appropriately and then using the result of that as offset to an
attacker-controlled address for a load. But piecing gadgets together
and figuring out which ones work in a speculation context seems annoying.
So instead, we decided to use the eBPF interpreter, which is built into
the host kernel - while there is no legitimate way to invoke it from inside
a VM, the presence of the code in the host kernel's text section is sufficient
to make it usable for the attack, just like with ordinary ROP gadgets."

To make an attacker's job harder, introduce a BPF_JIT_ALWAYS_ON config
option that removes the interpreter from the kernel in favor of JIT-only mode.
So far eBPF JIT is supported by:
x64, arm64, arm32, sparc64, s390, powerpc64, mips64

The start of the JITed program is randomized and its code page is marked read-only.
In addition, "constant blinding" can be turned on with net.core.bpf_jit_harden.
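
For illustration, selecting JIT-only mode amounts to roughly the following
config fragment on an arch that selects HAVE_EBPF_JIT (a sketch; the exact
dependencies are spelled out in the Kconfig hunk below):

CONFIG_BPF_SYSCALL=y
CONFIG_BPF_JIT=y
CONFIG_BPF_JIT_ALWAYS_ON=y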

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 init/Kconfig               | 7 +++++++
 kernel/bpf/core.c          | 9 +++++++++
 kernel/bpf/verifier.c      | 4 ++++
 net/core/sysctl_net_core.c | 9 +++++++++
 4 files changed, 29 insertions(+)

Comments

Daniel Borkmann Jan. 8, 2018, 9:59 p.m. UTC | #1
On 01/08/2018 04:35 AM, Alexei Starovoitov wrote:
> The BPF interpreter has been used as part of the Spectre v2 attack (CVE-2017-5715).
> 
> A quote from the Google Project Zero blog:
> "At this point, it would normally be necessary to locate gadgets in
> the host kernel code that can be used to actually leak data by reading
> from an attacker-controlled location, shifting and masking the result
> appropriately and then using the result of that as offset to an
> attacker-controlled address for a load. But piecing gadgets together
> and figuring out which ones work in a speculation context seems annoying.
> So instead, we decided to use the eBPF interpreter, which is built into
> the host kernel - while there is no legitimate way to invoke it from inside
> a VM, the presence of the code in the host kernel's text section is sufficient
> to make it usable for the attack, just like with ordinary ROP gadgets."
> 
> To make an attacker's job harder, introduce a BPF_JIT_ALWAYS_ON config
> option that removes the interpreter from the kernel in favor of JIT-only mode.
> So far eBPF JIT is supported by:
> x64, arm64, arm32, sparc64, s390, powerpc64, mips64
> 
> The start of the JITed program is randomized and its code page is marked read-only.
> In addition, "constant blinding" can be turned on with net.core.bpf_jit_harden.
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
>  init/Kconfig               | 7 +++++++
>  kernel/bpf/core.c          | 9 +++++++++
>  kernel/bpf/verifier.c      | 4 ++++
>  net/core/sysctl_net_core.c | 9 +++++++++
>  4 files changed, 29 insertions(+)
> 
> diff --git a/init/Kconfig b/init/Kconfig
> index 2934249fba46..5e2a4a391ba9 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1392,6 +1392,13 @@ config BPF_SYSCALL
>  	  Enable the bpf() system call that allows to manipulate eBPF
>  	  programs and maps via file descriptors.
>  
> +config BPF_JIT_ALWAYS_ON
> +	bool "Permanently enable BPF JIT and remove BPF interpreter"
> +	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
> +	help
> +	  Enables BPF JIT and removes BPF interpreter to avoid
> +	  speculative execution of BPF instructions by the interpreter
> +
>  config USERFAULTFD
>  	bool "Enable userfaultfd() system call"
>  	select ANON_INODES
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 70a534549cd3..42756c434e0b 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -781,6 +781,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
>  }
>  EXPORT_SYMBOL_GPL(__bpf_call_base);
>  
> +#ifndef CONFIG_BPF_JIT_ALWAYS_ON
>  /**
>   *	__bpf_prog_run - run eBPF program on a given context
>   *	@ctx: is the data we are operating on
> @@ -1376,6 +1377,7 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
>  		__bpf_call_base_args;
>  	insn->code = BPF_JMP | BPF_CALL_ARGS;
>  }
> +#endif
>  
>  bool bpf_prog_array_compatible(struct bpf_array *array,
>  			       const struct bpf_prog *fp)
> @@ -1427,9 +1429,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
>   */
>  struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
>  {
> +#ifndef CONFIG_BPF_JIT_ALWAYS_ON
>  	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
>  
>  	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
> +#endif
>  
>  	/* eBPF JITs can rewrite the program in case constant
>  	 * blinding is active. However, in case of error during
> @@ -1453,6 +1457,11 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
>  	 */
>  	*err = bpf_check_tail_call(fp);
>  
> +#ifdef CONFIG_BPF_JIT_ALWAYS_ON
> +	if (!fp->jited)
> +		*err = -ENOTSUPP;
> +#endif

This part here and ...

>  	return fp;
>  }
>  EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
[...]
> @@ -524,6 +530,9 @@ static __net_initdata struct pernet_operations sysctl_core_ops = {
>  
>  static __init int sysctl_core_init(void)
>  {
> +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_JIT_ALWAYS_ON)
> +	bpf_jit_enable = 1;
> +#endif

... this one will race and break things in the current shape. One example
is the PTP classifier in the tree: sysctl_core_init() is done in fs_initcall(),
whereas ptp_classifier_init() is done in sock_init(), which is invoked out of
core_initcall().

So what will happen is that at this point in time bpf_jit_enable is not yet
set to 1, so when ptp_classifier_init() calls the cBPF bpf_prog_create(), it
will migrate the insns over to eBPF, and bpf_prog_select_runtime(), called
from bpf_migrate_filter(), assumes that we always succeed here, since when
the JIT fails we fall back to the interpreter anyway. The only error that
could happen in bpf_prog_select_runtime() up until now came from native eBPF
prog load, so bpf_migrate_filter() will thus return just fine, and on the
first call into the PTP classifier from a network packet we'll get a NULL
pointer deref since fp->bpf_func is still NULL. So this would rather need
to be set much earlier on init, or e.g. in the JITs themselves.
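
For the record, one way to avoid the initcall ordering issue entirely would
be to give bpf_jit_enable its value at build time instead of flipping it in
sysctl_core_init(). A rough sketch (not the posted patch; where exactly the
variable is defined is assumed here):

/* Initialize at definition time so that even core_initcall-era users
 * such as the PTP classifier already see the JIT as enabled.
 */
#ifdef CONFIG_BPF_JIT_ALWAYS_ON
int bpf_jit_enable __read_mostly = 1;
#else
int bpf_jit_enable __read_mostly;
#endif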

Other than that, I was wondering whether the arm32 eBPF JIT could cause
trouble for cBPF as well, but it looks like that is not the case: only alu64
div/mod and xadd are not implemented there yet, and those are not used in
the migration, so that should be ok.

>  	register_net_sysctl(&init_net, "net/core", net_core_table);
>  	return register_pernet_subsys(&sysctl_core_ops);
>  }
>
Jakub Kicinski Jan. 9, 2018, 12:02 a.m. UTC | #2
On Mon, 8 Jan 2018 22:59:04 +0100, Daniel Borkmann wrote:
> > @@ -1453,6 +1457,11 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
> >  	 */
> >  	*err = bpf_check_tail_call(fp);
> >  
> > +#ifdef CONFIG_BPF_JIT_ALWAYS_ON
> > +	if (!fp->jited)
> > +		*err = -ENOTSUPP;
> > +#endif  

I think programs JITed for offload won't have fp->jited set, but
those are pretty safe from CPU bugs.  Should we set fp->jited = 1; in
bpf_prog_offload_compile()?  Just throwing "&& !bpf_prog_is_dev_bound()"
in here seems cleaner to me.
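
Concretely, the check would then look roughly like this in
bpf_prog_select_runtime() (a sketch, not necessarily what v2 will do):

#ifdef CONFIG_BPF_JIT_ALWAYS_ON
	/* Offloaded programs never run through the host interpreter,
	 * so don't reject them just because fp->jited is not set.
	 */
	if (!fp->jited && !bpf_prog_is_dev_bound(fp->aux))
		*err = -ENOTSUPP;
#endif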

FWIW, if you have netdevsim compiled and a recent iproute2, this will
work as a check:

# ip link add type netdevsim
# ip link set netdevsim0 xdpoffload obj ~/xdp/pass.o
Alexei Starovoitov Jan. 9, 2018, 2:23 a.m. UTC | #3
On 1/8/18 4:02 PM, Jakub Kicinski wrote:
> On Mon, 8 Jan 2018 22:59:04 +0100, Daniel Borkmann wrote:
>>> @@ -1453,6 +1457,11 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
>>>  	 */
>>>  	*err = bpf_check_tail_call(fp);
>>>
>>> +#ifdef CONFIG_BPF_JIT_ALWAYS_ON
>>> +	if (!fp->jited)
>>> +		*err = -ENOTSUPP;
>>> +#endif
>
> I think programs JITed for offload won't have fp->jited set, but
> those are pretty safe from CPU bugs.  Should we set fp->jited = 1; in
> bpf_prog_offload_compile()?  Just throwing "&& !bpf_prog_is_dev_bound()"
> in here seems cleaner to me.

good catch. will fix in the v2.

Patch

diff --git a/init/Kconfig b/init/Kconfig
index 2934249fba46..5e2a4a391ba9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1392,6 +1392,13 @@  config BPF_SYSCALL
 	  Enable the bpf() system call that allows to manipulate eBPF
 	  programs and maps via file descriptors.
 
+config BPF_JIT_ALWAYS_ON
+	bool "Permanently enable BPF JIT and remove BPF interpreter"
+	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
+	help
+	  Enables BPF JIT and removes BPF interpreter to avoid
+	  speculative execution of BPF instructions by the interpreter
+
 config USERFAULTFD
 	bool "Enable userfaultfd() system call"
 	select ANON_INODES
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 70a534549cd3..42756c434e0b 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -781,6 +781,7 @@  noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 }
 EXPORT_SYMBOL_GPL(__bpf_call_base);
 
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 /**
  *	__bpf_prog_run - run eBPF program on a given context
  *	@ctx: is the data we are operating on
@@ -1376,6 +1377,7 @@  void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
 		__bpf_call_base_args;
 	insn->code = BPF_JMP | BPF_CALL_ARGS;
 }
+#endif
 
 bool bpf_prog_array_compatible(struct bpf_array *array,
 			       const struct bpf_prog *fp)
@@ -1427,9 +1429,11 @@  static int bpf_check_tail_call(const struct bpf_prog *fp)
  */
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
 
 	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+#endif
 
 	/* eBPF JITs can rewrite the program in case constant
 	 * blinding is active. However, in case of error during
@@ -1453,6 +1457,11 @@  struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 	 */
 	*err = bpf_check_tail_call(fp);
 
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+	if (!fp->jited)
+		*err = -ENOTSUPP;
+#endif
+
 	return fp;
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a2b211262c25..ca80559c4ec3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5267,7 +5267,11 @@  static int fixup_call_args(struct bpf_verifier_env *env)
 		depth = get_callee_stack_depth(env, insn, i);
 		if (depth < 0)
 			return depth;
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+		return -ENOTSUPP;
+#else
 		bpf_patch_call_args(insn, depth);
+#endif
 	}
 	return 0;
 }
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cbc3dde4cfcc..1c8af0f4f385 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -325,7 +325,13 @@  static struct ctl_table net_core_table[] = {
 		.data		= &bpf_jit_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 		.proc_handler	= proc_dointvec
+#else
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &one,
+#endif
 	},
 # ifdef CONFIG_HAVE_EBPF_JIT
 	{
@@ -524,6 +530,9 @@  static __net_initdata struct pernet_operations sysctl_core_ops = {
 
 static __init int sysctl_core_init(void)
 {
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_JIT_ALWAYS_ON)
+	bpf_jit_enable = 1;
+#endif
 	register_net_sysctl(&init_net, "net/core", net_core_table);
 	return register_pernet_subsys(&sysctl_core_ops);
 }