diff mbox series

[V34,23/29] bpf: Restrict bpf when kernel lockdown is in confidentiality mode

Message ID 20190622000358.19895-24-matthewgarrett@google.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series None | expand

Commit Message

Matthew Garrett June 22, 2019, 12:03 a.m. UTC
From: David Howells <dhowells@redhat.com>

There are some bpf functions can be used to read kernel memory:
bpf_probe_read, bpf_probe_write_user and bpf_trace_printk.  These allow
private keys in kernel memory (e.g. the hibernation image signing key) to
be read by an eBPF program and kernel memory to be altered without
restriction. Disable them if the kernel has been locked down in
confidentiality mode.

Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
cc: netdev@vger.kernel.org
cc: Chun-Yi Lee <jlee@suse.com>
cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/security.h     |  1 +
 kernel/trace/bpf_trace.c     | 20 +++++++++++++++++++-
 security/lockdown/lockdown.c |  1 +
 3 files changed, 21 insertions(+), 1 deletion(-)

Comments

Kees Cook June 23, 2019, 12:09 a.m. UTC | #1
On Fri, Jun 21, 2019 at 05:03:52PM -0700, Matthew Garrett wrote:
> From: David Howells <dhowells@redhat.com>
> 
> There are some bpf functions can be used to read kernel memory:
> bpf_probe_read, bpf_probe_write_user and bpf_trace_printk.  These allow
> private keys in kernel memory (e.g. the hibernation image signing key) to
> be read by an eBPF program and kernel memory to be altered without
> restriction. Disable them if the kernel has been locked down in
> confidentiality mode.
> 
> Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> Signed-off-by: David Howells <dhowells@redhat.com>

Reviewed-by: Kees Cook <keescook@chromium.org>

-Kees

> Signed-off-by: Matthew Garrett <mjg59@google.com>
> cc: netdev@vger.kernel.org
> cc: Chun-Yi Lee <jlee@suse.com>
> cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> ---
>  include/linux/security.h     |  1 +
>  kernel/trace/bpf_trace.c     | 20 +++++++++++++++++++-
>  security/lockdown/lockdown.c |  1 +
>  3 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/security.h b/include/linux/security.h
> index e6e3e2403474..de0d37b1fe79 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -97,6 +97,7 @@ enum lockdown_reason {
>  	LOCKDOWN_INTEGRITY_MAX,
>  	LOCKDOWN_KCORE,
>  	LOCKDOWN_KPROBES,
> +	LOCKDOWN_BPF_READ,
>  	LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
>  
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index d64c00afceb5..638f9b00a8df 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -137,6 +137,10 @@ BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
>  {
>  	int ret;
>  
> +	ret = security_locked_down(LOCKDOWN_BPF_READ);
> +	if (ret)
> +		return ret;
> +
>  	ret = probe_kernel_read(dst, unsafe_ptr, size);
>  	if (unlikely(ret < 0))
>  		memset(dst, 0, size);
> @@ -156,6 +160,12 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
>  BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
>  	   u32, size)
>  {
> +	int ret;
> +
> +	ret = security_locked_down(LOCKDOWN_BPF_READ);
> +	if (ret)
> +		return ret;
> +
>  	/*
>  	 * Ensure we're in user context which is safe for the helper to
>  	 * run. This helper has no business in a kthread.
> @@ -205,7 +215,11 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
>  	int fmt_cnt = 0;
>  	u64 unsafe_addr;
>  	char buf[64];
> -	int i;
> +	int i, ret;
> +
> +	ret = security_locked_down(LOCKDOWN_BPF_READ);
> +	if (ret)
> +		return ret;
>  
>  	/*
>  	 * bpf_check()->check_func_arg()->check_stack_boundary()
> @@ -534,6 +548,10 @@ BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
>  {
>  	int ret;
>  
> +	ret = security_locked_down(LOCKDOWN_BPF_READ);
> +	if (ret)
> +		return ret;
> +
>  	/*
>  	 * The strncpy_from_unsafe() call will likely not fill the entire
>  	 * buffer, but that's okay in this circumstance as we're probing
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 5a08c17f224d..2eea2cc13117 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -33,6 +33,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>  	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
>  	[LOCKDOWN_KCORE] = "/proc/kcore access",
>  	[LOCKDOWN_KPROBES] = "use of kprobes",
> +	[LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
>  	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
>  
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
>
Daniel Borkmann June 24, 2019, 3:15 p.m. UTC | #2
On 06/22/2019 02:03 AM, Matthew Garrett wrote:
> From: David Howells <dhowells@redhat.com>
> 
> There are some bpf functions can be used to read kernel memory:

Nit: that

> bpf_probe_read, bpf_probe_write_user and bpf_trace_printk.  These allow

Please explain how bpf_probe_write_user reads kernel memory ... ?!

> private keys in kernel memory (e.g. the hibernation image signing key) to
> be read by an eBPF program and kernel memory to be altered without

... and while we're at it, also how they allow "kernel memory to be
altered without restriction". I've been pointing this false statement
out long ago.

> restriction. Disable them if the kernel has been locked down in
> confidentiality mode.
> 
> Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> Signed-off-by: David Howells <dhowells@redhat.com>
> Signed-off-by: Matthew Garrett <mjg59@google.com>
> cc: netdev@vger.kernel.org
> cc: Chun-Yi Lee <jlee@suse.com>
> cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> Cc: Daniel Borkmann <daniel@iogearbox.net>

Nacked-by: Daniel Borkmann <daniel@iogearbox.net>

[...]
>  
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index d64c00afceb5..638f9b00a8df 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -137,6 +137,10 @@ BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
>  {
>  	int ret;
>  
> +	ret = security_locked_down(LOCKDOWN_BPF_READ);
> +	if (ret)
> +		return ret;

This whole thing is still buggy as has been pointed out before by
Jann. For helpers like above and few others below, error conditions
must clear the buffer ...

>  	ret = probe_kernel_read(dst, unsafe_ptr, size);
>  	if (unlikely(ret < 0))
>  		memset(dst, 0, size);
> @@ -156,6 +160,12 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
>  BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
>  	   u32, size)
>  {
> +	int ret;
> +
> +	ret = security_locked_down(LOCKDOWN_BPF_READ);
> +	if (ret)
> +		return ret;
> +
>  	/*
>  	 * Ensure we're in user context which is safe for the helper to
>  	 * run. This helper has no business in a kthread.
> @@ -205,7 +215,11 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
>  	int fmt_cnt = 0;
>  	u64 unsafe_addr;
>  	char buf[64];
> -	int i;
> +	int i, ret;
> +
> +	ret = security_locked_down(LOCKDOWN_BPF_READ);
> +	if (ret)
> +		return ret;
>  
>  	/*
>  	 * bpf_check()->check_func_arg()->check_stack_boundary()
> @@ -534,6 +548,10 @@ BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
>  {
>  	int ret;
>  
> +	ret = security_locked_down(LOCKDOWN_BPF_READ);
> +	if (ret)
> +		return ret;
> +
>  	/*
>  	 * The strncpy_from_unsafe() call will likely not fill the entire
>  	 * buffer, but that's okay in this circumstance as we're probing
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 5a08c17f224d..2eea2cc13117 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -33,6 +33,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>  	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
>  	[LOCKDOWN_KCORE] = "/proc/kcore access",
>  	[LOCKDOWN_KPROBES] = "use of kprobes",
> +	[LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
>  	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
>  
>
Matthew Garrett June 24, 2019, 7:54 p.m. UTC | #3
On Mon, Jun 24, 2019 at 8:37 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 06/22/2019 02:03 AM, Matthew Garrett wrote:
> > From: David Howells <dhowells@redhat.com>
> >
> > There are some bpf functions can be used to read kernel memory:
>
> Nit: that

Fixed.

> > bpf_probe_read, bpf_probe_write_user and bpf_trace_printk.  These allow
>
> Please explain how bpf_probe_write_user reads kernel memory ... ?!

Ha.

> > private keys in kernel memory (e.g. the hibernation image signing key) to
> > be read by an eBPF program and kernel memory to be altered without
>
> ... and while we're at it, also how they allow "kernel memory to be
> altered without restriction". I've been pointing this false statement
> out long ago.

Yup. How's the following description:

    bpf: Restrict bpf when kernel lockdown is in confidentiality mode

    There are some bpf functions that can be used to read kernel memory and
    exfiltrate it to userland: bpf_probe_read, bpf_probe_write_user and
    bpf_trace_printk.  These could be abused to (eg) allow private
keys in kernel
    memory to be leaked. Disable them if the kernel has been locked
down in confidentiality
    mode.

> This whole thing is still buggy as has been pointed out before by
> Jann. For helpers like above and few others below, error conditions
> must clear the buffer ...

Sorry, yes. My fault.
Andy Lutomirski June 24, 2019, 8:08 p.m. UTC | #4
On Mon, Jun 24, 2019 at 12:54 PM Matthew Garrett <mjg59@google.com> wrote:
>
> On Mon, Jun 24, 2019 at 8:37 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
> >
> > On 06/22/2019 02:03 AM, Matthew Garrett wrote:
> > > From: David Howells <dhowells@redhat.com>
> > >
> > > There are some bpf functions can be used to read kernel memory:
> >
> > Nit: that
>
> Fixed.
>
> > > bpf_probe_read, bpf_probe_write_user and bpf_trace_printk.  These allow
> >
> > Please explain how bpf_probe_write_user reads kernel memory ... ?!
>
> Ha.
>
> > > private keys in kernel memory (e.g. the hibernation image signing key) to
> > > be read by an eBPF program and kernel memory to be altered without
> >
> > ... and while we're at it, also how they allow "kernel memory to be
> > altered without restriction". I've been pointing this false statement
> > out long ago.
>
> Yup. How's the following description:
>
>     bpf: Restrict bpf when kernel lockdown is in confidentiality mode
>
>     There are some bpf functions that can be used to read kernel memory and
>     exfiltrate it to userland: bpf_probe_read, bpf_probe_write_user and
>     bpf_trace_printk.  These could be abused to (eg) allow private
> keys in kernel
>     memory to be leaked. Disable them if the kernel has been locked
> down in confidentiality
>     mode.

I'm confused.  I understand why we're restricting bpf_probe_read().
Why are we restricting bpf_probe_write_user() and bpf_trace_printk(),
though?

--Andy
Matthew Garrett June 24, 2019, 8:15 p.m. UTC | #5
On Mon, Jun 24, 2019 at 1:09 PM Andy Lutomirski <luto@kernel.org> wrote:

> I'm confused.  I understand why we're restricting bpf_probe_read().
> Why are we restricting bpf_probe_write_user() and bpf_trace_printk(),
> though?

Hmm. I think the thinking here was around exfiltration mechanisms, but
if the read is blocked then that seems less likely. This seems to
trace back to http://kernsec.org/pipermail/linux-security-module-archive/2017-October/003545.html
- Joey, do you know the reasoning here?
Daniel Borkmann June 24, 2019, 8:59 p.m. UTC | #6
On 06/24/2019 10:08 PM, Andy Lutomirski wrote:
> On Mon, Jun 24, 2019 at 12:54 PM Matthew Garrett <mjg59@google.com> wrote:
>> On Mon, Jun 24, 2019 at 8:37 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>>> On 06/22/2019 02:03 AM, Matthew Garrett wrote:
>>>> From: David Howells <dhowells@redhat.com>
>>>>
>>>> There are some bpf functions can be used to read kernel memory:
>>>
>>> Nit: that
>>
>> Fixed.
>>
>>>> bpf_probe_read, bpf_probe_write_user and bpf_trace_printk.  These allow
>>>
>>> Please explain how bpf_probe_write_user reads kernel memory ... ?!
>>
>> Ha.
>>
>>>> private keys in kernel memory (e.g. the hibernation image signing key) to
>>>> be read by an eBPF program and kernel memory to be altered without
>>>
>>> ... and while we're at it, also how they allow "kernel memory to be
>>> altered without restriction". I've been pointing this false statement
>>> out long ago.
>>
>> Yup. How's the following description:
>>
>>     bpf: Restrict bpf when kernel lockdown is in confidentiality mode
>>
>>     There are some bpf functions that can be used to read kernel memory and
>>     exfiltrate it to userland: bpf_probe_read, bpf_probe_write_user and
>>     bpf_trace_printk.  These could be abused to (eg) allow private
>> keys in kernel
>>     memory to be leaked. Disable them if the kernel has been locked
>> down in confidentiality
>>     mode.
> 
> I'm confused.  I understand why we're restricting bpf_probe_read().
> Why are we restricting bpf_probe_write_user() and bpf_trace_printk(),
> though?

Agree, for example, bpf_probe_write_user() can never write into
kernel memory (only user one). Just thinking out loud, wouldn't it
be cleaner and more generic to perform this check at the actual function
which performs the kernel memory without faulting? All three of these
are in mm/maccess.c, and the very few occasions that override the
probe_kernel_read symbol are calling eventually into __probe_kernel_read(),
so this would catch all of them wrt lockdown restrictions. Otherwise
you'd need to keep tracking every bit of new code being merged that
calls into one of these, no? That way you only need to do it once like
below and are guaranteed that the check catches these in future as well.

Thanks,
Daniel

diff --git a/mm/maccess.c b/mm/maccess.c
index 482d4d6..2c8220f 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -29,6 +29,9 @@ long __probe_kernel_read(void *dst, const void *src, size_t size)
 	long ret;
 	mm_segment_t old_fs = get_fs();

+	if (security_locked_down(LOCKDOWN_KERNEL_READ))
+		return -EFAULT;
+
 	set_fs(KERNEL_DS);
 	pagefault_disable();
 	ret = __copy_from_user_inatomic(dst,
@@ -57,6 +60,9 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
 	long ret;
 	mm_segment_t old_fs = get_fs();

+	if (security_locked_down(LOCKDOWN_KERNEL_WRITE))
+		return -EFAULT;
+
 	set_fs(KERNEL_DS);
 	pagefault_disable();
 	ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
@@ -90,6 +96,9 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
 	const void *src = unsafe_addr;
 	long ret;

+	if (security_locked_down(LOCKDOWN_KERNEL_READ))
+		return -EFAULT;
+
 	if (unlikely(count <= 0))
 		return 0;
Matthew Garrett June 24, 2019, 9:30 p.m. UTC | #7
On Mon, Jun 24, 2019 at 2:22 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
> Agree, for example, bpf_probe_write_user() can never write into
> kernel memory (only user one). Just thinking out loud, wouldn't it
> be cleaner and more generic to perform this check at the actual function
> which performs the kernel memory without faulting? All three of these
> are in mm/maccess.c, and the very few occasions that override the
> probe_kernel_read symbol are calling eventually into __probe_kernel_read(),
> so this would catch all of them wrt lockdown restrictions. Otherwise
> you'd need to keep tracking every bit of new code being merged that
> calls into one of these, no? That way you only need to do it once like
> below and are guaranteed that the check catches these in future as well.

Not all paths into probe_kernel_read/write are from entry points that
need to be locked down (eg, as far as I can tell ftrace can't leak
anything interesting here).
diff mbox series

Patch

diff --git a/include/linux/security.h b/include/linux/security.h
index e6e3e2403474..de0d37b1fe79 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -97,6 +97,7 @@  enum lockdown_reason {
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_KCORE,
 	LOCKDOWN_KPROBES,
+	LOCKDOWN_BPF_READ,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d64c00afceb5..638f9b00a8df 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -137,6 +137,10 @@  BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
 {
 	int ret;
 
+	ret = security_locked_down(LOCKDOWN_BPF_READ);
+	if (ret)
+		return ret;
+
 	ret = probe_kernel_read(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
 		memset(dst, 0, size);
@@ -156,6 +160,12 @@  static const struct bpf_func_proto bpf_probe_read_proto = {
 BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
 	   u32, size)
 {
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_BPF_READ);
+	if (ret)
+		return ret;
+
 	/*
 	 * Ensure we're in user context which is safe for the helper to
 	 * run. This helper has no business in a kthread.
@@ -205,7 +215,11 @@  BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 	int fmt_cnt = 0;
 	u64 unsafe_addr;
 	char buf[64];
-	int i;
+	int i, ret;
+
+	ret = security_locked_down(LOCKDOWN_BPF_READ);
+	if (ret)
+		return ret;
 
 	/*
 	 * bpf_check()->check_func_arg()->check_stack_boundary()
@@ -534,6 +548,10 @@  BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
 {
 	int ret;
 
+	ret = security_locked_down(LOCKDOWN_BPF_READ);
+	if (ret)
+		return ret;
+
 	/*
 	 * The strncpy_from_unsafe() call will likely not fill the entire
 	 * buffer, but that's okay in this circumstance as we're probing
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 5a08c17f224d..2eea2cc13117 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -33,6 +33,7 @@  static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_KCORE] = "/proc/kcore access",
 	[LOCKDOWN_KPROBES] = "use of kprobes",
+	[LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };