Message ID | 201601100657.u0A6vk1B025554@mail.home.local |
---|---|
State | Accepted, archived |
Delegated to: | David Miller |
Headers | show |
From: Willy Tarreau <w@1wt.eu> Date: Sun, Jan 10 07:54:56 CET 2016 > It is possible for a process to allocate and accumulate far more FDs than > the process' limit by sending them over a unix socket then closing them > to keep the process' fd count low. > > This change addresses this problem by keeping track of the number of FDs > in flight per user and preventing non-privileged processes from having > more FDs in flight than their configured FD limit. > > Reported-by: socketpair@gmail.com > Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> > Mitigates: CVE-2013-4312 (Linux 2.0+) > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> > Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org> > Signed-off-by: Willy Tarreau <w@1wt.eu> Applied and queued up for -stable, thanks!
Hi On Sun, Jan 10, 2016 at 7:54 AM, Willy Tarreau <w@1wt.eu> wrote: > It is possible for a process to allocate and accumulate far more FDs than > the process' limit by sending them over a unix socket then closing them > to keep the process' fd count low. > > This change addresses this problem by keeping track of the number of FDs > in flight per user and preventing non-privileged processes from having > more FDs in flight than their configured FD limit. This is not really what this patch does. This patch rather prevents processes from having more of their owned *files* in flight than their configured FD limit. See below for details.. > Reported-by: socketpair@gmail.com > Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> > Mitigates: CVE-2013-4312 (Linux 2.0+) > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> > Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org> > Signed-off-by: Willy Tarreau <w@1wt.eu> > --- > v2: add reported-by, mitigates and acked-by. > > It would be nice if (if accepted) it would be backported to -stable as the > issue is currently exploitable. > --- > include/linux/sched.h | 1 + > net/unix/af_unix.c | 24 ++++++++++++++++++++---- > net/unix/garbage.c | 13 ++++++++----- > 3 files changed, 29 insertions(+), 9 deletions(-) > > diff --git a/include/linux/sched.h b/include/linux/sched.h > index edad7a4..fbf25f1 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -830,6 +830,7 @@ struct user_struct { > unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ > #endif > unsigned long locked_shm; /* How many pages of mlocked shm ? 
*/ > + unsigned long unix_inflight; /* How many files in flight in unix sockets */ > > #ifdef CONFIG_KEYS > struct key *uid_keyring; /* UID specific keyring */ > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c > index 45aebd9..d6d7b43 100644 > --- a/net/unix/af_unix.c > +++ b/net/unix/af_unix.c > @@ -1499,6 +1499,21 @@ static void unix_destruct_scm(struct sk_buff *skb) > sock_wfree(skb); > } > > +/* > + * The "user->unix_inflight" variable is protected by the garbage > + * collection lock, and we just read it locklessly here. If you go > + * over the limit, there might be a tiny race in actually noticing > + * it across threads. Tough. This limit is checked once per transaction. IIRC, a transaction can carry 255 files. Thus, it is easy to exceed this limit without any race involved. > + */ > +static inline bool too_many_unix_fds(struct task_struct *p) > +{ > + struct user_struct *user = current_user(); > + > + if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) > + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); > + return false; > +} > + > #define MAX_RECURSION_LEVEL 4 > > static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) > @@ -1507,6 +1522,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) > unsigned char max_level = 0; > int unix_sock_count = 0; > > + if (too_many_unix_fds(current)) > + return -ETOOMANYREFS; > + Ignoring the capabilities, this effectively resolves to: if (current_cred()->user->unix_inflight > rlimit(RLIMIT_NOFILE)) return -ETOOMANYREFS; It limits the number of inflight FDs of the *current* user to its *own* limit. But.. 
> for (i = scm->fp->count - 1; i >= 0; i--) { > struct sock *sk = unix_get_socket(scm->fp->fp[i]); > > @@ -1528,10 +1546,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) > if (!UNIXCB(skb).fp) > return -ENOMEM; > > - if (unix_sock_count) { > - for (i = scm->fp->count - 1; i >= 0; i--) > - unix_inflight(scm->fp->fp[i]); > - } > + for (i = scm->fp->count - 1; i >= 0; i--) > + unix_inflight(scm->fp->fp[i]); > return max_level; > } > > diff --git a/net/unix/garbage.c b/net/unix/garbage.c > index a73a226..8fcdc22 100644 > --- a/net/unix/garbage.c > +++ b/net/unix/garbage.c > @@ -120,11 +120,11 @@ void unix_inflight(struct file *fp) > { > struct sock *s = unix_get_socket(fp); > > + spin_lock(&unix_gc_lock); > + > if (s) { > struct unix_sock *u = unix_sk(s); > > - spin_lock(&unix_gc_lock); > - > if (atomic_long_inc_return(&u->inflight) == 1) { > BUG_ON(!list_empty(&u->link)); > list_add_tail(&u->link, &gc_inflight_list); > @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp) > BUG_ON(list_empty(&u->link)); > } > unix_tot_inflight++; > - spin_unlock(&unix_gc_lock); > } > + fp->f_cred->user->unix_inflight++; ..this modifies the limit of the owner of the file you send. As such, if you only send files that you don't own, you will continuously bump the limit of the file-owner, but never your own. As such, the protection above will never fire. Is this really what this patch is supposed to do? Shouldn't the loop in unix_attach_fds() rather check for this: if (unlikely(fp->f_cred->user->unix_inflight > nofile && !file_ns_capable(fp, &init_user_ns, CAP_SYS_RESOURCE) && !file_ns_capable(fp, &init_user_ns, CAP_SYS_ADMIN))) return -ETOOMANYREFS; (or keeping capable() rather than file_ns_capable(), depending on the wanted semantics) Furthermore, with this patch in place, a process better not pass any file-descriptors to an untrusted process. This might have been a stupid idea in the first place, but I would have expected the patch to mention this. 
Because with this patch in place, a receiver of a file-descriptor can bump the unix_inflight limit of the sender arbitrarily. Did anyone notify the dbus maintainers of this? They might wanna document this, if not already done (CC: smcv). Why doesn't this patch modify "unix_inflight" of the sender rather than the passed descriptors? It would require pinning the affected user in 'scm' contexts, but that is probably already the case, anyway. This way, the original ETOOMANYREFS check would be perfectly fine. Anyway, can someone provide a high-level description of what exactly this patch is supposed to do? Which operation should be limited, who should inflight FDs be accounted on, and which rlimit should be used on each operation? I'm having a hard time auditing existing user-space, given just the scarce description of this commit. Thanks David > + spin_unlock(&unix_gc_lock); > } > > void unix_notinflight(struct file *fp) > { > struct sock *s = unix_get_socket(fp); > > + spin_lock(&unix_gc_lock); > + > if (s) { > struct unix_sock *u = unix_sk(s); > > - spin_lock(&unix_gc_lock); > BUG_ON(list_empty(&u->link)); > > if (atomic_long_dec_and_test(&u->inflight)) > list_del_init(&u->link); > unix_tot_inflight--; > - spin_unlock(&unix_gc_lock); > } > + fp->f_cred->user->unix_inflight--; > + spin_unlock(&unix_gc_lock); > } > > static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), > -- > 1.7.12.1 > > >
On 02/02/16 17:34, David Herrmann wrote: > Furthermore, with this patch in place, a process better not pass any > file-descriptors to an untrusted process. ... > Did anyone notify the dbus maintainers of this? They > might wanna document this, if not already done (CC: smcv). Sorry, I'm not clear from this message on what patterns I should be documenting as bad, and what the effect of non-compliance would be. dbus-daemon has a fd-passing feature, which uses AF_UNIX sockets' existing ability to pass fds to let users of D-Bus attach fds to their messages. The message is passed from the sending client to dbus-daemon, then from dbus-daemon to the recipient: AF_UNIX AF_UNIX | | sender -------> dbus-daemon -------> recipient | | This has been API since before I was a D-Bus maintainer, so I have no influence over its existence; just like the kernel doesn't want to break user-space, dbus-daemon doesn't want to break its users. The system dbus-daemon (dbus-daemon --system) is a privilege boundary, and accepts senders and recipients with differing privileges. Without configuration, messages are denied by default. Recipients can open this up (by installing system-wide configuration) to allow arbitrary processes to send messages to them, so that they can carry out their own discretionary access control. Since 2014, the system dbus-daemon accepts up to 16 file descriptors per message by default. There is also a session or user dbus-daemon (dbus-daemon --session) per uid, but that isn't normally a privilege boundary, so any user trying to carry out a denial of service there is only hurting themselves. Am I right in saying that the advice I give to D-Bus users should be something like this? 
* system services should not send fds at all, unless they trust the dbus-daemon * system services should not send fds via D-Bus that will be delivered to recipients that they do not trust * sending fds to an untrusted recipient would enable that recipient to carry out a denial-of-service attack (on what? the sender? the dbus-daemon?)
On 03.02.2016 12:36, Simon McVittie wrote: > On 02/02/16 17:34, David Herrmann wrote: >> Furthermore, with this patch in place, a process better not pass any >> file-descriptors to an untrusted process. > ... >> Did anyone notify the dbus maintainers of this? They >> might wanna document this, if not already done (CC: smcv). > > Sorry, I'm not clear from this message on what patterns I should be > documenting as bad, and what the effect of non-compliance would be. > > dbus-daemon has a fd-passing feature, which uses AF_UNIX sockets' > existing ability to pass fds to let users of D-Bus attach fds to their > messages. The message is passed from the sending client to dbus-daemon, > then from dbus-daemon to the recipient: > > AF_UNIX AF_UNIX > | | > sender -------> dbus-daemon -------> recipient > | | > > This has been API since before I was a D-Bus maintainer, so I have no > influence over its existence; just like the kernel doesn't want to break > user-space, dbus-daemon doesn't want to break its users. > > The system dbus-daemon (dbus-daemon --system) is a privilege boundary, > and accepts senders and recipients with differing privileges. Without > configuration, messages are denied by default. Recipients can open this > up (by installing system-wide configuration) to allow arbitrary > processes to send messages to them, so that they can carry out their own > discretionary access control. Since 2014, the system dbus-daemon accepts > up to 16 file descriptors per message by default. > > There is also a session or user dbus-daemon (dbus-daemon --session) per > uid, but that isn't normally a privilege boundary, so any user trying to > carry out a denial of service there is only hurting themselves. > > Am I right in saying that the advice I give to D-Bus users should be > something like this? 
> > * system services should not send fds at all, unless they trust the > dbus-daemon > * system services should not send fds via D-Bus that will be delivered > to recipients that they do not trust > * sending fds to an untrusted recipient would enable that recipient to > carry out a denial-of-service attack (on what? the sender? the > dbus-daemon?) > The described behavior was simply a bug in the referenced patch. I already posted a follow-up to change this behavior so that only the current sending process is credited with the number of fds in flight: <https://patchwork.ozlabs.org/patch/577653/> Other processes (in this case the original opener of the file) aren't credited anymore if they do not send the fd themselves. That said, I don't think you need to change anything or give different advice because of this thread. Thanks, Hannes
Hi On Wed, Feb 3, 2016 at 12:36 PM, Simon McVittie <simon.mcvittie@collabora.co.uk> wrote: > Am I right in saying that the advice I give to D-Bus users should be > something like this? > > * system services should not send fds at all, unless they trust the > dbus-daemon > * system services should not send fds via D-Bus that will be delivered > to recipients that they do not trust > * sending fds to an untrusted recipient would enable that recipient to > carry out a denial-of-service attack (on what? the sender? the > dbus-daemon?) With the revised patch from Hannes, this should no longer be needed. My original concern was only about accounting inflight-fds on the file-owner, rather than the sender. However, with Hannes' revised patch, a different DoS attack against dbus-daemon is possible. Imagine a peer that receives batches of FDs, but never dequeues them. They will be accounted on the inflight-limit of dbus-daemon, as such causing messages of independent peers to be rejected in case they carry FDs. Preferably, dbus-daemon would avoid queuing more than 16 FDs on a single destination (total). But that would require POLLOUT to be capped by the number of queued fds. A possible workaround is to add CAP_SYS_RESOURCE to dbus-daemon. Thanks David
On 03/02/16 11:56, David Herrmann wrote: > However, with Hannes' revised patch, a different DoS attack against > dbus-daemon is possible. Imagine a peer that receives batches of FDs, > but never dequeues them. They will be accounted on the inflight-limit > of dbus-daemon, as such causing messages of independent peers to be > rejected in case they carry FDs. > Preferably, dbus-daemon would avoid queuing more than 16 FDs on a > single destination (total). But that would require POLLOUT to be > capped by the number of queued fds. A possible workaround is to add > CAP_SYS_RESOURCE to dbus-daemon. We have several mitigations for this: * there's a limit on outgoing fds queued inside dbus-daemon for a particular recipient connection, currently 64, and if that's exceeded, we stop accepting messages for that recipient, feeding back a send failure to the sender for messages to that recipient; * there's a time limit for how long any given fd can stay queued up inside dbus-daemon, currently 2.5 minutes, and if that's exceeded, we drop the message; * if started as root[1], we increase our fd limit to 64k before dropping privileges to the intended uid, which in combination with the maximum of 256 connections per uid and 64 fds per connection means it takes multiple uids working together to carry out a DoS; * all of those limits can be adjusted by a local sysadmin if necessary, if their users are particularly hostile (for instance 16 fds per message is a generous limit intended to avoid unforeseen breakage, and 4 would probably be enough in practice) The queueing logic is fairly involved, so I'd appreciate suggested patches on freedesktop.org Bugzilla or to dbus-security@lists.freedesktop.org if you have an idea for better limits. If you believe you have found a non-public vulnerability, please mark any Bugzilla bugs as restricted to the D-Bus security group. 
Thanks, S [1] The system dbus-daemon is started as root (and hence able to increase its own fd limit) on all systemd systems, anything that uses the Red Hat or Slackware sysvinit scripts bundled with dbus, and any Debian derivatives that use sysvinit and have taken security updates from at least Debian 7. Other distro-specific init system glue is up to the relevant distro.
On 03.02.2016 12:56, David Herrmann wrote: > However, with Hannes' revised patch, a different DoS attack against > dbus-daemon is possible. Imagine a peer that receives batches of FDs, > but never dequeues them. They will be accounted on the inflight-limit > of dbus-daemon, as such causing messages of independent peers to be > rejected in case they carry FDs. Yes, that is true. We also kind of have the problem with unconnected af-unix dgram sockets: if the receiver does not read the skbs on the receive queue we don't free up the sending socket's wmem, which stops the socket from being destructed and can block the process during sendmsg on this socket. This is harder to DoS but pretty much the same scheme. Bye, Hannes
diff --git a/include/linux/sched.h b/include/linux/sched.h index edad7a4..fbf25f1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -830,6 +830,7 @@ struct user_struct { unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ #endif unsigned long locked_shm; /* How many pages of mlocked shm ? */ + unsigned long unix_inflight; /* How many files in flight in unix sockets */ #ifdef CONFIG_KEYS struct key *uid_keyring; /* UID specific keyring */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 45aebd9..d6d7b43 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1499,6 +1499,21 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +/* + * The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. + */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ + struct user_struct *user = current_user(); + + if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); + return false; +} + #define MAX_RECURSION_LEVEL 4 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) @@ -1507,6 +1522,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) unsigned char max_level = 0; int unix_sock_count = 0; + if (too_many_unix_fds(current)) + return -ETOOMANYREFS; + for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]); @@ -1528,10 +1546,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - if (unix_sock_count) { - for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - } + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->fp[i]); return max_level; } diff --git a/net/unix/garbage.c 
b/net/unix/garbage.c index a73a226..8fcdc22 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -120,11 +120,11 @@ void unix_inflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); - if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp) BUG_ON(list_empty(&u->link)); } unix_tot_inflight++; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight++; + spin_unlock(&unix_gc_lock); } void unix_notinflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight--; + spin_unlock(&unix_gc_lock); } static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),