diff mbox

[RFC,v3] audit: use proper refcount locking on audit_sock

Message ID 61c37ca790bc11bc023aea8f9b70ab3098aa30f5.1481626466.git.rgb@redhat.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Richard Guy Briggs Dec. 13, 2016, 3:03 p.m. UTC
Resetting audit_sock appears to be racy.

audit_sock was being copied and dereferenced without using a refcount on
the source sock.

Bump the refcount on the underlying sock when we store a refrence in
audit_sock and release it when we reset audit_sock.  audit_sock
modification needs the audit_cmd_mutex.

See: https://lkml.org/lkml/2016/11/26/232

Thanks to Eric Dumazet <edumazet@google.com> and Cong Wang
<xiyou.wangcong@gmail.com> on ideas how to fix it.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
---
There has been a lot of change in the audit code that is about to go
upstream to address audit queue issues.  This patch is based on the
source tree: git://git.infradead.org/users/pcmoore/audit#next
---
 kernel/audit.c |   28 +++++++++++++++++++++++-----
 1 files changed, 23 insertions(+), 5 deletions(-)

Comments

Paul Moore Dec. 13, 2016, 8:50 p.m. UTC | #1
On Tue, Dec 13, 2016 at 10:03 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
> Resetting audit_sock appears to be racy.
>
> audit_sock was being copied and dereferenced without using a refcount on
> the source sock.
>
> Bump the refcount on the underlying sock when we store a refrence in
> audit_sock and release it when we reset audit_sock.  audit_sock
> modification needs the audit_cmd_mutex.
>
> See: https://lkml.org/lkml/2016/11/26/232
>
> Thanks to Eric Dumazet <edumazet@google.com> and Cong Wang
> <xiyou.wangcong@gmail.com> on ideas how to fix it.
>
> Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> ---
> There has been a lot of change in the audit code that is about to go
> upstream to address audit queue issues.  This patch is based on the
> source tree: git://git.infradead.org/users/pcmoore/audit#next
> ---
>  kernel/audit.c |   28 +++++++++++++++++++++++-----
>  1 files changed, 23 insertions(+), 5 deletions(-)

This looks more reasonable.  I still wonder about synchronization
between threads changing the audit_* connection variables and the
kauditd_thread, but I guess we can treat that as another issue; this
patch fixes a bug and is worth merging now.

I'm building a test kernel right now, assuming nothing blows up I'll
push this patch with the rest of the audit patches tomorrow; if
something bad happens, this is going to miss the first audit pull
request.

> diff --git a/kernel/audit.c b/kernel/audit.c
> index f20eee0..3bb4126 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -446,14 +446,19 @@ static void kauditd_retry_skb(struct sk_buff *skb)
>   * Description:
>   * Break the auditd/kauditd connection and move all the records in the retry
>   * queue into the hold queue in case auditd reconnects.
> + * The audit_cmd_mutex must be held when calling this function.
>   */

Don't resend, but in the future please start comments like this on the
previous line.
Cong Wang Dec. 14, 2016, 12:19 a.m. UTC | #2
On Tue, Dec 13, 2016 at 7:03 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
> @@ -1283,8 +1299,10 @@ static void __net_exit audit_net_exit(struct net *net)
>  {
>         struct audit_net *aunet = net_generic(net, audit_net_id);
>         struct sock *sock = aunet->nlsk;
> +       mutex_lock(&audit_cmd_mutex);
>         if (sock == audit_sock)
>                 auditd_reset();
> +       mutex_unlock(&audit_cmd_mutex);

This still doesn't look correct to me, b/c here we release the audit_sock
refcnt twice:

1) inside audit_reset()
2) netlink_kernel_release()
Richard Guy Briggs Dec. 14, 2016, 4 a.m. UTC | #3
On 2016-12-13 16:19, Cong Wang wrote:
> On Tue, Dec 13, 2016 at 7:03 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
> > @@ -1283,8 +1299,10 @@ static void __net_exit audit_net_exit(struct net *net)
> >  {
> >         struct audit_net *aunet = net_generic(net, audit_net_id);
> >         struct sock *sock = aunet->nlsk;
> > +       mutex_lock(&audit_cmd_mutex);
> >         if (sock == audit_sock)
> >                 auditd_reset();
> > +       mutex_unlock(&audit_cmd_mutex);
> 
> This still doesn't look correct to me, b/c here we release the audit_sock
> refcnt twice:
> 
> 1) inside audit_reset()

The audit_reset() refcount decrement corresponds to a setting of
audit_sock only if audit_sock is still non-NULL.

> 2) netlink_kernel_release()

This refcount decrement corresponds to netlink_kernel_create().

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Kernel Security Engineering, Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635
Cong Wang Dec. 14, 2016, 5:36 a.m. UTC | #4
On Tue, Dec 13, 2016 at 8:00 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
> On 2016-12-13 16:19, Cong Wang wrote:
>> On Tue, Dec 13, 2016 at 7:03 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
>> > @@ -1283,8 +1299,10 @@ static void __net_exit audit_net_exit(struct net *net)
>> >  {
>> >         struct audit_net *aunet = net_generic(net, audit_net_id);
>> >         struct sock *sock = aunet->nlsk;
>> > +       mutex_lock(&audit_cmd_mutex);
>> >         if (sock == audit_sock)
>> >                 auditd_reset();
>> > +       mutex_unlock(&audit_cmd_mutex);
>>
>> This still doesn't look correct to me, b/c here we release the audit_sock
>> refcnt twice:
>>
>> 1) inside audit_reset()
>
> The audit_reset() refcount decrement corresponds to a setting of
> audit_sock only if audit_sock is still non-NULL.
>

Hmm, thinking about it again, looks like the sock == audit_sock
and audit_sock != NULL checks can guarantee we are safe. So,

Reviewed-by: Cong Wang <xiyou.wangcong@gmail.com>
diff mbox

Patch

diff --git a/kernel/audit.c b/kernel/audit.c
index f20eee0..3bb4126 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -446,14 +446,19 @@  static void kauditd_retry_skb(struct sk_buff *skb)
  * Description:
  * Break the auditd/kauditd connection and move all the records in the retry
  * queue into the hold queue in case auditd reconnects.
+ * The audit_cmd_mutex must be held when calling this function.
  */
 static void auditd_reset(void)
 {
 	struct sk_buff *skb;
 
 	/* break the connection */
+	if (audit_sock) {
+		sock_put(audit_sock);
+		audit_sock = NULL;
+	}
 	audit_pid = 0;
-	audit_sock = NULL;
+	audit_nlk_portid = 0;
 
 	/* flush all of the retry queue to the hold queue */
 	while ((skb = skb_dequeue(&audit_retry_queue)))
@@ -579,7 +584,9 @@  static int kauditd_thread(void *dummy)
 
 				auditd = 0;
 				if (AUDITD_BAD(rc, reschedule)) {
+					mutex_lock(&audit_cmd_mutex);
 					auditd_reset();
+					mutex_unlock(&audit_cmd_mutex);
 					reschedule = 0;
 				}
 			} else
@@ -594,7 +601,9 @@  static int kauditd_thread(void *dummy)
 				auditd = 0;
 				if (AUDITD_BAD(rc, reschedule)) {
 					kauditd_hold_skb(skb);
+					mutex_lock(&audit_cmd_mutex);
 					auditd_reset();
+					mutex_unlock(&audit_cmd_mutex);
 					reschedule = 0;
 				} else
 					/* temporary problem (we hope), queue
@@ -623,7 +632,9 @@  quick_loop:
 				if (rc) {
 					auditd = 0;
 					if (AUDITD_BAD(rc, reschedule)) {
+						mutex_lock(&audit_cmd_mutex);
 						auditd_reset();
+						mutex_unlock(&audit_cmd_mutex);
 						reschedule = 0;
 					}
 
@@ -1010,11 +1021,16 @@  static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			}
 			if (audit_enabled != AUDIT_OFF)
 				audit_log_config_change("audit_pid", new_pid, audit_pid, 1);
-			audit_pid = new_pid;
-			audit_nlk_portid = NETLINK_CB(skb).portid;
-			audit_sock = skb->sk;
-			if (!new_pid)
+			if (new_pid) {
+				if (audit_sock)
+					sock_put(audit_sock);
+				audit_pid = new_pid;
+				audit_nlk_portid = NETLINK_CB(skb).portid;
+				sock_hold(skb->sk);
+				audit_sock = skb->sk;
+			} else {
 				auditd_reset();
+			}
 			wake_up_interruptible(&kauditd_wait);
 		}
 		if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
@@ -1283,8 +1299,10 @@  static void __net_exit audit_net_exit(struct net *net)
 {
 	struct audit_net *aunet = net_generic(net, audit_net_id);
 	struct sock *sock = aunet->nlsk;
+	mutex_lock(&audit_cmd_mutex);
 	if (sock == audit_sock)
 		auditd_reset();
+	mutex_unlock(&audit_cmd_mutex);
 
 	RCU_INIT_POINTER(aunet->nlsk, NULL);
 	synchronize_net();