From patchwork Thu Nov 21 15:50:58 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Daniel Borkmann X-Patchwork-Id: 293183 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id C49162C00A8 for ; Fri, 22 Nov 2013 02:51:12 +1100 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754725Ab3KUPvI (ORCPT ); Thu, 21 Nov 2013 10:51:08 -0500 Received: from mx1.redhat.com ([209.132.183.28]:24195 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753182Ab3KUPvH (ORCPT ); Thu, 21 Nov 2013 10:51:07 -0500 Received: from int-mx09.intmail.prod.int.phx2.redhat.com (int-mx09.intmail.prod.int.phx2.redhat.com [10.5.11.22]) by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id rALFp0eb002018 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK); Thu, 21 Nov 2013 10:51:00 -0500 Received: from localhost (vpn1-5-53.ams2.redhat.com [10.36.5.53]) by int-mx09.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id rALFoxw3006731; Thu, 21 Nov 2013 10:50:59 -0500 From: Daniel Borkmann To: davem@davemloft.net Cc: netdev@vger.kernel.org, Salam Noureddine , Ben Greear , Eric Dumazet Subject: [PATCH v3] packet: fix use after free race in send path when dev is released Date: Thu, 21 Nov 2013 16:50:58 +0100 Message-Id: <1385049058-1946-1-git-send-email-dborkman@redhat.com> X-Scanned-By: MIMEDefang 2.68 on 10.5.11.22 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Salam reported a use after free bug in PF_PACKET that occurs when we're sending out frames on a socket bound device and suddenly the net device is being unregistered. It appears that commit 827d9780 introduced a possible race condition between {t,}packet_snd() and packet_notifier(). In the case of a bound socket, packet_notifier() can drop the last reference to the net_device and {t,}packet_snd() might end up suddenly sending a packet over a freed net_device. To avoid reverting 827d9780 and thus introducing a performance regression compared to the current state of things, we decided to hold a cached RCU protected pointer to the net device and maintain it on write side via bind spin_lock protected register_prot_hook() and __unregister_prot_hook() calls. In {t,}packet_snd() path, we access this pointer under rcu_read_lock through packet_cached_dev_get() that holds reference to the device to prevent it from being freed through packet_notifier() while we're in send path. This is okay to do as dev_put()/dev_hold() are per-cpu counters, so this should not be a performance issue. Also, the code simplifies a bit as we don't need need_rls_dev anymore. Fixes: 827d978037d7 ("af-packet: Use existing netdev reference for bound sockets.") Reported-by: Salam Noureddine Signed-off-by: Daniel Borkmann Signed-off-by: Salam Noureddine Cc: Ben Greear Cc: Eric Dumazet --- v1->v2: - Applied feedback from Dave and Eric, thanks a lot for this! v2->v3: - As Eric points out, we're on the safe side w/o call_rcu() deferral, so remove it, rest stays the same. net/packet/af_packet.c | 59 ++++++++++++++++++++++++++++++-------------------- net/packet/internal.h | 1 + 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2e8286b..4204846 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -244,11 +244,15 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static void register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); + if (!po->running) { - if (po->fanout) + if (po->fanout) { __fanout_link(sk, po); - else + } else { dev_add_pack(&po->prot_hook); + rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); + } + sock_hold(sk); po->running = 1; } @@ -266,10 +270,13 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) + if (po->fanout) { __fanout_unlink(sk, po); - else + } else { __dev_remove_pack(&po->prot_hook); + RCU_INIT_POINTER(po->cached_dev, NULL); + } + __sock_put(sk); if (sync) { @@ -2052,12 +2059,24 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; int err, reserve = 0; void *ph; struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; @@ -2070,7 +2089,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2084,19 +2103,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; if (unlikely(dev == NULL)) goto out; - - reserve = dev->hard_header_len; - err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + reserve = dev->hard_header_len; + size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); @@ -2173,8 +2190,7 @@ out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: - if (need_rls_dev) - dev_put(dev); + dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; @@ -2212,7 +2228,6 @@ static int packet_snd(struct socket *sock, struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; unsigned char *addr; int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2228,7 +2243,7 @@ static int packet_snd(struct socket *sock, */ if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2240,19 +2255,17 @@ static int packet_snd(struct socket *sock, proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; - if (dev == NULL) + if (unlikely(dev == NULL)) goto out_unlock; - if (sock->type == SOCK_RAW) - reserve = dev->hard_header_len; - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) + if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + if (sock->type == SOCK_RAW) + reserve = dev->hard_header_len; if (po->has_vnet_hdr) { vnet_hdr_len = sizeof(vnet_hdr); @@ -2386,15 +2399,14 @@ static int packet_snd(struct socket *sock, if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; - if (need_rls_dev) - dev_put(dev); + dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: - if (dev && need_rls_dev) + if (dev) dev_put(dev); out: return err; @@ -2614,6 +2626,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; + RCU_INIT_POINTER(po->cached_dev, NULL); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); diff --git a/net/packet/internal.h b/net/packet/internal.h index c4e4b45..1035fa2 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -113,6 +113,7 @@ struct packet_sock { unsigned int tp_loss:1; unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; + struct net_device __rcu *cached_dev; struct packet_type prot_hook ____cacheline_aligned_in_smp; };