From patchwork Sat Sep 11 03:23:23 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Changli Gao X-Patchwork-Id: 64483 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id C29AEB70AA for ; Sat, 11 Sep 2010 13:24:14 +1000 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754490Ab0IKDXz (ORCPT ); Fri, 10 Sep 2010 23:23:55 -0400 Received: from mail-pz0-f46.google.com ([209.85.210.46]:61980 "EHLO mail-pz0-f46.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753862Ab0IKDXy (ORCPT ); Fri, 10 Sep 2010 23:23:54 -0400 Received: by pzk34 with SMTP id 34so1242379pzk.19 for ; Fri, 10 Sep 2010 20:23:53 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:received:received:from:to:cc:subject:date :message-id:x-mailer; bh=DjAXaI3nQfi/7R4+X9ulLKQMxfA8KeSJh1quO6EOllc=; b=QPwTrfYrpWcQFzxDShTgM0mEj+MdqmCSLT1MBZUJ4k2ISVOzFB3hUbNURLESyvv/kM +NSqL/4rDqXA9sxLCIKAeBC1eqkrfbPRjNN4O08Of2pFk2HmjRNLG3PrmMrN6py0usEA QjthMVVzhgpi5nQ/JPtnWYkfMcZvtudytShiM= DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=from:to:cc:subject:date:message-id:x-mailer; b=Kl2HOlV5oyxPnou559OaKHcSjoXMGVCA6Iqm2pa9h1isLEvXEDL4Gv7px6wcqL4hAL n7UT5mEtcrFdtM0TnRIv6yH1CeX+5R9QcL6U46hcN+m7SR4PRNWghfzRxrATUIXo+t+v Lxi0vp9J0052/0WygLbn1kjYTNNSCECsoEnco= Received: by 10.114.111.15 with SMTP id j15mr1924008wac.119.1284175433216; Fri, 10 Sep 2010 20:23:53 -0700 (PDT) Received: from localhost.localdomain ([221.238.69.7]) by mx.google.com with ESMTPS id o17sm5830848wal.21.2010.09.10.20.23.41 (version=TLSv1/SSLv3 cipher=RC4-MD5); Fri, 10 Sep 2010 20:23:52 -0700 (PDT) From: Changli Gao To: "David S. 
Miller" Cc: Eric Dumazet , Oliver Hartkopp , "Michael S. Tsirkin" , netdev@vger.kernel.org, Changli Gao Subject: [PATCH v2] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out Date: Sat, 11 Sep 2010 11:23:23 +0800 Message-Id: <1284175403-3228-1-git-send-email-xiaosuo@gmail.com> X-Mailer: git-send-email 1.6.4.4 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Since skb->destructor() is used to account socket memory, and may be called before the skb is sent out, a corrupt skb may be sent out finally. A new destructor is added into structure skb_shared_info(), and it won't be called until the last reference to the data of an skb is put. af_packet uses this destructor instead. Signed-off-by: Changli Gao --- v2: avoid kmalloc/kfree include/linux/skbuff.h | 3 ++- net/core/skbuff.c | 19 ++++++++++++++----- net/packet/af_packet.c | 24 +++++++++++------------- 3 files changed, 27 insertions(+), 19 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9e8085a..1a8cfa1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -191,6 +191,7 @@ struct skb_shared_info { __u8 tx_flags; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; + void (*destructor)(struct sk_buff *skb); /* * Warning : all fields before dataref are cleared in __alloc_skb() @@ -199,7 +200,7 @@ struct skb_shared_info { /* Intermediate layers must ensure that destructor_arg * remains valid until skb destructor */ - void * destructor_arg; + void *destructor_arg[2]; /* must be last field, see pskb_expand_head() */ skb_frag_t frags[MAX_SKB_FRAGS]; }; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 752c197..fef81f3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -332,10 +332,14 @@
static void skb_release_data(struct sk_buff *skb) if (!skb->cloned || !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, &skb_shinfo(skb)->dataref)) { - if (skb_shinfo(skb)->nr_frags) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + + if (shinfo->destructor) + shinfo->destructor(skb); + if (shinfo->nr_frags) { int i; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + for (i = 0; i < shinfo->nr_frags; i++) + put_page(shinfo->frags[i].page); } if (skb_has_frag_list(skb)) @@ -497,9 +501,12 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size) if (skb_shared(skb) || skb_cloned(skb)) return false; + shinfo = skb_shinfo(skb); + if (shinfo->destructor) + return false; + skb_release_head_state(skb); - shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); @@ -799,7 +806,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), - offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); + offsetof(struct skb_shared_info, + frags[skb_shinfo(skb)->nr_frags])); + skb_shinfo(skb)->destructor = NULL; /* Check if we can avoid taking references on fragments if we own * the last reference on skb->head. 
(see skb_release_data()) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3616f27..ce81c45 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -825,19 +825,18 @@ ring_is_full: static void tpacket_destruct_skb(struct sk_buff *skb) { - struct packet_sock *po = pkt_sk(skb->sk); - void *ph; - - BUG_ON(skb == NULL); + struct packet_sock *po = pkt_sk(skb_shinfo(skb)->destructor_arg[0]); if (likely(po->tx_ring.pg_vec)) { - ph = skb_shinfo(skb)->destructor_arg; + void *ph = skb_shinfo(skb)->destructor_arg[1]; + BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); BUG_ON(atomic_read(&po->tx_ring.pending) == 0); atomic_dec(&po->tx_ring.pending); __packet_set_status(po, ph, TP_STATUS_AVAILABLE); } + skb->sk = &po->sk; sock_wfree(skb); } @@ -862,7 +861,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->dev = dev; skb->priority = po->sk.sk_priority; skb->mark = po->sk.sk_mark; - skb_shinfo(skb)->destructor_arg = ph.raw; switch (po->tp_version) { case TPACKET_V2: @@ -884,9 +882,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, to_write = tp_len; if (sock->type == SOCK_DGRAM) { - err = dev_hard_header(skb, dev, ntohs(proto), addr, - NULL, tp_len); - if (unlikely(err < 0)) + if (unlikely(dev_hard_header(skb, dev, ntohs(proto), addr, + NULL, tp_len) < 0)) return -EINVAL; } else if (dev->hard_header_len) { /* net device doesn't like empty head */ @@ -897,8 +894,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, } skb_push(skb, dev->hard_header_len); - err = skb_store_bits(skb, 0, data, - dev->hard_header_len); + err = skb_store_bits(skb, 0, data, dev->hard_header_len); if (unlikely(err)) return err; @@ -906,7 +902,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, to_write -= dev->hard_header_len; } - err = -EFAULT; page = virt_to_page(data); offset = offset_in_page(data); len_max = PAGE_SIZE - offset; @@ -1028,7 +1023,10 @@ static 
int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } } - skb->destructor = tpacket_destruct_skb; + skb_shinfo(skb)->destructor_arg[0] = &po->sk; + skb_shinfo(skb)->destructor_arg[1] = ph; + skb->destructor = NULL; + skb_shinfo(skb)->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); atomic_inc(&po->tx_ring.pending);