diff mbox

net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out

Message ID 1284124960-3266-1-git-send-email-xiaosuo@gmail.com
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Changli Gao Sept. 10, 2010, 1:22 p.m. UTC
Since skb->destructor() is used to account socket memory, and may be called
before the skb is sent out, a corrupted skb may end up being sent out.

A new destructor is added to struct skb_shared_info, and it won't
be called until the last reference to the data of an skb is put. af_packet
uses this destructor instead.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
 include/linux/skbuff.h |    1 +
 net/core/skbuff.c      |   19 ++++++++++++++-----
 net/packet/af_packet.c |   38 +++++++++++++++++++++++++-------------
 3 files changed, 40 insertions(+), 18 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Eric Dumazet Sept. 10, 2010, 2:26 p.m. UTC | #1
Le vendredi 10 septembre 2010 à 21:22 +0800, Changli Gao a écrit :
> Since skb->destructor() is used to account socket memory, and maybe called
> before the skb is sent out, a corrupt skb maybe sent out finally.
> 
> A new destructor is added into structure skb_shared_info(), and it won't
> be called until the last reference to the data of a skb is put. af_packet
> uses this destructor instead.
> 

Hi Changli

> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ---
>  include/linux/skbuff.h |    1 +
>  net/core/skbuff.c      |   19 ++++++++++++++-----
>  net/packet/af_packet.c |   38 +++++++++++++++++++++++++-------------
>  3 files changed, 40 insertions(+), 18 deletions(-)
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 9e8085a..f874c13 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -191,6 +191,7 @@ struct skb_shared_info {
>  	__u8		tx_flags;
>  	struct sk_buff	*frag_list;
>  	struct skb_shared_hwtstamps hwtstamps;
> +	void		(*destructor)(struct sk_buff *skb);
>  
>  	/*
>  	 * Warning : all fields before dataref are cleared in __alloc_skb()
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 2d1bc76..ff37e54 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -332,10 +332,14 @@ static void skb_release_data(struct sk_buff *skb)
>  	if (!skb->cloned ||
>  	    !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
>  			       &skb_shinfo(skb)->dataref)) {
> -		if (skb_shinfo(skb)->nr_frags) {
> +		struct skb_shared_info *shinfo = skb_shinfo(skb);
> +
> +		if (shinfo->destructor)
> +			shinfo->destructor(skb);
> +		if (shinfo->nr_frags) {
>  			int i;
> -			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
> -				put_page(skb_shinfo(skb)->frags[i].page);
> +			for (i = 0; i < shinfo->nr_frags; i++)
> +				put_page(shinfo->frags[i].page);
>  		}
>  
>  		if (skb_has_frag_list(skb))
> @@ -497,9 +501,12 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size)
>  	if (skb_shared(skb) || skb_cloned(skb))
>  		return false;
>  
> +	shinfo = skb_shinfo(skb);
> +	if (shinfo->destructor)
> +		return false;
> +
>  	skb_release_head_state(skb);
>  
> -	shinfo = skb_shinfo(skb);
>  	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
>  	atomic_set(&shinfo->dataref, 1);
>  
> @@ -799,7 +806,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
>  
>  	memcpy((struct skb_shared_info *)(data + size),
>  	       skb_shinfo(skb),
> -	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
> +	       offsetof(struct skb_shared_info,
> +			frags[skb_shinfo(skb)->nr_frags]));
> +	skb_shinfo(skb)->destructor = NULL;
>  
>  	/* Check if we can avoid taking references on fragments if we own
>  	 * the last reference on skb->head. (see skb_release_data())
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 3616f27..7e16b55 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -823,22 +823,27 @@ ring_is_full:
>  	goto drop_n_restore;
>  }
>  
> +struct tpacket_destructor_arg {
> +	struct sock	*sk;
> +	void		*ph;
> +};
> +
>  static void tpacket_destruct_skb(struct sk_buff *skb)
>  {
> -	struct packet_sock *po = pkt_sk(skb->sk);
> -	void *ph;
> -
> -	BUG_ON(skb == NULL);
> +	struct tpacket_destructor_arg *arg = skb_shinfo(skb)->destructor_arg;
> +	struct packet_sock *po = pkt_sk(arg->sk);
> +	void *ph = arg->ph;
>  
>  	if (likely(po->tx_ring.pg_vec)) {
> -		ph = skb_shinfo(skb)->destructor_arg;
>  		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
>  		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
>  		atomic_dec(&po->tx_ring.pending);
>  		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
>  	}
>  
> +	skb->sk = arg->sk;
>  	sock_wfree(skb);

Are you sure sock_wfree(skb) is still needed ?


> +	kfree(arg);

this new kmalloc()/kfree() for each sent packet won't please the guys
using the af_packet/mmap interface...

>  }
>  
>  static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
> @@ -862,7 +867,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>  	skb->dev = dev;
>  	skb->priority = po->sk.sk_priority;
>  	skb->mark = po->sk.sk_mark;
> -	skb_shinfo(skb)->destructor_arg = ph.raw;
>  
>  	switch (po->tp_version) {
>  	case TPACKET_V2:
> @@ -884,9 +888,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>  	to_write = tp_len;
>  
>  	if (sock->type == SOCK_DGRAM) {
> -		err = dev_hard_header(skb, dev, ntohs(proto), addr,
> -				NULL, tp_len);
> -		if (unlikely(err < 0))
> +		if (unlikely(dev_hard_header(skb, dev, ntohs(proto), addr,
> +					     NULL, tp_len) < 0))
>  			return -EINVAL;
>  	} else if (dev->hard_header_len) {
>  		/* net device doesn't like empty head */
> @@ -897,8 +900,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>  		}
>  
>  		skb_push(skb, dev->hard_header_len);
> -		err = skb_store_bits(skb, 0, data,
> -				dev->hard_header_len);
> +		err = skb_store_bits(skb, 0, data, dev->hard_header_len);
>  		if (unlikely(err))
>  			return err;
>  
> @@ -906,7 +908,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>  		to_write -= dev->hard_header_len;
>  	}
>  
> -	err = -EFAULT;
>  	page = virt_to_page(data);
>  	offset = offset_in_page(data);
>  	len_max = PAGE_SIZE - offset;
> @@ -994,6 +995,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>  		size_max = dev->mtu + reserve;
>  
>  	do {
> +		struct tpacket_destructor_arg *arg;
> +
>  		ph = packet_current_frame(po, &po->tx_ring,
>  				TP_STATUS_SEND_REQUEST);
>  
> @@ -1028,7 +1031,16 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>  			}
>  		}
>  
> -		skb->destructor = tpacket_destruct_skb;
> +		arg = kmalloc(sizeof(*arg), GFP_KERNEL);
> +		if (unlikely(arg == NULL)) {
> +			err = -ENOBUFS;
> +			goto out_status;
> +		}
> +		arg->sk = &po->sk;
> +		arg->ph = ph;
> +		skb_shinfo(skb)->destructor_arg = arg;
> +		skb->destructor = NULL;

why setting skb->destructor to NULL here ?

> +		skb_shinfo(skb)->destructor = tpacket_destruct_skb;
>  		__packet_set_status(po, ph, TP_STATUS_SENDING);
>  		atomic_inc(&po->tx_ring.pending);
>  

I don't yet understand how this can prevent the af_unix module being unloaded
while packets are in flight.

I believe sock_wfree() should be avoided (since early orphaning occurs),
to reduce number of atomic ops to the minimum.

af_packet/mmap users want fast operations, we should not use
sock_wfree() for them, because max number of in flight packets is known
(tx ring buffer)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Changli Gao Sept. 10, 2010, 4:47 p.m. UTC | #2
On Fri, Sep 10, 2010 at 10:26 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le vendredi 10 septembre 2010 à 21:22 +0800, Changli Gao a écrit :
>> Since skb->destructor() is used to account socket memory, and maybe called
>> before the skb is sent out, a corrupt skb maybe sent out finally.
>>
>> A new destructor is added into structure skb_shared_info(), and it won't
>> be called until the last reference to the data of a skb is put. af_packet
>> uses this destructor instead.
>>
>
> Hi Changli
>
>>  static void tpacket_destruct_skb(struct sk_buff *skb)
>>  {
>> -     struct packet_sock *po = pkt_sk(skb->sk);
>> -     void *ph;
>> -
>> -     BUG_ON(skb == NULL);
>> +     struct tpacket_destructor_arg *arg = skb_shinfo(skb)->destructor_arg;
>> +     struct packet_sock *po = pkt_sk(arg->sk);
>> +     void *ph = arg->ph;
>>
>>       if (likely(po->tx_ring.pg_vec)) {
>> -             ph = skb_shinfo(skb)->destructor_arg;
>>               BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
>>               BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
>>               atomic_dec(&po->tx_ring.pending);
>>               __packet_set_status(po, ph, TP_STATUS_AVAILABLE);
>>       }
>>
>> +     skb->sk = arg->sk;
>>       sock_wfree(skb);
>
> Are you sure sock_wfree(skb) is still needed ?

sock_wfree(skb) is also used to wake up the users who sleep on
poll(2). If sock_wfree(skb) is moved into skb->destructor(), and
called before the skb is sent out, pollers will be woken up without
POLLOUT, and since the later skb_shinfo(skb)->destructor() doesn't
wake up the pollers, POLLOUT events will be lost, and the poller will
be blocked forever.

>
>
>> +     kfree(arg);
>
> this new kmalloc()/kfree() for each sent packet wont please the guys
> using af_packet/mmap interface...

Embed these two pointers into skb_shared_info? It may slow the others.

>
>>  }
>>
>>  static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>> @@ -862,7 +867,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>>       skb->dev = dev;
>>       skb->priority = po->sk.sk_priority;
>>       skb->mark = po->sk.sk_mark;
>> -     skb_shinfo(skb)->destructor_arg = ph.raw;
>>
>>       switch (po->tp_version) {
>>       case TPACKET_V2:
>> @@ -884,9 +888,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>>       to_write = tp_len;
>>
>>       if (sock->type == SOCK_DGRAM) {
>> -             err = dev_hard_header(skb, dev, ntohs(proto), addr,
>> -                             NULL, tp_len);
>> -             if (unlikely(err < 0))
>> +             if (unlikely(dev_hard_header(skb, dev, ntohs(proto), addr,
>> +                                          NULL, tp_len) < 0))
>>                       return -EINVAL;
>>       } else if (dev->hard_header_len) {
>>               /* net device doesn't like empty head */
>> @@ -897,8 +900,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>>               }
>>
>>               skb_push(skb, dev->hard_header_len);
>> -             err = skb_store_bits(skb, 0, data,
>> -                             dev->hard_header_len);
>> +             err = skb_store_bits(skb, 0, data, dev->hard_header_len);
>>               if (unlikely(err))
>>                       return err;
>>
>> @@ -906,7 +908,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
>>               to_write -= dev->hard_header_len;
>>       }
>>
>> -     err = -EFAULT;
>>       page = virt_to_page(data);
>>       offset = offset_in_page(data);
>>       len_max = PAGE_SIZE - offset;
>> @@ -994,6 +995,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>>               size_max = dev->mtu + reserve;
>>
>>       do {
>> +             struct tpacket_destructor_arg *arg;
>> +
>>               ph = packet_current_frame(po, &po->tx_ring,
>>                               TP_STATUS_SEND_REQUEST);
>>
>> @@ -1028,7 +1031,16 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>>                       }
>>               }
>>
>> -             skb->destructor = tpacket_destruct_skb;
>> +             arg = kmalloc(sizeof(*arg), GFP_KERNEL);
>> +             if (unlikely(arg == NULL)) {
>> +                     err = -ENOBUFS;
>> +                     goto out_status;
>> +             }
>> +             arg->sk = &po->sk;
>> +             arg->ph = ph;
>> +             skb_shinfo(skb)->destructor_arg = arg;
>> +             skb->destructor = NULL;
>
> why setting skb->destructor to NULL here ?

Let skb_shinfo(skb)->destructor() do all the things which used to be
done by skb->destructor().

>
>> +             skb_shinfo(skb)->destructor = tpacket_destruct_skb;
>>               __packet_set_status(po, ph, TP_STATUS_SENDING);
>>               atomic_inc(&po->tx_ring.pending);
>>
>
> I dont yet understand how this can prevent af_unix module being unloaded
> while packets are in flight

This issue isn't addressed, and I think it should be fixed in a separate patch.

>
> I believe sock_wfree() should be avoided (since early orphaning occurs),
> to reduce number of atomic ops to the minimum.
>
> af_packet/mmap users want fast operations, we should not use
> sock_wfree() for them, because max number of in flight packets is known
> (tx ring buffer)
>
>

But the users rely on the kernel to inform them that there is an
available frame for use.
Eric Dumazet Sept. 10, 2010, 4:58 p.m. UTC | #3
Le samedi 11 septembre 2010 à 00:47 +0800, Changli Gao a écrit :
> On Fri, Sep 10, 2010 at 10:26 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:


> > Are you sure sock_wfree(skb) is still needed ?
> 
> sock_wfree(skb) is also used to wake up the users who sleep on
> poll(2). If sock_wfree(skb) is moved into skb->destructor(), and
> called before skb is sent out, pollers will be waked up without
> POLLOUT, and since the later skb_shinfo(skb)->destructor() doesn't
> wake up the pollers, POLLOUT events will be lost, and the poller will
> be blocked forever.
> 

Then implement poll() to use the number of available slots
(instead of using the default poll(), which relies on the generic sk / inet
queues and counters).

Really, sock_wfree() cannot be used at all, or we also must disable
early orphaning of these skbs.

Goal is to replace skb->destructor use in af_packet by
shinfo->destructor, not mix the two.


> >
> >
> >> +     kfree(arg);
> >
> > this new kmalloc()/kfree() for each sent packet wont please the guys
> > using af_packet/mmap interface...
> 
> Embed these two pointers into skb_shared_info? It may slow the others.

we have some room because of SKB_PAD alignment,




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Sept. 10, 2010, 5:12 p.m. UTC | #4
Le vendredi 10 septembre 2010 à 18:58 +0200, Eric Dumazet a écrit :
> Le samedi 11 septembre 2010 à 00:47 +0800, Changli Gao a écrit :
> > On Fri, Sep 10, 2010 at 10:26 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
> 
> > > Are you sure sock_wfree(skb) is still needed ?
> > 
> > sock_wfree(skb) is also used to wake up the users who sleep on
> > poll(2). If sock_wfree(skb) is moved into skb->destructor(), and
> > called before skb is sent out, pollers will be waked up without
> > POLLOUT, and since the later skb_shinfo(skb)->destructor() doesn't
> > wake up the pollers, POLLOUT events will be lost, and the poller will
> > be blocked forever.
> > 
> 
> Then implement poll() to use the number of available slots.
> (not use the default poll() that relies on generic sk / inet queues and
> counters)
> 
> Really, sock_wfree() cannot be used at all, or we also must disable
> early orphaning of these skbs.
> 
> Goal is to replace skb->destructor use in af_packet by
> shinfo->destructor, not mix the two.

Thinking again about this, we also might avoid taking references on
pages and releasing references too.

shinfo->destructor should replace the skb_release_data() logic,
not complement it.

if (shinfo->destructor) {
	shinfo->destructor(skb);
} else {
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		put_page(skb_shinfo(skb)->frags[i].page);
	if (skb_has_frag_list(skb))
	....
	kfree(skb->head);
}


As long as the mmap zone is correctly protected in af_packet code, of
course (not releasing it as long as some packets are still in flight)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Changli Gao Sept. 11, 2010, 12:19 a.m. UTC | #5
On Sat, Sep 11, 2010 at 1:12 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le vendredi 10 septembre 2010 à 18:58 +0200, Eric Dumazet a écrit :
>> Le samedi 11 septembre 2010 à 00:47 +0800, Changli Gao a écrit :
>> > On Fri, Sep 10, 2010 at 10:26 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>>
>>
>> > > Are you sure sock_wfree(skb) is still needed ?
>> >
>> > sock_wfree(skb) is also used to wake up the users who sleep on
>> > poll(2). If sock_wfree(skb) is moved into skb->destructor(), and
>> > called before skb is sent out, pollers will be waked up without
>> > POLLOUT, and since the later skb_shinfo(skb)->destructor() doesn't
>> > wake up the pollers, POLLOUT events will be lost, and the poller will
>> > be blocked forever.
>> >
>>
>> Then implement poll() to use the number of available slots.
>> (not use the default poll() that relies on generic sk / inet queues and
>> counters)
>>
>> Really, sock_wfree() cannot be used at all, or we also must disable
>> early orphaning of these skbs.
>>
>> Goal is to replace skb->destructor use in af_packet by
>> shinfo->destructor, not mix the two.
>
> Thinking again about this, we also might avoid taking references on
> pages and releasing references too.
>
> shinfo->destructor should replace the skb_release_data() logic,
> not complement it.
>
> if (shinfo->destructor) {
>        shinfo->destructor(skb);
> } else {
>        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
>                put_page(skb_shinfo(skb)->frags[i].page);
>        if (skb_has_frag_list(skb))
>        ....
>        kfree(skb->head);
> }
>
>
> As long as the mmap zone is correctly protected in af_packet code, of
> course (not releasing it as long as some packets are still in flight)
>

It touches too much of the internal implementation.

I think most of your ideas are about optimizations, and should be
addressed in separate patches. I'll avoid kmalloc/kfree in the next
version. Thanks.
diff mbox

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9e8085a..f874c13 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -191,6 +191,7 @@  struct skb_shared_info {
 	__u8		tx_flags;
 	struct sk_buff	*frag_list;
 	struct skb_shared_hwtstamps hwtstamps;
+	void		(*destructor)(struct sk_buff *skb);
 
 	/*
 	 * Warning : all fields before dataref are cleared in __alloc_skb()
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2d1bc76..ff37e54 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -332,10 +332,14 @@  static void skb_release_data(struct sk_buff *skb)
 	if (!skb->cloned ||
 	    !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
 			       &skb_shinfo(skb)->dataref)) {
-		if (skb_shinfo(skb)->nr_frags) {
+		struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+		if (shinfo->destructor)
+			shinfo->destructor(skb);
+		if (shinfo->nr_frags) {
 			int i;
-			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-				put_page(skb_shinfo(skb)->frags[i].page);
+			for (i = 0; i < shinfo->nr_frags; i++)
+				put_page(shinfo->frags[i].page);
 		}
 
 		if (skb_has_frag_list(skb))
@@ -497,9 +501,12 @@  bool skb_recycle_check(struct sk_buff *skb, int skb_size)
 	if (skb_shared(skb) || skb_cloned(skb))
 		return false;
 
+	shinfo = skb_shinfo(skb);
+	if (shinfo->destructor)
+		return false;
+
 	skb_release_head_state(skb);
 
-	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
 
@@ -799,7 +806,9 @@  int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	memcpy((struct skb_shared_info *)(data + size),
 	       skb_shinfo(skb),
-	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
+	       offsetof(struct skb_shared_info,
+			frags[skb_shinfo(skb)->nr_frags]));
+	skb_shinfo(skb)->destructor = NULL;
 
 	/* Check if we can avoid taking references on fragments if we own
 	 * the last reference on skb->head. (see skb_release_data())
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3616f27..7e16b55 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -823,22 +823,27 @@  ring_is_full:
 	goto drop_n_restore;
 }
 
+struct tpacket_destructor_arg {
+	struct sock	*sk;
+	void		*ph;
+};
+
 static void tpacket_destruct_skb(struct sk_buff *skb)
 {
-	struct packet_sock *po = pkt_sk(skb->sk);
-	void *ph;
-
-	BUG_ON(skb == NULL);
+	struct tpacket_destructor_arg *arg = skb_shinfo(skb)->destructor_arg;
+	struct packet_sock *po = pkt_sk(arg->sk);
+	void *ph = arg->ph;
 
 	if (likely(po->tx_ring.pg_vec)) {
-		ph = skb_shinfo(skb)->destructor_arg;
 		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
 		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
 		atomic_dec(&po->tx_ring.pending);
 		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
 	}
 
+	skb->sk = arg->sk;
 	sock_wfree(skb);
+	kfree(arg);
 }
 
 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
@@ -862,7 +867,6 @@  static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb->dev = dev;
 	skb->priority = po->sk.sk_priority;
 	skb->mark = po->sk.sk_mark;
-	skb_shinfo(skb)->destructor_arg = ph.raw;
 
 	switch (po->tp_version) {
 	case TPACKET_V2:
@@ -884,9 +888,8 @@  static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	to_write = tp_len;
 
 	if (sock->type == SOCK_DGRAM) {
-		err = dev_hard_header(skb, dev, ntohs(proto), addr,
-				NULL, tp_len);
-		if (unlikely(err < 0))
+		if (unlikely(dev_hard_header(skb, dev, ntohs(proto), addr,
+					     NULL, tp_len) < 0))
 			return -EINVAL;
 	} else if (dev->hard_header_len) {
 		/* net device doesn't like empty head */
@@ -897,8 +900,7 @@  static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		}
 
 		skb_push(skb, dev->hard_header_len);
-		err = skb_store_bits(skb, 0, data,
-				dev->hard_header_len);
+		err = skb_store_bits(skb, 0, data, dev->hard_header_len);
 		if (unlikely(err))
 			return err;
 
@@ -906,7 +908,6 @@  static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		to_write -= dev->hard_header_len;
 	}
 
-	err = -EFAULT;
 	page = virt_to_page(data);
 	offset = offset_in_page(data);
 	len_max = PAGE_SIZE - offset;
@@ -994,6 +995,8 @@  static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		size_max = dev->mtu + reserve;
 
 	do {
+		struct tpacket_destructor_arg *arg;
+
 		ph = packet_current_frame(po, &po->tx_ring,
 				TP_STATUS_SEND_REQUEST);
 
@@ -1028,7 +1031,16 @@  static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 			}
 		}
 
-		skb->destructor = tpacket_destruct_skb;
+		arg = kmalloc(sizeof(*arg), GFP_KERNEL);
+		if (unlikely(arg == NULL)) {
+			err = -ENOBUFS;
+			goto out_status;
+		}
+		arg->sk = &po->sk;
+		arg->ph = ph;
+		skb_shinfo(skb)->destructor_arg = arg;
+		skb->destructor = NULL;
+		skb_shinfo(skb)->destructor = tpacket_destruct_skb;
 		__packet_set_status(po, ph, TP_STATUS_SENDING);
 		atomic_inc(&po->tx_ring.pending);