Patchwork packet_sendmsg_spkt sleeping from invalid context

login
register
mail settings
Submitter Eric Dumazet
Date Dec. 14, 2009, 9:39 p.m.
Message ID <4B26B09A.7050607@gmail.com>
Download mbox | patch
Permalink /patch/41149/
State RFC
Delegated to: David Miller
Headers show

Comments

Eric Dumazet - Dec. 14, 2009, 9:39 p.m.
Le 14/12/2009 22:30, Frederic Weisbecker a écrit :
> On Mon, Dec 14, 2009 at 10:25:57PM +0100, Eric Dumazet wrote:
>> Le 14/12/2009 21:52, Frederic Weisbecker a écrit :
>>>
>>> I also wonder. Are you using PREEMPT_RCU ?
>>
>> Not at all :)
>>
>> But yes, it is illegal to do the memcpy_fromiovec() in rcu_read_lock() context.
> 
> 
> I've just tested, and with RCU preempt it is silent, no warning :)
> 
> 
>>> That may explain why you haven't seen this issue, because
>>> might_sleep() doesn't see that you are in an RCU read-locked
>>> section as preemption is not disabled, but it is illegal to
>>> voluntarily sleep in such an area (although it's fine with
>>> preempt RCU), as doing so with a non-preempt RCU config would barf.
>>>
>>> I'm trying a patch to handle that.
>>
>> As you wish; I also have a patch that I am testing right now :)
> 
> 
> But mine is to teach might_sleep() to handle the RCU preempt case,
> not to fix this netdev thing.
> 
> But I'll happily test the fix you have :)
> 

OK here it is, I tested it with PREEMPT_RCU as well

Thanks !


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Frédéric Weisbecker - Dec. 15, 2009, 3:13 p.m.
On Mon, Dec 14, 2009 at 10:39:38PM +0100, Eric Dumazet wrote:
> OK here it is, I tested it with PREEMPT_RCU as well
> 
> Thanks !
> 
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 0205621..bc17351 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -415,7 +415,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
>  {
>  	struct sock *sk = sock->sk;
>  	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
> -	struct sk_buff *skb;
> +	struct sk_buff *skb = NULL;
>  	struct net_device *dev;
>  	__be16 proto = 0;
>  	int err;
> @@ -437,6 +437,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
>  	 */
>  
>  	saddr->spkt_device[13] = 0;
> +retry:
>  	rcu_read_lock();
>  	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
>  	err = -ENODEV;
> @@ -456,58 +457,48 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
>  	if (len > dev->mtu + dev->hard_header_len)
>  		goto out_unlock;
>  
> -	err = -ENOBUFS;
> -	skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
> -
> -	/*
> -	 * If the write buffer is full, then tough. At this level the user
> -	 * gets to deal with the problem - do your own algorithmic backoffs.
> -	 * That's far more flexible.
> -	 */
> -
> -	if (skb == NULL)
> -		goto out_unlock;
> -
> -	/*
> -	 *	Fill it in
> -	 */
> -
> -	/* FIXME: Save some space for broken drivers that write a
> -	 * hard header at transmission time by themselves. PPP is the
> -	 * notable one here. This should really be fixed at the driver level.
> -	 */
> -	skb_reserve(skb, LL_RESERVED_SPACE(dev));
> -	skb_reset_network_header(skb);
> -
> -	/* Try to align data part correctly */
> -	if (dev->header_ops) {
> -		skb->data -= dev->hard_header_len;
> -		skb->tail -= dev->hard_header_len;
> -		if (len < dev->hard_header_len)
> -			skb_reset_network_header(skb);
> +	if (!skb) {
> +		size_t reserved = LL_RESERVED_SPACE(dev);
> +		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
> +
> +		rcu_read_unlock();
> +		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
> +		if (skb == NULL)
> +			return -ENOBUFS;
> +		skb_reserve(skb, reserved);
> +		/* FIXME: Save some space for broken drivers that write a hard
> +		 * header at transmission time by themselves. PPP is the notable
> +		 * one here. This should really be fixed at the driver level.
> +		 */
> +		skb_reset_network_header(skb);
> +
> +		/* Try to align data part correctly */
> +		if (hhlen) {
> +			skb->data -= hhlen;
> +			skb->tail -= hhlen;
> +			if (len < hhlen)
> +				skb_reset_network_header(skb);
> +		}
> +		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
> +		if (err)
> +			goto out_free;
> +		goto retry;
>  	}
>  
> -	/* Returns -EFAULT on error */
> -	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
> +
>  	skb->protocol = proto;
>  	skb->dev = dev;
>  	skb->priority = sk->sk_priority;
>  	skb->mark = sk->sk_mark;
> -	if (err)
> -		goto out_free;
> -
> -	/*
> -	 *	Now send it
> -	 */
>  
>  	dev_queue_xmit(skb);
>  	rcu_read_unlock();
>  	return len;
>  
> -out_free:
> -	kfree_skb(skb);
>  out_unlock:
>  	rcu_read_unlock();
> +out_free:
> +	kfree_skb(skb);
>  	return err;
>  }
>  
> 




Tested-by: Frederic Weisbecker <fweisbec@gmail.com>

Thanks!

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0205621..bc17351 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -415,7 +415,7 @@  static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
 {
 	struct sock *sk = sock->sk;
 	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	struct net_device *dev;
 	__be16 proto = 0;
 	int err;
@@ -437,6 +437,7 @@  static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
 	 */
 
 	saddr->spkt_device[13] = 0;
+retry:
 	rcu_read_lock();
 	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
 	err = -ENODEV;
@@ -456,58 +457,48 @@  static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
 	if (len > dev->mtu + dev->hard_header_len)
 		goto out_unlock;
 
-	err = -ENOBUFS;
-	skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
-
-	/*
-	 * If the write buffer is full, then tough. At this level the user
-	 * gets to deal with the problem - do your own algorithmic backoffs.
-	 * That's far more flexible.
-	 */
-
-	if (skb == NULL)
-		goto out_unlock;
-
-	/*
-	 *	Fill it in
-	 */
-
-	/* FIXME: Save some space for broken drivers that write a
-	 * hard header at transmission time by themselves. PPP is the
-	 * notable one here. This should really be fixed at the driver level.
-	 */
-	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	skb_reset_network_header(skb);
-
-	/* Try to align data part correctly */
-	if (dev->header_ops) {
-		skb->data -= dev->hard_header_len;
-		skb->tail -= dev->hard_header_len;
-		if (len < dev->hard_header_len)
-			skb_reset_network_header(skb);
+	if (!skb) {
+		size_t reserved = LL_RESERVED_SPACE(dev);
+		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
+
+		rcu_read_unlock();
+		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
+		if (skb == NULL)
+			return -ENOBUFS;
+		skb_reserve(skb, reserved);
+		/* FIXME: Save some space for broken drivers that write a hard
+		 * header at transmission time by themselves. PPP is the notable
+		 * one here. This should really be fixed at the driver level.
+		 */
+		skb_reset_network_header(skb);
+
+		/* Try to align data part correctly */
+		if (hhlen) {
+			skb->data -= hhlen;
+			skb->tail -= hhlen;
+			if (len < hhlen)
+				skb_reset_network_header(skb);
+		}
+		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+		if (err)
+			goto out_free;
+		goto retry;
 	}
 
-	/* Returns -EFAULT on error */
-	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+
 	skb->protocol = proto;
 	skb->dev = dev;
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	if (err)
-		goto out_free;
-
-	/*
-	 *	Now send it
-	 */
 
 	dev_queue_xmit(skb);
 	rcu_read_unlock();
 	return len;
 
-out_free:
-	kfree_skb(skb);
 out_unlock:
 	rcu_read_unlock();
+out_free:
+	kfree_skb(skb);
 	return err;
 }