Patchwork lots of cpu time spent in skb_copy_bits() in net-next with small mtu

login
register
mail settings
Submitter David Miller
Date June 17, 2009, 5:53 a.m.
Message ID <20090616.225316.263195307.davem@davemloft.net>
Download mbox | patch
Permalink /patch/28766/
State RFC
Delegated to: David Miller
Headers show

Comments

David Miller - June 17, 2009, 5:53 a.m.
From: "Ilpo Järvinen" <ilpo.jarvinen@helsinki.fi>
Date: Wed, 17 Jun 2009 08:46:56 +0300 (EEST)

> While I took a very short view on it earlier I notice that handling of the 
> case when head is at the last skb and tail is at the end of queue was 
> changed (without mention in the commit message whether it was 
> intentionally) in the Dave's change (we used to exit at skb == tail while 
> we don't necessarily do that anymore but take the additional checks that 
> are made inside the loop ub tcp_collapse()). It would be nice if Dave 
> would split that kind of complex transformation into more easily 
> verifiable changes, even if the intermediate form itself would not remain 
> in the final form, as is it's rather hard to track it all :-).
> 
> I'm not sure if that change is significant though as one might view it as 
> the opposite, ie., that the previous was unintented early exit since we 
> wouldn't collapse bloated skb in the end but who knows... ...But I'm yet 
> to really _understand_ everything that is going on there.

Sorry, will be more careful in the future. :-)

Can we atleast verify that applying the following revert patch makes
the problem go away?  (it's a combination revert of commits
2df9001edc382c331f338f45d259feeaa740c418 and
915219441d566f1da0caa0e262be49b666159e17).

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilpo Järvinen - June 17, 2009, 6:57 a.m.
On Tue, 16 Jun 2009, David Miller wrote:

> From: "Ilpo Järvinen" <ilpo.jarvinen@helsinki.fi>
> Date: Wed, 17 Jun 2009 08:46:56 +0300 (EEST)
> 
> > While I took a very short view on it earlier I notice that handling of the 
> > case when head is at the last skb and tail is at the end of queue was 
> > changed (without mention in the commit message whether it was 
> > intentionally) in the Dave's change (we used to exit at skb == tail while 
> > we don't necessarily do that anymore but take the additional checks that 
> > are made inside the loop ub tcp_collapse()). It would be nice if Dave 
> > would split that kind of complex transformation into more easily 
> > verifiable changes, even if the intermediate form itself would not remain 
> > in the final form, as is it's rather hard to track it all :-).
> > 
> > I'm not sure if that change is significant though as one might view it as 
> > the opposite, ie., that the previous was unintented early exit since we 
> > wouldn't collapse bloated skb in the end but who knows... ...But I'm yet 
> > to really _understand_ everything that is going on there.
> 
> Sorry, will be more careful in the future. :-)

I re-looked into the problem I supposedly found earlier and it seems that
it turned out to be a false alarm... Huoh, how complex it is track that 
change :-).

> Can we atleast verify that applying the following revert patch makes
> the problem go away?  (it's a combination revert of commits
> 2df9001edc382c331f338f45d259feeaa740c418 and
> 915219441d566f1da0caa0e262be49b666159e17).

Yeah, that's a good start...

Patch

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 17b89c5..0fb8b44 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -439,14 +439,12 @@  int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 			 !tp->urg_data ||
 			 before(tp->urg_seq, tp->copied_seq) ||
 			 !before(tp->urg_seq, tp->rcv_nxt)) {
-			struct sk_buff *skb;
-
 			answ = tp->rcv_nxt - tp->copied_seq;
 
 			/* Subtract 1, if FIN is in queue. */
-			skb = skb_peek_tail(&sk->sk_receive_queue);
-			if (answ && skb)
-				answ -= tcp_hdr(skb)->fin;
+			if (answ && !skb_queue_empty(&sk->sk_receive_queue))
+				answ -=
+		       tcp_hdr((struct sk_buff *)sk->sk_receive_queue.prev)->fin;
 		} else
 			answ = tp->urg_seq - tp->copied_seq;
 		release_sock(sk);
@@ -1384,7 +1382,11 @@  int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 		/* Next get a buffer. */
 
-		skb_queue_walk(&sk->sk_receive_queue, skb) {
+		skb = skb_peek(&sk->sk_receive_queue);
+		do {
+			if (!skb)
+				break;
+
 			/* Now that we have two receive queues this
 			 * shouldn't happen.
 			 */
@@ -1401,7 +1403,8 @@  int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			if (tcp_hdr(skb)->fin)
 				goto found_fin_ok;
 			WARN_ON(!(flags & MSG_PEEK));
-		}
+			skb = skb->next;
+		} while (skb != (struct sk_buff *)&sk->sk_receive_queue);
 
 		/* Well, if we have backlog, try to process it now yet. */
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2bdb0da..eeb8a92 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4426,7 +4426,7 @@  drop:
 		}
 		__skb_queue_head(&tp->out_of_order_queue, skb);
 	} else {
-		struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
+		struct sk_buff *skb1 = tp->out_of_order_queue.prev;
 		u32 seq = TCP_SKB_CB(skb)->seq;
 		u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
@@ -4443,18 +4443,15 @@  drop:
 		}
 
 		/* Find place to insert this segment. */
-		while (1) {
+		do {
 			if (!after(TCP_SKB_CB(skb1)->seq, seq))
 				break;
-			if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
-				skb1 = NULL;
-				break;
-			}
-			skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
-		}
+		} while ((skb1 = skb1->prev) !=
+			 (struct sk_buff *)&tp->out_of_order_queue);
 
 		/* Do skb overlap to previous one? */
-		if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+		if (skb1 != (struct sk_buff *)&tp->out_of_order_queue &&
+		    before(seq, TCP_SKB_CB(skb1)->end_seq)) {
 			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
 				/* All the bits are present. Drop. */
 				__kfree_skb(skb);
@@ -4466,26 +4463,15 @@  drop:
 				tcp_dsack_set(sk, seq,
 					      TCP_SKB_CB(skb1)->end_seq);
 			} else {
-				if (skb_queue_is_first(&tp->out_of_order_queue,
-						       skb1))
-					skb1 = NULL;
-				else
-					skb1 = skb_queue_prev(
-						&tp->out_of_order_queue,
-						skb1);
+				skb1 = skb1->prev;
 			}
 		}
-		if (!skb1)
-			__skb_queue_head(&tp->out_of_order_queue, skb);
-		else
-			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+		__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
 
 		/* And clean segments covered by new one as whole. */
-		while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
-			skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
-
-			if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
-				break;
+		while ((skb1 = skb->next) !=
+		       (struct sk_buff *)&tp->out_of_order_queue &&
+		       after(end_seq, TCP_SKB_CB(skb1)->seq)) {
 			if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
 				tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
 						 end_seq);
@@ -4506,10 +4492,7 @@  add_sack:
 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
 					struct sk_buff_head *list)
 {
-	struct sk_buff *next = NULL;
-
-	if (!skb_queue_is_last(list, skb))
-		next = skb_queue_next(list, skb);
+	struct sk_buff *next = skb->next;
 
 	__skb_unlink(skb, list);
 	__kfree_skb(skb);
@@ -4520,9 +4503,6 @@  static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
 
 /* Collapse contiguous sequence of skbs head..tail with
  * sequence numbers start..end.
- *
- * If tail is NULL, this means until the end of the list.
- *
  * Segments with FIN/SYN are not collapsed (only because this
  * simplifies code)
  */
@@ -4531,23 +4511,15 @@  tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 	     struct sk_buff *head, struct sk_buff *tail,
 	     u32 start, u32 end)
 {
-	struct sk_buff *skb, *n;
-	bool end_of_skbs;
+	struct sk_buff *skb;
 
 	/* First, check that queue is collapsible and find
 	 * the point where collapsing can be useful. */
-	skb = head;
-restart:
-	end_of_skbs = true;
-	skb_queue_walk_from_safe(list, skb, n) {
-		if (skb == tail)
-			break;
+	for (skb = head; skb != tail;) {
 		/* No new bits? It is possible on ofo queue. */
 		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
 			skb = tcp_collapse_one(sk, skb, list);
-			if (!skb)
-				break;
-			goto restart;
+			continue;
 		}
 
 		/* The first skb to collapse is:
@@ -4557,24 +4529,16 @@  restart:
 		 */
 		if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
 		    (tcp_win_from_space(skb->truesize) > skb->len ||
-		     before(TCP_SKB_CB(skb)->seq, start))) {
-			end_of_skbs = false;
+		     before(TCP_SKB_CB(skb)->seq, start) ||
+		     (skb->next != tail &&
+		      TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb->next)->seq)))
 			break;
-		}
-
-		if (!skb_queue_is_last(list, skb)) {
-			struct sk_buff *next = skb_queue_next(list, skb);
-			if (next != tail &&
-			    TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
-				end_of_skbs = false;
-				break;
-			}
-		}
 
 		/* Decided to skip this, advance start seq. */
 		start = TCP_SKB_CB(skb)->end_seq;
+		skb = skb->next;
 	}
-	if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
+	if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
 		return;
 
 	while (before(start, end)) {
@@ -4619,8 +4583,7 @@  restart:
 			}
 			if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
 				skb = tcp_collapse_one(sk, skb, list);
-				if (!skb ||
-				    skb == tail ||
+				if (skb == tail ||
 				    tcp_hdr(skb)->syn ||
 				    tcp_hdr(skb)->fin)
 					return;
@@ -4647,21 +4610,17 @@  static void tcp_collapse_ofo_queue(struct sock *sk)
 	head = skb;
 
 	for (;;) {
-		struct sk_buff *next = NULL;
-
-		if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
-			next = skb_queue_next(&tp->out_of_order_queue, skb);
-		skb = next;
+		skb = skb->next;
 
 		/* Segment is terminated when we see gap or when
 		 * we are at the end of all the queue. */
-		if (!skb ||
+		if (skb == (struct sk_buff *)&tp->out_of_order_queue ||
 		    after(TCP_SKB_CB(skb)->seq, end) ||
 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
 			tcp_collapse(sk, &tp->out_of_order_queue,
 				     head, skb, start, end);
 			head = skb;
-			if (!skb)
+			if (skb == (struct sk_buff *)&tp->out_of_order_queue)
 				break;
 			/* Start new segment */
 			start = TCP_SKB_CB(skb)->seq;
@@ -4722,11 +4681,10 @@  static int tcp_prune_queue(struct sock *sk)
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
 	tcp_collapse_ofo_queue(sk);
-	if (!skb_queue_empty(&sk->sk_receive_queue))
-		tcp_collapse(sk, &sk->sk_receive_queue,
-			     skb_peek(&sk->sk_receive_queue),
-			     NULL,
-			     tp->copied_seq, tp->rcv_nxt);
+	tcp_collapse(sk, &sk->sk_receive_queue,
+		     sk->sk_receive_queue.next,
+		     (struct sk_buff *)&sk->sk_receive_queue,
+		     tp->copied_seq, tp->rcv_nxt);
 	sk_mem_reclaim(sk);
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)