From patchwork Fri Oct 3 22:54:06 2008
X-Patchwork-Submitter: Ilpo Järvinen
X-Patchwork-Id: 2653
X-Patchwork-Delegate: davem@davemloft.net
Date: Sat, 4 Oct 2008 01:54:06 +0300 (EEST)
From: Ilpo Järvinen
To: David Miller
cc: Netdev
Subject: Re: [PATCH net-next] tcp: kill pointless urg_mode

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7672906..fe77e14 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -312,8 +312,11 @@ struct tcp_sock {
 	u32	retrans_out;	/* Retransmitted packets out		*/
 	u16	urg_data;	/* Saved octet of OOB data and control flags */
-	u8	urg_mode;	/* In urgent mode */
 	u8	ecn_flags;	/* ECN status bits.			*/
+	u8	reordering;	/* Packet reordering metric.		*/
+	u32	snd_up;		/* Urgent pointer */
+
+	u8	keepalive_probes; /* num of allowed keep alive probes	*/
 /*
  *	Options received (usually on last packet, some only on SYN packets).
  */
@@ -361,8 +364,6 @@ struct tcp_sock {
 	u32	lost_retrans_low;	/* Sent seq after any rxmit (lowest) */
-	u8	reordering;	/* Packet reordering metric.		*/
-	u8	keepalive_probes; /* num of allowed keep alive probes	*/
 	u32	prior_ssthresh; /* ssthresh saved at recovery start	*/
 	u32	high_seq;	/* snd_nxt at onset of congestion	*/
@@ -374,8 +375,6 @@ struct tcp_sock {
 	u32	total_retrans;	/* Total retransmits for entire connection */
 	u32	urg_seq;	/* Seq of received urgent pointer */
-	u32	snd_up;		/* Urgent pointer */
-
 	unsigned int		keepalive_time;	  /* time before keep alive takes place */
 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f6cc341..c390932 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -805,6 +805,11 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
 }
 extern int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight);
+static inline int tcp_urg_mode(const struct tcp_sock *tp)
+{
+	return tp->snd_una != tp->snd_up;
+}
+
 static inline void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss,
 				       const struct sk_buff *skb)
 {
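For anyone who wants to poke at the idea outside the kernel: with the hunks above,
urgent mode is no longer a stored u8 flag but a property derived from the sequence
space, true exactly while the urgent point is still ahead of the unacked edge. A
minimal user-space sketch follows; the toy_tp struct, toy_urg_mode() and the main()
harness are invented for illustration, and only the snd_una != snd_up comparison
mirrors the new helper in include/net/tcp.h.

/*
 * Stand-alone sketch (not kernel code): urgent mode as a derived predicate.
 */
#include <stdio.h>
#include <stdint.h>

struct toy_tp {
	uint32_t snd_una;	/* oldest unacknowledged sequence number */
	uint32_t snd_up;	/* urgent pointer (sequence of the urgent point) */
	uint32_t write_seq;	/* next sequence the sender will queue */
};

/* same shape as the tcp_urg_mode() helper added above */
static int toy_urg_mode(const struct toy_tp *tp)
{
	return tp->snd_una != tp->snd_up;
}

int main(void)
{
	/* start near the 32-bit wrap: the test is plain equality, wrap-safe */
	struct toy_tp tp = { .snd_una = 0xfffffff0u,
			     .snd_up  = 0xfffffff0u,
			     .write_seq = 0xfffffff0u };

	printf("initial        urg_mode=%d\n", toy_urg_mode(&tp));	/* 0 */

	/* MSG_OOB send: tcp_mark_urg() now only records snd_up */
	tp.write_seq += 100;
	tp.snd_up = tp.write_seq;
	printf("after MSG_OOB  urg_mode=%d\n", toy_urg_mode(&tp));	/* 1 */

	/* ACKs advance snd_una; once it reaches snd_up, urgent mode clears */
	tp.snd_una += 100;
	printf("after full ACK urg_mode=%d\n", toy_urg_mode(&tp));	/* 0 */
	return 0;
}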
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1ab341e..87268d2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -493,10 +493,8 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
 static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
 				struct sk_buff *skb)
 {
-	if (flags & MSG_OOB) {
-		tp->urg_mode = 1;
+	if (flags & MSG_OOB)
 		tp->snd_up = tp->write_seq;
-	}
 }
 
 static inline void tcp_push(struct sock *sk, int flags, int mss_now,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3b76bce..c19f429 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2836,7 +2836,8 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
  */
-static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
+static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
+			       u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2903,9 +2904,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 		if (sacked & TCPCB_LOST)
 			tp->lost_out -= acked_pcount;
 
-		if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up)))
-			tp->urg_mode = 0;
-
 		tp->packets_out -= acked_pcount;
 		pkts_acked += acked_pcount;
@@ -2935,6 +2933,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 			tp->lost_skb_hint = NULL;
 	}
 
+	if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
+		tp->snd_up = tp->snd_una;
+
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
@@ -3311,7 +3312,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		goto no_queue;
 
 	/* See if we can take anything off of the retransmit queue. */
-	flag |= tcp_clean_rtx_queue(sk, prior_fackets);
+	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
 
 	if (tp->frto_counter)
 		frto_cwnd = tcp_process_frto(sk, flag);
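The tcp_clean_rtx_queue() change above retires the urgent pointer on the ACK path:
once a cumulative ACK covers snd_up, snd_up is pulled forward to snd_una, which is
exactly what makes tcp_urg_mode() turn off without any explicit flag clearing. A
small stand-alone sketch of that rule follows; toy_between() only re-implements the
shape of the kernel's between() macro, and advance_snd_up() plus the examples are
made up for illustration.

/*
 * Stand-alone sketch (not kernel code): wrap-safe retirement of snd_up.
 */
#include <stdio.h>
#include <stdint.h>

/* is seq1 inside [seq2, seq3] in modulo-2^32 sequence arithmetic? */
static int toy_between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}

/* mirrors the new hunk: pull snd_up to snd_una once the urgent point is acked */
static uint32_t advance_snd_up(uint32_t snd_up, uint32_t prior_snd_una,
			       uint32_t snd_una)
{
	if (toy_between(snd_up, prior_snd_una, snd_una))
		return snd_una;
	return snd_up;
}

int main(void)
{
	/* ACK covers the urgent point, even across the 2^32 wrap -> 0x10 */
	printf("%#x\n", (unsigned)advance_snd_up(0xfffffffeu, 0xfffffff0u, 0x10u));
	/* ACK stops short of the urgent point: snd_up is left alone -> 0x200 */
	printf("%#x\n", (unsigned)advance_snd_up(0x200u, 0x100u, 0x180u));
	return 0;
}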
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f976fc5..779f2e9 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -395,6 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->pred_flags = 0;
 		newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
 		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
+		newtp->snd_up = treq->snt_isn + 1;
 		tcp_prequeue_init(newtp);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 493553c..e8e0e19 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -646,8 +646,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	th->check		= 0;
 	th->urg_ptr		= 0;
-	if (unlikely(tp->urg_mode &&
-		     between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
+	/* The urg_mode check is necessary during a below snd_una win probe */
+	if (unlikely(between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF) &&
+		     tcp_urg_mode(tp))) {
 		th->urg_ptr		= htons(tp->snd_up - tcb->seq);
 		th->urg			= 1;
 	}
@@ -1012,7 +1013,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 /* Compute the current effective MSS, taking SACKs and IP options,
  * and even PMTU discovery events into account.
  *
- * LARGESEND note: !urg_mode is overkill, only frames up to snd_up
+ * LARGESEND note: !tcp_urg_mode is overkill, only frames up to snd_up
  * cannot be large. However, taking into account rare use of URG, this
  * is not a big flaw.
 */
@@ -1029,7 +1030,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 	mss_now = tp->mss_cache;
-	if (large_allowed && sk_can_gso(sk) && !tp->urg_mode)
+	if (large_allowed && sk_can_gso(sk) && !tcp_urg_mode(tp))
 		doing_tso = 1;
 	if (dst) {
@@ -1193,7 +1194,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
 	/* Don't use the nagle rule for urgent data (or for the final FIN).
 	 * Nagle can be ignored during F-RTO too (see RFC4138).
 	 */
-	if (tp->urg_mode || (tp->frto_counter == 2) ||
+	if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
 	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
 		return 1;
@@ -2358,6 +2359,7 @@ static void tcp_connect_init(struct sock *sk)
 	tcp_init_wl(tp, tp->write_seq, 0);
 	tp->snd_una = tp->write_seq;
 	tp->snd_sml = tp->write_seq;
+	tp->snd_up = tp->write_seq;
 	tp->rcv_nxt = 0;
 	tp->rcv_wup = 0;
 	tp->copied_seq = 0;
@@ -2567,8 +2569,7 @@ int tcp_write_wakeup(struct sock *sk)
 		tcp_event_new_data_sent(sk, skb);
 		return err;
 	} else {
-		if (tp->urg_mode &&
-		    between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
+		if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
 			tcp_xmit_probe_skb(sk, 1);
 		return tcp_xmit_probe_skb(sk, 0);
 	}
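On the tcp_transmit_skb() hunk: the URG bit and the 16-bit urgent offset are still
derived per segment from snd_up, and the extra tcp_urg_mode() test only matters when
a probe is sent below snd_una after the urgent point has already been retired. A
rough user-space sketch of that header fill follows; the toy_hdr struct, fill_urg()
and the example values are invented, and only the between()/offset arithmetic
follows the hunk.

/*
 * Stand-alone sketch (not kernel code): filling URG fields for one segment.
 */
#include <stdio.h>
#include <stdint.h>

static int toy_between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}

struct toy_hdr {
	uint16_t urg_ptr;	/* host order here; the kernel stores htons() */
	uint8_t  urg;
};

static void fill_urg(struct toy_hdr *th, uint32_t seq, uint32_t snd_up,
		     int urg_mode)
{
	th->urg_ptr = 0;
	th->urg = 0;
	/* the urgent pointer fits only if snd_up is within 0xFFFF of this
	 * segment; the urg_mode test keeps a stale snd_up (at or below
	 * snd_una, e.g. during a window probe) from being advertised again */
	if (toy_between(snd_up, seq + 1, seq + 0xFFFF) && urg_mode) {
		th->urg_ptr = (uint16_t)(snd_up - seq);
		th->urg = 1;
	}
}

int main(void)
{
	struct toy_hdr th;

	fill_urg(&th, 1000, 1200, 1);		/* urgent point 200 bytes ahead */
	printf("urg=%u ptr=%u\n", (unsigned)th.urg, (unsigned)th.urg_ptr);
	fill_urg(&th, 1000, 1000 + 0x20000, 1);	/* too far ahead for 16 bits */
	printf("urg=%u ptr=%u\n", (unsigned)th.urg, (unsigned)th.urg_ptr);
	return 0;
}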