| Submitter | Stephen Hemminger |
|---|---|
| Date | Feb. 5, 2013, 5:25 p.m. |
| Message ID | <20130205092517.58d11171@nehalam.linuxnetplumber.net> |
| Download | mbox | patch |
| Permalink | /patch/218305/ |
| State | Accepted |
| Delegated to: | David Miller |
| Headers | show |
Comments
On Tue, Feb 5, 2013 at 9:25 AM, Stephen Hemminger <stephen@networkplumber.org> wrote: > TCP Appropriate Byte Count was added by me, but later disabled. > There is no point in maintaining it since it is a potential source > of bugs and Linux already implements other better window protection > heuristics. +1 > > Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> > > --- a/Documentation/networking/ip-sysctl.txt 2013-02-05 09:22:44.379791669 -0800 > +++ b/Documentation/networking/ip-sysctl.txt 2013-02-05 09:23:13.035421064 -0800 > @@ -130,17 +130,6 @@ somaxconn - INTEGER > Defaults to 128. See also tcp_max_syn_backlog for additional tuning > for TCP sockets. > > -tcp_abc - INTEGER > - Controls Appropriate Byte Count (ABC) defined in RFC3465. > - ABC is a way of increasing congestion window (cwnd) more slowly > - in response to partial acknowledgments. > - Possible values are: > - 0 increase cwnd once per acknowledgment (no ABC) > - 1 increase cwnd once per acknowledgment of full sized segment > - 2 allow increase cwnd by two if acknowledgment is > - of two segments to compensate for delayed acknowledgments. > - Default: 0 (off) > - > tcp_abort_on_overflow - BOOLEAN > If listening service is too slow to accept new connections, > reset them. Default state is FALSE. 
It means that if overflow > --- a/include/net/tcp.h 2013-02-05 09:22:44.379791669 -0800 > +++ b/include/net/tcp.h 2013-02-05 09:23:13.035421064 -0800 > @@ -279,7 +279,6 @@ extern int sysctl_tcp_dma_copybreak; > extern int sysctl_tcp_nometrics_save; > extern int sysctl_tcp_moderate_rcvbuf; > extern int sysctl_tcp_tso_win_divisor; > -extern int sysctl_tcp_abc; > extern int sysctl_tcp_mtu_probing; > extern int sysctl_tcp_base_mss; > extern int sysctl_tcp_workaround_signed_windows; > --- a/kernel/sysctl_binary.c 2013-02-05 09:22:44.379791669 -0800 > +++ b/kernel/sysctl_binary.c 2013-02-05 09:23:13.035421064 -0800 > @@ -387,7 +387,6 @@ static const struct bin_table bin_net_ip > { CTL_INT, NET_TCP_MODERATE_RCVBUF, "tcp_moderate_rcvbuf" }, > { CTL_INT, NET_TCP_TSO_WIN_DIVISOR, "tcp_tso_win_divisor" }, > { CTL_STR, NET_TCP_CONG_CONTROL, "tcp_congestion_control" }, > - { CTL_INT, NET_TCP_ABC, "tcp_abc" }, > { CTL_INT, NET_TCP_MTU_PROBING, "tcp_mtu_probing" }, > { CTL_INT, NET_TCP_BASE_MSS, "tcp_base_mss" }, > { CTL_INT, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, "tcp_workaround_signed_windows" }, > --- a/net/ipv4/sysctl_net_ipv4.c 2013-02-05 09:22:49.035731453 -0800 > +++ b/net/ipv4/sysctl_net_ipv4.c 2013-02-05 09:23:13.035421064 -0800 > @@ -633,13 +633,6 @@ static struct ctl_table ipv4_table[] = { > .proc_handler = proc_tcp_congestion_control, > }, > { > - .procname = "tcp_abc", > - .data = &sysctl_tcp_abc, > - .maxlen = sizeof(int), > - .mode = 0644, > - .proc_handler = proc_dointvec, > - }, > - { > .procname = "tcp_mtu_probing", > .data = &sysctl_tcp_mtu_probing, > .maxlen = sizeof(int), > --- a/net/ipv4/tcp_cong.c 2013-02-05 09:22:44.379791669 -0800 > +++ b/net/ipv4/tcp_cong.c 2013-02-05 09:23:13.035421064 -0800 > @@ -311,26 +311,10 @@ void tcp_slow_start(struct tcp_sock *tp) > int cnt; /* increase in packets */ > unsigned int delta = 0; > > - /* RFC3465: ABC Slow start > - * Increase only after a full MSS of bytes is acked > - * > - * TCP sender SHOULD increase cwnd by 
the number of > - * previously unacknowledged bytes ACKed by each incoming > - * acknowledgment, provided the increase is not more than L > - */ > - if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache) > - return; > - > if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh) > cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */ > else > cnt = tp->snd_cwnd; /* exponential increase */ > - > - /* RFC3465: ABC > - * We MAY increase by 2 if discovered delayed ack > - */ > - if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) > - cnt <<= 1; > tp->bytes_acked = 0; Is bytes_acked still needed? > > tp->snd_cwnd_cnt += cnt; > @@ -372,20 +356,9 @@ void tcp_reno_cong_avoid(struct sock *sk > /* In "safe" area, increase. */ > if (tp->snd_cwnd <= tp->snd_ssthresh) > tcp_slow_start(tp); > - > /* In dangerous area, increase slowly. */ > - else if (sysctl_tcp_abc) { > - /* RFC3465: Appropriate Byte Count > - * increase once for each full cwnd acked > - */ > - if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) { > - tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache; > - if (tp->snd_cwnd < tp->snd_cwnd_clamp) > - tp->snd_cwnd++; > - } > - } else { > + else > tcp_cong_avoid_ai(tp, tp->snd_cwnd); > - } > } > EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); > > --- a/net/ipv4/tcp_input.c 2013-02-05 09:22:49.035731453 -0800 > +++ b/net/ipv4/tcp_input.c 2013-02-05 09:23:13.039421013 -0800 > @@ -98,7 +98,6 @@ int sysctl_tcp_frto_response __read_most > int sysctl_tcp_thin_dupack __read_mostly; > > int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; > -int sysctl_tcp_abc __read_mostly; > int sysctl_tcp_early_retrans __read_mostly = 2; > > #define FLAG_DATA 0x01 /* Incoming frame contained data. 
*/ > @@ -3608,15 +3607,6 @@ static int tcp_ack(struct sock *sk, cons > if (after(ack, prior_snd_una)) > flag |= FLAG_SND_UNA_ADVANCED; > > - if (sysctl_tcp_abc) { > - if (icsk->icsk_ca_state < TCP_CA_CWR) > - tp->bytes_acked += ack - prior_snd_una; > - else if (icsk->icsk_ca_state == TCP_CA_Loss) > - /* we assume just one segment left network */ > - tp->bytes_acked += min(ack - prior_snd_una, > - tp->mss_cache); > - } > - > prior_fackets = tp->fackets_out; > prior_in_flight = tcp_packets_in_flight(tp); > > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Yuchung Cheng <ycheng@google.com> Date: Tue, 5 Feb 2013 09:47:59 -0800 > On Tue, Feb 5, 2013 at 9:25 AM, Stephen Hemminger > <stephen@networkplumber.org> wrote: >> TCP Appropriate Byte Count was added by me, but later disabled. >> There is no point in maintaining it since it is a potential source >> of bugs and Linux already implements other better window protection >> heuristics. > +1 > >> >> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> Applied, and: >> tp->bytes_acked = 0; > Is bytes_acked still needed? I took care of this when I committed Stephen's patch. Thanks. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Patch
--- a/Documentation/networking/ip-sysctl.txt 2013-02-05 09:22:44.379791669 -0800 +++ b/Documentation/networking/ip-sysctl.txt 2013-02-05 09:23:13.035421064 -0800 @@ -130,17 +130,6 @@ somaxconn - INTEGER Defaults to 128. See also tcp_max_syn_backlog for additional tuning for TCP sockets. -tcp_abc - INTEGER - Controls Appropriate Byte Count (ABC) defined in RFC3465. - ABC is a way of increasing congestion window (cwnd) more slowly - in response to partial acknowledgments. - Possible values are: - 0 increase cwnd once per acknowledgment (no ABC) - 1 increase cwnd once per acknowledgment of full sized segment - 2 allow increase cwnd by two if acknowledgment is - of two segments to compensate for delayed acknowledgments. - Default: 0 (off) - tcp_abort_on_overflow - BOOLEAN If listening service is too slow to accept new connections, reset them. Default state is FALSE. It means that if overflow --- a/include/net/tcp.h 2013-02-05 09:22:44.379791669 -0800 +++ b/include/net/tcp.h 2013-02-05 09:23:13.035421064 -0800 @@ -279,7 +279,6 @@ extern int sysctl_tcp_dma_copybreak; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; -extern int sysctl_tcp_abc; extern int sysctl_tcp_mtu_probing; extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; --- a/kernel/sysctl_binary.c 2013-02-05 09:22:44.379791669 -0800 +++ b/kernel/sysctl_binary.c 2013-02-05 09:23:13.035421064 -0800 @@ -387,7 +387,6 @@ static const struct bin_table bin_net_ip { CTL_INT, NET_TCP_MODERATE_RCVBUF, "tcp_moderate_rcvbuf" }, { CTL_INT, NET_TCP_TSO_WIN_DIVISOR, "tcp_tso_win_divisor" }, { CTL_STR, NET_TCP_CONG_CONTROL, "tcp_congestion_control" }, - { CTL_INT, NET_TCP_ABC, "tcp_abc" }, { CTL_INT, NET_TCP_MTU_PROBING, "tcp_mtu_probing" }, { CTL_INT, NET_TCP_BASE_MSS, "tcp_base_mss" }, { CTL_INT, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, "tcp_workaround_signed_windows" }, --- a/net/ipv4/sysctl_net_ipv4.c 2013-02-05 09:22:49.035731453 
-0800 +++ b/net/ipv4/sysctl_net_ipv4.c 2013-02-05 09:23:13.035421064 -0800 @@ -633,13 +633,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_tcp_congestion_control, }, { - .procname = "tcp_abc", - .data = &sysctl_tcp_abc, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { .procname = "tcp_mtu_probing", .data = &sysctl_tcp_mtu_probing, .maxlen = sizeof(int), --- a/net/ipv4/tcp_cong.c 2013-02-05 09:22:44.379791669 -0800 +++ b/net/ipv4/tcp_cong.c 2013-02-05 09:23:13.035421064 -0800 @@ -311,26 +311,10 @@ void tcp_slow_start(struct tcp_sock *tp) int cnt; /* increase in packets */ unsigned int delta = 0; - /* RFC3465: ABC Slow start - * Increase only after a full MSS of bytes is acked - * - * TCP sender SHOULD increase cwnd by the number of - * previously unacknowledged bytes ACKed by each incoming - * acknowledgment, provided the increase is not more than L - */ - if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache) - return; - if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh) cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */ else cnt = tp->snd_cwnd; /* exponential increase */ - - /* RFC3465: ABC - * We MAY increase by 2 if discovered delayed ack - */ - if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) - cnt <<= 1; tp->bytes_acked = 0; tp->snd_cwnd_cnt += cnt; @@ -372,20 +356,9 @@ void tcp_reno_cong_avoid(struct sock *sk /* In "safe" area, increase. */ if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp); - /* In dangerous area, increase slowly. 
*/ - else if (sysctl_tcp_abc) { - /* RFC3465: Appropriate Byte Count - * increase once for each full cwnd acked - */ - if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) { - tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache; - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } - } else { + else tcp_cong_avoid_ai(tp, tp->snd_cwnd); - } } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); --- a/net/ipv4/tcp_input.c 2013-02-05 09:22:49.035731453 -0800 +++ b/net/ipv4/tcp_input.c 2013-02-05 09:23:13.039421013 -0800 @@ -98,7 +98,6 @@ int sysctl_tcp_frto_response __read_most int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_abc __read_mostly; int sysctl_tcp_early_retrans __read_mostly = 2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ @@ -3608,15 +3607,6 @@ static int tcp_ack(struct sock *sk, cons if (after(ack, prior_snd_una)) flag |= FLAG_SND_UNA_ADVANCED; - if (sysctl_tcp_abc) { - if (icsk->icsk_ca_state < TCP_CA_CWR) - tp->bytes_acked += ack - prior_snd_una; - else if (icsk->icsk_ca_state == TCP_CA_Loss) - /* we assume just one segment left network */ - tp->bytes_acked += min(ack - prior_snd_una, - tp->mss_cache); - } - prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp);
TCP Appropriate Byte Count (ABC) was added by me, but later disabled. There is no point in maintaining it, since it is a potential source of bugs and Linux already implements other, better window-protection heuristics. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html