diff mbox series

[net-next,1/2] tcp_bbr: better deal with suboptimal GSO (II)

Message ID 20180228224047.123054-2-edumazet@google.com
State Accepted, archived
Delegated to: David Miller
Headers show
Series tcp_bbr: more GSO work | expand

Commit Message

Eric Dumazet Feb. 28, 2018, 10:40 p.m. UTC
This is the second part of dealing with suboptimal device GSO parameters.
In the first patch (350c9f484bde "tcp_bbr: better deal with suboptimal GSO")
we dealt with devices having a low gso_max_segs.

Some devices lower gso_max_size from 64 KB to 16 KB (r8152 is an example).

In order to probe an optimal cwnd, we want BBR to be insensitive
to whatever GSO constraints a device may have.

This patch removes the tso_segs_goal() CC callback in favor of
min_tso_segs(), for CC modules wanting to override sysctl_tcp_min_tso_segs.

The next patch will remove bbr->tso_segs_goal, since it does not have
to be persistent.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/tcp.h     |  6 ++----
 net/ipv4/tcp_bbr.c    | 23 +++++++++++++----------
 net/ipv4/tcp_output.c | 15 ++++++++-------
 3 files changed, 23 insertions(+), 21 deletions(-)

Comments

Neal Cardwell March 1, 2018, 3:15 a.m. UTC | #1
On Wed, Feb 28, 2018 at 5:40 PM, Eric Dumazet <edumazet@google.com> wrote:
>
> This is second part of dealing with suboptimal device gso parameters.
> In first patch (350c9f484bde "tcp_bbr: better deal with suboptimal GSO")
> we dealt with devices having low gso_max_segs
>
> Some devices lower gso_max_size from 64KB to 16 KB (r8152 is an example)
>
> In order to probe an optimal cwnd, we want BBR being not sensitive
> to whatever GSO constraint a device can have.
>
> This patch removes tso_segs_goal() CC callback in favor of
> min_tso_segs() for CC wanting to override sysctl_tcp_min_tso_segs
>
> Next patch will remove bbr->tso_segs_goal since it does not have
> to be persistent.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---

Acked-by: Neal Cardwell <ncardwell@google.com>

Looks great to me. Thanks, Eric!

neal
diff mbox series

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 92b06c6e7732ad7c61b580427fc085fa0dff1063..9c9b3768b350abfd51776563d220d5e97ca9da69 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -511,8 +511,6 @@  __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
 #endif
 /* tcp_output.c */
 
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
-		     int min_tso_segs);
 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 			       int nonagle);
 int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
@@ -981,8 +979,8 @@  struct tcp_congestion_ops {
 	u32  (*undo_cwnd)(struct sock *sk);
 	/* hook for packet ack accounting (optional) */
 	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
-	/* suggest number of segments for each skb to transmit (optional) */
-	u32 (*tso_segs_goal)(struct sock *sk);
+	/* override sysctl_tcp_min_tso_segs */
+	u32 (*min_tso_segs)(struct sock *sk);
 	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
 	u32 (*sndbuf_expand)(struct sock *sk);
 	/* call when packets are delivered to update cwnd and pacing rate,
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index a471f696e13c82cddd11633fd4bfdbc6d84f4bcc..afc0567b8a98fbb718ba04505053aa3f62ab5784 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -261,23 +261,26 @@  static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
 		sk->sk_pacing_rate = rate;
 }
 
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
 {
-	struct bbr *bbr = inet_csk_ca(sk);
-
-	return bbr->tso_segs_goal;
+	return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
 }
 
 static void bbr_set_tso_segs_goal(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
-	u32 min_segs;
+	u32 segs, bytes;
+
+	/* Sort of tcp_tso_autosize() but ignoring
+	 * driver provided sk_gso_max_size.
+	 */
+	bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+		      GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
 
-	min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
-	bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
-				 0x7FU);
+	bbr->tso_segs_goal = min(segs, 0x7FU);
 }
 
 /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -936,7 +939,7 @@  static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
 	.undo_cwnd	= bbr_undo_cwnd,
 	.cwnd_event	= bbr_cwnd_event,
 	.ssthresh	= bbr_ssthresh,
-	.tso_segs_goal	= bbr_tso_segs_goal,
+	.min_tso_segs	= bbr_min_tso_segs,
 	.get_info	= bbr_get_info,
 	.set_state	= bbr_set_state,
 };
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 49d043de3476bdfcaf6e9a606d0da0f2094373a8..383cac0ff0ec059ca7dbc1a6304cc7f8183e008d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1703,8 +1703,8 @@  static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
 /* Return how many segs we'd like on a TSO packet,
  * to send one TSO packet per ms
  */
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
-		     int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+			    int min_tso_segs)
 {
 	u32 bytes, segs;
 
@@ -1720,7 +1720,6 @@  u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 
 	return segs;
 }
-EXPORT_SYMBOL(tcp_tso_autosize);
 
 /* Return the number of segments we want in the skb we are transmitting.
  * See if congestion control module wants to decide; otherwise, autosize.
@@ -1728,11 +1727,13 @@  EXPORT_SYMBOL(tcp_tso_autosize);
 static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
 {
 	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-	u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+	u32 min_tso, tso_segs;
 
-	if (!tso_segs)
-		tso_segs = tcp_tso_autosize(sk, mss_now,
-				sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+	min_tso = ca_ops->min_tso_segs ?
+			ca_ops->min_tso_segs(sk) :
+			sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+
+	tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
 	return min_t(u32, tso_segs, sk->sk_gso_max_segs);
 }