diff mbox

[RFC,v2,net-next,3/7] tcp: Merge tx_flags/tskey/txstamp_ack in tcp_shifted_skb

Message ID 1461019569-3037369-4-git-send-email-kafai@fb.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Martin KaFai Lau April 18, 2016, 10:46 p.m. UTC
After receiving sacks, tcp_shifted_skb() will collapse
skbs if possible.  tx_flags/tskey/txstamp_ack also has
to be merged in this case.

This patch resues the tcp_skb_collapse_tstamp() to handle
them.

BPF Output Before:
~~~~~
<no-output-due-to-missing-tstamp-event>

BPF Output After:
~~~~~
<...>-2024  [007] d.s.    88.644374: : ee_data:14599

Packetdrill Script:
~~~~~
+0 `sysctl -q -w net.ipv4.tcp_min_tso_segs=10`
+0 `sysctl -q -w net.ipv4.tcp_no_metrics_save=1`
+0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0

0.100 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
0.100 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 7>
0.200 < . 1:1(0) ack 1 win 257
0.200 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0

0.200 write(4, ..., 1460) = 1460
+0 setsockopt(4, SOL_SOCKET, 37, [2688], 4) = 0
0.200 write(4, ..., 13140) = 13140
+0 setsockopt(4, SOL_SOCKET, 37, [2176], 4) = 0

0.200 > P. 1:1461(1460) ack 1
0.200 > . 1461:8761(7300) ack 1
0.200 > P. 8761:14601(5840) ack 1

0.300 < . 1:1(0) ack 1 win 257 <sack 1461:14601,nop,nop>
0.300 > P. 1:1461(1460) ack 1
0.400 < . 1:1(0) ack 14601 win 257

0.400 close(4) = 0
0.400 > F. 14601:14601(0) ack 1
0.500 < F. 1:1(0) ack 14602 win 257
0.500 > . 14602:14602(0) ack 2

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Soheil Hassas Yeganeh <soheil.kdev@gmail.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
---
 include/net/tcp.h     | 2 ++
 net/ipv4/tcp_input.c  | 1 +
 net/ipv4/tcp_output.c | 4 ++--
 3 files changed, 5 insertions(+), 2 deletions(-)

Comments

Soheil Hassas Yeganeh April 19, 2016, 5:38 a.m. UTC | #1
On Mon, Apr 18, 2016 at 6:46 PM, Martin KaFai Lau <kafai@fb.com> wrote:
> After receiving sacks, tcp_shifted_skb() will collapse
> skbs if possible.  tx_flags/tskey/txstamp_ack also has
> to be merged in this case.
>
> This patch resues the tcp_skb_collapse_tstamp() to handle
> them.
>
> BPF Output Before:
> ~~~~~
> <no-output-due-to-missing-tstamp-event>
>
> BPF Output After:
> ~~~~~
> <...>-2024  [007] d.s.    88.644374: : ee_data:14599
>
> Packetdrill Script:
> ~~~~~
> +0 `sysctl -q -w net.ipv4.tcp_min_tso_segs=10`
> +0 `sysctl -q -w net.ipv4.tcp_no_metrics_save=1`
> +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
> +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
> +0 bind(3, ..., ...) = 0
> +0 listen(3, 1) = 0
>
> 0.100 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
> 0.100 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 7>
> 0.200 < . 1:1(0) ack 1 win 257
> 0.200 accept(3, ..., ...) = 4
> +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
>
> 0.200 write(4, ..., 1460) = 1460
> +0 setsockopt(4, SOL_SOCKET, 37, [2688], 4) = 0
> 0.200 write(4, ..., 13140) = 13140
> +0 setsockopt(4, SOL_SOCKET, 37, [2176], 4) = 0
>
> 0.200 > P. 1:1461(1460) ack 1
> 0.200 > . 1461:8761(7300) ack 1
> 0.200 > P. 8761:14601(5840) ack 1
>
> 0.300 < . 1:1(0) ack 1 win 257 <sack 1461:14601,nop,nop>
> 0.300 > P. 1:1461(1460) ack 1
> 0.400 < . 1:1(0) ack 14601 win 257
>
> 0.400 close(4) = 0
> 0.400 > F. 14601:14601(0) ack 1
> 0.500 < F. 1:1(0) ack 14602 win 257
> 0.500 > . 14602:14602(0) ack 2
>
> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Neal Cardwell <ncardwell@google.com>
> Cc: Soheil Hassas Yeganeh <soheil.kdev@gmail.com>

Acked-by: Soheil Hassas Yeganeh <soheil@google.com>

> Cc: Willem de Bruijn <willemb@google.com>
> Cc: Yuchung Cheng <ycheng@google.com>
> ---
>  include/net/tcp.h     | 2 ++
>  net/ipv4/tcp_input.c  | 1 +
>  net/ipv4/tcp_output.c | 4 ++--
>  3 files changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index fd40f8c..c0ef054 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -557,6 +557,8 @@ void tcp_send_ack(struct sock *sk);
>  void tcp_send_delayed_ack(struct sock *sk);
>  void tcp_send_loss_probe(struct sock *sk);
>  bool tcp_schedule_loss_probe(struct sock *sk);
> +void tcp_skb_collapse_tstamp(struct sk_buff *skb,
> +                            const struct sk_buff *next_skb);
>
>  /* tcp_input.c */
>  void tcp_resume_early_retransmit(struct sock *sk);
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 5e45a9c..75e8336 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -1309,6 +1309,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
>         if (skb == tcp_highest_sack(sk))
>                 tcp_advance_highest_sack(sk, skb);
>
> +       tcp_skb_collapse_tstamp(prev, skb);
>         tcp_unlink_write_queue(skb, sk);
>         sk_wmem_free_skb(sk, skb);
>
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 889ed96..d21a78f 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -2443,8 +2443,8 @@ u32 __tcp_select_window(struct sock *sk)
>         return window;
>  }
>
> -static void tcp_skb_collapse_tstamp(struct sk_buff *skb,
> -                                   const struct sk_buff *next_skb)
> +void tcp_skb_collapse_tstamp(struct sk_buff *skb,
> +                            const struct sk_buff *next_skb)
>  {
>         const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb);
>

nice, thanks for the fix!

> --
> 2.5.1
>
diff mbox

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index fd40f8c..c0ef054 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -557,6 +557,8 @@  void tcp_send_ack(struct sock *sk);
 void tcp_send_delayed_ack(struct sock *sk);
 void tcp_send_loss_probe(struct sock *sk);
 bool tcp_schedule_loss_probe(struct sock *sk);
+void tcp_skb_collapse_tstamp(struct sk_buff *skb,
+			     const struct sk_buff *next_skb);
 
 /* tcp_input.c */
 void tcp_resume_early_retransmit(struct sock *sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5e45a9c..75e8336 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1309,6 +1309,7 @@  static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	if (skb == tcp_highest_sack(sk))
 		tcp_advance_highest_sack(sk, skb);
 
+	tcp_skb_collapse_tstamp(prev, skb);
 	tcp_unlink_write_queue(skb, sk);
 	sk_wmem_free_skb(sk, skb);
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 889ed96..d21a78f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2443,8 +2443,8 @@  u32 __tcp_select_window(struct sock *sk)
 	return window;
 }
 
-static void tcp_skb_collapse_tstamp(struct sk_buff *skb,
-				    const struct sk_buff *next_skb)
+void tcp_skb_collapse_tstamp(struct sk_buff *skb,
+			     const struct sk_buff *next_skb)
 {
 	const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb);