Message ID | 1457402465-1736429-1-git-send-email-kafai@fb.com |
---|---|
State | RFC, archived |
Delegated to: | David Miller |
Headers | show |
On Mon, Mar 07, 2016 at 06:01:05PM -0800, Martin KaFai Lau wrote: > v2: > Rework based on recent fix by Eric: > commit a9d99ce28ed3 ("tcp: fix tcpi_segs_in after connection establishment") > > v1: Patch itself looks good to me, just this patch history is better placed on the Notes region (together with the diffstat) or place a scissors mark, otherwise it will be present in the git changelog too. > Per RFC4898, they count segments sent/received > containing a positive length data segment (that includes > retransmission segments carrying data). Unlike > tcpi_segs_out/in, tcpi_data_segs_out/in excludes segments > carrying no data (e.g. pure ack). > > The patch also updates the segs_in in tcp_fastopen_add_skb() > so that segs_in >= data_segs_in property is kept. > > Together with retransmission data, tcpi_data_segs_out > gives a better signal on the rxmit rate. > > Signed-off-by: Martin KaFai Lau <kafai@fb.com> > Cc: Chris Rapier <rapier@psc.edu> > Cc: Eric Dumazet <edumazet@google.com> > Cc: Marcelo Ricardo Leitner <mleitner@redhat.com> > Cc: Neal Cardwell <ncardwell@google.com> > Cc: Yuchung Cheng <ycheng@google.com> > --- > include/linux/tcp.h | 6 ++++++ > include/net/tcp.h | 10 ++++++++++ > include/uapi/linux/tcp.h | 2 ++ > net/ipv4/tcp.c | 2 ++ > net/ipv4/tcp_fastopen.c | 4 ++++ > net/ipv4/tcp_ipv4.c | 2 +- > net/ipv4/tcp_minisocks.c | 2 +- > net/ipv4/tcp_output.c | 4 +++- > net/ipv6/tcp_ipv6.c | 2 +- > 9 files changed, 30 insertions(+), 4 deletions(-) > > diff --git a/include/linux/tcp.h b/include/linux/tcp.h > index bcbf51d..7be9b12 100644 > --- a/include/linux/tcp.h > +++ b/include/linux/tcp.h > @@ -158,6 +158,9 @@ struct tcp_sock { > u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn > * total number of segments in. > */ > + u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn > + * total number of data segments in. > + */ > u32 rcv_nxt; /* What we want to receive next */ > u32 copied_seq; /* Head of yet unread data */ > u32 rcv_wup; /* rcv_nxt on last window update sent */ > @@ -165,6 +168,9 @@ struct tcp_sock { > u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut > * The total number of segments sent. > */ > + u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut > + * total number of data segments sent. > + */ > u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked > * sum(delta(snd_una)), or how many bytes > * were acked. > diff --git a/include/net/tcp.h b/include/net/tcp.h > index e90db85..e2916cc 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -1816,4 +1816,14 @@ static inline void skb_set_tcp_pure_ack(struct sk_buff *skb) > skb->truesize = 2; > } > > +static inline void tcp_segs_in(struct tcp_sock *tp, struct sk_buff *skb) > +{ > + u16 segs_in; > + > + segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs); > + tp->segs_in += segs_in; > + if (skb->len > tcp_hdrlen(skb)) > + tp->data_segs_in += segs_in; > +} > + > #endif /* _TCP_H */ > diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h > index fe95446..53e8e3f 100644 > --- a/include/uapi/linux/tcp.h > +++ b/include/uapi/linux/tcp.h > @@ -199,6 +199,8 @@ struct tcp_info { > > __u32 tcpi_notsent_bytes; > __u32 tcpi_min_rtt; > + __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ > + __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ > }; > > /* for TCP_MD5SIG socket option */ > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c > index f9faadb..6b01b48 100644 > --- a/net/ipv4/tcp.c > +++ b/net/ipv4/tcp.c > @@ -2728,6 +2728,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) > info->tcpi_notsent_bytes = max(0, notsent_bytes); > > info->tcpi_min_rtt = tcp_min_rtt(tp); > + info->tcpi_data_segs_in = tp->data_segs_in; > + info->tcpi_data_segs_out = tp->data_segs_out; > } > EXPORT_SYMBOL_GPL(tcp_get_info); > > diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c > index fdb286d..f583c85 100644 > --- a/net/ipv4/tcp_fastopen.c > +++ b/net/ipv4/tcp_fastopen.c > @@ -131,6 +131,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, > void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) > { > struct tcp_sock *tp = tcp_sk(sk); > + u16 segs_in; > > if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt) > return; > @@ -154,6 +155,9 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) > * as we certainly are not changing upper 32bit value (0) > */ > tp->bytes_received = skb->len; > + segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs); > + tp->segs_in = segs_in; > + tp->data_segs_in = segs_in; > > if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) > tcp_fin(sk); > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c > index 4c8d58d..0b02ef7 100644 > --- a/net/ipv4/tcp_ipv4.c > +++ b/net/ipv4/tcp_ipv4.c > @@ -1650,7 +1650,7 @@ process: > sk_incoming_cpu_update(sk); > > bh_lock_sock_nested(sk); > - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); > + tcp_segs_in(tcp_sk(sk), skb); > ret = 0; > if (!sock_owned_by_user(sk)) { > if (!tcp_prequeue(sk, skb)) > diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c > index ae90e4b..acb366d 100644 > --- a/net/ipv4/tcp_minisocks.c > +++ b/net/ipv4/tcp_minisocks.c > @@ -812,7 +812,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, > int ret = 0; > int state = child->sk_state; > > - tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); > + tcp_segs_in(tcp_sk(child), skb); > if (!sock_owned_by_user(child)) { > ret = tcp_rcv_state_process(child, skb); > /* Wakeup parent, send SIGIO */ > diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c > index 7d2c7a4..7d2dc01 100644 > --- a/net/ipv4/tcp_output.c > +++ b/net/ipv4/tcp_output.c > @@ -1003,8 +1003,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, > if (likely(tcb->tcp_flags & TCPHDR_ACK)) > tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); > > - if (skb->len != tcp_header_size) > + if (skb->len != tcp_header_size) { > tcp_event_data_sent(tp, sk); > + tp->data_segs_out += tcp_skb_pcount(skb); > + } > > if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) > TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, > diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c > index 33f2820..9c16565 100644 > --- a/net/ipv6/tcp_ipv6.c > +++ b/net/ipv6/tcp_ipv6.c > @@ -1443,7 +1443,7 @@ process: > sk_incoming_cpu_update(sk); > > bh_lock_sock_nested(sk); > - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); > + tcp_segs_in(tcp_sk(sk), skb); > ret = 0; > if (!sock_owned_by_user(sk)) { > if (!tcp_prequeue(sk, skb)) > -- > 2.5.1 >
From: Marcelo Ricardo Leitner <mleitner@redhat.com> Date: Tue, 8 Mar 2016 16:41:54 -0300 > On Mon, Mar 07, 2016 at 06:01:05PM -0800, Martin KaFai Lau wrote: >> v2: >> Rework based on recent fix by Eric: >> commit a9d99ce28ed3 ("tcp: fix tcpi_segs_in after connection establishment") >> >> v1: > > Patch itself looks good to me, just this patch history is better placed > on the Notes region (together with the diffstat) or place a scissors > mark, otherwise it will be present in the git changelog too. I like seeing it in the commit message. Then later people can figure out answers to questions like "why didn't you implement this like X" and see in the version history that this was attempted first and then intentionally changed. I even move version history into the commit message when people try to move it out, so please don't do that. This is important _especially_ for "0/N" header postings for a patch series. DO NOT LOSE INFORMATION.
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bcbf51d..7be9b12 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -158,6 +158,9 @@ struct tcp_sock { u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn * total number of segments in. */ + u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn + * total number of data segments in. + */ u32 rcv_nxt; /* What we want to receive next */ u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ @@ -165,6 +168,9 @@ struct tcp_sock { u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut * The total number of segments sent. */ + u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. diff --git a/include/net/tcp.h b/include/net/tcp.h index e90db85..e2916cc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1816,4 +1816,14 @@ static inline void skb_set_tcp_pure_ack(struct sk_buff *skb) skb->truesize = 2; } +static inline void tcp_segs_in(struct tcp_sock *tp, struct sk_buff *skb) +{ + u16 segs_in; + + segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tp->segs_in += segs_in; + if (skb->len > tcp_hdrlen(skb)) + tp->data_segs_in += segs_in; +} + #endif /* _TCP_H */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index fe95446..53e8e3f 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -199,6 +199,8 @@ struct tcp_info { __u32 tcpi_notsent_bytes; __u32 tcpi_min_rtt; + __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ + __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f9faadb..6b01b48 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2728,6 +2728,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_notsent_bytes = max(0, notsent_bytes); info->tcpi_min_rtt = tcp_min_rtt(tp); + info->tcpi_data_segs_in = tp->data_segs_in; + info->tcpi_data_segs_out = tp->data_segs_out; } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index fdb286d..f583c85 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -131,6 +131,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + u16 segs_in; if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt) return; @@ -154,6 +155,9 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) * as we certainly are not changing upper 32bit value (0) */ tp->bytes_received = skb->len; + segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tp->segs_in = segs_in; + tp->data_segs_in = segs_in; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) tcp_fin(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4c8d58d..0b02ef7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1650,7 +1650,7 @@ process: sk_incoming_cpu_update(sk); bh_lock_sock_nested(sk); - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ae90e4b..acb366d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -812,7 +812,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, int ret = 0; int state = child->sk_state; - tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tcp_segs_in(tcp_sk(child), skb); if (!sock_owned_by_user(child)) { ret = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7d2c7a4..7d2dc01 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1003,8 +1003,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (likely(tcb->tcp_flags & TCPHDR_ACK)) tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); - if (skb->len != tcp_header_size) + if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); + tp->data_segs_out += tcp_skb_pcount(skb); + } if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 33f2820..9c16565 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1443,7 +1443,7 @@ process: sk_incoming_cpu_update(sk); bh_lock_sock_nested(sk); - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb))
v2: Rework based on recent fix by Eric: commit a9d99ce28ed3 ("tcp: fix tcpi_segs_in after connection establishment") v1: Per RFC4898, they count segments sent/received containing a positive length data segment (that includes retransmission segments carrying data). Unlike tcpi_segs_out/in, tcpi_data_segs_out/in excludes segments carrying no data (e.g. pure ack). The patch also updates the segs_in in tcp_fastopen_add_skb() so that segs_in >= data_segs_in property is kept. Together with retransmission data, tcpi_data_segs_out gives a better signal on the rxmit rate. Signed-off-by: Martin KaFai Lau <kafai@fb.com> Cc: Chris Rapier <rapier@psc.edu> Cc: Eric Dumazet <edumazet@google.com> Cc: Marcelo Ricardo Leitner <mleitner@redhat.com> Cc: Neal Cardwell <ncardwell@google.com> Cc: Yuchung Cheng <ycheng@google.com> --- include/linux/tcp.h | 6 ++++++ include/net/tcp.h | 10 ++++++++++ include/uapi/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_fastopen.c | 4 ++++ net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 4 +++- net/ipv6/tcp_ipv6.c | 2 +- 9 files changed, 30 insertions(+), 4 deletions(-)