diff mbox

[v2,net-next,2/2] tcp: add tcpi_bytes_received to tcp_info

Message ID 1430260098-14127-3-git-send-email-edumazet@google.com
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet April 28, 2015, 10:28 p.m. UTC
This patch tracks the total number of payload bytes received on a TCP socket.
This is the sum of all changes made to tp->rcv_nxt.

RFC4898 named this : tcpEStatsAppHCThruOctetsReceived

This is a 64-bit field that can be fetched either via the TCP_INFO
getsockopt(), if one has a handle on a TCP socket, or via the inet_diag
netlink facility (an iproute2/ss patch will follow).

Note that tp->bytes_received was placed near tp->rcv_nxt for
best data locality and minimal performance impact.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Matt Mathis <mattmathis@google.com>
Cc: Eric Salo <salo@google.com>
Cc: Martin Lau <kafai@fb.com>
Cc: Chris Rapier <rapier@psc.edu>
---
 include/linux/tcp.h      |  4 ++++
 include/uapi/linux/tcp.h |  1 +
 net/ipv4/tcp.c           |  1 +
 net/ipv4/tcp_fastopen.c  |  1 +
 net/ipv4/tcp_input.c     | 17 +++++++++++++----
 5 files changed, 20 insertions(+), 4 deletions(-)

Comments

Yuchung Cheng April 28, 2015, 10:56 p.m. UTC | #1
On Tue, Apr 28, 2015 at 3:28 PM, Eric Dumazet <edumazet@google.com> wrote:
> This patch tracks total number of payload bytes received on a TCP socket.
> This is the sum of all changes done to tp->rcv_nxt
>
> RFC4898 named this : tcpEStatsAppHCThruOctetsReceived
>
> This is a 64bit field, and can be fetched both from TCP_INFO
> getsockopt() if one has a handle on a TCP socket, or from inet_diag
> netlink facility (iproute2/ss patch will follow)
>
> Note that tp->bytes_received was placed near tp->rcv_nxt for
> best data locality and minimal performance impact.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Yuchung Cheng <ycheng@google.com>
> Cc: Matt Mathis <mattmathis@google.com>
> Cc: Eric Salo <salo@google.com>
> Cc: Martin Lau <kafai@fb.com>
> Cc: Chris Rapier <rapier@psc.edu>
Acked-by: Yuchung Cheng <ycheng@google.com>

tho I slightly prefer to call tcp_rcv_nxt_update() when rcv_nxt is
updated in TFO for consistency.

> ---
>  include/linux/tcp.h      |  4 ++++
>  include/uapi/linux/tcp.h |  1 +
>  net/ipv4/tcp.c           |  1 +
>  net/ipv4/tcp_fastopen.c  |  1 +
>  net/ipv4/tcp_input.c     | 17 +++++++++++++----
>  5 files changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index 0f73b43171da..3b2911502a8c 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -145,6 +145,10 @@ struct tcp_sock {
>   *     read the code and the spec side by side (and laugh ...)
>   *     See RFC793 and RFC1122. The RFC writes these in capitals.
>   */
> +       u64     bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
> +                                * sum(delta(rcv_nxt)), or how many bytes
> +                                * were acked.
> +                                */
>         u32     rcv_nxt;        /* What we want to receive next         */
>         u32     copied_seq;     /* Head of yet unread data              */
>         u32     rcv_wup;        /* rcv_nxt on last window update sent   */
> diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
> index 6666e98a0af9..a48f93f3207b 100644
> --- a/include/uapi/linux/tcp.h
> +++ b/include/uapi/linux/tcp.h
> @@ -190,6 +190,7 @@ struct tcp_info {
>         __u64   tcpi_pacing_rate;
>         __u64   tcpi_max_pacing_rate;
>         __u64   tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
> +       __u64   tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
>  };
>
>  /* for TCP_MD5SIG socket option */
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 4bf0e8ca7b5b..99fcc0b22c92 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -2666,6 +2666,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
>
>         spin_lock_bh(&sk->sk_lock.slock);
>         info->tcpi_bytes_acked = tp->bytes_acked;
> +       info->tcpi_bytes_received = tp->bytes_received;
>         spin_unlock_bh(&sk->sk_lock.slock);
>  }
>  EXPORT_SYMBOL_GPL(tcp_get_info);
> diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
> index e3d87aca6be8..b1b110d07816 100644
> --- a/net/ipv4/tcp_fastopen.c
> +++ b/net/ipv4/tcp_fastopen.c
> @@ -206,6 +206,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
>                         skb_set_owner_r(skb2, child);
>                         __skb_queue_tail(&child->sk_receive_queue, skb2);
>                         tp->syn_data_acked = 1;
> +                       tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1;
>                 } else {
>                         end_seq = TCP_SKB_CB(skb)->seq + 1;
>                 }
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 378d3f4d4dc3..7e6962bcfc30 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3289,6 +3289,15 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
>         tp->snd_una = ack;
>  }
>
> +/* If we update tp->rcv_nxt, also update tp->bytes_received */
> +static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
> +{
> +       u32 delta = seq - tp->rcv_nxt;
> +
> +       tp->bytes_received += delta;
> +       tp->rcv_nxt = seq;
> +}
> +
>  /* Update our send window.
>   *
>   * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
> @@ -4245,7 +4254,7 @@ static void tcp_ofo_queue(struct sock *sk)
>
>                 tail = skb_peek_tail(&sk->sk_receive_queue);
>                 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
> -               tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
> +               tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
>                 if (!eaten)
>                         __skb_queue_tail(&sk->sk_receive_queue, skb);
>                 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
> @@ -4413,7 +4422,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
>         __skb_pull(skb, hdrlen);
>         eaten = (tail &&
>                  tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
> -       tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
> +       tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
>         if (!eaten) {
>                 __skb_queue_tail(&sk->sk_receive_queue, skb);
>                 skb_set_owner_r(skb, sk);
> @@ -4506,7 +4515,7 @@ queue_and_out:
>
>                         eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
>                 }
> -               tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
> +               tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
>                 if (skb->len)
>                         tcp_event_data_recv(sk, skb);
>                 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
> @@ -5254,7 +5263,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
>                                         tcp_rcv_rtt_measure_ts(sk, skb);
>
>                                         __skb_pull(skb, tcp_header_len);
> -                                       tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
> +                                       tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
>                                         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
>                                         eaten = 1;
>                                 }
> --
> 2.2.0.rc0.207.ga3a616c
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet April 28, 2015, 11:07 p.m. UTC | #2
On Tue, 2015-04-28 at 15:56 -0700, Yuchung Cheng wrote:

> Acked-by: Yuchung Cheng <ycheng@google.com>
> 

Thanks !

> tho I slightly prefer to call tcp_rcv_nxt_update() when rcv_nxt is
> updated in TFO for consistency.

Right, but is the tp->rcv_nxt prior value even valid at this point ? :)

Anyway, this would require making tcp_rcv_nxt_update() non-static, which
would discourage the compiler from inlining the code.

At this point, we can simply set tp->bytes_received.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller April 29, 2015, 10:12 p.m. UTC | #3
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 28 Apr 2015 15:28:18 -0700

> This patch tracks total number of payload bytes received on a TCP socket.
> This is the sum of all changes done to tp->rcv_nxt
> 
> RFC4898 named this : tcpEStatsAppHCThruOctetsReceived
> 
> This is a 64bit field, and can be fetched both from TCP_INFO
> getsockopt() if one has a handle on a TCP socket, or from inet_diag
> netlink facility (iproute2/ss patch will follow)
> 
> Note that tp->bytes_received was placed near tp->rcv_nxt for
> best data locality and minimal performance impact.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Applied.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 0f73b43171da..3b2911502a8c 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -145,6 +145,10 @@  struct tcp_sock {
  *	read the code and the spec side by side (and laugh ...)
  *	See RFC793 and RFC1122. The RFC writes these in capitals.
  */
+	u64	bytes_received;	/* RFC4898 tcpEStatsAppHCThruOctetsReceived
+				 * sum(delta(rcv_nxt)), or how many bytes
+				 * were acked.
+				 */
  	u32	rcv_nxt;	/* What we want to receive next 	*/
 	u32	copied_seq;	/* Head of yet unread data		*/
 	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 6666e98a0af9..a48f93f3207b 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -190,6 +190,7 @@  struct tcp_info {
 	__u64	tcpi_pacing_rate;
 	__u64	tcpi_max_pacing_rate;
 	__u64	tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
+	__u64	tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4bf0e8ca7b5b..99fcc0b22c92 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2666,6 +2666,7 @@  void tcp_get_info(struct sock *sk, struct tcp_info *info)
 
 	spin_lock_bh(&sk->sk_lock.slock);
 	info->tcpi_bytes_acked = tp->bytes_acked;
+	info->tcpi_bytes_received = tp->bytes_received;
 	spin_unlock_bh(&sk->sk_lock.slock);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index e3d87aca6be8..b1b110d07816 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -206,6 +206,7 @@  static bool tcp_fastopen_create_child(struct sock *sk,
 			skb_set_owner_r(skb2, child);
 			__skb_queue_tail(&child->sk_receive_queue, skb2);
 			tp->syn_data_acked = 1;
+			tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1;
 		} else {
 			end_seq = TCP_SKB_CB(skb)->seq + 1;
 		}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 378d3f4d4dc3..7e6962bcfc30 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3289,6 +3289,15 @@  static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
 	tp->snd_una = ack;
 }
 
+/* If we update tp->rcv_nxt, also update tp->bytes_received */
+static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
+{
+	u32 delta = seq - tp->rcv_nxt;
+
+	tp->bytes_received += delta;
+	tp->rcv_nxt = seq;
+}
+
 /* Update our send window.
  *
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
@@ -4245,7 +4254,7 @@  static void tcp_ofo_queue(struct sock *sk)
 
 		tail = skb_peek_tail(&sk->sk_receive_queue);
 		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (!eaten)
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4413,7 +4422,7 @@  static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 	__skb_pull(skb, hdrlen);
 	eaten = (tail &&
 		 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
-	tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		skb_set_owner_r(skb, sk);
@@ -4506,7 +4515,7 @@  queue_and_out:
 
 			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 		}
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (skb->len)
 			tcp_event_data_recv(sk, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -5254,7 +5263,7 @@  void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 					tcp_rcv_rtt_measure_ts(sk, skb);
 
 					__skb_pull(skb, tcp_header_len);
-					tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+					tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 					NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
 					eaten = 1;
 				}