diff mbox series

[v4,1/2] tcp: Add TCP_INFO counter for packets received out-of-order

Message ID 20190913193629.55201-1-tph@fb.com
State Changes Requested
Delegated to: David Miller
Headers show
Series [v4,1/2] tcp: Add TCP_INFO counter for packets received out-of-order | expand

Commit Message

Thomas Higdon Sept. 13, 2019, 7:36 p.m. UTC
For receive-heavy cases on the server-side, we want to track the
connection quality for individual client IPs. This counter, similar to
the existing system-wide TCPOFOQueue counter in /proc/net/netstat,
tracks out-of-order packet reception. Providing this counter in
TCP_INFO makes it possible to understand to what degree receive-heavy
sockets are experiencing out-of-order delivery and packet drops
indicating congestion.

Please note that this is similar to the counter in NetBSD TCP_INFO, and
has the same name.

Signed-off-by: Thomas Higdon <tph@fb.com>
---

no changes from v3

 include/linux/tcp.h      | 2 ++
 include/uapi/linux/tcp.h | 2 ++
 net/ipv4/tcp.c           | 2 ++
 net/ipv4/tcp_input.c     | 1 +
 4 files changed, 7 insertions(+)

Comments

Neal Cardwell Sept. 13, 2019, 8:55 p.m. UTC | #1
On Fri, Sep 13, 2019 at 3:37 PM Thomas Higdon <tph@fb.com> wrote:
>
> For receive-heavy cases on the server-side, we want to track the
> connection quality for individual client IPs. This counter, similar to
> the existing system-wide TCPOFOQueue counter in /proc/net/netstat,
> tracks out-of-order packet reception. By providing this counter in
> TCP_INFO, it will allow understanding to what degree receive-heavy
> sockets are experiencing out-of-order delivery and packet drops
> indicating congestion.
>
> Please note that this is similar to the counter in NetBSD TCP_INFO, and
> has the same name.
>
> Signed-off-by: Thomas Higdon <tph@fb.com>
> ---
>
> no changes from v3
>
>  include/linux/tcp.h      | 2 ++
>  include/uapi/linux/tcp.h | 2 ++
>  net/ipv4/tcp.c           | 2 ++
>  net/ipv4/tcp_input.c     | 1 +
>  4 files changed, 7 insertions(+)
>
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index f3a85a7fb4b1..a01dc78218f1 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -393,6 +393,8 @@ struct tcp_sock {
>          */
>         struct request_sock *fastopen_rsk;
>         u32     *saved_syn;
> +
> +       u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */

Thanks for adding this.

A thought: putting the new rcv_ooopack field here makes struct
tcp_sock bigger, and increases the odds of taking a cache miss
(according to "pahole" this field is the only one in a new cache
line).

I'd suggest putting the new rcv_ooopack field immediately before
rcv_rtt_last_tsecr. That would use up a 4-byte hole, and would place
it in a cache line already used on TCP receivers (for rcv_rtt logic).
This would make it less likely this new field causes more cache misses
or uses more space.

Details: looking at the output of "pahole" for tcp_sock in various cases:

net-next before this patch:
-------------------------------------
...
        u8                         bpf_sock_ops_cb_flags; /*  2076     1 */

        /* XXX 3 bytes hole, try to pack */

        u32                        rcv_rtt_last_tsecr;   /*  2080     4 */

        /* XXX 4 bytes hole, try to pack */

        struct {
                u32                rtt_us;               /*  2088     4 */
                u32                seq;                  /*  2092     4 */
                u64                time;                 /*  2096     8 */
        } rcv_rtt_est;                                   /*  2088    16 */
...
        /* size: 2176, cachelines: 34, members: 134 */
        /* sum members: 2164, holes: 4, sum holes: 12 */
        /* paddings: 3, sum paddings: 12 */


net-next with this patch:
-------------------------------------
...
        u32 *                      saved_syn;            /*  2168     8 */
        /* --- cacheline 34 boundary (2176 bytes) --- */
        u32                        rcv_ooopack;          /*  2176     4 */
...
        /* size: 2184, cachelines: 35, members: 135 */
        /* sum members: 2168, holes: 4, sum holes: 12 */
        /* padding: 4 */
        /* paddings: 3, sum paddings: 12 */
        /* last cacheline: 8 bytes */


net-next with this field in the suggested spot:
-------------------------------------
...
       /* XXX 3 bytes hole, try to pack */

        u32                        rcv_ooopack;          /*  2080     4 */
        u32                        rcv_rtt_last_tsecr;   /*  2084     4 */
        struct {
                u32                rtt_us;               /*  2088     4 */
                u32                seq;                  /*  2092     4 */
                u64                time;                 /*  2096     8 */
        } rcv_rtt_est;                                   /*  2088    16 */
...
        /* size: 2176, cachelines: 34, members: 135 */
        /* sum members: 2168, holes: 3, sum holes: 8 */
        /* paddings: 3, sum paddings: 12 */

neal
diff mbox series

Patch

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index f3a85a7fb4b1..a01dc78218f1 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -393,6 +393,8 @@  struct tcp_sock {
 	 */
 	struct request_sock *fastopen_rsk;
 	u32	*saved_syn;
+
+	u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */
 };
 
 enum tsq_enum {
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index b3564f85a762..20237987ccc8 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -270,6 +270,8 @@  struct tcp_info {
 	__u64	tcpi_bytes_retrans;  /* RFC4898 tcpEStatsPerfOctetsRetrans */
 	__u32	tcpi_dsack_dups;     /* RFC4898 tcpEStatsStackDSACKDups */
 	__u32	tcpi_reord_seen;     /* reordering events seen */
+
+	__u32	tcpi_rcv_ooopack;    /* Out-of-order packets received */
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 94df48bcecc2..4cf58208270e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2653,6 +2653,7 @@  int tcp_disconnect(struct sock *sk, int flags)
 	tp->rx_opt.saw_tstamp = 0;
 	tp->rx_opt.dsack = 0;
 	tp->rx_opt.num_sacks = 0;
+	tp->rcv_ooopack = 0;
 
 
 	/* Clean up fastopen related fields */
@@ -3295,6 +3296,7 @@  void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_bytes_retrans = tp->bytes_retrans;
 	info->tcpi_dsack_dups = tp->dsack_dups;
 	info->tcpi_reord_seen = tp->reord_seen;
+	info->tcpi_rcv_ooopack = tp->rcv_ooopack;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 706cbb3b2986..2ef333354026 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4555,6 +4555,7 @@  static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	tp->pred_flags = 0;
 	inet_csk_schedule_ack(sk);
 
+	tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
 	seq = TCP_SKB_CB(skb)->seq;
 	end_seq = TCP_SKB_CB(skb)->end_seq;