diff mbox series

[bpf-next,1/2] tcp: replace SOCK_DEBUG() with tcp_stats()

Message ID 1549971097-12627-2-git-send-email-laoar.shao@gmail.com
State Changes Requested
Headers show
Series cleanup SOCK_DEBUG() and introduce BPF_SOCK_OPS_STATS_CB | expand

Commit Message

Yafang Shao Feb. 12, 2019, 11:31 a.m. UTC
SOCK_DEBUG is a very ancient debugging interface, and it's not very useful
for debugging.
So this patch removes the SOCK_DEBUG() and introduce a new function
tcp_stats() to trace this kind of events.
Some MIBs are added for these events.

Regarding the SO_DEBUG in sock_{s,g}etsockopt, I think it is better to
keep as-is, because if we return an errno to tell the application that
this optname isn't supported for TCP, it may break the application.
The application still can use this option but don't take any effect for
TCP.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/uapi/linux/snmp.h |  3 +++
 net/ipv4/proc.c           |  3 +++
 net/ipv4/tcp_input.c      | 26 +++++++++++---------------
 net/ipv6/tcp_ipv6.c       |  2 --
 4 files changed, 17 insertions(+), 17 deletions(-)

Comments

Eric Dumazet Feb. 12, 2019, 3:07 p.m. UTC | #1
On 02/12/2019 03:31 AM, Yafang Shao wrote:
> SOCK_DEBUG is a very ancient debugging interface, and it's not very useful
> for debugging.
> So this patch removes the SOCK_DEBUG() and introduce a new function
> tcp_stats() to trace this kind of events.
> Some MIBs are added for these events.
> 
> Regarding the SO_DEBUG in sock_{s,g}etsockopt, I think it is better to
> keep as-is, because if we return an errno to tell the application that
> this optname isn't supported for TCP, it may break the application.
> The application still can use this option but don't take any effect for
> TCP.
> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
>  include/uapi/linux/snmp.h |  3 +++
>  net/ipv4/proc.c           |  3 +++
>  net/ipv4/tcp_input.c      | 26 +++++++++++---------------
>  net/ipv6/tcp_ipv6.c       |  2 --
>  4 files changed, 17 insertions(+), 17 deletions(-)
> 
> diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
> index 86dc24a..fd5c09c 100644
> --- a/include/uapi/linux/snmp.h
> +++ b/include/uapi/linux/snmp.h
> @@ -283,6 +283,9 @@ enum
>  	LINUX_MIB_TCPACKCOMPRESSED,		/* TCPAckCompressed */
>  	LINUX_MIB_TCPZEROWINDOWDROP,		/* TCPZeroWindowDrop */
>  	LINUX_MIB_TCPRCVQDROP,			/* TCPRcvQDrop */
> +	LINUX_MIB_TCPINVALIDACK,		/* TCPInvalidAck */
> +	LINUX_MIB_TCPOLDACK,			/* TCPOldAck */
> +	LINUX_MIB_TCPPARTIALPACKET,		/* TCPPartialPacket */
>  	__LINUX_MIB_MAX
>  };
>  
> diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
> index c3610b3..1b0320a 100644
> --- a/net/ipv4/proc.c
> +++ b/net/ipv4/proc.c
> @@ -291,6 +291,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
>  	SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
>  	SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
>  	SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
> +	SNMP_MIB_ITEM("TCPInvalidAck", LINUX_MIB_TCPINVALIDACK),
> +	SNMP_MIB_ITEM("TCPOldAck", LINUX_MIB_TCPOLDACK),
> +	SNMP_MIB_ITEM("TCPPartialPacket", LINUX_MIB_TCPPARTIALPACKET),
>  	SNMP_MIB_SENTINEL
>  };
>  
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 7a027dec..88deb1f 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3554,6 +3554,11 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
>  	return delivered;
>  }
>  
> +static void tcp_stats(struct sock *sk, int mib_idx)
> +{
> +	NET_INC_STATS(sock_net(sk), mib_idx);
> +}

This is not a very descriptive name.

Why is it static, and in net/ipv4/tcp_input.c ???

> +
>  /* This routine deals with incoming acks, but not outgoing ones. */
>  static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
>  {
> @@ -3715,7 +3720,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
>  	return 1;
>  
>  invalid_ack:
> -	SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
> +	tcp_stats(sk, LINUX_MIB_TCPINVALIDACK);
>  	return -1;
>  
>  old_ack:
> @@ -3731,7 +3736,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
>  		tcp_xmit_recovery(sk, rexmit);
>  	}
>  
> -	SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
> +	tcp_stats(sk, LINUX_MIB_TCPOLDACK);
>  	return 0;
>  }
>


These counters will add noise to an already crowded MIB space.

What bug do you expect to track and fix with these ?

I see many TCP patches coming adding icache pressure, enabling companies to build their own modified
TCP stack, but no real meat.
Yafang Shao Feb. 13, 2019, 2:07 a.m. UTC | #2
On Tue, Feb 12, 2019 at 11:07 PM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
>
> On 02/12/2019 03:31 AM, Yafang Shao wrote:
> > SOCK_DEBUG is a very ancient debugging interface, and it's not very useful
> > for debugging.
> > So this patch removes the SOCK_DEBUG() and introduce a new function
> > tcp_stats() to trace this kind of events.
> > Some MIBs are added for these events.
> >
> > Regarding the SO_DEBUG in sock_{s,g}etsockopt, I think it is better to
> > keep as-is, because if we return an errno to tell the application that
> > this optname isn't supported for TCP, it may break the application.
> > The application still can use this option but don't take any effect for
> > TCP.
> >
> > Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> > ---
> >  include/uapi/linux/snmp.h |  3 +++
> >  net/ipv4/proc.c           |  3 +++
> >  net/ipv4/tcp_input.c      | 26 +++++++++++---------------
> >  net/ipv6/tcp_ipv6.c       |  2 --
> >  4 files changed, 17 insertions(+), 17 deletions(-)
> >
> > diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
> > index 86dc24a..fd5c09c 100644
> > --- a/include/uapi/linux/snmp.h
> > +++ b/include/uapi/linux/snmp.h
> > @@ -283,6 +283,9 @@ enum
> >       LINUX_MIB_TCPACKCOMPRESSED,             /* TCPAckCompressed */
> >       LINUX_MIB_TCPZEROWINDOWDROP,            /* TCPZeroWindowDrop */
> >       LINUX_MIB_TCPRCVQDROP,                  /* TCPRcvQDrop */
> > +     LINUX_MIB_TCPINVALIDACK,                /* TCPInvalidAck */
> > +     LINUX_MIB_TCPOLDACK,                    /* TCPOldAck */
> > +     LINUX_MIB_TCPPARTIALPACKET,             /* TCPPartialPacket */
> >       __LINUX_MIB_MAX
> >  };
> >
> > diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
> > index c3610b3..1b0320a 100644
> > --- a/net/ipv4/proc.c
> > +++ b/net/ipv4/proc.c
> > @@ -291,6 +291,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
> >       SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
> >       SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
> >       SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
> > +     SNMP_MIB_ITEM("TCPInvalidAck", LINUX_MIB_TCPINVALIDACK),
> > +     SNMP_MIB_ITEM("TCPOldAck", LINUX_MIB_TCPOLDACK),
> > +     SNMP_MIB_ITEM("TCPPartialPacket", LINUX_MIB_TCPPARTIALPACKET),
> >       SNMP_MIB_SENTINEL
> >  };
> >
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index 7a027dec..88deb1f 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -3554,6 +3554,11 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
> >       return delivered;
> >  }
> >
> > +static void tcp_stats(struct sock *sk, int mib_idx)
> > +{
> > +     NET_INC_STATS(sock_net(sk), mib_idx);
> > +}
>
> This is not a very descriptive name.
>
> Why is it static, and in net/ipv4/tcp_input.c ???
>

Because it is only called in net/ipv4/tcp_input.c currently, so I
define it as static in this file,
the reseaon I don't define it as 'static inline' is that I think the
compiler can make a better decision than me.

In the future it may be called in other files, then we can put it into
a more proper file.

> > +
> >  /* This routine deals with incoming acks, but not outgoing ones. */
> >  static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
> >  {
> > @@ -3715,7 +3720,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
> >       return 1;
> >
> >  invalid_ack:
> > -     SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
> > +     tcp_stats(sk, LINUX_MIB_TCPINVALIDACK);
> >       return -1;
> >
> >  old_ack:
> > @@ -3731,7 +3736,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
> >               tcp_xmit_recovery(sk, rexmit);
> >       }
> >
> > -     SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
> > +     tcp_stats(sk, LINUX_MIB_TCPOLDACK);
> >       return 0;
> >  }
> >
>
>
> These counters will add noise to an already crowded MIB space.
>

I have another idea that we can define some tcp bpf events to replace
these MIB counters somehing like,
    #define BPF_EVENT_TCP_OLDACK 1
    #define BPF_EVENT_TCP_PARTIALPACKET 2
    ...
Maybe we could also cleanup some MIBs to make it less crowded.

> What bug do you expect to track and fix with these ?
>

Let me explain the background for you.
I want to track some TCP abnormal  behavior in TCP/IP stack. But I
find there's no good way to do it.
The current MIBs are per net, other than per socket, that makes it not
very powerful.
And the ancient SOCK_DEBUG is not good as well.
So we think why not cleanup this ancient SOCK_DEBUG() and introduce a
more powerful method.

> I see many TCP patches coming adding icache pressure, enabling companies to build their own modified
> TCP stack, but no real meat.
>

Thanks
Yafang
Eric Dumazet Feb. 13, 2019, 2:15 a.m. UTC | #3
On Tue, Feb 12, 2019 at 6:07 PM Yafang Shao <laoar.shao@gmail.com> wrote:
>

> Let me explain the background for you.
> I want to track some TCP abnormal  behavior in TCP/IP stack. But I
> find there's no good way to do it.
> The current MIBs are per net, other than per socket, that makes it not
> very powerful.
> And the ancient SOCK_DEBUG is not good as well.
> So we think why not cleanup this ancient SOCK_DEBUG() and introduce a
> more powerful method.


I am all for it, but this more powerful method does nothing at all in
the current patches.

I can not accept patches just because they seem to be harmless,
knowing that  the next patches
will be pushed later changing more stuff, just because the new
infrastructure is there "and can be used"

Just remove all SOCK_DEBUG() calls, there are leftovers of very ancient times.

Do not add more debugging stuff unless you can demonstrate
they actually allowed you to find a real bug and that you sent a
public fix for it.

Just adding "cool stuff" in TCP stack does not please me, it is only
more complexity for unproven gain.

Otherwise, I am tempted to think that these BPF hooks are there only
so that a company can more
easily build a private variant of TCP, yet letting the community
maintaining the hard part of TCP stack.

Thank you.
Yafang Shao Feb. 13, 2019, 2:46 a.m. UTC | #4
On Wed, Feb 13, 2019 at 10:15 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Tue, Feb 12, 2019 at 6:07 PM Yafang Shao <laoar.shao@gmail.com> wrote:
> >
>
> > Let me explain the background for you.
> > I want to track some TCP abnormal  behavior in TCP/IP stack. But I
> > find there's no good way to do it.
> > The current MIBs are per net, other than per socket, that makes it not
> > very powerful.
> > And the ancient SOCK_DEBUG is not good as well.
> > So we think why not cleanup this ancient SOCK_DEBUG() and introduce a
> > more powerful method.
>
>
> I am all for it, but this more powerful method does nothing at all in
> the current patches.
>
> I can not accept patches just because they seem to be harmless,
> knowing that  the next patches
> will be pushed later changing more stuff, just because the new
> infrastructure is there "and can be used"
>
> Just remove all SOCK_DEBUG() calls, there are leftovers of very ancient times.
>

OK. I will send a patch for it.

> Do not add more debugging stuff unless you can demonstrate
> they actually allowed you to find a real bug and that you sent a
> public fix for it.
>

Sure.

> Just adding "cool stuff" in TCP stack does not please me, it is only
> more complexity for unproven gain.
>
> Otherwise, I am tempted to think that these BPF hooks are there only
> so that a company can more
> easily build a private variant of TCP, yet letting the community
> maintaining the hard part of TCP stack.

:-)

>
> Thank you.


Thanks
Yafang
Alexei Starovoitov Feb. 13, 2019, 2:49 a.m. UTC | #5
On Tue, Feb 12, 2019 at 6:15 PM Eric Dumazet <edumazet@google.com> wrote:
>
> Do not add more debugging stuff unless you can demonstrate
> they actually allowed you to find a real bug and that you sent a
> public fix for it.
>
> Just adding "cool stuff" in TCP stack does not please me, it is only
> more complexity for unproven gain.

I agree.
I don't see why this debugging of 'abnormal TCP' cannot be done
with kprobes and tracepoints.
Instrumenting every tcp counter increment is overkill.
Yafang Shao Feb. 13, 2019, 3:04 a.m. UTC | #6
On Wed, Feb 13, 2019 at 10:49 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Tue, Feb 12, 2019 at 6:15 PM Eric Dumazet <edumazet@google.com> wrote:
> >
> > Do not add more debugging stuff unless you can demonstrate
> > they actually allowed you to find a real bug and that you sent a
> > public fix for it.
> >
> > Just adding "cool stuff" in TCP stack does not please me, it is only
> > more complexity for unproven gain.
>
> I agree.
> I don't see why this debugging of 'abnormal TCP' cannot be done
> with kprobes and tracepoints.

kprobe is not suitable, because we have to use the line number, that's
a trouble cross all kernel versions.
Regarding  tracepoints, I don't think it is a good idea to introduce
more and more tcp tracepoints and make them crowed as well.

> Instrumenting every tcp counter increment is overkill.
diff mbox series

Patch

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 86dc24a..fd5c09c 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -283,6 +283,9 @@  enum
 	LINUX_MIB_TCPACKCOMPRESSED,		/* TCPAckCompressed */
 	LINUX_MIB_TCPZEROWINDOWDROP,		/* TCPZeroWindowDrop */
 	LINUX_MIB_TCPRCVQDROP,			/* TCPRcvQDrop */
+	LINUX_MIB_TCPINVALIDACK,		/* TCPInvalidAck */
+	LINUX_MIB_TCPOLDACK,			/* TCPOldAck */
+	LINUX_MIB_TCPPARTIALPACKET,		/* TCPPartialPacket */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index c3610b3..1b0320a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -291,6 +291,9 @@  static int sockstat_seq_show(struct seq_file *seq, void *v)
 	SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
 	SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
 	SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
+	SNMP_MIB_ITEM("TCPInvalidAck", LINUX_MIB_TCPINVALIDACK),
+	SNMP_MIB_ITEM("TCPOldAck", LINUX_MIB_TCPOLDACK),
+	SNMP_MIB_ITEM("TCPPartialPacket", LINUX_MIB_TCPPARTIALPACKET),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7a027dec..88deb1f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3554,6 +3554,11 @@  static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
 	return delivered;
 }
 
+static void tcp_stats(struct sock *sk, int mib_idx)
+{
+	NET_INC_STATS(sock_net(sk), mib_idx);
+}
+
 /* This routine deals with incoming acks, but not outgoing ones. */
 static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 {
@@ -3715,7 +3720,7 @@  static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	return 1;
 
 invalid_ack:
-	SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
+	tcp_stats(sk, LINUX_MIB_TCPINVALIDACK);
 	return -1;
 
 old_ack:
@@ -3731,7 +3736,7 @@  static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		tcp_xmit_recovery(sk, rexmit);
 	}
 
-	SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
+	tcp_stats(sk, LINUX_MIB_TCPOLDACK);
 	return 0;
 }
 
@@ -4432,13 +4437,10 @@  static void tcp_ofo_queue(struct sock *sk)
 		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
 
 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
-			SOCK_DEBUG(sk, "ofo packet was already received\n");
+			tcp_stats(sk, LINUX_MIB_TCPOFODROP);
 			tcp_drop(sk, skb);
 			continue;
 		}
-		SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
-			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
-			   TCP_SKB_CB(skb)->end_seq);
 
 		tail = skb_peek_tail(&sk->sk_receive_queue);
 		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
@@ -4499,11 +4501,9 @@  static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	tp->pred_flags = 0;
 	inet_csk_schedule_ack(sk);
 
-	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
 	seq = TCP_SKB_CB(skb)->seq;
 	end_seq = TCP_SKB_CB(skb)->end_seq;
-	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
-		   tp->rcv_nxt, seq, end_seq);
+	tcp_stats(sk, LINUX_MIB_TCPOFOQUEUE);
 
 	p = &tp->out_of_order_queue.rb_node;
 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
@@ -4779,9 +4779,7 @@  static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 
 	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		/* Partial packet, seq < rcv_next < end_seq */
-		SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
-			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
-			   TCP_SKB_CB(skb)->end_seq);
+		tcp_stats(sk, LINUX_MIB_TCPPARTIALPACKET);
 
 		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
 
@@ -5061,9 +5059,7 @@  static int tcp_prune_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
-
-	NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
+	tcp_stats(sk, LINUX_MIB_PRUNECALLED);
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		tcp_clamp_window(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e51cda7..57ef69a1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -220,8 +220,6 @@  static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
 		struct sockaddr_in sin;
 
-		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
-
 		if (__ipv6_only_sock(sk))
 			return -ENETUNREACH;