diff mbox series

[v2,net-next] net/tcp: trace all TCP/IP state transition with tcp_set_state tracepoint

Message ID 1511019156-22039-1-git-send-email-laoar.shao@gmail.com
State Deferred, archived
Delegated to: David Miller
Headers show
Series [v2,net-next] net/tcp: trace all TCP/IP state transition with tcp_set_state tracepoint | expand

Commit Message

Yafang Shao Nov. 18, 2017, 3:32 p.m. UTC
The TCP/IP state transition from TCP_LISTEN to TCP_SYN_RECV and some
other transitions are not traced by the tcp_set_state tracepoint.

In order to trace the whole TCP lifespan, two helpers are introduced:
void __tcp_set_state(struct sock *sk, int state)
void __sk_state_store(struct sock *sk, int newstate)

When performing a TCP/IP state transition, we should use these two
helpers or tcp_set_state() rather than assigning a value to sk_state
directly.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/net/tcp.h               |  2 ++
 net/ipv4/inet_connection_sock.c |  6 +++---
 net/ipv4/inet_hashtables.c      |  2 +-
 net/ipv4/tcp.c                  | 12 ++++++++++++
 4 files changed, 18 insertions(+), 4 deletions(-)

Comments

Song Liu Nov. 18, 2017, 6:49 p.m. UTC | #1
> On Nov 18, 2017, at 7:32 AM, Yafang Shao <laoar.shao@gmail.com> wrote:
> 
> The TCP/IP transition from TCP_LISTEN to TCP_SYN_RECV and some other
> transitions are not traced with tcp_set_state tracepoint.
> 
> In order to trace the whole tcp lifespans, two helpers are introduced,
> void __tcp_set_state(struct sock *sk, int state)
> void __sk_state_store(struct sock *sk, int newstate)
> 
> When do TCP/IP state transition, we should use these two helpers or use
> tcp_set_state() other than assigning a value to sk_state directly.
> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
> include/net/tcp.h               |  2 ++
> net/ipv4/inet_connection_sock.c |  6 +++---
> net/ipv4/inet_hashtables.c      |  2 +-
> net/ipv4/tcp.c                  | 12 ++++++++++++
> 4 files changed, 18 insertions(+), 4 deletions(-)
> 
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 85ea578..4f2d015 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -1247,6 +1247,8 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
> 	"Close Wait","Last ACK","Listen","Closing"
> };
> #endif
> +void __sk_state_store(struct sock *sk, int newstate);
> +void __tcp_set_state(struct sock *sk, int state);
> void tcp_set_state(struct sock *sk, int state);
> 
> void tcp_done(struct sock *sk);
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 4ca46dc..f3967f1 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -783,7 +783,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
> 	if (newsk) {
> 		struct inet_connection_sock *newicsk = inet_csk(newsk);
> 
> -		newsk->sk_state = TCP_SYN_RECV;
> +		__tcp_set_state(newsk, TCP_SYN_RECV);
> 		newicsk->icsk_bind_hash = NULL;
> 
> 		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
> @@ -877,7 +877,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
> 	 * It is OK, because this socket enters to hash table only
> 	 * after validation is complete.
> 	 */
> -	sk_state_store(sk, TCP_LISTEN);
> +	__sk_state_store(sk, TCP_LISTEN);
> 	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
> 		inet->inet_sport = htons(inet->inet_num);
> 
> @@ -888,7 +888,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
> 			return 0;
> 	}
> 
> -	sk->sk_state = TCP_CLOSE;
> +	__tcp_set_state(sk, TCP_CLOSE);
> 	return err;
> }
> EXPORT_SYMBOL_GPL(inet_csk_listen_start);
> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
> index e7d15fb..72c15b6 100644
> --- a/net/ipv4/inet_hashtables.c
> +++ b/net/ipv4/inet_hashtables.c
> @@ -430,7 +430,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
> 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
> 	} else {
> 		percpu_counter_inc(sk->sk_prot->orphan_count);
> -		sk->sk_state = TCP_CLOSE;
> +		__tcp_set_state(sk, TCP_CLOSE);
> 		sock_set_flag(sk, SOCK_DEAD);
> 		inet_csk_destroy_sock(sk);
> 	}
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index bf97317..2bc7e04 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -2036,6 +2036,18 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
> }
> EXPORT_SYMBOL(tcp_recvmsg);
> 
> +void __sk_state_store(struct sock *sk, int newstate)
> +{
> +	trace_tcp_set_state(sk, sk->sk_state, newstate);
> +	sk_state_store(sk, newstate);
> +}
> +
> +void __tcp_set_state(struct sock *sk, int state)
> +{
> +	trace_tcp_set_state(sk, sk->sk_state, state);
> +	sk->sk_state = state;
> +}
> +
> void tcp_set_state(struct sock *sk, int state)
> {
> 	int oldstate = sk->sk_state;
> -- 
> 1.8.3.1
> 

+ Brendan
David Miller Nov. 19, 2017, 3:40 a.m. UTC | #2
From: Yafang Shao <laoar.shao@gmail.com>
Date: Sat, 18 Nov 2017 15:32:36 +0000

> The TCP/IP transition from TCP_LISTEN to TCP_SYN_RECV and some other
> transitions are not traced with tcp_set_state tracepoint.
> 
> In order to trace the whole tcp lifespans, two helpers are introduced,
> void __tcp_set_state(struct sock *sk, int state)
> void __sk_state_store(struct sock *sk, int newstate)
> 
> When do TCP/IP state transition, we should use these two helpers or use
> tcp_set_state() other than assigning a value to sk_state directly.
> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>

Please resubmit this when the net-next tree opens back up as it is
closed right now.

Thank you.
diff mbox series

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 85ea578..4f2d015 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1247,6 +1247,8 @@  static inline bool tcp_checksum_complete(struct sk_buff *skb)
 	"Close Wait","Last ACK","Listen","Closing"
 };
 #endif
+void __sk_state_store(struct sock *sk, int newstate);
+void __tcp_set_state(struct sock *sk, int state);
 void tcp_set_state(struct sock *sk, int state);
 
 void tcp_done(struct sock *sk);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4ca46dc..f3967f1 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -783,7 +783,7 @@  struct sock *inet_csk_clone_lock(const struct sock *sk,
 	if (newsk) {
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
 
-		newsk->sk_state = TCP_SYN_RECV;
+		__tcp_set_state(newsk, TCP_SYN_RECV);
 		newicsk->icsk_bind_hash = NULL;
 
 		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
@@ -877,7 +877,7 @@  int inet_csk_listen_start(struct sock *sk, int backlog)
 	 * It is OK, because this socket enters to hash table only
 	 * after validation is complete.
 	 */
-	sk_state_store(sk, TCP_LISTEN);
+	__sk_state_store(sk, TCP_LISTEN);
 	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
 		inet->inet_sport = htons(inet->inet_num);
 
@@ -888,7 +888,7 @@  int inet_csk_listen_start(struct sock *sk, int backlog)
 			return 0;
 	}
 
-	sk->sk_state = TCP_CLOSE;
+	__tcp_set_state(sk, TCP_CLOSE);
 	return err;
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_start);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e7d15fb..72c15b6 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -430,7 +430,7 @@  bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	} else {
 		percpu_counter_inc(sk->sk_prot->orphan_count);
-		sk->sk_state = TCP_CLOSE;
+		__tcp_set_state(sk, TCP_CLOSE);
 		sock_set_flag(sk, SOCK_DEAD);
 		inet_csk_destroy_sock(sk);
 	}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bf97317..2bc7e04 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2036,6 +2036,18 @@  int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 }
 EXPORT_SYMBOL(tcp_recvmsg);
 
+void __sk_state_store(struct sock *sk, int newstate)
+{
+	trace_tcp_set_state(sk, sk->sk_state, newstate);
+	sk_state_store(sk, newstate);
+}
+
+void __tcp_set_state(struct sock *sk, int state)
+{
+	trace_tcp_set_state(sk, sk->sk_state, state);
+	sk->sk_state = state;
+}
+
 void tcp_set_state(struct sock *sk, int state)
 {
 	int oldstate = sk->sk_state;