diff mbox

[v2] TCP: avoid sending keepalive probes while data is being received

Message ID 1271602519-6805-1-git-send-email-fleitner@redhat.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Flavio Leitner April 18, 2010, 2:55 p.m. UTC
RFC 1122 says the following:
...
  Keep-alive packets MUST only be sent when no data or
  acknowledgement packets have been received for the
  connection within an interval.
...

Fix this by storing the timestamp of the last received data
packet and checking it when the keepalive timer expires.

-v2 fix do_tcp_setsockopt() as pointed out by Eric Dumazet <eric.dumazet@gmail.com>

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
---
 include/linux/tcp.h  |    1 +
 net/ipv4/tcp.c       |    5 ++++-
 net/ipv4/tcp_input.c |    3 +++
 net/ipv4/tcp_timer.c |    8 ++++++++
 4 files changed, 16 insertions(+), 1 deletions(-)

Comments

Eric Dumazet April 18, 2010, 5:15 p.m. UTC | #1
Le dimanche 18 avril 2010 à 11:55 -0300, Flavio Leitner a écrit :
> RFC 1122 says the following:
> ...
>   Keep-alive packets MUST only be sent when no data or
>   acknowledgement packets have been received for the
>   connection within an interval.
> ...
> 
> Fix this by storing the timestamp of last received data
> packet and checking for it when the keepalive timer expires.
> 
> -v2 fix do_tcp_setsockopt() as pointed by Eric Dumazet <eric.dumazet@gmail.com>
> 
> Signed-off-by: Flavio Leitner <fleitner@redhat.com>


I find this patch very welcome, and we could easily make this new
lrcvtime information available in diagnostic tools (ss command).

But are you sure you update it for all valid packets?

If we receive a pure ACK, it seems you do not ...



> ---
>  include/linux/tcp.h  |    1 +
>  net/ipv4/tcp.c       |    5 ++++-
>  net/ipv4/tcp_input.c |    3 +++
>  net/ipv4/tcp_timer.c |    8 ++++++++
>  4 files changed, 16 insertions(+), 1 deletions(-)
> 
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index a778ee0..405678f 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -314,6 +314,7 @@ struct tcp_sock {
>   	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */
>  	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
>  	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */
> +	u32	lrcvtime;	/* timestamp of last received data packet (for keepalives) */
>  
>  	/* Data for direct copy to user */
>  	struct {
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 0f8caf6..a4048d7 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -2298,7 +2298,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
>  			if (sock_flag(sk, SOCK_KEEPOPEN) &&
>  			    !((1 << sk->sk_state) &
>  			      (TCPF_CLOSE | TCPF_LISTEN))) {
> -				__u32 elapsed = tcp_time_stamp - tp->rcv_tstamp;
> +				u32 elapsed = min_t(u32,
> +						      tcp_time_stamp - tp->rcv_tstamp,
> +						      tcp_time_stamp - tp->lrcvtime);
> +
>  				if (tp->keepalive_time > elapsed)
>  					elapsed = tp->keepalive_time - elapsed;
>  				else
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index f240f57..60d2980 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -5391,6 +5391,8 @@ no_ack:
>  				__kfree_skb(skb);
>  			else
>  				sk->sk_data_ready(sk, 0);
> +
> +			tp->lrcvtime = tcp_time_stamp;
>  			return 0;
>  		}
>  	}
> @@ -5421,6 +5423,7 @@ step5:
>  
>  	tcp_data_snd_check(sk);
>  	tcp_ack_snd_check(sk);
> +	tp->lrcvtime = tcp_time_stamp;
>  	return 0;
>  
>  csum_error:
> diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
> index 8a0ab29..74dd804 100644
> --- a/net/ipv4/tcp_timer.c
> +++ b/net/ipv4/tcp_timer.c
> @@ -554,6 +554,14 @@ static void tcp_keepalive_timer (unsigned long data)
>  	if (tp->packets_out || tcp_send_head(sk))
>  		goto resched;
>  
> +	elapsed = tcp_time_stamp - tp->lrcvtime;
> +	
> +	/* receiving data means alive */
> +	if (elapsed < keepalive_time_when(tp)) {
> +		elapsed = keepalive_time_when(tp) - elapsed;
> +		goto resched;
> +	}
> +
>  	elapsed = tcp_time_stamp - tp->rcv_tstamp;
>  
>  	if (elapsed >= keepalive_time_when(tp)) {


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilpo Järvinen April 18, 2010, 8:34 p.m. UTC | #2
On Sun, 18 Apr 2010, Eric Dumazet wrote:

> Le dimanche 18 avril 2010 à 11:55 -0300, Flavio Leitner a écrit :
> > RFC 1122 says the following:
> > ...
> >   Keep-alive packets MUST only be sent when no data or
> >   acknowledgement packets have been received for the
> >   connection within an interval.
> > ...
> > 
> > Fix this by storing the timestamp of last received data
> > packet and checking for it when the keepalive timer expires.
> > 
> > -v2 fix do_tcp_setsockopt() as pointed by Eric Dumazet <eric.dumazet@gmail.com>
> > 
> > Signed-off-by: Flavio Leitner <fleitner@redhat.com>
> 
> 
> I find this patch very welcome, and we could easily use this new
> lrcvtime information available in diagnostic tools (ss command)
> 
> But are you sure you update it for all valid packets ?
> 
> If we receive a pure ACK, it seems you do not ...

I fail to see why the addition of this new variable is necessary at all.
Could either of you enlighten me as to why exactly it's necessary and why
rcv_tstamp will not suffice?

> > ---
> >  include/linux/tcp.h  |    1 +
> >  net/ipv4/tcp.c       |    5 ++++-
> >  net/ipv4/tcp_input.c |    3 +++
> >  net/ipv4/tcp_timer.c |    8 ++++++++
> >  4 files changed, 16 insertions(+), 1 deletions(-)
> > 
> > diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> > index a778ee0..405678f 100644
> > --- a/include/linux/tcp.h
> > +++ b/include/linux/tcp.h
> > @@ -314,6 +314,7 @@ struct tcp_sock {
> >   	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */
> >  	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
> >  	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */
> > +	u32	lrcvtime;	/* timestamp of last received data packet (for keepalives) */
> >  
> >  	/* Data for direct copy to user */
> >  	struct {
> > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> > index 0f8caf6..a4048d7 100644
> > --- a/net/ipv4/tcp.c
> > +++ b/net/ipv4/tcp.c
> > @@ -2298,7 +2298,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
> >  			if (sock_flag(sk, SOCK_KEEPOPEN) &&
> >  			    !((1 << sk->sk_state) &
> >  			      (TCPF_CLOSE | TCPF_LISTEN))) {
> > -				__u32 elapsed = tcp_time_stamp - tp->rcv_tstamp;
> > +				u32 elapsed = min_t(u32,
> > +						      tcp_time_stamp - tp->rcv_tstamp,
> > +						      tcp_time_stamp - tp->lrcvtime);
> > +
> >  				if (tp->keepalive_time > elapsed)
> >  					elapsed = tp->keepalive_time - elapsed;
> >  				else
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index f240f57..60d2980 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -5391,6 +5391,8 @@ no_ack:
> >  				__kfree_skb(skb);
> >  			else
> >  				sk->sk_data_ready(sk, 0);
> > +
> > +			tp->lrcvtime = tcp_time_stamp;
> >  			return 0;
> >  		}
> >  	}
> > @@ -5421,6 +5423,7 @@ step5:
> >  
> >  	tcp_data_snd_check(sk);
> >  	tcp_ack_snd_check(sk);
> > +	tp->lrcvtime = tcp_time_stamp;
> >  	return 0;
> >  
> >  csum_error:
> > diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
> > index 8a0ab29..74dd804 100644
> > --- a/net/ipv4/tcp_timer.c
> > +++ b/net/ipv4/tcp_timer.c
> > @@ -554,6 +554,14 @@ static void tcp_keepalive_timer (unsigned long data)
> >  	if (tp->packets_out || tcp_send_head(sk))
> >  		goto resched;
> >  
> > +	elapsed = tcp_time_stamp - tp->lrcvtime;
> > +	
> > +	/* receiving data means alive */
> > +	if (elapsed < keepalive_time_when(tp)) {
> > +		elapsed = keepalive_time_when(tp) - elapsed;
> > +		goto resched;
> > +	}
> > +
> >  	elapsed = tcp_time_stamp - tp->rcv_tstamp;
> >  
> >  	if (elapsed >= keepalive_time_when(tp)) {
David Miller April 22, 2010, 5:42 a.m. UTC | #3
From: "Ilpo Järvinen" <ilpo.jarvinen@helsinki.fi>
Date: Sun, 18 Apr 2010 23:34:15 +0300 (EEST)

> I fail to see why the addition of this new variable is necessary at all, 
> could either of you enlight me why exactly it's necessary and rcv_tstamp 
> will not suffice?

I agree, the existing rcv_tstamp should serve this purpose just
fine.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Flavio Leitner April 25, 2010, 11:44 p.m. UTC | #4
On Sun, Apr 18, 2010 at 07:15:19PM +0200, Eric Dumazet wrote:
> Le dimanche 18 avril 2010 à 11:55 -0300, Flavio Leitner a écrit :
> > RFC 1122 says the following:
> > ...
> >   Keep-alive packets MUST only be sent when no data or
> >   acknowledgement packets have been received for the
> >   connection within an interval.
> > ...
> > 
> > Fix this by storing the timestamp of last received data
> > packet and checking for it when the keepalive timer expires.
> > 
> > -v2 fix do_tcp_setsockopt() as pointed by Eric Dumazet <eric.dumazet@gmail.com>
> > 
> > Signed-off-by: Flavio Leitner <fleitner@redhat.com>
> 
> 
> I find this patch very welcome, and we could easily use this new
> lrcvtime information available in diagnostic tools (ss command)
> 
> But are you sure you update it for all valid packets ?
> 
> If we receive a pure ACK, it seems you do not ...

A pure ACK is handled by rcv_tstamp in the struct, which is
also considered in tcp_keepalive_timer().

The idea of exporting those variables is nice, I'll see
how 'ss' works.

thanks for reviewing the patch!
 

 
> > ---
> >  include/linux/tcp.h  |    1 +
> >  net/ipv4/tcp.c       |    5 ++++-
> >  net/ipv4/tcp_input.c |    3 +++
> >  net/ipv4/tcp_timer.c |    8 ++++++++
> >  4 files changed, 16 insertions(+), 1 deletions(-)
> > 
> > diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> > index a778ee0..405678f 100644
> > --- a/include/linux/tcp.h
> > +++ b/include/linux/tcp.h
> > @@ -314,6 +314,7 @@ struct tcp_sock {
> >   	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */
> >  	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
> >  	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */
> > +	u32	lrcvtime;	/* timestamp of last received data packet (for keepalives) */
> >  
> >  	/* Data for direct copy to user */
> >  	struct {
> > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> > index 0f8caf6..a4048d7 100644
> > --- a/net/ipv4/tcp.c
> > +++ b/net/ipv4/tcp.c
> > @@ -2298,7 +2298,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
> >  			if (sock_flag(sk, SOCK_KEEPOPEN) &&
> >  			    !((1 << sk->sk_state) &
> >  			      (TCPF_CLOSE | TCPF_LISTEN))) {
> > -				__u32 elapsed = tcp_time_stamp - tp->rcv_tstamp;
> > +				u32 elapsed = min_t(u32,
> > +						      tcp_time_stamp - tp->rcv_tstamp,
> > +						      tcp_time_stamp - tp->lrcvtime);
> > +
> >  				if (tp->keepalive_time > elapsed)
> >  					elapsed = tp->keepalive_time - elapsed;
> >  				else
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index f240f57..60d2980 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -5391,6 +5391,8 @@ no_ack:
> >  				__kfree_skb(skb);
> >  			else
> >  				sk->sk_data_ready(sk, 0);
> > +
> > +			tp->lrcvtime = tcp_time_stamp;
> >  			return 0;
> >  		}
> >  	}
> > @@ -5421,6 +5423,7 @@ step5:
> >  
> >  	tcp_data_snd_check(sk);
> >  	tcp_ack_snd_check(sk);
> > +	tp->lrcvtime = tcp_time_stamp;
> >  	return 0;
> >  
> >  csum_error:
> > diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
> > index 8a0ab29..74dd804 100644
> > --- a/net/ipv4/tcp_timer.c
> > +++ b/net/ipv4/tcp_timer.c
> > @@ -554,6 +554,14 @@ static void tcp_keepalive_timer (unsigned long data)
> >  	if (tp->packets_out || tcp_send_head(sk))
> >  		goto resched;
> >  
> > +	elapsed = tcp_time_stamp - tp->lrcvtime;
> > +	
> > +	/* receiving data means alive */
> > +	if (elapsed < keepalive_time_when(tp)) {
> > +		elapsed = keepalive_time_when(tp) - elapsed;
> > +		goto resched;
> > +	}
> > +
> >  	elapsed = tcp_time_stamp - tp->rcv_tstamp;
> >  
> >  	if (elapsed >= keepalive_time_when(tp)) {
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Flavio Leitner April 25, 2010, 11:55 p.m. UTC | #5
On Wed, Apr 21, 2010 at 10:42:05PM -0700, David Miller wrote:
> From: "Ilpo Järvinen" <ilpo.jarvinen@helsinki.fi>
> Date: Sun, 18 Apr 2010 23:34:15 +0300 (EEST)
> 
> > I fail to see why the addition of this new variable is necessary at all, 
> > could either of you enlight me why exactly it's necessary and rcv_tstamp 
> > will not suffice?
> 
> I agree, the existing rcv_tstamp should serve this purpose just
> fine.

I thought it would break TCP_INFO/tcp_get_info(). Actually, reviewing
the code again I found another variable which does exactly what is needed.

I'll post a new patch for review.

thanks,
diff mbox

Patch

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index a778ee0..405678f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -314,6 +314,7 @@  struct tcp_sock {
  	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */
 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
 	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */
+	u32	lrcvtime;	/* timestamp of last received data packet (for keepalives) */
 
 	/* Data for direct copy to user */
 	struct {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0f8caf6..a4048d7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2298,7 +2298,10 @@  static int do_tcp_setsockopt(struct sock *sk, int level,
 			if (sock_flag(sk, SOCK_KEEPOPEN) &&
 			    !((1 << sk->sk_state) &
 			      (TCPF_CLOSE | TCPF_LISTEN))) {
-				__u32 elapsed = tcp_time_stamp - tp->rcv_tstamp;
+				u32 elapsed = min_t(u32,
+						      tcp_time_stamp - tp->rcv_tstamp,
+						      tcp_time_stamp - tp->lrcvtime);
+
 				if (tp->keepalive_time > elapsed)
 					elapsed = tp->keepalive_time - elapsed;
 				else
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f240f57..60d2980 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5391,6 +5391,8 @@  no_ack:
 				__kfree_skb(skb);
 			else
 				sk->sk_data_ready(sk, 0);
+
+			tp->lrcvtime = tcp_time_stamp;
 			return 0;
 		}
 	}
@@ -5421,6 +5423,7 @@  step5:
 
 	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);
+	tp->lrcvtime = tcp_time_stamp;
 	return 0;
 
 csum_error:
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8a0ab29..74dd804 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -554,6 +554,14 @@  static void tcp_keepalive_timer (unsigned long data)
 	if (tp->packets_out || tcp_send_head(sk))
 		goto resched;
 
+	elapsed = tcp_time_stamp - tp->lrcvtime;
+	
+	/* receiving data means alive */
+	if (elapsed < keepalive_time_when(tp)) {
+		elapsed = keepalive_time_when(tp) - elapsed;
+		goto resched;
+	}
+
 	elapsed = tcp_time_stamp - tp->rcv_tstamp;
 
 	if (elapsed >= keepalive_time_when(tp)) {