diff mbox

TCP_DEFER_ACCEPT is missing counter update

Message ID Pine.LNX.4.58.0910171709190.10440@u.domain.uli
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Julian Anastasov Oct. 17, 2009, 2:20 p.m. UTC
Hello,

On Sat, 17 Oct 2009, Eric Dumazet wrote:

> I really like this, but please define helper functions out of do_tcp_setsockopt()
> 
> /* Translate value in seconds to number of SYN-ACK retransmits */
> static u8 secs_to_retrans(int seconds)
> {
> 	u8 res = 0;
> 
> 	if (seconds > 0) {
> 		int timeout = TCP_TIMEOUT_INIT / HZ;
> 		int period = timeout;
> 
> 		res = 1;
> 		while (res < 255 && seconds > period) {
> 			res++;
> 			timeout <<= 1;
> 			if (timeout > TCP_RTO_MAX / HZ)
> 				timeout = TCP_RTO_MAX / HZ;
> 			period += timeout;
> 		}
> 	}
> 	return res;
> }
> 
> You also need the reverse function for getsockopt()...

	Yes, I forgot that. Here is what I tested, should
I split it later to 2 patches? May be it should go somewhere
in net/core/sock.c as extern funcs with EXPORT_SYMBOL to
allow other protocols one day to use it?

Signed-off-by: Julian Anastasov <ja@ssi.bg>


Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Eric Dumazet Oct. 19, 2009, 8:01 p.m. UTC | #1
Julian Anastasov a écrit :
> 	Hello,
> 
> On Sat, 17 Oct 2009, Eric Dumazet wrote:
> 
>> I really like this, but please define helper functions out of do_tcp_setsockopt()
>>
>> /* Translate value in seconds to number of SYN-ACK retransmits */
>> static u8 secs_to_retrans(int seconds)
>> {
>> 	u8 res = 0;
>>
>> 	if (seconds > 0) {
>> 		int timeout = TCP_TIMEOUT_INIT / HZ;
>> 		int period = timeout;
>>
>> 		res = 1;
>> 		while (res < 255 && seconds > period) {
>> 			res++;
>> 			timeout <<= 1;
>> 			if (timeout > TCP_RTO_MAX / HZ)
>> 				timeout = TCP_RTO_MAX / HZ;
>> 			period += timeout;
>> 		}
>> 	}
>> 	return res;
>> }
>>
>> You also need the reverse function for getsockopt()...
> 
> 	Yes, I forgot that. Here is what I tested, should
> I split it later to 2 patches? May be it should go somewhere
> in net/core/sock.c as extern funcs with EXPORT_SYMBOL to
> allow other protocols one day to use it?
> 

No need to EXPORT_SYMBOL it for the moment. We can add it later if really needed.

David, I think we should revert 6d01a026b7d3009a418326bdcf313503a314f1ea
(tcp: fix tcp_defer_accept to consider the timeout) 
since we know its broken.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Willy Tarreau Oct. 19, 2009, 8:11 p.m. UTC | #2
On Mon, Oct 19, 2009 at 10:01:15PM +0200, Eric Dumazet wrote:
> David, I think we should revert 6d01a026b7d3009a418326bdcf313503a314f1ea
> (tcp: fix tcp_defer_accept to consider the timeout) 
> since we know its broken.

yes I agree with Eric, please revert it and wait for Julian's
patch which gets it right. Sorry for the wrong fix, at least
it will have brought the discussion on the table, but without
a faulty patch it would have been better :-/

Thanks,
Willy

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Oct. 19, 2009, 8:17 p.m. UTC | #3
Willy Tarreau a écrit :
> On Mon, Oct 19, 2009 at 10:01:15PM +0200, Eric Dumazet wrote:
>> David, I think we should revert 6d01a026b7d3009a418326bdcf313503a314f1ea
>> (tcp: fix tcp_defer_accept to consider the timeout) 
>> since we know its broken.
> 
> yes I agree with Eric, please revert it and wait for Julian's
> patch which gets it right. Sorry for the wrong fix, at least
> it will have brought the discussion on the table, but without
> a faulty patch it would have been better :-/
> 

Thanks again Willy, we made progress and this is nice :)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Oct. 20, 2009, 2:23 a.m. UTC | #4
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 19 Oct 2009 22:01:15 +0200

> David, I think we should revert 6d01a026b7d3009a418326bdcf313503a314f1ea
> (tcp: fix tcp_defer_accept to consider the timeout) 
> since we know its broken.

I've reverted that patch and applied Julian's three tcp
patches, thanks!
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff -urp v2.6.31/linux/net/ipv4/tcp.c linux/net/ipv4/tcp.c
--- v2.6.31/linux/net/ipv4/tcp.c	2009-09-11 10:27:17.000000000 +0300
+++ linux/net/ipv4/tcp.c	2009-10-17 16:37:48.000000000 +0300
@@ -326,6 +326,43 @@  void tcp_enter_memory_pressure(struct so
 
 EXPORT_SYMBOL(tcp_enter_memory_pressure);
 
+/* Convert seconds to retransmits based on initial and max timeout */
+static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
+{
+	u8 res = 0;
+
+	if (seconds > 0) {
+		int period = timeout;
+
+		res = 1;
+		while (seconds > period && res < 255) {
+			res++;
+			timeout <<= 1;
+			if (timeout > rto_max)
+				timeout = rto_max;
+			period += timeout;
+		}
+	}
+	return res;
+}
+
+/* Convert retransmits to seconds based on initial and max timeout */
+static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
+{
+	int period = 0;
+
+	if (retrans > 0) {
+		period = timeout;
+		while (--retrans) {
+			timeout <<= 1;
+			if (timeout > rto_max)
+				timeout = rto_max;
+			period += timeout;
+		}
+	}
+	return period;
+}
+
 /*
  *	Wait for a TCP event.
  *
@@ -2163,16 +2200,10 @@  static int do_tcp_setsockopt(struct sock
 		break;
 
 	case TCP_DEFER_ACCEPT:
-		icsk->icsk_accept_queue.rskq_defer_accept = 0;
-		if (val > 0) {
-			/* Translate value in seconds to number of
-			 * retransmits */
-			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
-			       val > ((TCP_TIMEOUT_INIT / HZ) <<
-				       icsk->icsk_accept_queue.rskq_defer_accept))
-				icsk->icsk_accept_queue.rskq_defer_accept++;
-			icsk->icsk_accept_queue.rskq_defer_accept++;
-		}
+		/* Translate value in seconds to number of retransmits */
+		icsk->icsk_accept_queue.rskq_defer_accept =
+			secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+					TCP_RTO_MAX / HZ);
 		break;
 
 	case TCP_WINDOW_CLAMP:
@@ -2353,8 +2384,8 @@  static int do_tcp_getsockopt(struct sock
 			val = (val ? : sysctl_tcp_fin_timeout) / HZ;
 		break;
 	case TCP_DEFER_ACCEPT:
-		val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
-			((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
+		val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
+				      TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
 		break;
 	case TCP_WINDOW_CLAMP:
 		val = tp->window_clamp;