diff mbox

[net-next,02/14] tcp: use windowed min filter library for TCP min_rtt estimation

Message ID 1474051743-13311-3-git-send-email-ncardwell@google.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Neal Cardwell Sept. 16, 2016, 6:48 p.m. UTC
Refactor the TCP min_rtt code to reuse the new win_minmax library in
lib/win_minmax.c to simplify the TCP code.

This is a pure refactor: the functionality is exactly the same. We
just moved the windowed min code to make TCP easier to read and
maintain, and to allow other parts of the kernel to use the windowed
min/max filter code.

Signed-off-by: Van Jacobson <vanj@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
---
 include/linux/tcp.h      |  5 ++--
 include/net/tcp.h        |  2 +-
 net/ipv4/tcp.c           |  2 +-
 net/ipv4/tcp_input.c     | 64 ++++--------------------------------------------
 net/ipv4/tcp_minisocks.c |  2 +-
 5 files changed, 10 insertions(+), 65 deletions(-)

Comments

kernel test robot Sept. 16, 2016, 7:21 p.m. UTC | #1
Hi Neal,

[auto build test ERROR on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Neal-Cardwell/tcp-BBR-congestion-control-algorithm/20160917-025323
config: x86_64-randconfig-x006-201637 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

>> net/ipv4/tcp_cdg.c:59:8: error: redefinition of 'struct minmax'
    struct minmax {
           ^~~~~~
   In file included from include/linux/tcp.h:22:0,
                    from include/net/tcp.h:24,
                    from net/ipv4/tcp_cdg.c:30:
   include/linux/win_minmax.h:17:8: note: originally defined here
    struct minmax {
           ^~~~~~

vim +59 net/ipv4/tcp_cdg.c

2b0a8c9e Kenneth Klette Jonassen 2015-06-10  43  module_param(window, int, 0444);
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  44  MODULE_PARM_DESC(window, "gradient window size (power of two <= 256)");
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  45  module_param(backoff_beta, uint, 0644);
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  46  MODULE_PARM_DESC(backoff_beta, "backoff beta (0-1024)");
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  47  module_param(backoff_factor, uint, 0644);
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  48  MODULE_PARM_DESC(backoff_factor, "backoff probability scale factor");
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  49  module_param(hystart_detect, uint, 0644);
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  50  MODULE_PARM_DESC(hystart_detect, "use Hybrid Slow start "
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  51  		 "(0: disabled, 1: ACK train, 2: delay threshold, 3: both)");
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  52  module_param(use_ineff, uint, 0644);
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  53  MODULE_PARM_DESC(use_ineff, "use ineffectual backoff detection (threshold)");
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  54  module_param(use_shadow, bool, 0644);
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  55  MODULE_PARM_DESC(use_shadow, "use shadow window heuristic");
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  56  module_param(use_tolerance, bool, 0644);
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  57  MODULE_PARM_DESC(use_tolerance, "use loss tolerance heuristic");
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  58  
2b0a8c9e Kenneth Klette Jonassen 2015-06-10 @59  struct minmax {
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  60  	union {
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  61  		struct {
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  62  			s32 min;
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  63  			s32 max;
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  64  		};
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  65  		u64 v64;
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  66  	};
2b0a8c9e Kenneth Klette Jonassen 2015-06-10  67  };

:::::: The code at line 59 was first introduced by commit
:::::: 2b0a8c9eee81882fc0001ccf6d9af62cdc682f9e tcp: add CDG congestion control

:::::: TO: Kenneth Klette Jonassen <kennetkl@ifi.uio.no>
:::::: CC: David S. Miller <davem@davemloft.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Neal Cardwell Sept. 16, 2016, 7:25 p.m. UTC | #2
On Fri, Sep 16, 2016 at 3:21 PM, kbuild test robot <lkp@intel.com> wrote:
> All errors (new ones prefixed by >>):
>
>>> net/ipv4/tcp_cdg.c:59:8: error: redefinition of 'struct minmax'
>     struct minmax {
>            ^~~~~~
>    In file included from include/linux/tcp.h:22:0,
>                     from include/net/tcp.h:24,
>                     from net/ipv4/tcp_cdg.c:30:
>    include/linux/win_minmax.h:17:8: note: originally defined here
>     struct minmax {
>            ^~~~~~
>
> vim +59 net/ipv4/tcp_cdg.c

Sorry about that. I will fix that and re-post.

neal
diff mbox

Patch

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index c723a46..6433cc8 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -19,6 +19,7 @@ 
 
 
 #include <linux/skbuff.h>
+#include <linux/win_minmax.h>
 #include <net/sock.h>
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -234,9 +235,7 @@  struct tcp_sock {
 	u32	mdev_max_us;	/* maximal mdev for the last rtt period	*/
 	u32	rttvar_us;	/* smoothed mdev_max			*/
 	u32	rtt_seq;	/* sequence number to update rttvar	*/
-	struct rtt_meas {
-		u32 rtt, ts;	/* RTT in usec and sampling time in jiffies. */
-	} rtt_min[3];
+	struct  minmax rtt_min;
 
 	u32	packets_out;	/* Packets which are "in flight"	*/
 	u32	retrans_out;	/* Retransmitted packets out		*/
diff --git a/include/net/tcp.h b/include/net/tcp.h
index fdfbedd..2f1648a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -671,7 +671,7 @@  static inline bool tcp_ca_dst_locked(const struct dst_entry *dst)
 /* Minimum RTT in usec. ~0 means not available. */
 static inline u32 tcp_min_rtt(const struct tcp_sock *tp)
 {
-	return tp->rtt_min[0].rtt;
+	return minmax_get(&tp->rtt_min);
 }
 
 /* Compute the actual receive window we are currently advertising.
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a13fcb3..5b0b49c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -387,7 +387,7 @@  void tcp_init_sock(struct sock *sk)
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
 	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
-	tp->rtt_min[0].rtt = ~0U;
+	minmax_reset(&tp->rtt_min, tcp_time_stamp, ~0U);
 
 	/* So many TCP implementations out there (incorrectly) count the
 	 * initial SYN frame in their delayed-ACK and congestion control
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 70b892d..ac5b38f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2879,67 +2879,13 @@  static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 	*rexmit = REXMIT_LOST;
 }
 
-/* Kathleen Nichols' algorithm for tracking the minimum value of
- * a data stream over some fixed time interval. (E.g., the minimum
- * RTT over the past five minutes.) It uses constant space and constant
- * time per update yet almost always delivers the same minimum as an
- * implementation that has to keep all the data in the window.
- *
- * The algorithm keeps track of the best, 2nd best & 3rd best min
- * values, maintaining an invariant that the measurement time of the
- * n'th best >= n-1'th best. It also makes sure that the three values
- * are widely separated in the time window since that bounds the worse
- * case error when that data is monotonically increasing over the window.
- *
- * Upon getting a new min, we can forget everything earlier because it
- * has no value - the new min is <= everything else in the window by
- * definition and it's the most recent. So we restart fresh on every new min
- * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd
- * best.
- */
 static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
 {
-	const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
-	struct rtt_meas *m = tcp_sk(sk)->rtt_min;
-	struct rtt_meas rttm = {
-		.rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
-		.ts = now,
-	};
-	u32 elapsed;
-
-	/* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
-	if (unlikely(rttm.rtt <= m[0].rtt))
-		m[0] = m[1] = m[2] = rttm;
-	else if (rttm.rtt <= m[1].rtt)
-		m[1] = m[2] = rttm;
-	else if (rttm.rtt <= m[2].rtt)
-		m[2] = rttm;
-
-	elapsed = now - m[0].ts;
-	if (unlikely(elapsed > wlen)) {
-		/* Passed entire window without a new min so make 2nd choice
-		 * the new min & 3rd choice the new 2nd. So forth and so on.
-		 */
-		m[0] = m[1];
-		m[1] = m[2];
-		m[2] = rttm;
-		if (now - m[0].ts > wlen) {
-			m[0] = m[1];
-			m[1] = rttm;
-			if (now - m[0].ts > wlen)
-				m[0] = rttm;
-		}
-	} else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
-		/* Passed a quarter of the window without a new min so
-		 * take 2nd choice from the 2nd quarter of the window.
-		 */
-		m[2] = m[1] = rttm;
-	} else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
-		/* Passed half the window without a new min so take the 3rd
-		 * choice from the last half of the window.
-		 */
-		m[2] = rttm;
-	}
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
+
+	minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp,
+			   rtt_us ? : jiffies_to_usecs(1));
 }
 
 static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f63c73d..5689471 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -464,7 +464,7 @@  struct sock *tcp_create_openreq_child(const struct sock *sk,
 
 		newtp->srtt_us = 0;
 		newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
-		newtp->rtt_min[0].rtt = ~0U;
+		minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U);
 		newicsk->icsk_rto = TCP_TIMEOUT_INIT;
 
 		newtp->packets_out = 0;