
[RFC,SRU,Trusty] tcp: make challenge acks less predictable

Message ID 1470994320-26211-1-git-send-email-stefan.bader@canonical.com
State New

Commit Message

Stefan Bader Aug. 12, 2016, 9:32 a.m. UTC
This is a rather quick (and not the most elegant) backport of the
upstream fix for Trusty (Precise will need the same or even more).
The problem is that neither WRITE_ONCE nor prandom_u32_max exists
yet. We might be able to just replace prandom_u32_max with its
actual function body, but I am not sure I would trust myself
enough to try the same for the WRITE_ONCE part.
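
For prandom_u32_max, that would mean open-coding the upstream helper,
which is small; this is exactly what the patch below inlines into
tcp_input.c:

	/* Upstream prandom_u32_max(): scale a 32-bit random number into
	 * [0, ep_ro) with a widening multiply and shift, avoiding the
	 * modulo bias that prandom_u32() % ep_ro would have.
	 */
	static inline u32 prandom_u32_max(u32 ep_ro)
	{
		return (u32)(((u64) prandom_u32() * ep_ro) >> 32);
	}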

-Stefan


From 16961a6e38afb30c0afb41d766b3fa487fb017f4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 10 Jul 2016 10:04:02 +0200
Subject: [PATCH] tcp: make challenge acks less predictable

Yue Cao claims that current host rate limiting of challenge ACKs
(RFC 5961) could leak enough information to allow a patient attacker
to hijack TCP sessions. He will soon provide details in an academic
paper.

This patch increases the default limit from 100 to 1000, and adds
some randomization so that the attacker can no longer hijack
sessions without spending a considerable number of probes.

Based on initial analysis and patch from Linus.

Note that we also have per socket rate limiting, so it is tempting
to remove the host limit in the future.

v2: randomize the count of challenge acks per second, not the period.
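
With the default limit of 1000, half is 500 and prandom_u32_max(1000)
returns a uniform value in [0, 1000), so each one-second window gets
a budget of between 500 and 1499 challenge ACKs.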

Fixes: 282f23c6ee34 ("tcp: implement RFC 5961 3.2")
Reported-by: Yue Cao <ycao009@ucr.edu>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

CVE-2016-5696

(backported from commit 75ff39ccc1bd5d3c455b6822ab09e533c551f758 upstream)
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
---
 net/ipv4/tcp_input.c | 42 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

Comments

Tim Gardner Aug. 12, 2016, 1:14 p.m. UTC | #1
On 08/12/2016 03:32 AM, Stefan Bader wrote:
> This is a rather quick (and not the most elegant) backport of the
> upstream fix for Trusty (Precise will need the same or even more).
> The problem is that neither WRITE_ONCE nor prandom_u32_max exists
> yet. We might be able to just replace prandom_u32_max with its
> actual function body, but I am not sure I would trust myself
> enough to try the same for the WRITE_ONCE part.
> 
> -Stefan
> 
> 
> From 16961a6e38afb30c0afb41d766b3fa487fb017f4 Mon Sep 17 00:00:00 2001
> From: Eric Dumazet <edumazet@google.com>
> Date: Sun, 10 Jul 2016 10:04:02 +0200
> Subject: [PATCH] tcp: make challenge acks less predictable
> 
> Yue Cao claims that current host rate limiting of challenge ACKs
> (RFC 5961) could leak enough information to allow a patient attacker
> to hijack TCP sessions. He will soon provide details in an academic
> paper.
> 
> This patch increases the default limit from 100 to 1000, and adds
> some randomization so that the attacker can no longer hijack
> sessions without spending a considerable number of probes.
> 
> Based on initial analysis and patch from Linus.
> 
> Note that we also have per socket rate limiting, so it is tempting
> to remove the host limit in the future.
> 
> v2: randomize the count of challenge acks per second, not the period.
> 
> Fixes: 282f23c6ee34 ("tcp: implement RFC 5961 3.2")
> Reported-by: Yue Cao <ycao009@ucr.edu>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Yuchung Cheng <ycheng@google.com>
> Cc: Neal Cardwell <ncardwell@google.com>
> Acked-by: Neal Cardwell <ncardwell@google.com>
> Acked-by: Yuchung Cheng <ycheng@google.com>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> 
> CVE-2016-5696
> 
> (backported from commit 75ff39ccc1bd5d3c455b6822ab09e533c551f758 upstream)
> Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
> ---
>  net/ipv4/tcp_input.c | 42 ++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 38 insertions(+), 4 deletions(-)
> 
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 2cc1313..4c244cd 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -86,7 +86,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 1;
>  EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
>  
>  /* rfc5961 challenge ack rate limiting */
> -int sysctl_tcp_challenge_ack_limit = 100;
> +int sysctl_tcp_challenge_ack_limit = 1000;
>  
>  int sysctl_tcp_stdurg __read_mostly;
>  int sysctl_tcp_rfc1337 __read_mostly;
> @@ -3282,23 +3282,57 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
>  	return flag;
>  }
>  
> +static __always_inline void __write_once_size(volatile void *p, void *res, int size)
> +{
> +	switch (size) {
> +	case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
> +	case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
> +	case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
> +	case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
> +	default:
> +		barrier();
> +		__builtin_memcpy((void *)p, (const void *)res, size);
> +		barrier();
> +	}
> +}
> +
> +#define WRITE_ONCE(x, val) \
> +({							\
> +	union { typeof(x) __val; char __c[1]; } __u =	\
> +		{ .__val = (__force typeof(x)) (val) };	\
> +	__write_once_size(&(x), __u.__c, sizeof(x));	\
> +	__u.__val;					\
> +})
> +
> +static inline u32 prandom_u32_max(u32 ep_ro)
> +{
> +	return (u32)(((u64) prandom_u32() * ep_ro) >> 32);
> +}
> +
>  /* RFC 5961 7 [ACK Throttling] */
>  static void tcp_send_challenge_ack(struct sock *sk)
>  {
>  	/* unprotected vars, we dont care of overwrites */
>  	static u32 challenge_timestamp;
>  	static unsigned int challenge_count;
> -	u32 now = jiffies / HZ;
> +	u32 count, now = jiffies / HZ;
>  
> +	/* Check host-wide RFC 5961 rate limit. */
>  	if (now != challenge_timestamp) {
> +		u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
> +
>  		challenge_timestamp = now;
> -		challenge_count = 0;
> +		WRITE_ONCE(challenge_count, half +
> +			   prandom_u32_max(sysctl_tcp_challenge_ack_limit));
>  	}
> -	if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
> +	count = ACCESS_ONCE(challenge_count);
> +	if (count > 0) {
> +		WRITE_ONCE(challenge_count, count - 1);
>  		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
>  		tcp_send_ack(sk);
>  	}
>  }
> +#undef WRITE_ONCE
>  
>  static void tcp_store_ts_recent(struct tcp_sock *tp)
>  {
> 

commit 002e47cc2aec702e1eb850816b44e12611a0eda1 ('kernel: Provide
READ_ONCE and ASSIGN_ONCE') and commit
76858ed8581706c776c56ee872fd5259e40b750d ('kernel: Change
ASSIGN_ONCE(val, x) to WRITE_ONCE(x, val)') are clean cherry-picks. That
would at least avoid having to embed WRITE_ONCE() in the backport.

rtg
Seth Forshee Aug. 12, 2016, 1:19 p.m. UTC | #2
On Fri, Aug 12, 2016 at 11:32:00AM +0200, Stefan Bader wrote:
> This is a rather quick (and not the most elegant) backport of the
> upstream fix for Trusty (Precise will need the same or even more).
> The problem is that neither WRITE_ONCE nor prandom_u32_max exists
> yet. We might be able to just replace prandom_u32_max with its
> actual function body, but I am not sure I would trust myself
> enough to try the same for the WRITE_ONCE part.

I note that in Ben Hutchings' backports for 3.16 and 3.2 he used
ACCESS_ONCE rather than WRITE_ONCE:

https://git.kernel.org/cgit/linux/kernel/git/bwh/linux-stable-queue.git/tree/queue-3.2/tcp-make-challenge-acks-less-predictable.patch
https://git.kernel.org/cgit/linux/kernel/git/bwh/linux-stable-queue.git/tree/queue-3.16/tcp-make-challenge-acks-less-predictable.patch

That looks legitimate since challenge_count is an unsigned int, i.e. it
should result in the same assignment as the WRITE_ONCE backport.
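
In other words (an untested sketch), the refill store in this
backport would simply become:

	/* ACCESS_ONCE() degrades to a volatile access on a scalar, so
	 * storing through it is a single volatile write, which is all
	 * WRITE_ONCE() boils down to for an unsigned int.
	 */
	ACCESS_ONCE(challenge_count) = half +
		prandom_u32_max(sysctl_tcp_challenge_ack_limit);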

Seth
Tim Gardner Aug. 12, 2016, 1:30 p.m. UTC | #3
In fact, how about this 4-patch set. The first 3 are clean cherry-picks, but
they are required to provide the inline helpers upon which the real fix
depends (a sketch of the resulting hunk follows the list).

[PATCH 1/4] kernel: Provide READ_ONCE and ASSIGN_ONCE
[PATCH 2/4] kernel: Change ASSIGN_ONCE(val, x) to WRITE_ONCE(x, val)
[PATCH 3/4] random32: add prandom_u32_max and convert open coded
[PATCH 4/4] tcp: make challenge acks less predictable
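
With those three in place, the final patch should be able to take
the upstream refill logic essentially verbatim:

	if (now != challenge_timestamp) {
		u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;

		challenge_timestamp = now;
		WRITE_ONCE(challenge_count, half +
			   prandom_u32_max(sysctl_tcp_challenge_ack_limit));
	}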


rtg

Patch

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2cc1313..4c244cd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -86,7 +86,7 @@  int sysctl_tcp_adv_win_scale __read_mostly = 1;
 EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 
 /* rfc5961 challenge ack rate limiting */
-int sysctl_tcp_challenge_ack_limit = 100;
+int sysctl_tcp_challenge_ack_limit = 1000;
 
 int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
@@ -3282,23 +3282,57 @@  static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 	return flag;
 }
 
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+	switch (size) {
+	case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
+	case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
+	case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
+	case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
+	default:
+		barrier();
+		__builtin_memcpy((void *)p, (const void *)res, size);
+		barrier();
+	}
+}
+
+#define WRITE_ONCE(x, val) \
+({							\
+	union { typeof(x) __val; char __c[1]; } __u =	\
+		{ .__val = (__force typeof(x)) (val) };	\
+	__write_once_size(&(x), __u.__c, sizeof(x));	\
+	__u.__val;					\
+})
+
+static inline u32 prandom_u32_max(u32 ep_ro)
+{
+	return (u32)(((u64) prandom_u32() * ep_ro) >> 32);
+}
+
 /* RFC 5961 7 [ACK Throttling] */
 static void tcp_send_challenge_ack(struct sock *sk)
 {
 	/* unprotected vars, we dont care of overwrites */
 	static u32 challenge_timestamp;
 	static unsigned int challenge_count;
-	u32 now = jiffies / HZ;
+	u32 count, now = jiffies / HZ;
 
+	/* Check host-wide RFC 5961 rate limit. */
 	if (now != challenge_timestamp) {
+		u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+
 		challenge_timestamp = now;
-		challenge_count = 0;
+		WRITE_ONCE(challenge_count, half +
+			   prandom_u32_max(sysctl_tcp_challenge_ack_limit));
 	}
-	if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
+	count = ACCESS_ONCE(challenge_count);
+	if (count > 0) {
+		WRITE_ONCE(challenge_count, count - 1);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
 		tcp_send_ack(sk);
 	}
 }
+#undef WRITE_ONCE
 
 static void tcp_store_ts_recent(struct tcp_sock *tp)
 {