diff mbox

netfilter: nf_conntrack: fix tcp_in_window for Fast Open

Message ID 1376094087-17700-1-git-send-email-ycheng@google.com
State Accepted
Headers show

Commit Message

Yuchung Cheng Aug. 10, 2013, 12:21 a.m. UTC
Currently the conntrack checks if the ending sequence of a packet
falls within the observed receive window. However it does so even
if it has not observe any packet from the remote yet and uses an
uninitialized receive window (td_maxwin).

If a connection uses Fast Open to send a SYN-data packet which is
dropped afterward in the network. The subsequent SYNs retransmits
will all fail this check and be discarded, leading to a connection
timeout. This is because the SYN retransmit does not contain data
payload so

end == initial sequence number (isn) + 1
sender->td_end == isn + syn_data_len
receiver->td_maxwin == 0

The fix is to only apply this check after td_maxwin is initialized.

Reported-by: Michael Chan <mcfchan@stanford.edu>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

Comments

Eric Dumazet Aug. 10, 2013, 12:25 a.m. UTC | #1
On Fri, 2013-08-09 at 17:21 -0700, Yuchung Cheng wrote:
> Currently the conntrack checks if the ending sequence of a packet
> falls within the observed receive window. However it does so even
> if it has not observe any packet from the remote yet and uses an
> uninitialized receive window (td_maxwin).
> 
> If a connection uses Fast Open to send a SYN-data packet which is
> dropped afterward in the network. The subsequent SYNs retransmits
> will all fail this check and be discarded, leading to a connection
> timeout. This is because the SYN retransmit does not contain data
> payload so
> 
> end == initial sequence number (isn) + 1
> sender->td_end == isn + syn_data_len
> receiver->td_maxwin == 0
> 
> The fix is to only apply this check after td_maxwin is initialized.
> 
> Reported-by: Michael Chan <mcfchan@stanford.edu>
> Signed-off-by: Yuchung Cheng <ycheng@google.com>
> ---

Acked-by: Eric Dumazet <edumazet@google.com>



--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jozsef Kadlecsik Aug. 10, 2013, 1:01 p.m. UTC | #2
On Fri, 9 Aug 2013, Yuchung Cheng wrote:

> Currently the conntrack checks if the ending sequence of a packet
> falls within the observed receive window. However it does so even
> if it has not observe any packet from the remote yet and uses an
> uninitialized receive window (td_maxwin).
> 
> If a connection uses Fast Open to send a SYN-data packet which is
> dropped afterward in the network. The subsequent SYNs retransmits
> will all fail this check and be discarded, leading to a connection
> timeout. This is because the SYN retransmit does not contain data
> payload so
> 
> end == initial sequence number (isn) + 1
> sender->td_end == isn + syn_data_len
> receiver->td_maxwin == 0
> 
> The fix is to only apply this check after td_maxwin is initialized.
> 
> Reported-by: Michael Chan <mcfchan@stanford.edu>
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

Acked-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>

> ---
>  net/netfilter/nf_conntrack_proto_tcp.c | 12 ++++++++----
>  1 file changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
> index 4d4d8f1..e0f9a32 100644
> --- a/net/netfilter/nf_conntrack_proto_tcp.c
> +++ b/net/netfilter/nf_conntrack_proto_tcp.c
> @@ -526,7 +526,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
>  	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
>  	__u32 seq, ack, sack, end, win, swin;
>  	s16 receiver_offset;
> -	bool res;
> +	bool res, in_recv_win;
>  
>  	/*
>  	 * Get the required data from the packet.
> @@ -649,14 +649,18 @@ static bool tcp_in_window(const struct nf_conn *ct,
>  		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
>  		 receiver->td_scale);
>  
> +	/* Is the ending sequence in the receive window (if available)? */
> +	in_recv_win = !receiver->td_maxwin ||
> +		      after(end, sender->td_end - receiver->td_maxwin - 1);
> +
>  	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
>  		 before(seq, sender->td_maxend + 1),
> -		 after(end, sender->td_end - receiver->td_maxwin - 1),
> +		 (in_recv_win ? 1 : 0),
>  		 before(sack, receiver->td_end + 1),
>  		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
>  
>  	if (before(seq, sender->td_maxend + 1) &&
> -	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
> +	    in_recv_win &&
>  	    before(sack, receiver->td_end + 1) &&
>  	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
>  		/*
> @@ -725,7 +729,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
>  			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
>  			"nf_ct_tcp: %s ",
>  			before(seq, sender->td_maxend + 1) ?
> -			after(end, sender->td_end - receiver->td_maxwin - 1) ?
> +			in_recv_win ?
>  			before(sack, receiver->td_end + 1) ?
>  			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
>  			: "ACK is under the lower bound (possible overly delayed ACK)"
> -- 
> 1.8.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Aug. 10, 2013, 4:38 p.m. UTC | #3
On Sat, Aug 10, 2013 at 03:01:36PM +0200, Jozsef Kadlecsik wrote:
> On Fri, 9 Aug 2013, Yuchung Cheng wrote:
> 
> > Currently the conntrack checks if the ending sequence of a packet
> > falls within the observed receive window. However it does so even
> > if it has not observe any packet from the remote yet and uses an
> > uninitialized receive window (td_maxwin).
> > 
> > If a connection uses Fast Open to send a SYN-data packet which is
> > dropped afterward in the network. The subsequent SYNs retransmits
> > will all fail this check and be discarded, leading to a connection
> > timeout. This is because the SYN retransmit does not contain data
> > payload so
> > 
> > end == initial sequence number (isn) + 1
> > sender->td_end == isn + syn_data_len
> > receiver->td_maxwin == 0
> > 
> > The fix is to only apply this check after td_maxwin is initialized.
> > 
> > Reported-by: Michael Chan <mcfchan@stanford.edu>
> > Signed-off-by: Yuchung Cheng <ycheng@google.com>
> 
> Acked-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>

Applied, thanks everyone.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 4d4d8f1..e0f9a32 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -526,7 +526,7 @@  static bool tcp_in_window(const struct nf_conn *ct,
 	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
 	__u32 seq, ack, sack, end, win, swin;
 	s16 receiver_offset;
-	bool res;
+	bool res, in_recv_win;
 
 	/*
 	 * Get the required data from the packet.
@@ -649,14 +649,18 @@  static bool tcp_in_window(const struct nf_conn *ct,
 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 		 receiver->td_scale);
 
+	/* Is the ending sequence in the receive window (if available)? */
+	in_recv_win = !receiver->td_maxwin ||
+		      after(end, sender->td_end - receiver->td_maxwin - 1);
+
 	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
 		 before(seq, sender->td_maxend + 1),
-		 after(end, sender->td_end - receiver->td_maxwin - 1),
+		 (in_recv_win ? 1 : 0),
 		 before(sack, receiver->td_end + 1),
 		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
 
 	if (before(seq, sender->td_maxend + 1) &&
-	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
+	    in_recv_win &&
 	    before(sack, receiver->td_end + 1) &&
 	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
 		/*
@@ -725,7 +729,7 @@  static bool tcp_in_window(const struct nf_conn *ct,
 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 			"nf_ct_tcp: %s ",
 			before(seq, sender->td_maxend + 1) ?
-			after(end, sender->td_end - receiver->td_maxwin - 1) ?
+			in_recv_win ?
 			before(sack, receiver->td_end + 1) ?
 			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
 			: "ACK is under the lower bound (possible overly delayed ACK)"