[v3,net,4/5] tcp: prevent bogus undos when SACK is not enabled

Message ID 1520936711-16784-5-git-send-email-ilpo.jarvinen@helsinki.fi
State Deferred
Delegated to: David Miller
Headers show
Series
  • tcp: fixes to non-SACK TCP
Related show

Commit Message

Ilpo Järvinen March 13, 2018, 10:25 a.m.
When a cumulative ACK lands at high_seq at the end of loss
recovery and SACK is not enabled, the sender needs to avoid
false fast retransmits (RFC6582). The avoidance mechanism is
implemented by remaining in the loss recovery CA state until
one additional cumulative ACK arrives. During the operation of
this avoidance mechanism, there is an internal transient in the
use of state variables which will always trigger a bogus undo.

When we enter this transient state in tcp_try_undo_recovery,
tcp_any_retrans_done is often (always?) false resulting in
clearing retrans_stamp. On the next cumulative ACK,
tcp_try_undo_recovery again executes because CA state still
remains in the same recovery state and tcp_may_undo will always
return true because tcp_packet_delayed has this condition:
    return !tp->retrans_stamp || ...

Check if the false fast retransmit transient avoidance is in
progress in tcp_packet_delayed to avoid bogus undos. Since snd_una
has advanced already on this ACK but CA state still remains
unchanged (CA state is updated slightly later than undo is
checked), prior_snd_una needs to be passed to tcp_packet_delayed
(instead of tp->snd_una). Passing prior_snd_una around to
tcp_packet_delayed makes this change look more involved than
it really is.

The additional checks done in this change only affect non-SACK
case, the SACK case remains the same.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
---
 net/ipv4/tcp_input.c | 42 ++++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 16 deletions(-)

Comments

Yuchung Cheng March 28, 2018, 10:36 p.m. | #1
On Tue, Mar 13, 2018 at 3:25 AM, Ilpo Järvinen
<ilpo.jarvinen@helsinki.fi> wrote:
> When a cumulative ACK lands to high_seq at the end of loss
> recovery and SACK is not enabled, the sender needs to avoid
> false fast retransmits (RFC6582). The avoidance mechanisms is
> implemented by remaining in the loss recovery CA state until
> one additional cumulative ACK arrives. During the operation of
> this avoidance mechanism, there is internal transient in the
> use of state variables which will always trigger a bogus undo.
Do we have to make undo in non-sack perfect? can we consider a much
simpler but imperfect fix of

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8d480542aa07..95225d9de0af 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2356,6 +2356,7 @@ static bool tcp_try_undo_recovery(struct sock *sk)
                 * fast retransmits (RFC2582). SACK TCP is safe. */
                if (!tcp_any_retrans_done(sk))
                        tp->retrans_stamp = 0;
+               tp->undo_marker = 0;
                return true;
        }



>
> When we enter to this transient state in tcp_try_undo_recovery,
> tcp_any_retrans_done is often (always?) false resulting in
> clearing retrans_stamp. On the next cumulative ACK,
> tcp_try_undo_recovery again executes because CA state still
> remains in the same recovery state and tcp_may_undo will always
> return true because tcp_packet_delayed has this condition:
>     return !tp->retrans_stamp || ...
>
> Check if the false fast retransmit transient avoidance is in
> progress in tcp_packet_delayed to avoid bogus undos. Since snd_una
> has advanced already on this ACK but CA state still remains
> unchanged (CA state is updated slightly later than undo is
> checked), prior_snd_una needs to be passed to tcp_packet_delayed
> (instead of tp->snd_una). Passing prior_snd_una around to
> the tcp_packet_delayed makes this change look more involved than
> it really is.
>
> The additional checks done in this change only affect non-SACK
> case, the SACK case remains the same.
>
> Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
> ---
>  net/ipv4/tcp_input.c | 42 ++++++++++++++++++++++++++----------------
>  1 file changed, 26 insertions(+), 16 deletions(-)
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 72ecfbb..270aa48 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -2241,10 +2241,17 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
>  /* Nothing was retransmitted or returned timestamp is less
>   * than timestamp of the first retransmission.
>   */
> -static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
> +static inline bool tcp_packet_delayed(const struct tcp_sock *tp,
> +                                     const u32 prior_snd_una)
>  {
> -       return !tp->retrans_stamp ||
> -              tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
> +       if (!tp->retrans_stamp) {
> +               /* Sender will be in a transient state with cleared
> +                * retrans_stamp during false fast retransmit prevention
> +                * mechanism
> +                */
> +               return !tcp_false_fast_retrans_possible(tp, prior_snd_una);
> +       }
> +       return tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
>  }
>
>  /* Undo procedures. */
> @@ -2334,17 +2341,19 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
>         tp->rack.advanced = 1; /* Force RACK to re-exam losses */
>  }
>
> -static inline bool tcp_may_undo(const struct tcp_sock *tp)
> +static inline bool tcp_may_undo(const struct tcp_sock *tp,
> +                               const u32 prior_snd_una)
>  {
> -       return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
> +       return tp->undo_marker &&
> +              (!tp->undo_retrans || tcp_packet_delayed(tp, prior_snd_una));
>  }
>
>  /* People celebrate: "We love our President!" */
> -static bool tcp_try_undo_recovery(struct sock *sk)
> +static bool tcp_try_undo_recovery(struct sock *sk, const u32 prior_snd_una)
>  {
>         struct tcp_sock *tp = tcp_sk(sk);
>
> -       if (tcp_may_undo(tp)) {
> +       if (tcp_may_undo(tp, prior_snd_una)) {
>                 int mib_idx;
>
>                 /* Happy end! We did not retransmit anything
> @@ -2391,11 +2400,12 @@ static bool tcp_try_undo_dsack(struct sock *sk)
>  }
>
>  /* Undo during loss recovery after partial ACK or using F-RTO. */
> -static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
> +static bool tcp_try_undo_loss(struct sock *sk, const u32 prior_snd_una,
> +                             bool frto_undo)
>  {
>         struct tcp_sock *tp = tcp_sk(sk);
>
> -       if (frto_undo || tcp_may_undo(tp)) {
> +       if (frto_undo || tcp_may_undo(tp, prior_snd_una)) {
>                 tcp_undo_cwnd_reduction(sk, true);
>
>                 DBGUNDO(sk, "partial loss");
> @@ -2628,13 +2638,13 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
>   * recovered or spurious. Otherwise retransmits more on partial ACKs.
>   */
>  static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
> -                            int *rexmit)
> +                            int *rexmit, const u32 prior_snd_una)
>  {
>         struct tcp_sock *tp = tcp_sk(sk);
>         bool recovered = !before(tp->snd_una, tp->high_seq);
>
>         if ((flag & FLAG_SND_UNA_ADVANCED) &&
> -           tcp_try_undo_loss(sk, false))
> +           tcp_try_undo_loss(sk, prior_snd_una, false))
>                 return;
>
>         if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
> @@ -2642,7 +2652,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
>                  * lost, i.e., never-retransmitted data are (s)acked.
>                  */
>                 if ((flag & FLAG_ORIG_SACK_ACKED) &&
> -                   tcp_try_undo_loss(sk, true))
> +                   tcp_try_undo_loss(sk, prior_snd_una, true))
>                         return;
>
>                 if (after(tp->snd_nxt, tp->high_seq)) {
> @@ -2665,7 +2675,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
>
>         if (recovered) {
>                 /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
> -               tcp_try_undo_recovery(sk);
> +               tcp_try_undo_recovery(sk, prior_snd_una);
>                 return;
>         }
>         if (tcp_is_reno(tp)) {
> @@ -2685,7 +2695,7 @@ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
>  {
>         struct tcp_sock *tp = tcp_sk(sk);
>
> -       if (tp->undo_marker && tcp_packet_delayed(tp)) {
> +       if (tp->undo_marker && tcp_packet_delayed(tp, prior_snd_una)) {
>                 /* Plain luck! Hole if filled with delayed
>                  * packet, rather than with a retransmit. Check reordering.
>                  */
> @@ -2788,7 +2798,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
>                 case TCP_CA_Recovery:
>                         if (tcp_is_reno(tp))
>                                 tcp_reset_reno_sack(tp);
> -                       if (tcp_try_undo_recovery(sk))
> +                       if (tcp_try_undo_recovery(sk, prior_snd_una))
>                                 return;
>                         tcp_end_cwnd_reduction(sk);
>                         break;
> @@ -2815,7 +2825,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
>                 tcp_rack_identify_loss(sk, ack_flag);
>                 break;
>         case TCP_CA_Loss:
> -               tcp_process_loss(sk, flag, is_dupack, rexmit);
> +               tcp_process_loss(sk, flag, is_dupack, rexmit, prior_snd_una);
>                 tcp_rack_identify_loss(sk, ack_flag);
>                 if (!(icsk->icsk_ca_state == TCP_CA_Open ||
>                       (*ack_flag & FLAG_LOST_RETRANS)))
> --
> 2.7.4
>
Ilpo Järvinen April 4, 2018, 10:23 a.m. | #2
On Wed, 28 Mar 2018, Yuchung Cheng wrote:

> On Tue, Mar 13, 2018 at 3:25 AM, Ilpo Järvinen
> <ilpo.jarvinen@helsinki.fi> wrote:
> > When a cumulative ACK lands to high_seq at the end of loss
> > recovery and SACK is not enabled, the sender needs to avoid
> > false fast retransmits (RFC6582). The avoidance mechanisms is
> > implemented by remaining in the loss recovery CA state until
> > one additional cumulative ACK arrives. During the operation of
> > this avoidance mechanism, there is internal transient in the
> > use of state variables which will always trigger a bogus undo.
>
> Do we have to make undo in non-sack perfect? can we consider a much
> simpler but imperfect fix of
> 
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 8d480542aa07..95225d9de0af 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -2356,6 +2356,7 @@ static bool tcp_try_undo_recovery(struct sock *sk)
>                  * fast retransmits (RFC2582). SACK TCP is safe. */
>                 if (!tcp_any_retrans_done(sk))
>                         tp->retrans_stamp = 0;
> +               tp->undo_marker = 0;
>                 return true;
>         }

Yes, that's of course possible and would work around the issue too.
In fact, I initially did that kind of fix for myself (I put it into a
block with tp->retrans_stamp = 0 though). But then I realized that it is
not that complicated to make the fix locally in tcp_packet_delayed()
(except for the annoyance of passing all the necessary state parameters
through the deep static call chain, but that should pose no big challenge
for the compiler to handle, I guess).

BTW, do you know under what circumstances tcp_any_retrans_done(sk)
would return non-zero here (snd_una == high_seq, so those rexmits
would need to be above high_seq)?

Patch

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 72ecfbb..270aa48 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2241,10 +2241,17 @@  static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
-static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
+static inline bool tcp_packet_delayed(const struct tcp_sock *tp,
+				      const u32 prior_snd_una)
 {
-	return !tp->retrans_stamp ||
-	       tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
+	if (!tp->retrans_stamp) {
+		/* Sender will be in a transient state with cleared
+		 * retrans_stamp during false fast retransmit prevention
+		 * mechanism
+		 */
+		return !tcp_false_fast_retrans_possible(tp, prior_snd_una);
+	}
+	return tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
 }
 
 /* Undo procedures. */
@@ -2334,17 +2341,19 @@  static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 	tp->rack.advanced = 1; /* Force RACK to re-exam losses */
 }
 
-static inline bool tcp_may_undo(const struct tcp_sock *tp)
+static inline bool tcp_may_undo(const struct tcp_sock *tp,
+				const u32 prior_snd_una)
 {
-	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
+	return tp->undo_marker &&
+	       (!tp->undo_retrans || tcp_packet_delayed(tp, prior_snd_una));
 }
 
 /* People celebrate: "We love our President!" */
-static bool tcp_try_undo_recovery(struct sock *sk)
+static bool tcp_try_undo_recovery(struct sock *sk, const u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tcp_may_undo(tp)) {
+	if (tcp_may_undo(tp, prior_snd_una)) {
 		int mib_idx;
 
 		/* Happy end! We did not retransmit anything
@@ -2391,11 +2400,12 @@  static bool tcp_try_undo_dsack(struct sock *sk)
 }
 
 /* Undo during loss recovery after partial ACK or using F-RTO. */
-static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
+static bool tcp_try_undo_loss(struct sock *sk, const u32 prior_snd_una,
+			      bool frto_undo)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (frto_undo || tcp_may_undo(tp)) {
+	if (frto_undo || tcp_may_undo(tp, prior_snd_una)) {
 		tcp_undo_cwnd_reduction(sk, true);
 
 		DBGUNDO(sk, "partial loss");
@@ -2628,13 +2638,13 @@  void tcp_enter_recovery(struct sock *sk, bool ece_ack)
  * recovered or spurious. Otherwise retransmits more on partial ACKs.
  */
 static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
-			     int *rexmit)
+			     int *rexmit, const u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	bool recovered = !before(tp->snd_una, tp->high_seq);
 
 	if ((flag & FLAG_SND_UNA_ADVANCED) &&
-	    tcp_try_undo_loss(sk, false))
+	    tcp_try_undo_loss(sk, prior_snd_una, false))
 		return;
 
 	if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
@@ -2642,7 +2652,7 @@  static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
 		 * lost, i.e., never-retransmitted data are (s)acked.
 		 */
 		if ((flag & FLAG_ORIG_SACK_ACKED) &&
-		    tcp_try_undo_loss(sk, true))
+		    tcp_try_undo_loss(sk, prior_snd_una, true))
 			return;
 
 		if (after(tp->snd_nxt, tp->high_seq)) {
@@ -2665,7 +2675,7 @@  static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
 
 	if (recovered) {
 		/* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
-		tcp_try_undo_recovery(sk);
+		tcp_try_undo_recovery(sk, prior_snd_una);
 		return;
 	}
 	if (tcp_is_reno(tp)) {
@@ -2685,7 +2695,7 @@  static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tp->undo_marker && tcp_packet_delayed(tp)) {
+	if (tp->undo_marker && tcp_packet_delayed(tp, prior_snd_una)) {
 		/* Plain luck! Hole if filled with delayed
 		 * packet, rather than with a retransmit. Check reordering.
 		 */
@@ -2788,7 +2798,7 @@  static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
 		case TCP_CA_Recovery:
 			if (tcp_is_reno(tp))
 				tcp_reset_reno_sack(tp);
-			if (tcp_try_undo_recovery(sk))
+			if (tcp_try_undo_recovery(sk, prior_snd_una))
 				return;
 			tcp_end_cwnd_reduction(sk);
 			break;
@@ -2815,7 +2825,7 @@  static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
 		tcp_rack_identify_loss(sk, ack_flag);
 		break;
 	case TCP_CA_Loss:
-		tcp_process_loss(sk, flag, is_dupack, rexmit);
+		tcp_process_loss(sk, flag, is_dupack, rexmit, prior_snd_una);
 		tcp_rack_identify_loss(sk, ack_flag);
 		if (!(icsk->icsk_ca_state == TCP_CA_Open ||
 		      (*ack_flag & FLAG_LOST_RETRANS)))