
[net-next] tcp-tso: do not split TSO packets at retransmit time

Message ID: 1461012972-15757-1-git-send-email-edumazet@google.com
State: Changes Requested, archived
Delegated to: David Miller

Commit Message

Eric Dumazet April 18, 2016, 8:56 p.m. UTC
The Linux TCP stack painfully segments all TSO/GSO packets before retransmits.

This was fine back in the days when TSO/GSO were emerging, with their
bugs, but we believe the dark ages are over.

Keeping big packets in the write queues, and keeping them intact through
stack traversal, has a lot of benefits:
 - Less memory overhead, because write queues hold fewer skbs
 - Less cpu overhead at ACK processing.
 - Better SACK processing, as a lot of studies mentioned how
   awful linux was at this ;)
 - Less cpu overhead to send the rtx packets
   (IP stack traversal, netfilter traversal, qdisc, drivers...)
 - Better latencies in presence of losses.
 - Smaller spikes in fq-like packet schedulers, as retransmits
   are not constrained by TCP Small Queues.

1% packet loss is common today, and at 100Gbit speeds this
translates to ~80,000 losses per second. If we are unlucky and the
first MSS of a 45-MSS TSO packet is lost, we are cooking 44 MSS-sized
segments at rtx instead of a single 44-MSS TSO packet.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/tcp.h     |  4 ++--
 net/ipv4/tcp_input.c  |  2 +-
 net/ipv4/tcp_output.c | 64 +++++++++++++++++++++++----------------------------
 net/ipv4/tcp_timer.c  |  4 ++--
 4 files changed, 34 insertions(+), 40 deletions(-)
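
For context, the loss-rate arithmetic in the changelog works out as in the
rough back-of-the-envelope sketch below, assuming ~1500-byte wire-size
packets and the 1% loss rate quoted above (both are assumptions taken from
the text, not measurements):

#include <stdio.h>

int main(void)
{
	/* Assumptions from the changelog: 100 Gbit/s link, ~1500 byte
	 * packets on the wire, 1% loss rate.
	 */
	const double link_bps   = 100e9;
	const double pkt_bits   = 1500.0 * 8.0;
	const double loss_ratio = 0.01;

	double pkts_per_sec   = link_bps / pkt_bits;       /* ~8.3M pkts/s */
	double losses_per_sec = pkts_per_sec * loss_ratio;

	printf("packets/s: %.0f, losses/s: %.0f\n",
	       pkts_per_sec, losses_per_sec);
	return 0;
}

It prints roughly 8.3M packets/s and ~83,000 losses/s, consistent with the
~80,000 figure in the changelog.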

Comments

Yuchung Cheng April 18, 2016, 9:12 p.m. UTC | #1
On Mon, Apr 18, 2016 at 1:56 PM, Eric Dumazet <edumazet@google.com> wrote:
> Linux TCP stack painfully segments all TSO/GSO packets before retransmits.
>
> This was fine back in the days when TSO/GSO were emerging, with their
> bugs, but we believe the dark age is over.
>
> Keeping big packets in write queues, but also in stack traversal
> has a lot of benefits.
>  - Less memory overhead, because write queues have less skbs
>  - Less cpu overhead at ACK processing.
>  - Better SACK processing, as lot of studies mentioned how
>    awful linux was at this ;)
>  - Less cpu overhead to send the rtx packets
>    (IP stack traversal, netfilter traversal, qdisc, drivers...)
>  - Better latencies in presence of losses.
>  - Smaller spikes in fq like packet schedulers, as retransmits
>    are not constrained by TCP Small Queues.
>
> 1 % packet losses are common today, and at 100Gbit speeds, this
> translates to ~80,000 losses per second. If we are unlucky and
> first MSS of a 45-MSS TSO is lost, we are cooking 44 MSS segments
> at rtx instead of a single 44-MSS TSO packet.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
> ---
>  include/net/tcp.h     |  4 ++--
>  net/ipv4/tcp_input.c  |  2 +-
>  net/ipv4/tcp_output.c | 64 +++++++++++++++++++++++----------------------------
>  net/ipv4/tcp_timer.c  |  4 ++--
>  4 files changed, 34 insertions(+), 40 deletions(-)
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index fd40f8c64d5f..0dc272dcd772 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -538,8 +538,8 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
>  void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
>                                int nonagle);
>  bool tcp_may_send_now(struct sock *sk);
> -int __tcp_retransmit_skb(struct sock *, struct sk_buff *);
> -int tcp_retransmit_skb(struct sock *, struct sk_buff *);
> +int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
> +int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
>  void tcp_retransmit_timer(struct sock *sk);
>  void tcp_xmit_retransmit_queue(struct sock *);
>  void tcp_simple_retransmit(struct sock *);
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 90e0d9256b74..729e489b5608 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -5543,7 +5543,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
>         if (data) { /* Retransmit unacked data in SYN */
>                 tcp_for_write_queue_from(data, sk) {
>                         if (data == tcp_send_head(sk) ||
> -                           __tcp_retransmit_skb(sk, data))
> +                           __tcp_retransmit_skb(sk, data, 1))
>                                 break;
>                 }
>                 tcp_rearm_rto(sk);
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 6451b83d81e9..4876b256a70a 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -2266,7 +2266,7 @@ void tcp_send_loss_probe(struct sock *sk)
>         if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
>                 goto rearm_timer;
>
> -       if (__tcp_retransmit_skb(sk, skb))
> +       if (__tcp_retransmit_skb(sk, skb, 1))
>                 goto rearm_timer;
>
>         /* Record snd_nxt for loss detection. */
> @@ -2551,17 +2551,17 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
>   * state updates are done by the caller.  Returns non-zero if an
>   * error occurred which prevented the send.
>   */
> -int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
> +int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
>  {
> -       struct tcp_sock *tp = tcp_sk(sk);
>         struct inet_connection_sock *icsk = inet_csk(sk);
> +       struct tcp_sock *tp = tcp_sk(sk);
>         unsigned int cur_mss;
> -       int err;
> +       int diff, len, err;
> +
>
> -       /* Inconslusive MTU probe */
> -       if (icsk->icsk_mtup.probe_size) {
> +       /* Inconclusive MTU probe */
> +       if (icsk->icsk_mtup.probe_size)
>                 icsk->icsk_mtup.probe_size = 0;
> -       }
>
>         /* Do not sent more than we queued. 1/4 is reserved for possible
>          * copying overhead: fragmentation, tunneling, mangling etc.
> @@ -2594,30 +2594,27 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
>             TCP_SKB_CB(skb)->seq != tp->snd_una)
>                 return -EAGAIN;
>
> -       if (skb->len > cur_mss) {
> -               if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC))
> +       len = cur_mss * segs;
> +       if (skb->len > len) {
> +               if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
>                         return -ENOMEM; /* We'll try again later. */
>         } else {
> -               int oldpcount = tcp_skb_pcount(skb);
> +               if (skb_unclone(skb, GFP_ATOMIC))
> +                       return -ENOMEM;
>
> -               if (unlikely(oldpcount > 1)) {
> -                       if (skb_unclone(skb, GFP_ATOMIC))
> -                               return -ENOMEM;
> -                       tcp_init_tso_segs(skb, cur_mss);
> -                       tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
> -               }
> +               diff = tcp_skb_pcount(skb);
> +               tcp_set_skb_tso_segs(skb, cur_mss);
> +               diff -= tcp_skb_pcount(skb);
> +               if (diff)
> +                       tcp_adjust_pcount(sk, skb, diff);
> +               if (skb->len < cur_mss)
> +                       tcp_retrans_try_collapse(sk, skb, cur_mss);
>         }
>
>         /* RFC3168, section 6.1.1.1. ECN fallback */
>         if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
>                 tcp_ecn_clear_syn(sk, skb);
>
> -       tcp_retrans_try_collapse(sk, skb, cur_mss);
> -
> -       /* Make a copy, if the first transmission SKB clone we made
> -        * is still in somebody's hands, else make a clone.
> -        */
> -
>         /* make sure skb->data is aligned on arches that require it
>          * and check if ack-trimming & collapsing extended the headroom
>          * beyond what csum_start can cover.
> @@ -2633,20 +2630,22 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
>         }
>
>         if (likely(!err)) {
> +               segs = tcp_skb_pcount(skb);
> +
>                 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
>                 /* Update global TCP statistics. */
> -               TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
> +               TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs);
>                 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
>                         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
> -               tp->total_retrans++;
> +               tp->total_retrans += segs;
>         }
>         return err;
>  }
>
> -int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
> +int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
>  {
>         struct tcp_sock *tp = tcp_sk(sk);
> -       int err = __tcp_retransmit_skb(sk, skb);
> +       int err = __tcp_retransmit_skb(sk, skb, segs);
>
>         if (err == 0) {
>  #if FASTRETRANS_DEBUG > 0
> @@ -2737,6 +2736,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
>
>         tcp_for_write_queue_from(skb, sk) {
>                 __u8 sacked = TCP_SKB_CB(skb)->sacked;
> +               int segs;
>
>                 if (skb == tcp_send_head(sk))
>                         break;
> @@ -2744,14 +2744,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
>                 if (!hole)
>                         tp->retransmit_skb_hint = skb;
>
> -               /* Assume this retransmit will generate
> -                * only one packet for congestion window
> -                * calculation purposes.  This works because
> -                * tcp_retransmit_skb() will chop up the
> -                * packet to be MSS sized and all the
> -                * packet counting works out.
> -                */
> -               if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
> +               segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
> +               if (segs <= 0)
>                         return;
>
>                 if (fwd_rexmitting) {
> @@ -2788,7 +2782,7 @@ begin_fwd:
>                 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
>                         continue;
>
> -               if (tcp_retransmit_skb(sk, skb))
> +               if (tcp_retransmit_skb(sk, skb, segs))
>                         return;
>
>                 NET_INC_STATS_BH(sock_net(sk), mib_idx);
> diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
> index 49bc474f8e35..373b03e78aaa 100644
> --- a/net/ipv4/tcp_timer.c
> +++ b/net/ipv4/tcp_timer.c
> @@ -404,7 +404,7 @@ void tcp_retransmit_timer(struct sock *sk)
>                         goto out;
>                 }
>                 tcp_enter_loss(sk);
> -               tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
> +               tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1);
>                 __sk_dst_reset(sk);
>                 goto out_reset_timer;
>         }
> @@ -436,7 +436,7 @@ void tcp_retransmit_timer(struct sock *sk)
>
>         tcp_enter_loss(sk);
>
> -       if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
> +       if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) {
>                 /* Retransmission failed because of local congestion,
>                  * do not backoff.
>                  */
> --
> 2.8.0.rc3.226.g39d4020
>
David Miller April 20, 2016, 12:36 a.m. UTC | #2
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 18 Apr 2016 13:56:12 -0700

> 1 % packet losses are common today, and at 100Gbit speeds, this
> translates to ~80,000 losses per second. If we are unlucky and
> first MSS of a 45-MSS TSO is lost, we are cooking 44 MSS segments
> at rtx instead of a single 44-MSS TSO packet.

I'm having trouble understanding this.

If the first mss is lost, then we simply chop the 45 MSS TSO skb into
two pieces.  The first piece is a 1 MSS chunk for the retransmit, and
the second piece is the remaining 44-MSS TSO skb.

I am pretty sure that is what the current stack does, and regardless
it is certainly what I intended it to do all those years ago when I
wrote this code. :-)

The only case where I can see this patch helping is when we have to
retransmit multi-mss chunks.  And yes indeed, it might be a useful
optimization to TSO those frames rather than sending them one MSS at a
time.
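
To make the two code paths under discussion concrete, the toy model below
sketches the fragmentation decision in __tcp_retransmit_skb();
rtx_len_old() and rtx_len_new() are hypothetical stand-ins for the real
tcp_fragment() call, not kernel code, and all lengths are in MSS units:

#include <stdio.h>

static int rtx_len_old(int skb_mss)
{
	/* pre-patch: anything bigger than one MSS is cut at cur_mss,
	 * so the retransmitted packet is always a single MSS.
	 */
	return skb_mss > 1 ? 1 : skb_mss;
}

static int rtx_len_new(int skb_mss, int segs)
{
	/* post-patch: cut at cur_mss * segs, so the retransmit can be
	 * a TSO packet of up to 'segs' MSS (the cwnd budget).
	 */
	return skb_mss > segs ? segs : skb_mss;
}

int main(void)
{
	int skb_mss = 45;	/* the 45-MSS TSO skb from the changelog */
	int segs    = 44;	/* cwnd budget passed in by the caller   */

	printf("old: rtx packet of %d MSS\n", rtx_len_old(skb_mss));
	printf("new: rtx packet of %d MSS\n", rtx_len_new(skb_mss, segs));
	return 0;
}

In the old path the retransmitted packet is always a single MSS; in the new
path it can carry up to the cwnd budget in one TSO packet, which is the
multi-MSS retransmit case mentioned above.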
Eric Dumazet April 20, 2016, 12:49 a.m. UTC | #3
On Tue, 2016-04-19 at 20:36 -0400, David Miller wrote:
> From: Eric Dumazet <edumazet@google.com>
> Date: Mon, 18 Apr 2016 13:56:12 -0700
> 
> > 1 % packet losses are common today, and at 100Gbit speeds, this
> > translates to ~80,000 losses per second. If we are unlucky and
> > first MSS of a 45-MSS TSO is lost, we are cooking 44 MSS segments
> > at rtx instead of a single 44-MSS TSO packet.
> 
> I'm having trouble understanding this.
> 
> If the first mss is lost, then we simply chop the 45 MSS TSO skb into
> two pieces.  The first piece is a 1 MSS chunk for the retransmit, and
> the second piece is remaining 44 MSS TSO skb.
> 
> I am pretty sure that is what the current stack does, and regardless
> it is certainly what I intended it to do all those years ago when I
> wrote this code. :-)
> 
> The only case where I can see this patch helping is when we have to
> retransmit multi-mss chunks.  And yes indeed, it might be a useful
> optimization to TSO those frames rather than sending them one MSS at a
> time.

Yeah, it looks like I got the changelog wrong. We definitely see these
1-MSS splits during retransmits all the time, and we had to change the
sch_fq flow_limit from 100 to 1000 packets to cope with that. (TCP Small
Queues does not guard TCP from sending hundreds of rtx packets at the same time)
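
As a rough illustration of why the per-flow qdisc limit matters here, the
sketch below assumes the worst case from the changelog (one lost 45-MSS TSO
skb retransmitted as 44 separate 1-MSS skbs reaching the qdisc in one burst)
and the 100/1000 flow_limit values mentioned in the message above; it is an
assumed scenario, not a measurement:

#include <stdio.h>

int main(void)
{
	/* Assumed worst case: each loss event on a 45-MSS TSO skb turns
	 * into 44 single-MSS retransmit skbs, and TSQ does not throttle
	 * retransmits, so they can all hit the qdisc at once.
	 */
	int rtx_skbs_per_loss = 44;
	int old_flow_limit    = 100;	/* value mentioned in the thread */
	int new_flow_limit    = 1000;	/* value the setup was moved to  */

	printf("loss bursts fitting under flow_limit %d: %d\n",
	       old_flow_limit, old_flow_limit / rtx_skbs_per_loss);
	printf("loss bursts fitting under flow_limit %d: %d\n",
	       new_flow_limit, new_flow_limit / rtx_skbs_per_loss);
	return 0;
}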
David Miller April 20, 2016, 1:10 a.m. UTC | #4
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 19 Apr 2016 17:49:50 -0700

> On Tue, 2016-04-19 at 20:36 -0400, David Miller wrote:
>> From: Eric Dumazet <edumazet@google.com>
>> Date: Mon, 18 Apr 2016 13:56:12 -0700
>> 
>> > 1 % packet losses are common today, and at 100Gbit speeds, this
>> > translates to ~80,000 losses per second. If we are unlucky and
>> > first MSS of a 45-MSS TSO is lost, we are cooking 44 MSS segments
>> > at rtx instead of a single 44-MSS TSO packet.
>> 
>> I'm having trouble understanding this.
>> 
>> If the first mss is lost, then we simply chop the 45 MSS TSO skb into
>> two pieces.  The first piece is a 1 MSS chunk for the retransmit, and
>> the second piece is remaining 44 MSS TSO skb.
>> 
>> I am pretty sure that is what the current stack does, and regardless
>> it is certainly what I intended it to do all those years ago when I
>> wrote this code. :-)
>> 
>> The only case where I can see this patch helping is when we have to
>> retransmit multi-mss chunks.  And yes indeed, it might be a useful
>> optimization to TSO those frames rather than sending them one MSS at a
>> time.
> 
> Yeah, it looks like I got the changelog wrong. We definitely see these
> 1-MSS splits during retransmits all the time, and we had to change the
> sch_fq flow_limit from 100 to 1000 packets to cope with that. (TCP Small
> Queues does not guard TCP from sending hundred of rtx at the same time)

Ok, please rewrite the commit log message so that it is more accurate.

Thank you.

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index fd40f8c64d5f..0dc272dcd772 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -538,8 +538,8 @@  __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 			       int nonagle);
 bool tcp_may_send_now(struct sock *sk);
-int __tcp_retransmit_skb(struct sock *, struct sk_buff *);
-int tcp_retransmit_skb(struct sock *, struct sk_buff *);
+int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
+int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
 void tcp_retransmit_timer(struct sock *sk);
 void tcp_xmit_retransmit_queue(struct sock *);
 void tcp_simple_retransmit(struct sock *);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 90e0d9256b74..729e489b5608 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5543,7 +5543,7 @@  static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 	if (data) { /* Retransmit unacked data in SYN */
 		tcp_for_write_queue_from(data, sk) {
 			if (data == tcp_send_head(sk) ||
-			    __tcp_retransmit_skb(sk, data))
+			    __tcp_retransmit_skb(sk, data, 1))
 				break;
 		}
 		tcp_rearm_rto(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6451b83d81e9..4876b256a70a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2266,7 +2266,7 @@  void tcp_send_loss_probe(struct sock *sk)
 	if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
 		goto rearm_timer;
 
-	if (__tcp_retransmit_skb(sk, skb))
+	if (__tcp_retransmit_skb(sk, skb, 1))
 		goto rearm_timer;
 
 	/* Record snd_nxt for loss detection. */
@@ -2551,17 +2551,17 @@  static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
  * state updates are done by the caller.  Returns non-zero if an
  * error occurred which prevented the send.
  */
-int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
+int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int cur_mss;
-	int err;
+	int diff, len, err;
+
 
-	/* Inconslusive MTU probe */
-	if (icsk->icsk_mtup.probe_size) {
+	/* Inconclusive MTU probe */
+	if (icsk->icsk_mtup.probe_size)
 		icsk->icsk_mtup.probe_size = 0;
-	}
 
 	/* Do not sent more than we queued. 1/4 is reserved for possible
 	 * copying overhead: fragmentation, tunneling, mangling etc.
@@ -2594,30 +2594,27 @@  int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	    TCP_SKB_CB(skb)->seq != tp->snd_una)
 		return -EAGAIN;
 
-	if (skb->len > cur_mss) {
-		if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC))
+	len = cur_mss * segs;
+	if (skb->len > len) {
+		if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
 			return -ENOMEM; /* We'll try again later. */
 	} else {
-		int oldpcount = tcp_skb_pcount(skb);
+		if (skb_unclone(skb, GFP_ATOMIC))
+			return -ENOMEM;
 
-		if (unlikely(oldpcount > 1)) {
-			if (skb_unclone(skb, GFP_ATOMIC))
-				return -ENOMEM;
-			tcp_init_tso_segs(skb, cur_mss);
-			tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
-		}
+		diff = tcp_skb_pcount(skb);
+		tcp_set_skb_tso_segs(skb, cur_mss);
+		diff -= tcp_skb_pcount(skb);
+		if (diff)
+			tcp_adjust_pcount(sk, skb, diff);
+		if (skb->len < cur_mss)
+			tcp_retrans_try_collapse(sk, skb, cur_mss);
 	}
 
 	/* RFC3168, section 6.1.1.1. ECN fallback */
 	if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
 		tcp_ecn_clear_syn(sk, skb);
 
-	tcp_retrans_try_collapse(sk, skb, cur_mss);
-
-	/* Make a copy, if the first transmission SKB clone we made
-	 * is still in somebody's hands, else make a clone.
-	 */
-
 	/* make sure skb->data is aligned on arches that require it
 	 * and check if ack-trimming & collapsing extended the headroom
 	 * beyond what csum_start can cover.
@@ -2633,20 +2630,22 @@  int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	}
 
 	if (likely(!err)) {
+		segs = tcp_skb_pcount(skb);
+
 		TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
 		/* Update global TCP statistics. */
-		TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
+		TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
 			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
-		tp->total_retrans++;
+		tp->total_retrans += segs;
 	}
 	return err;
 }
 
-int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
+int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int err = __tcp_retransmit_skb(sk, skb);
+	int err = __tcp_retransmit_skb(sk, skb, segs);
 
 	if (err == 0) {
 #if FASTRETRANS_DEBUG > 0
@@ -2737,6 +2736,7 @@  void tcp_xmit_retransmit_queue(struct sock *sk)
 
 	tcp_for_write_queue_from(skb, sk) {
 		__u8 sacked = TCP_SKB_CB(skb)->sacked;
+		int segs;
 
 		if (skb == tcp_send_head(sk))
 			break;
@@ -2744,14 +2744,8 @@  void tcp_xmit_retransmit_queue(struct sock *sk)
 		if (!hole)
 			tp->retransmit_skb_hint = skb;
 
-		/* Assume this retransmit will generate
-		 * only one packet for congestion window
-		 * calculation purposes.  This works because
-		 * tcp_retransmit_skb() will chop up the
-		 * packet to be MSS sized and all the
-		 * packet counting works out.
-		 */
-		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
+		segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
+		if (segs <= 0)
 			return;
 
 		if (fwd_rexmitting) {
@@ -2788,7 +2782,7 @@  begin_fwd:
 		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
 			continue;
 
-		if (tcp_retransmit_skb(sk, skb))
+		if (tcp_retransmit_skb(sk, skb, segs))
 			return;
 
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 49bc474f8e35..373b03e78aaa 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -404,7 +404,7 @@  void tcp_retransmit_timer(struct sock *sk)
 			goto out;
 		}
 		tcp_enter_loss(sk);
-		tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
+		tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1);
 		__sk_dst_reset(sk);
 		goto out_reset_timer;
 	}
@@ -436,7 +436,7 @@  void tcp_retransmit_timer(struct sock *sk)
 
 	tcp_enter_loss(sk);
 
-	if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
+	if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) {
 		/* Retransmission failed because of local congestion,
 		 * do not backoff.
 		 */
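
As a closing note on the tcp_xmit_retransmit_queue() hunk above: the new
per-skb retransmit budget is simply the free congestion-window space. A
minimal sketch of that computation follows, using made-up values rather
than real socket state:

#include <stdio.h>

/*
 * Sketch of the retransmit budget added in tcp_xmit_retransmit_queue():
 * instead of assuming one packet per retransmit, the queue walker hands
 * __tcp_retransmit_skb() the free cwnd space.  Values below are made up.
 */
int main(void)
{
	unsigned int snd_cwnd          = 60;	/* hypothetical cwnd      */
	unsigned int packets_in_flight = 16;	/* hypothetical in flight */
	int segs = (int)snd_cwnd - (int)packets_in_flight;

	if (segs <= 0)
		printf("no cwnd space left, stop retransmitting\n");
	else
		printf("retransmit up to %d MSS as a single TSO packet\n", segs);
	return 0;
}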