[SRU,F/raspi,G/raspi] tcp: do not mess with cloned skbs in tcp_add_backlog()

Message ID 20210129072914.10104-1-juergh@canonical.com
State New
Series [SRU,F/raspi,G/raspi] tcp: do not mess with cloned skbs in tcp_add_backlog()

Commit Message

Juerg Haefliger Jan. 29, 2021, 7:29 a.m. UTC
From: Eric Dumazet <edumazet@google.com>

BugLink: https://bugs.launchpad.net/bugs/1861936

Heiner Kallweit reported that some skbs were sent with
the following invalid GSO properties:
- gso_size > 0
- gso_type == 0

This was triggering a WARN_ON_ONCE() in rtl8169_tso_csum_v2.

Juerg Haefliger was able to reproduce a similar issue using
a lan78xx NIC and a workload mixing TCP incoming traffic
and forwarded packets.

The problem is that tcp_add_backlog() writes over gso_segs and
gso_size even if the incoming packet will not be coalesced with the
backlog tail packet.
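
For reference, the pre-fix flow looked like this (these are the lines
the diff below removes): the incoming skb's shared_info was written
unconditionally, before the coalescing decision was made:

	shinfo = skb_shinfo(skb);

	if (!shinfo->gso_size)
		shinfo->gso_size = skb->len - hdrlen;

	if (!shinfo->gso_segs)
		shinfo->gso_segs = 1;

	/* skb_try_coalesce(tail, skb, ...) only runs later, so these
	 * writes happen even when no coalescing takes place. */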

While skb_try_coalesce() would bail out if the tail packet is cloned,
this overwriting would lead to corruption of other packets cooked by
lan78xx that share a common super-packet.

The strategy used by lan78xx is to use one big skb and split it into
all received packets using skb_clone() to avoid copies. The drawback
of this strategy is that all the small skbs share a common
struct skb_shared_info.
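
A minimal userspace sketch of that aliasing hazard (an illustrative
model with simplified stand-in types, not kernel code):

#include <stdio.h>

/* Stand-ins for struct sk_buff / struct skb_shared_info. */
struct shared_info { unsigned int gso_size, gso_segs; };
struct skb { struct shared_info *shinfo; };

int main(void)
{
	struct shared_info shared = { 0, 0 };	/* one super-packet */
	struct skb rx_a = { &shared };		/* skb_clone() result */
	struct skb rx_b = { &shared };		/* shares the same shinfo */

	/* What the old tcp_add_backlog() did to rx_a unconditionally: */
	rx_a.shinfo->gso_size = 1400;
	rx_a.shinfo->gso_segs = 1;

	/* rx_b is corrupted too: gso_size > 0 with no gso_type set,
	 * exactly the invalid combination reported above. */
	printf("rx_b: gso_size=%u gso_segs=%u\n",
	       rx_b.shinfo->gso_size, rx_b.shinfo->gso_segs);
	return 0;
}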

This patch rewrites the TCP gso_size/gso_segs handling so that it
happens only on the tail skb, after skb_try_coalesce() has made sure
the tail is not cloned.
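
Condensed, the fixed flow reads roughly as follows (a sketch distilled
from the diff below, not the verbatim source):

	__skb_pull(skb, hdrlen);	/* skb->len now excludes the header */

	gso_size = skb_shinfo(skb)->gso_size ?: skb->len;
	gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
	tail_gso_size = skb_shinfo(tail)->gso_size ?: (tail->len - hdrlen);
	tail_gso_segs = skb_shinfo(tail)->gso_segs ?: 1;

	if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
		/* Coalescing succeeded, which implies the tail skb is
		 * not cloned, so its shared_info is safe to modify. */
		skb_shinfo(tail)->gso_size = max(gso_size, tail_gso_size);
		skb_shinfo(tail)->gso_segs = min_t(u32,
						   gso_segs + tail_gso_segs,
						   0xFFFF);
		...
	}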

Fixes: 4f693b55c3d2 ("tcp: implement coalescing on backlog queue")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Bisected-by: Juerg Haefliger <juergh@canonical.com>
Tested-by: Juerg Haefliger <juergh@canonical.com>
Reported-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=209423
Link: https://lore.kernel.org/r/20210119164900.766957-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
(cherry picked from commit b160c28548bc0a87cbd16d5af6d3edcfd70b8c9a)
Signed-off-by: Juerg Haefliger <juergh@canonical.com>
---
 net/ipv4/tcp_ipv4.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

Comments

Stefan Bader Jan. 29, 2021, 9:18 a.m. UTC | #1
On 29.01.21 08:29, Juerg Haefliger wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> BugLink: https://bugs.launchpad.net/bugs/1861936
>
> [...]
Acked-by: Stefan Bader <stefan.bader@canonical.com>
Kleber Sacilotto de Souza Jan. 29, 2021, 10:40 a.m. UTC | #2
On 29.01.21 08:29, Juerg Haefliger wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> BugLink: https://bugs.launchpad.net/bugs/1861936
>
> [...]

Could this affect the generic kernels as well?

Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>

William Breathitt Gray Feb. 9, 2021, 5:48 a.m. UTC | #3
On Fri, Jan 29, 2021 at 08:29:14AM +0100, Juerg Haefliger wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> BugLink: https://bugs.launchpad.net/bugs/1861936
>
> [...]

Applied to focal,groovy:linux-raspi/master-next.

William Breathitt Gray

Patch

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ab4576ac1fe6..f5976b4011b6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1725,6 +1725,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
+	u32 tail_gso_size, tail_gso_segs;
 	struct skb_shared_info *shinfo;
 	const struct tcphdr *th;
 	struct tcphdr *thtail;
@@ -1732,6 +1733,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 	unsigned int hdrlen;
 	bool fragstolen;
 	u32 gso_segs;
+	u32 gso_size;
 	int delta;
 
 	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
@@ -1757,13 +1759,6 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 	 */
 	th = (const struct tcphdr *)skb->data;
 	hdrlen = th->doff * 4;
-	shinfo = skb_shinfo(skb);
-
-	if (!shinfo->gso_size)
-		shinfo->gso_size = skb->len - hdrlen;
-
-	if (!shinfo->gso_segs)
-		shinfo->gso_segs = 1;
 
 	tail = sk->sk_backlog.tail;
 	if (!tail)
@@ -1786,6 +1781,15 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 		goto no_coalesce;
 
 	__skb_pull(skb, hdrlen);
+
+	shinfo = skb_shinfo(skb);
+	gso_size = shinfo->gso_size ?: skb->len;
+	gso_segs = shinfo->gso_segs ?: 1;
+
+	shinfo = skb_shinfo(tail);
+	tail_gso_size = shinfo->gso_size ?: (tail->len - hdrlen);
+	tail_gso_segs = shinfo->gso_segs ?: 1;
+
 	if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
 		TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;
 
@@ -1812,11 +1816,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 		}
 
 		/* Not as strict as GRO. We only need to carry mss max value */
-		skb_shinfo(tail)->gso_size = max(shinfo->gso_size,
-						 skb_shinfo(tail)->gso_size);
-
-		gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs;
-		skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+		shinfo->gso_size = max(gso_size, tail_gso_size);
+		shinfo->gso_segs = min_t(u32, gso_segs + tail_gso_segs, 0xFFFF);
 
 		sk->sk_backlog.len += delta;
 		__NET_INC_STATS(sock_net(sk),
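
A side note on the "x ?: y" form used in the new code: it is the GNU C
conditional with an omitted middle operand, equivalent to "x ? x : y"
except that x is evaluated only once. A trivial standalone example
(compiles with GCC or Clang):

#include <stdio.h>

int main(void)
{
	unsigned int gso_segs = 0;

	/* Yields gso_segs if nonzero, otherwise the fallback 1. */
	unsigned int effective = gso_segs ?: 1;

	printf("%u\n", effective);	/* prints 1 */
	return 0;
}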