diff mbox

[ovs-dev,1/1] datapath: Ensure correct L4 checksum with NAT helpers.

Message ID 1483725730-7827-1-git-send-email-john.hurley@netronome.com
State Accepted
Headers show

Commit Message

John Hurley Jan. 6, 2017, 6:02 p.m. UTC
Setting the CHECKSUM_PARTIAL flag before sending to helper mods
can mean that the kernel code will not modify the first part of
the L4 checksum correctly after changing packet IPs/ports/payload
in kernels <4.6. This can mean that the L4 checksum is incorrect
when the packet egresses the system.

Giving the packet a temp skb_dst with RTCF_LOCAL flag not set
ensures the 'csum_*_magic' functions are hit in the kernel (if
required) and the modified packet will get the correct checksum
when fully processed.

This has tested with FTP NAT helpers on kernel version 3.13.

Signed-off-by: John Hurley <john.hurley@netronome.com>
---
 datapath/conntrack.c | 47 +++++++++++++++++------------------------------
 1 file changed, 17 insertions(+), 30 deletions(-)

Comments

Jarno Rajahalme Jan. 7, 2017, 2:07 a.m. UTC | #1
Acked-by: Jarno Rajahalme <jarno@ovn.org>

Pushed to master and branch-2.6 with slightly edited commit message and one indentation fix.

  Jarno

> On Jan 6, 2017, at 10:02 AM, John Hurley <john.hurley@netronome.com> wrote:
> 
> Setting the CHECKSUM_PARTIAL flag before sending to helper mods
> can mean that the kernel code will not modify the first part of
> the L4 checksum correctly after changing packet IPs/ports/payload
> in kernels <4.6. This can mean that the L4 checksum is incorrect
> when the packet egresses the system.
> 
> Giving the packet a temp skb_dst with RTCF_LOCAL flag not set
> ensures the 'csum_*_magic' functions are hit in the kernel (if
> required) and the modified packet will get the correct checksum
> when fully processed.
> 
> This has tested with FTP NAT helpers on kernel version 3.13.
> 
> Signed-off-by: John Hurley <john.hurley@netronome.com>
> ---
> datapath/conntrack.c | 47 +++++++++++++++++------------------------------
> 1 file changed, 17 insertions(+), 30 deletions(-)
> 
> diff --git a/datapath/conntrack.c b/datapath/conntrack.c
> index d942884..6e0bfed 100644
> --- a/datapath/conntrack.c
> +++ b/datapath/conntrack.c
> @@ -314,6 +314,11 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
> 	u8 nexthdr;
> 	int err;
> 
> +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
> +	bool dst_set = false;
> +	struct rtable rt = { .rt_flags = 0 };
> +#endif
> +
> 	ct = nf_ct_get(skb, &ctinfo);
> 	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
> 		return NF_ACCEPT;
> @@ -352,43 +357,25 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
> #if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
> 	/* Linux 4.5 and older depend on skb_dst being set when recalculating
> 	 * checksums after NAT helper has mangled TCP or UDP packet payload.
> -	 * This dependency is avoided when skb is CHECKSUM_PARTIAL or when UDP
> -	 * has no checksum.
> -	 *
> -	 * The dependency is not triggered when the main NAT code updates
> -	 * checksums after translating the IP header (address, port), so this
> -	 * fix only needs to be executed on packets that are both being NATted
> -	 * and that have a helper assigned.
> +	 * skb_dst is cast to a rtable struct and the flags examined.
> +	 * Forcing these flags to have RTCF_LOCAL not set ensures checksum mod
> +	 * is carried out in the same way as kernel versions > 4.5
> 	 */
> -	if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) {
> -		u8 ipproto = (proto == NFPROTO_IPV4)
> -			? ip_hdr(skb)->protocol : nexthdr;
> -		u16 offset = 0;
> -
> -		switch (ipproto) {
> -		case IPPROTO_TCP:
> -			offset = offsetof(struct tcphdr, check);
> -			break;
> -		case IPPROTO_UDP:
> -			/* Skip if no csum. */
> -			if (udp_hdr(skb)->check)
> -				offset = offsetof(struct udphdr, check);
> -			break;
> -		}
> -		if (offset) {
> -			if (unlikely(!pskb_may_pull(skb, protoff + offset + 2)))
> -				return NF_DROP;
> -
> -			skb->csum_start = skb_headroom(skb) + protoff;
> -			skb->csum_offset = offset;
> -			skb->ip_summed = CHECKSUM_PARTIAL;
> -		}
> +	if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL
> +		&& !skb_dst(skb)) {
> +		dst_set = true;
> +		skb_dst_set(skb, &rt.dst);
> 	}
> #endif
> 	err = helper->help(skb, protoff, ct, ctinfo);
> 	if (err != NF_ACCEPT)
> 		return err;
> 
> +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
> +	if (dst_set)
> +		skb_dst_set(skb, NULL);
> +#endif
> +
> 	/* Adjust seqs after helper.  This is needed due to some helpers (e.g.,
> 	 * FTP with NAT) adusting the TCP payload size when mangling IP
> 	 * addresses and/or port numbers in the text-based control connection.
> -- 
> 2.7.4
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff mbox

Patch

diff --git a/datapath/conntrack.c b/datapath/conntrack.c
index d942884..6e0bfed 100644
--- a/datapath/conntrack.c
+++ b/datapath/conntrack.c
@@ -314,6 +314,11 @@  static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
 	u8 nexthdr;
 	int err;
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
+	bool dst_set = false;
+	struct rtable rt = { .rt_flags = 0 };
+#endif
+
 	ct = nf_ct_get(skb, &ctinfo);
 	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
 		return NF_ACCEPT;
@@ -352,43 +357,25 @@  static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
 	/* Linux 4.5 and older depend on skb_dst being set when recalculating
 	 * checksums after NAT helper has mangled TCP or UDP packet payload.
-	 * This dependency is avoided when skb is CHECKSUM_PARTIAL or when UDP
-	 * has no checksum.
-	 *
-	 * The dependency is not triggered when the main NAT code updates
-	 * checksums after translating the IP header (address, port), so this
-	 * fix only needs to be executed on packets that are both being NATted
-	 * and that have a helper assigned.
+	 * skb_dst is cast to a rtable struct and the flags examined.
+	 * Forcing these flags to have RTCF_LOCAL not set ensures checksum mod
+	 * is carried out in the same way as kernel versions > 4.5
 	 */
-	if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) {
-		u8 ipproto = (proto == NFPROTO_IPV4)
-			? ip_hdr(skb)->protocol : nexthdr;
-		u16 offset = 0;
-
-		switch (ipproto) {
-		case IPPROTO_TCP:
-			offset = offsetof(struct tcphdr, check);
-			break;
-		case IPPROTO_UDP:
-			/* Skip if no csum. */
-			if (udp_hdr(skb)->check)
-				offset = offsetof(struct udphdr, check);
-			break;
-		}
-		if (offset) {
-			if (unlikely(!pskb_may_pull(skb, protoff + offset + 2)))
-				return NF_DROP;
-
-			skb->csum_start = skb_headroom(skb) + protoff;
-			skb->csum_offset = offset;
-			skb->ip_summed = CHECKSUM_PARTIAL;
-		}
+	if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL
+		&& !skb_dst(skb)) {
+		dst_set = true;
+		skb_dst_set(skb, &rt.dst);
 	}
 #endif
 	err = helper->help(skb, protoff, ct, ctinfo);
 	if (err != NF_ACCEPT)
 		return err;
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
+	if (dst_set)
+		skb_dst_set(skb, NULL);
+#endif
+
 	/* Adjust seqs after helper.  This is needed due to some helpers (e.g.,
 	 * FTP with NAT) adusting the TCP payload size when mangling IP
 	 * addresses and/or port numbers in the text-based control connection.