diff mbox

[ovs-dev] Conntrack: Fix L4 Checksums in kernel <4.6 when using NAT and helpers

Message ID CAK+XE=nhdKYjOOxSHWgzA318R2MRdQoVs2VUvOSQLx-+A3WmoA@mail.gmail.com
State Changes Requested
Headers show

Commit Message

John Hurley Jan. 4, 2017, 5:50 p.m. UTC
From 64ce83672aab5634990426e0a51af16d882ac2f9 Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Wed, 4 Jan 2017 16:52:43 +0000
Subject: [PATCH] ensure correct checksum when a packet is sent to NAT helper
 in kernel < 4.6

---
 datapath/conntrack.c | 45 ++++++++++++++++-----------------------------
 1 file changed, 16 insertions(+), 29 deletions(-)

  return NF_ACCEPT;
@@ -352,43 +356,26 @@ static int ovs_ct_helper(struct sk_buff *skb, u16
proto)
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
  /* Linux 4.5 and older depend on skb_dst being set when recalculating
  * checksums after NAT helper has mangled TCP or UDP packet payload.
- * This dependency is avoided when skb is CHECKSUM_PARTIAL or when UDP
- * has no checksum.
- *
- * The dependency is not triggered when the main NAT code updates
- * checksums after translating the IP header (address, port), so this
- * fix only needs to be executed on packets that are both being NATted
- * and that have a helper assigned.
+ * skb_dst is cast to a rtable struct and the flags examined.
+ * Forcing these flags to have RTCF_LOCAL set allows checksum mod
+ * to be carried out in the same way as kernel versions > 4.5
  */
  if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) {
- u8 ipproto = (proto == NFPROTO_IPV4)
- ? ip_hdr(skb)->protocol : nexthdr;
- u16 offset = 0;
-
- switch (ipproto) {
- case IPPROTO_TCP:
- offset = offsetof(struct tcphdr, check);
- break;
- case IPPROTO_UDP:
- /* Skip if no csum. */
- if (udp_hdr(skb)->check)
- offset = offsetof(struct udphdr, check);
- break;
- }
- if (offset) {
- if (unlikely(!pskb_may_pull(skb, protoff + offset + 2)))
- return NF_DROP;
-
- skb->csum_start = skb_headroom(skb) + protoff;
- skb->csum_offset = offset;
- skb->ip_summed = CHECKSUM_PARTIAL;
- }
+ rt.rt_flags = RTCF_LOCAL;
+ skb_dst_set(skb, (struct dst_entry *)&rt);
  }
 #endif
  err = helper->help(skb, protoff, ct, ctinfo);
  if (err != NF_ACCEPT)
  return err;

+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
+ if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) {
+ /* reset dst of skb to NULL */
+ skb_dst_set(skb, NULL);
+ }
+#endif
+
  /* Adjust seqs after helper.  This is needed due to some helpers (e.g.,
  * FTP with NAT) adusting the TCP payload size when mangling IP
  * addresses and/or port numbers in the text-based control connection.

Comments

Jarno Rajahalme Jan. 4, 2017, 9:33 p.m. UTC | #1
> On Jan 4, 2017, at 9:50 AM, John Hurley <john.hurley@netronome.com> wrote:
> 
> From 64ce83672aab5634990426e0a51af16d882ac2f9 Mon Sep 17 00:00:00 2001
> From: John Hurley <john.hurley@netronome.com <mailto:john.hurley@netronome.com>>
> Date: Wed, 4 Jan 2017 16:52:43 +0000
> Subject: [PATCH] ensure correct checksum when a packet is sent to NAT helper
>  in kernel < 4.6
> 
> ---
>  datapath/conntrack.c | 45 ++++++++++++++++-----------------------------
>  1 file changed, 16 insertions(+), 29 deletions(-)
> 
> diff --git a/datapath/conntrack.c b/datapath/conntrack.c
> index d942884..fa3b0b5 100644
> --- a/datapath/conntrack.c
> +++ b/datapath/conntrack.c
> @@ -314,6 +314,10 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
>  	u8 nexthdr;
>  	int err;
>  
> +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
> +	struct rtable rt;
> +#endif
> +

This should be properly initialized (e.g., " = { .rt_flags = 0 };”). With this RFCF_LOCAL will be unset (see below) and everything else will also be initialized to 0.

>  	ct = nf_ct_get(skb, &ctinfo);
>  	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
>  		return NF_ACCEPT;
> @@ -352,43 +356,26 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
>  #if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
>  	/* Linux 4.5 and older depend on skb_dst being set when recalculating
>  	 * checksums after NAT helper has mangled TCP or UDP packet payload.
> -	 * This dependency is avoided when skb is CHECKSUM_PARTIAL or when UDP
> -	 * has no checksum.
> -	 *
> -	 * The dependency is not triggered when the main NAT code updates
> -	 * checksums after translating the IP header (address, port), so this
> -	 * fix only needs to be executed on packets that are both being NATted
> -	 * and that have a helper assigned.
> +	 * skb_dst is cast to a rtable struct and the flags examined.
> +	 * Forcing these flags to have RTCF_LOCAL set allows checksum mod
> +	 * to be carried out in the same way as kernel versions > 4.5

As noted in the previous email, the reverse is the case. When RTCF_LOCAL is not set, then the checksum update code will behave the same as in kernel versions > 4.5.

>  	 */
>  	if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) {

Maybe it would be a bit more robust to set the dst only if not already set, i.e., add “!skb_rtable(skb)” to the condition above?

> -		u8 ipproto = (proto == NFPROTO_IPV4)
> -			? ip_hdr(skb)->protocol : nexthdr;
> -		u16 offset = 0;
> -
> -		switch (ipproto) {
> -		case IPPROTO_TCP:
> -			offset = offsetof(struct tcphdr, check);
> -			break;
> -		case IPPROTO_UDP:
> -			/* Skip if no csum. */
> -			if (udp_hdr(skb)->check)
> -				offset = offsetof(struct udphdr, check);
> -			break;
> -		}
> -		if (offset) {
> -			if (unlikely(!pskb_may_pull(skb, protoff + offset + 2)))
> -				return NF_DROP;
> -
> -			skb->csum_start = skb_headroom(skb) + protoff;
> -			skb->csum_offset = offset;
> -			skb->ip_summed = CHECKSUM_PARTIAL;
> -		}
> +		rt.rt_flags = RTCF_LOCAL;

Given the initialization above this line is not needed any more.

> +		skb_dst_set(skb, (struct dst_entry *)&rt);
>  	}
>  #endif
>  	err = helper->help(skb, protoff, ct, ctinfo);
>  	if (err != NF_ACCEPT)
>  		return err;
>  
> +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
> +	if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) {

The checksum code may change skb->ip_summed value, so we cannot use this check here any more. Maybe set a boolean (e.g., "dst_set = true;”) if the skb_dst_set call was done above and use that as the check to undo it here?

> +		/* reset dst of skb to NULL */
> +		skb_dst_set(skb, NULL);
> +	}
> +#endif
> +
>  	/* Adjust seqs after helper.  This is needed due to some helpers (e.g.,
>  	 * FTP with NAT) adusting the TCP payload size when mangling IP
>  	 * addresses and/or port numbers in the text-based control connection.
> -- 
> 2.7.4
> 
> 
> On Wed, Jan 4, 2017 at 5:50 PM, John Hurley <john.hurley@netronome.com <mailto:john.hurley@netronome.com>> wrote:
> 
> 
> On Wed, Jan 4, 2017 at 12:53 AM, Jarno Rajahalme <jarno@ovn.org <mailto:jarno@ovn.org>> wrote:
> 
> > On Dec 28, 2016, at 3:05 PM, John Hurley <john.hurley@netronome.com <mailto:john.hurley@netronome.com>> wrote:
> >
> > sorry, updated patch….
> 
> This patch is still whitespace damaged. Maybe use git format-patch and git send-email to send it?
> 
> >
> > --------------------------------
> >
> > Fix for a bug when sending a NATed packet to helper function in kernels
> > <4.6.
> >
> > Setting CHECKSUM_PARTIAL flag can lead to L4 checksum corruption.
> >
> > Corruption can occur in datapath.c/queue_userspace_packet().
> >
> 
> You mean this code:
> 
>         /* Complete checksum if needed */
>         if (skb->ip_summed == CHECKSUM_PARTIAL &&
>             (err = skb_checksum_help(skb)))
>                 goto out;
> 
> 
> 
> 
> 
> Yes, my finding was that if a packet was sent to conntrack NAT and an FTP helper, but had only IP addresses/ports translated (no payload changes), that the TCP checksum would be correct prior to the call of skb_checksum_help and corrupt after.
>  
> If the packet payload was mangled then the checksum was incorrect both before and after this call.
> 
> 
>  
> Would you care detailing why the corruption happens? Does it always happen if ip_summed is CHECKSUM_PARTIAL?
> 
> 
> 
> If ip_summed was not set to CHECKSUM_PARTIAL then the non payload mangled FTP packets on the tests I was running would have the correct checksum but mangled payload packets would not.
> 
> My take on the CHECKSUM_PARTIAL flag from the documentation is that it signals that the checksum had been partially complete and that the remaining (payload section) section needs to be calculated and combined to the existing field. However, the NAT kernel modules seem to handle this update so we end up doing the 'remainder' of the calculation without needing to.  It may also expect this to be corrected in the network card but the cards I was testing on were not running checksum offloading.
> 
> 
>  
> > Giving the packet an skb_dst allows the kernel to correct the checksum.
> >
> > Verified with packets mangled by Conntrack/NAT helpers.
> >
> 
> It would also be helpful to add the reference to the patch that this fixes (“Fixes:”)
> 
> > Signed-off-by: John Hurley <john.hurley@netronome.com <mailto:john.hurley@netronome.com>>
> > ---
> >
> > diff --git a/datapath/conntrack.c b/datapath/conntrack.c
> > index d942884..fef67ba 100644
> > --- a/datapath/conntrack.c
> > +++ b/datapath/conntrack.c
> > @@ -314,6 +314,10 @@ static int ovs_ct_helper(struct sk_buff *skb, u16
> > proto)
> >  u8 nexthdr;
> >  int err;
> >
> > +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
> > + struct rtable *rt = NULL;
> > +#endif
> > +
> >  ct = nf_ct_get(skb, &ctinfo);
> >  if (!ct || ctinfo == IP_CT_RELATED_REPLY)
> >  return NF_ACCEPT;
> > @@ -352,43 +356,28 @@ static int ovs_ct_helper(struct sk_buff *skb, u16
> > proto)
> > #if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
> >  /* Linux 4.5 and older depend on skb_dst being set when recalculating
> >  * checksums after NAT helper has mangled TCP or UDP packet payload.
> > - * This dependency is avoided when skb is CHECKSUM_PARTIAL or when UDP
> > - * has no checksum.
> >  *
> > - * The dependency is not triggered when the main NAT code updates
> > - * checksums after translating the IP header (address, port), so this
> > - * fix only needs to be executed on packets that are both being NATted
> > - * and that have a helper assigned.
> > + * skb_dst is cast to a rtable struct and the flags examined.
> > + * Forcing these flags to have RTCF_LOCAL set allows checksum mod
> > + * to be carried out in the same way as kernel versions > 4.5
> 
> Are kernels > 4.5 treating all these packets as RTCF_LOCAL, or are some of them possibly CHECKSUM_PARTIAL?
> 
> 
> 
> 
> yes, it appears so - assuming CHECKSUM_PARTIAL is not set already.
> 
> The following are the nat checksum calculations from kernel 4.5:
> 126 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L126> static void nf_nat_ipv4_csum_recalc <http://lxr.free-electrons.com/ident?v=4.5;i=nf_nat_ipv4_csum_recalc>(struct sk_buff <http://lxr.free-electrons.com/ident?v=4.5;i=sk_buff> *skb <http://lxr.free-electrons.com/ident?v=4.5;i=skb>,
> 127 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L127>                                     u8 <http://lxr.free-electrons.com/ident?v=4.5;i=u8> proto <http://lxr.free-electrons.com/ident?v=4.5;i=proto>, void *data <http://lxr.free-electrons.com/ident?v=4.5;i=data>, __sum16 <http://lxr.free-electrons.com/ident?v=4.5;i=__sum16> *check <http://lxr.free-electrons.com/ident?v=4.5;i=check>,
> 128 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L128>                                     int datalen, int oldlen)
> 129 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L129> {
> 130 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L130>         const struct iphdr <http://lxr.free-electrons.com/ident?v=4.5;i=iphdr> *iph = ip_hdr <http://lxr.free-electrons.com/ident?v=4.5;i=ip_hdr>(skb <http://lxr.free-electrons.com/ident?v=4.5;i=skb>);
> 131 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L131>         struct rtable <http://lxr.free-electrons.com/ident?v=4.5;i=rtable> *rt <http://lxr.free-electrons.com/ident?v=4.5;i=rt> = skb_rtable <http://lxr.free-electrons.com/ident?v=4.5;i=skb_rtable>(skb <http://lxr.free-electrons.com/ident?v=4.5;i=skb>);
> 132 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L132> 
> 133 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L133>         if (skb <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->ip_summed != CHECKSUM_PARTIAL <http://lxr.free-electrons.com/ident?v=4.5;i=CHECKSUM_PARTIAL>) {
> 134 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L134>                 if (!(rt <http://lxr.free-electrons.com/ident?v=4.5;i=rt>->rt_flags & RTCF_LOCAL <http://lxr.free-electrons.com/ident?v=4.5;i=RTCF_LOCAL>) &&
> 135 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L135>                     (!skb <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->dev <http://lxr.free-electrons.com/ident?v=4.5;i=dev> || skb <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->dev <http://lxr.free-electrons.com/ident?v=4.5;i=dev>->features <http://lxr.free-electrons.com/ident?v=4.5;i=features> &
> 136 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L136>                      (NETIF_F_IP_CSUM <http://lxr.free-electrons.com/ident?v=4.5;i=NETIF_F_IP_CSUM> | NETIF_F_HW_CSUM <http://lxr.free-electrons.com/ident?v=4.5;i=NETIF_F_HW_CSUM>))) {
> 137 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L137>                         skb <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->ip_summed = CHECKSUM_PARTIAL <http://lxr.free-electrons.com/ident?v=4.5;i=CHECKSUM_PARTIAL>;
> 138 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L138>                         skb <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->csum_start = skb_headroom <http://lxr.free-electrons.com/ident?v=4.5;i=skb_headroom>(skb <http://lxr.free-electrons.com/ident?v=4.5;i=skb>) +
> 139 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L139>                                           skb_network_offset <http://lxr.free-electrons.com/ident?v=4.5;i=skb_network_offset>(skb <http://lxr.free-electrons.com/ident?v=4.5;i=skb>) +
> 140 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L140>                                           ip_hdrlen <http://lxr.free-electrons.com/ident?v=4.5;i=ip_hdrlen>(skb <http://lxr.free-electrons.com/ident?v=4.5;i=skb>);
> 141 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L141>                         skb <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->csum_offset = (void *)check <http://lxr.free-electrons.com/ident?v=4.5;i=check> - data <http://lxr.free-electrons.com/ident?v=4.5;i=data>;
> 142 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L142>                         *check <http://lxr.free-electrons.com/ident?v=4.5;i=check> = ~csum_tcpudp_magic(iph->saddr <http://lxr.free-electrons.com/ident?v=4.5;i=saddr>, iph->daddr <http://lxr.free-electrons.com/ident?v=4.5;i=daddr>,
> 143 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L143>                                                     datalen, proto <http://lxr.free-electrons.com/ident?v=4.5;i=proto>, 0);
> 144 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L144>                 } else {
> 145 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L145>                         *check <http://lxr.free-electrons.com/ident?v=4.5;i=check> = 0;
> 146 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L146>                         *check <http://lxr.free-electrons.com/ident?v=4.5;i=check> = csum_tcpudp_magic <http://lxr.free-electrons.com/ident?v=4.5;i=csum_tcpudp_magic>(iph->saddr <http://lxr.free-electrons.com/ident?v=4.5;i=saddr>, iph->daddr <http://lxr.free-electrons.com/ident?v=4.5;i=daddr>,
> 147 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L147>                                                    datalen, proto <http://lxr.free-electrons.com/ident?v=4.5;i=proto>,
> 148 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L148>                                                    csum_partial <http://lxr.free-electrons.com/ident?v=4.5;i=csum_partial>(data <http://lxr.free-electrons.com/ident?v=4.5;i=data>, datalen,
> 149 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L149>                                                                 0));
> 150 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L150>                         if (proto <http://lxr.free-electrons.com/ident?v=4.5;i=proto> == IPPROTO_UDP <http://lxr.free-electrons.com/ident?v=4.5;i=IPPROTO_UDP> && !*check <http://lxr.free-electrons.com/ident?v=4.5;i=check>)
> 151 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L151>                                 *check <http://lxr.free-electrons.com/ident?v=4.5;i=check> = CSUM_MANGLED_0 <http://lxr.free-electrons.com/ident?v=4.5;i=CSUM_MANGLED_0>;
> 152 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L152>                 }
> 153 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L153>         } else
> 154 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L154>                 inet_proto_csum_replace2 <http://lxr.free-electrons.com/ident?v=4.5;i=inet_proto_csum_replace2>(check <http://lxr.free-electrons.com/ident?v=4.5;i=check>, skb <http://lxr.free-electrons.com/ident?v=4.5;i=skb>,
> 155 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L155>                                          htons <http://lxr.free-electrons.com/ident?v=4.5;i=htons>(oldlen), htons <http://lxr.free-electrons.com/ident?v=4.5;i=htons>(datalen), true <http://lxr.free-electrons.com/ident?v=4.5;i=true>);
> 156 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L156> }
> and version 4.6:
> 126 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L126> static void nf_nat_ipv4_csum_recalc <http://lxr.free-electrons.com/ident?v=4.6;i=nf_nat_ipv4_csum_recalc>(struct sk_buff <http://lxr.free-electrons.com/ident?v=4.6;i=sk_buff> *skb <http://lxr.free-electrons.com/ident?v=4.6;i=skb>,
> 127 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L127>                                     u8 <http://lxr.free-electrons.com/ident?v=4.6;i=u8> proto <http://lxr.free-electrons.com/ident?v=4.6;i=proto>, void *data <http://lxr.free-electrons.com/ident?v=4.6;i=data>, __sum16 <http://lxr.free-electrons.com/ident?v=4.6;i=__sum16> *check <http://lxr.free-electrons.com/ident?v=4.6;i=check>,
> 128 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L128>                                     int datalen, int oldlen)
> 129 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L129> {
> 130 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L130>         if (skb <http://lxr.free-electrons.com/ident?v=4.6;i=skb>->ip_summed != CHECKSUM_PARTIAL <http://lxr.free-electrons.com/ident?v=4.6;i=CHECKSUM_PARTIAL>) {
> 131 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L131>                 const struct iphdr <http://lxr.free-electrons.com/ident?v=4.6;i=iphdr> *iph = ip_hdr <http://lxr.free-electrons.com/ident?v=4.6;i=ip_hdr>(skb <http://lxr.free-electrons.com/ident?v=4.6;i=skb>);
> 132 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L132> 
> 133 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L133>                 skb <http://lxr.free-electrons.com/ident?v=4.6;i=skb>->ip_summed = CHECKSUM_PARTIAL <http://lxr.free-electrons.com/ident?v=4.6;i=CHECKSUM_PARTIAL>;
> 134 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L134>                 skb <http://lxr.free-electrons.com/ident?v=4.6;i=skb>->csum_start = skb_headroom <http://lxr.free-electrons.com/ident?v=4.6;i=skb_headroom>(skb <http://lxr.free-electrons.com/ident?v=4.6;i=skb>) + skb_network_offset <http://lxr.free-electrons.com/ident?v=4.6;i=skb_network_offset>(skb <http://lxr.free-electrons.com/ident?v=4.6;i=skb>) +
> 135 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L135>                         ip_hdrlen <http://lxr.free-electrons.com/ident?v=4.6;i=ip_hdrlen>(skb <http://lxr.free-electrons.com/ident?v=4.6;i=skb>);
> 136 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L136>                 skb <http://lxr.free-electrons.com/ident?v=4.6;i=skb>->csum_offset = (void *)check <http://lxr.free-electrons.com/ident?v=4.6;i=check> - data <http://lxr.free-electrons.com/ident?v=4.6;i=data>;
> 137 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L137>                 *check <http://lxr.free-electrons.com/ident?v=4.6;i=check> = ~csum_tcpudp_magic(iph->saddr <http://lxr.free-electrons.com/ident?v=4.6;i=saddr>, iph->daddr <http://lxr.free-electrons.com/ident?v=4.6;i=daddr>, datalen,
> 138 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L138>                                             proto <http://lxr.free-electrons.com/ident?v=4.6;i=proto>, 0);
> 139 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L139>         } else
> 140 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L140>                 inet_proto_csum_replace2 <http://lxr.free-electrons.com/ident?v=4.6;i=inet_proto_csum_replace2>(check <http://lxr.free-electrons.com/ident?v=4.6;i=check>, skb <http://lxr.free-electrons.com/ident?v=4.6;i=skb>,
> 141 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L141>                                          htons <http://lxr.free-electrons.com/ident?v=4.6;i=htons>(oldlen), htons <http://lxr.free-electrons.com/ident?v=4.6;i=htons>(datalen), true <http://lxr.free-electrons.com/ident?v=4.6;i=true>);
> 142 <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L142> } 
> 
> >  */
> >  if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) {
> > - u8 ipproto = (proto == NFPROTO_IPV4)
> > - ? ip_hdr(skb)->protocol : nexthdr;
> > - u16 offset = 0;
> > -
> > - switch (ipproto) {
> > - case IPPROTO_TCP:
> > - offset = offsetof(struct tcphdr, check);
> > - break;
> > - case IPPROTO_UDP:
> > - /* Skip if no csum. */
> > - if (udp_hdr(skb)->check)
> > - offset = offsetof(struct udphdr, check);
> > - break;
> > - }
> > - if (offset) {
> > - if (unlikely(!pskb_may_pull(skb, protoff + offset + 2)))
> > - return NF_DROP;
> > -
> > - skb->csum_start = skb_headroom(skb) + protoff;
> > - skb->csum_offset = offset;
> > - skb->ip_summed = CHECKSUM_PARTIAL;
> > - }
> > + rt = kmalloc(sizeof(struct rtable), GFP_KERNEL);
> 
> Could the struct rtable be allocated from stack instead? Or could it be a static variable in the file scope? In either case we would avoid dynamic memory allocation/free. I recall setting ip_summed to CHECKSUM_PARTIAL was deemed a simpler way to backport as it (also) avoids the dynamic memory allocations.
> 
> 
> 
> yes, it could be added as a stack variable. As it is only added to the skb temporarily to force the checksum change then this would make more sense!
> I'll forward on a further email after this with a patch using a stack based variable - I tested this patch using an FTP session across OVS. 
> 
> 
>  
> > + rt->rt_flags = RTCF_LOCAL;
> > + skb_dst_set(skb, (struct dst_entry *)rt);
> >  }
> > #endif
> >  err = helper->help(skb, protoff, ct, ctinfo);
> >  if (err != NF_ACCEPT)
> >  return err;
> >
> > +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
> > + if (rt) {
> > + skb_dst_set(skb, NULL);
> > + kfree(rt);
> > + }
> > +#endif
> > +
> >  /* Adjust seqs after helper.  This is needed due to some helpers (e.g.,
> >  * FTP with NAT) adusting the TCP payload size when mangling IP
> >  * addresses and/or port numbers in the text-based control connection.
> >
> > --
> >
> > On Wed, Dec 28, 2016 at 10:08 PM, Ben Pfaff <blp@ovn.org <mailto:blp@ovn.org>> wrote:
> >
> >> On Wed, Dec 28, 2016 at 09:37:30PM +0000, John Hurley wrote:
> >>> Fix for a bug when sending a NATed packet to helper function in kernels
> >>> <4.6.
> >>>
> >>> Setting CHECKSUM_PARTIAL flag means packets could have L4 checksum
> >>> corrupted in
> >>>
> >>> datapath.c/queue_userspace_packet().
> >>>
> >>> Giving the packet an skb_dst allows the kernel to correct the checksum if
> >>> packet
> >>> mangling happens in Conntrack/NAT helpers.
> >>>
> >>> Signed-off-by: John Hurley <john.hurley@netronome.com <mailto:john.hurley@netronome.com>>
> >>
> >> I'm not the right person to review this but I did notice that the patch
> >> is wordwrapped and otherwise space-damaged.
> >>
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org <mailto:dev@openvswitch.org>
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
> 
> 
>
diff mbox

Patch

diff --git a/datapath/conntrack.c b/datapath/conntrack.c
index d942884..fa3b0b5 100644
--- a/datapath/conntrack.c
+++ b/datapath/conntrack.c
@@ -314,6 +314,10 @@  static int ovs_ct_helper(struct sk_buff *skb, u16
proto)
  u8 nexthdr;
  int err;

+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
+ struct rtable rt;
+#endif
+
  ct = nf_ct_get(skb, &ctinfo);
  if (!ct || ctinfo == IP_CT_RELATED_REPLY)