[ovs-dev,PATCHv2] datapath: Add support for kernel 4.19.x and 4.20.x
diff mbox series

Message ID 1555089779-1685-1-git-send-email-pkusunyifeng@gmail.com
State New
Headers show
Series
  • [ovs-dev,PATCHv2] datapath: Add support for kernel 4.19.x and 4.20.x
Related show

Commit Message

Yifeng Sun April 12, 2019, 5:22 p.m. UTC
This patch introduces changes needed by OVS to support latest
Linux kernels (4.19.x and 4.20.x). Recent kernels changed many
APIs that are being used by OVS. One major change is that
struct nf_conntrack_l3proto became invisible outside of kernel, so
get_l4proto function is added in file compat/nf_conntrack_core.c to
accommodate this issue.

In addition, if kernel is not compiled with CONFIG_NF_NAT_IPV4
or CONFIG_NF_NAT_IPV6, flow action 'ct(nat)' can cause kernel
to crash. This patch handles this condition.

This patch doesn't introduce new failed tests when running
'make check-kmod' for kernels listed below:
    3.10.0-957.5.1.el7.x86_64
    4.4.0-142-generic
    4.17.14
    4.18.0-16-generic
    4.19.34
    4.20.17

Travis passed at
https://travis-ci.org/yifsun/ovs-travis/builds/519011670

Signed-off-by: Yifeng Sun <pkusunyifeng@gmail.com>
v1->v2: Fixed the CONFIG_NF_NAT_IPV4 bug by using Greg's config
        file. Thanks Greg!
---
 .travis.yml                                        | 20 ++---
 NEWS                                               |  2 +
 acinclude.m4                                       | 20 ++++-
 datapath/conntrack.c                               | 86 +++++++++++++++++++++-
 .../include/net/netfilter/nf_conntrack_core.h      |  6 ++
 .../include/net/netfilter/nf_conntrack_count.h     |  2 +
 datapath/linux/compat/nf_conncount.c               |  6 +-
 datapath/linux/compat/nf_conntrack_core.c          | 80 ++++++++++++++++++++
 datapath/linux/compat/nf_conntrack_proto.c         |  3 +
 9 files changed, 209 insertions(+), 16 deletions(-)

Comments

Yi-Hung Wei April 16, 2019, 12:39 a.m. UTC | #1
On Fri, Apr 12, 2019 at 10:23 AM Yifeng Sun <pkusunyifeng@gmail.com> wrote:
>
> This patch introduces changes needed by OVS to support latest
> Linux kernels (4.19.x and 4.20.x). Recent kernels changed many
> APIs that are being used by OVS. One major change is that
> struct nf_conntrack_l3proto became invisible outside of kernel, so
> get_l4proto function is added in file compact/nf_conntrack_core.c to
> accommodate this issue.
>
> In addition, if kernel is not compiled with CONFIG_NF_NAT_IPV4
> or CONFIG_NF_NAT_IPV6, flow action 'ct(nat)' can cause kernel
> to crash. This patch handles this condition.
>
> This patch doesn't introduce new failed tests when running
> 'make check-kmod' for kernels listed below:
>     3.10.0-957.5.1.el7.x86_64
>     4.4.0-142-generic
>     4.17.14
>     4.18.0-16-generic
>     4.19.34
>     4.20.17
>
> Travis passed at
> https://travis-ci.org/yifsun/ovs-travis/builds/519011670
>
> Signed-off-by: Yifeng Sun <pkusunyifeng@gmail.com>
> v1->v2: Fixed the CONFIG_NF_NAT_IPV4 bug by using Greg's config
>         file. Thanks Greg!
> ---
Hi Yifeng,

Thanks for the patch.

I think this patch mixes backports of a couple of upstream patches,
fixes for 4.19/4.20 compilation issues, and the nf_nat issue together,
which may make it hard to keep track of the kernel backports.  IMHO, it
would be better to break this patch down into several smaller ones, so
that it is easier to maintain and review. My detailed comments are below.

> diff --git a/datapath/conntrack.c b/datapath/conntrack.c
> index 52208bad3029..ce36a8ddea50 100644
> --- a/datapath/conntrack.c
> +++ b/datapath/conntrack.c
> @@ -38,6 +38,10 @@
>  #include <net/netfilter/nf_nat_l3proto.h>
>  #endif
>
> +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && defined(HAVE_IPV6_FRAG_H)
> +#include <net/ipv6_frag.h>
> +#endif
> +
I think this is related to the upstream change 70b095c843266 ("ipv6:
remove dependency of nf_defrag_ipv6 on ipv6 module"). Should we split
this out into another patch? We may be able to hide the following in the
compat layer.
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && defined(HAVE_IPV6_FRAG_H)
+#endif


>  #include "datapath.h"
>  #include "conntrack.h"
>  #include "flow.h"
> @@ -645,32 +649,62 @@ static struct nf_conn *
>  ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
>                      u8 l3num, struct sk_buff *skb, bool natted)
>  {
> -       const struct nf_conntrack_l3proto *l3proto;
>         const struct nf_conntrack_l4proto *l4proto;
>         struct nf_conntrack_tuple tuple;
>         struct nf_conntrack_tuple_hash *h;
>         struct nf_conn *ct;
> -       unsigned int dataoff;
>         u8 protonum;
>
> +#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
> +       const struct nf_conntrack_l3proto *l3proto;
> +       unsigned int dataoff;
> +
>         l3proto = __nf_ct_l3proto_find(l3num);
>         if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
>                                  &protonum) <= 0) {
>                 pr_debug("ovs_ct_find_existing: Can't get protonum\n");
>                 return NULL;
>         }
> +#else
> +       int protooff;
> +
> +       protooff = get_l4proto(skb, skb_network_offset(skb),
> +                              l3num, &protonum);
> +       if (protooff <= 0) {
> +               pr_warn("ovs_ct_find_existing: Can't get protonum\n");
> +               return NULL;
> +       }
> +#endif
> +
> +#ifdef HAVE_NF_CT_L4PROTO_FIND_TAKES_L3PROTO
>         l4proto = __nf_ct_l4proto_find(l3num, protonum);
> -       if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
> -                            protonum, net, &tuple, l3proto, l4proto)) {
> +#else
> +       l4proto = __nf_ct_l4proto_find(protonum);
> +#endif
> +
> +#ifdef HAVE_NF_CT_GET_TUPLE
> +       if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
> +                                      l3num, net, &tuple)) {
> +               pr_debug("ovs_ct_find_existing: Can't get tuple\n");
> +               return NULL;
> +       }
> +#else
> +       if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
> +                                      l3num, net, &tuple)) {
>                 pr_debug("ovs_ct_find_existing: Can't get tuple\n");
>                 return NULL;
>         }
> +#endif
>
>         /* Must invert the tuple if skb has been transformed by NAT. */
>         if (natted) {
>                 struct nf_conntrack_tuple inverse;
>
> +#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
>                 if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) {
> +#else
> +               if (!nf_ct_invert_tuple(&inverse, &tuple, l4proto)) {
> +#endif
>                         pr_debug("ovs_ct_find_existing: Inversion failed!\n");
>                         return NULL;
>                 }
The changes in ovs_ct_find_existing() are due to upstream commit
60e3be94e6a ("openvswitch: use nf_ct_get_tuplepr, invert_tuplepr").
Can we split them out as a separate patch?

Upstream commit 60e3be94e6a invokes nf_ct_get_tuplepr() instead of
nf_ct_get_tuple(), and it looks like nf_ct_get_tuplepr() has been
available since quite old kernels (at least 2.6.26); it was later
updated to add network namespace support in
a31f1adc09489 ("netfilter: nf_conntrack: Add a struct net parameter to
l4_pkt_to_tuple"). Can we see if we can replace nf_ct_get_tuple() with
nf_ct_get_tuplepr() to avoid the above #ifdef #else #endif logic?


> @@ -989,6 +1023,9 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
>         if (!cached) {
>                 struct nf_conn *tmpl = info->ct;
>                 int err;
> +#ifndef HAVE_NF_CONNTRACK_IN_TAKES_NET
> +               struct nf_hook_state state = {};
> +#endif
>
>                 /* Associate skb with specified zone. */
>                 if (tmpl) {
> @@ -998,8 +1035,15 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
>                         nf_ct_set(skb, tmpl, IP_CT_NEW);
>                 }
>
> +#ifdef HAVE_NF_CONNTRACK_IN_TAKES_NET
>                 err = nf_conntrack_in(net, info->family,
>                                       NF_INET_PRE_ROUTING, skb);
> +#else
> +               state.hook = NF_INET_PRE_ROUTING,
> +               state.pf = info->family,
> +               state.net = net,
> +               err = nf_conntrack_in(skb, &state);
> +#endif
>                 if (err != NF_ACCEPT)
>                         return -ENOENT;
>

The changes in __ovs_ct_lookup() are related to 93e66024b024
("netfilter: conntrack: pass nf_hook_state to packet and error
handlers").  In general, we would like to keep our
./datapath/*.c code as similar as possible to the upstream
./net/openvswitch/*.c. In this case, we can try to hide the #if #else
#endif in the compat layer in ./datapath/linux/compat/

Here is an example.  I only tested it on a 4.4 kernel; it may need to be
tested on other kernels.

diff --git a/acinclude.m4 b/acinclude.m4
index 3cd6ea7302d5..d6cfbd54e357 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -675,6 +675,9 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
                   [nf_ct_set])
   OVS_GREP_IFELSE([$KSRC/include/net/netfilter/nf_conntrack.h],
                   [nf_ct_is_untracked])
+  OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h],
+                  [nf_conntrack_in], [u_int8_t pf],
+                  [OVS_DEFINE([HAVE_NF_CONNTRACK_IN_PF])])
   OVS_GREP_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_zones.h],
                   [nf_ct_zone_init])
   OVS_GREP_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_l3proto.h],
diff --git a/datapath/conntrack.c b/datapath/conntrack.c
index a7dc9e0c3513..5a97f913f0b2 100644
--- a/datapath/conntrack.c
+++ b/datapath/conntrack.c
@@ -987,6 +987,11 @@ static int __ovs_ct_lookup(struct net *net,
struct sw_flow_key *key,
        struct nf_conn *ct;

        if (!cached) {
+               struct nf_hook_state state = {
+                       .hook = NF_INET_PRE_ROUTING,
+                       .pf = info->family,
+                       .net = net,
+               };
                struct nf_conn *tmpl = info->ct;
                int err;

@@ -998,8 +1003,7 @@ static int __ovs_ct_lookup(struct net *net,
struct sw_flow_key *key,
                        nf_ct_set(skb, tmpl, IP_CT_NEW);
                }

-               err = nf_conntrack_in(net, info->family,
-                                     NF_INET_PRE_ROUTING, skb);
+               err = nf_conntrack_in(skb, &state);
                if (err != NF_ACCEPT)
                        return -ENOENT;

diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
index 7834c8c25f79..b05a5beda3cc 100644
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
+++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
@@ -104,4 +104,14 @@ static inline bool rpl_nf_ct_delete(struct
nf_conn *ct, u32 portid, int report)
 #define nf_ct_delete rpl_nf_ct_delete
 #endif /* HAVE_NF_CONN_TIMER */

+#ifdef HAVE_NF_CONNTRACK_IN_PF
+
+static inline bool rpl_nf_conntrack_in(struct sk_buff *skb,
+                                       struct nf_hook_state *state)
+{
+    return nf_conntrack_in(state->net, state->pf, state->hook, skb);
+}
+#define nf_conntrack_in rpl_nf_conntrack_in
+#endif /* HAVE_NF_CONNTRACK_IN_PF */
+
 #endif /* _NF_CONNTRACK_CORE_WRAPPER_H */


> @@ -1307,9 +1351,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
>  {
>         int nh_ofs;
>         int err;
> +       /* From kernel 4.19.0+, Function handle_fragments may shrink skb's
> +        * headroom, which will result in loss of ethernet header data.
> +        * This buf is used to backup the header data before calling
> +        * handle_fragments. */
> +       char buf[32];
>
>         /* The conntrack module expects to be working at L3. */
>         nh_ofs = skb_network_offset(skb);
> +       if (nh_ofs > sizeof(buf))
> +               return -EINVAL;
> +       memcpy(buf, skb->data, nh_ofs);
>         skb_pull_rcsum(skb, nh_ofs);
>
>         err = ovs_skb_network_trim(skb);
> @@ -1326,8 +1378,16 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
>                 err = ovs_ct_commit(net, key, info, skb);
>         else
>                 err = ovs_ct_lookup(net, key, info, skb);
> +       if (err)
> +               return err;
>
> +       if (skb_headroom(skb) < nh_ofs) {
> +               err = pskb_expand_head(skb, nh_ofs, 0, GFP_ATOMIC);
> +               if (err)
> +                       return err;
> +       }
>         skb_push(skb, nh_ofs);
> +       memcpy(skb->data, buf, nh_ofs);
>         skb_postpush_rcsum(skb, skb->data, nh_ofs);
>         if (err)
>                 kfree_skb(skb);
The change in ovs_ct_execute() looks like a bug fix for the upstream
kernel. According to our backport policy,
http://docs.openvswitch.org/en/latest/internals/contributing/backporting-patches/
please upstream it to net-next before bringing it back to datapath.



> @@ -1362,7 +1422,11 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
>                 return -EINVAL;
>         }
>
> +#ifdef HAVE_NF_CT_HELPER_EXT_ADD_TAKES_HELPER
>         help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
> +#else
> +       help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL);
> +#endif
>         if (!help) {
>                 nf_conntrack_helper_put(helper);
>                 return -ENOMEM;
The change here is related to upstream patch 440534d3c56b ("netfilter:
Remove useless param helper of nf_ct_helper_ext_add"). Can you try to
hide the #if #else #endif logic in the compat layer, as in the example
for __ovs_ct_lookup()?


> @@ -1387,6 +1451,20 @@ static int parse_nat(const struct nlattr *attr,
>         bool have_proto_max = false;
>         bool ip_vers = (info->family == NFPROTO_IPV6);
>
> +#ifndef CONFIG_NF_NAT_IPV4
> +       if (info->family == NFPROTO_IPV4) {
> +               OVS_NLERR(log, "Flow action ct(nat) not supported without nf_nat_ipv4");
> +               return -ENOTSUPP;
> +       }
> +#endif
> +
> +#ifndef CONFIG_NF_NAT_IPV6
> +       if (info->family == NFPROTO_IPV6) {
> +               OVS_NLERR(log, "Flow action ct(nat) not supported without nf_nat_ipv6");
> +               return -ENOTSUPP;
> +        }
> +#endif
> +
>         nla_for_each_nested(a, attr, rem) {
>                 static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = {
>                         [OVS_NAT_ATTR_SRC] = {0, 0},
Is this something that can also happen in the upstream kernel? If so,
we should upstream the fix before backporting it to datapath.


I did not review the following compat code since it may need to
change according to the backport approach that is chosen.

> diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
> index 7834c8c25f79..7fca7dc551c8 100644

Thanks,

-Yi-Hung
Yifeng Sun April 16, 2019, 6:12 p.m. UTC | #2
Thanks Yi-Hung! I will look at it and come up with a new version.
Yifeng

On Mon, Apr 15, 2019 at 5:39 PM Yi-Hung Wei <yihung.wei@gmail.com> wrote:
>
> On Fri, Apr 12, 2019 at 10:23 AM Yifeng Sun <pkusunyifeng@gmail.com> wrote:
> >
> > This patch introduces changes needed by OVS to support latest
> > Linux kernels (4.19.x and 4.20.x). Recent kernels changed many
> > APIs that are being used by OVS. One major change is that
> > struct nf_conntrack_l3proto became invisible outside of kernel, so
> > get_l4proto function is added in file compact/nf_conntrack_core.c to
> > accommodate this issue.
> >
> > In addition, if kernel is not compiled with CONFIG_NF_NAT_IPV4
> > or CONFIG_NF_NAT_IPV6, flow action 'ct(nat)' can cause kernel
> > to crash. This patch handles this condition.
> >
> > This patch doesn't introduce new failed tests when running
> > 'make check-kmod' for kernels listed below:
> >     3.10.0-957.5.1.el7.x86_64
> >     4.4.0-142-generic
> >     4.17.14
> >     4.18.0-16-generic
> >     4.19.34
> >     4.20.17
> >
> > Travis passed at
> > https://travis-ci.org/yifsun/ovs-travis/builds/519011670
> >
> > Signed-off-by: Yifeng Sun <pkusunyifeng@gmail.com>
> > v1->v2: Fixed the CONFIG_NF_NAT_IPV4 bug by using Greg's config
> >         file. Thanks Greg!
> > ---
> Hi Yifeng,
>
> Thanks for the patch.
>
> I think this patch mixes a couple upstream patches backport, 4.19,
> 4.20 compilation issues, and the nf_nat issue together so that it may
> be hard to keep track of the kernel backport.  IMHO, it would be
> easier to break this patch down to a couple of them, so that it would
> be easier to maintain and review. My detailed comments are as below.
>
> > diff --git a/datapath/conntrack.c b/datapath/conntrack.c
> > index 52208bad3029..ce36a8ddea50 100644
> > --- a/datapath/conntrack.c
> > +++ b/datapath/conntrack.c
> > @@ -38,6 +38,10 @@
> >  #include <net/netfilter/nf_nat_l3proto.h>
> >  #endif
> >
> > +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && defined(HAVE_IPV6_FRAG_H)
> > +#include <net/ipv6_frag.h>
> > +#endif
> > +
> I think this is related to an upstream change 70b095c843266 ("ipv6:
> remove dependency of nf_defrag_ipv6 on ipv6 module"). Should we split
> this out in anther patch? We may be able to hide the following in the
> compat layer.
> +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && defined(HAVE_IPV6_FRAG_H)
> +#endif
>
>
> >  #include "datapath.h"
> >  #include "conntrack.h"
> >  #include "flow.h"
> > @@ -645,32 +649,62 @@ static struct nf_conn *
> >  ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
> >                      u8 l3num, struct sk_buff *skb, bool natted)
> >  {
> > -       const struct nf_conntrack_l3proto *l3proto;
> >         const struct nf_conntrack_l4proto *l4proto;
> >         struct nf_conntrack_tuple tuple;
> >         struct nf_conntrack_tuple_hash *h;
> >         struct nf_conn *ct;
> > -       unsigned int dataoff;
> >         u8 protonum;
> >
> > +#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
> > +       const struct nf_conntrack_l3proto *l3proto;
> > +       unsigned int dataoff;
> > +
> >         l3proto = __nf_ct_l3proto_find(l3num);
> >         if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
> >                                  &protonum) <= 0) {
> >                 pr_debug("ovs_ct_find_existing: Can't get protonum\n");
> >                 return NULL;
> >         }
> > +#else
> > +       int protooff;
> > +
> > +       protooff = get_l4proto(skb, skb_network_offset(skb),
> > +                              l3num, &protonum);
> > +       if (protooff <= 0) {
> > +               pr_warn("ovs_ct_find_existing: Can't get protonum\n");
> > +               return NULL;
> > +       }
> > +#endif
> > +
> > +#ifdef HAVE_NF_CT_L4PROTO_FIND_TAKES_L3PROTO
> >         l4proto = __nf_ct_l4proto_find(l3num, protonum);
> > -       if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
> > -                            protonum, net, &tuple, l3proto, l4proto)) {
> > +#else
> > +       l4proto = __nf_ct_l4proto_find(protonum);
> > +#endif
> > +
> > +#ifdef HAVE_NF_CT_GET_TUPLE
> > +       if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
> > +                                      l3num, net, &tuple)) {
> > +               pr_debug("ovs_ct_find_existing: Can't get tuple\n");
> > +               return NULL;
> > +       }
> > +#else
> > +       if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
> > +                                      l3num, net, &tuple)) {
> >                 pr_debug("ovs_ct_find_existing: Can't get tuple\n");
> >                 return NULL;
> >         }
> > +#endif
> >
> >         /* Must invert the tuple if skb has been transformed by NAT. */
> >         if (natted) {
> >                 struct nf_conntrack_tuple inverse;
> >
> > +#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
> >                 if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) {
> > +#else
> > +               if (!nf_ct_invert_tuple(&inverse, &tuple, l4proto)) {
> > +#endif
> >                         pr_debug("ovs_ct_find_existing: Inversion failed!\n");
> >                         return NULL;
> >                 }
> The changes in ovs_ct_find_existing() is due to upstream commit
> 60e3be94e6a ("openvswitch: use nf_ct_get_tuplepr, invert_tuplepr").
> Can we split it out as a separate patch?
>
> From the upstream commit 60e3be94e6a, instead of using
> nf_ct_get_tuple() it invokes nf_ct_get_tuplepr(), and it looks like
> nf_ct_get_tuplepr() was available in quite old kernel (at least
> 2.6.26), and it gets updated to add network namespace support in
> a31f1adc09489 ("netfilter: nf_conntrack: Add a struct net parameter to
> l4_pkt_to_tuple"). Can we see if we can replace nf_ct_get_tuple() to
> nf_ct_get_tuplepr() to avoid the above #ifde #else #endif logic.
>
>
> > @@ -989,6 +1023,9 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
> >         if (!cached) {
> >                 struct nf_conn *tmpl = info->ct;
> >                 int err;
> > +#ifndef HAVE_NF_CONNTRACK_IN_TAKES_NET
> > +               struct nf_hook_state state = {};
> > +#endif
> >
> >                 /* Associate skb with specified zone. */
> >                 if (tmpl) {
> > @@ -998,8 +1035,15 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
> >                         nf_ct_set(skb, tmpl, IP_CT_NEW);
> >                 }
> >
> > +#ifdef HAVE_NF_CONNTRACK_IN_TAKES_NET
> >                 err = nf_conntrack_in(net, info->family,
> >                                       NF_INET_PRE_ROUTING, skb);
> > +#else
> > +               state.hook = NF_INET_PRE_ROUTING,
> > +               state.pf = info->family,
> > +               state.net = net,
> > +               err = nf_conntrack_in(skb, &state);
> > +#endif
> >                 if (err != NF_ACCEPT)
> >                         return -ENOENT;
> >
>
> The changes in __ovs_ct_lookup() is related to 93e66024b024
> ("netfilter: conntrack: pass nf_hook_state to packet and error
> handlers").  In general, we would like to sychronize our
> ./datapatch/*.c code to be as similar as to the upstream
> ./net/openvswitch/*.c. In this case, We can try to hide the #if #else
> #endif in the compat layer in ./datapath/linux/compat/
>
> Here is an example.  I only tested it on 4.4 kernel, it may need to be
> tested on other kernels.
>
> diff --git a/acinclude.m4 b/acinclude.m4
> index 3cd6ea7302d5..d6cfbd54e357 100644
> --- a/acinclude.m4
> +++ b/acinclude.m4
> @@ -675,6 +675,9 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
>                    [nf_ct_set])
>    OVS_GREP_IFELSE([$KSRC/include/net/netfilter/nf_conntrack.h],
>                    [nf_ct_is_untracked])
> +  OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h],
> +                  [nf_conntrack_in], [u_int8_t pf],
> +                  [OVS_DEFINE([HAVE_NF_CONNTRACK_IN_PF])])
>    OVS_GREP_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_zones.h],
>                    [nf_ct_zone_init])
>    OVS_GREP_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_l3proto.h],
> diff --git a/datapath/conntrack.c b/datapath/conntrack.c
> index a7dc9e0c3513..5a97f913f0b2 100644
> --- a/datapath/conntrack.c
> +++ b/datapath/conntrack.c
> @@ -987,6 +987,11 @@ static int __ovs_ct_lookup(struct net *net,
> struct sw_flow_key *key,
>         struct nf_conn *ct;
>
>         if (!cached) {
> +               struct nf_hook_state state = {
> +                       .hook = NF_INET_PRE_ROUTING,
> +                       .pf = info->family,
> +                       .net = net,
> +               };
>                 struct nf_conn *tmpl = info->ct;
>                 int err;
>
> @@ -998,8 +1003,7 @@ static int __ovs_ct_lookup(struct net *net,
> struct sw_flow_key *key,
>                         nf_ct_set(skb, tmpl, IP_CT_NEW);
>                 }
>
> -               err = nf_conntrack_in(net, info->family,
> -                                     NF_INET_PRE_ROUTING, skb);
> +               err = nf_conntrack_in(skb, &state);
>                 if (err != NF_ACCEPT)
>                         return -ENOENT;
>
> diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
> b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
> index 7834c8c25f79..b05a5beda3cc 100644
> --- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
> +++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
> @@ -104,4 +104,14 @@ static inline bool rpl_nf_ct_delete(struct
> nf_conn *ct, u32 portid, int report)
>  #define nf_ct_delete rpl_nf_ct_delete
>  #endif /* HAVE_NF_CONN_TIMER */
>
> +#ifdef HAVE_NF_CONNTRACK_IN_PF
> +
> +static inline bool rpl_nf_conntrack_in(struct sk_buff *skb,
> +                                       struct nf_hook_state *state)
> +{
> +    return nf_conntrack_in(state->net, state->pf, state->hook, skb);
> +}
> +#define nf_conntrack_in rpl_nf_conntrack_in
> +#endif /* HAVE_NF_CONNTRACK_IN_PF */
> +
>  #endif /* _NF_CONNTRACK_CORE_WRAPPER_H */
>
>
> > @@ -1307,9 +1351,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
> >  {
> >         int nh_ofs;
> >         int err;
> > +       /* From kernel 4.19.0+, Function handle_fragments may shrink skb's
> > +        * headroom, which will result in loss of ethernet header data.
> > +        * This buf is used to backup the header data before calling
> > +        * handle_fragments. */
> > +       char buf[32];
> >
> >         /* The conntrack module expects to be working at L3. */
> >         nh_ofs = skb_network_offset(skb);
> > +       if (nh_ofs > sizeof(buf))
> > +               return -EINVAL;
> > +       memcpy(buf, skb->data, nh_ofs);
> >         skb_pull_rcsum(skb, nh_ofs);
> >
> >         err = ovs_skb_network_trim(skb);
> > @@ -1326,8 +1378,16 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
> >                 err = ovs_ct_commit(net, key, info, skb);
> >         else
> >                 err = ovs_ct_lookup(net, key, info, skb);
> > +       if (err)
> > +               return err;
> >
> > +       if (skb_headroom(skb) < nh_ofs) {
> > +               err = pskb_expand_head(skb, nh_ofs, 0, GFP_ATOMIC);
> > +               if (err)
> > +                       return err;
> > +       }
> >         skb_push(skb, nh_ofs);
> > +       memcpy(skb->data, buf, nh_ofs);
> >         skb_postpush_rcsum(skb, skb->data, nh_ofs);
> >         if (err)
> >                 kfree_skb(skb);
> The change in ovs_ct_execute() looks like a bug fix in upstream
> kernel. According to our backport policy,
> http://docs.openvswitch.org/en/latest/internals/contributing/backporting-patches/
>  Please upstream it to net-next before bring it back to datapath.
>
>
>
> > @@ -1362,7 +1422,11 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
> >                 return -EINVAL;
> >         }
> >
> > +#ifdef HAVE_NF_CT_HELPER_EXT_ADD_TAKES_HELPER
> >         help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
> > +#else
> > +       help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL);
> > +#endif
> >         if (!help) {
> >                 nf_conntrack_helper_put(helper);
> >                 return -ENOMEM;
> The change here is related to upstream patch 440534d3c56b ("netfilter:
> Remove useless param helper of nf_ct_helper_ext_add"). Can you try to
> hide the #if #else #endif logic in the compat layer as the example in
> __ovs_ct_lookup().
>
>
> > @@ -1387,6 +1451,20 @@ static int parse_nat(const struct nlattr *attr,
> >         bool have_proto_max = false;
> >         bool ip_vers = (info->family == NFPROTO_IPV6);
> >
> > +#ifndef CONFIG_NF_NAT_IPV4
> > +       if (info->family == NFPROTO_IPV4) {
> > +               OVS_NLERR(log, "Flow action ct(nat) not supported without nf_nat_ipv4");
> > +               return -ENOTSUPP;
> > +       }
> > +#endif
> > +
> > +#ifndef CONFIG_NF_NAT_IPV6
> > +       if (info->family == NFPROTO_IPV6) {
> > +               OVS_NLERR(log, "Flow action ct(nat) not supported without nf_nat_ipv6");
> > +               return -ENOTSUPP;
> > +        }
> > +#endif
> > +
> >         nla_for_each_nested(a, attr, rem) {
> >                 static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = {
> >                         [OVS_NAT_ATTR_SRC] = {0, 0},
> Is this something that would happen in the upstream kernel? If this is
> the case, we should upstream that before backport it to datapath.
>
>
> I did not review the following compat code since they may need to
> change accroding with different backport approach.
>
> > diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
> > index 7834c8c25f79..7fca7dc551c8 100644
>
> Thanks,
>
> -Yi-Hung

Patch
diff mbox series

diff --git a/.travis.yml b/.travis.yml
index 32d5f1918495..8578d1497f6c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -28,22 +28,24 @@  before_script: export PATH=$PATH:$HOME/bin
 
 env:
   - OPTS="--disable-ssl"
-  - TESTSUITE=1 KERNEL=3.16.54
+  - TESTSUITE=1 KERNEL=3.16.65
   - TESTSUITE=1 OPTS="--enable-shared"
   - BUILD_ENV="-m32" OPTS="--disable-ssl"
-  - KERNEL=3.16.54 DPDK=1 OPTS="--enable-shared"
-  - KERNEL=3.16.54 TESTSUITE=1 DPDK=1
-  - KERNEL=3.16.54 DPDK_SHARED=1
-  - KERNEL=3.16.54 DPDK_SHARED=1 OPTS="--enable-shared"
+  - KERNEL=3.16.65 DPDK=1 OPTS="--enable-shared"
+  - KERNEL=3.16.65 TESTSUITE=1 DPDK=1
+  - KERNEL=3.16.65 DPDK_SHARED=1
+  - KERNEL=3.16.65 DPDK_SHARED=1 OPTS="--enable-shared"
+  - KERNEL=4.20.17
+  - KERNEL=4.19.34
   - KERNEL=4.18.20
   - KERNEL=4.17.19
   - KERNEL=4.16.18
   - KERNEL=4.15.18
-  - KERNEL=4.14.63
-  - KERNEL=4.9.149
-  - KERNEL=4.4.148
+  - KERNEL=4.14.111
+  - KERNEL=4.9.168
+  - KERNEL=4.4.178
   - KERNEL=3.19.8
-  - KERNEL=3.16.57
+  - KERNEL=3.16.65
   - TESTSUITE=1 LIBS=-ljemalloc
 
 matrix:
diff --git a/NEWS b/NEWS
index 1e4744dbd244..af5b5222f78f 100644
--- a/NEWS
+++ b/NEWS
@@ -25,6 +25,8 @@  Post-v2.11.0
    - OVN:
      * Select IPAM mac_prefix in a random manner if not provided by the user
    - New QoS type "linux-netem" on Linux.
+   - Linux datapath:
+     * Support for the kernel versions 4.19.x, 4.20.x.
 
 v2.11.0 - 19 Feb 2019
 ---------------------
diff --git a/acinclude.m4 b/acinclude.m4
index cfc8bcd06397..a2e1f9500955 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -151,10 +151,10 @@  AC_DEFUN([OVS_CHECK_LINUX], [
     AC_MSG_RESULT([$kversion])
 
     if test "$version" -ge 4; then
-       if test "$version" = 4 && test "$patchlevel" -le 18; then
+       if test "$version" = 4 && test "$patchlevel" -le 20; then
           : # Linux 4.x
        else
-          AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 4.18.x is not supported (please refer to the FAQ for advice)])
+          AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 4.20.x is not supported (please refer to the FAQ for advice)])
        fi
     elif test "$version" = 3 && test "$patchlevel" -ge 10; then
        : # Linux 3.x
@@ -590,6 +590,22 @@  AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
                   [OVS_DEFINE([HAVE_VOID_INET_FRAGS_INIT])])
   OVS_GREP_IFELSE([$KSRC/include/net/inetpeer.h], [vif],
                   [OVS_DEFINE([HAVE_INETPEER_VIF_SUPPORT])])
+  OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_helper.h],
+                        [nf_ct_helper_ext_add], [nf_conntrack_helper],
+                        [OVS_DEFINE([HAVE_NF_CT_HELPER_EXT_ADD_TAKES_HELPER])])
+  OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h],
+                        [nf_ct_invert_tuple], [l3proto],
+                        [OVS_DEFINE([HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO])])
+  OVS_GREP_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h], [nf_ct_get_tuple],
+                  [OVS_DEFINE([HAVE_NF_CT_GET_TUPLE])])
+  OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_core.h],
+                        [nf_conntrack_in], [net],
+                        [OVS_DEFINE([HAVE_NF_CONNTRACK_IN_TAKES_NET])])
+  OVS_GREP_IFELSE([$KSRC/include/net/ipv6_frag.h], [IP6_DEFRAG_CONNTRACK_IN],
+                  [OVS_DEFINE([HAVE_IPV6_FRAG_H])])
+  OVS_FIND_PARAM_IFELSE([$KSRC/include/net/netfilter/nf_conntrack_l4proto.h],
+                        [__nf_ct_l4proto_find], [l3proto],
+                        [OVS_DEFINE([HAVE_NF_CT_L4PROTO_FIND_TAKES_L3PROTO])])
 
   dnl Check for dst_cache and ipv6 lable to use backported tunnel infrastructure.
   dnl OVS does not really need ipv6 label field, but its presence signifies that
diff --git a/datapath/conntrack.c b/datapath/conntrack.c
index 52208bad3029..ce36a8ddea50 100644
--- a/datapath/conntrack.c
+++ b/datapath/conntrack.c
@@ -38,6 +38,10 @@ 
 #include <net/netfilter/nf_nat_l3proto.h>
 #endif
 
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && defined(HAVE_IPV6_FRAG_H)
+#include <net/ipv6_frag.h>
+#endif
+
 #include "datapath.h"
 #include "conntrack.h"
 #include "flow.h"
@@ -645,32 +649,62 @@  static struct nf_conn *
 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 		     u8 l3num, struct sk_buff *skb, bool natted)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
-	unsigned int dataoff;
 	u8 protonum;
 
+#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
+	const struct nf_conntrack_l3proto *l3proto;
+	unsigned int dataoff;
+
 	l3proto = __nf_ct_l3proto_find(l3num);
 	if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
 				 &protonum) <= 0) {
 		pr_debug("ovs_ct_find_existing: Can't get protonum\n");
 		return NULL;
 	}
+#else
+	int protooff;
+
+	protooff = get_l4proto(skb, skb_network_offset(skb),
+			       l3num, &protonum);
+	if (protooff <= 0) {
+		pr_warn("ovs_ct_find_existing: Can't get protonum\n");
+		return NULL;
+	}
+#endif
+
+#ifdef HAVE_NF_CT_L4PROTO_FIND_TAKES_L3PROTO
 	l4proto = __nf_ct_l4proto_find(l3num, protonum);
-	if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
-			     protonum, net, &tuple, l3proto, l4proto)) {
+#else
+	l4proto = __nf_ct_l4proto_find(protonum);
+#endif
+
+#ifdef HAVE_NF_CT_GET_TUPLE
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+				       l3num, net, &tuple)) {
+		pr_debug("ovs_ct_find_existing: Can't get tuple\n");
+		return NULL;
+	}
+#else
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+				       l3num, net, &tuple)) {
 		pr_debug("ovs_ct_find_existing: Can't get tuple\n");
 		return NULL;
 	}
+#endif
 
 	/* Must invert the tuple if skb has been transformed by NAT. */
 	if (natted) {
 		struct nf_conntrack_tuple inverse;
 
+#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
 		if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) {
+#else
+		if (!nf_ct_invert_tuple(&inverse, &tuple, l4proto)) {
+#endif
 			pr_debug("ovs_ct_find_existing: Inversion failed!\n");
 			return NULL;
 		}
@@ -989,6 +1023,9 @@  static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 	if (!cached) {
 		struct nf_conn *tmpl = info->ct;
 		int err;
+#ifndef HAVE_NF_CONNTRACK_IN_TAKES_NET
+		struct nf_hook_state state = {};
+#endif
 
 		/* Associate skb with specified zone. */
 		if (tmpl) {
@@ -998,8 +1035,15 @@  static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 			nf_ct_set(skb, tmpl, IP_CT_NEW);
 		}
 
+#ifdef HAVE_NF_CONNTRACK_IN_TAKES_NET
 		err = nf_conntrack_in(net, info->family,
 				      NF_INET_PRE_ROUTING, skb);
+#else
+		state.hook = NF_INET_PRE_ROUTING,
+		state.pf = info->family,
+		state.net = net,
+		err = nf_conntrack_in(skb, &state);
+#endif
 		if (err != NF_ACCEPT)
 			return -ENOENT;
 
@@ -1307,9 +1351,17 @@  int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 {
 	int nh_ofs;
 	int err;
+	/* From kernel 4.19.0 on, handle_fragments() may shrink the skb's
+	 * headroom, which results in loss of the ethernet header data.
+	 * This buffer is used to back up the header data before calling
+	 * handle_fragments(). */
+	char buf[32];
 
 	/* The conntrack module expects to be working at L3. */
 	nh_ofs = skb_network_offset(skb);
+	if (nh_ofs > sizeof(buf))
+		return -EINVAL;
+	memcpy(buf, skb->data, nh_ofs);
 	skb_pull_rcsum(skb, nh_ofs);
 
 	err = ovs_skb_network_trim(skb);
@@ -1326,8 +1378,16 @@  int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 		err = ovs_ct_commit(net, key, info, skb);
 	else
 		err = ovs_ct_lookup(net, key, info, skb);
+	if (err)
+		return err;
 
+	if (skb_headroom(skb) < nh_ofs) {
+		err = pskb_expand_head(skb, nh_ofs, 0, GFP_ATOMIC);
+		if (err)
+			return err;
+	}
 	skb_push(skb, nh_ofs);
+	memcpy(skb->data, buf, nh_ofs);
 	skb_postpush_rcsum(skb, skb->data, nh_ofs);
 	if (err)
 		kfree_skb(skb);
@@ -1362,7 +1422,11 @@  static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
 		return -EINVAL;
 	}
 
+#ifdef HAVE_NF_CT_HELPER_EXT_ADD_TAKES_HELPER
 	help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
+#else
+	help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL);
+#endif
 	if (!help) {
 		nf_conntrack_helper_put(helper);
 		return -ENOMEM;
@@ -1387,6 +1451,20 @@  static int parse_nat(const struct nlattr *attr,
 	bool have_proto_max = false;
 	bool ip_vers = (info->family == NFPROTO_IPV6);
 
+#ifndef CONFIG_NF_NAT_IPV4
+	if (info->family == NFPROTO_IPV4) {
+		OVS_NLERR(log, "Flow action ct(nat) not supported without nf_nat_ipv4");
+		return -ENOTSUPP;
+	}
+#endif
+
+#ifndef CONFIG_NF_NAT_IPV6
+	if (info->family == NFPROTO_IPV6) {
+		OVS_NLERR(log, "Flow action ct(nat) not supported without nf_nat_ipv6");
+		return -ENOTSUPP;
+        }
+#endif
+
 	nla_for_each_nested(a, attr, rem) {
 		static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = {
 			[OVS_NAT_ATTR_SRC] = {0, 0},
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
index 7834c8c25f79..7fca7dc551c8 100644
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
+++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
@@ -104,4 +104,10 @@  static inline bool rpl_nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
 #define nf_ct_delete rpl_nf_ct_delete
 #endif /* HAVE_NF_CONN_TIMER */
 
+#ifndef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
+int rpl_get_l4proto(const struct sk_buff *skb,
+		    unsigned int nhoff, u8 pf, u8 *l4num);
+#define get_l4proto rpl_get_l4proto
+#endif
+
 #endif /* _NF_CONNTRACK_CORE_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h
index fd536f3e1854..a26eb9f87971 100644
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h
+++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h
@@ -4,6 +4,8 @@ 
 #include <linux/list.h>
 
 #ifdef HAVE_UPSTREAM_NF_CONNCOUNT
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include_next <net/netfilter/nf_conntrack_count.h>
 
 static inline int rpl_nf_conncount_modinit(void)
diff --git a/datapath/linux/compat/nf_conncount.c b/datapath/linux/compat/nf_conncount.c
index 0bee96274b00..eeae440f872d 100644
--- a/datapath/linux/compat/nf_conncount.c
+++ b/datapath/linux/compat/nf_conncount.c
@@ -13,6 +13,8 @@ 
  *		only ignore TIME_WAIT or gone connections
  *   (C) CC Computer Consultants GmbH, 2007
  */
+#ifndef HAVE_UPSTREAM_NF_CONNCOUNT
+
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/in.h>
 #include <linux/in6.h>
@@ -138,7 +140,7 @@  static bool conn_free(struct nf_conncount_list *list,
 
 	if (list->count == 0) {
 		spin_unlock(&list->list_lock);
-                return free_entry;
+		return free_entry;
 	}
 
 	list->count--;
@@ -635,3 +637,5 @@  void rpl_nf_conncount_modexit(void)
 	kmem_cache_destroy(conncount_conn_cachep);
 	kmem_cache_destroy(conncount_rb_cachep);
 }
+
+#endif /* HAVE_UPSTREAM_NF_CONNCOUNT */
diff --git a/datapath/linux/compat/nf_conntrack_core.c b/datapath/linux/compat/nf_conntrack_core.c
index a7d3d4331e4a..ecfeae1a037e 100644
--- a/datapath/linux/compat/nf_conntrack_core.c
+++ b/datapath/linux/compat/nf_conntrack_core.c
@@ -1,4 +1,7 @@ 
+#include <linux/types.h>
 #include <linux/version.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
 
 #ifndef HAVE_NF_CT_ZONE_INIT
 
@@ -11,3 +14,80 @@  const struct nf_conntrack_zone nf_ct_zone_dflt = {
 };
 
 #endif /* HAVE_NF_CT_ZONE_INIT */
+
+#ifndef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
+/* Reads the IPv4 header at 'nhoff', stores its protocol field in '*protonum'
+ * and returns nhoff + ihl * 4, or -1 on failure. */
+static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+			    u_int8_t *protonum)
+{
+	struct iphdr hdr_buf;
+	const struct iphdr *ip;
+	int l4_off;
+
+	ip = skb_header_pointer(skb, nhoff, sizeof(hdr_buf), &hdr_buf);
+	if (ip == NULL)
+		return -1;
+
+	/* Conntrack defragments packets, but a fragment may still be seen
+	 * inside an ICMP packet; reject it. */
+	if ((ip->frag_off & htons(IP_OFFSET)) != 0)
+		return -1;
+
+	*protonum = ip->protocol;
+	l4_off = nhoff + (ip->ihl << 2);
+	if (l4_off <= skb->len)
+		return l4_off;
+
+	/* The computed offset lies beyond the packet: bogus IP header. */
+	pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n",
+		 nhoff, ip->ihl << 2, skb->len);
+	return -1;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/* Finds the L4 protocol of the IPv6 packet at 'nhoff': stores it in
+ * '*protonum' and returns ipv6_skip_exthdr()'s offset, or -1 on failure. */
+static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+			    u8 *protonum)
+{
+	unsigned int nexthdr_off = nhoff + offsetof(struct ipv6hdr, nexthdr);
+	__be16 frag_off;
+	u8 proto;
+	int l4_off;
+
+	if (skb_copy_bits(skb, nexthdr_off, &proto, sizeof(proto))) {
+		pr_debug("can't get nexthdr\n");
+		return -1;
+	}
+
+	/* l4_off == skb->len means the packet carries no data, only IPv6 and
+	 * possibly extension headers; such a packet is tracked anyway. */
+	l4_off = ipv6_skip_exthdr(skb, nhoff + sizeof(struct ipv6hdr),
+				  &proto, &frag_off);
+	if (l4_off >= 0 && !(frag_off & htons(~0x7))) {
+		*protonum = proto;
+		return l4_off;
+	}
+	pr_debug("can't find proto in pkt\n");
+	return -1;
+}
+#endif
+
+/* Dispatches on 'pf' to the IPv4/IPv6 helper that fills '*l4num'; any other
+ * family zeroes '*l4num' and yields -1. */
+int rpl_get_l4proto(const struct sk_buff *skb,
+		    unsigned int nhoff, u8 pf, u8 *l4num)
+{
+	if (pf == NFPROTO_IPV4)
+		return ipv4_get_l4proto(skb, nhoff, l4num);
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (pf == NFPROTO_IPV6)
+		return ipv6_get_l4proto(skb, nhoff, l4num);
+#endif
+	/* Family not handled above. */
+	*l4num = 0;
+	return -1;
+}
+#endif /* HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO */
diff --git a/datapath/linux/compat/nf_conntrack_proto.c b/datapath/linux/compat/nf_conntrack_proto.c
index 4ac66f61c70d..89c2f542247b 100644
--- a/datapath/linux/compat/nf_conntrack_proto.c
+++ b/datapath/linux/compat/nf_conntrack_proto.c
@@ -1,7 +1,10 @@ 
 #include <linux/types.h>
 
 #include <net/netfilter/nf_conntrack.h>
+
+#ifdef HAVE_NF_CT_INVERT_TUPLE_TAKES_L3PROTO
 #include <net/netfilter/nf_conntrack_l3proto.h>
+#endif
 
 /*
  * Upstream net-next commmit 7e35ec0e8044