diff mbox series

[ovs-dev,v5,9/9] ofproto-dpif-xlate: Translate timeout policy in ct action

Message ID 1567030469-120137-10-git-send-email-yihung.wei@gmail.com
State Accepted
Commit 187bb41fbf447acf9fb6ac117dc923bbe649e78c
Headers show
Series Support zone-based conntrack timeout policy | expand

Commit Message

Yi-Hung Wei Aug. 28, 2019, 10:14 p.m. UTC
This patch derives the timeout policy based on ct zone from the
internal data structure that we maintain on dpif layer.

It also adds a system traffic test to verify the zone-based conntrack
timeout feature.  The test uses ovs-vsctl commands to configure
the customized ICMP and UDP timeout on zone 5 to a shorter period.
It then injects ICMP and UDP traffic to conntrack, and checks if the
corresponding conntrack entry expires after the predefined timeout.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>

ofproto-dpif: Checks if datapath supports OVS_CT_ATTR_TIMEOUT

This patch checks whether datapath supports OVS_CT_ATTR_TIMEOUT. With this
check, ofproto-dpif-xlate can use this information to decide whether to
translate the ct timeout policy.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
---
 NEWS                             |  1 +
 lib/ct-dpif.c                    | 11 +++++
 lib/ct-dpif.h                    |  3 ++
 lib/dpif-netdev.c                |  1 +
 lib/dpif-netlink.c               | 12 +++++
 lib/dpif-provider.h              | 10 +++++
 ofproto/ofproto-dpif-xlate.c     | 23 ++++++++++
 ofproto/ofproto-dpif.c           | 96 ++++++++++++++++++++++++++++++++++++++++
 ofproto/ofproto-dpif.h           | 12 ++++-
 tests/system-kmod-macros.at      | 20 +++++++++
 tests/system-traffic.at          | 66 +++++++++++++++++++++++++++
 tests/system-userspace-macros.at | 19 ++++++++
 12 files changed, 272 insertions(+), 2 deletions(-)

Comments

Justin Pettit Sept. 25, 2019, 10:59 p.m. UTC | #1
> On Aug 28, 2019, at 3:14 PM, Yi-Hung Wei <yihung.wei@gmail.com> wrote:
> 
> diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
> index e988626ea05b..4a599c8eb866 100644
> --- a/lib/dpif-provider.h
> +++ b/lib/dpif-provider.h
> @@ -542,6 +542,16 @@ struct dpif_class {
>                                        struct ct_dpif_timeout_policy *tp);
>     int (*ct_timeout_policy_dump_done)(struct dpif *, void *state);
> 
> +    /* Gets timeout policy based on 'tp_id', 'dl_type' and 'nw_proto'.
> +     * On success, returns 0, stores the timeout policy name in 'tp_name',
> +     * and sets 'is_generic'. 'is_generic' is false if the returned timeout
> +     * policy in the 'dpif' is specific to 'dl_type' and 'nw_proto' in the
> +     * datapath, i.e. in kernel datapath.  Sets 'is_generic' to true, if
> +     * the timeout policy supports all OVS supported L3/L4 protocols. */
> +    int (*ct_get_timeout_policy_name)(struct dpif *, uint32_t tp_id,
> +                                      uint16_t dl_type, uint8_t nw_proto,
> +                                      struct ds *tp_name, bool *is_generic);

I don't have a great justification, but we've generally used "char **" instead of "struct ds" in these interfaces.  Let me know what you think of the attached incremental.  It ended up being a more invasive change than I expected, but I think it will be more consistent in our API.

> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index 17800f3c8a3f..c8e508bca2c8 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -5983,6 +5983,25 @@ put_ct_helper(struct xlate_ctx *ctx,
> }
> 
> static void
> +put_ct_timeout(struct ofpbuf *odp_actions, const struct dpif_backer *backer,
> +               const struct flow *flow, struct flow_wildcards *wc,
> +               uint16_t zone_id)
> +{
> +    struct ds tp_name = DS_EMPTY_INITIALIZER;
> +    bool unwildcard;
> +
> +    if (ofproto_dpif_ct_zone_timeout_policy_get_name(backer, zone_id,
> +            ntohs(flow->dl_type), flow->nw_proto, &tp_name, &unwildcard)) {
> +        nl_msg_put_string(odp_actions, OVS_CT_ATTR_TIMEOUT, ds_cstr(&tp_name));
> +
> +        if (unwildcard) {
> +                memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);

I think it's worth mentioning again why we're unwildcarding "nw_proto" and why it wasn't necessary to also unwildcard "dl_type".  How about something like the following:

            /* The underlying datapath requires separate timeout
             * policies for different Ethertypes and IP protocols.  We
             * don't need to unwildcard 'wc->masks.dl_type' since that
             * field is always unwildcarded in megaflows. */
            memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);

Sorry for the long delay in finishing this review.  Assuming you're happy with the changes, I'll merge the series into master.

Thanks,

--Justin


-=-=-=-=-=-=-=-=-=-=-

diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
index 2181c83157ad..23db72dbbbb1 100644
--- a/lib/ct-dpif.c
+++ b/lib/ct-dpif.c
@@ -866,7 +866,7 @@ ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state)
 int
 ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id,
                                 uint16_t dl_type, uint8_t nw_proto,
-                                struct ds *tp_name, bool *is_generic)
+                                char **tp_name, bool *is_generic)
 {
     return (dpif->dpif_class->ct_get_timeout_policy_name
             ? dpif->dpif_class->ct_get_timeout_policy_name(
diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
index b95e6ac762ab..bbcb537ba46d 100644
--- a/lib/ct-dpif.h
+++ b/lib/ct-dpif.h
@@ -320,6 +320,6 @@ int ct_dpif_timeout_policy_dump_next(struct dpif *dpif, void *state,
 int ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state);
 int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id,
                                     uint16_t dl_type, uint8_t nw_proto,
-                                    struct ds *tp_name, bool *is_generic);
+                                    char **tp_name, bool *is_generic);
 
 #endif /* CT_DPIF_H */
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 8f1c6d1ffde7..71d9cdbabc5b 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -3056,28 +3056,32 @@ static struct dpif_netlink_timeout_policy_protocol tp_protos[] = {
 
 static void
 dpif_netlink_format_tp_name(uint32_t id, uint16_t l3num, uint8_t l4num,
-                            struct ds *tp_name)
+                            char **tp_name)
 {
-    ds_clear(tp_name);
-    ds_put_format(tp_name, "%s%"PRIu32"_", NL_TP_NAME_PREFIX, id);
-    ct_dpif_format_ipproto(tp_name, l4num);
+    struct ds ds = DS_EMPTY_INITIALIZER;
+    ds_put_format(&ds, "%s%"PRIu32"_", NL_TP_NAME_PREFIX, id);
+    ct_dpif_format_ipproto(&ds, l4num);
 
     if (l3num == AF_INET) {
-        ds_put_cstr(tp_name, "4");
+        ds_put_cstr(&ds, "4");
     } else if (l3num == AF_INET6 && l4num != IPPROTO_ICMPV6) {
-        ds_put_cstr(tp_name, "6");
+        ds_put_cstr(&ds, "6");
     }
 
-    ovs_assert(tp_name->length < CTNL_TIMEOUT_NAME_MAX);
+    ovs_assert(ds.length < CTNL_TIMEOUT_NAME_MAX);
+
+    *tp_name = ds_steal_cstr(&ds);
 }
 
 static int
 dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
-    uint32_t tp_id, uint16_t dl_type, uint8_t nw_proto, struct ds *tp_name,
-    bool *is_generic)
+                                        uint32_t tp_id, uint16_t dl_type,
+                                        uint8_t nw_proto, char **tp_name,
+                                        bool *is_generic)
 {
     dpif_netlink_format_tp_name(tp_id,
-        dl_type == ETH_TYPE_IP ? AF_INET : AF_INET6, nw_proto, tp_name);
+                                dl_type == ETH_TYPE_IP ? AF_INET : AF_INET6,
+                                nw_proto, tp_name);
     *is_generic = false;
     return 0;
 }
@@ -3275,14 +3279,17 @@ static int
 dpif_netlink_ct_set_timeout_policy(struct dpif *dpif OVS_UNUSED,
                                    const struct ct_dpif_timeout_policy *tp)
 {
-    struct nl_ct_timeout_policy nl_tp;
-    struct ds nl_tp_name = DS_EMPTY_INITIALIZER;
     int err = 0;
 
     for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
+        struct nl_ct_timeout_policy nl_tp;
+        char *nl_tp_name;
+
         dpif_netlink_format_tp_name(tp->id, tp_protos[i].l3num,
                                     tp_protos[i].l4num, &nl_tp_name);
-        ovs_strlcpy(nl_tp.name, ds_cstr(&nl_tp_name), sizeof nl_tp.name);
+        ovs_strlcpy(nl_tp.name, nl_tp_name, sizeof nl_tp.name);
+        free(nl_tp_name);
+
         nl_tp.l3num = tp_protos[i].l3num;
         nl_tp.l4num = tp_protos[i].l4num;
         dpif_netlink_get_nl_tp_attrs(tp, tp_protos[i].l4num, &nl_tp);
@@ -3295,7 +3302,6 @@ dpif_netlink_ct_set_timeout_policy(struct dpif *dpif OVS_UNUSED,
     }
 
 out:
-    ds_destroy(&nl_tp_name);
     return err;
 }
 
@@ -3304,27 +3310,29 @@ dpif_netlink_ct_get_timeout_policy(struct dpif *dpif OVS_UNUSED,
                                    uint32_t tp_id,
                                    struct ct_dpif_timeout_policy *tp)
 {
-    struct nl_ct_timeout_policy nl_tp;
-    struct ds nl_tp_name = DS_EMPTY_INITIALIZER;
     int err = 0;
 
     tp->id = tp_id;
     tp->present = 0;
     for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
+        struct nl_ct_timeout_policy nl_tp;
+        char *nl_tp_name;
+
         dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num,
                                     tp_protos[i].l4num, &nl_tp_name);
-        err = nl_ct_get_timeout_policy(ds_cstr(&nl_tp_name), &nl_tp);
+        err = nl_ct_get_timeout_policy(nl_tp_name, &nl_tp);
 
         if (err) {
             VLOG_WARN_RL(&error_rl, "failed to get timeout policy %s (%s)",
-                         nl_tp.name, ovs_strerror(err));
+                         nl_tp_name, ovs_strerror(err));
+            free(nl_tp_name);
             goto out;
         }
+        free(nl_tp_name);
         dpif_netlink_set_ct_dpif_tp_attrs(&nl_tp, tp);
     }
 
 out:
-    ds_destroy(&nl_tp_name);
     return err;
 }
 
@@ -3334,25 +3342,25 @@ static int
 dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED,
                                    uint32_t tp_id)
 {
-    struct ds nl_tp_name = DS_EMPTY_INITIALIZER;
     int ret = 0;
 
     for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
+        char *nl_tp_name;
         dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num,
                                     tp_protos[i].l4num, &nl_tp_name);
-        int err = nl_ct_del_timeout_policy(ds_cstr(&nl_tp_name));
+        int err = nl_ct_del_timeout_policy(nl_tp_name);
         if (err == ENOENT) {
             err = 0;
         }
         if (err) {
             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(6, 6);
             VLOG_INFO_RL(&rl, "failed to delete timeout policy %s (%s)",
-                         ds_cstr(&nl_tp_name), ovs_strerror(err));
+                         nl_tp_name, ovs_strerror(err));
             ret = 1;
         }
+        free(nl_tp_name);
     }
 
-    ds_destroy(&nl_tp_name);
     return ret;
 }
 
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index 4a599c8eb866..61feb94e8fca 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -546,11 +546,14 @@ struct dpif_class {
      * On success, returns 0, stores the timeout policy name in 'tp_name',
      * and sets 'is_generic'. 'is_generic' is false if the returned timeout
      * policy in the 'dpif' is specific to 'dl_type' and 'nw_proto' in the
-     * datapath, i.e. in kernel datapath.  Sets 'is_generic' to true, if
-     * the timeout policy supports all OVS supported L3/L4 protocols. */
+     * datapath (e.g., the Linux kernel datapath).  Sets 'is_generic' to
+     * true, if the timeout policy supports all OVS supported L3/L4
+     * protocols.
+     *
+     * The caller is responsible for freeing 'tp_name'. */
     int (*ct_get_timeout_policy_name)(struct dpif *, uint32_t tp_id,
                                       uint16_t dl_type, uint8_t nw_proto,
-                                      struct ds *tp_name, bool *is_generic);
+                                      char **tp_name, bool *is_generic);
 
     /* IP Fragmentation. */
 
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index c8e508bca2c8..a9d5aa754ebc 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -5987,18 +5987,22 @@ put_ct_timeout(struct ofpbuf *odp_actions, const struct dpif_backer *backer,
                const struct flow *flow, struct flow_wildcards *wc,
                uint16_t zone_id)
 {
-    struct ds tp_name = DS_EMPTY_INITIALIZER;
     bool unwildcard;
+    char *tp_name = NULL;
 
     if (ofproto_dpif_ct_zone_timeout_policy_get_name(backer, zone_id,
             ntohs(flow->dl_type), flow->nw_proto, &tp_name, &unwildcard)) {
-        nl_msg_put_string(odp_actions, OVS_CT_ATTR_TIMEOUT, ds_cstr(&tp_name));
+        nl_msg_put_string(odp_actions, OVS_CT_ATTR_TIMEOUT, tp_name);
 
         if (unwildcard) {
-                memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
+            /* The underlying datapath requires separate timeout
+             * policies for different Ethertypes and IP protocols.  We
+             * don't need to unwildcard 'wc->masks.dl_type' since that
+             * field is always unwildcarded in megaflows. */
+            memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
         }
     }
-    ds_destroy(&tp_name);
+    free(tp_name);
 }
 
 static void
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 53c3515d15db..496a16c8a4af 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -5441,19 +5441,19 @@ ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone_id)
 }
 
 /* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and
- * 'nw_proto'.  Returns true if the zoned-based timeout policy is configured.
+ * 'nw_proto'.  Returns true if the zone-based timeout policy is configured.
  * On success, stores the timeout policy name in 'tp_name', and sets
  * 'unwildcard' based on the dpif implementation.  If 'unwildcard' is true,
  * the returned timeout policy is 'dl_type' and 'nw_proto' specific, and OVS
  * needs to unwildcard the datapath flow for this timeout policy in flow
- * translation. */
+ * translation.
+ *
+ * The caller is responsible for freeing 'tp_name'. */
 bool
 ofproto_dpif_ct_zone_timeout_policy_get_name(
     const struct dpif_backer *backer, uint16_t zone, uint16_t dl_type,
-    uint8_t nw_proto, struct ds *tp_name, bool *unwildcard)
+    uint8_t nw_proto, char **tp_name, bool *unwildcard)
 {
-    bool is_generic_tp;
-
     if (!ct_dpif_timeout_policy_support_ipproto(nw_proto)) {
         return false;
     }
@@ -5463,14 +5463,15 @@ ofproto_dpif_ct_zone_timeout_policy_get_name(
         return false;
     }
 
+    bool is_generic;
     if (ct_dpif_get_timeout_policy_name(backer->dpif,
-                                         ct_zone->ct_tp->tp_id, dl_type,
-                                         nw_proto, tp_name, &is_generic_tp)) {
+                                        ct_zone->ct_tp->tp_id, dl_type,
+                                        nw_proto, tp_name, &is_generic)) {
         return false;
     }
 
     /* Unwildcard datapath flow if it is not a generic timeout policy. */
-    *unwildcard = !is_generic_tp;
+    *unwildcard = !is_generic;
     return true;
 }
 
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index 7703be9899e6..cd453636c920 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -380,6 +380,6 @@ bool ovs_native_tunneling_is_on(struct ofproto_dpif *);
 
 bool ofproto_dpif_ct_zone_timeout_policy_get_name(
     const struct dpif_backer *backer, uint16_t zone, uint16_t dl_type,
-    uint8_t nw_proto, struct ds *tp_name, bool *unwildcard);
+    uint8_t nw_proto, char **tp_name, bool *unwildcard);
 
 #endif /* ofproto-dpif.h */
Yi-Hung Wei Sept. 25, 2019, 11:34 p.m. UTC | #2
On Wed, Sep 25, 2019 at 3:59 PM Justin Pettit <jpettit@ovn.org> wrote:
>
>
> > On Aug 28, 2019, at 3:14 PM, Yi-Hung Wei <yihung.wei@gmail.com> wrote:
> >
> > diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
> > index e988626ea05b..4a599c8eb866 100644
> > --- a/lib/dpif-provider.h
> > +++ b/lib/dpif-provider.h
> > @@ -542,6 +542,16 @@ struct dpif_class {
> >                                        struct ct_dpif_timeout_policy *tp);
> >     int (*ct_timeout_policy_dump_done)(struct dpif *, void *state);
> >
> > +    /* Gets timeout policy based on 'tp_id', 'dl_type' and 'nw_proto'.
> > +     * On success, returns 0, stores the timeout policy name in 'tp_name',
> > +     * and sets 'is_generic'. 'is_generic' is false if the returned timeout
> > +     * policy in the 'dpif' is specific to 'dl_type' and 'nw_proto' in the
> > +     * datapath, i.e. in kernel datapath.  Sets 'is_generic' to true, if
> > +     * the timeout policy supports all OVS supported L3/L4 protocols. */
> > +    int (*ct_get_timeout_policy_name)(struct dpif *, uint32_t tp_id,
> > +                                      uint16_t dl_type, uint8_t nw_proto,
> > +                                      struct ds *tp_name, bool *is_generic);
>
> I don't have a great justification, but we've generally used "char **" instead of "struct ds" in these interfaces.  Let me know what you think of the attached incremental.  It ended up being a more invasive change than I expected, but I think it will be more consistent in our API.
>
> > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> > index 17800f3c8a3f..c8e508bca2c8 100644
> > --- a/ofproto/ofproto-dpif-xlate.c
> > +++ b/ofproto/ofproto-dpif-xlate.c
> > @@ -5983,6 +5983,25 @@ put_ct_helper(struct xlate_ctx *ctx,
> > }
> >
> > static void
> > +put_ct_timeout(struct ofpbuf *odp_actions, const struct dpif_backer *backer,
> > +               const struct flow *flow, struct flow_wildcards *wc,
> > +               uint16_t zone_id)
> > +{
> > +    struct ds tp_name = DS_EMPTY_INITIALIZER;
> > +    bool unwildcard;
> > +
> > +    if (ofproto_dpif_ct_zone_timeout_policy_get_name(backer, zone_id,
> > +            ntohs(flow->dl_type), flow->nw_proto, &tp_name, &unwildcard)) {
> > +        nl_msg_put_string(odp_actions, OVS_CT_ATTR_TIMEOUT, ds_cstr(&tp_name));
> > +
> > +        if (unwildcard) {
> > +                memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
>
> I think it's worth mentioning again why we're unwildcarding "nw_proto" and why it wasn't necessary to also unwildcard "dl_type".  How about something like the following:
>
>             /* The underlying datapath requires separate timeout
>              * policies for different Ethertypes and IP protocols.  We
>              * don't need to unwildcard 'wc->masks.dl_type' since that
>              * field is always unwildcarded in megaflows. */
>             memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
>
> Sorry for the long delay in finishing this review.  Assuming you're happy with the changes, I'll merge the series into master.
>
> Thanks,
>
> --Justin

Thanks Justin for the review.  The proposed diff looks good to me.

Thanks,

-Yi-Hung
Justin Pettit Sept. 26, 2019, 9:11 p.m. UTC | #3
> On Sep 25, 2019, at 4:34 PM, Yi-Hung Wei <yihung.wei@gmail.com> wrote:
> 
> Thanks Justin for the review.  The proposed diff looks good to me.

Thanks.  I pushed the series to master.

--Justin
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index c5caa13d6374..9f7fbb852e08 100644
--- a/NEWS
+++ b/NEWS
@@ -69,6 +69,7 @@  v2.12.0 - xx xxx xxxx
    - Linux datapath:
      * Support for the kernel versions 4.19.x and 4.20.x.
      * Support for the kernel version 5.0.x.
+     * Add support for conntrack zone-based timeout policy.
    - 'ovs-dpctl dump-flows' is no longer suitable for dumping offloaded flows.
      'ovs-appctl dpctl/dump-flows' should be used instead.
    - Add L2 GRE tunnel over IPv6 support.
diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
index be59f34ff995..2181c83157ad 100644
--- a/lib/ct-dpif.c
+++ b/lib/ct-dpif.c
@@ -862,3 +862,14 @@  ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state)
             ? dpif->dpif_class->ct_timeout_policy_dump_done(dpif, state)
             : EOPNOTSUPP);
 }
+
+int
+ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id,
+                                uint16_t dl_type, uint8_t nw_proto,
+                                struct ds *tp_name, bool *is_generic)
+{
+    return (dpif->dpif_class->ct_get_timeout_policy_name
+            ? dpif->dpif_class->ct_get_timeout_policy_name(
+                dpif, tp_id, dl_type, nw_proto, tp_name, is_generic)
+            : EOPNOTSUPP);
+}
diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
index aabd6962f2c0..b95e6ac762ab 100644
--- a/lib/ct-dpif.h
+++ b/lib/ct-dpif.h
@@ -318,5 +318,8 @@  int ct_dpif_timeout_policy_dump_start(struct dpif *dpif, void **statep);
 int ct_dpif_timeout_policy_dump_next(struct dpif *dpif, void *state,
                                      struct ct_dpif_timeout_policy *tp);
 int ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state);
+int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id,
+                                    uint16_t dl_type, uint8_t nw_proto,
+                                    struct ds *tp_name, bool *is_generic);
 
 #endif /* CT_DPIF_H */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 7240a3e6f3c8..36637052e598 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7539,6 +7539,7 @@  const struct dpif_class dpif_netdev_class = {
     NULL,                       /* ct_timeout_policy_dump_start */
     NULL,                       /* ct_timeout_policy_dump_next */
     NULL,                       /* ct_timeout_policy_dump_done */
+    NULL,                       /* ct_get_timeout_policy_name */
     dpif_netdev_ipf_set_enabled,
     dpif_netdev_ipf_set_min_frag,
     dpif_netdev_ipf_set_max_nfrags,
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 85827cd65503..8f1c6d1ffde7 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -3071,6 +3071,17 @@  dpif_netlink_format_tp_name(uint32_t id, uint16_t l3num, uint8_t l4num,
     ovs_assert(tp_name->length < CTNL_TIMEOUT_NAME_MAX);
 }
 
+static int
+dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
+    uint32_t tp_id, uint16_t dl_type, uint8_t nw_proto, struct ds *tp_name,
+    bool *is_generic)
+{
+    dpif_netlink_format_tp_name(tp_id,
+        dl_type == ETH_TYPE_IP ? AF_INET : AF_INET6, nw_proto, tp_name);
+    *is_generic = false;
+    return 0;
+}
+
 #define CT_DPIF_NL_TP_TCP_MAPPINGS                              \
     CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT)         \
     CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV)         \
@@ -3905,6 +3916,7 @@  const struct dpif_class dpif_netlink_class = {
     dpif_netlink_ct_timeout_policy_dump_start,
     dpif_netlink_ct_timeout_policy_dump_next,
     dpif_netlink_ct_timeout_policy_dump_done,
+    dpif_netlink_ct_get_timeout_policy_name,
     NULL,                       /* ipf_set_enabled */
     NULL,                       /* ipf_set_min_frag */
     NULL,                       /* ipf_set_max_nfrags */
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index e988626ea05b..4a599c8eb866 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -542,6 +542,16 @@  struct dpif_class {
                                        struct ct_dpif_timeout_policy *tp);
     int (*ct_timeout_policy_dump_done)(struct dpif *, void *state);
 
+    /* Gets timeout policy based on 'tp_id', 'dl_type' and 'nw_proto'.
+     * On success, returns 0, stores the timeout policy name in 'tp_name',
+     * and sets 'is_generic'. 'is_generic' is false if the returned timeout
+     * policy in the 'dpif' is specific to 'dl_type' and 'nw_proto' in the
+     * datapath, i.e. in kernel datapath.  Sets 'is_generic' to true, if
+     * the timeout policy supports all OVS supported L3/L4 protocols. */
+    int (*ct_get_timeout_policy_name)(struct dpif *, uint32_t tp_id,
+                                      uint16_t dl_type, uint8_t nw_proto,
+                                      struct ds *tp_name, bool *is_generic);
+
     /* IP Fragmentation. */
 
     /* Disables or enables conntrack fragment reassembly.  The default
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 17800f3c8a3f..c8e508bca2c8 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -5983,6 +5983,25 @@  put_ct_helper(struct xlate_ctx *ctx,
 }
 
 static void
+put_ct_timeout(struct ofpbuf *odp_actions, const struct dpif_backer *backer,
+               const struct flow *flow, struct flow_wildcards *wc,
+               uint16_t zone_id)
+{
+    struct ds tp_name = DS_EMPTY_INITIALIZER;
+    bool unwildcard;
+
+    if (ofproto_dpif_ct_zone_timeout_policy_get_name(backer, zone_id,
+            ntohs(flow->dl_type), flow->nw_proto, &tp_name, &unwildcard)) {
+        nl_msg_put_string(odp_actions, OVS_CT_ATTR_TIMEOUT, ds_cstr(&tp_name));
+
+        if (unwildcard) {
+                memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
+        }
+    }
+    ds_destroy(&tp_name);
+}
+
+static void
 put_ct_nat(struct xlate_ctx *ctx)
 {
     struct ofpact_nat *ofn = ctx->ct_nat_action;
@@ -6072,6 +6091,10 @@  compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc,
             nl_msg_put_u32(ctx->odp_actions, OVS_CT_ATTR_EVENTMASK,
                            OVS_CT_EVENTMASK_DEFAULT);
         }
+        if (ctx->xbridge->support.ct_timeout) {
+            put_ct_timeout(ctx->odp_actions, ctx->xbridge->ofproto->backer,
+                           &ctx->xin->flow, ctx->wc, zone);
+        }
     }
     nl_msg_put_u16(ctx->odp_actions, OVS_CT_ATTR_ZONE, zone);
     put_ct_mark(&ctx->xin->flow, ctx->odp_actions, ctx->wc);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 4b4c4d722645..eb9dea3528c6 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -1323,6 +1323,67 @@  check_ct_clear(struct dpif_backer *backer)
     return supported;
 }
 
+/* Tests whether 'backer''s datapath supports the OVS_CT_ATTR_TIMEOUT
+ * attribute in OVS_ACTION_ATTR_CT. */
+static bool
+check_ct_timeout_policy(struct dpif_backer *backer)
+{
+    struct dpif_execute execute;
+    struct dp_packet packet;
+    struct ofpbuf actions;
+    struct flow flow = {
+        .dl_type = CONSTANT_HTONS(ETH_TYPE_IP),
+        .nw_proto = IPPROTO_UDP,
+        .nw_ttl = 64,
+        /* Use the broadcast address on the loopback address range 127/8 to
+         * avoid hitting any real conntrack entries.  We leave the UDP ports to
+         * zeroes for the same purpose. */
+        .nw_src = CONSTANT_HTONL(0x7fffffff),
+        .nw_dst = CONSTANT_HTONL(0x7fffffff),
+    };
+    size_t ct_start;
+    int error;
+
+    /* Compose CT action with timeout policy attribute and check if datapath
+     * can decode the message.  */
+    ofpbuf_init(&actions, 64);
+    ct_start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_CT);
+    /* Timeout policy has no effect without the commit flag, but currently the
+     * datapath will accept a timeout policy even without commit.  This is
+     * useful as we do not want to persist the probe connection in the
+     * conntrack table. */
+    nl_msg_put_string(&actions, OVS_CT_ATTR_TIMEOUT, "ovs_test_tp");
+    nl_msg_end_nested(&actions, ct_start);
+
+    /* Compose a dummy UDP packet. */
+    dp_packet_init(&packet, 0);
+    flow_compose(&packet, &flow, NULL, 64);
+
+    /* Execute the actions.  On older datapaths this fails with EINVAL, on
+     * newer datapaths it succeeds. */
+    execute.actions = actions.data;
+    execute.actions_len = actions.size;
+    execute.packet = &packet;
+    execute.flow = &flow;
+    execute.needs_help = false;
+    execute.probe = true;
+    execute.mtu = 0;
+
+    error = dpif_execute(backer->dpif, &execute);
+
+    dp_packet_uninit(&packet);
+    ofpbuf_uninit(&actions);
+
+    if (error) {
+        VLOG_INFO("%s: Datapath does not support timeout policy in conntrack "
+                  "action", dpif_name(backer->dpif));
+    } else {
+        VLOG_INFO("%s: Datapath supports timeout policy in conntrack action",
+                  dpif_name(backer->dpif));
+    }
+
+    return !error;
+}
 
 /* Tests whether 'backer''s datapath supports the
  * OVS_ACTION_ATTR_CHECK_PKT_LEN action. */
@@ -1473,6 +1534,7 @@  check_support(struct dpif_backer *backer)
     backer->rt_support.ct_clear = check_ct_clear(backer);
     backer->rt_support.max_hash_alg = check_max_dp_hash_alg(backer);
     backer->rt_support.check_pkt_len = check_check_pkt_len(backer);
+    backer->rt_support.ct_timeout = check_ct_timeout_policy(backer);
 
     /* Flow fields. */
     backer->rt_support.odp.ct_state = check_ct_state(backer);
@@ -5378,6 +5440,40 @@  ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone)
     }
 }
 
+/* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and
+ * 'nw_proto'.  Returns true if the zoned-based timeout policy is configured.
+ * On success, stores the timeout policy name in 'tp_name', and sets
+ * 'unwildcard' based on the dpif implementation.  If 'unwildcard' is true,
+ * the returned timeout policy is 'dl_type' and 'nw_proto' specific, and OVS
+ * needs to unwildcard the datapath flow for this timeout policy in flow
+ * translation. */
+bool
+ofproto_dpif_ct_zone_timeout_policy_get_name(
+    const struct dpif_backer *backer, uint16_t zone, uint16_t dl_type,
+    uint8_t nw_proto, struct ds *tp_name, bool *unwildcard)
+{
+    bool is_generic_tp;
+
+    if (!ct_dpif_timeout_policy_support_ipproto(nw_proto)) {
+        return false;
+    }
+
+    struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone);
+    if (!ct_zone) {
+        return false;
+    }
+
+    if (ct_dpif_get_timeout_policy_name(backer->dpif,
+                                         ct_zone->ct_tp->tp_id, dl_type,
+                                         nw_proto, tp_name, &is_generic_tp)) {
+        return false;
+    }
+
+    /* Unwildcard datapath flow if it is not a generic timeout policy. */
+    *unwildcard = !is_generic_tp;
+    return true;
+}
+
 static bool
 set_frag_handling(struct ofproto *ofproto_,
                   enum ofputil_frag_handling frag_handling)
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index 0dd7a45fe550..7703be9899e6 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -194,8 +194,12 @@  struct group_dpif *group_dpif_lookup(struct ofproto_dpif *,
     /* Highest supported dp_hash algorithm. */                              \
     DPIF_SUPPORT_FIELD(size_t, max_hash_alg, "Max dp_hash algorithm")       \
                                                                             \
-    /* True if the datapath supports OVS_ACTION_ATTR_CHECK_PKT_LEN. */   \
-    DPIF_SUPPORT_FIELD(bool, check_pkt_len, "Check pkt length action")
+    /* True if the datapath supports OVS_ACTION_ATTR_CHECK_PKT_LEN. */      \
+    DPIF_SUPPORT_FIELD(bool, check_pkt_len, "Check pkt length action")      \
+                                                                            \
+    /* True if the datapath supports OVS_CT_ATTR_TIMEOUT in                 \
+     * OVS_ACTION_ATTR_CT action. */                                        \
+    DPIF_SUPPORT_FIELD(bool, ct_timeout, "Conntrack timeout policy")
 
 /* Stores the various features which the corresponding backer supports. */
 struct dpif_backer_support {
@@ -374,4 +378,8 @@  int ofproto_dpif_delete_internal_flow(struct ofproto_dpif *, struct match *,
 
 bool ovs_native_tunneling_is_on(struct ofproto_dpif *);
 
+bool ofproto_dpif_ct_zone_timeout_policy_get_name(
+    const struct dpif_backer *backer, uint16_t zone, uint16_t dl_type,
+    uint8_t nw_proto, struct ds *tp_name, bool *unwildcard);
+
 #endif /* ofproto-dpif.h */
diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at
index 48e94642b948..75b59ae66ba7 100644
--- a/tests/system-kmod-macros.at
+++ b/tests/system-kmod-macros.at
@@ -100,6 +100,17 @@  m4_define([CHECK_CONNTRACK_FRAG_OVERLAP],
 #
 m4_define([CHECK_CONNTRACK_NAT])
 
+# CHECK_CONNTRACK_TIMEOUT()
+#
+# Perform requirements checks for running conntrack customized timeout tests.
+#
+m4_define([CHECK_CONNTRACK_TIMEOUT],
+[
+    AT_SKIP_IF([! cat /boot/config-$(uname -r) | grep NF_CONNTRACK_TIMEOUT | grep '=y' > /dev/null])
+    modprobe nfnetlink_cttimeout
+    on_exit 'modprobe -r nfnetlink_cttimeout'
+])
+
 # CHECK_CT_DPIF_PER_ZONE_LIMIT()
 #
 # Perform requirements checks for running ovs-dpctl ct-[set|get|del]-limits per
@@ -185,3 +196,12 @@  m4_define([OVS_CHECK_KERNEL_EXCL],
     sublevel=$(uname -r | sed -e 's/\./ /g' | awk '{print $ 2}')
     AT_SKIP_IF([ ! ( test $version -lt $1 || ( test $version -eq $1 && test $sublevel -lt $2 ) || test $version -gt $3 || ( test $version -eq $3 && test $sublevel -gt $4 ) ) ])
 ])
+
+# VSCTL_ADD_DATAPATH_TABLE()
+#
+# Create system datapath table "system" for kernel tests in ovsdb
+m4_define([VSCTL_ADD_DATAPATH_TABLE],
+[
+    AT_CHECK([ovs-vsctl -- --id=@m create Datapath datapath_version=0 -- set Open_vSwitch . datapaths:"system"=@m], [0], [stdout])
+    DP_TYPE=$(echo "system")
+])
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 1a04199dcfe9..f2870e922906 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -3179,6 +3179,72 @@  NXST_FLOW reply:
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([conntrack - zone-based timeout policy])
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_TIMEOUT()
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=100,in_port=1,ip,action=ct(zone=5, table=1)
+priority=100,in_port=2,ip,action=ct(zone=5, table=1)
+table=1,in_port=2,ip,ct_state=+trk+est,action=1
+table=1,in_port=1,ip,ct_state=+trk+new,action=ct(commit,zone=5),2
+table=1,in_port=1,ip,ct_state=+trk+est,action=2
+])
+
+AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
+
+dnl Test with default timeout
+dnl The default udp_single and icmp_first timeouts are 30 seconds in
+dnl kernel DP, and 60 seconds in userspace DP.
+
+dnl Send ICMP and UDP traffic
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
+
+sleep 4
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], [0], [dnl
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=5
+udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=5
+])
+
+AT_CHECK([ovs-appctl dpctl/flush-conntrack])
+
+dnl Shorten the udp_single and icmp_first timeout in zone 5
+VSCTL_ADD_DATAPATH_TABLE()
+AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_single=3 icmp_first=3])
+
+dnl Send ICMP and UDP traffic
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], [0], [dnl
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=5
+udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=5
+])
+
+dnl Wait until the timeout expire.
+dnl We intend to wait a bit longer, because conntrack does not recycle the entry right after it is expired.
+sleep 4
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_BANNER([conntrack - L7])
 
 AT_SETUP([conntrack - IPv4 HTTP])
diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at
index a411e3d8919d..0a21db88a998 100644
--- a/tests/system-userspace-macros.at
+++ b/tests/system-userspace-macros.at
@@ -98,6 +98,16 @@  m4_define([CHECK_CONNTRACK_FRAG_OVERLAP])
 #
 m4_define([CHECK_CONNTRACK_NAT])
 
+# CHECK_CONNTRACK_TIMEOUT()
+#
+# Perform requirements checks for running conntrack customized timeout tests.
+* The userspace datapath does not support this feature yet.
+#
+m4_define([CHECK_CONNTRACK_TIMEOUT],
+[
+    AT_SKIP_IF([:])
+])
+
 # CHECK_CT_DPIF_PER_ZONE_LIMIT()
 #
 # Perform requirements checks for running ovs-dpctl ct-[set|get|del]-limits per
@@ -295,3 +305,12 @@  m4_define([OVS_CHECK_KERNEL_EXCL],
 [
     AT_SKIP_IF([:])
 ])
+
+# VSCTL_ADD_DATAPATH_TABLE()
+#
+# Create datapath table "netdev" for userspace tests in ovsdb
+m4_define([VSCTL_ADD_DATAPATH_TABLE],
+[
+    AT_CHECK([ovs-vsctl -- --id=@m create Datapath datapath_version=0 -- set Open_vSwitch . datapaths:"netdev"=@m], [0], [stdout])
+    DP_TYPE=$(echo "netdev")
+])