diff mbox series

[ovs-dev,PATCHv3] userspace: Add conntrack timeout policy support.

Message ID 1587312311-59294-1-git-send-email-u9012063@gmail.com
State Superseded
Headers show
Series [ovs-dev,PATCHv3] userspace: Add conntrack timeout policy support. | expand

Commit Message

William Tu April 19, 2020, 4:05 p.m. UTC
Commit 1f1613183733 ("ct-dpif, dpif-netlink: Add conntrack timeout
policy support") adds conntrack timeout policy for kernel datapath.
This patch enables support for the userspace datapath.  I tested
using the 'make check-system-userspace' which checks the timeout
policies for ICMP and UDP cases.

Signed-off-by: William Tu <u9012063@gmail.com>
---
v3:
    - address feedback from Yi-Hung
    - use ID 0 as default policy
    - move conn_{init,update}_expiration to lib/conntrack-tp.c
    - s/tpid/tp_id/
    - add default timeout value to CT_DPIF_TP_*_ATTRs
    - reduce the CT_CLEAN_INTERVAL from 5 to 3s, to make the tests
      run faster.
    - add more tests to system-traffic.at
    - code refactoring and renaming
    - travis: https://travis-ci.org/github/williamtu/ovs-travis/builds/676683451
---
 Documentation/faq/releases.rst   |   2 +-
 NEWS                             |   2 +
 lib/automake.mk                  |   2 +
 lib/conntrack-icmp.c             |  13 +-
 lib/conntrack-other.c            |   4 +-
 lib/conntrack-private.h          |  38 +----
 lib/conntrack-tcp.c              |   5 +-
 lib/conntrack-tp.c               | 319 +++++++++++++++++++++++++++++++++++++++
 lib/conntrack-tp.h               |  30 ++++
 lib/conntrack.c                  |  41 ++---
 lib/conntrack.h                  |   8 +-
 lib/ct-dpif.c                    |   6 +-
 lib/ct-dpif.h                    |  57 ++++---
 lib/dpif-netdev.c                |  75 ++++++++-
 ofproto/ofproto-dpif.c           |   3 +-
 tests/system-traffic.at          |  29 +++-
 tests/system-userspace-macros.at |   6 +-
 tests/test-conntrack.c           |   6 +-
 18 files changed, 544 insertions(+), 102 deletions(-)
 create mode 100644 lib/conntrack-tp.c
 create mode 100644 lib/conntrack-tp.h

Comments

Yi-Hung Wei April 22, 2020, 10:52 p.m. UTC | #1
On Sun, Apr 19, 2020 at 9:05 AM William Tu <u9012063@gmail.com> wrote:
>
> Commit 1f1613183733 ("ct-dpif, dpif-netlink: Add conntrack timeout
> policy support") adds conntrack timeout policy for kernel datapath.
> This patch enables support for the userspace datapath.  I tested
> using the 'make check-system-userspace' which checks the timeout
> policies for ICMP and UDP cases.
>
> Signed-off-by: William Tu <u9012063@gmail.com>
> ---
> v3:
>     - address feedback from Yi-Hung
>     - use ID 0 as default policy
>     - move conn_{init,update}_expiration to lib/conntrack-tp.c
>     - s/tpid/tp_id/
>     - add default timeout value to CT_DPIF_TP_*_ATTRs
>     - reduce the CT_CLEAN_INTERVAL from 5 to 3s, to make the tests
>       run faster.
>     - add more tests to system-traffic.at
>     - code refactoring and renaming
>     - travis: https://travis-ci.org/github/williamtu/ovs-travis/builds/676683451
> ---

Thanks for the updated patch.

> diff --git a/lib/conntrack-icmp.c b/lib/conntrack-icmp.c
> index 63246f0124d0..5c6ef37d2eb4 100644
> --- a/lib/conntrack-icmp.c
> +++ b/lib/conntrack-icmp.c
> @@ -22,6 +22,7 @@
>  #include <netinet/icmp6.h>
>
>  #include "conntrack-private.h"
> +#include "conntrack-tp.h"
>  #include "dp-packet.h"
>
>  enum OVS_PACKED_ENUM icmp_state {
> @@ -50,9 +51,12 @@ icmp_conn_update(struct conntrack *ct, struct conn *conn_,
>                   struct dp_packet *pkt OVS_UNUSED, bool reply, long long now)
>  {
>      struct conn_icmp *conn = conn_icmp_cast(conn_);
> -    conn->state = reply ? ICMPS_REPLY : ICMPS_FIRST;
> -    conn_update_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
>
> +    if (reply && conn->state == ICMPS_FIRST) {
> +        conn->state = ICMPS_REPLY;
> +    }

This code snippet looks like a reasonable fix on the ICMP state
handling, should we pull it out as a separate patch?

Previously, we will change the ICMP state from ICMP_REPLY back to
ICMP_FIRST for  every echo request packet after an echo reply packet.
With this patch, the ICMP connection will stay in ICMP_REPLY state
after the first echo reply packet.



> diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
> new file mode 100644
> index 000000000000..5e59f56eb949
> --- /dev/null
> +++ b/lib/conntrack-tp.c
> @@ -0,0 +1,319 @@
> +#include <config.h>
> +
> +#include "conntrack-private.h"
> +#include "conntrack-tp.h"
> +#include "ct-dpif.h"
> +#include "dp-packet.h"

Is "dp-packet.h" required?

> +#include "openvswitch/vlog.h"

May be put this line with the other #include?


> +VLOG_DEFINE_THIS_MODULE(conntrack_tp);
> +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> +
> +#define DEFAULT_TP_ID 0

DEFAULT_TP_ID is also defined in ct-dpif.h, we should keep it in one place.


> +static const char *ct_timeout_str[] = {
> +#define CT_TIMEOUT(NAME, VALUE) #NAME,
> +    CT_TIMEOUTS
> +#undef CT_TIMEOUT
> +};
> +
> +static const char *dpif_ct_timeout_str[] = {
> +#define xstr(s) #s
> +#define CT_DPIF_TP_TCP_ATTR(NAME, VAL) xstr(TCP_##NAME),
> +    CT_DPIF_TP_TCP_ATTRS
> +#undef CT_DPIF_TP_TCP_ATTR
> +#define CT_DPIF_TP_UDP_ATTR(NAME, VAL) xstr(UDP_##NAME),
> +    CT_DPIF_TP_UDP_ATTRS
> +#undef CT_DPIF_TP_UDP_ATTR
> +#define CT_DPIF_TP_ICMP_ATTR(NAME, VAL) xstr(ICMP_##NAME),
> +    CT_DPIF_TP_ICMP_ATTRS
> +#undef CT_DPIF_TP_ICMP_ATTR
> +#undef xstr
> +};
> +
> +static unsigned int ct_dpif_timeout_value_def[] = {
> +#define CT_DPIF_TP_TCP_ATTR(NAME, VAL) [CT_DPIF_TP_ATTR_TCP_##NAME] = VAL,
> +    CT_DPIF_TP_TCP_ATTRS
> +#undef CT_DPIF_TP_TCP_ATTR
> +#define CT_DPIF_TP_UDP_ATTR(NAME, VAL) [CT_DPIF_TP_ATTR_UDP_##NAME] = VAL,
> +    CT_DPIF_TP_UDP_ATTRS
> +#undef CT_DPIF_TP_UDP_ATTR
> +#define CT_DPIF_TP_ICMP_ATTR(NAME, VAL) [CT_DPIF_TP_ATTR_ICMP_##NAME] = VAL,
> +    CT_DPIF_TP_ICMP_ATTRS
> +#undef CT_DPIF_TP_ICMP_ATTR
> +};

We now have two places that define the default timeout of the L4
protocols, one in ct-dpif.h (CT_DPIF_TP_TCP_ATTRS,
CT_DPIF_TP_UDP_ATTRS, CT_DPIF_ICMP_ATTRS),  and one in
conntrack-private.h (CT_TIMEOUTS).

Since ct-dpif.h is a common conntrack dpif interface for both the
kernel and the userspace datapath, and the default value is different
in this two datapaths, should we put the userspace default timeout
setting in lib/conntrack-tp.h, and clean up CT_TIMEOUTS in
lib/conntrack-private.h ?

Maybe rename ct_dpif_timeout_value_def to ct_dpif_netdev* ?


> +
> +static void OVS_UNUSED
> +timeout_policy_dump(struct timeout_policy *tp)
> +{
> +    bool present;
> +    int i;
> +
> +    VLOG_DBG("Timeout Policy ID %u:", tp->policy.id);
> +    for (i = 0; i < ARRAY_SIZE(tp->policy.attrs); i++) {
> +        if (tp->policy.present & (1 << i)) {
> +            present = !!(tp->policy.present & (1 << i));
> +            VLOG_DBG("  Policy: %s present: %u value: %u",
> +                      dpif_ct_timeout_str[i], present, tp->policy.attrs[i]);
> +        }
> +    }
> +}

Is this static function needed? It does not seem to be called within
conntrack-tp.c. If it is not needed, we should remove
dpif_ct_timeout_str as well.


> +
> +static struct timeout_policy *
> +timeout_policy_lookup(struct conntrack *ct, int32_t tp_id)
> +    OVS_REQUIRES(ct->ct_lock)
> +{
> +    struct timeout_policy *tp;
> +    uint32_t hash;
> +
> +    hash = hash_int(tp_id, ct->hash_basis);
> +    HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) {
> +        if (tp->policy.id == tp_id) {
> +            return tp;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +struct timeout_policy *
> +timeout_policy_get(struct conntrack *ct, int32_t tp_id)
> +{
> +    struct timeout_policy *tp;
> +
> +    ovs_mutex_lock(&ct->ct_lock);
> +    tp = timeout_policy_lookup(ct, tp_id);
> +    if (!tp) {
> +        ovs_mutex_unlock(&ct->ct_lock);
> +        return NULL;
> +    }
> +
> +    ovs_mutex_unlock(&ct->ct_lock);
> +    return tp;
> +}
> +
> +static void
> +update_existing_tp(struct timeout_policy *tp_dst,
> +                   struct timeout_policy *tp_src)
> +{
> +    struct ct_dpif_timeout_policy *dst, *src;
> +    int i;
> +
> +    dst = &tp_dst->policy;
> +    src = &tp_src->policy;
> +
> +    /* Set the value and present bit to dst if present
> +     * bit in src is set.
> +     */
> +    for (i = 0; i < ARRAY_SIZE(dst->attrs); i++) {
> +        if (src->present & (1 << i)) {
> +            dst->attrs[i] = src->attrs[i];
> +            dst->present |= (1 << i);
> +        }
> +    }
> +}

This logic does not unset some timeout value if it is not present in
tp_src.  May be it is easier to update the timeout policy as we create
a new one (restore every timeout values to default, and updates the
present ones in tp_src).


> +
> +static void
> +init_default_tp(struct timeout_policy *tp, uint32_t tp_id)
> +{
> +    tp->policy.id = tp_id;
> +    /* Initialize the timeout value to default, but not
> +     * setting the present bit.
> +     */
> +    tp->policy.present = 0;
> +    memcpy(tp->policy.attrs, ct_dpif_timeout_value_def,
> +           sizeof tp->policy.attrs);
> +}
> +
> +static void
> +timeout_policy_create(struct conntrack *ct,
> +                      struct timeout_policy *new_tp)
> +    OVS_REQUIRES(ct->ct_lock)
> +{
> +    uint32_t tp_id = new_tp->policy.id;
> +    struct timeout_policy *tp;
> +    uint32_t hash;
> +
> +    tp = xzalloc(sizeof *tp);
> +    init_default_tp(tp, tp_id);
> +    update_existing_tp(tp, new_tp);
> +    hash = hash_int(tp_id, ct->hash_basis);
> +    hmap_insert(&ct->timeout_policies, &tp->node, hash);
> +}
> +
> +int
> +timeout_policy_update(struct conntrack *ct,
> +                      struct timeout_policy *new_tp)
> +{
> +    int err = 0;
> +    uint32_t tp_id = new_tp->policy.id;
> +
> +    ovs_mutex_lock(&ct->ct_lock);
> +    struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id);
> +    if (tp) {
> +        update_existing_tp(tp, new_tp);
> +    } else {
> +        timeout_policy_create(ct, new_tp);
> +    }
> +    ovs_mutex_unlock(&ct->ct_lock);
> +    return err;
> +}
> +
> +void
> +timeout_policy_clean(struct conntrack *ct, struct timeout_policy *tp)
> +    OVS_REQUIRES(ct->ct_lock)
> +{
> +    hmap_remove(&ct->timeout_policies, &tp->node);
> +    free(tp);
> +}

Maybe make timeout_policy_clean() to be static ?


> +int
> +timeout_policy_delete(struct conntrack *ct, uint32_t tp_id)
> +{
> +    ovs_mutex_lock(&ct->ct_lock);
> +    struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id);
> +    if (tp) {
> +        timeout_policy_clean(ct, tp);
> +    } else {
> +        VLOG_WARN_RL(&rl, "Attempted delete of non-existent timeout "
> +                     "policy: zone %d", tp_id);

I would sugguest the following minor change on the warning log
+        VLOG_WARN_RL(&rl, "Failed to delete a non-existent timeout "
+                     "policy: id=%d", tp_id);

> +    }
> +    ovs_mutex_unlock(&ct->ct_lock);
> +    return 0;
> +}
> +
> +void
> +timeout_policy_init(struct conntrack *ct)
> +    OVS_REQUIRES(ct->ct_lock)
> +{
> +    struct timeout_policy tp;
> +
> +    hmap_init(&ct->timeout_policies);
> +
> +    /* Create default timeout policy. */
> +    memset(&tp, 0, sizeof tp);
> +    tp.policy.id = DEFAULT_TP_ID;
> +    timeout_policy_create(ct, &tp);
> +}
> +
> +static enum ct_dpif_tp_attr
> +tm_to_ct_dpif_tp(enum ct_timeout tm)
> +{
> +    switch (tm) {
> +    case CT_TM_TCP_FIRST_PACKET:
> +        return CT_DPIF_TP_ATTR_TCP_SYN_SENT;
> +    case CT_TM_TCP_OPENING:
> +        return CT_DPIF_TP_ATTR_TCP_SYN_RECV;
> +    case CT_TM_TCP_ESTABLISHED:
> +        return CT_DPIF_TP_ATTR_TCP_ESTABLISHED;
> +    case CT_TM_TCP_CLOSING:
> +        return CT_DPIF_TP_ATTR_TCP_FIN_WAIT;
> +    case CT_TM_TCP_FIN_WAIT:
> +        return CT_DPIF_TP_ATTR_TCP_TIME_WAIT;
> +    case CT_TM_TCP_CLOSED:
> +        return CT_DPIF_TP_ATTR_TCP_CLOSE;
> +    case CT_TM_OTHER_FIRST:
> +        return CT_DPIF_TP_ATTR_UDP_FIRST;
> +    case CT_TM_OTHER_BIDIR:
> +        return CT_DPIF_TP_ATTR_UDP_SINGLE;
> +    case CT_TM_OTHER_MULTIPLE:
> +        return CT_DPIF_TP_ATTR_UDP_MULTIPLE;
> +    case CT_TM_ICMP_FIRST:
> +        return CT_DPIF_TP_ATTR_ICMP_FIRST;
> +    case CT_TM_ICMP_REPLY:
> +        return CT_DPIF_TP_ATTR_ICMP_REPLY;
> +    case N_CT_TM:
> +    default:
> +        OVS_NOT_REACHED();
> +        break;
> +    }
> +    OVS_NOT_REACHED();
> +    return CT_DPIF_TP_ATTR_MAX;
> +}
> +
> +static void
> +conn_update_expiration__(struct conntrack *ct, struct conn *conn,
> +                         enum ct_timeout tm, long long now,
> +                         uint32_t tp_value)
> +    OVS_NO_THREAD_SAFETY_ANALYSIS
> +{
> +    ovs_mutex_unlock(&conn->lock);
> +
> +    ovs_mutex_lock(&ct->ct_lock);
> +    ovs_mutex_lock(&conn->lock);
> +    if (!conn->cleaned) {
> +        conn->expiration = now + tp_value * 1000;
> +        ovs_list_remove(&conn->exp_node);
> +        ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
> +    }
> +    ovs_mutex_unlock(&conn->lock);
> +    ovs_mutex_unlock(&ct->ct_lock);
> +
> +    ovs_mutex_lock(&conn->lock);
> +}
> +
> +/* The conn entry lock must be held on entry and exit. */
> +void
> +conn_update_expiration(struct conntrack *ct, struct conn *conn,
> +                       enum ct_timeout tm, long long now)
> +    OVS_REQUIRES(ct->ct_lock)

Does ct->ct_lock require here?  We lock ct->ct_lock in conn_update_expiration__


> +{
> +    struct timeout_policy *tp;
> +    uint32_t val;
> +
> +    tp = timeout_policy_lookup(ct, conn->tp_id);
> +    if (tp) {
> +        val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
> +    } else {
> +        val = ct_dpif_timeout_value_def[tm_to_ct_dpif_tp(tm)];
> +    }
> +    VLOG_DBG_RL(&rl, "Update timeout %s with policy id=%d val=%u sec.",
> +                ct_timeout_str[tm], conn->tp_id, val);

Should we include the zone id here and in the conn_init_expiration()?

> +
> +    conn_update_expiration__(ct, conn, tm, now, val);
> +}
> +
> +static void
> +conn_init_expiration__(struct conntrack *ct, struct conn *conn,
> +                       enum ct_timeout tm, long long now,
> +                       uint32_t tp_value)
> +{
> +    conn->expiration = now + tp_value * 1000;
> +    ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
> +}
> +
> +/* ct_lock must be held. */
> +void
> +conn_init_expiration(struct conntrack *ct, struct conn *conn,
> +                     enum ct_timeout tm, long long now)
> +    OVS_REQUIRES(ct->ct_lock)
> +{
> +    struct timeout_policy *tp;
> +    uint32_t val;
> +
> +    tp = timeout_policy_lookup(ct, conn->tp_id);
> +    if (tp) {
> +        val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
> +    } else {
> +        val = ct_dpif_timeout_value_def[tm_to_ct_dpif_tp(tm)];
> +    }
> +    VLOG_DBG_RL(&rl, "Init timeout %s with policy id=%d val=%u sec.",
> +                ct_timeout_str[tm], conn->tp_id, val);
> +
> +    conn_init_expiration__(ct, conn, tm, now, val);
> +}


> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -25,6 +25,7 @@
>  #include "bitmap.h"
>  #include "conntrack.h"
>  #include "conntrack-private.h"
> +#include "conntrack-tp.h"
>  #include "coverage.h"
>  #include "csum.h"
>  #include "ct-dpif.h"
> @@ -88,7 +89,8 @@ static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis);
>  static void conn_key_reverse(struct conn_key *);
>  static bool valid_new(struct dp_packet *pkt, struct conn_key *);
>  static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt,
> -                             struct conn_key *, long long now);
> +                             struct conn_key *, long long now,
> +                             uint32_t tp_id);
>  static void delete_conn_cmn(struct conn *);
>  static void delete_conn(struct conn *);
>  static void delete_conn_one(struct conn *conn);
> @@ -175,12 +177,6 @@ static alg_helper alg_helpers[] = {
>      [CT_ALG_CTL_TFTP] = handle_tftp_ctl,
>  };
>
> -long long ct_timeout_val[] = {
> -#define CT_TIMEOUT(NAME, VAL) [CT_TM_##NAME] = VAL,
> -    CT_TIMEOUTS
> -#undef CT_TIMEOUT
> -};
> -
>  /* The maximum TCP or UDP port number. */
>  #define CT_MAX_L4_PORT 65535
>  /* String buffer used for parsing FTP string messages.
> @@ -312,6 +308,7 @@ conntrack_init(void)
>      }
>      hmap_init(&ct->zone_limits);
>      ct->zone_limit_seq = 0;
> +    timeout_policy_init(ct);
>      ovs_mutex_unlock(&ct->ct_lock);
>
>      ct->hash_basis = random_uint32();
> @@ -502,6 +499,12 @@ conntrack_destroy(struct conntrack *ct)
>      }
>      hmap_destroy(&ct->zone_limits);
>
> +    struct timeout_policy *tp;
> +    HMAP_FOR_EACH_POP (tp, node, &ct->timeout_policies) {
> +        free(tp);
> +    }
> +    hmap_destroy(&ct->timeout_policies);
> +
>      ovs_mutex_unlock(&ct->ct_lock);
>      ovs_mutex_destroy(&ct->ct_lock);
>
> @@ -956,7 +959,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
>                 struct conn_lookup_ctx *ctx, bool commit, long long now,
>                 const struct nat_action_info_t *nat_action_info,
>                 const char *helper, const struct alg_exp_node *alg_exp,
> -               enum ct_alg_ctl_type ct_alg_ctl)
> +               enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id)
>      OVS_REQUIRES(ct->ct_lock)
>  {
>      struct conn *nc = NULL;
> @@ -987,7 +990,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
>              return nc;
>          }
>
> -        nc = new_conn(ct, pkt, &ctx->key, now);
> +        nc = new_conn(ct, pkt, &ctx->key, now, tp_id);
>          memcpy(&nc->key, &ctx->key, sizeof nc->key);
>          memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key);
>          conn_key_reverse(&nc->rev_key);
> @@ -1275,7 +1278,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
>              bool force, bool commit, long long now, const uint32_t *setmark,
>              const struct ovs_key_ct_labels *setlabel,
>              const struct nat_action_info_t *nat_action_info,
> -            ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper)
> +            ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
> +            uint32_t tp_id)
>  {
>      /* Reset ct_state whenever entering a new zone. */
>      if (pkt->md.ct_state && pkt->md.ct_zone != zone) {
> @@ -1359,7 +1363,7 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
>          ovs_mutex_lock(&ct->ct_lock);
>          if (!conn_lookup(ct, &ctx->key, now, NULL, NULL)) {
>              conn = conn_not_found(ct, pkt, ctx, commit, now, nat_action_info,
> -                                  helper, alg_exp, ct_alg_ctl);
> +                                  helper, alg_exp, ct_alg_ctl, tp_id);
>          }
>          ovs_mutex_unlock(&ct->ct_lock);
>      }
> @@ -1395,7 +1399,7 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
>                    const struct ovs_key_ct_labels *setlabel,
>                    ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
>                    const struct nat_action_info_t *nat_action_info,
> -                  long long now)
> +                  long long now, uint32_t tp_id)
>  {
>      ipf_preprocess_conntrack(ct->ipf, pkt_batch, now, dl_type, zone,
>                               ct->hash_basis);
> @@ -1417,7 +1421,8 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
>              write_ct_md(packet, zone, NULL, NULL, NULL);
>          } else {
>              process_one(ct, packet, &ctx, zone, force, commit, now, setmark,
> -                        setlabel, nat_action_info, tp_src, tp_dst, helper);
> +                        setlabel, nat_action_info, tp_src, tp_dst, helper,
> +                        tp_id);
>          }
>      }
>
> @@ -1523,7 +1528,7 @@ conntrack_clean(struct conntrack *ct, long long now)
>      atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
>      size_t clean_max = n_conn_limit > 10 ? n_conn_limit / 10 : 1;
>      long long min_exp = ct_sweep(ct, now, clean_max);
> -    long long next_wakeup = MIN(min_exp, now + CT_TM_MIN);
> +    long long next_wakeup = MIN(min_exp, now + CT_DPIF_TP_MIN);
>
>      return next_wakeup;
>  }
> @@ -1540,14 +1545,14 @@ conntrack_clean(struct conntrack *ct, long long now)
>   * - We want to reduce the number of wakeups and batch connection cleanup
>   *   when the load is not very high.  CT_CLEAN_INTERVAL ensures that if we
>   *   are coping with the current cleanup tasks, then we wait at least
> - *   5 seconds to do further cleanup.
> + *   3 seconds to do further cleanup.
>   *
>   * - We don't want to keep the map locked too long, as we might prevent
>   *   traffic from flowing.  CT_CLEAN_MIN_INTERVAL ensures that if cleanup is
>   *   behind, there is at least some 200ms blocks of time when the map will be
>   *   left alone, so the datapath can operate unhindered.
>   */
> -#define CT_CLEAN_INTERVAL 5000 /* 5 seconds */
> +#define CT_CLEAN_INTERVAL 3000 /* 3 second */

I'm not sure whether we should reduce the CLEAN_INTERVAL from 5
seconds to 3 seconds.  It definitely help to clean up the expired
conntrack entries more freqently, but it also increase the workload.


>  #define CT_CLEAN_MIN_INTERVAL 200  /* 0.2 seconds */
>
>  static void *
> @@ -2347,9 +2352,9 @@ valid_new(struct dp_packet *pkt, struct conn_key *key)
>
>  static struct conn *
>  new_conn(struct conntrack *ct, struct dp_packet *pkt, struct conn_key *key,
> -         long long now)
> +         long long now, uint32_t tp_id)
>  {
> -    return l4_protos[key->nw_proto]->new_conn(ct, pkt, now);
> +    return l4_protos[key->nw_proto]->new_conn(ct, pkt, now, tp_id);
>  }
>
>  static void


> diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
> index 3e227d9e3b6e..707adf70402c 100644
> --- a/lib/ct-dpif.h
> +++ b/lib/ct-dpif.h
> @@ -59,6 +59,8 @@ struct ct_dpif_timestamp {
>      uint64_t stop;
>  };
>
> +#define DEFAULT_TP_ID 0
> +
>  #define CT_DPIF_TCP_STATES \
>      CT_DPIF_TCP_STATE(CLOSED) \
>      CT_DPIF_TCP_STATE(LISTEN) \
> @@ -225,36 +227,53 @@ struct ct_dpif_zone_limit {
>      struct ovs_list node;
>  };
>
> +/* TP attr with default timeout in seconds. */
>  #define CT_DPIF_TP_TCP_ATTRS \
> -    CT_DPIF_TP_TCP_ATTR(SYN_SENT) \
> -    CT_DPIF_TP_TCP_ATTR(SYN_RECV) \
> -    CT_DPIF_TP_TCP_ATTR(ESTABLISHED) \
> -    CT_DPIF_TP_TCP_ATTR(FIN_WAIT) \
> -    CT_DPIF_TP_TCP_ATTR(CLOSE_WAIT) \
> -    CT_DPIF_TP_TCP_ATTR(LAST_ACK) \
> -    CT_DPIF_TP_TCP_ATTR(TIME_WAIT) \
> -    CT_DPIF_TP_TCP_ATTR(CLOSE) \
> -    CT_DPIF_TP_TCP_ATTR(SYN_SENT2) \
> -    CT_DPIF_TP_TCP_ATTR(RETRANSMIT) \
> -    CT_DPIF_TP_TCP_ATTR(UNACK)
> +    CT_DPIF_TP_TCP_ATTR(SYN_SENT, 30) \
> +    CT_DPIF_TP_TCP_ATTR(SYN_RECV, 30) \
> +    CT_DPIF_TP_TCP_ATTR(ESTABLISHED, 24 * 60 * 60) \
> +    CT_DPIF_TP_TCP_ATTR(FIN_WAIT, 15 * 60) \
> +    CT_DPIF_TP_TCP_ATTR(CLOSE_WAIT, 45) \
> +    CT_DPIF_TP_TCP_ATTR(LAST_ACK, 30) \
> +    CT_DPIF_TP_TCP_ATTR(TIME_WAIT, 45) \
> +    CT_DPIF_TP_TCP_ATTR(CLOSE, 30) \
> +    CT_DPIF_TP_TCP_ATTR(SYN_SENT2, 30) \
> +    CT_DPIF_TP_TCP_ATTR(RETRANSMIT, 30) \
> +    CT_DPIF_TP_TCP_ATTR(UNACK, 30)
>
>  #define CT_DPIF_TP_UDP_ATTRS \
> -    CT_DPIF_TP_UDP_ATTR(FIRST) \
> -    CT_DPIF_TP_UDP_ATTR(SINGLE) \
> -    CT_DPIF_TP_UDP_ATTR(MULTIPLE)
> +    CT_DPIF_TP_UDP_ATTR(FIRST, 60) \
> +    CT_DPIF_TP_UDP_ATTR(SINGLE, 60) \
> +    CT_DPIF_TP_UDP_ATTR(MULTIPLE, 30)
>
>  #define CT_DPIF_TP_ICMP_ATTRS \
> -    CT_DPIF_TP_ICMP_ATTR(FIRST) \
> -    CT_DPIF_TP_ICMP_ATTR(REPLY)
> +    CT_DPIF_TP_ICMP_ATTR(FIRST, 60) \
> +    CT_DPIF_TP_ICMP_ATTR(REPLY, 30)
> +
> +/* The minimum value of the default timeout. */
> +#define CT_DPIF_TP_MIN 30

This value is similar to CT_TM_MIN, and it is for dpif-netdev only,
should we keep it in lib/conntrack-tp.h?


> +#define CT_DPIF_TP_TCP_ATTR(ATTR, VAL) \
> +            BUILD_ASSERT_DECL(VAL >= CT_DPIF_TP_MIN);
> +    CT_DPIF_TP_TCP_ATTRS
> +#define CT_DPIF_TP_UDP_ATTR(ATTR, VAL) \
> +            BUILD_ASSERT_DECL(VAL >= CT_DPIF_TP_MIN);
> +    CT_DPIF_TP_UDP_ATTRS
> +#undef CT_DPIF_TP_UDP_ATTR
> +#define CT_DPIF_TP_ICMP_ATTR(ATTR, VAL) \
> +            BUILD_ASSERT_DECL(VAL >= CT_DPIF_TP_MIN);
> +    CT_DPIF_TP_ICMP_ATTRS
> +#undef CT_DPIF_TP_ICMP_ATTR
> +
> +#undef CT_DPIF_TP_TCP_ATTR
>
>  enum OVS_PACKED_ENUM ct_dpif_tp_attr {
> -#define CT_DPIF_TP_TCP_ATTR(ATTR) CT_DPIF_TP_ATTR_TCP_##ATTR,
> +#define CT_DPIF_TP_TCP_ATTR(ATTR, VAL) CT_DPIF_TP_ATTR_TCP_##ATTR,
>      CT_DPIF_TP_TCP_ATTRS
>  #undef CT_DPIF_TP_TCP_ATTR
> -#define CT_DPIF_TP_UDP_ATTR(ATTR) CT_DPIF_TP_ATTR_UDP_##ATTR,
> +#define CT_DPIF_TP_UDP_ATTR(ATTR, VAL) CT_DPIF_TP_ATTR_UDP_##ATTR,
>      CT_DPIF_TP_UDP_ATTRS
>  #undef CT_DPIF_TP_UDP_ATTR
> -#define CT_DPIF_TP_ICMP_ATTR(ATTR) CT_DPIF_TP_ATTR_ICMP_##ATTR,
> +#define CT_DPIF_TP_ICMP_ATTR(ATTR, VAL) CT_DPIF_TP_ATTR_ICMP_##ATTR,
>      CT_DPIF_TP_ICMP_ATTRS
>  #undef CT_DPIF_TP_ICMP_ATTR
>      CT_DPIF_TP_ATTR_MAX

> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at

> @@ -3301,8 +3301,15 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=>  AT_CHECK([ovs-appctl dpctl/flush-conntrack])
>
>  dnl Shorten the udp_single and icmp_first timeout in zone 5
> +dnl Userspace datapath uses udp_first and icmp_reply, and
> +dnl kernel datapath uses udp_single and icmp_first
>  VSCTL_ADD_DATAPATH_TABLE()
> -AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_single=3 icmp_first=3])
> +
> +dnl Creating more timeout policies
> +for i in `seq 1 255`; do
> +ovs-vsctl --may-exist add-zone-tp $DP_TYPE zone=5 udp_first=$i udp_single=$i icmp_first=$i icmp_reply=$i;

Should we add zone-tp to different zone?


> +done
> +AT_CHECK([ovs-vsctl --may-exist add-zone-tp $DP_TYPE zone=5 udp_first=1 udp_single=1 icmp_first=1 icmp_reply=1])
>
>  dnl Send ICMP and UDP traffic
>  NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
> @@ -3317,7 +3324,7 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
>
>  dnl Wait until the timeout expire.
>  dnl We intend to wait a bit longer, because conntrack does not recycle the entry right after it is expired.
> -sleep 4
> +sleep 5
>
>  AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
>  ])
> @@ -3335,11 +3342,25 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
>
>  dnl Wait until the timeout expire.
>  dnl We intend to wait a bit longer, because conntrack does not recycle the entry right after it is expired.
> -sleep 4
> +sleep 5
>
>  AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
>  ])
>
> +dnl Set the timeout policy to default again.
> +AT_CHECK([ovs-vsctl --may-exist add-zone-tp $DP_TYPE zone=5 icmp_first=30])

Maybe delete this zone-tp?


> +
> +dnl Send ICMP and UDP traffic
> +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])

Add some delay before dump-conntrack?

> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], [0], [dnl
> +icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=5
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=5
> +])
> +
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>

Thanks,

-Yi-Hung
William Tu April 26, 2020, 11:37 p.m. UTC | #2
Hi Yi-Hung,
Thanks for your review.

On Wed, Apr 22, 2020 at 03:52:20PM -0700, Yi-Hung Wei wrote:
> On Sun, Apr 19, 2020 at 9:05 AM William Tu <u9012063@gmail.com> wrote:
> >
> > Commit 1f1613183733 ("ct-dpif, dpif-netlink: Add conntrack timeout
> > policy support") adds conntrack timeout policy for kernel datapath.
> > This patch enables support for the userspace datapath.  I tested
> > using the 'make check-system-userspace' which checks the timeout
> > policies for ICMP and UDP cases.
> >
> > Signed-off-by: William Tu <u9012063@gmail.com>
> > ---
> > v3:
> >     - address feedback from Yi-Hung
> >     - use ID 0 as default policy
> >     - move conn_{init,update}_expiration to lib/conntrack-tp.c
> >     - s/tpid/tp_id/
> >     - add default timeout value to CT_DPIF_TP_*_ATTRs
> >     - reduce the CT_CLEAN_INTERVAL from 5 to 3s, to make the tests
> >       run faster.
> >     - add more tests to system-traffic.at
> >     - code refactoring and renaming
> >     - travis: https://travis-ci.org/github/williamtu/ovs-travis/builds/676683451
> > ---
> 
> Thanks for the updated patch.
> 
> > diff --git a/lib/conntrack-icmp.c b/lib/conntrack-icmp.c
> > index 63246f0124d0..5c6ef37d2eb4 100644
> > --- a/lib/conntrack-icmp.c
> > +++ b/lib/conntrack-icmp.c
> > @@ -22,6 +22,7 @@
> >  #include <netinet/icmp6.h>
> >
> >  #include "conntrack-private.h"
> > +#include "conntrack-tp.h"
> >  #include "dp-packet.h"
> >
> >  enum OVS_PACKED_ENUM icmp_state {
> > @@ -50,9 +51,12 @@ icmp_conn_update(struct conntrack *ct, struct conn *conn_,
> >                   struct dp_packet *pkt OVS_UNUSED, bool reply, long long now)
> >  {
> >      struct conn_icmp *conn = conn_icmp_cast(conn_);
> > -    conn->state = reply ? ICMPS_REPLY : ICMPS_FIRST;
> > -    conn_update_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
> >
> > +    if (reply && conn->state == ICMPS_FIRST) {
> > +        conn->state = ICMPS_REPLY;
> > +    }
> 
> This code snippet looks like a reasonable fix on the ICMP state
> handling, should we pull it out as a separate patch?
> 
> Previously, we will change the ICMP state from ICMP_REPLY back to
> ICMP_FIRST for  every echo request packet after an echo reply packet.
> With this patch, the ICMP connection will stay in ICMP_REPLY state
> after the first echo reply packet.
>
yes, it's a bug in icmp conntrack, I will send separate patch.

> 
> 
> > diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
> > new file mode 100644
> > index 000000000000..5e59f56eb949
> > --- /dev/null
> > +++ b/lib/conntrack-tp.c
> > @@ -0,0 +1,319 @@
> > +#include <config.h>
> > +
> > +#include "conntrack-private.h"
> > +#include "conntrack-tp.h"
> > +#include "ct-dpif.h"
> > +#include "dp-packet.h"
> 
> Is "dp-packet.h" required?
> 
ok

> > +#include "openvswitch/vlog.h"
> 
> May be put this line with the other #include?
ok

> 
> 
> > +VLOG_DEFINE_THIS_MODULE(conntrack_tp);
> > +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> > +
> > +#define DEFAULT_TP_ID 0
> 
> DEFAULT_TP_ID is also defined in ct-dpif.h, we should keep it in one place.
OK, I will keep it in ct-dpif.h.

> 
> 
> > +static const char *ct_timeout_str[] = {
> > +#define CT_TIMEOUT(NAME, VALUE) #NAME,
> > +    CT_TIMEOUTS
> > +#undef CT_TIMEOUT
> > +};
> > +
> > +static const char *dpif_ct_timeout_str[] = {
> > +#define xstr(s) #s
> > +#define CT_DPIF_TP_TCP_ATTR(NAME, VAL) xstr(TCP_##NAME),
> > +    CT_DPIF_TP_TCP_ATTRS
> > +#undef CT_DPIF_TP_TCP_ATTR
> > +#define CT_DPIF_TP_UDP_ATTR(NAME, VAL) xstr(UDP_##NAME),
> > +    CT_DPIF_TP_UDP_ATTRS
> > +#undef CT_DPIF_TP_UDP_ATTR
> > +#define CT_DPIF_TP_ICMP_ATTR(NAME, VAL) xstr(ICMP_##NAME),
> > +    CT_DPIF_TP_ICMP_ATTRS
> > +#undef CT_DPIF_TP_ICMP_ATTR
> > +#undef xstr
> > +};
> > +
> > +static unsigned int ct_dpif_timeout_value_def[] = {
> > +#define CT_DPIF_TP_TCP_ATTR(NAME, VAL) [CT_DPIF_TP_ATTR_TCP_##NAME] = VAL,
> > +    CT_DPIF_TP_TCP_ATTRS
> > +#undef CT_DPIF_TP_TCP_ATTR
> > +#define CT_DPIF_TP_UDP_ATTR(NAME, VAL) [CT_DPIF_TP_ATTR_UDP_##NAME] = VAL,
> > +    CT_DPIF_TP_UDP_ATTRS
> > +#undef CT_DPIF_TP_UDP_ATTR
> > +#define CT_DPIF_TP_ICMP_ATTR(NAME, VAL) [CT_DPIF_TP_ATTR_ICMP_##NAME] = VAL,
> > +    CT_DPIF_TP_ICMP_ATTRS
> > +#undef CT_DPIF_TP_ICMP_ATTR
> > +};
> 
> We now have two places that define the default timeout of the L4
> protocols, one in ct-dpif.h (CT_DPIF_TP_TCP_ATTRS,
> CT_DPIF_TP_UDP_ATTRS, CT_DPIF_ICMP_ATTRS),  and one in
> conntrack-private.h (CT_TIMEOUTS).
> 
> Since ct-dpif.h is a common conntrack dpif interface for both the
> kernel and the userspace datapath, and the default value is different
> in this two datapaths, should we put the userspace default timeout
> setting in lib/conntrack-tp.h, and clean up CT_TIMEOUTS in
> lib/conntrack-private.h ?
> 
> Maybe rename ct_dpif_timeout_value_def to ct_dpif_netdev* ?

Sure, will do it in next version.

> 
> 
> > +
> > +static void OVS_UNUSED
> > +timeout_policy_dump(struct timeout_policy *tp)
> > +{
> > +    bool present;
> > +    int i;
> > +
> > +    VLOG_DBG("Timeout Policy ID %u:", tp->policy.id);
> > +    for (i = 0; i < ARRAY_SIZE(tp->policy.attrs); i++) {
> > +        if (tp->policy.present & (1 << i)) {
> > +            present = !!(tp->policy.present & (1 << i));
> > +            VLOG_DBG("  Policy: %s present: %u value: %u",
> > +                      dpif_ct_timeout_str[i], present, tp->policy.attrs[i]);
> > +        }
> > +    }
> > +}
> 
> Is this static function needed? It does not seem to be called within
> conntrack-tp.c. If it is not needed, we should remove
> dpif_ct_timeout_str as well.

This is used for debugging, let me think about keeping it or not.
> 
> 
> > +
> > +static struct timeout_policy *
> > +timeout_policy_lookup(struct conntrack *ct, int32_t tp_id)
> > +    OVS_REQUIRES(ct->ct_lock)
> > +{
> > +    struct timeout_policy *tp;
> > +    uint32_t hash;
> > +
> > +    hash = hash_int(tp_id, ct->hash_basis);
> > +    HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) {
> > +        if (tp->policy.id == tp_id) {
> > +            return tp;
> > +        }
> > +    }
> > +    return NULL;
> > +}
> > +
> > +struct timeout_policy *
> > +timeout_policy_get(struct conntrack *ct, int32_t tp_id)
> > +{
> > +    struct timeout_policy *tp;
> > +
> > +    ovs_mutex_lock(&ct->ct_lock);
> > +    tp = timeout_policy_lookup(ct, tp_id);
> > +    if (!tp) {
> > +        ovs_mutex_unlock(&ct->ct_lock);
> > +        return NULL;
> > +    }
> > +
> > +    ovs_mutex_unlock(&ct->ct_lock);
> > +    return tp;
> > +}
> > +
> > +static void
> > +update_existing_tp(struct timeout_policy *tp_dst,
> > +                   struct timeout_policy *tp_src)
> > +{
> > +    struct ct_dpif_timeout_policy *dst, *src;
> > +    int i;
> > +
> > +    dst = &tp_dst->policy;
> > +    src = &tp_src->policy;
> > +
> > +    /* Set the value and present bit to dst if present
> > +     * bit in src is set.
> > +     */
> > +    for (i = 0; i < ARRAY_SIZE(dst->attrs); i++) {
> > +        if (src->present & (1 << i)) {
> > +            dst->attrs[i] = src->attrs[i];
> > +            dst->present |= (1 << i);
> > +        }
> > +    }
> > +}
> 
> This logic does not unset some timeout value if it is not present in
> tp_src.  May be it is easier to update the timeout policy as we create
> a new one (restore every timeout values to default, and updates the
> present ones in tp_src).
> 
OK will do it.

> 
> > +
> > +static void
> > +init_default_tp(struct timeout_policy *tp, uint32_t tp_id)
> > +{
> > +    tp->policy.id = tp_id;
> > +    /* Initialize the timeout value to default, but not
> > +     * setting the present bit.
> > +     */
> > +    tp->policy.present = 0;
> > +    memcpy(tp->policy.attrs, ct_dpif_timeout_value_def,
> > +           sizeof tp->policy.attrs);
> > +}
> > +
> > +static void
> > +timeout_policy_create(struct conntrack *ct,
> > +                      struct timeout_policy *new_tp)
> > +    OVS_REQUIRES(ct->ct_lock)
> > +{
> > +    uint32_t tp_id = new_tp->policy.id;
> > +    struct timeout_policy *tp;
> > +    uint32_t hash;
> > +
> > +    tp = xzalloc(sizeof *tp);
> > +    init_default_tp(tp, tp_id);
> > +    update_existing_tp(tp, new_tp);
> > +    hash = hash_int(tp_id, ct->hash_basis);
> > +    hmap_insert(&ct->timeout_policies, &tp->node, hash);
> > +}
> > +
> > +int
> > +timeout_policy_update(struct conntrack *ct,
> > +                      struct timeout_policy *new_tp)
> > +{
> > +    int err = 0;
> > +    uint32_t tp_id = new_tp->policy.id;
> > +
> > +    ovs_mutex_lock(&ct->ct_lock);
> > +    struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id);
> > +    if (tp) {
> > +        update_existing_tp(tp, new_tp);
> > +    } else {
> > +        timeout_policy_create(ct, new_tp);
> > +    }
> > +    ovs_mutex_unlock(&ct->ct_lock);
> > +    return err;
> > +}
> > +
> > +void
> > +timeout_policy_clean(struct conntrack *ct, struct timeout_policy *tp)
> > +    OVS_REQUIRES(ct->ct_lock)
> > +{
> > +    hmap_remove(&ct->timeout_policies, &tp->node);
> > +    free(tp);
> > +}
> 
> Maybe make timeout_policy_clean() to be static ?

OK

> 
> 
> > +int
> > +timeout_policy_delete(struct conntrack *ct, uint32_t tp_id)
> > +{
> > +    ovs_mutex_lock(&ct->ct_lock);
> > +    struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id);
> > +    if (tp) {
> > +        timeout_policy_clean(ct, tp);
> > +    } else {
> > +        VLOG_WARN_RL(&rl, "Attempted delete of non-existent timeout "
> > +                     "policy: zone %d", tp_id);
> 
> I would sugguest the following minor change on the warning log
> +        VLOG_WARN_RL(&rl, "Failed to delete a non-existent timeout "
> +                     "policy: id=%d", tp_id);

OK

> 
> > +    }
> > +    ovs_mutex_unlock(&ct->ct_lock);
> > +    return 0;
> > +}
> > +
> > +void
> > +timeout_policy_init(struct conntrack *ct)
> > +    OVS_REQUIRES(ct->ct_lock)
> > +{
> > +    struct timeout_policy tp;
> > +
> > +    hmap_init(&ct->timeout_policies);
> > +
> > +    /* Create default timeout policy. */
> > +    memset(&tp, 0, sizeof tp);
> > +    tp.policy.id = DEFAULT_TP_ID;
> > +    timeout_policy_create(ct, &tp);
> > +}
> > +
> > +static enum ct_dpif_tp_attr
> > +tm_to_ct_dpif_tp(enum ct_timeout tm)
> > +{
> > +    switch (tm) {
> > +    case CT_TM_TCP_FIRST_PACKET:
> > +        return CT_DPIF_TP_ATTR_TCP_SYN_SENT;
> > +    case CT_TM_TCP_OPENING:
> > +        return CT_DPIF_TP_ATTR_TCP_SYN_RECV;
> > +    case CT_TM_TCP_ESTABLISHED:
> > +        return CT_DPIF_TP_ATTR_TCP_ESTABLISHED;
> > +    case CT_TM_TCP_CLOSING:
> > +        return CT_DPIF_TP_ATTR_TCP_FIN_WAIT;
> > +    case CT_TM_TCP_FIN_WAIT:
> > +        return CT_DPIF_TP_ATTR_TCP_TIME_WAIT;
> > +    case CT_TM_TCP_CLOSED:
> > +        return CT_DPIF_TP_ATTR_TCP_CLOSE;
> > +    case CT_TM_OTHER_FIRST:
> > +        return CT_DPIF_TP_ATTR_UDP_FIRST;
> > +    case CT_TM_OTHER_BIDIR:
> > +        return CT_DPIF_TP_ATTR_UDP_SINGLE;
> > +    case CT_TM_OTHER_MULTIPLE:
> > +        return CT_DPIF_TP_ATTR_UDP_MULTIPLE;
> > +    case CT_TM_ICMP_FIRST:
> > +        return CT_DPIF_TP_ATTR_ICMP_FIRST;
> > +    case CT_TM_ICMP_REPLY:
> > +        return CT_DPIF_TP_ATTR_ICMP_REPLY;
> > +    case N_CT_TM:
> > +    default:
> > +        OVS_NOT_REACHED();
> > +        break;
> > +    }
> > +    OVS_NOT_REACHED();
> > +    return CT_DPIF_TP_ATTR_MAX;
> > +}
> > +
> > +static void
> > +conn_update_expiration__(struct conntrack *ct, struct conn *conn,
> > +                         enum ct_timeout tm, long long now,
> > +                         uint32_t tp_value)
> > +    OVS_NO_THREAD_SAFETY_ANALYSIS
> > +{
> > +    ovs_mutex_unlock(&conn->lock);
> > +
> > +    ovs_mutex_lock(&ct->ct_lock);
> > +    ovs_mutex_lock(&conn->lock);
> > +    if (!conn->cleaned) {
> > +        conn->expiration = now + tp_value * 1000;
> > +        ovs_list_remove(&conn->exp_node);
> > +        ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
> > +    }
> > +    ovs_mutex_unlock(&conn->lock);
> > +    ovs_mutex_unlock(&ct->ct_lock);
> > +
> > +    ovs_mutex_lock(&conn->lock);
> > +}
> > +
> > +/* The conn entry lock must be held on entry and exit. */
> > +void
> > +conn_update_expiration(struct conntrack *ct, struct conn *conn,
> > +                       enum ct_timeout tm, long long now)
> > +    OVS_REQUIRES(ct->ct_lock)
> 
> Does ct->ct_lock require here?  We lock ct->ct_lock in conn_update_expiration__

Good catch.

> 
> 
> > +{
> > +    struct timeout_policy *tp;
> > +    uint32_t val;
> > +
> > +    tp = timeout_policy_lookup(ct, conn->tp_id);
> > +    if (tp) {
> > +        val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
> > +    } else {
> > +        val = ct_dpif_timeout_value_def[tm_to_ct_dpif_tp(tm)];
> > +    }
> > +    VLOG_DBG_RL(&rl, "Update timeout %s with policy id=%d val=%u sec.",
> > +                ct_timeout_str[tm], conn->tp_id, val);
> 
> Should we include the zone id here and in the conn_init_expiration()?

OK

> 
> > +
> > +    conn_update_expiration__(ct, conn, tm, now, val);
> > +}
> > +
> > +static void
> > +conn_init_expiration__(struct conntrack *ct, struct conn *conn,
> > +                       enum ct_timeout tm, long long now,
> > +                       uint32_t tp_value)
> > +{
> > +    conn->expiration = now + tp_value * 1000;
> > +    ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
> > +}
> > +
> > +/* ct_lock must be held. */
> > +void
> > +conn_init_expiration(struct conntrack *ct, struct conn *conn,
> > +                     enum ct_timeout tm, long long now)
> > +    OVS_REQUIRES(ct->ct_lock)
> > +{
> > +    struct timeout_policy *tp;
> > +    uint32_t val;
> > +
> > +    tp = timeout_policy_lookup(ct, conn->tp_id);
> > +    if (tp) {
> > +        val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
> > +    } else {
> > +        val = ct_dpif_timeout_value_def[tm_to_ct_dpif_tp(tm)];
> > +    }
> > +    VLOG_DBG_RL(&rl, "Init timeout %s with policy id=%d val=%u sec.",
> > +                ct_timeout_str[tm], conn->tp_id, val);
> > +
> > +    conn_init_expiration__(ct, conn, tm, now, val);
> > +}
> 
> 
> > --- a/lib/conntrack.c
> > +++ b/lib/conntrack.c
> > @@ -25,6 +25,7 @@
> >  #include "bitmap.h"
> >  #include "conntrack.h"
> >  #include "conntrack-private.h"
> > +#include "conntrack-tp.h"
> >  #include "coverage.h"
> >  #include "csum.h"
> >  #include "ct-dpif.h"
> > @@ -88,7 +89,8 @@ static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis);
> >  static void conn_key_reverse(struct conn_key *);
> >  static bool valid_new(struct dp_packet *pkt, struct conn_key *);
> >  static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt,
> > -                             struct conn_key *, long long now);
> > +                             struct conn_key *, long long now,
> > +                             uint32_t tp_id);
> >  static void delete_conn_cmn(struct conn *);
> >  static void delete_conn(struct conn *);
> >  static void delete_conn_one(struct conn *conn);
> > @@ -175,12 +177,6 @@ static alg_helper alg_helpers[] = {
> >      [CT_ALG_CTL_TFTP] = handle_tftp_ctl,
> >  };
> >
> > -long long ct_timeout_val[] = {
> > -#define CT_TIMEOUT(NAME, VAL) [CT_TM_##NAME] = VAL,
> > -    CT_TIMEOUTS
> > -#undef CT_TIMEOUT
> > -};
> > -
> >  /* The maximum TCP or UDP port number. */
> >  #define CT_MAX_L4_PORT 65535
> >  /* String buffer used for parsing FTP string messages.
> > @@ -312,6 +308,7 @@ conntrack_init(void)
> >      }
> >      hmap_init(&ct->zone_limits);
> >      ct->zone_limit_seq = 0;
> > +    timeout_policy_init(ct);
> >      ovs_mutex_unlock(&ct->ct_lock);
> >
> >      ct->hash_basis = random_uint32();
> > @@ -502,6 +499,12 @@ conntrack_destroy(struct conntrack *ct)
> >      }
> >      hmap_destroy(&ct->zone_limits);
> >
> > +    struct timeout_policy *tp;
> > +    HMAP_FOR_EACH_POP (tp, node, &ct->timeout_policies) {
> > +        free(tp);
> > +    }
> > +    hmap_destroy(&ct->timeout_policies);
> > +
> >      ovs_mutex_unlock(&ct->ct_lock);
> >      ovs_mutex_destroy(&ct->ct_lock);
> >
> > @@ -956,7 +959,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> >                 struct conn_lookup_ctx *ctx, bool commit, long long now,
> >                 const struct nat_action_info_t *nat_action_info,
> >                 const char *helper, const struct alg_exp_node *alg_exp,
> > -               enum ct_alg_ctl_type ct_alg_ctl)
> > +               enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id)
> >      OVS_REQUIRES(ct->ct_lock)
> >  {
> >      struct conn *nc = NULL;
> > @@ -987,7 +990,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> >              return nc;
> >          }
> >
> > -        nc = new_conn(ct, pkt, &ctx->key, now);
> > +        nc = new_conn(ct, pkt, &ctx->key, now, tp_id);
> >          memcpy(&nc->key, &ctx->key, sizeof nc->key);
> >          memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key);
> >          conn_key_reverse(&nc->rev_key);
> > @@ -1275,7 +1278,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
> >              bool force, bool commit, long long now, const uint32_t *setmark,
> >              const struct ovs_key_ct_labels *setlabel,
> >              const struct nat_action_info_t *nat_action_info,
> > -            ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper)
> > +            ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
> > +            uint32_t tp_id)
> >  {
> >      /* Reset ct_state whenever entering a new zone. */
> >      if (pkt->md.ct_state && pkt->md.ct_zone != zone) {
> > @@ -1359,7 +1363,7 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
> >          ovs_mutex_lock(&ct->ct_lock);
> >          if (!conn_lookup(ct, &ctx->key, now, NULL, NULL)) {
> >              conn = conn_not_found(ct, pkt, ctx, commit, now, nat_action_info,
> > -                                  helper, alg_exp, ct_alg_ctl);
> > +                                  helper, alg_exp, ct_alg_ctl, tp_id);
> >          }
> >          ovs_mutex_unlock(&ct->ct_lock);
> >      }
> > @@ -1395,7 +1399,7 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
> >                    const struct ovs_key_ct_labels *setlabel,
> >                    ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
> >                    const struct nat_action_info_t *nat_action_info,
> > -                  long long now)
> > +                  long long now, uint32_t tp_id)
> >  {
> >      ipf_preprocess_conntrack(ct->ipf, pkt_batch, now, dl_type, zone,
> >                               ct->hash_basis);
> > @@ -1417,7 +1421,8 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
> >              write_ct_md(packet, zone, NULL, NULL, NULL);
> >          } else {
> >              process_one(ct, packet, &ctx, zone, force, commit, now, setmark,
> > -                        setlabel, nat_action_info, tp_src, tp_dst, helper);
> > +                        setlabel, nat_action_info, tp_src, tp_dst, helper,
> > +                        tp_id);
> >          }
> >      }
> >
> > @@ -1523,7 +1528,7 @@ conntrack_clean(struct conntrack *ct, long long now)
> >      atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
> >      size_t clean_max = n_conn_limit > 10 ? n_conn_limit / 10 : 1;
> >      long long min_exp = ct_sweep(ct, now, clean_max);
> > -    long long next_wakeup = MIN(min_exp, now + CT_TM_MIN);
> > +    long long next_wakeup = MIN(min_exp, now + CT_DPIF_TP_MIN);
> >
> >      return next_wakeup;
> >  }
> > @@ -1540,14 +1545,14 @@ conntrack_clean(struct conntrack *ct, long long now)
> >   * - We want to reduce the number of wakeups and batch connection cleanup
> >   *   when the load is not very high.  CT_CLEAN_INTERVAL ensures that if we
> >   *   are coping with the current cleanup tasks, then we wait at least
> > - *   5 seconds to do further cleanup.
> > + *   3 seconds to do further cleanup.
> >   *
> >   * - We don't want to keep the map locked too long, as we might prevent
> >   *   traffic from flowing.  CT_CLEAN_MIN_INTERVAL ensures that if cleanup is
> >   *   behind, there is at least some 200ms blocks of time when the map will be
> >   *   left alone, so the datapath can operate unhindered.
> >   */
> > -#define CT_CLEAN_INTERVAL 5000 /* 5 seconds */
> > +#define CT_CLEAN_INTERVAL 3000 /* 3 second */
> 
> I'm not sure whether we should reduce the CLEAN_INTERVAL from 5
> seconds to 3 seconds.  It definitely help to clean up the expired
> conntrack entries more freqently, but it also increase the workload.
> 

Maybe keeping it 5 seconds is better. I will remove it.

> 
> >  #define CT_CLEAN_MIN_INTERVAL 200  /* 0.2 seconds */
> >
> >  static void *
> > @@ -2347,9 +2352,9 @@ valid_new(struct dp_packet *pkt, struct conn_key *key)
> >
> >  static struct conn *
> >  new_conn(struct conntrack *ct, struct dp_packet *pkt, struct conn_key *key,
> > -         long long now)
> > +         long long now, uint32_t tp_id)
> >  {
> > -    return l4_protos[key->nw_proto]->new_conn(ct, pkt, now);
> > +    return l4_protos[key->nw_proto]->new_conn(ct, pkt, now, tp_id);
> >  }
> >
> >  static void
> 
> 
> > diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
> > index 3e227d9e3b6e..707adf70402c 100644
> > --- a/lib/ct-dpif.h
> > +++ b/lib/ct-dpif.h
> > @@ -59,6 +59,8 @@ struct ct_dpif_timestamp {
> >      uint64_t stop;
> >  };
> >
> > +#define DEFAULT_TP_ID 0
> > +
> >  #define CT_DPIF_TCP_STATES \
> >      CT_DPIF_TCP_STATE(CLOSED) \
> >      CT_DPIF_TCP_STATE(LISTEN) \
> > @@ -225,36 +227,53 @@ struct ct_dpif_zone_limit {
> >      struct ovs_list node;
> >  };
> >
> > +/* TP attr with default timeout in seconds. */
> >  #define CT_DPIF_TP_TCP_ATTRS \
> > -    CT_DPIF_TP_TCP_ATTR(SYN_SENT) \
> > -    CT_DPIF_TP_TCP_ATTR(SYN_RECV) \
> > -    CT_DPIF_TP_TCP_ATTR(ESTABLISHED) \
> > -    CT_DPIF_TP_TCP_ATTR(FIN_WAIT) \
> > -    CT_DPIF_TP_TCP_ATTR(CLOSE_WAIT) \
> > -    CT_DPIF_TP_TCP_ATTR(LAST_ACK) \
> > -    CT_DPIF_TP_TCP_ATTR(TIME_WAIT) \
> > -    CT_DPIF_TP_TCP_ATTR(CLOSE) \
> > -    CT_DPIF_TP_TCP_ATTR(SYN_SENT2) \
> > -    CT_DPIF_TP_TCP_ATTR(RETRANSMIT) \
> > -    CT_DPIF_TP_TCP_ATTR(UNACK)
> > +    CT_DPIF_TP_TCP_ATTR(SYN_SENT, 30) \
> > +    CT_DPIF_TP_TCP_ATTR(SYN_RECV, 30) \
> > +    CT_DPIF_TP_TCP_ATTR(ESTABLISHED, 24 * 60 * 60) \
> > +    CT_DPIF_TP_TCP_ATTR(FIN_WAIT, 15 * 60) \
> > +    CT_DPIF_TP_TCP_ATTR(CLOSE_WAIT, 45) \
> > +    CT_DPIF_TP_TCP_ATTR(LAST_ACK, 30) \
> > +    CT_DPIF_TP_TCP_ATTR(TIME_WAIT, 45) \
> > +    CT_DPIF_TP_TCP_ATTR(CLOSE, 30) \
> > +    CT_DPIF_TP_TCP_ATTR(SYN_SENT2, 30) \
> > +    CT_DPIF_TP_TCP_ATTR(RETRANSMIT, 30) \
> > +    CT_DPIF_TP_TCP_ATTR(UNACK, 30)
> >
> >  #define CT_DPIF_TP_UDP_ATTRS \
> > -    CT_DPIF_TP_UDP_ATTR(FIRST) \
> > -    CT_DPIF_TP_UDP_ATTR(SINGLE) \
> > -    CT_DPIF_TP_UDP_ATTR(MULTIPLE)
> > +    CT_DPIF_TP_UDP_ATTR(FIRST, 60) \
> > +    CT_DPIF_TP_UDP_ATTR(SINGLE, 60) \
> > +    CT_DPIF_TP_UDP_ATTR(MULTIPLE, 30)
> >
> >  #define CT_DPIF_TP_ICMP_ATTRS \
> > -    CT_DPIF_TP_ICMP_ATTR(FIRST) \
> > -    CT_DPIF_TP_ICMP_ATTR(REPLY)
> > +    CT_DPIF_TP_ICMP_ATTR(FIRST, 60) \
> > +    CT_DPIF_TP_ICMP_ATTR(REPLY, 30)
> > +
> > +/* The minimum value of the default timeout. */
> > +#define CT_DPIF_TP_MIN 30
> 
> This value is similar to CT_TM_MIN, and it is for dpif-netdev only,
> should we keep it in lib/conntrack-tp.h?
> 
OK, I will remove the default timeout value above.
So this CT_DPIF_TP_MIN will be removed too.

> 
> > +#define CT_DPIF_TP_TCP_ATTR(ATTR, VAL) \
> > +            BUILD_ASSERT_DECL(VAL >= CT_DPIF_TP_MIN);
> > +    CT_DPIF_TP_TCP_ATTRS
> > +#define CT_DPIF_TP_UDP_ATTR(ATTR, VAL) \
> > +            BUILD_ASSERT_DECL(VAL >= CT_DPIF_TP_MIN);
> > +    CT_DPIF_TP_UDP_ATTRS
> > +#undef CT_DPIF_TP_UDP_ATTR
> > +#define CT_DPIF_TP_ICMP_ATTR(ATTR, VAL) \
> > +            BUILD_ASSERT_DECL(VAL >= CT_DPIF_TP_MIN);
> > +    CT_DPIF_TP_ICMP_ATTRS
> > +#undef CT_DPIF_TP_ICMP_ATTR
> > +
> > +#undef CT_DPIF_TP_TCP_ATTR
> >
> >  enum OVS_PACKED_ENUM ct_dpif_tp_attr {
> > -#define CT_DPIF_TP_TCP_ATTR(ATTR) CT_DPIF_TP_ATTR_TCP_##ATTR,
> > +#define CT_DPIF_TP_TCP_ATTR(ATTR, VAL) CT_DPIF_TP_ATTR_TCP_##ATTR,
> >      CT_DPIF_TP_TCP_ATTRS
> >  #undef CT_DPIF_TP_TCP_ATTR
> > -#define CT_DPIF_TP_UDP_ATTR(ATTR) CT_DPIF_TP_ATTR_UDP_##ATTR,
> > +#define CT_DPIF_TP_UDP_ATTR(ATTR, VAL) CT_DPIF_TP_ATTR_UDP_##ATTR,
> >      CT_DPIF_TP_UDP_ATTRS
> >  #undef CT_DPIF_TP_UDP_ATTR
> > -#define CT_DPIF_TP_ICMP_ATTR(ATTR) CT_DPIF_TP_ATTR_ICMP_##ATTR,
> > +#define CT_DPIF_TP_ICMP_ATTR(ATTR, VAL) CT_DPIF_TP_ATTR_ICMP_##ATTR,
> >      CT_DPIF_TP_ICMP_ATTRS
> >  #undef CT_DPIF_TP_ICMP_ATTR
> >      CT_DPIF_TP_ATTR_MAX
> 
> > --- a/tests/system-traffic.at
> > +++ b/tests/system-traffic.at
> 
> > @@ -3301,8 +3301,15 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=>  AT_CHECK([ovs-appctl dpctl/flush-conntrack])
> >
> >  dnl Shorten the udp_single and icmp_first timeout in zone 5
> > +dnl Userspace datapath uses udp_first and icmp_reply, and
> > +dnl kernel datapath uses udp_single and icmp_first
> >  VSCTL_ADD_DATAPATH_TABLE()
> > -AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_single=3 icmp_first=3])
> > +
> > +dnl Creating more timeout policies
> > +for i in `seq 1 255`; do
> > +ovs-vsctl --may-exist add-zone-tp $DP_TYPE zone=5 udp_first=$i udp_single=$i icmp_first=$i icmp_reply=$i;
> 
> Should we add zone-tp to different zone?
> 
OK
Thanks
William
diff mbox series

Patch

diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
index b3507bd1c7fa..4884515446d7 100644
--- a/Documentation/faq/releases.rst
+++ b/Documentation/faq/releases.rst
@@ -118,7 +118,7 @@  Q: Are all features available with all datapaths?
     ========================== ============== ============== ========= =======
     Connection tracking             4.3            2.5          2.6      YES
     Conntrack Fragment Reass.       4.3            2.6          2.12     YES
-    Conntrack Timeout Policies      5.2            2.12         NO       NO
+    Conntrack Timeout Policies      5.2            2.12         2.14     NO
     Conntrack Zone Limit            4.18           2.10         2.13     YES
     Conntrack NAT                   4.6            2.6          2.8      YES
     Tunnel - LISP                   NO             2.11         NO       NO
diff --git a/NEWS b/NEWS
index 70bd17584594..1e6af8f57bdd 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,8 @@  Post-v2.13.0
      * Deprecated DPDK ring ports (dpdkr) are no longer supported.
    - Linux datapath:
      * Support for kernel versions up to 5.5.x.
+   - Userspace datapath:
+     * Add support for conntrack zone-based timeout policy.
 
 
 v2.13.0 - 14 Feb 2020
diff --git a/lib/automake.mk b/lib/automake.mk
index 95925b57c351..86940ccd2f9e 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -53,6 +53,8 @@  lib_libopenvswitch_la_SOURCES = \
 	lib/conntrack-icmp.c \
 	lib/conntrack-private.h \
 	lib/conntrack-tcp.c \
+	lib/conntrack-tp.c \
+	lib/conntrack-tp.h \
 	lib/conntrack-other.c \
 	lib/conntrack.c \
 	lib/conntrack.h \
diff --git a/lib/conntrack-icmp.c b/lib/conntrack-icmp.c
index 63246f0124d0..5c6ef37d2eb4 100644
--- a/lib/conntrack-icmp.c
+++ b/lib/conntrack-icmp.c
@@ -22,6 +22,7 @@ 
 #include <netinet/icmp6.h>
 
 #include "conntrack-private.h"
+#include "conntrack-tp.h"
 #include "dp-packet.h"
 
 enum OVS_PACKED_ENUM icmp_state {
@@ -50,9 +51,12 @@  icmp_conn_update(struct conntrack *ct, struct conn *conn_,
                  struct dp_packet *pkt OVS_UNUSED, bool reply, long long now)
 {
     struct conn_icmp *conn = conn_icmp_cast(conn_);
-    conn->state = reply ? ICMPS_REPLY : ICMPS_FIRST;
-    conn_update_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
 
+    if (reply && conn->state == ICMPS_FIRST) {
+        conn->state = ICMPS_REPLY;
+    }
+
+    conn_update_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
     return CT_UPDATE_VALID;
 }
 
@@ -76,12 +80,13 @@  icmp6_valid_new(struct dp_packet *pkt)
 
 static struct conn *
 icmp_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED,
-              long long now)
+              long long now, uint32_t tp_id)
 {
     struct conn_icmp *conn = xzalloc(sizeof *conn);
     conn->state = ICMPS_FIRST;
-    conn_init_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
+    conn->up.tp_id = tp_id;
 
+    conn_init_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
     return &conn->up;
 }
 
diff --git a/lib/conntrack-other.c b/lib/conntrack-other.c
index de22ef87cc19..d3b46018586c 100644
--- a/lib/conntrack-other.c
+++ b/lib/conntrack-other.c
@@ -17,6 +17,7 @@ 
 #include <config.h>
 
 #include "conntrack-private.h"
+#include "conntrack-tp.h"
 #include "dp-packet.h"
 
 enum OVS_PACKED_ENUM other_state {
@@ -69,12 +70,13 @@  other_valid_new(struct dp_packet *pkt OVS_UNUSED)
 
 static struct conn *
 other_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED,
-               long long now)
+               long long now, uint32_t tp_id)
 {
     struct conn_other *conn;
 
     conn = xzalloc(sizeof *conn);
     conn->state = OTHERS_FIRST;
+    conn->up.tp_id = tp_id;
 
     conn_init_expiration(ct, &conn->up, other_timeouts[conn->state], now);
 
diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 9a8ca3910157..ccccc1d523ea 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -118,6 +118,8 @@  struct conn {
     /* Immutable data. */
     bool alg_related; /* True if alg data connection. */
     enum ct_conn_type conn_type;
+
+    uint32_t tp_id; /* Timeout policy ID. */
 };
 
 enum ct_update_res {
@@ -163,6 +165,7 @@  struct conntrack {
     struct cmap conns OVS_GUARDED;
     struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED;
     struct hmap zone_limits OVS_GUARDED;
+    struct hmap timeout_policies OVS_GUARDED;
     uint32_t hash_basis; /* Salt for hashing a connection key. */
     pthread_t clean_thread; /* Periodically cleans up connection tracker. */
     struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */
@@ -197,7 +200,7 @@  extern struct ct_l4_proto ct_proto_icmp6;
 
 struct ct_l4_proto {
     struct conn *(*new_conn)(struct conntrack *ct, struct dp_packet *pkt,
-                             long long now);
+                             long long now, uint32_t tp_id);
     bool (*valid_new)(struct dp_packet *pkt);
     enum ct_update_res (*conn_update)(struct conntrack *ct, struct conn *conn,
                                       struct dp_packet *pkt, bool reply,
@@ -206,39 +209,6 @@  struct ct_l4_proto {
                                struct ct_dpif_protoinfo *);
 };
 
-extern long long ct_timeout_val[];
-
-
-/* ct_lock must be held. */
-static inline void
-conn_init_expiration(struct conntrack *ct, struct conn *conn,
-                     enum ct_timeout tm, long long now)
-{
-    conn->expiration = now + ct_timeout_val[tm];
-    ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
-}
-
-/* The conn entry lock must be held on entry and exit. */
-static inline void
-conn_update_expiration(struct conntrack *ct, struct conn *conn,
-                       enum ct_timeout tm, long long now)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
-{
-    ovs_mutex_unlock(&conn->lock);
-
-    ovs_mutex_lock(&ct->ct_lock);
-    ovs_mutex_lock(&conn->lock);
-    if (!conn->cleaned) {
-        conn->expiration = now + ct_timeout_val[tm];
-        ovs_list_remove(&conn->exp_node);
-        ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
-    }
-    ovs_mutex_unlock(&conn->lock);
-    ovs_mutex_unlock(&ct->ct_lock);
-
-    ovs_mutex_lock(&conn->lock);
-}
-
 static inline uint32_t
 tcp_payload_length(struct dp_packet *pkt)
 {
diff --git a/lib/conntrack-tcp.c b/lib/conntrack-tcp.c
index 47261c7551d1..18a2aa7c7d02 100644
--- a/lib/conntrack-tcp.c
+++ b/lib/conntrack-tcp.c
@@ -39,6 +39,7 @@ 
 #include <config.h>
 
 #include "conntrack-private.h"
+#include "conntrack-tp.h"
 #include "coverage.h"
 #include "ct-dpif.h"
 #include "dp-packet.h"
@@ -435,7 +436,8 @@  tcp_valid_new(struct dp_packet *pkt)
 }
 
 static struct conn *
-tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now)
+tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now,
+             uint32_t tp_id)
 {
     struct conn_tcp* newconn = NULL;
     struct tcp_header *tcp = dp_packet_l4(pkt);
@@ -471,6 +473,7 @@  tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now)
     src->state = CT_DPIF_TCPS_SYN_SENT;
     dst->state = CT_DPIF_TCPS_CLOSED;
 
+    newconn->up.tp_id = tp_id;
     conn_init_expiration(ct, &newconn->up, CT_TM_TCP_FIRST_PACKET, now);
 
     return &newconn->up;
diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
new file mode 100644
index 000000000000..5e59f56eb949
--- /dev/null
+++ b/lib/conntrack-tp.c
@@ -0,0 +1,319 @@ 
+/*
+ * Copyright (c) 2020 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "conntrack-private.h"
+#include "conntrack-tp.h"
+#include "ct-dpif.h"
+#include "dp-packet.h"
+
+#include "openvswitch/vlog.h"
+VLOG_DEFINE_THIS_MODULE(conntrack_tp);
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+#define DEFAULT_TP_ID 0
+
+static const char *ct_timeout_str[] = {
+#define CT_TIMEOUT(NAME, VALUE) #NAME,
+    CT_TIMEOUTS
+#undef CT_TIMEOUT
+};
+
+static const char *dpif_ct_timeout_str[] = {
+#define xstr(s) #s
+#define CT_DPIF_TP_TCP_ATTR(NAME, VAL) xstr(TCP_##NAME),
+    CT_DPIF_TP_TCP_ATTRS
+#undef CT_DPIF_TP_TCP_ATTR
+#define CT_DPIF_TP_UDP_ATTR(NAME, VAL) xstr(UDP_##NAME),
+    CT_DPIF_TP_UDP_ATTRS
+#undef CT_DPIF_TP_UDP_ATTR
+#define CT_DPIF_TP_ICMP_ATTR(NAME, VAL) xstr(ICMP_##NAME),
+    CT_DPIF_TP_ICMP_ATTRS
+#undef CT_DPIF_TP_ICMP_ATTR
+#undef xstr
+};
+
+static unsigned int ct_dpif_timeout_value_def[] = {
+#define CT_DPIF_TP_TCP_ATTR(NAME, VAL) [CT_DPIF_TP_ATTR_TCP_##NAME] = VAL,
+    CT_DPIF_TP_TCP_ATTRS
+#undef CT_DPIF_TP_TCP_ATTR
+#define CT_DPIF_TP_UDP_ATTR(NAME, VAL) [CT_DPIF_TP_ATTR_UDP_##NAME] = VAL,
+    CT_DPIF_TP_UDP_ATTRS
+#undef CT_DPIF_TP_UDP_ATTR
+#define CT_DPIF_TP_ICMP_ATTR(NAME, VAL) [CT_DPIF_TP_ATTR_ICMP_##NAME] = VAL,
+    CT_DPIF_TP_ICMP_ATTRS
+#undef CT_DPIF_TP_ICMP_ATTR
+};
+
+static void OVS_UNUSED
+timeout_policy_dump(struct timeout_policy *tp)
+{
+    bool present;
+    int i;
+
+    VLOG_DBG("Timeout Policy ID %u:", tp->policy.id);
+    for (i = 0; i < ARRAY_SIZE(tp->policy.attrs); i++) {
+        if (tp->policy.present & (1 << i)) {
+            present = !!(tp->policy.present & (1 << i));
+            VLOG_DBG("  Policy: %s present: %u value: %u",
+                      dpif_ct_timeout_str[i], present, tp->policy.attrs[i]);
+        }
+    }
+}
+
+static struct timeout_policy *
+timeout_policy_lookup(struct conntrack *ct, int32_t tp_id)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct timeout_policy *tp;
+    uint32_t hash;
+
+    hash = hash_int(tp_id, ct->hash_basis);
+    HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) {
+        if (tp->policy.id == tp_id) {
+            return tp;
+        }
+    }
+    return NULL;
+}
+
+struct timeout_policy *
+timeout_policy_get(struct conntrack *ct, int32_t tp_id)
+{
+    struct timeout_policy *tp;
+
+    ovs_mutex_lock(&ct->ct_lock);
+    tp = timeout_policy_lookup(ct, tp_id);
+    if (!tp) {
+        ovs_mutex_unlock(&ct->ct_lock);
+        return NULL;
+    }
+
+    ovs_mutex_unlock(&ct->ct_lock);
+    return tp;
+}
+
+static void
+update_existing_tp(struct timeout_policy *tp_dst,
+                   struct timeout_policy *tp_src)
+{
+    struct ct_dpif_timeout_policy *dst, *src;
+    int i;
+
+    dst = &tp_dst->policy;
+    src = &tp_src->policy;
+
+    /* Set the value and present bit to dst if present
+     * bit in src is set.
+     */
+    for (i = 0; i < ARRAY_SIZE(dst->attrs); i++) {
+        if (src->present & (1 << i)) {
+            dst->attrs[i] = src->attrs[i];
+            dst->present |= (1 << i);
+        }
+    }
+}
+
+static void
+init_default_tp(struct timeout_policy *tp, uint32_t tp_id)
+{
+    tp->policy.id = tp_id;
+    /* Initialize the timeout value to default, but not
+     * setting the present bit.
+     */
+    tp->policy.present = 0;
+    memcpy(tp->policy.attrs, ct_dpif_timeout_value_def,
+           sizeof tp->policy.attrs);
+}
+
+static void
+timeout_policy_create(struct conntrack *ct,
+                      struct timeout_policy *new_tp)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    uint32_t tp_id = new_tp->policy.id;
+    struct timeout_policy *tp;
+    uint32_t hash;
+
+    tp = xzalloc(sizeof *tp);
+    init_default_tp(tp, tp_id);
+    update_existing_tp(tp, new_tp);
+    hash = hash_int(tp_id, ct->hash_basis);
+    hmap_insert(&ct->timeout_policies, &tp->node, hash);
+}
+
+int
+timeout_policy_update(struct conntrack *ct,
+                      struct timeout_policy *new_tp)
+{
+    int err = 0;
+    uint32_t tp_id = new_tp->policy.id;
+
+    ovs_mutex_lock(&ct->ct_lock);
+    struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id);
+    if (tp) {
+        update_existing_tp(tp, new_tp);
+    } else {
+        timeout_policy_create(ct, new_tp);
+    }
+    ovs_mutex_unlock(&ct->ct_lock);
+    return err;
+}
+
+void
+timeout_policy_clean(struct conntrack *ct, struct timeout_policy *tp)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    hmap_remove(&ct->timeout_policies, &tp->node);
+    free(tp);
+}
+
+int
+timeout_policy_delete(struct conntrack *ct, uint32_t tp_id)
+{
+    ovs_mutex_lock(&ct->ct_lock);
+    struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id);
+    if (tp) {
+        timeout_policy_clean(ct, tp);
+    } else {
+        VLOG_WARN_RL(&rl, "Attempted delete of non-existent timeout "
+                     "policy: zone %d", tp_id);
+    }
+    ovs_mutex_unlock(&ct->ct_lock);
+    return 0;
+}
+
+void
+timeout_policy_init(struct conntrack *ct)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct timeout_policy tp;
+
+    hmap_init(&ct->timeout_policies);
+
+    /* Create default timeout policy. */
+    memset(&tp, 0, sizeof tp);
+    tp.policy.id = DEFAULT_TP_ID;
+    timeout_policy_create(ct, &tp);
+}
+
+static enum ct_dpif_tp_attr
+tm_to_ct_dpif_tp(enum ct_timeout tm)
+{
+    switch (tm) {
+    case CT_TM_TCP_FIRST_PACKET:
+        return CT_DPIF_TP_ATTR_TCP_SYN_SENT;
+    case CT_TM_TCP_OPENING:
+        return CT_DPIF_TP_ATTR_TCP_SYN_RECV;
+    case CT_TM_TCP_ESTABLISHED:
+        return CT_DPIF_TP_ATTR_TCP_ESTABLISHED;
+    case CT_TM_TCP_CLOSING:
+        return CT_DPIF_TP_ATTR_TCP_FIN_WAIT;
+    case CT_TM_TCP_FIN_WAIT:
+        return CT_DPIF_TP_ATTR_TCP_TIME_WAIT;
+    case CT_TM_TCP_CLOSED:
+        return CT_DPIF_TP_ATTR_TCP_CLOSE;
+    case CT_TM_OTHER_FIRST:
+        return CT_DPIF_TP_ATTR_UDP_FIRST;
+    case CT_TM_OTHER_BIDIR:
+        return CT_DPIF_TP_ATTR_UDP_SINGLE;
+    case CT_TM_OTHER_MULTIPLE:
+        return CT_DPIF_TP_ATTR_UDP_MULTIPLE;
+    case CT_TM_ICMP_FIRST:
+        return CT_DPIF_TP_ATTR_ICMP_FIRST;
+    case CT_TM_ICMP_REPLY:
+        return CT_DPIF_TP_ATTR_ICMP_REPLY;
+    case N_CT_TM:
+    default:
+        OVS_NOT_REACHED();
+        break;
+    }
+    OVS_NOT_REACHED();
+    return CT_DPIF_TP_ATTR_MAX;
+}
+
+static void
+conn_update_expiration__(struct conntrack *ct, struct conn *conn,
+                         enum ct_timeout tm, long long now,
+                         uint32_t tp_value)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+    ovs_mutex_unlock(&conn->lock);
+
+    ovs_mutex_lock(&ct->ct_lock);
+    ovs_mutex_lock(&conn->lock);
+    if (!conn->cleaned) {
+        conn->expiration = now + tp_value * 1000;
+        ovs_list_remove(&conn->exp_node);
+        ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
+    }
+    ovs_mutex_unlock(&conn->lock);
+    ovs_mutex_unlock(&ct->ct_lock);
+
+    ovs_mutex_lock(&conn->lock);
+}
+
+/* The conn entry lock must be held on entry and exit. */
+void
+conn_update_expiration(struct conntrack *ct, struct conn *conn,
+                       enum ct_timeout tm, long long now)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct timeout_policy *tp;
+    uint32_t val;
+
+    tp = timeout_policy_lookup(ct, conn->tp_id);
+    if (tp) {
+        val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
+    } else {
+        val = ct_dpif_timeout_value_def[tm_to_ct_dpif_tp(tm)];
+    }
+    VLOG_DBG_RL(&rl, "Update timeout %s with policy id=%d val=%u sec.",
+                ct_timeout_str[tm], conn->tp_id, val);
+
+    conn_update_expiration__(ct, conn, tm, now, val);
+}
+
+static void
+conn_init_expiration__(struct conntrack *ct, struct conn *conn,
+                       enum ct_timeout tm, long long now,
+                       uint32_t tp_value)
+{
+    conn->expiration = now + tp_value * 1000;
+    ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
+}
+
+/* ct_lock must be held. */
+void
+conn_init_expiration(struct conntrack *ct, struct conn *conn,
+                     enum ct_timeout tm, long long now)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct timeout_policy *tp;
+    uint32_t val;
+
+    tp = timeout_policy_lookup(ct, conn->tp_id);
+    if (tp) {
+        val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
+    } else {
+        val = ct_dpif_timeout_value_def[tm_to_ct_dpif_tp(tm)];
+    }
+    VLOG_DBG_RL(&rl, "Init timeout %s with policy id=%d val=%u sec.",
+                ct_timeout_str[tm], conn->tp_id, val);
+
+    conn_init_expiration__(ct, conn, tm, now, val);
+}
diff --git a/lib/conntrack-tp.h b/lib/conntrack-tp.h
new file mode 100644
index 000000000000..c912aa65443c
--- /dev/null
+++ b/lib/conntrack-tp.h
@@ -0,0 +1,30 @@ 
+/*
+ * Copyright (c) 2020 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CONNTRACK_TP_H
+#define CONNTRACK_TP_H 1
+
+enum ct_timeout;
+void timeout_policy_init(struct conntrack *ct);
+void timeout_policy_clean(struct conntrack *ct, struct timeout_policy *tp);
+int timeout_policy_update(struct conntrack *ct, struct timeout_policy *tp);
+int timeout_policy_delete(struct conntrack *ct, uint32_t tp_id);
+struct timeout_policy *timeout_policy_get(struct conntrack *ct, int32_t tp_id);
+void conn_init_expiration(struct conntrack *ct, struct conn *conn,
+                          enum ct_timeout tm, long long now);
+void conn_update_expiration(struct conntrack *ct, struct conn *conn,
+                            enum ct_timeout tm, long long now);
+#endif
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 0cbc8f6d2b25..42ed1963569f 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -25,6 +25,7 @@ 
 #include "bitmap.h"
 #include "conntrack.h"
 #include "conntrack-private.h"
+#include "conntrack-tp.h"
 #include "coverage.h"
 #include "csum.h"
 #include "ct-dpif.h"
@@ -88,7 +89,8 @@  static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis);
 static void conn_key_reverse(struct conn_key *);
 static bool valid_new(struct dp_packet *pkt, struct conn_key *);
 static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt,
-                             struct conn_key *, long long now);
+                             struct conn_key *, long long now,
+                             uint32_t tp_id);
 static void delete_conn_cmn(struct conn *);
 static void delete_conn(struct conn *);
 static void delete_conn_one(struct conn *conn);
@@ -175,12 +177,6 @@  static alg_helper alg_helpers[] = {
     [CT_ALG_CTL_TFTP] = handle_tftp_ctl,
 };
 
-long long ct_timeout_val[] = {
-#define CT_TIMEOUT(NAME, VAL) [CT_TM_##NAME] = VAL,
-    CT_TIMEOUTS
-#undef CT_TIMEOUT
-};
-
 /* The maximum TCP or UDP port number. */
 #define CT_MAX_L4_PORT 65535
 /* String buffer used for parsing FTP string messages.
@@ -312,6 +308,7 @@  conntrack_init(void)
     }
     hmap_init(&ct->zone_limits);
     ct->zone_limit_seq = 0;
+    timeout_policy_init(ct);
     ovs_mutex_unlock(&ct->ct_lock);
 
     ct->hash_basis = random_uint32();
@@ -502,6 +499,12 @@  conntrack_destroy(struct conntrack *ct)
     }
     hmap_destroy(&ct->zone_limits);
 
+    struct timeout_policy *tp;
+    HMAP_FOR_EACH_POP (tp, node, &ct->timeout_policies) {
+        free(tp);
+    }
+    hmap_destroy(&ct->timeout_policies);
+
     ovs_mutex_unlock(&ct->ct_lock);
     ovs_mutex_destroy(&ct->ct_lock);
 
@@ -956,7 +959,7 @@  conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
                struct conn_lookup_ctx *ctx, bool commit, long long now,
                const struct nat_action_info_t *nat_action_info,
                const char *helper, const struct alg_exp_node *alg_exp,
-               enum ct_alg_ctl_type ct_alg_ctl)
+               enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id)
     OVS_REQUIRES(ct->ct_lock)
 {
     struct conn *nc = NULL;
@@ -987,7 +990,7 @@  conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
             return nc;
         }
 
-        nc = new_conn(ct, pkt, &ctx->key, now);
+        nc = new_conn(ct, pkt, &ctx->key, now, tp_id);
         memcpy(&nc->key, &ctx->key, sizeof nc->key);
         memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key);
         conn_key_reverse(&nc->rev_key);
@@ -1275,7 +1278,8 @@  process_one(struct conntrack *ct, struct dp_packet *pkt,
             bool force, bool commit, long long now, const uint32_t *setmark,
             const struct ovs_key_ct_labels *setlabel,
             const struct nat_action_info_t *nat_action_info,
-            ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper)
+            ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
+            uint32_t tp_id)
 {
     /* Reset ct_state whenever entering a new zone. */
     if (pkt->md.ct_state && pkt->md.ct_zone != zone) {
@@ -1359,7 +1363,7 @@  process_one(struct conntrack *ct, struct dp_packet *pkt,
         ovs_mutex_lock(&ct->ct_lock);
         if (!conn_lookup(ct, &ctx->key, now, NULL, NULL)) {
             conn = conn_not_found(ct, pkt, ctx, commit, now, nat_action_info,
-                                  helper, alg_exp, ct_alg_ctl);
+                                  helper, alg_exp, ct_alg_ctl, tp_id);
         }
         ovs_mutex_unlock(&ct->ct_lock);
     }
@@ -1395,7 +1399,7 @@  conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
                   const struct ovs_key_ct_labels *setlabel,
                   ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
                   const struct nat_action_info_t *nat_action_info,
-                  long long now)
+                  long long now, uint32_t tp_id)
 {
     ipf_preprocess_conntrack(ct->ipf, pkt_batch, now, dl_type, zone,
                              ct->hash_basis);
@@ -1417,7 +1421,8 @@  conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
             write_ct_md(packet, zone, NULL, NULL, NULL);
         } else {
             process_one(ct, packet, &ctx, zone, force, commit, now, setmark,
-                        setlabel, nat_action_info, tp_src, tp_dst, helper);
+                        setlabel, nat_action_info, tp_src, tp_dst, helper,
+                        tp_id);
         }
     }
 
@@ -1523,7 +1528,7 @@  conntrack_clean(struct conntrack *ct, long long now)
     atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
     size_t clean_max = n_conn_limit > 10 ? n_conn_limit / 10 : 1;
     long long min_exp = ct_sweep(ct, now, clean_max);
-    long long next_wakeup = MIN(min_exp, now + CT_TM_MIN);
+    long long next_wakeup = MIN(min_exp, now + CT_DPIF_TP_MIN);
 
     return next_wakeup;
 }
@@ -1540,14 +1545,14 @@  conntrack_clean(struct conntrack *ct, long long now)
  * - We want to reduce the number of wakeups and batch connection cleanup
  *   when the load is not very high.  CT_CLEAN_INTERVAL ensures that if we
  *   are coping with the current cleanup tasks, then we wait at least
- *   5 seconds to do further cleanup.
+ *   3 seconds to do further cleanup.
  *
  * - We don't want to keep the map locked too long, as we might prevent
  *   traffic from flowing.  CT_CLEAN_MIN_INTERVAL ensures that if cleanup is
  *   behind, there is at least some 200ms blocks of time when the map will be
  *   left alone, so the datapath can operate unhindered.
  */
-#define CT_CLEAN_INTERVAL 5000 /* 5 seconds */
+#define CT_CLEAN_INTERVAL 3000 /* 3 second */
 #define CT_CLEAN_MIN_INTERVAL 200  /* 0.2 seconds */
 
 static void *
@@ -2347,9 +2352,9 @@  valid_new(struct dp_packet *pkt, struct conn_key *key)
 
 static struct conn *
 new_conn(struct conntrack *ct, struct dp_packet *pkt, struct conn_key *key,
-         long long now)
+         long long now, uint32_t tp_id)
 {
-    return l4_protos[key->nw_proto]->new_conn(ct, pkt, now);
+    return l4_protos[key->nw_proto]->new_conn(ct, pkt, now, tp_id);
 }
 
 static void
diff --git a/lib/conntrack.h b/lib/conntrack.h
index b0d0fc8d9597..9553b188a410 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -20,6 +20,7 @@ 
 #include <stdbool.h>
 
 #include "cmap.h"
+#include "ct-dpif.h"
 #include "latch.h"
 #include "odp-netlink.h"
 #include "openvswitch/hmap.h"
@@ -93,7 +94,7 @@  int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
                       const struct ovs_key_ct_labels *setlabel,
                       ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
                       const struct nat_action_info_t *nat_action_info,
-                      long long now);
+                      long long now, uint32_t tp_id);
 void conntrack_clear(struct dp_packet *packet);
 
 struct conntrack_dump {
@@ -111,6 +112,11 @@  struct conntrack_zone_limit {
     uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
 };
 
+struct timeout_policy {
+    struct hmap_node node;
+    struct ct_dpif_timeout_policy policy;
+};
+
 enum {
     INVALID_ZONE = -2,
     DEFAULT_ZONE = -1, /* Default zone for zone limit management. */
diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
index 8c2480e7ac39..48062a1b8d3d 100644
--- a/lib/ct-dpif.c
+++ b/lib/ct-dpif.c
@@ -778,15 +778,15 @@  ct_dpif_format_zone_limits(uint32_t default_limit,
 }
 
 static const char *const ct_dpif_tp_attr_string[] = {
-#define CT_DPIF_TP_TCP_ATTR(ATTR) \
+#define CT_DPIF_TP_TCP_ATTR(ATTR, VAL) \
     [CT_DPIF_TP_ATTR_TCP_##ATTR] = "TCP_"#ATTR,
     CT_DPIF_TP_TCP_ATTRS
 #undef CT_DPIF_TP_TCP_ATTR
-#define CT_DPIF_TP_UDP_ATTR(ATTR) \
+#define CT_DPIF_TP_UDP_ATTR(ATTR, VAL) \
     [CT_DPIF_TP_ATTR_UDP_##ATTR] = "UDP_"#ATTR,
     CT_DPIF_TP_UDP_ATTRS
 #undef CT_DPIF_TP_UDP_ATTR
-#define CT_DPIF_TP_ICMP_ATTR(ATTR) \
+#define CT_DPIF_TP_ICMP_ATTR(ATTR, VAL) \
     [CT_DPIF_TP_ATTR_ICMP_##ATTR] = "ICMP_"#ATTR,
     CT_DPIF_TP_ICMP_ATTRS
 #undef CT_DPIF_TP_ICMP_ATTR
diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
index 3e227d9e3b6e..707adf70402c 100644
--- a/lib/ct-dpif.h
+++ b/lib/ct-dpif.h
@@ -59,6 +59,8 @@  struct ct_dpif_timestamp {
     uint64_t stop;
 };
 
+#define DEFAULT_TP_ID 0
+
 #define CT_DPIF_TCP_STATES \
     CT_DPIF_TCP_STATE(CLOSED) \
     CT_DPIF_TCP_STATE(LISTEN) \
@@ -225,36 +227,53 @@  struct ct_dpif_zone_limit {
     struct ovs_list node;
 };
 
+/* TP attr with default timeout in seconds. */
 #define CT_DPIF_TP_TCP_ATTRS \
-    CT_DPIF_TP_TCP_ATTR(SYN_SENT) \
-    CT_DPIF_TP_TCP_ATTR(SYN_RECV) \
-    CT_DPIF_TP_TCP_ATTR(ESTABLISHED) \
-    CT_DPIF_TP_TCP_ATTR(FIN_WAIT) \
-    CT_DPIF_TP_TCP_ATTR(CLOSE_WAIT) \
-    CT_DPIF_TP_TCP_ATTR(LAST_ACK) \
-    CT_DPIF_TP_TCP_ATTR(TIME_WAIT) \
-    CT_DPIF_TP_TCP_ATTR(CLOSE) \
-    CT_DPIF_TP_TCP_ATTR(SYN_SENT2) \
-    CT_DPIF_TP_TCP_ATTR(RETRANSMIT) \
-    CT_DPIF_TP_TCP_ATTR(UNACK)
+    CT_DPIF_TP_TCP_ATTR(SYN_SENT, 30) \
+    CT_DPIF_TP_TCP_ATTR(SYN_RECV, 30) \
+    CT_DPIF_TP_TCP_ATTR(ESTABLISHED, 24 * 60 * 60) \
+    CT_DPIF_TP_TCP_ATTR(FIN_WAIT, 15 * 60) \
+    CT_DPIF_TP_TCP_ATTR(CLOSE_WAIT, 45) \
+    CT_DPIF_TP_TCP_ATTR(LAST_ACK, 30) \
+    CT_DPIF_TP_TCP_ATTR(TIME_WAIT, 45) \
+    CT_DPIF_TP_TCP_ATTR(CLOSE, 30) \
+    CT_DPIF_TP_TCP_ATTR(SYN_SENT2, 30) \
+    CT_DPIF_TP_TCP_ATTR(RETRANSMIT, 30) \
+    CT_DPIF_TP_TCP_ATTR(UNACK, 30)
 
 #define CT_DPIF_TP_UDP_ATTRS \
-    CT_DPIF_TP_UDP_ATTR(FIRST) \
-    CT_DPIF_TP_UDP_ATTR(SINGLE) \
-    CT_DPIF_TP_UDP_ATTR(MULTIPLE)
+    CT_DPIF_TP_UDP_ATTR(FIRST, 60) \
+    CT_DPIF_TP_UDP_ATTR(SINGLE, 60) \
+    CT_DPIF_TP_UDP_ATTR(MULTIPLE, 30)
 
 #define CT_DPIF_TP_ICMP_ATTRS \
-    CT_DPIF_TP_ICMP_ATTR(FIRST) \
-    CT_DPIF_TP_ICMP_ATTR(REPLY)
+    CT_DPIF_TP_ICMP_ATTR(FIRST, 60) \
+    CT_DPIF_TP_ICMP_ATTR(REPLY, 30)
+
+/* The minimum value of the default timeout. */
+#define CT_DPIF_TP_MIN 30
+#define CT_DPIF_TP_TCP_ATTR(ATTR, VAL) \
+            BUILD_ASSERT_DECL(VAL >= CT_DPIF_TP_MIN);
+    CT_DPIF_TP_TCP_ATTRS
+#define CT_DPIF_TP_UDP_ATTR(ATTR, VAL) \
+            BUILD_ASSERT_DECL(VAL >= CT_DPIF_TP_MIN);
+    CT_DPIF_TP_UDP_ATTRS
+#undef CT_DPIF_TP_UDP_ATTR
+#define CT_DPIF_TP_ICMP_ATTR(ATTR, VAL) \
+            BUILD_ASSERT_DECL(VAL >= CT_DPIF_TP_MIN);
+    CT_DPIF_TP_ICMP_ATTRS
+#undef CT_DPIF_TP_ICMP_ATTR
+
+#undef CT_DPIF_TP_TCP_ATTR
 
 enum OVS_PACKED_ENUM ct_dpif_tp_attr {
-#define CT_DPIF_TP_TCP_ATTR(ATTR) CT_DPIF_TP_ATTR_TCP_##ATTR,
+#define CT_DPIF_TP_TCP_ATTR(ATTR, VAL) CT_DPIF_TP_ATTR_TCP_##ATTR,
     CT_DPIF_TP_TCP_ATTRS
 #undef CT_DPIF_TP_TCP_ATTR
-#define CT_DPIF_TP_UDP_ATTR(ATTR) CT_DPIF_TP_ATTR_UDP_##ATTR,
+#define CT_DPIF_TP_UDP_ATTR(ATTR, VAL) CT_DPIF_TP_ATTR_UDP_##ATTR,
     CT_DPIF_TP_UDP_ATTRS
 #undef CT_DPIF_TP_UDP_ATTR
-#define CT_DPIF_TP_ICMP_ATTR(ATTR) CT_DPIF_TP_ATTR_ICMP_##ATTR,
+#define CT_DPIF_TP_ICMP_ATTR(ATTR, VAL) CT_DPIF_TP_ATTR_ICMP_##ATTR,
     CT_DPIF_TP_ICMP_ATTRS
 #undef CT_DPIF_TP_ICMP_ATTR
     CT_DPIF_TP_ATTR_MAX
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index e456cc9befbe..916efc5370c9 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -36,6 +36,7 @@ 
 #include "bitmap.h"
 #include "cmap.h"
 #include "conntrack.h"
+#include "conntrack-tp.h"
 #include "coverage.h"
 #include "ct-dpif.h"
 #include "csum.h"
@@ -7337,6 +7338,7 @@  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
         bool commit = false;
         unsigned int left;
         uint16_t zone = 0;
+        uint32_t tp_id = 0;
         const char *helper = NULL;
         const uint32_t *setmark = NULL;
         const struct ovs_key_ct_labels *setlabel = NULL;
@@ -7372,8 +7374,11 @@  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
                  * netlink events. */
                 break;
             case OVS_CT_ATTR_TIMEOUT:
-                /* Userspace datapath does not support customized timeout
-                 * policy yet. */
+                if (!str_to_uint(nl_attr_get_string(b), 10, &tp_id)) {
+                    VLOG_WARN("Invalid Timeout Policy ID: %s.",
+                              nl_attr_get_string(b));
+                    tp_id = DEFAULT_TP_ID;
+                }
                 break;
             case OVS_CT_ATTR_NAT: {
                 const struct nlattr *b_nest;
@@ -7459,7 +7464,7 @@  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
         conntrack_execute(dp->conntrack, packets_, aux->flow->dl_type, force,
                           commit, zone, setmark, setlabel, aux->flow->tp_src,
                           aux->flow->tp_dst, helper, nat_action_info_ref,
-                          pmd->ctx.now / 1000);
+                          pmd->ctx.now / 1000, tp_id);
         break;
     }
 
@@ -7693,6 +7698,62 @@  dpif_netdev_ct_del_limits(struct dpif *dpif OVS_UNUSED,
 }
 
 static int
+dpif_netdev_ct_set_timeout_policy(struct dpif *dpif,
+                                  const struct ct_dpif_timeout_policy *dpif_tp)
+{
+    struct timeout_policy tp;
+    struct dp_netdev *dp;
+
+    dp = get_dp_netdev(dpif);
+    memcpy(&tp.policy, dpif_tp, sizeof tp.policy);
+    return timeout_policy_update(dp->conntrack, &tp);
+}
+
+static int
+dpif_netdev_ct_get_timeout_policy(struct dpif *dpif, uint32_t tp_id,
+                                  struct ct_dpif_timeout_policy *dpif_tp)
+{
+    struct timeout_policy *tp;
+    struct dp_netdev *dp;
+    int err = 0;
+
+    dp = get_dp_netdev(dpif);
+    tp = timeout_policy_get(dp->conntrack, tp_id);
+    if (!tp) {
+        return EINVAL;
+    }
+    memcpy(dpif_tp, &tp->policy, sizeof tp->policy);
+    return err;
+}
+
+static int
+dpif_netdev_ct_del_timeout_policy(struct dpif *dpif,
+                                  uint32_t tp_id)
+{
+    struct dp_netdev *dp;
+    int err = 0;
+
+    dp = get_dp_netdev(dpif);
+    err = timeout_policy_delete(dp->conntrack, tp_id);
+    return err;
+}
+
+static int
+dpif_netdev_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
+                                       uint32_t tp_id,
+                                       uint16_t dl_type OVS_UNUSED,
+                                       uint8_t nw_proto OVS_UNUSED,
+                                       char **tp_name, bool *is_generic)
+{
+    struct ds ds = DS_EMPTY_INITIALIZER;
+
+    ds_put_format(&ds, "%"PRIu32, tp_id);
+    *tp_name = ds_steal_cstr(&ds);
+    *is_generic = true;
+    return 0;
+}
+
+static int
 dpif_netdev_ipf_set_enabled(struct dpif *dpif, bool v6, bool enable)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
@@ -7802,13 +7863,13 @@  const struct dpif_class dpif_netdev_class = {
     dpif_netdev_ct_set_limits,
     dpif_netdev_ct_get_limits,
     dpif_netdev_ct_del_limits,
-    NULL,                       /* ct_set_timeout_policy */
-    NULL,                       /* ct_get_timeout_policy */
-    NULL,                       /* ct_del_timeout_policy */
+    dpif_netdev_ct_set_timeout_policy,
+    dpif_netdev_ct_get_timeout_policy,
+    dpif_netdev_ct_del_timeout_policy,
     NULL,                       /* ct_timeout_policy_dump_start */
     NULL,                       /* ct_timeout_policy_dump_next */
     NULL,                       /* ct_timeout_policy_dump_done */
-    NULL,                       /* ct_get_timeout_policy_name */
+    dpif_netdev_ct_get_timeout_policy_name,
     dpif_netdev_ipf_set_enabled,
     dpif_netdev_ipf_set_min_frag,
     dpif_netdev_ipf_set_max_nfrags,
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index d21874b466f8..7e10375f2af8 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -5426,7 +5426,8 @@  clear_existing_ct_timeout_policies(struct dpif_backer *backer)
 static void
 ct_zone_config_init(struct dpif_backer *backer)
 {
-    backer->tp_ids = id_pool_create(0, MAX_TIMEOUT_POLICY_ID);
+    backer->tp_ids = id_pool_create(DEFAULT_TP_ID + 1,
+                                    MAX_TIMEOUT_POLICY_ID - 1);
     cmap_init(&backer->ct_zones);
     hmap_init(&backer->ct_tps);
     ovs_list_init(&backer->ct_tp_kill_list);
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 4a39c929c207..f69a6cde7816 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -3291,7 +3291,7 @@  NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0],
 ])
 AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
 
-sleep 4
+sleep 3
 
 AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], [0], [dnl
 icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=5
@@ -3301,8 +3301,15 @@  udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
 AT_CHECK([ovs-appctl dpctl/flush-conntrack])
 
 dnl Shorten the udp_single and icmp_first timeout in zone 5
+dnl Userspace datapath uses udp_first and icmp_reply, and
+dnl kernel datapath uses udp_single and icmp_first
 VSCTL_ADD_DATAPATH_TABLE()
-AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_single=3 icmp_first=3])
+
+dnl Creating more timeout policies
+for i in `seq 1 255`; do
+ovs-vsctl --may-exist add-zone-tp $DP_TYPE zone=5 udp_first=$i udp_single=$i icmp_first=$i icmp_reply=$i;
+done
+AT_CHECK([ovs-vsctl --may-exist add-zone-tp $DP_TYPE zone=5 udp_first=1 udp_single=1 icmp_first=1 icmp_reply=1])
 
 dnl Send ICMP and UDP traffic
 NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
@@ -3317,7 +3324,7 @@  udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
 
 dnl Wait until the timeout expire.
 dnl We intend to wait a bit longer, because conntrack does not recycle the entry right after it is expired.
-sleep 4
+sleep 5
 
 AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
 ])
@@ -3335,11 +3342,25 @@  udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
 
 dnl Wait until the timeout expire.
 dnl We intend to wait a bit longer, because conntrack does not recycle the entry right after it is expired.
-sleep 4
+sleep 5
 
 AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
 ])
 
+dnl Set the timeout policy to default again.
+AT_CHECK([ovs-vsctl --may-exist add-zone-tp $DP_TYPE zone=5 icmp_first=30])
+
+dnl Send ICMP and UDP traffic
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], [0], [dnl
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=5
+udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=5
+])
+
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at
index ba7f4102f494..72c84b9c7c82 100644
--- a/tests/system-userspace-macros.at
+++ b/tests/system-userspace-macros.at
@@ -99,12 +99,8 @@  m4_define([CHECK_CONNTRACK_NAT])
 # CHECK_CONNTRACK_TIMEOUT()
 #
 # Perform requirements checks for running conntrack customized timeout tests.
-* The userspace datapath does not support this feature yet.
 #
-m4_define([CHECK_CONNTRACK_TIMEOUT],
-[
-    AT_SKIP_IF([:])
-])
+m4_define([CHECK_CONNTRACK_TIMEOUT])
 
 # CHECK_CT_DPIF_SET_GET_MAXCONNS()
 #
diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c
index f77ee75e38df..e7c73220aef5 100644
--- a/tests/test-conntrack.c
+++ b/tests/test-conntrack.c
@@ -90,7 +90,7 @@  ct_thread_main(void *aux_)
     ovs_barrier_block(&barrier);
     for (i = 0; i < n_pkts; i += batch_size) {
         conntrack_execute(ct, pkt_batch, dl_type, false, true, 0, NULL, NULL,
-                          0, 0, NULL, NULL, now);
+                          0, 0, NULL, NULL, now, 0);
     }
     ovs_barrier_block(&barrier);
     destroy_packets(pkt_batch);
@@ -174,7 +174,7 @@  pcap_batch_execute_conntrack(struct conntrack *ct_,
 
         if (flow.dl_type != dl_type) {
             conntrack_execute(ct_, &new_batch, dl_type, false, true, 0,
-                              NULL, NULL, 0, 0, NULL, NULL, now);
+                              NULL, NULL, 0, 0, NULL, NULL, now, 0);
             dp_packet_batch_init(&new_batch);
         }
         dp_packet_batch_add(&new_batch, packet);
@@ -182,7 +182,7 @@  pcap_batch_execute_conntrack(struct conntrack *ct_,
 
     if (!dp_packet_batch_is_empty(&new_batch)) {
         conntrack_execute(ct_, &new_batch, dl_type, false, true, 0, NULL, NULL,
-                          0, 0, NULL, NULL, now);
+                          0, 0, NULL, NULL, now, 0);
     }
 
 }