diff mbox series

[ovs-dev,v5,6/9] ofproto-dpif: Consume CT_Zone, and CT_Timeout_Policy tables

Message ID 1567030469-120137-7-git-send-email-yihung.wei@gmail.com
State Accepted
Commit 993cae678bca283cf0ef553bed5df99c7bb15b13
Headers show
Series Support zone-based conntrack timeout policy | expand

Commit Message

Yi-Hung Wei Aug. 28, 2019, 10:14 p.m. UTC
This patch consumes the CT_Zone and CT_Timeout_Policy tables, maintains
the zone-based configuration in the vswitchd.  Whenever there is a
database change, vswitchd will read the datapath, CT_Zone, and
CT_Timeout_Policy tables from ovsdb, builds an internal snapshot of the
database configuration in bridge.c, and pushes down the change into
ofproto and dpif layer.

If a new zone-based timeout policy is added, it updates the zone to
timeout policy mapping in the per datapath type datapath structure in
dpif-backer, and pushes down the timeout policy into the datapath via
dpif interface.

If a timeout policy is no longer used, for kernel datapath, vswitchd
may not be able to remove it from datapath immediately since
datapath flows can still reference the to-be-deleted timeout policies.
Thus, we keep an timeout policy kill list, that vswitchd will go
back to the list periodically and try to kill the unused timeout policies.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
---
 ofproto/ofproto-dpif.c     | 294 +++++++++++++++++++++++++++++++++++++++++++++
 ofproto/ofproto-dpif.h     |  10 ++
 ofproto/ofproto-provider.h |  10 ++
 ofproto/ofproto.c          |  26 ++++
 ofproto/ofproto.h          |   5 +
 vswitchd/bridge.c          | 199 ++++++++++++++++++++++++++++++
 6 files changed, 544 insertions(+)

Comments

Yi-Hung Wei Sept. 10, 2019, 12:07 a.m. UTC | #1
On Wed, Aug 28, 2019 at 3:15 PM Yi-Hung Wei <yihung.wei@gmail.com> wrote:
>
> This patch consumes the CT_Zone and CT_Timeout_Policy tables, maintains
> the zone-based configuration in the vswitchd.  Whenever there is a
> database change, vswitchd will read the datapath, CT_Zone, and
> CT_Timeout_Policy tables from ovsdb, builds an internal snapshot of the
> database configuration in bridge.c, and pushes down the change into
> ofproto and dpif layer.
>
> If a new zone-based timeout policy is added, it updates the zone to
> timeout policy mapping in the per datapath type datapath structure in
> dpif-backer, and pushes down the timeout policy into the datapath via
> dpif interface.
>
> If a timeout policy is no longer used, for kernel datapath, vswitchd
> may not be able to remove it from datapath immediately since
> datapath flows can still reference the to-be-deleted timeout policies.
> Thus, we keep an timeout policy kill list, that vswitchd will go
> back to the list periodically and try to kill the unused timeout policies.
>
> Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
> ---

When reviewing this patch (the 6th patch in v5), please squash in the
following diff from Darrell.

Thanks,

-Yi-Hung

<---------- beginning of patch ------------------>
From 7518e557281140d58d6e9ce89bd575f73e33f615 Mon Sep 17 00:00:00 2001
From: Darrell Ball <dball@vmware.com>
Date: Fri, 6 Sep 2019 12:52:54 -0700
Subject: [PATCH] bridge: Don't keep IDL state across iterations.

Refactor code to remove keeping OVSDB client IDL state
across iterations, since this leads to unpredictable
behavior, including crashes.

VMWare-BZ: #2413581

Signed-off-by: Darrell Ball <dball@vmware.com>
---
 vswitchd/bridge.c | 55 ++++++++++++++++++++-----------------------------------
 1 file changed, 20 insertions(+), 35 deletions(-)

diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 56f42c0736a2..5defaf428571 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -171,7 +171,6 @@ struct datapath {
     struct hmap ct_zones;       /* Map of 'struct ct_zone' elements, indexed
                                  * by 'zone'. */
     struct hmap_node node;      /* Node in 'all_datapaths' hmap. */
-    const struct ovsrec_datapath *dp_cfg;
     unsigned int last_used;     /* The last idl_seqno that this 'datapath'
                                  * used in OVSDB. This number is used for
                                  * garbage collection. */
@@ -680,12 +679,10 @@ datapath_lookup(const char *type)
 }

 static struct datapath *
-datapath_create(const struct ovsrec_datapath *dp_cfg, const char *type)
+datapath_create(const char *type)
 {
     struct datapath *dp = xzalloc(sizeof *dp);
     dp->type = xstrdup(type);
-    dp->dp_cfg = dp_cfg;
-
     hmap_init(&dp->ct_zones);
     hmap_insert(&all_datapaths, &dp->node, hash_string(type, 0));
     return dp;
@@ -710,34 +707,8 @@ datapath_destroy(struct datapath *dp)
 }

 static void
-update_datapath_cfgs(const struct ovsrec_open_vswitch *cfg)
+reconfigure_ct_zones(struct datapath *dp, struct ovsrec_datapath *dp_cfg)
 {
-    struct datapath *dp, *next;
-
-    /* Add new 'datapath's or update existing ones. */
-    for (size_t i = 0; i < cfg->n_datapaths; i++) {
-        const struct ovsrec_datapath *dp_cfg = cfg->value_datapaths[i];
-        char *dp_name = cfg->key_datapaths[i];
-
-        dp = datapath_lookup(dp_name);
-        if (!dp) {
-            dp = datapath_create(dp_cfg, dp_name);
-        }
-        dp->last_used = idl_seqno;
-    }
-
-    /* Purge deleted 'datapath's. */
-    HMAP_FOR_EACH_SAFE (dp, next, node, &all_datapaths) {
-        if (dp->last_used != idl_seqno) {
-            datapath_destroy(dp);
-        }
-    }
-}
-
-static void
-reconfigure_ct_zones(struct datapath *dp)
-{
-    const struct ovsrec_datapath *dp_cfg = dp->dp_cfg;
     struct ct_zone *ct_zone, *next;

     /* Add new 'ct_zone's or update existing 'ct_zone's based on the database
@@ -776,12 +747,26 @@ reconfigure_ct_zones(struct datapath *dp)
 static void
 reconfigure_datapath_cfgs(const struct ovsrec_open_vswitch *cfg)
 {
-    struct datapath *dp;
+    struct datapath *dp, *next;

-    update_datapath_cfgs(cfg);
+    /* Add new 'datapath's or update existing ones. */
+    for (size_t i = 0; i < cfg->n_datapaths; i++) {
+        struct ovsrec_datapath *dp_cfg = cfg->value_datapaths[i];
+        char *dp_name = cfg->key_datapaths[i];

-    HMAP_FOR_EACH (dp, node, &all_datapaths) {
-        reconfigure_ct_zones(dp);
+        dp = datapath_lookup(dp_name);
+        if (!dp) {
+            dp = datapath_create(dp_name);
+        }
+        dp->last_used = idl_seqno;
+        reconfigure_ct_zones(dp, dp_cfg);
+    }
+
+    /* Purge deleted 'datapath's. */
+    HMAP_FOR_EACH_SAFE (dp, next, node, &all_datapaths) {
+        if (dp->last_used != idl_seqno) {
+            datapath_destroy(dp);
+        }
     }
 }
Justin Pettit Sept. 14, 2019, 12:33 a.m. UTC | #2
> On Aug 28, 2019, at 3:14 PM, Yi-Hung Wei <yihung.wei@gmail.com> wrote:
> 
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index 751535249e21..4b4c4d722645 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> 
> +static void
> +ct_timeout_policy_destroy(struct ct_timeout_policy *ct_tp,
> +                          struct id_pool *tp_ids)
> +{
> +    id_pool_free_id(tp_ids, ct_tp->tp_id);
> +    simap_destroy(&ct_tp->tp);
> +    ovsrcu_postpone(free, ct_tp);
> +}

I assume we're delaying the freeing of 'ct_tp' in case there are people that still have references to it.  Should id_pool_free_id() and simap_destroy() be called from ovsrcu_postpone()?  (I didn't propose a change for this.)

> +static void
> +ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone,
> +                           struct simap *timeout_policy)
> +{
> +    struct dpif_backer *backer = shash_find_data(&all_dpif_backers,
> +                                                 datapath_type);
> +    if (!backer) {
> +        return;
> +    }
> +
> +    struct ct_timeout_policy *ct_tp = ct_timeout_policy_lookup(&backer->ct_tps,
> +                                                               timeout_policy);
> +    if (!ct_tp) {
> +        ct_tp = ct_timeout_policy_alloc(timeout_policy, backer->tp_ids);
> +        if (ct_tp) {
> +            hmap_insert(&backer->ct_tps, &ct_tp->node, simap_hash(&ct_tp->tp));
> +            ct_add_timeout_policy_to_dpif(backer->dpif, ct_tp);
> +        } else {
> +            VLOG_ERR_RL(&rl, "failed to allocate timeout policy");

The call to ct_timeout_policy_alloc() will already log this error message if there was a problem.

> +            return;
> +        }
> +    }
> +
> +    struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone);
> +    if (ct_zone) {
> +        if (ct_zone->ct_tp != ct_tp) {
> +            /* Add the new zone timeout pollicy. */
> +            struct ct_zone *new_ct_zone = ct_zone_alloc(zone);
> +            new_ct_zone->ct_tp = ct_tp;
> +            ct_tp->ref_count++;
> +            cmap_replace(&backer->ct_zones, &ct_zone->node, &new_ct_zone->node,
> +                         hash_int(zone, 0));
> +
> +            /* Deletes the old zone timeout policy. */
> +            ct_timeout_policy_unref(backer, ct_zone->ct_tp);
> +            ct_zone_destroy(ct_zone);

Is there a reason that a new ct_zone needs to be allocated anew?  Couldn't you just do something like the following?

    struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone_id);
     if (ct_zone) {
         if (ct_zone->ct_tp != ct_tp) {
-            /* Add the new zone timeout pollicy. */
-            struct ct_zone *new_ct_zone = ct_zone_alloc(zone);
-            new_ct_zone->ct_tp = ct_tp;
-            ct_tp->ref_count++;
-            cmap_replace(&backer->ct_zones, &ct_zone->node, &new_ct_zone->node,
-                         hash_int(zone, 0));
-
-            /* Deletes the old zone timeout policy. */
+            /* Update the zone timeout policy. */
             ct_timeout_policy_unref(backer, ct_zone->ct_tp);
-            ct_zone_destroy(ct_zone);
+            ct_zone->ct_tp = ct_tp;
+            ct_tp->ref_count++;
         }

> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> index 6cc454371dc7..d53c8e2f80a4 100644
> --- a/ofproto/ofproto-provider.h
> +++ b/ofproto/ofproto-provider.h
> @@ -58,6 +58,7 @@
> #include "tun-metadata.h"
> #include "versions.h"
> #include "vl-mff-map.h"
> +#include "vswitch-idl.h"

I don't think this #include is necessary with these changes.

> +    /* Sets conntrack timeout policy specified by 'timeout_policy' to 'zone'
> +     * in datapath type 'dp_type'. */
> +    void (*ct_set_zone_timeout_policy)(const char *dp_type, uint16_t zone,
> +                                       struct simap *timeout_policy);
> +
> +    /* Deletes the timeout policy associated with 'zone' in datapath type
> +     * 'dp_type'. */
> +    void (*ct_del_zone_timeout_policy)(const char *dp_type, uint16_t zone);
> };

I think it's clearer to use 'zone_id' throughout instead of 'zone'.

> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index d921c4ef8d5f..c43f2ac34e67 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -153,9 +153,36 @@ struct aa_mapping {
>     char *br_name;
> };
> 
> +/* Internal representation of conntrack zone configuration table in OVSDB. */
> +struct ct_zone {
> +    uint16_t zone;

I think 'zone_id' is a clearer name for what this is.  I'd also suggest using that through bridge.c.

> +    struct simap tp;            /* A map from timeout policy attribute to
> +                                 * timeout value. */
> +    unsigned int last_used;     /* The last idl_seqno that this 'ct_zone' used
> +                                 * in OVSDB. This number is used for garbage
> +                                 * collection. */
> +    struct hmap_node node;      /* Node in 'struct datapath' 'ct_zones'
> +                                 * hmap. */
> +};
> +
> +/* Internal representation of datapath configuration table in OVSDB. */
> +struct datapath {
> +    char *type;                 /* Datapath type. */
> +    struct hmap ct_zones;       /* Map of 'struct ct_zone' elements, indexed
> +                                 * by 'zone'. */
> +    struct hmap_node node;      /* Node in 'all_datapaths' hmap. */
> +    const struct ovsrec_datapath *dp_cfg;
> +    unsigned int last_used;     /* The last idl_seqno that this 'datapath'
> +                                 * used in OVSDB. This number is used for
> +                                 * garbage collection. */
> +};

It really doesn't matter, but I'd suggest making the order of "last_used" and "node" in the same order.

> +/* All datapath configuartions, indexed by type. */
> +static struct hmap all_datapaths = HMAP_INITIALIZER(&all_datapaths);

I don't think it's necessary to use an hmap to store the datapaths, since the usual case will be a single datapath and at most two.  The hmap code special-cases a one-node hmap, so there won't be too much overhead usually, but there is some unnecessary complication and hashing on every lookup.  I'll send out a patch later to switch it to a linked list, and we can see how it compares.

> +/* Replace 'old_tp' by 'new_tp' (destroyed 'new_tp'). Returns true if 'old_tp'
> + * and 'new_tp' contains different data, false if they are the same. */
> +static bool
> +update_timeout_policy(struct simap *old_tp, struct simap *new_tp)
> +{
> +    bool changed = !simap_equal(old_tp, new_tp);
> +    simap_swap(old_tp, new_tp);
> +    simap_destroy(new_tp);
> +    return changed;
> +}

It seems like unnecessary work to always call simap_swap().  What about only doing it they're changed?

> +static void
> +reconfigure_ct_zones(struct datapath *dp)

I think it would be a bit more consistent to call this ct_zones_reconfigure() to match bridge_reconfigure().

> +static void
> +reconfigure_datapath_cfgs(const struct ovsrec_open_vswitch *cfg)

I think it would be a bit more consistent to call this datapath_reconfigure() to match bridge_reconfigure().

I agree with Darrell's follow-on patch to this function not to depend "dp_cfg" between database runs.

I would recommend calling datapath_destroy() on all the datapaths on bridge_exit().  It's not strictly necessary, but it makes it easier to track down memory leaks if things are cleaned up as much as possible.

Thanks for the patch!  Let me know what you think about the questions I posed above and the incremental below.

--Justin


-=-=-=-=-=-=-=-=-

diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 4b4c4d722645..2271f9f48bd8 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -5183,12 +5183,12 @@ ct_timeout_policy_unref(struct dpif_backer *backer,
 }
 
 static struct ct_zone *
-ct_zone_lookup(const struct cmap *ct_zones, uint16_t zone)
+ct_zone_lookup(const struct cmap *ct_zones, uint16_t zone_id)
 {
     struct ct_zone *ct_zone;
 
-    CMAP_FOR_EACH_WITH_HASH (ct_zone, node, hash_int(zone, 0), ct_zones) {
-        if (ct_zone->zone_id == zone) {
+    CMAP_FOR_EACH_WITH_HASH (ct_zone, node, hash_int(zone_id, 0), ct_zones) {
+        if (ct_zone->zone_id == zone_id) {
             return ct_zone;
         }
     }
@@ -5196,10 +5196,10 @@ ct_zone_lookup(const struct cmap *ct_zones, uint16_t zone)
 }
 
 static struct ct_zone *
-ct_zone_alloc(uint16_t zone)
+ct_zone_alloc(uint16_t zone_id)
 {
     struct ct_zone *ct_zone = xzalloc(sizeof *ct_zone);
-    ct_zone->zone_id = zone;
+    ct_zone->zone_id = zone_id;
     return ct_zone;
 }
 
@@ -5240,7 +5240,7 @@ static void
 clear_existing_ct_timeout_policies(struct dpif_backer *backer)
 {
     /* In kernel datapath, when OVS starts, there may be some pre-existing
-     * timeout policies in the kernel.  To avoid reassign the same timeout
+     * timeout policies in the kernel.  To avoid reassigning the same timeout
      * policy ids, we dump all the pre-existing timeout policies and keep
      * the ids in the pool.  Since OVS will not use those timeout policies
      * for new datapath flow, we add them to the kill list and remove
@@ -5267,9 +5267,9 @@ clear_existing_ct_timeout_policies(struct dpif_backer *backer)
 static void
 ct_zone_config_init(struct dpif_backer *backer)
 {
+    backer->tp_ids = id_pool_create(0, MAX_TIMEOUT_POLICY_ID);
     cmap_init(&backer->ct_zones);
     hmap_init(&backer->ct_tps);
-    backer->tp_ids = id_pool_create(0, MAX_TIMEOUT_POLICY_ID);
     ovs_list_init(&backer->ct_tp_kill_list);
     clear_existing_ct_timeout_policies(backer);
 }
@@ -5277,13 +5277,12 @@ ct_zone_config_init(struct dpif_backer *backer)
 static void
 ct_zone_config_uninit(struct dpif_backer *backer)
 {
-    struct ct_timeout_policy *ct_tp;
     struct ct_zone *ct_zone;
-
     CMAP_FOR_EACH (ct_zone, node, &backer->ct_zones) {
         ct_zone_remove_and_destroy(backer, ct_zone);
     }
 
+    struct ct_timeout_policy *ct_tp;
     HMAP_FOR_EACH_POP (ct_tp, node, &backer->ct_tps) {
         ct_timeout_policy_destroy(ct_tp, backer->tp_ids);
     }
@@ -5292,9 +5291,9 @@ ct_zone_config_uninit(struct dpif_backer *backer)
         ct_timeout_policy_destroy(ct_tp, backer->tp_ids);
     }
 
+    id_pool_destroy(backer->tp_ids);
     cmap_destroy(&backer->ct_zones);
     hmap_destroy(&backer->ct_tps);
-    id_pool_destroy(backer->tp_ids);
 }
 
 static void
@@ -5318,7 +5317,7 @@ ct_zone_timeout_policy_sweep(struct dpif_backer *backer)
 }
 
 static void
-ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone,
+ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone_id,
                            struct simap *timeout_policy)
 {
     struct dpif_backer *backer = shash_find_data(&all_dpif_backers,
@@ -5335,35 +5334,29 @@ ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone,
             hmap_insert(&backer->ct_tps, &ct_tp->node, simap_hash(&ct_tp->tp));
             ct_add_timeout_policy_to_dpif(backer->dpif, ct_tp);
         } else {
-            VLOG_ERR_RL(&rl, "failed to allocate timeout policy");
             return;
         }
     }
 
-    struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone);
+    struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone_id);
     if (ct_zone) {
         if (ct_zone->ct_tp != ct_tp) {
-            /* Add the new zone timeout pollicy. */
-            struct ct_zone *new_ct_zone = ct_zone_alloc(zone);
-            new_ct_zone->ct_tp = ct_tp;
-            ct_tp->ref_count++;
-            cmap_replace(&backer->ct_zones, &ct_zone->node, &new_ct_zone->node,
-                         hash_int(zone, 0));
-
-            /* Deletes the old zone timeout policy. */
+            /* Update the zone timeout policy. */
             ct_timeout_policy_unref(backer, ct_zone->ct_tp);
-            ct_zone_destroy(ct_zone);
+            ct_zone->ct_tp = ct_tp;
+            ct_tp->ref_count++;
         }
     } else {
-        struct ct_zone *new_ct_zone = ct_zone_alloc(zone);
+        struct ct_zone *new_ct_zone = ct_zone_alloc(zone_id);
         new_ct_zone->ct_tp = ct_tp;
-        cmap_insert(&backer->ct_zones, &new_ct_zone->node, hash_int(zone, 0));
+        cmap_insert(&backer->ct_zones, &new_ct_zone->node,
+                    hash_int(zone_id, 0));
         ct_tp->ref_count++;
     }
 }
 
 static void
-ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone)
+ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone_id)
 {
     struct dpif_backer *backer = shash_find_data(&all_dpif_backers,
                                                  datapath_type);
@@ -5371,7 +5364,7 @@ ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone)
         return;
     }
 
-    struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone);
+    struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone_id);
     if (ct_zone) {
         ct_timeout_policy_unref(backer, ct_zone->ct_tp);
         ct_zone_remove_and_destroy(backer, ct_zone);
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index d53c8e2f80a4..c980e6bffff5 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -58,7 +58,6 @@
 #include "tun-metadata.h"
 #include "versions.h"
 #include "vl-mff-map.h"
-#include "vswitch-idl.h"
 
 struct match;
 struct ofputil_flow_mod;
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index e1fe3814c5ac..b4249b0d8818 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -956,26 +956,26 @@ ofproto_get_flow_restore_wait(void)
 
 /* Connection tracking configuration. */
 void
-ofproto_ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone,
+ofproto_ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone_id,
                                    struct simap *timeout_policy)
 {
     datapath_type = ofproto_normalize_type(datapath_type);
     const struct ofproto_class *class = ofproto_class_find__(datapath_type);
 
     if (class->ct_set_zone_timeout_policy) {
-        class->ct_set_zone_timeout_policy(datapath_type, zone,
+        class->ct_set_zone_timeout_policy(datapath_type, zone_id,
                                           timeout_policy);
     }
 }
 
 void
-ofproto_ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone)
+ofproto_ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone_id)
 {
     datapath_type = ofproto_normalize_type(datapath_type);
     const struct ofproto_class *class = ofproto_class_find__(datapath_type);
 
     if (class->ct_del_zone_timeout_policy) {
-        class->ct_del_zone_timeout_policy(datapath_type, zone);
+        class->ct_del_zone_timeout_policy(datapath_type, zone_id);
     }
 
 }
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 13e3d0c74b99..280ee81e37ae 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -155,14 +155,14 @@ struct aa_mapping {
 
 /* Internal representation of conntrack zone configuration table in OVSDB. */
 struct ct_zone {
-    uint16_t zone;
+    uint16_t zone_id;
     struct simap tp;            /* A map from timeout policy attribute to
                                  * timeout value. */
+    struct hmap_node node;      /* Node in 'struct datapath' 'ct_zones'
+                                 * hmap. */
     unsigned int last_used;     /* The last idl_seqno that this 'ct_zone' used
                                  * in OVSDB. This number is used for garbage
                                  * collection. */
-    struct hmap_node node;      /* Node in 'struct datapath' 'ct_zones'
-                                 * hmap. */
 };
 
 /* Internal representation of datapath configuration table in OVSDB. */
@@ -297,6 +297,8 @@ static bool bridge_has_bond_fake_iface(const struct bridge *,
                                        const char *name);
 static bool port_is_bond_fake_iface(const struct port *);
 
+static void datapath_destroy(struct datapath *dp);
+
 static unixctl_cb_func qos_unixctl_show_types;
 static unixctl_cb_func qos_unixctl_show;
 
@@ -527,13 +529,19 @@ bridge_init(const char *remote)
 void
 bridge_exit(bool delete_datapath)
 {
-    struct bridge *br, *next_br;
-
     if_notifier_destroy(ifnotifier);
     seq_destroy(ifaces_changed);
+
+    struct datapath *dp, *next;
+    HMAP_FOR_EACH_SAFE (dp, next, node, &all_datapaths) {
+        datapath_destroy(dp);
+    }
+
+    struct bridge *br, *next_br;
     HMAP_FOR_EACH_SAFE (br, next_br, node, &all_bridges) {
         bridge_destroy(br, delete_datapath);
     }
+
     ovsdb_idl_destroy(idl);
 }
 
@@ -623,12 +631,12 @@ get_timeout_policy_from_ovsrec(struct simap *tp,
 }
 
 static struct ct_zone *
-ct_zone_lookup(struct hmap *ct_zones, uint16_t zone)
+ct_zone_lookup(struct hmap *ct_zones, uint16_t zone_id)
 {
     struct ct_zone *ct_zone;
 
-    HMAP_FOR_EACH_WITH_HASH (ct_zone, node, hash_int(zone, 0), ct_zones) {
-        if (ct_zone->zone == zone) {
+    HMAP_FOR_EACH_WITH_HASH (ct_zone, node, hash_int(zone_id, 0), ct_zones) {
+        if (ct_zone->zone_id == zone_id) {
             return ct_zone;
         }
     }
@@ -636,11 +644,11 @@ ct_zone_lookup(struct hmap *ct_zones, uint16_t zone)
 }
 
 static struct ct_zone *
-ct_zone_alloc(uint16_t zone, struct ovsrec_ct_timeout_policy *tp_cfg)
+ct_zone_alloc(uint16_t zone_id, struct ovsrec_ct_timeout_policy *tp_cfg)
 {
     struct ct_zone *ct_zone = xzalloc(sizeof *ct_zone);
 
-    ct_zone->zone = zone;
+    ct_zone->zone_id = zone_id;
     simap_init(&ct_zone->tp);
     get_timeout_policy_from_ovsrec(&ct_zone->tp, tp_cfg);
     return ct_zone;
@@ -660,7 +668,9 @@ static bool
 update_timeout_policy(struct simap *old_tp, struct simap *new_tp)
 {
     bool changed = !simap_equal(old_tp, new_tp);
-    simap_swap(old_tp, new_tp);
+    if (changed) {
+        simap_swap(old_tp, new_tp);
+    }
     simap_destroy(new_tp);
     return changed;
 }
@@ -691,11 +701,10 @@ datapath_create(const char *type)
 static void
 datapath_destroy(struct datapath *dp)
 {
-    struct ct_zone *ct_zone, *next;
-
     if (dp) {
+        struct ct_zone *ct_zone, *next;
         HMAP_FOR_EACH_SAFE (ct_zone, next, node, &dp->ct_zones) {
-            ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone);
+            ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone_id);
             ct_zone_remove_and_destroy(dp, ct_zone);
         }
 
@@ -707,29 +716,29 @@ datapath_destroy(struct datapath *dp)
 }
 
 static void
-reconfigure_ct_zones(struct datapath *dp, struct ovsrec_datapath *dp_cfg)
+ct_zones_reconfigure(struct datapath *dp, struct ovsrec_datapath *dp_cfg)
 {
     struct ct_zone *ct_zone, *next;
 
     /* Add new 'ct_zone's or update existing 'ct_zone's based on the database
      * state. */
     for (size_t i = 0; i < dp_cfg->n_ct_zones; i++) {
-        uint16_t zone = dp_cfg->key_ct_zones[i];
+        uint16_t zone_id = dp_cfg->key_ct_zones[i];
         struct ovsrec_ct_zone *zone_cfg = dp_cfg->value_ct_zones[i];
         struct ovsrec_ct_timeout_policy *tp_cfg = zone_cfg->timeout_policy;
 
-        ct_zone = ct_zone_lookup(&dp->ct_zones, zone);
+        ct_zone = ct_zone_lookup(&dp->ct_zones, zone_id);
         if (ct_zone) {
             struct simap new_tp = SIMAP_INITIALIZER(&new_tp);
             get_timeout_policy_from_ovsrec(&new_tp, tp_cfg);
             if (update_timeout_policy(&ct_zone->tp, &new_tp)) {
-                ofproto_ct_set_zone_timeout_policy(dp->type, ct_zone->zone,
+                ofproto_ct_set_zone_timeout_policy(dp->type, ct_zone->zone_id,
                                                    &ct_zone->tp);
             }
         } else {
-            ct_zone = ct_zone_alloc(zone, tp_cfg);
-            hmap_insert(&dp->ct_zones, &ct_zone->node, hash_int(zone, 0));
-            ofproto_ct_set_zone_timeout_policy(dp->type, ct_zone->zone,
+            ct_zone = ct_zone_alloc(zone_id, tp_cfg);
+            hmap_insert(&dp->ct_zones, &ct_zone->node, hash_int(zone_id, 0));
+            ofproto_ct_set_zone_timeout_policy(dp->type, ct_zone->zone_id,
                                                &ct_zone->tp);
         }
         ct_zone->last_used = idl_seqno;
@@ -738,14 +747,14 @@ reconfigure_ct_zones(struct datapath *dp, struct ovsrec_datapath *dp_cfg)
     /* Purge 'ct_zone's no longer found in the database. */
     HMAP_FOR_EACH_SAFE (ct_zone, next, node, &dp->ct_zones) {
         if (ct_zone->last_used != idl_seqno) {
-            ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone);
+            ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone_id);
             ct_zone_remove_and_destroy(dp, ct_zone);
         }
     }
 }
 
 static void
-reconfigure_datapath_cfgs(const struct ovsrec_open_vswitch *cfg)
+datapath_reconfigure(const struct ovsrec_open_vswitch *cfg)
 {
     struct datapath *dp, *next;
 
@@ -759,7 +768,7 @@ reconfigure_datapath_cfgs(const struct ovsrec_open_vswitch *cfg)
             dp = datapath_create(dp_name);
         }
         dp->last_used = idl_seqno;
-        reconfigure_ct_zones(dp, dp_cfg);
+        ct_zones_reconfigure(dp, dp_cfg);
     }
 
     /* Purge deleted 'datapath's. */
@@ -858,7 +867,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
     }
 
     reconfigure_system_stats(ovs_cfg);
-    reconfigure_datapath_cfgs(ovs_cfg);
+    datapath_reconfigure(ovs_cfg);
 
     /* Complete the configuration. */
     sflow_bridge_number = 0;
Yi-Hung Wei Sept. 16, 2019, 8:36 p.m. UTC | #3
On Fri, Sep 13, 2019 at 5:33 PM Justin Pettit <jpettit@ovn.org> wrote:
>
>
> > On Aug 28, 2019, at 3:14 PM, Yi-Hung Wei <yihung.wei@gmail.com> wrote:
> >
> > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> > index 751535249e21..4b4c4d722645 100644
> > --- a/ofproto/ofproto-dpif.c
> > +++ b/ofproto/ofproto-dpif.c
> >
> > +static void
> > +ct_timeout_policy_destroy(struct ct_timeout_policy *ct_tp,
> > +                          struct id_pool *tp_ids)
> > +{
> > +    id_pool_free_id(tp_ids, ct_tp->tp_id);
> > +    simap_destroy(&ct_tp->tp);
> > +    ovsrcu_postpone(free, ct_tp);
> > +}
>
> I assume we're delaying the freeing of 'ct_tp' in case there are people that still have references to it.  Should id_pool_free_id() and simap_destroy() be called from ovsrcu_postpone()?  (I didn't propose a change for this.)

Yes, simap_destroy() should be called from ovsrcu_postpone(). I will
fix that in the next version with the following diff.

For id_pool_free_id(), I think it is fine to keep that in
ct_timeout_policy_destroy().  Currently, we free the timeout policy id
from the pool only when all of the sub timeout policies are deleted in
the datapath.  Therefore, it should be fine to reuse the id.

diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index fc970074ced2..910f415dfea0 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -5222,12 +5222,18 @@ ct_timeout_policy_alloc(struct simap *tp,
struct id_pool *tp_ids)
 }

 static void
+ct_timeout_policy_destroy__(struct ct_timeout_policy *ct_tp)
+{
+    simap_destroy(&ct_tp->tp);
+    free(ct_tp);
+}
+
+static void
 ct_timeout_policy_destroy(struct ct_timeout_policy *ct_tp,
                           struct id_pool *tp_ids)
 {
     id_pool_free_id(tp_ids, ct_tp->tp_id);
-    simap_destroy(&ct_tp->tp);
-    ovsrcu_postpone(free, ct_tp);
+    ovsrcu_postpone(ct_timeout_policy_destroy__, ct_tp);
 }


> > +static void
> > +ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone,
> > +                           struct simap *timeout_policy)
> > +{
> > +    struct dpif_backer *backer = shash_find_data(&all_dpif_backers,
> > +                                                 datapath_type);
> > +    if (!backer) {
> > +        return;
> > +    }
> > +
> > +    struct ct_timeout_policy *ct_tp = ct_timeout_policy_lookup(&backer->ct_tps,
> > +                                                               timeout_policy);
> > +    if (!ct_tp) {
> > +        ct_tp = ct_timeout_policy_alloc(timeout_policy, backer->tp_ids);
> > +        if (ct_tp) {
> > +            hmap_insert(&backer->ct_tps, &ct_tp->node, simap_hash(&ct_tp->tp));
> > +            ct_add_timeout_policy_to_dpif(backer->dpif, ct_tp);
> > +        } else {
> > +            VLOG_ERR_RL(&rl, "failed to allocate timeout policy");
>
> The call to ct_timeout_policy_alloc() will already log this error message if there was a problem.
Sounds good.


> > +            return;
> > +        }
> > +    }
> > +
> > +    struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone);
> > +    if (ct_zone) {
> > +        if (ct_zone->ct_tp != ct_tp) {
> > +            /* Add the new zone timeout pollicy. */
> > +            struct ct_zone *new_ct_zone = ct_zone_alloc(zone);
> > +            new_ct_zone->ct_tp = ct_tp;
> > +            ct_tp->ref_count++;
> > +            cmap_replace(&backer->ct_zones, &ct_zone->node, &new_ct_zone->node,
> > +                         hash_int(zone, 0));
> > +
> > +            /* Deletes the old zone timeout policy. */
> > +            ct_timeout_policy_unref(backer, ct_zone->ct_tp);
> > +            ct_zone_destroy(ct_zone);
>
> Is there a reason that a new ct_zone needs to be allocated anew?  Couldn't you just do something like the following?

Thanks, I think it is unnecessary to allocate a new ct_zone. The new
approach looks good to me.

> > diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> > index 6cc454371dc7..d53c8e2f80a4 100644
> > --- a/ofproto/ofproto-provider.h
> > +++ b/ofproto/ofproto-provider.h
> > @@ -58,6 +58,7 @@
> > #include "tun-metadata.h"
> > #include "versions.h"
> > #include "vl-mff-map.h"
> > +#include "vswitch-idl.h"
>
> I don't think this #include is necessary with these changes.

Oops, I forget to take this line out in this version.  Thanks for
pointing this out.


> > diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> > index d921c4ef8d5f..c43f2ac34e67 100644
> > --- a/vswitchd/bridge.c
> > +++ b/vswitchd/bridge.c
> > @@ -153,9 +153,36 @@ struct aa_mapping {
> >     char *br_name;
> > };
> >
> > +/* Internal representation of conntrack zone configuration table in OVSDB. */
> > +struct ct_zone {
> > +    uint16_t zone;
>
> I think 'zone_id' is a clearer name for what this is.  I'd also suggest using that through bridge.c.

Yes, it is much clearer to replace 'zone' with 'zone_id' and the other
suggested naming changes in the following. Thanks for the diff.


> > +/* All datapath configuartions, indexed by type. */
> > +static struct hmap all_datapaths = HMAP_INITIALIZER(&all_datapaths);
>
> I don't think it's necessary to use an hmap to store the datapaths, since the usual case will be a single datapath and at most two.  The hmap code special-cases a one-node hmap, so there won't be too much overhead usually, but there is some unnecessary complication and hashing on every lookup.  I'll send out a patch later to switch it to a linked list, and we can see how it compares.

Agreed. It makes more sense to use linked list than hamp.

> > +/* Replace 'old_tp' by 'new_tp' (destroyed 'new_tp'). Returns true if 'old_tp'
> > + * and 'new_tp' contains different data, false if they are the same. */
> > +static bool
> > +update_timeout_policy(struct simap *old_tp, struct simap *new_tp)
> > +{
> > +    bool changed = !simap_equal(old_tp, new_tp);
> > +    simap_swap(old_tp, new_tp);
> > +    simap_destroy(new_tp);
> > +    return changed;
> > +}
>
> It seems like unnecessary work to always call simap_swap().  What about only doing it they're changed?
Sounds good.


> > +static void
> > +reconfigure_datapath_cfgs(const struct ovsrec_open_vswitch *cfg)
>
> I think it would be a bit more consistent to call this datapath_reconfigure() to match bridge_reconfigure().
>
> I agree with Darrell's follow-on patch to this function not to depend "dp_cfg" between database runs.
>
> I would recommend calling datapath_destroy() on all the datapaths on bridge_exit().  It's not strictly necessary, but it makes it easier to track down memory leaks if things are cleaned up as much as possible.

Make sense. Thanks.


> Thanks for the patch!  Let me know what you think about the questions I posed above and the incremental below.

The incremental looks good to me. Thanks for review and the diff!

-Yi-Hung
diff mbox series

Patch

diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 751535249e21..4b4c4d722645 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -156,6 +156,31 @@  struct ofport_dpif {
     size_t n_qdscp;
 };
 
+struct ct_timeout_policy {
+    int ref_count;              /* The number of ct zones that use this
+                                 * timeout policy. */
+    uint32_t tp_id;             /* Timeout policy id in the datapath. */
+    struct simap tp;            /* A map from timeout policy attribute to
+                                 * timeout value. */
+    struct hmap_node node;      /* Element in struct dpif_backer's "ct_tps"
+                                 * cmap. */
+    struct ovs_list list_node;  /* Element in struct dpif_backer's
+                                 * "ct_tp_kill_list" list. */
+};
+
+/* Periodically try to purge deleted timeout policies from the datapath. Retry
+ * may be necessary if the kernel datapath has a non-zero datapath flow
+ * reference count for the timeout policy. */
+#define TIMEOUT_POLICY_CLEANUP_INTERVAL (20000) /* 20 seconds. */
+static long long int timeout_policy_cleanup_timer = LLONG_MIN;
+
+struct ct_zone {
+    uint16_t zone_id;
+    struct ct_timeout_policy *ct_tp;
+    struct cmap_node node;          /* Element in struct dpif_backer's
+                                     * "ct_zones" cmap. */
+};
+
 static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *,
                                        ofp_port_t);
 
@@ -196,6 +221,9 @@  static struct hmap all_ofproto_dpifs_by_uuid =
 
 static bool ofproto_use_tnl_push_pop = true;
 static void ofproto_unixctl_init(void);
+static void ct_zone_config_init(struct dpif_backer *backer);
+static void ct_zone_config_uninit(struct dpif_backer *backer);
+static void ct_zone_timeout_policy_sweep(struct dpif_backer *backer);
 
 static inline struct ofproto_dpif *
 ofproto_dpif_cast(const struct ofproto *ofproto)
@@ -488,6 +516,7 @@  type_run(const char *type)
     }
 
     process_dpif_port_changes(backer);
+    ct_zone_timeout_policy_sweep(backer);
 
     return 0;
 }
@@ -683,6 +712,7 @@  close_dpif_backer(struct dpif_backer *backer, bool del)
     }
     dpif_close(backer->dpif);
     id_pool_destroy(backer->meter_ids);
+    ct_zone_config_uninit(backer);
     free(backer);
 }
 
@@ -811,6 +841,8 @@  open_dpif_backer(const char *type, struct dpif_backer **backerp)
         backer->meter_ids = NULL;
     }
 
+    ct_zone_config_init(backer);
+
     /* Make a pristine snapshot of 'support' into 'boottime_support'.
      * 'boottime_support' can be checked to prevent 'support' to be changed
      * beyond the datapath capabilities. In case 'support' is changed by
@@ -5086,6 +5118,266 @@  ct_flush(const struct ofproto *ofproto_, const uint16_t *zone)
     ct_dpif_flush(ofproto->backer->dpif, zone, NULL);
 }
 
+static struct ct_timeout_policy *
+ct_timeout_policy_lookup(const struct hmap *ct_tps, struct simap *tp)
+{
+    struct ct_timeout_policy *ct_tp;
+
+    HMAP_FOR_EACH_WITH_HASH (ct_tp, node, simap_hash(tp), ct_tps) {
+        if (simap_equal(&ct_tp->tp, tp)) {
+            return ct_tp;
+        }
+    }
+    return NULL;
+}
+
+static struct ct_timeout_policy *
+ct_timeout_policy_alloc__(void)
+{
+    struct ct_timeout_policy *ct_tp = xzalloc(sizeof *ct_tp);
+    simap_init(&ct_tp->tp);
+    return ct_tp;
+}
+
+static struct ct_timeout_policy *
+ct_timeout_policy_alloc(struct simap *tp, struct id_pool *tp_ids)
+{
+    struct simap_node *node;
+
+    struct ct_timeout_policy *ct_tp = ct_timeout_policy_alloc__();
+    SIMAP_FOR_EACH (node, tp) {
+        simap_put(&ct_tp->tp, node->name, node->data);
+    }
+
+    if (!id_pool_alloc_id(tp_ids, &ct_tp->tp_id)) {
+        VLOG_ERR_RL(&rl, "failed to allocate timeout policy id.");
+        simap_destroy(&ct_tp->tp);
+        free(ct_tp);
+        return NULL;
+    }
+
+    return ct_tp;
+}
+
+static void
+ct_timeout_policy_destroy(struct ct_timeout_policy *ct_tp,
+                          struct id_pool *tp_ids)
+{
+    id_pool_free_id(tp_ids, ct_tp->tp_id);
+    simap_destroy(&ct_tp->tp);
+    ovsrcu_postpone(free, ct_tp);
+}
+
+static void
+ct_timeout_policy_unref(struct dpif_backer *backer,
+                        struct ct_timeout_policy *ct_tp)
+{
+    if (ct_tp) {
+        ct_tp->ref_count--;
+
+        if (!ct_tp->ref_count) {
+            hmap_remove(&backer->ct_tps, &ct_tp->node);
+            ovs_list_push_back(&backer->ct_tp_kill_list, &ct_tp->list_node);
+        }
+    }
+}
+
+static struct ct_zone *
+ct_zone_lookup(const struct cmap *ct_zones, uint16_t zone)
+{
+    struct ct_zone *ct_zone;
+
+    CMAP_FOR_EACH_WITH_HASH (ct_zone, node, hash_int(zone, 0), ct_zones) {
+        if (ct_zone->zone_id == zone) {
+            return ct_zone;
+        }
+    }
+    return NULL;
+}
+
+static struct ct_zone *
+ct_zone_alloc(uint16_t zone)
+{
+    struct ct_zone *ct_zone = xzalloc(sizeof *ct_zone);
+    ct_zone->zone_id = zone;
+    return ct_zone;
+}
+
+static void
+ct_zone_destroy(struct ct_zone *ct_zone)
+{
+    ovsrcu_postpone(free, ct_zone);
+}
+
+static void
+ct_zone_remove_and_destroy(struct dpif_backer *backer, struct ct_zone *ct_zone)
+{
+    cmap_remove(&backer->ct_zones, &ct_zone->node,
+                hash_int(ct_zone->zone_id, 0));
+    ct_zone_destroy(ct_zone);
+}
+
+static void
+ct_add_timeout_policy_to_dpif(struct dpif *dpif,
+                              struct ct_timeout_policy *ct_tp)
+{
+    struct ct_dpif_timeout_policy cdtp;
+    struct simap_node *node;
+
+    cdtp.id = ct_tp->tp_id;
+    SIMAP_FOR_EACH (node, &ct_tp->tp) {
+        ct_dpif_set_timeout_policy_attr_by_name(&cdtp, node->name, node->data);
+    }
+
+    int err = ct_dpif_set_timeout_policy(dpif, &cdtp);
+    if (err) {
+        VLOG_ERR_RL(&rl, "failed to set timeout policy %"PRIu32" (%s)",
+                    ct_tp->tp_id, ovs_strerror(err));
+    }
+}
+
+static void
+clear_existing_ct_timeout_policies(struct dpif_backer *backer)
+{
+    /* In kernel datapath, when OVS starts, there may be some pre-existing
+     * timeout policies in the kernel.  To avoid reassign the same timeout
+     * policy ids, we dump all the pre-existing timeout policies and keep
+     * the ids in the pool.  Since OVS will not use those timeout policies
+     * for new datapath flow, we add them to the kill list and remove
+     * them later on. */
+    struct ct_dpif_timeout_policy cdtp;
+    void *state;
+
+    if (ct_dpif_timeout_policy_dump_start(backer->dpif, &state)) {
+        return;
+    }
+
+    while (!ct_dpif_timeout_policy_dump_next(backer->dpif, state, &cdtp)) {
+        struct ct_timeout_policy *ct_tp = ct_timeout_policy_alloc__();
+        ct_tp->tp_id = cdtp.id;
+        id_pool_add(backer->tp_ids, cdtp.id);
+        ovs_list_push_back(&backer->ct_tp_kill_list, &ct_tp->list_node);
+    }
+
+    ct_dpif_timeout_policy_dump_done(backer->dpif, state);
+}
+
+#define MAX_TIMEOUT_POLICY_ID UINT32_MAX
+
+static void
+ct_zone_config_init(struct dpif_backer *backer)
+{
+    cmap_init(&backer->ct_zones);
+    hmap_init(&backer->ct_tps);
+    backer->tp_ids = id_pool_create(0, MAX_TIMEOUT_POLICY_ID);
+    ovs_list_init(&backer->ct_tp_kill_list);
+    clear_existing_ct_timeout_policies(backer);
+}
+
+static void
+ct_zone_config_uninit(struct dpif_backer *backer)
+{
+    struct ct_timeout_policy *ct_tp;
+    struct ct_zone *ct_zone;
+
+    CMAP_FOR_EACH (ct_zone, node, &backer->ct_zones) {
+        ct_zone_remove_and_destroy(backer, ct_zone);
+    }
+
+    HMAP_FOR_EACH_POP (ct_tp, node, &backer->ct_tps) {
+        ct_timeout_policy_destroy(ct_tp, backer->tp_ids);
+    }
+
+    LIST_FOR_EACH_POP (ct_tp, list_node, &backer->ct_tp_kill_list) {
+        ct_timeout_policy_destroy(ct_tp, backer->tp_ids);
+    }
+
+    cmap_destroy(&backer->ct_zones);
+    hmap_destroy(&backer->ct_tps);
+    id_pool_destroy(backer->tp_ids);
+}
+
+static void
+ct_zone_timeout_policy_sweep(struct dpif_backer *backer)
+{
+    if (!ovs_list_is_empty(&backer->ct_tp_kill_list)
+        && time_msec() >= timeout_policy_cleanup_timer) {
+        struct ct_timeout_policy *ct_tp, *next;
+
+        LIST_FOR_EACH_SAFE (ct_tp, next, list_node, &backer->ct_tp_kill_list) {
+            if (!ct_dpif_del_timeout_policy(backer->dpif, ct_tp->tp_id)) {
+                ovs_list_remove(&ct_tp->list_node);
+                ct_timeout_policy_destroy(ct_tp, backer->tp_ids);
+            } else {
+                /* INFO log raised by 'dpif' layer. */
+            }
+        }
+        timeout_policy_cleanup_timer = time_msec() +
+            TIMEOUT_POLICY_CLEANUP_INTERVAL;
+    }
+}
+
+static void
+ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone,
+                           struct simap *timeout_policy)
+{
+    struct dpif_backer *backer = shash_find_data(&all_dpif_backers,
+                                                 datapath_type);
+    if (!backer) {
+        return;
+    }
+
+    struct ct_timeout_policy *ct_tp = ct_timeout_policy_lookup(&backer->ct_tps,
+                                                               timeout_policy);
+    if (!ct_tp) {
+        ct_tp = ct_timeout_policy_alloc(timeout_policy, backer->tp_ids);
+        if (ct_tp) {
+            hmap_insert(&backer->ct_tps, &ct_tp->node, simap_hash(&ct_tp->tp));
+            ct_add_timeout_policy_to_dpif(backer->dpif, ct_tp);
+        } else {
+            VLOG_ERR_RL(&rl, "failed to allocate timeout policy");
+            return;
+        }
+    }
+
+    struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone);
+    if (ct_zone) {
+        if (ct_zone->ct_tp != ct_tp) {
+            /* Add the new zone timeout pollicy. */
+            struct ct_zone *new_ct_zone = ct_zone_alloc(zone);
+            new_ct_zone->ct_tp = ct_tp;
+            ct_tp->ref_count++;
+            cmap_replace(&backer->ct_zones, &ct_zone->node, &new_ct_zone->node,
+                         hash_int(zone, 0));
+
+            /* Deletes the old zone timeout policy. */
+            ct_timeout_policy_unref(backer, ct_zone->ct_tp);
+            ct_zone_destroy(ct_zone);
+        }
+    } else {
+        struct ct_zone *new_ct_zone = ct_zone_alloc(zone);
+        new_ct_zone->ct_tp = ct_tp;
+        cmap_insert(&backer->ct_zones, &new_ct_zone->node, hash_int(zone, 0));
+        ct_tp->ref_count++;
+    }
+}
+
+static void
+ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone)
+{
+    struct dpif_backer *backer = shash_find_data(&all_dpif_backers,
+                                                 datapath_type);
+    if (!backer) {
+        return;
+    }
+
+    struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone);
+    if (ct_zone) {
+        ct_timeout_policy_unref(backer, ct_zone->ct_tp);
+        ct_zone_remove_and_destroy(backer, ct_zone);
+    }
+}
+
 static bool
 set_frag_handling(struct ofproto *ofproto_,
                   enum ofputil_frag_handling frag_handling)
@@ -6189,4 +6481,6 @@  const struct ofproto_class ofproto_dpif_class = {
     get_datapath_version,       /* get_datapath_version */
     type_set_config,
     ct_flush,                   /* ct_flush */
+    ct_set_zone_timeout_policy,
+    ct_del_zone_timeout_policy,
 };
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index cd5321eb942c..0dd7a45fe550 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -245,6 +245,16 @@  struct dpif_backer {
     /* Meter. */
     struct id_pool *meter_ids;     /* Datapath meter allocation. */
 
+    /* Connection tracking. */
+    struct id_pool *tp_ids;             /* Datapath timeout policy id
+                                         * allocation. */
+    struct cmap ct_zones;               /* "struct ct_zone"s indexed by zone
+                                         * id. */
+    struct hmap ct_tps;                 /* "struct ct_timeout_policy"s indexed
+                                         * by timeout policy (struct simap). */
+    struct ovs_list ct_tp_kill_list;    /* A list of timeout policy to be
+                                         * deleted. */
+
     /* Version string of the datapath stored in OVSDB. */
     char *dp_version_string;
 
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 6cc454371dc7..d53c8e2f80a4 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -58,6 +58,7 @@ 
 #include "tun-metadata.h"
 #include "versions.h"
 #include "vl-mff-map.h"
+#include "vswitch-idl.h"
 
 struct match;
 struct ofputil_flow_mod;
@@ -1880,6 +1881,15 @@  struct ofproto_class {
     /* Flushes the connection tracking tables. If 'zone' is not NULL,
      * only deletes connections in '*zone'. */
     void (*ct_flush)(const struct ofproto *, const uint16_t *zone);
+
+    /* Sets conntrack timeout policy specified by 'timeout_policy' to 'zone'
+     * in datapath type 'dp_type'. */
+    void (*ct_set_zone_timeout_policy)(const char *dp_type, uint16_t zone,
+                                       struct simap *timeout_policy);
+
+    /* Deletes the timeout policy associated with 'zone' in datapath type
+     * 'dp_type'. */
+    void (*ct_del_zone_timeout_policy)(const char *dp_type, uint16_t zone);
 };
 
 extern const struct ofproto_class ofproto_dpif_class;
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 12758a3f7651..e1fe3814c5ac 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -954,6 +954,32 @@  ofproto_get_flow_restore_wait(void)
     return flow_restore_wait;
 }
 
+/* Connection tracking configuration. */
+void
+ofproto_ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone,
+                                   struct simap *timeout_policy)
+{
+    datapath_type = ofproto_normalize_type(datapath_type);
+    const struct ofproto_class *class = ofproto_class_find__(datapath_type);
+
+    if (class->ct_set_zone_timeout_policy) {
+        class->ct_set_zone_timeout_policy(datapath_type, zone,
+                                          timeout_policy);
+    }
+}
+
+void
+ofproto_ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone)
+{
+    datapath_type = ofproto_normalize_type(datapath_type);
+    const struct ofproto_class *class = ofproto_class_find__(datapath_type);
+
+    if (class->ct_del_zone_timeout_policy) {
+        class->ct_del_zone_timeout_policy(datapath_type, zone);
+    }
+
+}
+
 
 /* Spanning Tree Protocol (STP) configuration. */
 
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 1977bc2b5c1c..6c1b47e036c7 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -366,6 +366,11 @@  int ofproto_get_stp_status(struct ofproto *, struct ofproto_stp_status *);
 int ofproto_set_rstp(struct ofproto *, const struct ofproto_rstp_settings *);
 int ofproto_get_rstp_status(struct ofproto *, struct ofproto_rstp_status *);
 void ofproto_set_vlan_limit(int vlan_limit);
+void ofproto_ct_set_zone_timeout_policy(const char *datapath_type,
+                                        uint16_t zone,
+                                        struct simap *timeout_policy);
+void ofproto_ct_del_zone_timeout_policy(const char *datapath_type,
+                                        uint16_t zone);
 
 /* Configuration of ports. */
 void ofproto_port_unregister(struct ofproto *, ofp_port_t ofp_port);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index d921c4ef8d5f..c43f2ac34e67 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -153,9 +153,36 @@  struct aa_mapping {
     char *br_name;
 };
 
+/* Internal representation of conntrack zone configuration table in OVSDB. */
+struct ct_zone {
+    uint16_t zone;
+    struct simap tp;            /* A map from timeout policy attribute to
+                                 * timeout value. */
+    unsigned int last_used;     /* The last idl_seqno that this 'ct_zone' used
+                                 * in OVSDB. This number is used for garbage
+                                 * collection. */
+    struct hmap_node node;      /* Node in 'struct datapath' 'ct_zones'
+                                 * hmap. */
+};
+
+/* Internal representation of datapath configuration table in OVSDB. */
+struct datapath {
+    char *type;                 /* Datapath type. */
+    struct hmap ct_zones;       /* Map of 'struct ct_zone' elements, indexed
+                                 * by 'zone'. */
+    struct hmap_node node;      /* Node in 'all_datapaths' hmap. */
+    const struct ovsrec_datapath *dp_cfg;
+    unsigned int last_used;     /* The last idl_seqno that this 'datapath'
+                                 * used in OVSDB. This number is used for
+                                 * garbage collection. */
+};
+
 /* All bridges, indexed by name. */
 static struct hmap all_bridges = HMAP_INITIALIZER(&all_bridges);
 
+/* All datapath configuartions, indexed by type. */
+static struct hmap all_datapaths = HMAP_INITIALIZER(&all_datapaths);
+
 /* OVSDB IDL used to obtain configuration. */
 static struct ovsdb_idl *idl;
 
@@ -588,6 +615,177 @@  config_ofproto_types(const struct smap *other_config)
 }
 
 static void
+get_timeout_policy_from_ovsrec(struct simap *tp,
+                               const struct ovsrec_ct_timeout_policy *tp_cfg)
+{
+    for (size_t i = 0; i < tp_cfg->n_timeouts; i++) {
+        simap_put(tp, tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]);
+    }
+}
+
+static struct ct_zone *
+ct_zone_lookup(struct hmap *ct_zones, uint16_t zone)
+{
+    struct ct_zone *ct_zone;
+
+    HMAP_FOR_EACH_WITH_HASH (ct_zone, node, hash_int(zone, 0), ct_zones) {
+        if (ct_zone->zone == zone) {
+            return ct_zone;
+        }
+    }
+    return NULL;
+}
+
+static struct ct_zone *
+ct_zone_alloc(uint16_t zone, struct ovsrec_ct_timeout_policy *tp_cfg)
+{
+    struct ct_zone *ct_zone = xzalloc(sizeof *ct_zone);
+
+    ct_zone->zone = zone;
+    simap_init(&ct_zone->tp);
+    get_timeout_policy_from_ovsrec(&ct_zone->tp, tp_cfg);
+    return ct_zone;
+}
+
+static void
+ct_zone_remove_and_destroy(struct datapath *dp, struct ct_zone *ct_zone)
+{
+    hmap_remove(&dp->ct_zones, &ct_zone->node);
+    simap_destroy(&ct_zone->tp);
+    free(ct_zone);
+}
+
+/* Replace 'old_tp' by 'new_tp' (destroyed 'new_tp'). Returns true if 'old_tp'
+ * and 'new_tp' contains different data, false if they are the same. */
+static bool
+update_timeout_policy(struct simap *old_tp, struct simap *new_tp)
+{
+    bool changed = !simap_equal(old_tp, new_tp);
+    simap_swap(old_tp, new_tp);
+    simap_destroy(new_tp);
+    return changed;
+}
+
+static struct datapath *
+datapath_lookup(const char *type)
+{
+    struct datapath *dp;
+
+    HMAP_FOR_EACH_WITH_HASH (dp, node, hash_string(type, 0), &all_datapaths) {
+        if (!strcmp(dp->type, type)) {
+            return dp;
+        }
+    }
+    return NULL;
+}
+
+static struct datapath *
+datapath_create(const struct ovsrec_datapath *dp_cfg, const char *type)
+{
+    struct datapath *dp = xzalloc(sizeof *dp);
+    dp->type = xstrdup(type);
+    dp->dp_cfg = dp_cfg;
+
+    hmap_init(&dp->ct_zones);
+    hmap_insert(&all_datapaths, &dp->node, hash_string(type, 0));
+    return dp;
+}
+
+static void
+datapath_destroy(struct datapath *dp)
+{
+    struct ct_zone *ct_zone, *next;
+
+    if (dp) {
+        HMAP_FOR_EACH_SAFE (ct_zone, next, node, &dp->ct_zones) {
+            ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone);
+            ct_zone_remove_and_destroy(dp, ct_zone);
+        }
+
+        hmap_remove(&all_datapaths, &dp->node);
+        hmap_destroy(&dp->ct_zones);
+        free(dp->type);
+        free(dp);
+    }
+}
+
+static void
+update_datapath_cfgs(const struct ovsrec_open_vswitch *cfg)
+{
+    struct datapath *dp, *next;
+
+    /* Add new 'datapath's or update existing ones. */
+    for (size_t i = 0; i < cfg->n_datapaths; i++) {
+        const struct ovsrec_datapath *dp_cfg = cfg->value_datapaths[i];
+        char *dp_name = cfg->key_datapaths[i];
+
+        dp = datapath_lookup(dp_name);
+        if (!dp) {
+            dp = datapath_create(dp_cfg, dp_name);
+        }
+        dp->last_used = idl_seqno;
+    }
+
+    /* Purge deleted 'datapath's. */
+    HMAP_FOR_EACH_SAFE (dp, next, node, &all_datapaths) {
+        if (dp->last_used != idl_seqno) {
+            datapath_destroy(dp);
+        }
+    }
+}
+
+static void
+reconfigure_ct_zones(struct datapath *dp)
+{
+    const struct ovsrec_datapath *dp_cfg = dp->dp_cfg;
+    struct ct_zone *ct_zone, *next;
+
+    /* Add new 'ct_zone's or update existing 'ct_zone's based on the database
+     * state. */
+    for (size_t i = 0; i < dp_cfg->n_ct_zones; i++) {
+        uint16_t zone = dp_cfg->key_ct_zones[i];
+        struct ovsrec_ct_zone *zone_cfg = dp_cfg->value_ct_zones[i];
+        struct ovsrec_ct_timeout_policy *tp_cfg = zone_cfg->timeout_policy;
+
+        ct_zone = ct_zone_lookup(&dp->ct_zones, zone);
+        if (ct_zone) {
+            struct simap new_tp = SIMAP_INITIALIZER(&new_tp);
+            get_timeout_policy_from_ovsrec(&new_tp, tp_cfg);
+            if (update_timeout_policy(&ct_zone->tp, &new_tp)) {
+                ofproto_ct_set_zone_timeout_policy(dp->type, ct_zone->zone,
+                                                   &ct_zone->tp);
+            }
+        } else {
+            ct_zone = ct_zone_alloc(zone, tp_cfg);
+            hmap_insert(&dp->ct_zones, &ct_zone->node, hash_int(zone, 0));
+            ofproto_ct_set_zone_timeout_policy(dp->type, ct_zone->zone,
+                                               &ct_zone->tp);
+        }
+        ct_zone->last_used = idl_seqno;
+    }
+
+    /* Purge 'ct_zone's no longer found in the database. */
+    HMAP_FOR_EACH_SAFE (ct_zone, next, node, &dp->ct_zones) {
+        if (ct_zone->last_used != idl_seqno) {
+            ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone);
+            ct_zone_remove_and_destroy(dp, ct_zone);
+        }
+    }
+}
+
+static void
+reconfigure_datapath_cfgs(const struct ovsrec_open_vswitch *cfg)
+{
+    struct datapath *dp;
+
+    update_datapath_cfgs(cfg);
+
+    HMAP_FOR_EACH (dp, node, &all_datapaths) {
+        reconfigure_ct_zones(dp);
+    }
+}
+
+static void
 bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
 {
     struct sockaddr_in *managers;
@@ -675,6 +873,7 @@  bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
     }
 
     reconfigure_system_stats(ovs_cfg);
+    reconfigure_datapath_cfgs(ovs_cfg);
 
     /* Complete the configuration. */
     sflow_bridge_number = 0;