[ovs-dev,3/4] ovn-controller: Store conntrack zone mappings to OVS database.
diff mbox

Message ID 1474620783-96388-3-git-send-email-jpettit@ovn.org
State Accepted
Headers show

Commit Message

Justin Pettit Sept. 23, 2016, 8:53 a.m. UTC
If ovn-controller is restarted, it may choose different conntrack zones
than had been previously used, which could cause the wrong conntrack
entries to be associated with a logical port.  This commit stores in the
integration bridge's OVS "Bridge" table the mapping to the conntrack zone.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
---
 ovn/controller/ovn-controller.8.xml |  14 ++++
 ovn/controller/ovn-controller.c     | 136 ++++++++++++++++++++++++++++++++++--
 2 files changed, 146 insertions(+), 4 deletions(-)

Comments

Ben Pfaff Sept. 23, 2016, 5:11 p.m. UTC | #1
On Fri, Sep 23, 2016 at 01:53:02AM -0700, Justin Pettit wrote:
> If ovn-controller is restarted, it may choose different conntrack zones
> than had been previously used, which could cause the wrong conntrack
> entries to be associated with a logical port.  This commit stores in the
> integration bridge's OVS "Bridge" table the mapping to the conntrack zone.
> 
> Signed-off-by: Justin Pettit <jpettit@ovn.org>

I'd feel more comfortable given a test or two.  I hope that those can be
written, later if necessary.

Acked-by: Ben Pfaff <blp@ovn.org>
Guru Shetty Sept. 23, 2016, 5:19 p.m. UTC | #2
On 23 September 2016 at 01:53, Justin Pettit <jpettit@ovn.org> wrote:

> If ovn-controller is restarted, it may choose different conntrack zones
> than had been previously used, which could cause the wrong conntrack
> entries to be associated with a logical port.  This commit stores in the
> integration bridge's OVS "Bridge" table the mapping to the conntrack zone.
>
> Signed-off-by: Justin Pettit <jpettit@ovn.org>
> ---
>  ovn/controller/ovn-controller.8.xml |  14 ++++
>  ovn/controller/ovn-controller.c     | 136 ++++++++++++++++++++++++++++++
> ++++--
>  2 files changed, 146 insertions(+), 4 deletions(-)
>
> diff --git a/ovn/controller/ovn-controller.8.xml b/ovn/controller/ovn-
> controller.8.xml
> index 559031f..0484263 100644
> --- a/ovn/controller/ovn-controller.8.xml
> +++ b/ovn/controller/ovn-controller.8.xml
> @@ -200,6 +200,20 @@
>        </dd>
>
>        <dt>
> +        <code>external_ids:ct-zone-*</code> in the <code>Bridge</code>
> table
> +      </dt>
> +      <dd>
> +        Logical ports and gateway routers are assigned a connection
> +        tracking zone by <code>ovn-controller</code> for stateful
> +        services.  To keep state across restarts of
> +        <code>ovn-controller</code>, these keys are stored in the
> +        integration bridge's Bridge table.  The name contains a prefix
> +        of <code>ct-zone-</code> followed by the name of the logical
> +        port.  The value for this key identifies the zone used for this
> +        port.
>
I suppose the above is not really true as we also have gateway routers.


> +      </dd>
> +
> +      <dt>
>          <code>external_ids:ovn-localnet-port</code> in the
> <code>Port</code>
>          table
>        </dt>
> diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-
> controller.c
> index 49821f7..b051a75 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -229,9 +229,21 @@ get_ovnsb_remote(struct ovsdb_idl *ovs_idl)
>      }
>  }
>
> +enum ct_zone_pending_state {
> +    CT_ZONE_DB_QUEUED,    /* Waiting for DB transaction to open. */
> +    CT_ZONE_DB_SENT,      /* Sent and waiting for confirmation from DB. */
> +};
> +
> +struct ct_zone_pending_entry {
> +    enum ct_zone_pending_state state;
> +    int zone;
> +    bool add;             /* Is the entry being added? */
> +};
> +
>  static void
>  update_ct_zones(struct sset *lports, struct hmap *patched_datapaths,
> -                struct simap *ct_zones, unsigned long *ct_zone_bitmap)
> +                struct simap *ct_zones, unsigned long *ct_zone_bitmap,
> +                struct shash *pending_ct_zones)
>  {
>      struct simap_node *ct_zone, *ct_zone_next;
>      int scan_start = 1;
> @@ -260,6 +272,15 @@ update_ct_zones(struct sset *lports, struct hmap
> *patched_datapaths,
>      /* Delete zones that do not exist in above sset. */
>      SIMAP_FOR_EACH_SAFE(ct_zone, ct_zone_next, ct_zones) {
>          if (!sset_contains(&all_users, ct_zone->name)) {
> +            VLOG_DBG("removing ct zone %"PRId32" for '%s'",
> +                     ct_zone->data, ct_zone->name);
> +
> +            struct ct_zone_pending_entry *pending = xmalloc(sizeof
> *pending);
> +            pending->state = CT_ZONE_DB_QUEUED;
> +            pending->zone = ct_zone->data;
> +            pending->add = false;
> +            shash_add(pending_ct_zones, ct_zone->name, pending);
> +
>              bitmap_set0(ct_zone_bitmap, ct_zone->data);
>              simap_delete(ct_zones, ct_zone);
>          }
> @@ -271,7 +292,7 @@ update_ct_zones(struct sset *lports, struct hmap
> *patched_datapaths,
>      /* Assign a unique zone id for each logical port and two zones
>       * to a gateway router. */
>      SSET_FOR_EACH(user, &all_users) {
> -        size_t zone;
> +        int zone;
>
>          if (simap_contains(ct_zones, user)) {
>              continue;
> @@ -286,6 +307,14 @@ update_ct_zones(struct sset *lports, struct hmap
> *patched_datapaths,
>          }
>          scan_start = zone + 1;
>
> +        VLOG_DBG("assigning ct zone %"PRId32" to '%s'", zone, user);
> +
> +        struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending);
> +        pending->state = CT_ZONE_DB_QUEUED;
> +        pending->zone = zone;
> +        pending->add = true;
> +        shash_add(pending_ct_zones, user, pending);
> +
>          bitmap_set1(ct_zone_bitmap, zone);
>          simap_put(ct_zones, user, zone);
>
> @@ -297,6 +326,90 @@ update_ct_zones(struct sset *lports, struct hmap
> *patched_datapaths,
>      sset_destroy(&all_users);
>  }
>
> +static void
> +commit_ct_zones(struct controller_ctx *ctx,
> +                const struct ovsrec_bridge *br_int,
> +                struct shash *pending_ct_zones)
> +{
> +    if (!ctx->ovs_idl_txn) {
> +        return;
> +    }
> +
> +    struct smap new_ids;
> +    smap_clone(&new_ids, &br_int->external_ids);
> +
> +    bool updated = false;
> +    struct shash_node *iter;
> +    SHASH_FOR_EACH(iter, pending_ct_zones) {
> +        struct ct_zone_pending_entry *ctzpe = iter->data;
> +
> +        /* The transaction is open, so any pending entries in the
> +         * CT_ZONE_DB_QUEUED must be sent and any in CT_ZONE_DB_QUEUED
> +         * need to be retried. */
> +        if (ctzpe->state != CT_ZONE_DB_QUEUED
> +            && ctzpe->state != CT_ZONE_DB_SENT) {
> +            continue;
> +        }
> +
> +        char *user_str = xasprintf("ct-zone-%s", iter->name);
> +        if (ctzpe->add) {
> +            char *zone_str = xasprintf("%"PRId32, ctzpe->zone);
> +            smap_replace(&new_ids, user_str, zone_str);
> +            free(zone_str);
> +        } else {
> +            smap_remove(&new_ids, user_str);
> +        }
> +        free(user_str);
> +
> +        ctzpe->state = CT_ZONE_DB_SENT;
> +        updated = true;
> +    }
> +
> +    if (updated) {
> +        ovsrec_bridge_verify_external_ids(br_int);
> +        ovsrec_bridge_set_external_ids(br_int, &new_ids);
> +    }
> +    smap_destroy(&new_ids);
> +}
> +
> +static void
> +restore_ct_zones(struct ovsdb_idl *ovs_idl,
> +                 struct simap *ct_zones, unsigned long *ct_zone_bitmap)
> +{
> +    const struct ovsrec_open_vswitch *cfg;
> +    cfg = ovsrec_open_vswitch_first(ovs_idl);
> +    if (!cfg) {
> +        return;
> +    }
> +
> +    const char *br_int_name = smap_get_def(&cfg->external_ids,
> "ovn-bridge",
> +                                           DEFAULT_BRIDGE_NAME);
> +
> +    const struct ovsrec_bridge *br_int;
> +    br_int = get_bridge(ovs_idl, br_int_name);
> +    if (!br_int) {
> +        /* If the integration bridge hasn't been defined, assume that
> +         * any existing ct-zone definitions aren't valid. */
> +        return;
> +    }
> +
> +    struct smap_node *node;
> +    SMAP_FOR_EACH(node, &br_int->external_ids) {
> +        if (strncmp(node->key, "ct-zone-", 8)) {
> +            continue;
> +        }
> +
> +        const char *user = node->key + 8;
> +        int zone = atoi(node->value);
> +
> +        if (user[0] && zone) {
> +            VLOG_DBG("restoring ct zone %"PRId32" for '%s'", zone, user);
> +            bitmap_set1(ct_zone_bitmap, zone);
> +            simap_put(ct_zones, user, zone);
> +        }
> +    }
> +}
> +
>  static int64_t
>  get_nb_cfg(struct ovsdb_idl *idl)
>  {
> @@ -362,6 +475,7 @@ main(int argc, char *argv[])
>      ovsdb_idl_add_column(ovs_idl_loop.idl, &ovsrec_bridge_col_name);
>      ovsdb_idl_add_column(ovs_idl_loop.idl, &ovsrec_bridge_col_fail_mode);
>      ovsdb_idl_add_column(ovs_idl_loop.idl, &ovsrec_bridge_col_other_
> config);
> +    ovsdb_idl_add_column(ovs_idl_loop.idl, &ovsrec_bridge_col_external_
> ids);
>      chassis_register_ovs_idl(ovs_idl_loop.idl);
>      encaps_register_ovs_idl(ovs_idl_loop.idl);
>      binding_register_ovs_idl(ovs_idl_loop.idl);
> @@ -381,9 +495,11 @@ main(int argc, char *argv[])
>
>      /* Initialize connection tracking zones. */
>      struct simap ct_zones = SIMAP_INITIALIZER(&ct_zones);
> +    struct shash pending_ct_zones = SHASH_INITIALIZER(&pending_ct_zones);
>      unsigned long ct_zone_bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)];
>      memset(ct_zone_bitmap, 0, sizeof ct_zone_bitmap);
>      bitmap_set1(ct_zone_bitmap, 0); /* Zone 0 is reserved. */
> +    restore_ct_zones(ovs_idl_loop.idl, &ct_zones, ct_zone_bitmap);
>      unixctl_command_register("ct-zone-list", "", 0, 0,
>                               ct_zone_list, &ct_zones);
>
> @@ -440,7 +556,8 @@ main(int argc, char *argv[])
>
>              pinctrl_run(&ctx, &lports, br_int, chassis_id,
> &local_datapaths);
>              update_ct_zones(&all_lports, &patched_datapaths, &ct_zones,
> -                            ct_zone_bitmap);
> +                            ct_zone_bitmap, &pending_ct_zones);
> +            commit_ct_zones(&ctx, br_int, &pending_ct_zones);
>
>              struct hmap flow_table = HMAP_INITIALIZER(&flow_table);
>              lflow_run(&ctx, &lports, &mcgroups, &local_datapaths,
> @@ -493,9 +610,20 @@ main(int argc, char *argv[])
>              pinctrl_wait(&ctx);
>          }
>          ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
> -        ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop);
>          ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
> +
> +        if (ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop) == 1) {
> +            struct shash_node *iter, *iter_next;
> +            SHASH_FOR_EACH_SAFE(iter, iter_next, &pending_ct_zones) {
> +                struct ct_zone_pending_entry *ctzpe = iter->data;
> +                if (ctzpe->state == CT_ZONE_DB_SENT) {
> +                    shash_delete(&pending_ct_zones, iter);
> +                    free(ctzpe);
> +                }
> +            }
> +        }
>          ovsdb_idl_track_clear(ovs_idl_loop.idl);
> +
>          poll_block();
>          if (should_service_stop()) {
>              exiting = true;
> --
> 1.9.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
Justin Pettit Sept. 23, 2016, 6:01 p.m. UTC | #3
> On Sep 23, 2016, at 10:19 AM, Guru Shetty <guru@ovn.org> wrote:
> 
> 
> 
> On 23 September 2016 at 01:53, Justin Pettit <jpettit@ovn.org> wrote:
> If ovn-controller is restarted, it may choose different conntrack zones
> than had been previously used, which could cause the wrong conntrack
> entries to be associated with a logical port.  This commit stores in the
> integration bridge's OVS "Bridge" table the mapping to the conntrack zone.
> 
> Signed-off-by: Justin Pettit <jpettit@ovn.org>
> ---
>  ovn/controller/ovn-controller.8.xml |  14 ++++
>  ovn/controller/ovn-controller.c     | 136 ++++++++++++++++++++++++++++++++++--
>  2 files changed, 146 insertions(+), 4 deletions(-)
> 
> diff --git a/ovn/controller/ovn-controller.8.xml b/ovn/controller/ovn-controller.8.xml
> index 559031f..0484263 100644
> --- a/ovn/controller/ovn-controller.8.xml
> +++ b/ovn/controller/ovn-controller.8.xml
> @@ -200,6 +200,20 @@
>        </dd>
> 
>        <dt>
> +        <code>external_ids:ct-zone-*</code> in the <code>Bridge</code> table
> +      </dt>
> +      <dd>
> +        Logical ports and gateway routers are assigned a connection
> +        tracking zone by <code>ovn-controller</code> for stateful
> +        services.  To keep state across restarts of
> +        <code>ovn-controller</code>, these keys are stored in the
> +        integration bridge's Bridge table.  The name contains a prefix
> +        of <code>ct-zone-</code> followed by the name of the logical
> +        port.  The value for this key identifies the zone used for this
> +        port.
> I suppose the above is not really true as we also have gateway routers. 

I'm not sure I understand.  Are you saying that my use of port doesn't cover the gateway routers or something else?  Do you have a suggested change of text or are you raising a more fundamental concern?

Thanks,

--Justin
Guru Shetty Sept. 23, 2016, 6:12 p.m. UTC | #4
On 23 September 2016 at 11:01, Justin Pettit <jpettit@ovn.org> wrote:

>
> > On Sep 23, 2016, at 10:19 AM, Guru Shetty <guru@ovn.org> wrote:
> >
> >
> >
> > On 23 September 2016 at 01:53, Justin Pettit <jpettit@ovn.org> wrote:
> > If ovn-controller is restarted, it may choose different conntrack zones
> > than had been previously used, which could cause the wrong conntrack
> > entries to be associated with a logical port.  This commit stores in the
> > integration bridge's OVS "Bridge" table the mapping to the conntrack
> zone.
> >
> > Signed-off-by: Justin Pettit <jpettit@ovn.org>
> > ---
> >  ovn/controller/ovn-controller.8.xml |  14 ++++
> >  ovn/controller/ovn-controller.c     | 136
> ++++++++++++++++++++++++++++++++++--
> >  2 files changed, 146 insertions(+), 4 deletions(-)
> >
> > diff --git a/ovn/controller/ovn-controller.8.xml b/ovn/controller/ovn-
> controller.8.xml
> > index 559031f..0484263 100644
> > --- a/ovn/controller/ovn-controller.8.xml
> > +++ b/ovn/controller/ovn-controller.8.xml
> > @@ -200,6 +200,20 @@
> >        </dd>
> >
> >        <dt>
> > +        <code>external_ids:ct-zone-*</code> in the <code>Bridge</code>
> table
> > +      </dt>
> > +      <dd>
> > +        Logical ports and gateway routers are assigned a connection
> > +        tracking zone by <code>ovn-controller</code> for stateful
> > +        services.  To keep state across restarts of
> > +        <code>ovn-controller</code>, these keys are stored in the
> > +        integration bridge's Bridge table.  The name contains a prefix
> > +        of <code>ct-zone-</code> followed by the name of the logical
> > +        port.  The value for this key identifies the zone used for this
> > +        port.
> > I suppose the above is not really true as we also have gateway routers.
>
> I'm not sure I understand.  Are you saying that my use of port doesn't
> cover the gateway routers or something else?  Do you have a suggested
> change of text or are you raising a more fundamental concern?
>

No fundamental concerns. The text would be more right with the following
substitution.

s/ followed by the name of the logical port/ followed by a key that
identifies the
 logical port or the gateway router/


>
> Thanks,
>
> --Justin
>
>
>
Justin Pettit Sept. 23, 2016, 6:18 p.m. UTC | #5
> On Sep 23, 2016, at 11:12 AM, Guru Shetty <guru@ovn.org> wrote:
> 
> 
> 
> On 23 September 2016 at 11:01, Justin Pettit <jpettit@ovn.org> wrote:
> 
> > On Sep 23, 2016, at 10:19 AM, Guru Shetty <guru@ovn.org> wrote:
> >
> >
> >
> > On 23 September 2016 at 01:53, Justin Pettit <jpettit@ovn.org> wrote:
> > If ovn-controller is restarted, it may choose different conntrack zones
> > than had been previously used, which could cause the wrong conntrack
> > entries to be associated with a logical port.  This commit stores in the
> > integration bridge's OVS "Bridge" table the mapping to the conntrack zone.
> >
> > Signed-off-by: Justin Pettit <jpettit@ovn.org>
> > ---
> >  ovn/controller/ovn-controller.8.xml |  14 ++++
> >  ovn/controller/ovn-controller.c     | 136 ++++++++++++++++++++++++++++++++++--
> >  2 files changed, 146 insertions(+), 4 deletions(-)
> >
> > diff --git a/ovn/controller/ovn-controller.8.xml b/ovn/controller/ovn-controller.8.xml
> > index 559031f..0484263 100644
> > --- a/ovn/controller/ovn-controller.8.xml
> > +++ b/ovn/controller/ovn-controller.8.xml
> > @@ -200,6 +200,20 @@
> >        </dd>
> >
> >        <dt>
> > +        <code>external_ids:ct-zone-*</code> in the <code>Bridge</code> table
> > +      </dt>
> > +      <dd>
> > +        Logical ports and gateway routers are assigned a connection
> > +        tracking zone by <code>ovn-controller</code> for stateful
> > +        services.  To keep state across restarts of
> > +        <code>ovn-controller</code>, these keys are stored in the
> > +        integration bridge's Bridge table.  The name contains a prefix
> > +        of <code>ct-zone-</code> followed by the name of the logical
> > +        port.  The value for this key identifies the zone used for this
> > +        port.
> > I suppose the above is not really true as we also have gateway routers.
> 
> I'm not sure I understand.  Are you saying that my use of port doesn't cover the gateway routers or something else?  Do you have a suggested change of text or are you raising a more fundamental concern?
> 
> No fundamental concerns. The text would be more right with the following substitution.
> 
> s/ followed by the name of the logical port/ followed by a key that identifies the
>  logical port or the gateway router/

I changed the sentence to the following, since I was worried that "key" was being used in different contexts in that paragraph:

        The name contains a prefix
        of <code>ct-zone-</code> followed by the name of the logical
        port or the gateway router.

Are you good with it?

--Justin
Guru Shetty Sept. 23, 2016, 6:21 p.m. UTC | #6
>
>
>
> I changed the sentence to the following, since I was worried that "key"
> was being used in different contexts in that paragraph:
>
>         The name contains a prefix
>         of <code>ct-zone-</code> followed by the name of the logical
>         port or the gateway router.
>
> Are you good with it?
>

The "key" for a gateway router is "dnat_$UUID" and "snat_$UUID" where UUID
is the southbound database's Datapath_Binding record's UUID. I am not sure
how best to express that complexity.


>
> --Justin
>
>
>
Justin Pettit Sept. 23, 2016, 6:25 p.m. UTC | #7
> On Sep 23, 2016, at 11:21 AM, Guru Shetty <guru@ovn.org> wrote:
> 
> 
> 
> I changed the sentence to the following, since I was worried that "key" was being used in different contexts in that paragraph:
> 
>         The name contains a prefix
>         of <code>ct-zone-</code> followed by the name of the logical
>         port or the gateway router.
> 
> Are you good with it?
> 
> The "key" for a gateway router is "dnat_$UUID" and "snat_$UUID" where UUID is the southbound database's Datapath_Binding record's UUID. I am not sure how best to express that complexity.

How about " followed by the name of the logical port or the gateway router's zone key"?

--Justin
Guru Shetty Sept. 23, 2016, 6:29 p.m. UTC | #8
On 23 September 2016 at 11:25, Justin Pettit <jpettit@ovn.org> wrote:

>
> > On Sep 23, 2016, at 11:21 AM, Guru Shetty <guru@ovn.org> wrote:
> >
> >
> >
> > I changed the sentence to the following, since I was worried that "key"
> was being used in different contexts in that paragraph:
> >
> >         The name contains a prefix
> >         of <code>ct-zone-</code> followed by the name of the logical
> >         port or the gateway router.
> >
> > Are you good with it?
> >
> > The "key" for a gateway router is "dnat_$UUID" and "snat_$UUID" where
> UUID is the southbound database's Datapath_Binding record's UUID. I am not
> sure how best to express that complexity.
>
> How about " followed by the name of the logical port or the gateway
> router's zone key"?
>

Sounds good to me.


>
> --Justin
>
>
>
Justin Pettit Sept. 23, 2016, 6:29 p.m. UTC | #9
> On Sep 23, 2016, at 11:29 AM, Guru Shetty <guru@ovn.org> wrote:
> 
> 
> 
> On 23 September 2016 at 11:25, Justin Pettit <jpettit@ovn.org> wrote:
> 
> > On Sep 23, 2016, at 11:21 AM, Guru Shetty <guru@ovn.org> wrote:
> >
> >
> >
> > I changed the sentence to the following, since I was worried that "key" was being used in different contexts in that paragraph:
> >
> >         The name contains a prefix
> >         of <code>ct-zone-</code> followed by the name of the logical
> >         port or the gateway router.
> >
> > Are you good with it?
> >
> > The "key" for a gateway router is "dnat_$UUID" and "snat_$UUID" where UUID is the southbound database's Datapath_Binding record's UUID. I am not sure how best to express that complexity.
> 
> How about " followed by the name of the logical port or the gateway router's zone key"?
> 
> Sounds good to me.

Thanks.  We'll go with that.

--Justin

Patch
diff mbox

diff --git a/ovn/controller/ovn-controller.8.xml b/ovn/controller/ovn-controller.8.xml
index 559031f..0484263 100644
--- a/ovn/controller/ovn-controller.8.xml
+++ b/ovn/controller/ovn-controller.8.xml
@@ -200,6 +200,20 @@ 
       </dd>
 
       <dt>
+        <code>external_ids:ct-zone-*</code> in the <code>Bridge</code> table
+      </dt>
+      <dd>
+        Logical ports and gateway routers are assigned a connection
+        tracking zone by <code>ovn-controller</code> for stateful
+        services.  To keep state across restarts of
+        <code>ovn-controller</code>, these keys are stored in the
+        integration bridge's Bridge table.  The name contains a prefix
+        of <code>ct-zone-</code> followed by the name of the logical
+        port.  The value for this key identifies the zone used for this
+        port.
+      </dd>
+
+      <dt>
         <code>external_ids:ovn-localnet-port</code> in the <code>Port</code>
         table
       </dt>
diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 49821f7..b051a75 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -229,9 +229,21 @@  get_ovnsb_remote(struct ovsdb_idl *ovs_idl)
     }
 }
 
+enum ct_zone_pending_state {
+    CT_ZONE_DB_QUEUED,    /* Waiting for DB transaction to open. */
+    CT_ZONE_DB_SENT,      /* Sent and waiting for confirmation from DB. */
+};
+
+struct ct_zone_pending_entry {
+    enum ct_zone_pending_state state;
+    int zone;
+    bool add;             /* Is the entry being added? */
+};
+
 static void
 update_ct_zones(struct sset *lports, struct hmap *patched_datapaths,
-                struct simap *ct_zones, unsigned long *ct_zone_bitmap)
+                struct simap *ct_zones, unsigned long *ct_zone_bitmap,
+                struct shash *pending_ct_zones)
 {
     struct simap_node *ct_zone, *ct_zone_next;
     int scan_start = 1;
@@ -260,6 +272,15 @@  update_ct_zones(struct sset *lports, struct hmap *patched_datapaths,
     /* Delete zones that do not exist in above sset. */
     SIMAP_FOR_EACH_SAFE(ct_zone, ct_zone_next, ct_zones) {
         if (!sset_contains(&all_users, ct_zone->name)) {
+            VLOG_DBG("removing ct zone %"PRId32" for '%s'",
+                     ct_zone->data, ct_zone->name);
+
+            struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending);
+            pending->state = CT_ZONE_DB_QUEUED;
+            pending->zone = ct_zone->data;
+            pending->add = false;
+            shash_add(pending_ct_zones, ct_zone->name, pending);
+
             bitmap_set0(ct_zone_bitmap, ct_zone->data);
             simap_delete(ct_zones, ct_zone);
         }
@@ -271,7 +292,7 @@  update_ct_zones(struct sset *lports, struct hmap *patched_datapaths,
     /* Assign a unique zone id for each logical port and two zones
      * to a gateway router. */
     SSET_FOR_EACH(user, &all_users) {
-        size_t zone;
+        int zone;
 
         if (simap_contains(ct_zones, user)) {
             continue;
@@ -286,6 +307,14 @@  update_ct_zones(struct sset *lports, struct hmap *patched_datapaths,
         }
         scan_start = zone + 1;
 
+        VLOG_DBG("assigning ct zone %"PRId32" to '%s'", zone, user);
+
+        struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending);
+        pending->state = CT_ZONE_DB_QUEUED;
+        pending->zone = zone;
+        pending->add = true;
+        shash_add(pending_ct_zones, user, pending);
+
         bitmap_set1(ct_zone_bitmap, zone);
         simap_put(ct_zones, user, zone);
 
@@ -297,6 +326,90 @@  update_ct_zones(struct sset *lports, struct hmap *patched_datapaths,
     sset_destroy(&all_users);
 }
 
+static void
+commit_ct_zones(struct controller_ctx *ctx,
+                const struct ovsrec_bridge *br_int,
+                struct shash *pending_ct_zones)
+{
+    if (!ctx->ovs_idl_txn) {
+        return;
+    }
+
+    struct smap new_ids;
+    smap_clone(&new_ids, &br_int->external_ids);
+
+    bool updated = false;
+    struct shash_node *iter;
+    SHASH_FOR_EACH(iter, pending_ct_zones) {
+        struct ct_zone_pending_entry *ctzpe = iter->data;
+
+        /* The transaction is open, so any pending entries in the
+         * CT_ZONE_DB_QUEUED must be sent and any in CT_ZONE_DB_QUEUED
+         * need to be retried. */
+        if (ctzpe->state != CT_ZONE_DB_QUEUED
+            && ctzpe->state != CT_ZONE_DB_SENT) {
+            continue;
+        }
+
+        char *user_str = xasprintf("ct-zone-%s", iter->name);
+        if (ctzpe->add) {
+            char *zone_str = xasprintf("%"PRId32, ctzpe->zone);
+            smap_replace(&new_ids, user_str, zone_str);
+            free(zone_str);
+        } else {
+            smap_remove(&new_ids, user_str);
+        }
+        free(user_str);
+
+        ctzpe->state = CT_ZONE_DB_SENT;
+        updated = true;
+    }
+
+    if (updated) {
+        ovsrec_bridge_verify_external_ids(br_int);
+        ovsrec_bridge_set_external_ids(br_int, &new_ids);
+    }
+    smap_destroy(&new_ids);
+}
+
+static void
+restore_ct_zones(struct ovsdb_idl *ovs_idl,
+                 struct simap *ct_zones, unsigned long *ct_zone_bitmap)
+{
+    const struct ovsrec_open_vswitch *cfg;
+    cfg = ovsrec_open_vswitch_first(ovs_idl);
+    if (!cfg) {
+        return;
+    }
+
+    const char *br_int_name = smap_get_def(&cfg->external_ids, "ovn-bridge",
+                                           DEFAULT_BRIDGE_NAME);
+
+    const struct ovsrec_bridge *br_int;
+    br_int = get_bridge(ovs_idl, br_int_name);
+    if (!br_int) {
+        /* If the integration bridge hasn't been defined, assume that
+         * any existing ct-zone definitions aren't valid. */
+        return;
+    }
+
+    struct smap_node *node;
+    SMAP_FOR_EACH(node, &br_int->external_ids) {
+        if (strncmp(node->key, "ct-zone-", 8)) {
+            continue;
+        }
+
+        const char *user = node->key + 8;
+        int zone = atoi(node->value);
+
+        if (user[0] && zone) {
+            VLOG_DBG("restoring ct zone %"PRId32" for '%s'", zone, user);
+            bitmap_set1(ct_zone_bitmap, zone);
+            simap_put(ct_zones, user, zone);
+        }
+    }
+}
+
 static int64_t
 get_nb_cfg(struct ovsdb_idl *idl)
 {
@@ -362,6 +475,7 @@  main(int argc, char *argv[])
     ovsdb_idl_add_column(ovs_idl_loop.idl, &ovsrec_bridge_col_name);
     ovsdb_idl_add_column(ovs_idl_loop.idl, &ovsrec_bridge_col_fail_mode);
     ovsdb_idl_add_column(ovs_idl_loop.idl, &ovsrec_bridge_col_other_config);
+    ovsdb_idl_add_column(ovs_idl_loop.idl, &ovsrec_bridge_col_external_ids);
     chassis_register_ovs_idl(ovs_idl_loop.idl);
     encaps_register_ovs_idl(ovs_idl_loop.idl);
     binding_register_ovs_idl(ovs_idl_loop.idl);
@@ -381,9 +495,11 @@  main(int argc, char *argv[])
 
     /* Initialize connection tracking zones. */
     struct simap ct_zones = SIMAP_INITIALIZER(&ct_zones);
+    struct shash pending_ct_zones = SHASH_INITIALIZER(&pending_ct_zones);
     unsigned long ct_zone_bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)];
     memset(ct_zone_bitmap, 0, sizeof ct_zone_bitmap);
     bitmap_set1(ct_zone_bitmap, 0); /* Zone 0 is reserved. */
+    restore_ct_zones(ovs_idl_loop.idl, &ct_zones, ct_zone_bitmap);
     unixctl_command_register("ct-zone-list", "", 0, 0,
                              ct_zone_list, &ct_zones);
 
@@ -440,7 +556,8 @@  main(int argc, char *argv[])
 
             pinctrl_run(&ctx, &lports, br_int, chassis_id, &local_datapaths);
             update_ct_zones(&all_lports, &patched_datapaths, &ct_zones,
-                            ct_zone_bitmap);
+                            ct_zone_bitmap, &pending_ct_zones);
+            commit_ct_zones(&ctx, br_int, &pending_ct_zones);
 
             struct hmap flow_table = HMAP_INITIALIZER(&flow_table);
             lflow_run(&ctx, &lports, &mcgroups, &local_datapaths,
@@ -493,9 +610,20 @@  main(int argc, char *argv[])
             pinctrl_wait(&ctx);
         }
         ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
-        ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop);
         ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
+
+        if (ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop) == 1) {
+            struct shash_node *iter, *iter_next;
+            SHASH_FOR_EACH_SAFE(iter, iter_next, &pending_ct_zones) {
+                struct ct_zone_pending_entry *ctzpe = iter->data;
+                if (ctzpe->state == CT_ZONE_DB_SENT) {
+                    shash_delete(&pending_ct_zones, iter);
+                    free(ctzpe);
+                }
+            }
+        }
         ovsdb_idl_track_clear(ovs_idl_loop.idl);
+
         poll_block();
         if (should_service_stop()) {
             exiting = true;