[ovs-dev,4/4] ovn-controller: Flush conntrack entries for newly allocated zones.
diff mbox

Message ID 1474620783-96388-4-git-send-email-jpettit@ovn.org
State Accepted
Headers show

Commit Message

Justin Pettit Sept. 23, 2016, 8:53 a.m. UTC
Flush any existing conntrack entries for a zone when that zone is
allocated to a new logical port.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
---
 ovn/controller/ofctrl.c         | 76 ++++++++++++++++++++++++++++++++++++-----
 ovn/controller/ofctrl.h         |  8 +++--
 ovn/controller/ovn-controller.c | 25 ++++----------
 ovn/controller/ovn-controller.h | 15 ++++++++
 4 files changed, 95 insertions(+), 29 deletions(-)

Comments

Ben Pfaff Sept. 23, 2016, 5:14 p.m. UTC | #1
On Fri, Sep 23, 2016 at 01:53:03AM -0700, Justin Pettit wrote:
> Flush any existing conntrack entries for a zone when that zone is
> allocated to a new logical port.
> 
> Signed-off-by: Justin Pettit <jpettit@ovn.org>

Also would be nice to have a test.

Acked-by: Ben Pfaff <blp@ovn.org>
Justin Pettit Sept. 23, 2016, 7:23 p.m. UTC | #2
> On Sep 23, 2016, at 10:14 AM, Ben Pfaff <blp@ovn.org> wrote:
> 
> On Fri, Sep 23, 2016 at 01:53:03AM -0700, Justin Pettit wrote:
>> Flush any existing conntrack entries for a zone when that zone is
>> allocated to a new logical port.
>> 
>> Signed-off-by: Justin Pettit <jpettit@ovn.org>
> 
> Also would be nice to have a test.

Agreed.  I'll put adding unit tests on my to-do list.

> Acked-by: Ben Pfaff <blp@ovn.org>

Thanks for the quick turnaround.  I'll push this series to master and branch-2.6 in a few minutes.

--Justin

Patch
diff mbox

diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
index 6ea593b..1d8fbf3 100644
--- a/ovn/controller/ofctrl.c
+++ b/ovn/controller/ofctrl.c
@@ -170,7 +170,8 @@  run_S_NEW(void)
 
 static void
 recv_S_NEW(const struct ofp_header *oh OVS_UNUSED,
-           enum ofptype type OVS_UNUSED)
+           enum ofptype type OVS_UNUSED,
+           struct shash *pending_ct_zones OVS_UNUSED)
 {
     OVS_NOT_REACHED();
 }
@@ -256,7 +257,8 @@  process_tlv_table_reply(const struct ofputil_tlv_table_reply *reply)
 }
 
 static void
-recv_S_TLV_TABLE_REQUESTED(const struct ofp_header *oh, enum ofptype type)
+recv_S_TLV_TABLE_REQUESTED(const struct ofp_header *oh, enum ofptype type,
+                           struct shash *pending_ct_zones OVS_UNUSED)
 {
     if (oh->xid != xid) {
         ofctrl_recv(oh, type);
@@ -311,7 +313,8 @@  run_S_TLV_TABLE_MOD_SENT(void)
 }
 
 static void
-recv_S_TLV_TABLE_MOD_SENT(const struct ofp_header *oh, enum ofptype type)
+recv_S_TLV_TABLE_MOD_SENT(const struct ofp_header *oh, enum ofptype type,
+                          struct shash *pending_ct_zones OVS_UNUSED)
 {
     if (oh->xid != xid && oh->xid != xid2) {
         ofctrl_recv(oh, type);
@@ -390,7 +393,8 @@  run_S_CLEAR_FLOWS(void)
 }
 
 static void
-recv_S_CLEAR_FLOWS(const struct ofp_header *oh, enum ofptype type)
+recv_S_CLEAR_FLOWS(const struct ofp_header *oh, enum ofptype type,
+                   struct shash *pending_ct_zones OVS_UNUSED)
 {
     ofctrl_recv(oh, type);
 }
@@ -412,7 +416,8 @@  run_S_UPDATE_FLOWS(void)
 }
 
 static void
-recv_S_UPDATE_FLOWS(const struct ofp_header *oh, enum ofptype type)
+recv_S_UPDATE_FLOWS(const struct ofp_header *oh, enum ofptype type,
+                    struct shash *pending_ct_zones)
 {
     if (type == OFPTYPE_BARRIER_REPLY && !ovs_list_is_empty(&flow_updates)) {
         struct ofctrl_flow_update *fup = ofctrl_flow_update_from_list_node(
@@ -424,6 +429,17 @@  recv_S_UPDATE_FLOWS(const struct ofp_header *oh, enum ofptype type)
             ovs_list_remove(&fup->list_node);
             free(fup);
         }
+
+        /* If the barrier xid is associated with an outstanding conntrack
+         * flush, the flush succeeded.  Move the pending ct zone entry
+         * to the next stage. */
+        struct shash_node *iter;
+        SHASH_FOR_EACH(iter, pending_ct_zones) {
+            struct ct_zone_pending_entry *ctzpe = iter->data;
+            if (ctzpe->state == CT_ZONE_OF_SENT && ctzpe->of_xid == oh->xid) {
+                ctzpe->state = CT_ZONE_DB_QUEUED;
+            }
+        }
     } else {
         ofctrl_recv(oh, type);
     }
@@ -434,7 +450,7 @@  recv_S_UPDATE_FLOWS(const struct ofp_header *oh, enum ofptype type)
  * field for class OVN_GENEVE_CLASS, type OVN_GENEVE_TYPE.  If successful,
  * returns the MFF_* field ID for the option, otherwise returns 0. */
 enum mf_field_id
-ofctrl_run(const struct ovsrec_bridge *br_int)
+ofctrl_run(const struct ovsrec_bridge *br_int, struct shash *pending_ct_zones)
 {
     char *target = xasprintf("unix:%s/%s.mgmt", ovs_rundir(), br_int->name);
     if (strcmp(target, rconn_get_target(swconn))) {
@@ -451,6 +467,15 @@  ofctrl_run(const struct ovsrec_bridge *br_int)
     if (seqno != rconn_get_connection_seqno(swconn)) {
         seqno = rconn_get_connection_seqno(swconn);
         state = S_NEW;
+
+        /* Reset the state of any outstanding ct flushes to resend them. */
+        struct shash_node *iter;
+        SHASH_FOR_EACH(iter, pending_ct_zones) {
+            struct ct_zone_pending_entry *ctzpe = iter->data;
+            if (ctzpe->state == CT_ZONE_OF_SENT) {
+                ctzpe->state = CT_ZONE_OF_QUEUED;
+            }
+        }
     }
 
     bool progress = true;
@@ -475,7 +500,7 @@  ofctrl_run(const struct ovsrec_bridge *br_int)
             error = ofptype_decode(&type, oh);
             if (!error) {
                 switch (state) {
-#define STATE(NAME) case NAME: recv_##NAME(oh, type); break;
+#define STATE(NAME) case NAME: recv_##NAME(oh, type, pending_ct_zones); break;
                     STATES
 #undef STATE
                 default:
@@ -767,6 +792,16 @@  add_group_mod(const struct ofputil_group_mod *gm, struct ovs_list *msgs)
     ovs_list_push_back(msgs, &msg->list_node);
 }
 
+static void
+add_ct_flush_zone(uint16_t zone_id, struct ovs_list *msgs)
+{
+    struct ofpbuf *msg = ofpraw_alloc(OFPRAW_NXT_CT_FLUSH_ZONE,
+                                      rconn_get_version(swconn), 0);
+    struct nx_zone_id *nzi = ofpbuf_put_zeros(msg, sizeof *nzi);
+    nzi->zone_id = htons(zone_id);
+
+    ovs_list_push_back(msgs, &msg->list_node);
+}
 
 /* Replaces the flow table on the switch, if possible, by the flows added
  * with ofctrl_add_flow().
@@ -777,9 +812,14 @@  add_group_mod(const struct ofputil_group_mod *gm, struct ovs_list *msgs)
  * 'groups->desired_groups' and frees them. (The hmap itself isn't
  * destroyed.)
  *
+ * Sends conntrack flush messages to each zone in 'pending_ct_zones' that
+ * is in the CT_ZONE_OF_QUEUED state and then moves the zone into the
+ * CT_ZONE_OF_SENT state.
+ *
  * This should be called after ofctrl_run() within the main loop. */
 void
-ofctrl_put(struct hmap *flow_table, int64_t nb_cfg)
+ofctrl_put(struct hmap *flow_table, struct shash *pending_ct_zones,
+           int64_t nb_cfg)
 {
     /* The flow table can be updated if the connection to the switch is up and
      * in the correct state and not backlogged with existing flow_mods.  (Our
@@ -795,6 +835,17 @@  ofctrl_put(struct hmap *flow_table, int64_t nb_cfg)
     /* OpenFlow messages to send to the switch to bring it up-to-date. */
     struct ovs_list msgs = OVS_LIST_INITIALIZER(&msgs);
 
+    /* Iterate through ct zones that need to be flushed. */
+    struct shash_node *iter;
+    SHASH_FOR_EACH(iter, pending_ct_zones) {
+        struct ct_zone_pending_entry *ctzpe = iter->data;
+        if (ctzpe->state == CT_ZONE_OF_QUEUED) {
+            add_ct_flush_zone(ctzpe->zone, &msgs);
+            ctzpe->state = CT_ZONE_OF_SENT;
+            ctzpe->of_xid = 0;
+        }
+    }
+
     /* Iterate through all the desired groups. If there are new ones,
      * add them to the switch. */
     struct group_info *desired;
@@ -957,6 +1008,15 @@  ofctrl_put(struct hmap *flow_table, int64_t nb_cfg)
             queue_msg(msg);
         }
 
+        /* Store the barrier's xid with any newly sent ct flushes. */
+        struct shash_node *iter;
+        SHASH_FOR_EACH(iter, pending_ct_zones) {
+            struct ct_zone_pending_entry *ctzpe = iter->data;
+            if (ctzpe->state == CT_ZONE_OF_SENT && !ctzpe->of_xid) {
+                ctzpe->of_xid = xid;
+            }
+        }
+
         /* Track the flow update. */
         struct ofctrl_flow_update *fup, *prev;
         LIST_FOR_EACH_REVERSE_SAFE (fup, prev, list_node, &flow_updates) {
diff --git a/ovn/controller/ofctrl.h b/ovn/controller/ofctrl.h
index 5cd4128..5308b61 100644
--- a/ovn/controller/ofctrl.h
+++ b/ovn/controller/ofctrl.h
@@ -31,14 +31,18 @@  struct group_table;
 
 /* Interface for OVN main loop. */
 void ofctrl_init(struct group_table *group_table);
-enum mf_field_id ofctrl_run(const struct ovsrec_bridge *br_int);
-void ofctrl_put(struct hmap *flow_table, int64_t nb_cfg);
+enum mf_field_id ofctrl_run(const struct ovsrec_bridge *br_int,
+                            struct shash *pending_ct_zones);
+void ofctrl_put(struct hmap *flow_table, struct shash *pending_ct_zones,
+                int64_t nb_cfg);
 void ofctrl_wait(void);
 void ofctrl_destroy(void);
 int64_t ofctrl_get_cur_cfg(void);
 
 struct ovn_flow *ofctrl_dup_flow(struct ovn_flow *source);
 
+void ofctrl_ct_flush_zone(uint16_t zone_id);
+
 /* Flow table interfaces to the rest of ovn-controller. */
 void ofctrl_add_flow(struct hmap *desired_flows, uint8_t table_id,
                      uint16_t priority, const struct match *,
diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index b051a75..00392ca 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -229,17 +229,6 @@  get_ovnsb_remote(struct ovsdb_idl *ovs_idl)
     }
 }
 
-enum ct_zone_pending_state {
-    CT_ZONE_DB_QUEUED,    /* Waiting for DB transaction to open. */
-    CT_ZONE_DB_SENT,      /* Sent and waiting for confirmation from DB. */
-};
-
-struct ct_zone_pending_entry {
-    enum ct_zone_pending_state state;
-    int zone;
-    bool add;             /* Is the entry being added? */
-};
-
 static void
 update_ct_zones(struct sset *lports, struct hmap *patched_datapaths,
                 struct simap *ct_zones, unsigned long *ct_zone_bitmap,
@@ -276,7 +265,7 @@  update_ct_zones(struct sset *lports, struct hmap *patched_datapaths,
                      ct_zone->data, ct_zone->name);
 
             struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending);
-            pending->state = CT_ZONE_DB_QUEUED;
+            pending->state = CT_ZONE_DB_QUEUED; /* Skip flushing zone. */
             pending->zone = ct_zone->data;
             pending->add = false;
             shash_add(pending_ct_zones, ct_zone->name, pending);
@@ -310,17 +299,13 @@  update_ct_zones(struct sset *lports, struct hmap *patched_datapaths,
         VLOG_DBG("assigning ct zone %"PRId32" to '%s'", zone, user);
 
         struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending);
-        pending->state = CT_ZONE_DB_QUEUED;
+        pending->state = CT_ZONE_OF_QUEUED;
         pending->zone = zone;
         pending->add = true;
         shash_add(pending_ct_zones, user, pending);
 
         bitmap_set1(ct_zone_bitmap, zone);
         simap_put(ct_zones, user, zone);
-
-        /* xxx We should erase any old entries for this
-         * xxx zone, but we need a generic interface to the conntrack
-         * xxx table. */
     }
 
     sset_destroy(&all_users);
@@ -552,7 +537,8 @@  main(int argc, char *argv[])
             lport_index_init(&lports, ctx.ovnsb_idl);
             mcgroup_index_init(&mcgroups, ctx.ovnsb_idl);
 
-            enum mf_field_id mff_ovn_geneve = ofctrl_run(br_int);
+            enum mf_field_id mff_ovn_geneve = ofctrl_run(br_int,
+                                                         &pending_ct_zones);
 
             pinctrl_run(&ctx, &lports, br_int, chassis_id, &local_datapaths);
             update_ct_zones(&all_lports, &patched_datapaths, &ct_zones,
@@ -568,7 +554,8 @@  main(int argc, char *argv[])
                          br_int, chassis_id, &ct_zones, &flow_table,
                          &local_datapaths, &patched_datapaths);
 
-            ofctrl_put(&flow_table, get_nb_cfg(ctx.ovnsb_idl));
+            ofctrl_put(&flow_table, &pending_ct_zones,
+                       get_nb_cfg(ctx.ovnsb_idl));
             hmap_destroy(&flow_table);
             if (ctx.ovnsb_idl_txn) {
                 int64_t cur_cfg = ofctrl_get_cur_cfg();
diff --git a/ovn/controller/ovn-controller.h b/ovn/controller/ovn-controller.h
index c1e06ca..4dcf4e5 100644
--- a/ovn/controller/ovn-controller.h
+++ b/ovn/controller/ovn-controller.h
@@ -31,6 +31,21 @@  struct controller_ctx {
     struct ovsdb_idl_txn *ovs_idl_txn;
 };
 
+/* States to move through when a new conntrack zone has been allocated. */
+enum ct_zone_pending_state {
+    CT_ZONE_OF_QUEUED,    /* Waiting to send conntrack flush command. */
+    CT_ZONE_OF_SENT,      /* Sent and waiting for confirmation on flush. */
+    CT_ZONE_DB_QUEUED,    /* Waiting for DB transaction to open. */
+    CT_ZONE_DB_SENT,      /* Sent and waiting for confirmation from DB. */
+};
+
+struct ct_zone_pending_entry {
+    int zone;
+    bool add;             /* Is the entry being added? */
+    ovs_be32 of_xid;      /* Transaction id for barrier. */
+    enum ct_zone_pending_state state;
+};
+
 /* Contains hmap_node whose hash values are the tunnel_key of datapaths
  * with at least one local port binding. It also stores the port binding of
  * "localnet" port if such a port exists on the datapath, which indicates