[ovs-dev,RFC,v3,08/10] ovn-controller: port-binding incremental processing for physical flows

Message ID 1523909665-27961-9-git-send-email-hzhou8@ebay.com
State Superseded
Headers show
Series
  • ovn-controller Incremental Processing
Related show

Commit Message

Han Zhou April 16, 2018, 8:14 p.m.
This patch implements a change handler for port-binding in flow_output
for physical flow computation, so that physical flow computation
becomes incremental.

This patch, together with the previous changes related to the
incremental processing engine, supports incremental processing for
lflow changes and for port-binding changes of lports on other HVs,
which are the most common scenarios in a cloud where workloads come up
and down.

In the ovn-scale-test env [1], the total execution time of creating and
binding 10k ports on 1k HVs with 40 lswitches and 8 lrouters
(5 lswitches/lrouter) decreased from 3h40m to 1h50m because of the
lower CPU usage on HVs. The CPU time of ovn-controller for creating and
binding an additional 500 lports (on top of the 10k lports that already
existed) decreased by 90% compared with master. The latency of
end-to-end operations for one extra port on top of the 10k lports, from
port creation until all flows are installed on all related HVs, also
improved significantly, from 20.6s to 7.3s.

[1] https://github.com/openvswitch/ovn-scale-test

Signed-off-by: Han Zhou <hzhou8@ebay.com>
---
 ovn/controller/ovn-controller.c |  42 ++++++++++++-
 ovn/controller/physical.c       | 131 +++++++++++++++++++++++++++++-----------
 ovn/controller/physical.h       |   9 +++
 3 files changed, 147 insertions(+), 35 deletions(-)

Patch

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 8855296..c9598f3 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -903,6 +903,46 @@  flow_output_sb_logical_flow_handler(struct engine_node *node)
     return handled;
 }
 
+/* Change handler for the SB Port_Binding input of the flow_output engine
+ * node: applies the tracked port-binding changes to the physical flows. */
+static bool
+flow_output_sb_port_binding_handler(struct engine_node *node)
+{
+    struct controller_ctx *ctx = (struct controller_ctx *)node->context;
+    struct ed_type_runtime_data *data =
+        (struct ed_type_runtime_data *)engine_get_input(
+                "runtime_data", node)->data;
+    struct hmap *local_datapaths = &data->local_datapaths;
+    struct sset *active_tunnels = &data->active_tunnels;
+    struct chassis_index *chassis_index = &data->chassis_index;
+    struct simap *ct_zones = &data->ct_zones;
+    const struct ovsrec_bridge *br_int = get_br_int(ctx);
+
+    const char *chassis_id = get_chassis_id(ctx->ovs_idl);
+    const struct sbrec_chassis *chassis = NULL;
+    if (chassis_id) {
+        chassis = get_chassis(ctx->ovnsb_idl, chassis_id);
+    }
+    /* Aborts (ovs_assert) if br_int or the chassis record is missing. */
+    ovs_assert(br_int && chassis);
+
+    /* TODO: handle port-binding for lflow processing. */
+
+    struct ed_type_flow_output *fod =
+        (struct ed_type_flow_output *)node->data;
+    struct ovn_desired_flow_table *flow_table = &fod->flow_table;
+
+    enum mf_field_id mff_ovn_geneve = ofctrl_get_mf_field_id();
+    physical_handle_port_binding_changes(flow_table,
+                                         ctx, mff_ovn_geneve,
+                                         chassis, ct_zones,
+                                         local_datapaths,
+                                         chassis_index, active_tunnels);
+
+    node->changed = true;
+    return true;
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -987,7 +1027,7 @@  main(int argc, char *argv[])
     engine_add_input(&en_flow_output, &en_sb_encap, NULL);
     engine_add_input(&en_flow_output, &en_sb_multicast_group, NULL);
     engine_add_input(&en_flow_output, &en_sb_datapath_binding, NULL);
-    engine_add_input(&en_flow_output, &en_sb_port_binding, NULL);
+    engine_add_input(&en_flow_output, &en_sb_port_binding, flow_output_sb_port_binding_handler);
     engine_add_input(&en_flow_output, &en_sb_mac_binding, NULL);
     engine_add_input(&en_flow_output, &en_sb_logical_flow, flow_output_sb_logical_flow_handler);
     engine_add_input(&en_flow_output, &en_sb_dhcp_options, NULL);
diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index 513d986..9fbcac0 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -360,7 +360,7 @@  consider_port_binding(struct ovn_desired_flow_table *flow_table,
         ofpact_finish_CLONE(ofpacts_p, &clone);
 
         ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
         return;
     }
 
@@ -428,7 +428,7 @@  consider_port_binding(struct ovn_desired_flow_table *flow_table,
         }
 
         ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
 
         goto out;
     }
@@ -571,7 +571,8 @@  consider_port_binding(struct ovn_desired_flow_table *flow_table,
         /* Resubmit to first logical ingress pipeline table. */
         put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
         ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG,
-                        tag ? 150 : 100, 0, &match, ofpacts_p, hc_uuid);
+                        tag ? 150 : 100, 0, &match, ofpacts_p,
+                        &binding->header_.uuid);
 
         if (!tag && (!strcmp(binding->type, "localnet")
                      || !strcmp(binding->type, "l2gateway"))) {
@@ -581,7 +582,8 @@  consider_port_binding(struct ovn_desired_flow_table *flow_table,
              * action. */
             ofpbuf_pull(ofpacts_p, ofpacts_orig_size);
             match_set_dl_tci_masked(&match, 0, htons(VLAN_CFI));
-            ofctrl_add_flow(flow_table, 0, 100, 0, &match, ofpacts_p, hc_uuid);
+            ofctrl_add_flow(flow_table, 0, 100, 0, &match, ofpacts_p,
+                            &binding->header_.uuid);
         }
 
         /* Table 65, Priority 100.
@@ -609,7 +611,7 @@  consider_port_binding(struct ovn_desired_flow_table *flow_table,
             ofpact_put_STRIP_VLAN(ofpacts_p);
         }
         ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
     } else if (!tun && !is_ha_remote) {
         /* Remote port connected by localnet port */
         /* Table 33, priority 100.
@@ -632,7 +634,7 @@  consider_port_binding(struct ovn_desired_flow_table *flow_table,
         /* Resubmit to table 33. */
         put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_p);
         ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
     } else {
         /* Remote port connected by tunnel */
 
@@ -723,7 +725,7 @@  consider_port_binding(struct ovn_desired_flow_table *flow_table,
             ofpact_finish_BUNDLE(ofpacts_p, &bundle);
         }
         ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
     }
 out:
     if (gateway_chassis) {
@@ -737,9 +739,7 @@  consider_mc_group(struct ovn_desired_flow_table *flow_table,
                   const struct simap *ct_zones,
                   struct hmap *local_datapaths,
                   const struct sbrec_chassis *chassis,
-                  const struct sbrec_multicast_group *mc,
-                  struct ofpbuf *ofpacts_p,
-                  struct ofpbuf *remote_ofpacts_p)
+                  const struct sbrec_multicast_group *mc)
 {
     uint32_t dp_key = mc->datapath->tunnel_key;
     if (!get_local_datapath(local_datapaths, dp_key)) {
@@ -765,8 +765,10 @@  consider_mc_group(struct ovn_desired_flow_table *flow_table,
      *      would happen on every hypervisor in the multicast group,
      *      effectively duplicating the packet.)
      */
-    ofpbuf_clear(ofpacts_p);
-    ofpbuf_clear(remote_ofpacts_p);
+    struct ofpbuf ofpacts;
+    ofpbuf_init(&ofpacts, 0);
+    struct ofpbuf remote_ofpacts;
+    ofpbuf_init(&remote_ofpacts, 0);
     for (size_t i = 0; i < mc->n_ports; i++) {
         struct sbrec_port_binding *port = mc->ports[i];
 
@@ -780,20 +782,20 @@  consider_mc_group(struct ovn_desired_flow_table *flow_table,
 
         int zone_id = simap_get(ct_zones, port->logical_port);
         if (zone_id) {
-            put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, ofpacts_p);
+            put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, &ofpacts);
         }
 
         if (!strcmp(port->type, "patch")) {
             put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
-                     remote_ofpacts_p);
-            put_resubmit(OFTABLE_CHECK_LOOPBACK, remote_ofpacts_p);
+                     &remote_ofpacts);
+            put_resubmit(OFTABLE_CHECK_LOOPBACK, &remote_ofpacts);
         } else if (simap_contains(&localvif_to_ofport,
                            (port->parent_port && *port->parent_port)
                            ? port->parent_port : port->logical_port)
                    || (!strcmp(port->type, "l3gateway")
                        && port->chassis == chassis)) {
-            put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
-            put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts_p);
+            put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
+            put_resubmit(OFTABLE_CHECK_LOOPBACK, &ofpacts);
         } else if (port->chassis && !get_localnet_port(local_datapaths,
                                          mc->datapath->tunnel_key)) {
             /* Add remote chassis only when localnet port not exist,
@@ -808,14 +810,14 @@  consider_mc_group(struct ovn_desired_flow_table *flow_table,
      *
      * Handle output to the local logical ports in the multicast group, if
      * any. */
-    bool local_ports = ofpacts_p->size > 0;
+    bool local_ports = ofpacts.size > 0;
     if (local_ports) {
         /* Following delivery to local logical ports, restore the multicast
          * group as the logical output port. */
-        put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
+        put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
 
         ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, &ofpacts, &mc->header_.uuid);
     }
 
     /* Table 32, priority 100.
@@ -823,12 +825,12 @@  consider_mc_group(struct ovn_desired_flow_table *flow_table,
      *
      * Handle output to the remote chassis in the multicast group, if
      * any. */
-    if (!sset_is_empty(&remote_chassis) || remote_ofpacts_p->size > 0) {
-        if (remote_ofpacts_p->size > 0) {
+    if (!sset_is_empty(&remote_chassis) || remote_ofpacts.size > 0) {
+        if (remote_ofpacts.size > 0) {
             /* Following delivery to logical patch ports, restore the
              * multicast group as the logical output port. */
             put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
-                     remote_ofpacts_p);
+                     &remote_ofpacts);
         }
 
         const char *chassis_name;
@@ -842,20 +844,22 @@  consider_mc_group(struct ovn_desired_flow_table *flow_table,
 
             if (!prev || tun->type != prev->type) {
                 put_encapsulation(mff_ovn_geneve, tun, mc->datapath,
-                                  mc->tunnel_key, remote_ofpacts_p);
+                                  mc->tunnel_key, &remote_ofpacts);
                 prev = tun;
             }
-            ofpact_put_OUTPUT(remote_ofpacts_p)->port = tun->ofport;
+            ofpact_put_OUTPUT(&remote_ofpacts)->port = tun->ofport;
         }
 
-        if (remote_ofpacts_p->size) {
+        if (remote_ofpacts.size) {
             if (local_ports) {
-                put_resubmit(OFTABLE_LOCAL_OUTPUT, remote_ofpacts_p);
+                put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts);
             }
             ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100, 0,
-                            &match, remote_ofpacts_p, hc_uuid);
+                            &match, &remote_ofpacts, &mc->header_.uuid);
         }
     }
+    ofpbuf_uninit(&ofpacts);
+    ofpbuf_uninit(&remote_ofpacts);
     sset_destroy(&remote_chassis);
 }
 
@@ -870,6 +874,68 @@  update_ofports(struct simap *old, struct simap *new)
     return changed;
 }
 
+/* Rebuilds the flows of multicast group 'mc_name' in 'pb''s datapath. */
+static void
+reconsider_mc_group_for_pb(struct ovn_desired_flow_table *flow_table,
+                           struct controller_ctx *ctx,
+                           const struct sbrec_port_binding *pb,
+                           const char *mc_name,
+                           enum mf_field_id mff_ovn_geneve,
+                           const struct sbrec_chassis *chassis,
+                           const struct simap *ct_zones,
+                           struct hmap *local_datapaths)
+{
+    const struct sbrec_multicast_group *mc
+        = mcgroup_lookup_by_dp_name(ctx->ovnsb_idl, pb->datapath, mc_name);
+    if (mc) {
+        ofctrl_remove_flows(flow_table, &mc->header_.uuid);
+        consider_mc_group(flow_table, mff_ovn_geneve, ct_zones,
+                          local_datapaths, chassis, mc);
+    } else {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+        VLOG_WARN_RL(&rl, "MC group %s is not found in datapath: "UUID_FMT,
+                     mc_name, UUID_ARGS(&pb->datapath->header_.uuid));
+    }
+}
+
+/* Incrementally updates the physical flows in 'flow_table' for each tracked
+ * Port_Binding change.  Deleted and updated bindings first drop the flows
+ * added under their UUID; updated ones also rebuild _MC_flood/_MC_unknown. */
+void
+physical_handle_port_binding_changes(struct ovn_desired_flow_table *flow_table,
+                                     struct controller_ctx *ctx,
+                                     enum mf_field_id mff_ovn_geneve,
+                                     const struct sbrec_chassis *chassis,
+                                     const struct simap *ct_zones,
+                                     struct hmap *local_datapaths,
+                                     struct chassis_index *chassis_index,
+                                     struct sset *active_tunnels)
+{
+    const struct sbrec_port_binding *binding;
+    struct ofpbuf ofpacts;
+    ofpbuf_init(&ofpacts, 0);
+    SBREC_PORT_BINDING_FOR_EACH_TRACKED (binding, ctx->ovnsb_idl) {
+        if (sbrec_port_binding_is_deleted(binding)) {
+            ofctrl_remove_flows(flow_table, &binding->header_.uuid);
+        } else {
+            if (!sbrec_port_binding_is_new(binding)) {
+                ofctrl_remove_flows(flow_table, &binding->header_.uuid);
+                reconsider_mc_group_for_pb(flow_table, ctx, binding,
+                                           "_MC_flood", mff_ovn_geneve,
+                                           chassis, ct_zones, local_datapaths);
+                reconsider_mc_group_for_pb(flow_table, ctx, binding,
+                                           "_MC_unknown", mff_ovn_geneve,
+                                           chassis, ct_zones, local_datapaths);
+            }
+            consider_port_binding(flow_table, ctx, mff_ovn_geneve, ct_zones,
+                                  chassis_index, active_tunnels,
+                                  local_datapaths, binding, chassis, &ofpacts);
+        }
+    }
+    /* Release the scratch action buffer initialized above. */
+    ofpbuf_uninit(&ofpacts);
+}
+
 void
 physical_run(struct ovn_desired_flow_table *flow_table,
              struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve,
@@ -993,6 +1059,7 @@  physical_run(struct ovn_desired_flow_table *flow_table,
     /* Capture changed or removed openflow ports. */
     physical_map_changed |= update_ofports(&localvif_to_ofport,
                                            &new_localvif_to_ofport);
+    // TODO: maybe this is not needed any more?
     if (physical_map_changed) {
         /* Reprocess logical flow table immediately. */
         poll_immediate_wake();
@@ -1013,8 +1080,6 @@  physical_run(struct ovn_desired_flow_table *flow_table,
 
     /* Handle output to multicast groups, in tables 32 and 33. */
     const struct sbrec_multicast_group *mc;
-    struct ofpbuf remote_ofpacts;
-    ofpbuf_init(&remote_ofpacts, 0);
     SBREC_MULTICAST_GROUP_FOR_EACH (mc, ctx->ovnsb_idl) {
         /* Table 32, priority 150.
          * =======================
@@ -1031,12 +1096,10 @@  physical_run(struct ovn_desired_flow_table *flow_table,
         ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 150, 0, &match,
                         &ofpacts, hc_uuid);
 
-        consider_mc_group(flow_table, mff_ovn_geneve, ct_zones, local_datapaths, chassis,
-                          mc, &ofpacts, &remote_ofpacts);
+        consider_mc_group(flow_table, mff_ovn_geneve, ct_zones,
+                          local_datapaths, chassis, mc);
     }
 
-    ofpbuf_uninit(&remote_ofpacts);
-
     /* Table 0, priority 100.
      * ======================
      *
diff --git a/ovn/controller/physical.h b/ovn/controller/physical.h
index ac2da2a..cb55d94 100644
--- a/ovn/controller/physical.h
+++ b/ovn/controller/physical.h
@@ -52,5 +52,14 @@  void physical_run(struct ovn_desired_flow_table *flow_table,
                   const struct sset *local_lports,
                   struct chassis_index *chassis_index,
                   struct sset *active_tunnels);
+void physical_handle_port_binding_changes(
+             struct ovn_desired_flow_table *flow_table,
+             struct controller_ctx *ctx,
+             enum mf_field_id mff_ovn_geneve,
+             const struct sbrec_chassis *chassis,
+             const struct simap *ct_zones,
+             struct hmap *local_datapaths,
+             struct chassis_index *chassis_index,
+             struct sset *active_tunnels);
 
 #endif /* ovn/physical.h */