[ovs-dev,RFC,07/14] ovn-controller: port-binding incremental processing for physical flows

Message ID 1532480380-97578-8-git-send-email-hzhou8@ebay.com
State RFC
Headers show
Series
  • ovn-controller incremental processing.
Related show

Commit Message

Han Zhou July 25, 2018, 12:59 a.m.
This patch implements change handler for port-binding in flow_output
for physical flows computing, so that physical flow computing will
be incremental.

This patch together with previous incremental processing engine
related changes supports incremental processing for lflow changes
and port-binding changes of lports on other HVs, which are the most
common scenarios in a cloud where workloads come up and down.

In ovn-scale-test env [1], the total execution time of creating and
binding 10k ports on 1k HVs with 40 lswitches and 8 lrouters
(5 lswitches/lrouter), decreased from 3h40m to 1h50m because of the
less CPU on HVs. The CPU time of ovn-controller for additional 500
lports creating and binding (on top of already existed 10k lports)
decreased 90% comparing with master.

Latency for end-to-end operations of one extra port on top of the
10k lports, start from port-creation until all flows installation
on all related HVs is also improved significantly:

before: 20.6s in total
- lsp-add: 0.4s
- wait-until port up=true: 4.8s
- --wait=hv sync: 15.4s

after: 7.3s in total
- lsp-add: 0.4s
- wait-until port up=true: 4.0s
- --wait=hv sync: 2.9s

[1] https://github.com/openvswitch/ovn-scale-test

Signed-off-by: Han Zhou <hzhou8@ebay.com>
---
 ovn/controller/ovn-controller.c |  97 ++++++++++++++++++++++++++-
 ovn/controller/physical.c       | 142 +++++++++++++++++++++++++++++-----------
 ovn/controller/physical.h       |  11 ++++
 3 files changed, 212 insertions(+), 38 deletions(-)

Patch

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index ed70c36..ff3cb08 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -1050,6 +1050,101 @@  flow_output_sb_logical_flow_handler(struct engine_node *node)
     return handled;
 }
 
+static bool
+flow_output_sb_port_binding_handler(struct engine_node *node)
+{
+    struct ed_type_runtime_data *data =
+        (struct ed_type_runtime_data *)engine_get_input(
+                "runtime_data", node)->data;
+    struct hmap *local_datapaths = &data->local_datapaths;
+    struct sset *active_tunnels = &data->active_tunnels;
+    struct simap *ct_zones = &data->ct_zones;
+
+    struct ovsrec_open_vswitch_table *ovs_table =
+        (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
+            engine_get_input("OVS_open_vswitch", node));
+    struct ovsrec_bridge_table *bridge_table =
+        (struct ovsrec_bridge_table *)EN_OVSDB_GET(
+            engine_get_input("OVS_bridge", node));
+    const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
+    const char *chassis_id = get_chassis_id(ovs_table);
+
+    struct ovsdb_idl_index *sbrec_chassis_by_name =
+        engine_ovsdb_node_get_index(
+                engine_get_input("SB_chassis", node),
+                "name");
+    const struct sbrec_chassis *chassis = NULL;
+    if (chassis_id) {
+        chassis = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
+    }
+    ovs_assert(br_int && chassis);
+
+    struct ed_type_flow_output *fo =
+        (struct ed_type_flow_output *)node->data;
+    struct ovn_desired_flow_table *flow_table = &fo->flow_table;
+
+    struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath =
+        engine_ovsdb_node_get_index(
+                engine_get_input("SB_multicast_group", node),
+                "name_datapath");
+
+    struct ovsdb_idl_index *sbrec_port_binding_by_name =
+        engine_ovsdb_node_get_index(
+                engine_get_input("SB_port_binding", node),
+                "name");
+
+    struct sbrec_port_binding_table *port_binding_table =
+        (struct sbrec_port_binding_table *)EN_OVSDB_GET(
+            engine_get_input("SB_port_binding", node));
+
+    /* XXX: now we handles port-binding changes for physical flow processing
+     * only, but port-binding change can have impact to logical flow
+     * processing, too, in below circumstances:
+     *
+     *  - When a port-binding for a lport is inserted/deleted but the lflow
+     *    using that lport doesn't change.
+     *
+     *    This is likely to happen only when the lport name is used by ACL
+     *    match condition, which is specified by user. Even in that case, when
+     *    port is actually bound on the chassis it will trigger recompute on
+     *    that chassis since ovs interface is updated. So the only situation
+     *    this would have real impact is when user defines an ACL that includes
+     *    lport that is not the ingress/egress lport, e.g.:
+     *
+     *    to-lport 1000 'outport=="A" && inport=="B"' allow-related
+     *
+     *    If "B" is created and bound after the ACL is created, the ACL may not
+     *    take effect on the chassis where "A" is bound, until a recompute is
+     *    triggered there later.
+     *
+     *  - When is_chassis_resident is used in lflow. In this case the port
+     *    binding is patch type, since this condition is used only for lrouter
+     *    ports. In current "runtime_data" handling, port-binding changes of
+     *    patch ports always trigger recomputing. So it is fine even if we do
+     *    not handle it here.
+     *
+     *  - When a mac-binding doesn't change but the port-binding related to
+     *    that mac-binding is deleted. In this case the neighbor flow generated
+     *    for the mac-binding should be deleted. This would not cause any real
+     *    issue for now, since mac-binding change triggers recomputing.
+     *
+     * To address the above issues, we will need to maintain a mapping between
+     * lport names and the lflows that uses them, and reprocess the related
+     * lflows when a port-binding corresponding to a lport name changes.
+     */
+
+    enum mf_field_id mff_ovn_geneve = ofctrl_get_mf_field_id();
+    physical_handle_port_binding_changes(
+            sbrec_chassis_by_name, sbrec_port_binding_by_name,
+            sbrec_multicast_group_by_name_datapath,
+            port_binding_table, mff_ovn_geneve,
+            chassis, ct_zones, local_datapaths,
+            active_tunnels, flow_table);
+
+    node->changed = true;
+    return true;
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -1147,7 +1242,7 @@  main(int argc, char *argv[])
     engine_add_input(&en_flow_output, &en_sb_encap, NULL);
     engine_add_input(&en_flow_output, &en_sb_multicast_group, NULL);
     engine_add_input(&en_flow_output, &en_sb_datapath_binding, NULL);
-    engine_add_input(&en_flow_output, &en_sb_port_binding, NULL);
+    engine_add_input(&en_flow_output, &en_sb_port_binding, flow_output_sb_port_binding_handler);
     engine_add_input(&en_flow_output, &en_sb_mac_binding, NULL);
     engine_add_input(&en_flow_output, &en_sb_logical_flow, flow_output_sb_logical_flow_handler);
     engine_add_input(&en_flow_output, &en_sb_dhcp_options, NULL);
diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index 34434a7..e2c7f54 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -360,7 +360,7 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
         ofpact_finish_CLONE(ofpacts_p, &clone);
 
         ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
         return;
     }
 
@@ -429,7 +429,7 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
         }
 
         ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
 
         goto out;
     }
@@ -572,7 +572,8 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
         /* Resubmit to first logical ingress pipeline table. */
         put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
         ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG,
-                        tag ? 150 : 100, 0, &match, ofpacts_p, hc_uuid);
+                        tag ? 150 : 100, 0, &match, ofpacts_p,
+                        &binding->header_.uuid);
 
         if (!tag && (!strcmp(binding->type, "localnet")
                      || !strcmp(binding->type, "l2gateway"))) {
@@ -582,7 +583,8 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
              * action. */
             ofpbuf_pull(ofpacts_p, ofpacts_orig_size);
             match_set_dl_tci_masked(&match, 0, htons(VLAN_CFI));
-            ofctrl_add_flow(flow_table, 0, 100, 0, &match, ofpacts_p, hc_uuid);
+            ofctrl_add_flow(flow_table, 0, 100, 0, &match, ofpacts_p,
+                            &binding->header_.uuid);
         }
 
         /* Table 65, Priority 100.
@@ -610,7 +612,7 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
             ofpact_put_STRIP_VLAN(ofpacts_p);
         }
         ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
     } else if (!tun && !is_ha_remote) {
         /* Remote port connected by localnet port */
         /* Table 33, priority 100.
@@ -633,7 +635,7 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
         /* Resubmit to table 33. */
         put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts_p);
         ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
     } else {
         /* Remote port connected by tunnel */
 
@@ -724,7 +726,7 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
             ofpact_finish_BUNDLE(ofpacts_p, &bundle);
         }
         ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, ofpacts_p, &binding->header_.uuid);
     }
 out:
     if (gateway_chassis) {
@@ -738,8 +740,6 @@  consider_mc_group(enum mf_field_id mff_ovn_geneve,
                   const struct hmap *local_datapaths,
                   const struct sbrec_chassis *chassis,
                   const struct sbrec_multicast_group *mc,
-                  struct ofpbuf *ofpacts_p,
-                  struct ofpbuf *remote_ofpacts_p,
                   struct ovn_desired_flow_table *flow_table)
 {
     uint32_t dp_key = mc->datapath->tunnel_key;
@@ -759,15 +759,17 @@  consider_mc_group(enum mf_field_id mff_ovn_geneve,
      *    - For remote ports, add the chassis to 'remote_chassis'.
      *
      *    - For local ports (other than logical patch ports), add actions
-     *      to 'ofpacts_p' to set the output port and resubmit.
+     *      to 'ofpacts' to set the output port and resubmit.
      *
-     *    - For logical patch ports, add actions to 'remote_ofpacts_p'
+     *    - For logical patch ports, add actions to 'remote_ofpacts'
      *      instead.  (If we put them in 'ofpacts', then the output
      *      would happen on every hypervisor in the multicast group,
      *      effectively duplicating the packet.)
      */
-    ofpbuf_clear(ofpacts_p);
-    ofpbuf_clear(remote_ofpacts_p);
+    struct ofpbuf ofpacts;
+    ofpbuf_init(&ofpacts, 0);
+    struct ofpbuf remote_ofpacts;
+    ofpbuf_init(&remote_ofpacts, 0);
     for (size_t i = 0; i < mc->n_ports; i++) {
         struct sbrec_port_binding *port = mc->ports[i];
 
@@ -781,20 +783,20 @@  consider_mc_group(enum mf_field_id mff_ovn_geneve,
 
         int zone_id = simap_get(ct_zones, port->logical_port);
         if (zone_id) {
-            put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, ofpacts_p);
+            put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, &ofpacts);
         }
 
         if (!strcmp(port->type, "patch")) {
             put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
-                     remote_ofpacts_p);
-            put_resubmit(OFTABLE_CHECK_LOOPBACK, remote_ofpacts_p);
+                     &remote_ofpacts);
+            put_resubmit(OFTABLE_CHECK_LOOPBACK, &remote_ofpacts);
         } else if (simap_contains(&localvif_to_ofport,
                            (port->parent_port && *port->parent_port)
                            ? port->parent_port : port->logical_port)
                    || (!strcmp(port->type, "l3gateway")
                        && port->chassis == chassis)) {
-            put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
-            put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts_p);
+            put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
+            put_resubmit(OFTABLE_CHECK_LOOPBACK, &ofpacts);
         } else if (port->chassis && !get_localnet_port(local_datapaths,
                                          mc->datapath->tunnel_key)) {
             /* Add remote chassis only when localnet port not exist,
@@ -809,14 +811,14 @@  consider_mc_group(enum mf_field_id mff_ovn_geneve,
      *
      * Handle output to the local logical ports in the multicast group, if
      * any. */
-    bool local_ports = ofpacts_p->size > 0;
+    bool local_ports = ofpacts.size > 0;
     if (local_ports) {
         /* Following delivery to local logical ports, restore the multicast
          * group as the logical output port. */
-        put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
+        put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
 
         ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, 0,
-                        &match, ofpacts_p, hc_uuid);
+                        &match, &ofpacts, &mc->header_.uuid);
     }
 
     /* Table 32, priority 100.
@@ -824,12 +826,12 @@  consider_mc_group(enum mf_field_id mff_ovn_geneve,
      *
      * Handle output to the remote chassis in the multicast group, if
      * any. */
-    if (!sset_is_empty(&remote_chassis) || remote_ofpacts_p->size > 0) {
-        if (remote_ofpacts_p->size > 0) {
+    if (!sset_is_empty(&remote_chassis) || remote_ofpacts.size > 0) {
+        if (remote_ofpacts.size > 0) {
             /* Following delivery to logical patch ports, restore the
              * multicast group as the logical output port. */
             put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
-                     remote_ofpacts_p);
+                     &remote_ofpacts);
         }
 
         const char *chassis_name;
@@ -843,20 +845,22 @@  consider_mc_group(enum mf_field_id mff_ovn_geneve,
 
             if (!prev || tun->type != prev->type) {
                 put_encapsulation(mff_ovn_geneve, tun, mc->datapath,
-                                  mc->tunnel_key, remote_ofpacts_p);
+                                  mc->tunnel_key, &remote_ofpacts);
                 prev = tun;
             }
-            ofpact_put_OUTPUT(remote_ofpacts_p)->port = tun->ofport;
+            ofpact_put_OUTPUT(&remote_ofpacts)->port = tun->ofport;
         }
 
-        if (remote_ofpacts_p->size) {
+        if (remote_ofpacts.size) {
             if (local_ports) {
-                put_resubmit(OFTABLE_LOCAL_OUTPUT, remote_ofpacts_p);
+                put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts);
             }
             ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100, 0,
-                            &match, remote_ofpacts_p, hc_uuid);
+                            &match, &remote_ofpacts, &mc->header_.uuid);
         }
     }
+    ofpbuf_uninit(&ofpacts);
+    ofpbuf_uninit(&remote_ofpacts);
     sset_destroy(&remote_chassis);
 }
 
@@ -871,6 +875,74 @@  update_ofports(struct simap *old, struct simap *new)
     return changed;
 }
 
+static void
+reconsider_mc_group_for_pb(
+        struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath,
+        enum mf_field_id mff_ovn_geneve,
+        const struct sbrec_chassis *chassis,
+        const struct simap *ct_zones,
+        const struct hmap *local_datapaths,
+        const struct sbrec_port_binding *pb,
+        const char *mc_name,
+        struct ovn_desired_flow_table *flow_table)
+{
+    const struct sbrec_multicast_group *mc
+        = mcgroup_lookup_by_dp_name(sbrec_multicast_group_by_name_datapath,
+                                    pb->datapath, mc_name);
+    if (mc) {
+        ofctrl_remove_flows(flow_table, &mc->header_.uuid);
+
+        consider_mc_group(mff_ovn_geneve, ct_zones, local_datapaths,
+                          chassis, mc, flow_table);
+    } else {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+        VLOG_DBG_RL(&rl, "MC group %s is not found in datapath: "UUID_FMT,
+                     mc_name, UUID_ARGS(&pb->datapath->header_.uuid));
+    }
+}
+
+void physical_handle_port_binding_changes(
+        struct ovsdb_idl_index *sbrec_chassis_by_name,
+        struct ovsdb_idl_index *sbrec_port_binding_by_name,
+        struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath,
+        const struct sbrec_port_binding_table *pb_table,
+        enum mf_field_id mff_ovn_geneve,
+        const struct sbrec_chassis *chassis,
+        const struct simap *ct_zones,
+        struct hmap *local_datapaths,
+        struct sset *active_tunnels,
+        struct ovn_desired_flow_table *flow_table)
+{
+    const struct sbrec_port_binding *binding;
+    struct ofpbuf ofpacts;
+    ofpbuf_init(&ofpacts, 0);
+    SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (binding, pb_table) {
+        if (sbrec_port_binding_is_deleted(binding)) {
+            ofctrl_remove_flows(flow_table, &binding->header_.uuid);
+        } else {
+            if (!sbrec_port_binding_is_new(binding)) {
+                ofctrl_remove_flows(flow_table, &binding->header_.uuid);
+
+                reconsider_mc_group_for_pb(sbrec_multicast_group_by_name_datapath,
+                                           mff_ovn_geneve, chassis,
+                                           ct_zones, local_datapaths,
+                                           binding, "_MC_flood", flow_table);
+                reconsider_mc_group_for_pb(sbrec_multicast_group_by_name_datapath,
+                                           mff_ovn_geneve, chassis,
+                                           ct_zones, local_datapaths,
+                                           binding, "_MC_unknown", flow_table);
+            }
+            consider_port_binding(sbrec_chassis_by_name,
+                                  sbrec_port_binding_by_name,
+                                  mff_ovn_geneve, ct_zones,
+                                  active_tunnels, local_datapaths,
+                                  binding, chassis,
+                                  flow_table, &ofpacts);
+        }
+    }
+
+}
+
 void
 physical_run(struct ovsdb_idl_index *sbrec_chassis_by_name,
              struct ovsdb_idl_index *sbrec_port_binding_by_name,
@@ -1011,22 +1083,18 @@  physical_run(struct ovsdb_idl_index *sbrec_chassis_by_name,
         consider_port_binding(sbrec_chassis_by_name,
                               sbrec_port_binding_by_name,
                               mff_ovn_geneve, ct_zones,
-                              active_tunnels,
-                              local_datapaths, binding, chassis,
+                              active_tunnels, local_datapaths,
+                              binding, chassis,
                               flow_table, &ofpacts);
     }
 
     /* Handle output to multicast groups, in tables 32 and 33. */
     const struct sbrec_multicast_group *mc;
-    struct ofpbuf remote_ofpacts;
-    ofpbuf_init(&remote_ofpacts, 0);
     SBREC_MULTICAST_GROUP_TABLE_FOR_EACH (mc, multicast_group_table) {
-        consider_mc_group(mff_ovn_geneve, ct_zones, local_datapaths, chassis,
-                          mc, &ofpacts, &remote_ofpacts, flow_table);
+        consider_mc_group(mff_ovn_geneve, ct_zones, local_datapaths,
+                          chassis, mc, flow_table);
     }
 
-    ofpbuf_uninit(&remote_ofpacts);
-
     /* Table 0, priority 100.
      * ======================
      *
diff --git a/ovn/controller/physical.h b/ovn/controller/physical.h
index 1dd2b9d..097e9f5 100644
--- a/ovn/controller/physical.h
+++ b/ovn/controller/physical.h
@@ -55,5 +55,16 @@  void physical_run(struct ovsdb_idl_index *sbrec_chassis_by_name,
                   const struct sset *local_lports,
                   const struct sset *active_tunnels,
                   struct ovn_desired_flow_table *);
+void physical_handle_port_binding_changes(
+        struct ovsdb_idl_index *sbrec_chassis_by_name,
+        struct ovsdb_idl_index *sbrec_port_binding_by_name,
+        struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath,
+        const struct sbrec_port_binding_table *,
+        enum mf_field_id mff_ovn_geneve,
+        const struct sbrec_chassis *,
+        const struct simap *ct_zones,
+        struct hmap *local_datapaths,
+        struct sset *active_tunnels,
+        struct ovn_desired_flow_table *);
 
 #endif /* ovn/physical.h */