diff mbox

[ovs-dev,v2,2/2] ovn-controller: Use separate thread for packet-in processing.

Message ID 1496853167-61206-4-git-send-email-zhouhan@gmail.com
State Deferred
Headers show

Commit Message

Han Zhou June 7, 2017, 4:32 p.m. UTC
This patch introduces multi-threading for ovn-controller and use
dedicated thread for packet-in processing as a start. It decouples
packet-in processing and ovs flow computing, so that packet-in inputs
won't trigger flow recomputing, and flow computing won't block
packet-in processing. In large scale environment this largely reduces
CPU cost and improves performance.

Related effort and discussion:
https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/331813.html

Signed-off-by: Han Zhou <zhouhan@gmail.com>
---
 ovn/controller/ovn-controller.c | 43 ++++++++++++------
 ovn/controller/ovn-controller.h | 33 ++++++++++++++
 ovn/controller/pinctrl.c        | 96 +++++++++++++++++++++++++++++++++++++++++
 ovn/controller/pinctrl.h        |  1 +
 4 files changed, 161 insertions(+), 12 deletions(-)

Comments

Ben Pfaff July 10, 2017, 6:09 p.m. UTC | #1
On Wed, Jun 07, 2017 at 09:32:47AM -0700, Han Zhou wrote:
> This patch introduces multi-threading for ovn-controller and use
> dedicated thread for packet-in processing as a start. It decouples
> packet-in processing and ovs flow computing, so that packet-in inputs
> won't trigger flow recomputing, and flow computing won't block
> packet-in processing. In large scale environment this largely reduces
> CPU cost and improves performance.
> 
> Related effort and discussion:
> https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/331813.html
> 
> Signed-off-by: Han Zhou <zhouhan@gmail.com>

Doesn't this still keep a second connection to the SB database, and a
second replica of its data, on every ovn-controller?  Maybe I'm missing
something.
Han Zhou July 10, 2017, 6:16 p.m. UTC | #2
On Mon, Jul 10, 2017 at 11:09 AM, Ben Pfaff <blp@ovn.org> wrote:
>
> On Wed, Jun 07, 2017 at 09:32:47AM -0700, Han Zhou wrote:
> > This patch introduces multi-threading for ovn-controller and use
> > dedicated thread for packet-in processing as a start. It decouples
> > packet-in processing and ovs flow computing, so that packet-in inputs
> > won't trigger flow recomputing, and flow computing won't block
> > packet-in processing. In large scale environment this largely reduces
> > CPU cost and improves performance.
> >
> > Related effort and discussion:
> > https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/331813.html
> >
> > Signed-off-by: Han Zhou <zhouhan@gmail.com>
>
> Doesn't this still keep a second connection to the SB database, and a
> second replica of its data, on every ovn-controller?  Maybe I'm missing
> something.

Right, this is not changed yet as mentioned in the cover letter. (This
should be 3/3, and I can't remember why it was put 2/2 though).
I described my thoughts and proposals and I need your input to decide which
option to continue:

https://mail.openvswitch.org/pipermail/ovs-dev/2017-June/333927.html
diff mbox

Patch

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 5f21b20..89e0a3d 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -54,6 +54,8 @@ 
 #include "stream.h"
 #include "unixctl.h"
 #include "util.h"
+#include "latch.h"
+#include "ovs-thread.h"
 
 VLOG_DEFINE_THIS_MODULE(main);
 
@@ -64,8 +66,6 @@  static unixctl_cb_func inject_pkt;
 #define DEFAULT_BRIDGE_NAME "br-int"
 #define DEFAULT_PROBE_INTERVAL_MSEC 5000
 
-static void update_probe_interval(struct controller_ctx *,
-                                  const char *ovnsb_remote);
 static void parse_options(int argc, char *argv[]);
 OVS_NO_RETURN static void usage(void);
 
@@ -76,7 +76,7 @@  struct pending_pkt {
     char *flow_s;
 };
 
-static char *ovs_remote;
+char *ovs_remote;
 
 struct local_datapath *
 get_local_datapath(const struct hmap *local_datapaths, uint32_t tunnel_key)
@@ -127,7 +127,7 @@  get_bridge(struct ovsdb_idl *ovs_idl, const char *br_name)
     return NULL;
 }
 
-static void
+void
 update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
                    const struct sbrec_chassis *chassis,
                    const struct sset *local_ifaces,
@@ -243,7 +243,7 @@  create_br_int(struct controller_ctx *ctx,
     return bridge;
 }
 
-static const struct ovsrec_bridge *
+const struct ovsrec_bridge *
 get_br_int(struct controller_ctx *ctx, bool need_create)
 {
     const struct ovsrec_open_vswitch *cfg;
@@ -263,7 +263,7 @@  get_br_int(struct controller_ctx *ctx, bool need_create)
     return br;
 }
 
-static const char *
+const char *
 get_chassis_id(const struct ovsdb_idl *ovs_idl)
 {
     const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(ovs_idl);
@@ -303,7 +303,7 @@  update_ssl_config(const struct ovsdb_idl *ovs_idl)
 
 /* Retrieves the OVN Southbound remote location from the
  * "external-ids:ovn-remote" key in 'ovs_idl' and returns a copy of it. */
-static char *
+char *
 get_ovnsb_remote(struct ovsdb_idl *ovs_idl)
 {
     while (1) {
@@ -492,6 +492,22 @@  get_nb_cfg(struct ovsdb_idl *idl)
 }
 
 static void
+ctrl_thread_create(struct ctrl_thread *thread, const char *name,
+    void *(*start)(void *))
+{
+    latch_init(&thread->exit_latch);
+    thread->thread = ovs_thread_create(name, start, thread);
+}
+
+static void
+ctrl_thread_exit(struct ctrl_thread *thread)
+{
+    latch_set(&thread->exit_latch);
+    xpthread_join(thread->thread, NULL);
+    latch_destroy(&thread->exit_latch);
+}
+
+void
 ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
 {
     /* We do not monitor all tables by default, so modules must register
@@ -556,7 +572,6 @@  main(int argc, char *argv[])
     daemonize_complete();
 
     ofctrl_init(&group_table);
-    pinctrl_init();
     lflow_init();
 
     /* Connect to OVS OVSDB instance. */
@@ -587,6 +602,10 @@  main(int argc, char *argv[])
     unixctl_command_register("inject-pkt", "MICROFLOW", 1, 1, inject_pkt,
                              &pending_pkt);
 
+
+    struct ctrl_thread pinctrl_thread;
+    ctrl_thread_create(&pinctrl_thread, "pinctrl", pinctrl_thread_main);
+
     /* Main loop. */
     exiting = false;
     while (!exiting) {
@@ -648,7 +667,6 @@  main(int argc, char *argv[])
             enum mf_field_id mff_ovn_geneve = ofctrl_run(br_int,
                                                          &pending_ct_zones);
 
-            pinctrl_run(&ctx, &lports, br_int, chassis, &local_datapaths);
             update_ct_zones(&local_lports, &local_datapaths, &ct_zones,
                             ct_zone_bitmap, &pending_ct_zones);
             if (ctx.ovs_idl_txn) {
@@ -728,7 +746,6 @@  main(int argc, char *argv[])
 
         if (br_int) {
             ofctrl_wait();
-            pinctrl_wait(&ctx);
         }
         ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
 
@@ -777,10 +794,12 @@  main(int argc, char *argv[])
         poll_block();
     }
 
+    /* stop child controller threads */
+    ctrl_thread_exit(&pinctrl_thread);
+
     unixctl_server_destroy(unixctl);
     lflow_destroy();
     ofctrl_destroy();
-    pinctrl_destroy();
 
     simap_destroy(&ct_zones);
 
@@ -937,7 +956,7 @@  inject_pkt(struct unixctl_conn *conn, int argc OVS_UNUSED,
 
 /* Get the desired SB probe timer from the OVS database and configure it into
  * the SB database. */
-static void
+void
 update_probe_interval(struct controller_ctx *ctx, const char *ovnsb_remote)
 {
     const struct ovsrec_open_vswitch *cfg
diff --git a/ovn/controller/ovn-controller.h b/ovn/controller/ovn-controller.h
index 4bc0467..2fb3f93 100644
--- a/ovn/controller/ovn-controller.h
+++ b/ovn/controller/ovn-controller.h
@@ -18,7 +18,9 @@ 
 #define OVN_CONTROLLER_H 1
 
 #include "simap.h"
+#include "sset.h"
 #include "ovn/lib/ovn-sb-idl.h"
+#include "latch.h"
 
 /* Linux supports a maximum of 64K zones, which seems like a fine default. */
 #define MAX_CT_ZONES 65535
@@ -68,6 +70,13 @@  struct local_datapath {
     bool has_local_l3gateway;
 };
 
+struct ctrl_thread {
+    pthread_t thread;
+
+    /* Controls thread exit. */
+    struct latch exit_latch;
+};
+
 struct local_datapath *get_local_datapath(const struct hmap *,
                                           uint32_t tunnel_key);
 
@@ -87,5 +96,29 @@  enum chassis_tunnel_type {
 
 uint32_t get_tunnel_type(const char *name);
 
+/* Retrieves the OVN Southbound remote location from the
+ * "external-ids:ovn-remote" key in 'ovs_idl' and returns a copy of it. */
+char *get_ovnsb_remote(struct ovsdb_idl *ovs_idl);
+
+void
+update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
+                   const struct sbrec_chassis *chassis,
+                   const struct sset *local_ifaces,
+                   struct hmap *local_datapaths);
+
+/* Get the desired SB probe timer from the OVS database and configure it into
+ * the SB database. */
+void
+update_probe_interval(struct controller_ctx *ctx, const char *ovnsb_remote);
+
+const struct ovsrec_bridge *
+get_br_int(struct controller_ctx *ctx, bool need_create);
+
+const char *
+get_chassis_id(const struct ovsdb_idl *ovs_idl);
+
+void
+ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl);
 
+extern char *ovs_remote;
 #endif /* ovn/ovn-controller.h */
diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index 660233a..0282654 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -46,6 +46,8 @@ 
 #include "socket-util.h"
 #include "timeval.h"
 #include "vswitch-idl.h"
+#include "latch.h"
+#include "binding.h"
 
 VLOG_DEFINE_THIS_MODULE(pinctrl);
 
@@ -81,6 +83,100 @@  static void reload_metadata(struct ofpbuf *ofpacts,
 
 COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
 
+void *
+pinctrl_thread_main(void *arg)
+{
+    struct ctrl_thread *thread = arg;
+    pinctrl_init();
+
+    struct ovsdb_idl_loop ovs_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
+        ovsdb_idl_create(ovs_remote, &ovsrec_idl_class, false, true));
+    ctrl_register_ovs_idl(ovs_idl_loop.idl);
+    ovsdb_idl_get_initial_snapshot(ovs_idl_loop.idl);
+
+    char *ovnsb_remote = get_ovnsb_remote(ovs_idl_loop.idl);
+    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
+        ovsdb_idl_create(ovnsb_remote, &sbrec_idl_class, true, true));
+    ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
+    update_sb_monitors(ovnsb_idl_loop.idl, NULL, NULL, NULL);
+    ovsdb_idl_get_initial_snapshot(ovnsb_idl_loop.idl);
+
+    while (!latch_is_set(&thread->exit_latch)) {
+        /* Below logic is similar as in main loop in ovn-controller.c,
+         * while the purpose here is packet-in processing only */
+        char *new_ovnsb_remote = get_ovnsb_remote(ovs_idl_loop.idl);
+        if (strcmp(ovnsb_remote, new_ovnsb_remote)) {
+            free(ovnsb_remote);
+            ovnsb_remote = new_ovnsb_remote;
+            ovsdb_idl_set_remote(ovnsb_idl_loop.idl, ovnsb_remote, true);
+        } else {
+            free(new_ovnsb_remote);
+        }
+
+        struct controller_ctx ctx = {
+            .ovs_idl = ovs_idl_loop.idl,
+            .ovs_idl_txn = ovsdb_idl_loop_run(&ovs_idl_loop),
+            .ovnsb_idl = ovnsb_idl_loop.idl,
+            .ovnsb_idl_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
+        };
+
+        update_probe_interval(&ctx, ovnsb_remote);
+
+        struct hmap local_datapaths = HMAP_INITIALIZER(&local_datapaths);
+        struct sset local_lports = SSET_INITIALIZER(&local_lports);
+        const struct ovsrec_bridge *br_int = get_br_int(&ctx, false);
+        const char *chassis_id = get_chassis_id(ctx.ovs_idl);
+        struct ldatapath_index ldatapaths;
+        struct lport_index lports;
+        ldatapath_index_init(&ldatapaths, ctx.ovnsb_idl);
+        lport_index_init(&lports, ctx.ovnsb_idl);
+
+        const struct sbrec_chassis *chassis = NULL;
+        if (chassis_id) {
+            chassis = get_chassis(ctx.ovnsb_idl, chassis_id);
+            binding_run(&ctx, br_int, chassis, &ldatapaths, &lports,
+                        &local_datapaths, &local_lports, false);
+        }
+
+        if (br_int && chassis) {
+            pinctrl_run(&ctx, &lports, br_int, chassis, &local_datapaths);
+            update_sb_monitors(ctx.ovnsb_idl, chassis,
+                               &local_lports, &local_datapaths);
+        }
+
+        lport_index_destroy(&lports);
+        ldatapath_index_destroy(&ldatapaths);
+
+        sset_destroy(&local_lports);
+
+        struct local_datapath *cur_node, *next_node;
+        HMAP_FOR_EACH_SAFE (cur_node, next_node, hmap_node, &local_datapaths) {
+            hmap_remove(&local_datapaths, &cur_node->hmap_node);
+            free(cur_node);
+        }
+        hmap_destroy(&local_datapaths);
+
+        if (br_int) {
+            pinctrl_wait(&ctx);
+        }
+        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
+        ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop);
+
+        latch_wait(&thread->exit_latch);
+        poll_block();
+    }
+
+    pinctrl_destroy();
+
+    ovsdb_idl_loop_destroy(&ovs_idl_loop);
+    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
+
+    free(ovnsb_remote);
+
+    VLOG_INFO("pinctrl thread done");
+    return NULL;
+}
+
 void
 pinctrl_init(void)
 {
diff --git a/ovn/controller/pinctrl.h b/ovn/controller/pinctrl.h
index 230580b..573b536 100644
--- a/ovn/controller/pinctrl.h
+++ b/ovn/controller/pinctrl.h
@@ -31,6 +31,7 @@  void pinctrl_init(void);
 void pinctrl_run(struct controller_ctx *, const struct lport_index *,
                  const struct ovsrec_bridge *, const struct sbrec_chassis *,
                  struct hmap *local_datapaths);
+void *pinctrl_thread_main(void *arg);
 void pinctrl_wait(struct controller_ctx *);
 void pinctrl_destroy(void);