diff mbox series

[ovs-dev,v3,2/2] Measure performance of ovn-controller loop.

Message ID 20180209230034.742-3-mmichels@redhat.com
State Changes Requested
Headers show
Series Add Performance Measurement Library | expand

Commit Message

Mark Michelson Feb. 9, 2018, 11 p.m. UTC
This modifies ovn-controller to measure the amount of time it takes to
detect a change in the southbound database, generate the resulting flow
table, and write the flow table down to vswitchd.

The statistics can be queried using:

ovs-appctl -t ovn-controller performance/show ovn-controller-loop

Signed-off-by: Mark Michelson <mmichels@redhat.com>
---
 ovn/controller/ovn-controller.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

Comments

Han Zhou Feb. 10, 2018, 1:36 a.m. UTC | #1
Looks a great tool! Just some minor comments:

On Fri, Feb 9, 2018 at 3:00 PM, Mark Michelson <mmichels@redhat.com> wrote:
>
> This modifies ovn-controller to measure the amount of time it takes to
> detect a change in the southbound database, generate the resulting flow
> table, and write the flow table down to vswitchd.

The comment is a little inaccurate. The change cannot guarantee measurement
of "write the flow table down to vswitchd", since sending the flow-adding
messages to vswitchd could be performed in multiple iterations of the main
loop, depending on the buffering status of vconn_send(). If it can't be
completed in one round, the message sendings will continue in next loops in
ofctrl_run().

>
> The statistics can be queried using:
>
> ovs-appctl -t ovn-controller performance/show ovn-controller-loop
>
> Signed-off-by: Mark Michelson <mmichels@redhat.com>
> ---
>  ovn/controller/ovn-controller.c | 17 +++++++++++++++++
>  1 file changed, 17 insertions(+)
>
> diff --git a/ovn/controller/ovn-controller.c
b/ovn/controller/ovn-controller.c
> index 7592bda25..b9f8950d4 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -57,6 +57,9 @@
>  #include "stream.h"
>  #include "unixctl.h"
>  #include "util.h"
> +#include "timeval.h"
> +#include "timer.h"
> +#include "performance.h"
>
>  VLOG_DEFINE_THIS_MODULE(main);
>
> @@ -67,6 +70,8 @@ static unixctl_cb_func inject_pkt;
>  #define DEFAULT_BRIDGE_NAME "br-int"
>  #define DEFAULT_PROBE_INTERVAL_MSEC 5000
>
> +#define CONTROLLER_LOOP_PERFORMANCE_NAME "ovn-controller-loop"
> +
>  static void update_probe_interval(struct controller_ctx *,
>                                    const char *ovnsb_remote);
>  static void parse_options(int argc, char *argv[]);
> @@ -639,8 +644,10 @@ main(int argc, char *argv[])
>      unixctl_command_register("inject-pkt", "MICROFLOW", 1, 1, inject_pkt,
>                               &pending_pkt);
>
> +    performance_create(CONTROLLER_LOOP_PERFORMANCE_NAME, PERF_MS);
>      /* Main loop. */
>      exiting = false;
> +    unsigned int our_seqno = 0;
>      while (!exiting) {
>          /* Check OVN SB database. */
>          char *new_ovnsb_remote = get_ovnsb_remote(ovs_idl_loop.idl);
> @@ -659,6 +666,12 @@ main(int argc, char *argv[])
>              .ovnsb_idl_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
>          };
>
> +        if (our_seqno != ovsdb_idl_get_seqno(ctx.ovnsb_idl)) {

Shall we measure when there is no SB change, too? E.g. the loop can be
triggered by local ovs-idl change, or by pin-ctrl messages, and these
changes will trigger all the flow computations, which contributes a
significant cost in ovn-controller, so I think we'd better measure them as
well. I am working on an incremental processing framework which would avoid
this kind of unnecessary recompute. I hope with this tool the improvements
can be measured accurately :)

> +            performance_start_sample(CONTROLLER_LOOP_PERFORMANCE_NAME,
> +                                     time_msec());
> +            our_seqno = ovsdb_idl_get_seqno(ctx.ovnsb_idl);
> +        }
> +
>          update_probe_interval(&ctx, ovnsb_remote);
>
>          update_ssl_config(ctx.ovs_idl);
> @@ -728,6 +741,9 @@ main(int argc, char *argv[])
>                      ofctrl_put(&flow_table, &pending_ct_zones,
>                                 get_nb_cfg(ctx.ovnsb_idl));
>
> +
 performance_end_sample(CONTROLLER_LOOP_PERFORMANCE_NAME,
> +                                           time_msec());
> +
>                      hmap_destroy(&flow_table);
>                  }
>                  if (ctx.ovnsb_idl_txn) {
> @@ -792,6 +808,7 @@ main(int argc, char *argv[])
>              ofctrl_wait();
>              pinctrl_wait(&ctx);
>          }
> +
>          ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
>
>          if (ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop) == 1) {
> --
> 2.13.6
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Acked-by: Han Zhan <zhouhan@gmail.com>
Mark Michelson Feb. 12, 2018, 2:16 p.m. UTC | #2
On 02/09/2018 07:36 PM, Han Zhou wrote:
> Looks a great tool! Just some minor comments:
> 
> On Fri, Feb 9, 2018 at 3:00 PM, Mark Michelson <mmichels@redhat.com 
> <mailto:mmichels@redhat.com>> wrote:
>  >
>  > This modifies ovn-controller to measure the amount of time it takes to
>  > detect a change in the southbound database, generate the resulting flow
>  > table, and write the flow table down to vswitchd.
> 
> The comment is a little inaccurate. The change cannot guarantee 
> measurement of "write the flow table down to vswitchd", since sending 
> the flow-adding messages to vswitchd could be performed in multiple 
> iterations of the main loop, depending on the buffering status of 
> vconn_send(). If it can't be completed in one round, the message 
> sendings will continue in next loops in ofctrl_run().

Thanks for the review Han. I attempted to account for this in the code 
by only ending the sample in the case that ofctrl_can_put() returns 
true. This way, we might start a measurement in one loop iteration, but 
the measurement might not end until a later iteration if 
ofctrl_can_put() returns false. Do you have suggestions for other 
factors that I should take into account for determining when to end a 
measurement?


> 
>  >
>  > The statistics can be queried using:
>  >
>  > ovs-appctl -t ovn-controller performance/show ovn-controller-loop
>  >
>  > Signed-off-by: Mark Michelson <mmichels@redhat.com 
> <mailto:mmichels@redhat.com>>
>  > ---
>  >  ovn/controller/ovn-controller.c | 17 +++++++++++++++++
>  >  1 file changed, 17 insertions(+)
>  >
>  > diff --git a/ovn/controller/ovn-controller.c 
> b/ovn/controller/ovn-controller.c
>  > index 7592bda25..b9f8950d4 100644
>  > --- a/ovn/controller/ovn-controller.c
>  > +++ b/ovn/controller/ovn-controller.c
>  > @@ -57,6 +57,9 @@
>  >  #include "stream.h"
>  >  #include "unixctl.h"
>  >  #include "util.h"
>  > +#include "timeval.h"
>  > +#include "timer.h"
>  > +#include "performance.h"
>  >
>  >  VLOG_DEFINE_THIS_MODULE(main);
>  >
>  > @@ -67,6 +70,8 @@ static unixctl_cb_func inject_pkt;
>  >  #define DEFAULT_BRIDGE_NAME "br-int"
>  >  #define DEFAULT_PROBE_INTERVAL_MSEC 5000
>  >
>  > +#define CONTROLLER_LOOP_PERFORMANCE_NAME "ovn-controller-loop"
>  > +
>  >  static void update_probe_interval(struct controller_ctx *,
>  >                                    const char *ovnsb_remote);
>  >  static void parse_options(int argc, char *argv[]);
>  > @@ -639,8 +644,10 @@ main(int argc, char *argv[])
>  >      unixctl_command_register("inject-pkt", "MICROFLOW", 1, 1, 
> inject_pkt,
>  >                               &pending_pkt);
>  >
>  > +    performance_create(CONTROLLER_LOOP_PERFORMANCE_NAME, PERF_MS);
>  >      /* Main loop. */
>  >      exiting = false;
>  > +    unsigned int our_seqno = 0;
>  >      while (!exiting) {
>  >          /* Check OVN SB database. */
>  >          char *new_ovnsb_remote = get_ovnsb_remote(ovs_idl_loop.idl);
>  > @@ -659,6 +666,12 @@ main(int argc, char *argv[])
>  >              .ovnsb_idl_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
>  >          };
>  >
>  > +        if (our_seqno != ovsdb_idl_get_seqno(ctx.ovnsb_idl)) {
> 
> Shall we measure when there is no SB change, too? E.g. the loop can be 
> triggered by local ovs-idl change, or by pin-ctrl messages, and these 
> changes will trigger all the flow computations, which contributes a 
> significant cost in ovn-controller, so I think we'd better measure them 
> as well. I am working on an incremental processing framework which would 
> avoid this kind of unnecessary recompute. I hope with this tool the 
> improvements can be measured accurately :)
> 
>  > +            performance_start_sample(CONTROLLER_LOOP_PERFORMANCE_NAME,
>  > +                                     time_msec());
>  > +            our_seqno = ovsdb_idl_get_seqno(ctx.ovnsb_idl);
>  > +        }
>  > +
>  >          update_probe_interval(&ctx, ovnsb_remote);
>  >
>  >          update_ssl_config(ctx.ovs_idl);
>  > @@ -728,6 +741,9 @@ main(int argc, char *argv[])
>  >                      ofctrl_put(&flow_table, &pending_ct_zones,
>  >                                 get_nb_cfg(ctx.ovnsb_idl));
>  >
>  > +                   
>   performance_end_sample(CONTROLLER_LOOP_PERFORMANCE_NAME,
>  > +                                           time_msec());
>  > +
>  >                      hmap_destroy(&flow_table);
>  >                  }
>  >                  if (ctx.ovnsb_idl_txn) {
>  > @@ -792,6 +808,7 @@ main(int argc, char *argv[])
>  >              ofctrl_wait();
>  >              pinctrl_wait(&ctx);
>  >          }
>  > +
>  >          ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
>  >
>  >          if (ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop) == 1) {
>  > --
>  > 2.13.6
>  >
>  > _______________________________________________
>  > dev mailing list
>  > dev@openvswitch.org <mailto:dev@openvswitch.org>
>  > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> 
> Acked-by: Han Zhan <zhouhan@gmail.com <mailto:zhouhan@gmail.com>>
Han Zhou Feb. 12, 2018, 6:58 p.m. UTC | #3
On Mon, Feb 12, 2018 at 6:16 AM, Mark Michelson <mmichels@redhat.com> wrote:
>
> On 02/09/2018 07:36 PM, Han Zhou wrote:
>>
>> Looks a great tool! Just some minor comments:
>>
>> On Fri, Feb 9, 2018 at 3:00 PM, Mark Michelson <mmichels@redhat.com
<mailto:mmichels@redhat.com>> wrote:
>>  >
>>  > This modifies ovn-controller to measure the amount of time it takes to
>>  > detect a change in the southbound database, generate the resulting
flow
>>  > table, and write the flow table down to vswitchd.
>>
>> The comment is a little inaccurate. The change cannot guarantee
measurement of "write the flow table down to vswitchd", since sending the
flow-adding messages to vswitchd could be performed in multiple iterations
of the main loop, depending on the buffering status of vconn_send(). If it
can't be completed in one round, the message sendings will continue in next
loops in ofctrl_run().
>
>
> Thanks for the review Han. I attempted to account for this in the code by
only ending the sample in the case that ofctrl_can_put() returns true. This
way, we might start a measurement in one loop iteration, but the
measurement might not end until a later iteration if ofctrl_can_put()
returns false. Do you have suggestions for other factors that I should take
into account for determining when to end a measurement?
>

I think one way to address this, if we really want to, is to compare
ofctrl_get_cur_cfg() with get_nb_cfg(ctx.ovnsb_idl). If they are equal, it
means ovs-vswitchd caught up with SB with all required flows installed.
I suggest to have separate measurements, one for main loop execution time,
one for SB change handling up until flows installed to OVS. They could be
separate patches. What do you think?

>
>>
>>  >
>>  > The statistics can be queried using:
>>  >
>>  > ovs-appctl -t ovn-controller performance/show ovn-controller-loop
>>  >
>>  > Signed-off-by: Mark Michelson <mmichels@redhat.com <mailto:
mmichels@redhat.com>>
>>
>>  > ---
>>  >  ovn/controller/ovn-controller.c | 17 +++++++++++++++++
>>  >  1 file changed, 17 insertions(+)
>>  >
>>  > diff --git a/ovn/controller/ovn-controller.c
b/ovn/controller/ovn-controller.c
>>  > index 7592bda25..b9f8950d4 100644
>>  > --- a/ovn/controller/ovn-controller.c
>>  > +++ b/ovn/controller/ovn-controller.c
>>  > @@ -57,6 +57,9 @@
>>  >  #include "stream.h"
>>  >  #include "unixctl.h"
>>  >  #include "util.h"
>>  > +#include "timeval.h"
>>  > +#include "timer.h"
>>  > +#include "performance.h"
>>  >
>>  >  VLOG_DEFINE_THIS_MODULE(main);
>>  >
>>  > @@ -67,6 +70,8 @@ static unixctl_cb_func inject_pkt;
>>  >  #define DEFAULT_BRIDGE_NAME "br-int"
>>  >  #define DEFAULT_PROBE_INTERVAL_MSEC 5000
>>  >
>>  > +#define CONTROLLER_LOOP_PERFORMANCE_NAME "ovn-controller-loop"
>>  > +
>>  >  static void update_probe_interval(struct controller_ctx *,
>>  >                                    const char *ovnsb_remote);
>>  >  static void parse_options(int argc, char *argv[]);
>>  > @@ -639,8 +644,10 @@ main(int argc, char *argv[])
>>  >      unixctl_command_register("inject-pkt", "MICROFLOW", 1, 1,
inject_pkt,
>>  >                               &pending_pkt);
>>  >
>>  > +    performance_create(CONTROLLER_LOOP_PERFORMANCE_NAME, PERF_MS);
>>  >      /* Main loop. */
>>  >      exiting = false;
>>  > +    unsigned int our_seqno = 0;
>>  >      while (!exiting) {
>>  >          /* Check OVN SB database. */
>>  >          char *new_ovnsb_remote = get_ovnsb_remote(ovs_idl_loop.idl);
>>  > @@ -659,6 +666,12 @@ main(int argc, char *argv[])
>>  >              .ovnsb_idl_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
>>  >          };
>>  >
>>  > +        if (our_seqno != ovsdb_idl_get_seqno(ctx.ovnsb_idl)) {
>>
>> Shall we measure when there is no SB change, too? E.g. the loop can be
triggered by local ovs-idl change, or by pin-ctrl messages, and these
changes will trigger all the flow computations, which contributes a
significant cost in ovn-controller, so I think we'd better measure them as
well. I am working on an incremental processing framework which would avoid
this kind of unnecessary recompute. I hope with this tool the improvements
can be measured accurately :)
>>
>>  > +
 performance_start_sample(CONTROLLER_LOOP_PERFORMANCE_NAME,
>>  > +                                     time_msec());
>>  > +            our_seqno = ovsdb_idl_get_seqno(ctx.ovnsb_idl);
>>  > +        }
>>  > +
>>  >          update_probe_interval(&ctx, ovnsb_remote);
>>  >
>>  >          update_ssl_config(ctx.ovs_idl);
>>  > @@ -728,6 +741,9 @@ main(int argc, char *argv[])
>>  >                      ofctrl_put(&flow_table, &pending_ct_zones,
>>  >                                 get_nb_cfg(ctx.ovnsb_idl));
>>  >
>>  > +
performance_end_sample(CONTROLLER_LOOP_PERFORMANCE_NAME,
>>  > +                                           time_msec());
>>  > +
>>  >                      hmap_destroy(&flow_table);
>>  >                  }
>>  >                  if (ctx.ovnsb_idl_txn) {
>>  > @@ -792,6 +808,7 @@ main(int argc, char *argv[])
>>  >              ofctrl_wait();
>>  >              pinctrl_wait(&ctx);
>>  >          }
>>  > +
>>  >          ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
>>  >
>>  >          if (ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop) == 1) {
>>  > --
>>  > 2.13.6
>>  >
>>  > _______________________________________________
>>  > dev mailing list
>>  > dev@openvswitch.org <mailto:dev@openvswitch.org>
>>  > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
>> Acked-by: Han Zhan <zhouhan@gmail.com <mailto:zhouhan@gmail.com>>
>
>
Mark Michelson Feb. 12, 2018, 7:16 p.m. UTC | #4
On 02/12/2018 12:58 PM, Han Zhou wrote:
> 
> 
> On Mon, Feb 12, 2018 at 6:16 AM, Mark Michelson <mmichels@redhat.com 
> <mailto:mmichels@redhat.com>> wrote:
>  >
>  > On 02/09/2018 07:36 PM, Han Zhou wrote:
>  >>
>  >> Looks a great tool! Just some minor comments:
>  >>
>  >> On Fri, Feb 9, 2018 at 3:00 PM, Mark Michelson <mmichels@redhat.com 
> <mailto:mmichels@redhat.com> <mailto:mmichels@redhat.com 
> <mailto:mmichels@redhat.com>>> wrote:
>  >>  >
>  >>  > This modifies ovn-controller to measure the amount of time it 
> takes to
>  >>  > detect a change in the southbound database, generate the 
> resulting flow
>  >>  > table, and write the flow table down to vswitchd.
>  >>
>  >> The comment is a little inaccurate. The change cannot guarantee 
> measurement of "write the flow table down to vswitchd", since sending 
> the flow-adding messages to vswitchd could be performed in multiple 
> iterations of the main loop, depending on the buffering status of 
> vconn_send(). If it can't be completed in one round, the message 
> sendings will continue in next loops in ofctrl_run().
>  >
>  >
>  > Thanks for the review Han. I attempted to account for this in the 
> code by only ending the sample in the case that ofctrl_can_put() returns 
> true. This way, we might start a measurement in one loop iteration, but 
> the measurement might not end until a later iteration if 
> ofctrl_can_put() returns false. Do you have suggestions for other 
> factors that I should take into account for determining when to end a 
> measurement?
>  >
> 
> I think one way to address this, if we really want to, is to compare 
> ofctrl_get_cur_cfg() with get_nb_cfg(ctx.ovnsb_idl). If they are equal, 
> it means ovs-vswitchd caught up with SB with all required flows installed.
> I suggest to have separate measurements, one for main loop execution 
> time, one for SB change handling up until flows installed to OVS. They 
> could be separate patches. What do you think?
> 

That sounds like a good approach. Thanks for the feedback!
diff mbox series

Patch

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 7592bda25..b9f8950d4 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -57,6 +57,9 @@ 
 #include "stream.h"
 #include "unixctl.h"
 #include "util.h"
+#include "timeval.h"
+#include "timer.h"
+#include "performance.h"
 
 VLOG_DEFINE_THIS_MODULE(main);
 
@@ -67,6 +70,8 @@  static unixctl_cb_func inject_pkt;
 #define DEFAULT_BRIDGE_NAME "br-int"
 #define DEFAULT_PROBE_INTERVAL_MSEC 5000
 
+#define CONTROLLER_LOOP_PERFORMANCE_NAME "ovn-controller-loop"
+
 static void update_probe_interval(struct controller_ctx *,
                                   const char *ovnsb_remote);
 static void parse_options(int argc, char *argv[]);
@@ -639,8 +644,10 @@  main(int argc, char *argv[])
     unixctl_command_register("inject-pkt", "MICROFLOW", 1, 1, inject_pkt,
                              &pending_pkt);
 
+    performance_create(CONTROLLER_LOOP_PERFORMANCE_NAME, PERF_MS);
     /* Main loop. */
     exiting = false;
+    unsigned int our_seqno = 0;
     while (!exiting) {
         /* Check OVN SB database. */
         char *new_ovnsb_remote = get_ovnsb_remote(ovs_idl_loop.idl);
@@ -659,6 +666,12 @@  main(int argc, char *argv[])
             .ovnsb_idl_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
         };
 
+        if (our_seqno != ovsdb_idl_get_seqno(ctx.ovnsb_idl)) {
+            performance_start_sample(CONTROLLER_LOOP_PERFORMANCE_NAME,
+                                     time_msec());
+            our_seqno = ovsdb_idl_get_seqno(ctx.ovnsb_idl);
+        }
+
         update_probe_interval(&ctx, ovnsb_remote);
 
         update_ssl_config(ctx.ovs_idl);
@@ -728,6 +741,9 @@  main(int argc, char *argv[])
                     ofctrl_put(&flow_table, &pending_ct_zones,
                                get_nb_cfg(ctx.ovnsb_idl));
 
+                    performance_end_sample(CONTROLLER_LOOP_PERFORMANCE_NAME,
+                                           time_msec());
+
                     hmap_destroy(&flow_table);
                 }
                 if (ctx.ovnsb_idl_txn) {
@@ -792,6 +808,7 @@  main(int argc, char *argv[])
             ofctrl_wait();
             pinctrl_wait(&ctx);
         }
+
         ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
 
         if (ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop) == 1) {