[ovs-dev,v2,2/2] Measure performance of ovn-controller loop.

Message ID 20180112225800.31938-3-mmichels@redhat.com
State Changes Requested
Headers show
Series
  • Add performance measurement library
Related show

Commit Message

Mark Michelson Jan. 12, 2018, 10:58 p.m.
This modifies ovn-controller to measure the amount of time it takes to
detect a change in the southbound database, generate the resulting flow
table, and write the flow table down to vswitchd.

The statistics can be queried using:

ovs-appctl -t ovn-controller performance/show ovn-controller-loop

Signed-off-by: Mark Michelson <mmichels@redhat.com>
---
 ovn/controller/ovn-controller.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

Comments

Aaron Conole Jan. 18, 2018, 4:58 p.m. | #1
Mark Michelson <mmichels@redhat.com> writes:

> This modifies ovn-controller to measure the amount of time it takes to
> detect a change in the southbound database, generate the resulting flow
> table, and write the flow table down to vswitchd.
>
> The statistics can be queried using:
>
> ovs-appctl -t ovn-controller performance/show ovn-controller-loop
>
> Signed-off-by: Mark Michelson <mmichels@redhat.com>
> ---
>  ovn/controller/ovn-controller.c | 17 +++++++++++++++++
>  1 file changed, 17 insertions(+)
>
> diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
> index c286ccbca..f51d5660f 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -56,6 +56,9 @@
>  #include "stream.h"
>  #include "unixctl.h"
>  #include "util.h"
> +#include "timeval.h"
> +#include "timer.h"
> +#include "performance.h"
>  
>  VLOG_DEFINE_THIS_MODULE(main);
>  
> @@ -66,6 +69,8 @@ static unixctl_cb_func inject_pkt;
>  #define DEFAULT_BRIDGE_NAME "br-int"
>  #define DEFAULT_PROBE_INTERVAL_MSEC 5000
>  
> +#define CONTROLLER_LOOP_PERFORMANCE_NAME "ovn-controller-loop"
> +
>  static void update_probe_interval(struct controller_ctx *,
>                                    const char *ovnsb_remote);
>  static void parse_options(int argc, char *argv[]);
> @@ -637,8 +642,10 @@ main(int argc, char *argv[])
>      unixctl_command_register("inject-pkt", "MICROFLOW", 1, 1, inject_pkt,
>                               &pending_pkt);
>  
> +    performance_create(CONTROLLER_LOOP_PERFORMANCE_NAME);
>      /* Main loop. */
>      exiting = false;
> +    unsigned int our_seqno = 0;
>      while (!exiting) {
>          /* Check OVN SB database. */
>          char *new_ovnsb_remote = get_ovnsb_remote(ovs_idl_loop.idl);
> @@ -657,6 +664,11 @@ main(int argc, char *argv[])
>              .ovnsb_idl_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
>          };
>  
> +        if (our_seqno != ovsdb_idl_get_seqno(ctx.ovnsb_idl) &&
> +                performance_start_sample(CONTROLLER_LOOP_PERFORMANCE_NAME)) {
> +            our_seqno = ovsdb_idl_get_seqno(ctx.ovnsb_idl);

I'm worried about this.  If some programming logic causes
performance_end_sample to be missed once, will the sequence number
updates ever take place?

I didn't look at ovn_controller (just looking at the diff in email), so
disregard if that's not actually a concern.

> +        }
> +
>          update_probe_interval(&ctx, ovnsb_remote);
>  
>          update_ssl_config(ctx.ovs_idl);
> @@ -726,6 +738,8 @@ main(int argc, char *argv[])
>                      ofctrl_put(&flow_table, &pending_ct_zones,
>                                 get_nb_cfg(ctx.ovnsb_idl));
>  
> +                    performance_end_sample(CONTROLLER_LOOP_PERFORMANCE_NAME);
> +
>                      hmap_destroy(&flow_table);
>                  }
>                  if (ctx.ovnsb_idl_txn) {
> @@ -790,6 +804,8 @@ main(int argc, char *argv[])
>              ofctrl_wait();
>              pinctrl_wait(&ctx);
>          }
> +
> +        performance_run();

Maybe the performance_init() should create a new ovs thread which makes
this call.  Then there's no need to add it in the main() functions.

Actually, if a new daemon is implemented or an existing daemon ends up
calling a function that does performance counting, without the periodic
call to performance_run(), I think memory will end up growing without
bounds, because no cull will happen.

>          ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
>  
>          if (ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop) == 1) {
> @@ -837,6 +853,7 @@ main(int argc, char *argv[])
>          poll_block();
>      }
>  
> +    performance_destroy("ovn-controller-loop");

See the comment in previous about using an atexit() handler.  Then you
can just iterate through the performance table and delete them all.

>      unixctl_server_destroy(unixctl);
>      lflow_destroy();
>      ofctrl_destroy();
Mark Michelson Jan. 18, 2018, 9:44 p.m. | #2
On 01/18/2018 10:58 AM, Aaron Conole wrote:
> Mark Michelson <mmichels@redhat.com> writes:
> 
>> This modifies ovn-controller to measure the amount of time it takes to
>> detect a change in the southbound database, generate the resulting flow
>> table, and write the flow table down to vswitchd.
>>
>> The statistics can be queried using:
>>
>> ovs-appctl -t ovn-controller performance/show ovn-controller-loop
>>
>> Signed-off-by: Mark Michelson <mmichels@redhat.com>
>> ---
>>   ovn/controller/ovn-controller.c | 17 +++++++++++++++++
>>   1 file changed, 17 insertions(+)
>>
>> diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
>> index c286ccbca..f51d5660f 100644
>> --- a/ovn/controller/ovn-controller.c
>> +++ b/ovn/controller/ovn-controller.c
>> @@ -56,6 +56,9 @@
>>   #include "stream.h"
>>   #include "unixctl.h"
>>   #include "util.h"
>> +#include "timeval.h"
>> +#include "timer.h"
>> +#include "performance.h"
>>   
>>   VLOG_DEFINE_THIS_MODULE(main);
>>   
>> @@ -66,6 +69,8 @@ static unixctl_cb_func inject_pkt;
>>   #define DEFAULT_BRIDGE_NAME "br-int"
>>   #define DEFAULT_PROBE_INTERVAL_MSEC 5000
>>   
>> +#define CONTROLLER_LOOP_PERFORMANCE_NAME "ovn-controller-loop"
>> +
>>   static void update_probe_interval(struct controller_ctx *,
>>                                     const char *ovnsb_remote);
>>   static void parse_options(int argc, char *argv[]);
>> @@ -637,8 +642,10 @@ main(int argc, char *argv[])
>>       unixctl_command_register("inject-pkt", "MICROFLOW", 1, 1, inject_pkt,
>>                                &pending_pkt);
>>   
>> +    performance_create(CONTROLLER_LOOP_PERFORMANCE_NAME);
>>       /* Main loop. */
>>       exiting = false;
>> +    unsigned int our_seqno = 0;
>>       while (!exiting) {
>>           /* Check OVN SB database. */
>>           char *new_ovnsb_remote = get_ovnsb_remote(ovs_idl_loop.idl);
>> @@ -657,6 +664,11 @@ main(int argc, char *argv[])
>>               .ovnsb_idl_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
>>           };
>>   
>> +        if (our_seqno != ovsdb_idl_get_seqno(ctx.ovnsb_idl) &&
>> +                performance_start_sample(CONTROLLER_LOOP_PERFORMANCE_NAME)) {
>> +            our_seqno = ovsdb_idl_get_seqno(ctx.ovnsb_idl);
> 
> I'm worried about this.  If some programming logic causes
> performance_end_sample to be missed once, will the sequence number
> updates ever take place?
> 
> I didn't look at ovn_controller (just looking at the diff in email), so
> disregard if that's not actually a concern.

It's actually not a concern here.

> 
>> +        }
>> +
>>           update_probe_interval(&ctx, ovnsb_remote);
>>   
>>           update_ssl_config(ctx.ovs_idl);
>> @@ -726,6 +738,8 @@ main(int argc, char *argv[])
>>                       ofctrl_put(&flow_table, &pending_ct_zones,
>>                                  get_nb_cfg(ctx.ovnsb_idl));
>>   
>> +                    performance_end_sample(CONTROLLER_LOOP_PERFORMANCE_NAME);
>> +
>>                       hmap_destroy(&flow_table);
>>                   }
>>                   if (ctx.ovnsb_idl_txn) {
>> @@ -790,6 +804,8 @@ main(int argc, char *argv[])
>>               ofctrl_wait();
>>               pinctrl_wait(&ctx);
>>           }
>> +
>> +        performance_run();
> 
> Maybe the performance_init() should create a new ovs thread which makes
> this call.  Then there's no need to add it in the main() functions.
> 
> Actually, if a new daemon is implemented or an existing daemon ends up
> calling a function that does performance counting, without the periodic
> call to performance_run(), I think memory will end up growing without
> bounds, because no cull will happen.

In my response to your review of patch 1, I suggested a threading model 
where a background thread is responsible for storing samples and 
calculating statistics. I think if I move to that model, then that 
background thread can be responsible for periodically calling 
performance_run(), and thus preventing this possibility from occurring.

> 
>>           ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
>>   
>>           if (ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop) == 1) {
>> @@ -837,6 +853,7 @@ main(int argc, char *argv[])
>>           poll_block();
>>       }
>>   
>> +    performance_destroy("ovn-controller-loop");
> 
> See the comment in previous about using an atexit() handler.  Then you
> can just iterate through the performance table and delete them all.

Yes, that is a very good idea.

> 
>>       unixctl_server_destroy(unixctl);
>>       lflow_destroy();
>>       ofctrl_destroy();

Patch

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index c286ccbca..f51d5660f 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -56,6 +56,9 @@ 
 #include "stream.h"
 #include "unixctl.h"
 #include "util.h"
+#include "timeval.h"
+#include "timer.h"
+#include "performance.h"
 
 VLOG_DEFINE_THIS_MODULE(main);
 
@@ -66,6 +69,8 @@  static unixctl_cb_func inject_pkt;
 #define DEFAULT_BRIDGE_NAME "br-int"
 #define DEFAULT_PROBE_INTERVAL_MSEC 5000
 
+#define CONTROLLER_LOOP_PERFORMANCE_NAME "ovn-controller-loop"
+
 static void update_probe_interval(struct controller_ctx *,
                                   const char *ovnsb_remote);
 static void parse_options(int argc, char *argv[]);
@@ -637,8 +642,10 @@  main(int argc, char *argv[])
     unixctl_command_register("inject-pkt", "MICROFLOW", 1, 1, inject_pkt,
                              &pending_pkt);
 
+    performance_create(CONTROLLER_LOOP_PERFORMANCE_NAME);
     /* Main loop. */
     exiting = false;
+    unsigned int our_seqno = 0;
     while (!exiting) {
         /* Check OVN SB database. */
         char *new_ovnsb_remote = get_ovnsb_remote(ovs_idl_loop.idl);
@@ -657,6 +664,11 @@  main(int argc, char *argv[])
             .ovnsb_idl_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
         };
 
+        if (our_seqno != ovsdb_idl_get_seqno(ctx.ovnsb_idl) &&
+                performance_start_sample(CONTROLLER_LOOP_PERFORMANCE_NAME)) {
+            our_seqno = ovsdb_idl_get_seqno(ctx.ovnsb_idl);
+        }
+
         update_probe_interval(&ctx, ovnsb_remote);
 
         update_ssl_config(ctx.ovs_idl);
@@ -726,6 +738,8 @@  main(int argc, char *argv[])
                     ofctrl_put(&flow_table, &pending_ct_zones,
                                get_nb_cfg(ctx.ovnsb_idl));
 
+                    performance_end_sample(CONTROLLER_LOOP_PERFORMANCE_NAME);
+
                     hmap_destroy(&flow_table);
                 }
                 if (ctx.ovnsb_idl_txn) {
@@ -790,6 +804,8 @@  main(int argc, char *argv[])
             ofctrl_wait();
             pinctrl_wait(&ctx);
         }
+
+        performance_run();
         ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
 
         if (ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop) == 1) {
@@ -837,6 +853,7 @@  main(int argc, char *argv[])
         poll_block();
     }
 
+    performance_destroy("ovn-controller-loop");
     unixctl_server_destroy(unixctl);
     lflow_destroy();
     ofctrl_destroy();