diff mbox series

[ovs-dev,v4] controller: introduce stats counters for ovn-controller incremental processing

Message ID 1a4940b05c36d4d5783e59dfc4f3b9b4fa8de0cd.1613996071.git.lorenzo.bianconi@redhat.com
State Changes Requested
Headers show
Series [ovs-dev,v4] controller: introduce stats counters for ovn-controller incremental processing | expand

Commit Message

Lorenzo Bianconi Feb. 22, 2021, 12:15 p.m. UTC
Introduce inc-engine/stats ovs-applctl command in order to dump
ovn-controller incremental processing engine statistics. So far for each
node a counter for run, abort and engine_handler have been added.
Counters are incremented when the node move to "updated" state.
In order to dump I-P stats we can can use the following commands:

$ovs-appctl -t ovn-controller inc-engine/stats
SB_address_set
        run     1       abort   0       change-handler 0
addr_sets
        run     2       abort   1       change-handler 0
SB_port_group
        run     0       abort   0       change-handler 0
port_groups
        run     2       abort   0       change-handler 0
ofctrl_is_connected
        run     1       abort   0       change-handler 0
OVS_open_vswitch
        run     0       abort   0       change-handler 0
OVS_bridge
        run     0       abort   0       change-handler 0
OVS_qos
        run     0       abort   0       change-handler 0
SB_chassis
        run     1       abort   0       change-handler 0
....

flow_output
        run     2       abort   0       change-handler 34

Morover introduce the inc-engine/stats-clear command to reset engine
statistics

$ovs-appctl -t ovn-controller inc-engine/stats-clear

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
Changes since v3:
- drop engine_set_note_update_from_run/engine_set_note_update_from_handler
  macros and move stats code in lib/inc-proc-eng.c
- fix commit log
Changes since v2:
- introduce inc-engine/stats and inc-engine/stats-clear commands and drop
  COVERAGE_* dependency
Changes since v1:
- drop handler counters and add global abort counter
- improve documentation and naming scheme
- introduce engine_set_node_updated utility macro
---
 controller/ovn-controller.c |  4 ++++
 lib/inc-proc-eng.c          | 35 +++++++++++++++++++++++++++++++++++
 lib/inc-proc-eng.h          | 18 ++++++++++++++++++
 3 files changed, 57 insertions(+)

Comments

Mark Gray Feb. 22, 2021, 1:48 p.m. UTC | #1
On 22/02/2021 12:15, Lorenzo Bianconi wrote:

Thanks Lorenzo, I think the change to unixctl commands looks good. Some
more comments below.

> Introduce inc-engine/stats ovs-applctl command in order to dump

nit: inc-engine/show-stats or clear-stats?

and

s/applctl/appctl

> ovn-controller incremental processing engine statistics. So far for each
> node a counter for run, abort and engine_handler have been added.
> Counters are incremented when the node move to "updated" state.
> In order to dump I-P stats we can can use the following commands:
> 
> $ovs-appctl -t ovn-controller inc-engine/stats
> SB_address_set
>         run     1       abort   0       change-handler 0
> addr_sets
>         run     2       abort   1       change-handler 0
> SB_port_group
>         run     0       abort   0       change-handler 0
> port_groups
>         run     2       abort   0       change-handler 0
> ofctrl_is_connected
>         run     1       abort   0       change-handler 0
> OVS_open_vswitch
>         run     0       abort   0       change-handler 0
> OVS_bridge
>         run     0       abort   0       change-handler 0
> OVS_qos
>         run     0       abort   0       change-handler 0
> SB_chassis
>         run     1       abort   0       change-handler 0
> ....
> 
> flow_output
>         run     2       abort   0       change-handler 34
> 
> Morover introduce the inc-engine/stats-clear command to reset engine

s/morover/moreover
> statistics
> 
> $ovs-appctl -t ovn-controller inc-engine/stats-clear
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---
> Changes since v3:
> - drop engine_set_note_update_from_run/engine_set_note_update_from_handler
>   macros and move stats code in lib/inc-proc-eng.c
> - fix commit log
> Changes since v2:
> - introduce inc-engine/stats and inc-engine/stats-clear commands and drop
>   COVERAGE_* dependency
> Changes since v1:
> - drop handler counters and add global abort counter
> - improve documentation and naming scheme
> - introduce engine_set_node_updated utility macro
> ---
>  controller/ovn-controller.c |  4 ++++
>  lib/inc-proc-eng.c          | 35 +++++++++++++++++++++++++++++++++++
>  lib/inc-proc-eng.h          | 18 ++++++++++++++++++
>  3 files changed, 57 insertions(+)
> 
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 5dd643f52..52e7b1932 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -2710,6 +2710,10 @@ main(int argc, char *argv[])
>  
>      unixctl_command_register("recompute", "", 0, 0, engine_recompute_cmd,
>                               NULL);
> +    unixctl_command_register("inc-engine/stats", "", 0, 0, engine_dump_stats,
> +                             NULL);
> +    unixctl_command_register("inc-engine/stats-clear", "", 0, 0,
> +                             engine_clear_stats, NULL);
>      unixctl_command_register("lflow-cache/flush", "", 0, 0,
>                               lflow_cache_flush_cmd,
>                               &flow_output_data->pd);

Do we have to call this here in ovn-controller.c? Can we not call them
in inc-proc-eng.c. We should try to decouple this entirely from
ovn-controller.c. I think we can implement nearly the entire
functionality in inc-proc-eng.c
> diff --git a/lib/inc-proc-eng.c b/lib/inc-proc-eng.c
> index 916dbbe39..facd59e5b 100644
> --- a/lib/inc-proc-eng.c
> +++ b/lib/inc-proc-eng.c
> @@ -283,11 +283,13 @@ engine_recompute(struct engine_node *node, bool forced, bool allowed)
>      if (!allowed) {
>          VLOG_DBG("node: %s, recompute aborted", node->name);
>          engine_set_node_state(node, EN_ABORTED);
> +        node->stats.abort++;

We could move this to engine_run() doing something like this:

    for (size_t i = 0; i < engine_n_nodes; i++) {
        engine_run_node(engine_nodes[i], recompute_allowed);

        switch(engine_node[i]->state) {
            case EN_ABORTED:
                engine_node[i]->stats.abort++;
                break;
            case EN_UPDATED: // <- we could add other states
                engine_node[i]->stats.updated++;
                break;
            case EN_UNCHANGED: // <- we could add other states
                engine_node[i]->stats.unchanged++;
                break;
            default:
                /* Should not reach here */
                ovs_assert();
            }
        }

        if (engine_nodes[i]->state == EN_ABORTED) {
            engine_run_aborted = true;
            return;

The reason that I suggest this is that, at this point (i think), we have
finished processing the node for this run so we are logging the absolute
final state. Also, i think it is easy to count other states (as above)

What do you think?
>          return;
>      }
>  
>      /* Run the node handler which might change state. */
>      node->run(node, node->data);
> +    node->stats.run++;


>  }
>  
>  /* Return true if the node could be computed, false otherwise. */
> @@ -310,6 +312,7 @@ engine_compute(struct engine_node *node, bool recompute_allowed)
>                  engine_recompute(node, false, recompute_allowed);
>                  return (node->state != EN_ABORTED);
>              }
> +            node->stats.change_handler++;

This will increase the 'change_handler' counter for node X each time the
change_handler() for each of node X's input nodes is called. Therefore,
if Node X has 10 input nodes (with change handlers) then change_handler
would increment by 10 at each attempted "compute" of Node X. I think
this should be moved to the start of the function which would count the
number of times we attempt to "compute" the node.

If we do this, we could change the name of the counter to "compute" as
this is the current term in inc-proc-eng.c for an incremental compute of
the node.


>          }
>      }
>      return true;
> @@ -401,3 +404,35 @@ engine_need_run(void)
>      }
>      return false;
>  }
> +
> +void
> +engine_clear_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
> +                   const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED)
> +{
> +    for (size_t i = 0; i < engine_n_nodes; i++) {
> +        struct engine_node *node = engine_nodes[i];
> +
> +        memset(&node->stats, 0, sizeof(node->stats));
> +    }
> +    unixctl_command_reply(conn, NULL);
> +}
> +
> +void
> +engine_dump_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
> +                  const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED)
> +{
> +    struct ds dump = DS_EMPTY_INITIALIZER;
> +
> +    for (size_t i = 0; i < engine_n_nodes; i++) {
> +        struct engine_node *node = engine_nodes[i];
> +
> +        ds_put_format(&dump, "%s\n", node->name);
> +        ds_put_format(&dump, "\trun\t%lu", node->stats.run);
> +        ds_put_format(&dump, "\tabort\t%lu", node->stats.abort);
> +        ds_put_format(&dump, "\tchange-handler\t%lu\n",
> +                      node->stats.change_handler);
> +    }
> +    unixctl_command_reply(conn, ds_cstr(&dump));
> +
> +    ds_destroy(&dump);
> +}
> diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
> index 857234677..bc8744a0d 100644
> --- a/lib/inc-proc-eng.h
> +++ b/lib/inc-proc-eng.h
> @@ -60,6 +60,8 @@
>   * against all its inputs.
>   */
>  
> +#include "unixctl.h"
> +

We could move this from inc-proc-eng.h to inc-proc-eng.c if we move the
calls to unixctl_command_register() into inc-proc-eng.c

>  #define ENGINE_MAX_INPUT 256
>  #define ENGINE_MAX_OVSDB_INDEX 256
>  
> @@ -107,6 +109,12 @@ enum engine_node_state {
>      EN_STATE_MAX,
>  };
>  
> +struct engine_stats {
> +    unsigned long run;
> +    unsigned long abort;
> +    unsigned long change_handler;
> +};
> +
>  struct engine_node {
>      /* A unique name for each node. */
>      char *name;
> @@ -154,6 +162,9 @@ struct engine_node {
>      /* Method to clear up tracked data maintained by the engine node in the
>       * engine 'data'. It may be NULL. */
>      void (*clear_tracked_data)(void *tracked_data);
> +
> +    /* Engine stats */
> +    struct engine_stats stats;
>  };
>  
>  /* Initialize the data for the engine nodes. It calls each node's
> @@ -312,6 +323,7 @@ en_##DB_NAME##_##TBL_NAME##_run(struct engine_node *node, \
>          EN_OVSDB_GET(node); \
>      if (DB_NAME##rec_##TBL_NAME##_table_track_get_first(table)) { \
>          engine_set_node_state(node, EN_UPDATED); \
> +        node->stats.run++; \

Is this is just counting when the node state is set to updated? If so,
the suggested code above should cover that and I think the counter name
should be change to 'updated'.

If it is supposed to measure the number of times the run function is
called, then I suggest adding this after each invocation of node->run()
in inc-proc-eng.c and changing the name of it to "recompute" as this is
the current term in inc-proc-eng.c for a full recompute of the node. My
reason for this is we should not be calling node->run() directly outside
of inc-proc-eng.c.
>          return; \
>      } \
>      engine_set_node_state(node, EN_UNCHANGED); \
> @@ -352,4 +364,10 @@ static void en_##DB_NAME##_##TBL_NAME##_cleanup(void *data OVS_UNUSED) \
>  #define ENGINE_NODE_OVS(TBL_NAME, TBL_NAME_STR) \
>      ENGINE_NODE_OVSDB(ovs, "OVS", TBL_NAME, TBL_NAME_STR);
>  
> +
> +void engine_dump_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
> +                       const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED);
> +void engine_clear_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
> +                        const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED);
> +

These do not need to be defined in the inc-proc-eng.h file as they are
not required outside of inc-proc-eng.c. You could move them to
inc-proc-eng.c.
>  #endif /* lib/inc-proc-eng.h */
>
Lorenzo Bianconi Feb. 23, 2021, 2:12 p.m. UTC | #2
> On 22/02/2021 12:15, Lorenzo Bianconi wrote:
> 
> Thanks Lorenzo, I think the change to unixctl commands looks good. Some
> more comments below.

Hi Mark,

thx for the review

> 
> > Introduce inc-engine/stats ovs-applctl command in order to dump
> 
> nit: inc-engine/show-stats or clear-stats?
> 
> and
> 
> s/applctl/appctl

ack, I will fix them in v5

> 
> > ovn-controller incremental processing engine statistics. So far for each
> > node a counter for run, abort and engine_handler have been added.
> > Counters are incremented when the node move to "updated" state.
> > In order to dump I-P stats we can can use the following commands:
> > 
> > $ovs-appctl -t ovn-controller inc-engine/stats
> > SB_address_set
> >         run     1       abort   0       change-handler 0
> > addr_sets
> >         run     2       abort   1       change-handler 0
> > SB_port_group
> >         run     0       abort   0       change-handler 0
> > port_groups
> >         run     2       abort   0       change-handler 0
> > ofctrl_is_connected
> >         run     1       abort   0       change-handler 0
> > OVS_open_vswitch
> >         run     0       abort   0       change-handler 0
> > OVS_bridge
> >         run     0       abort   0       change-handler 0
> > OVS_qos
> >         run     0       abort   0       change-handler 0
> > SB_chassis
> >         run     1       abort   0       change-handler 0
> > ....
> > 
> > flow_output
> >         run     2       abort   0       change-handler 34
> > 
> > Morover introduce the inc-engine/stats-clear command to reset engine
> 
> s/morover/moreover

ack, I will fix it in v5

> > statistics
> > 
> > $ovs-appctl -t ovn-controller inc-engine/stats-clear
> > 
> > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> > ---
> > Changes since v3:
> > - drop engine_set_note_update_from_run/engine_set_note_update_from_handler
> >   macros and move stats code in lib/inc-proc-eng.c
> > - fix commit log
> > Changes since v2:
> > - introduce inc-engine/stats and inc-engine/stats-clear commands and drop
> >   COVERAGE_* dependency
> > Changes since v1:
> > - drop handler counters and add global abort counter
> > - improve documentation and naming scheme
> > - introduce engine_set_node_updated utility macro
> > ---
> >  controller/ovn-controller.c |  4 ++++
> >  lib/inc-proc-eng.c          | 35 +++++++++++++++++++++++++++++++++++
> >  lib/inc-proc-eng.h          | 18 ++++++++++++++++++
> >  3 files changed, 57 insertions(+)
> > 
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 5dd643f52..52e7b1932 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -2710,6 +2710,10 @@ main(int argc, char *argv[])
> >  
> >      unixctl_command_register("recompute", "", 0, 0, engine_recompute_cmd,
> >                               NULL);
> > +    unixctl_command_register("inc-engine/stats", "", 0, 0, engine_dump_stats,
> > +                             NULL);
> > +    unixctl_command_register("inc-engine/stats-clear", "", 0, 0,
> > +                             engine_clear_stats, NULL);
> >      unixctl_command_register("lflow-cache/flush", "", 0, 0,
> >                               lflow_cache_flush_cmd,
> >                               &flow_output_data->pd);
> 
> Do we have to call this here in ovn-controller.c? Can we not call them
> in inc-proc-eng.c. We should try to decouple this entirely from
> ovn-controller.c. I think we can implement nearly the entire
> functionality in inc-proc-eng.c

ack, I will move them in inc-proc-eng.c

> > diff --git a/lib/inc-proc-eng.c b/lib/inc-proc-eng.c
> > index 916dbbe39..facd59e5b 100644
> > --- a/lib/inc-proc-eng.c
> > +++ b/lib/inc-proc-eng.c
> > @@ -283,11 +283,13 @@ engine_recompute(struct engine_node *node, bool forced, bool allowed)
> >      if (!allowed) {
> >          VLOG_DBG("node: %s, recompute aborted", node->name);
> >          engine_set_node_state(node, EN_ABORTED);
> > +        node->stats.abort++;
> 
> We could move this to engine_run() doing something like this:
> 
>     for (size_t i = 0; i < engine_n_nodes; i++) {
>         engine_run_node(engine_nodes[i], recompute_allowed);
> 
>         switch(engine_node[i]->state) {
>             case EN_ABORTED:
>                 engine_node[i]->stats.abort++;
>                 break;
>             case EN_UPDATED: // <- we could add other states
>                 engine_node[i]->stats.updated++;
>                 break;
>             case EN_UNCHANGED: // <- we could add other states
>                 engine_node[i]->stats.unchanged++;
>                 break;
>             default:
>                 /* Should not reach here */
>                 ovs_assert();
>             }
>         }
> 
>         if (engine_nodes[i]->state == EN_ABORTED) {
>             engine_run_aborted = true;
>             return;
> 
> The reason that I suggest this is that, at this point (i think), we have
> finished processing the node for this run so we are logging the absolute
> final state. Also, i think it is easy to count other states (as above)
> 
> What do you think?

as discussed on IRC I guess we should add not too much info now, just what we think
is essential, otherwise it will be difficult to get info. If the node state
change is important we can add it with follow-up patches. wdyt?

> >          return;
> >      }
> >  
> >      /* Run the node handler which might change state. */
> >      node->run(node, node->data);
> > +    node->stats.run++;
> 
> 
> >  }
> >  
> >  /* Return true if the node could be computed, false otherwise. */
> > @@ -310,6 +312,7 @@ engine_compute(struct engine_node *node, bool recompute_allowed)
> >                  engine_recompute(node, false, recompute_allowed);
> >                  return (node->state != EN_ABORTED);
> >              }
> > +            node->stats.change_handler++;
> 
> This will increase the 'change_handler' counter for node X each time the
> change_handler() for each of node X's input nodes is called. Therefore,
> if Node X has 10 input nodes (with change handlers) then change_handler
> would increment by 10 at each attempted "compute" of Node X. I think
> this should be moved to the start of the function which would count the
> number of times we attempt to "compute" the node.
> 
> If we do this, we could change the name of the counter to "compute" as
> this is the current term in inc-proc-eng.c for an incremental compute of
> the node.

as discussed on IRC, I think we can rework this and just report the number of
"computed" successfully.

> 
> 
> >          }
> >      }
> >      return true;
> > @@ -401,3 +404,35 @@ engine_need_run(void)
> >      }
> >      return false;
> >  }
> > +
> > +void
> > +engine_clear_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
> > +                   const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED)
> > +{
> > +    for (size_t i = 0; i < engine_n_nodes; i++) {
> > +        struct engine_node *node = engine_nodes[i];
> > +
> > +        memset(&node->stats, 0, sizeof(node->stats));
> > +    }
> > +    unixctl_command_reply(conn, NULL);
> > +}
> > +
> > +void
> > +engine_dump_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
> > +                  const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED)
> > +{
> > +    struct ds dump = DS_EMPTY_INITIALIZER;
> > +
> > +    for (size_t i = 0; i < engine_n_nodes; i++) {
> > +        struct engine_node *node = engine_nodes[i];
> > +
> > +        ds_put_format(&dump, "%s\n", node->name);
> > +        ds_put_format(&dump, "\trun\t%lu", node->stats.run);
> > +        ds_put_format(&dump, "\tabort\t%lu", node->stats.abort);
> > +        ds_put_format(&dump, "\tchange-handler\t%lu\n",
> > +                      node->stats.change_handler);
> > +    }
> > +    unixctl_command_reply(conn, ds_cstr(&dump));
> > +
> > +    ds_destroy(&dump);
> > +}
> > diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
> > index 857234677..bc8744a0d 100644
> > --- a/lib/inc-proc-eng.h
> > +++ b/lib/inc-proc-eng.h
> > @@ -60,6 +60,8 @@
> >   * against all its inputs.
> >   */
> >  
> > +#include "unixctl.h"
> > +
> 
> We could move this from inc-proc-eng.h to inc-proc-eng.c if we move the
> calls to unixctl_command_register() into inc-proc-eng.c

ack

> 
> >  #define ENGINE_MAX_INPUT 256
> >  #define ENGINE_MAX_OVSDB_INDEX 256
> >  
> > @@ -107,6 +109,12 @@ enum engine_node_state {
> >      EN_STATE_MAX,
> >  };
> >  
> > +struct engine_stats {
> > +    unsigned long run;
> > +    unsigned long abort;
> > +    unsigned long change_handler;
> > +};
> > +
> >  struct engine_node {
> >      /* A unique name for each node. */
> >      char *name;
> > @@ -154,6 +162,9 @@ struct engine_node {
> >      /* Method to clear up tracked data maintained by the engine node in the
> >       * engine 'data'. It may be NULL. */
> >      void (*clear_tracked_data)(void *tracked_data);
> > +
> > +    /* Engine stats */
> > +    struct engine_stats stats;
> >  };
> >  
> >  /* Initialize the data for the engine nodes. It calls each node's
> > @@ -312,6 +323,7 @@ en_##DB_NAME##_##TBL_NAME##_run(struct engine_node *node, \
> >          EN_OVSDB_GET(node); \
> >      if (DB_NAME##rec_##TBL_NAME##_table_track_get_first(table)) { \
> >          engine_set_node_state(node, EN_UPDATED); \
> > +        node->stats.run++; \
> 
> Is this is just counting when the node state is set to updated? If so,
> the suggested code above should cover that and I think the counter name
> should be change to 'updated'.
> 
> If it is supposed to measure the number of times the run function is
> called, then I suggest adding this after each invocation of node->run()
> in inc-proc-eng.c and changing the name of it to "recompute" as this is
> the current term in inc-proc-eng.c for a full recompute of the node. My
> reason for this is we should not be calling node->run() directly outside
> of inc-proc-eng.c.
> >          return; \
> >      } \
> >      engine_set_node_state(node, EN_UNCHANGED); \
> > @@ -352,4 +364,10 @@ static void en_##DB_NAME##_##TBL_NAME##_cleanup(void *data OVS_UNUSED) \
> >  #define ENGINE_NODE_OVS(TBL_NAME, TBL_NAME_STR) \
> >      ENGINE_NODE_OVSDB(ovs, "OVS", TBL_NAME, TBL_NAME_STR);
> >  
> > +
> > +void engine_dump_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
> > +                       const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED);
> > +void engine_clear_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
> > +                        const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED);
> > +
> 
> These do not need to be defined in the inc-proc-eng.h file as they are
> not required outside of inc-proc-eng.c. You could move them to
> inc-proc-eng.c.

ack


Regards,
Lorenzo

> >  #endif /* lib/inc-proc-eng.h */
> > 
>
Mark Gray Feb. 23, 2021, 6:56 p.m. UTC | #3
On 23/02/2021 14:12, Lorenzo Bianconi wrote:
>> On 22/02/2021 12:15, Lorenzo Bianconi wrote:
>>
>> Thanks Lorenzo, I think the change to unixctl commands looks good. Some
>> more comments below.
> 
> Hi Mark,
> 
> thx for the review

Thanks for taking my review comments into consideration!
> 
>>
>>> Introduce inc-engine/stats ovs-applctl command in order to dump
>>
>> nit: inc-engine/show-stats or clear-stats?
>>
>> and
>>
>> s/applctl/appctl
> 
> ack, I will fix them in v5
> 
>>
>>> ovn-controller incremental processing engine statistics. So far for each
>>> node a counter for run, abort and engine_handler have been added.
>>> Counters are incremented when the node move to "updated" state.
>>> In order to dump I-P stats we can can use the following commands:
>>>
>>> $ovs-appctl -t ovn-controller inc-engine/stats
>>> SB_address_set
>>>         run     1       abort   0       change-handler 0
>>> addr_sets
>>>         run     2       abort   1       change-handler 0
>>> SB_port_group
>>>         run     0       abort   0       change-handler 0
>>> port_groups
>>>         run     2       abort   0       change-handler 0
>>> ofctrl_is_connected
>>>         run     1       abort   0       change-handler 0
>>> OVS_open_vswitch
>>>         run     0       abort   0       change-handler 0
>>> OVS_bridge
>>>         run     0       abort   0       change-handler 0
>>> OVS_qos
>>>         run     0       abort   0       change-handler 0
>>> SB_chassis
>>>         run     1       abort   0       change-handler 0
>>> ....
>>>
>>> flow_output
>>>         run     2       abort   0       change-handler 34
>>>
>>> Morover introduce the inc-engine/stats-clear command to reset engine
>>
>> s/morover/moreover
> 
> ack, I will fix it in v5
> 
>>> statistics
>>>
>>> $ovs-appctl -t ovn-controller inc-engine/stats-clear
>>>
>>> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
>>> ---
>>> Changes since v3:
>>> - drop engine_set_note_update_from_run/engine_set_note_update_from_handler
>>>   macros and move stats code in lib/inc-proc-eng.c
>>> - fix commit log
>>> Changes since v2:
>>> - introduce inc-engine/stats and inc-engine/stats-clear commands and drop
>>>   COVERAGE_* dependency
>>> Changes since v1:
>>> - drop handler counters and add global abort counter
>>> - improve documentation and naming scheme
>>> - introduce engine_set_node_updated utility macro
>>> ---
>>>  controller/ovn-controller.c |  4 ++++
>>>  lib/inc-proc-eng.c          | 35 +++++++++++++++++++++++++++++++++++
>>>  lib/inc-proc-eng.h          | 18 ++++++++++++++++++
>>>  3 files changed, 57 insertions(+)
>>>
>>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>>> index 5dd643f52..52e7b1932 100644
>>> --- a/controller/ovn-controller.c
>>> +++ b/controller/ovn-controller.c
>>> @@ -2710,6 +2710,10 @@ main(int argc, char *argv[])
>>>  
>>>      unixctl_command_register("recompute", "", 0, 0, engine_recompute_cmd,
>>>                               NULL);
>>> +    unixctl_command_register("inc-engine/stats", "", 0, 0, engine_dump_stats,
>>> +                             NULL);
>>> +    unixctl_command_register("inc-engine/stats-clear", "", 0, 0,
>>> +                             engine_clear_stats, NULL);
>>>      unixctl_command_register("lflow-cache/flush", "", 0, 0,
>>>                               lflow_cache_flush_cmd,
>>>                               &flow_output_data->pd);
>>
>> Do we have to call this here in ovn-controller.c? Can we not call them
>> in inc-proc-eng.c. We should try to decouple this entirely from
>> ovn-controller.c. I think we can implement nearly the entire
>> functionality in inc-proc-eng.c
> 
> ack, I will move them in inc-proc-eng.c
> 
>>> diff --git a/lib/inc-proc-eng.c b/lib/inc-proc-eng.c
>>> index 916dbbe39..facd59e5b 100644
>>> --- a/lib/inc-proc-eng.c
>>> +++ b/lib/inc-proc-eng.c
>>> @@ -283,11 +283,13 @@ engine_recompute(struct engine_node *node, bool forced, bool allowed)
>>>      if (!allowed) {
>>>          VLOG_DBG("node: %s, recompute aborted", node->name);
>>>          engine_set_node_state(node, EN_ABORTED);
>>> +        node->stats.abort++;
>>
>> We could move this to engine_run() doing something like this:
>>
>>     for (size_t i = 0; i < engine_n_nodes; i++) {
>>         engine_run_node(engine_nodes[i], recompute_allowed);
>>
>>         switch(engine_node[i]->state) {
>>             case EN_ABORTED:
>>                 engine_node[i]->stats.abort++;
>>                 break;
>>             case EN_UPDATED: // <- we could add other states
>>                 engine_node[i]->stats.updated++;
>>                 break;
>>             case EN_UNCHANGED: // <- we could add other states
>>                 engine_node[i]->stats.unchanged++;
>>                 break;
>>             default:
>>                 /* Should not reach here */
>>                 ovs_assert();
>>             }
>>         }
>>
>>         if (engine_nodes[i]->state == EN_ABORTED) {
>>             engine_run_aborted = true;
>>             return;
>>
>> The reason that I suggest this is that, at this point (i think), we have
>> finished processing the node for this run so we are logging the absolute
>> final state. Also, i think it is easy to count other states (as above)
>>
>> What do you think?
> 
> as discussed on IRC I guess we should add not too much info now, just what we think
> is essential, otherwise it will be difficult to get info. If the node state
> change is important we can add it with follow-up patches. wdyt?

Yes, I am OK with this.

> 
>>>          return;
>>>      }
>>>  
>>>      /* Run the node handler which might change state. */
>>>      node->run(node, node->data);
>>> +    node->stats.run++;
>>
>>
>>>  }
>>>  
>>>  /* Return true if the node could be computed, false otherwise. */
>>> @@ -310,6 +312,7 @@ engine_compute(struct engine_node *node, bool recompute_allowed)
>>>                  engine_recompute(node, false, recompute_allowed);
>>>                  return (node->state != EN_ABORTED);
>>>              }
>>> +            node->stats.change_handler++;
>>
>> This will increase the 'change_handler' counter for node X each time the
>> change_handler() for each of node X's input nodes is called. Therefore,
>> if Node X has 10 input nodes (with change handlers) then change_handler
>> would increment by 10 at each attempted "compute" of Node X. I think
>> this should be moved to the start of the function which would count the
>> number of times we attempt to "compute" the node.
>>
>> If we do this, we could change the name of the counter to "compute" as
>> this is the current term in inc-proc-eng.c for an incremental compute of
>> the node.
> 
> as discussed on IRC, I think we can rework this and just report the number of
> "computed" successfully.

Yes

> 
>>
>>
>>>          }
>>>      }
>>>      return true;
>>> @@ -401,3 +404,35 @@ engine_need_run(void)
>>>      }
>>>      return false;
>>>  }
>>> +
>>> +void
>>> +engine_clear_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
>>> +                   const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED)
>>> +{
>>> +    for (size_t i = 0; i < engine_n_nodes; i++) {
>>> +        struct engine_node *node = engine_nodes[i];
>>> +
>>> +        memset(&node->stats, 0, sizeof(node->stats));
>>> +    }
>>> +    unixctl_command_reply(conn, NULL);
>>> +}
>>> +
>>> +void
>>> +engine_dump_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
>>> +                  const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED)
>>> +{
>>> +    struct ds dump = DS_EMPTY_INITIALIZER;
>>> +
>>> +    for (size_t i = 0; i < engine_n_nodes; i++) {
>>> +        struct engine_node *node = engine_nodes[i];
>>> +
>>> +        ds_put_format(&dump, "%s\n", node->name);
>>> +        ds_put_format(&dump, "\trun\t%lu", node->stats.run);
>>> +        ds_put_format(&dump, "\tabort\t%lu", node->stats.abort);
>>> +        ds_put_format(&dump, "\tchange-handler\t%lu\n",
>>> +                      node->stats.change_handler);
>>> +    }
>>> +    unixctl_command_reply(conn, ds_cstr(&dump));
>>> +
>>> +    ds_destroy(&dump);
>>> +}
>>> diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
>>> index 857234677..bc8744a0d 100644
>>> --- a/lib/inc-proc-eng.h
>>> +++ b/lib/inc-proc-eng.h
>>> @@ -60,6 +60,8 @@
>>>   * against all its inputs.
>>>   */
>>>  
>>> +#include "unixctl.h"
>>> +
>>
>> We could move this from inc-proc-eng.h to inc-proc-eng.c if we move the
>> calls to unixctl_command_register() into inc-proc-eng.c
> 
> ack
> 
>>
>>>  #define ENGINE_MAX_INPUT 256
>>>  #define ENGINE_MAX_OVSDB_INDEX 256
>>>  
>>> @@ -107,6 +109,12 @@ enum engine_node_state {
>>>      EN_STATE_MAX,
>>>  };
>>>  
>>> +struct engine_stats {
>>> +    unsigned long run;
>>> +    unsigned long abort;
>>> +    unsigned long change_handler;
>>> +};
>>> +
>>>  struct engine_node {
>>>      /* A unique name for each node. */
>>>      char *name;
>>> @@ -154,6 +162,9 @@ struct engine_node {
>>>      /* Method to clear up tracked data maintained by the engine node in the
>>>       * engine 'data'. It may be NULL. */
>>>      void (*clear_tracked_data)(void *tracked_data);
>>> +
>>> +    /* Engine stats */
>>> +    struct engine_stats stats;
>>>  };
>>>  
>>>  /* Initialize the data for the engine nodes. It calls each node's
>>> @@ -312,6 +323,7 @@ en_##DB_NAME##_##TBL_NAME##_run(struct engine_node *node, \
>>>          EN_OVSDB_GET(node); \
>>>      if (DB_NAME##rec_##TBL_NAME##_table_track_get_first(table)) { \
>>>          engine_set_node_state(node, EN_UPDATED); \
>>> +        node->stats.run++; \
>>
>> Is this is just counting when the node state is set to updated? If so,
>> the suggested code above should cover that and I think the counter name
>> should be change to 'updated'.
>>
>> If it is supposed to measure the number of times the run function is
>> called, then I suggest adding this after each invocation of node->run()
>> in inc-proc-eng.c and changing the name of it to "recompute" as this is
>> the current term in inc-proc-eng.c for a full recompute of the node. My
>> reason for this is we should not be calling node->run() directly outside
>> of inc-proc-eng.c.
>>>          return; \
>>>      } \
>>>      engine_set_node_state(node, EN_UNCHANGED); \
>>> @@ -352,4 +364,10 @@ static void en_##DB_NAME##_##TBL_NAME##_cleanup(void *data OVS_UNUSED) \
>>>  #define ENGINE_NODE_OVS(TBL_NAME, TBL_NAME_STR) \
>>>      ENGINE_NODE_OVSDB(ovs, "OVS", TBL_NAME, TBL_NAME_STR);
>>>  
>>> +
>>> +void engine_dump_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
>>> +                       const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED);
>>> +void engine_clear_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
>>> +                        const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED);
>>> +
>>
>> These do not need to be defined in the inc-proc-eng.h file as they are
>> not required outside of inc-proc-eng.c. You could move them to
>> inc-proc-eng.c.
> 
> ack
> 
> 
> Regards,
> Lorenzo
> 
>>>  #endif /* lib/inc-proc-eng.h */
>>>
>>
diff mbox series

Patch

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 5dd643f52..52e7b1932 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -2710,6 +2710,10 @@  main(int argc, char *argv[])
 
     unixctl_command_register("recompute", "", 0, 0, engine_recompute_cmd,
                              NULL);
+    unixctl_command_register("inc-engine/stats", "", 0, 0, engine_dump_stats,
+                             NULL);
+    unixctl_command_register("inc-engine/stats-clear", "", 0, 0,
+                             engine_clear_stats, NULL);
     unixctl_command_register("lflow-cache/flush", "", 0, 0,
                              lflow_cache_flush_cmd,
                              &flow_output_data->pd);
diff --git a/lib/inc-proc-eng.c b/lib/inc-proc-eng.c
index 916dbbe39..facd59e5b 100644
--- a/lib/inc-proc-eng.c
+++ b/lib/inc-proc-eng.c
@@ -283,11 +283,13 @@  engine_recompute(struct engine_node *node, bool forced, bool allowed)
     if (!allowed) {
         VLOG_DBG("node: %s, recompute aborted", node->name);
         engine_set_node_state(node, EN_ABORTED);
+        node->stats.abort++;
         return;
     }
 
     /* Run the node handler which might change state. */
     node->run(node, node->data);
+    node->stats.run++;
 }
 
 /* Return true if the node could be computed, false otherwise. */
@@ -310,6 +312,7 @@  engine_compute(struct engine_node *node, bool recompute_allowed)
                 engine_recompute(node, false, recompute_allowed);
                 return (node->state != EN_ABORTED);
             }
+            node->stats.change_handler++;
         }
     }
     return true;
@@ -401,3 +404,35 @@  engine_need_run(void)
     }
     return false;
 }
+
+void
+engine_clear_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                   const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED)
+{
+    for (size_t i = 0; i < engine_n_nodes; i++) {
+        struct engine_node *node = engine_nodes[i];
+
+        memset(&node->stats, 0, sizeof(node->stats));
+    }
+    unixctl_command_reply(conn, NULL);
+}
+
+void
+engine_dump_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                  const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED)
+{
+    struct ds dump = DS_EMPTY_INITIALIZER;
+
+    for (size_t i = 0; i < engine_n_nodes; i++) {
+        struct engine_node *node = engine_nodes[i];
+
+        ds_put_format(&dump, "%s\n", node->name);
+        ds_put_format(&dump, "\trun\t%lu", node->stats.run);
+        ds_put_format(&dump, "\tabort\t%lu", node->stats.abort);
+        ds_put_format(&dump, "\tchange-handler\t%lu\n",
+                      node->stats.change_handler);
+    }
+    unixctl_command_reply(conn, ds_cstr(&dump));
+
+    ds_destroy(&dump);
+}
diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
index 857234677..bc8744a0d 100644
--- a/lib/inc-proc-eng.h
+++ b/lib/inc-proc-eng.h
@@ -60,6 +60,8 @@ 
  * against all its inputs.
  */
 
+#include "unixctl.h"
+
 #define ENGINE_MAX_INPUT 256
 #define ENGINE_MAX_OVSDB_INDEX 256
 
@@ -107,6 +109,12 @@  enum engine_node_state {
     EN_STATE_MAX,
 };
 
+struct engine_stats {
+    unsigned long run;
+    unsigned long abort;
+    unsigned long change_handler;
+};
+
 struct engine_node {
     /* A unique name for each node. */
     char *name;
@@ -154,6 +162,9 @@  struct engine_node {
     /* Method to clear up tracked data maintained by the engine node in the
      * engine 'data'. It may be NULL. */
     void (*clear_tracked_data)(void *tracked_data);
+
+    /* Engine stats */
+    struct engine_stats stats;
 };
 
 /* Initialize the data for the engine nodes. It calls each node's
@@ -312,6 +323,7 @@  en_##DB_NAME##_##TBL_NAME##_run(struct engine_node *node, \
         EN_OVSDB_GET(node); \
     if (DB_NAME##rec_##TBL_NAME##_table_track_get_first(table)) { \
         engine_set_node_state(node, EN_UPDATED); \
+        node->stats.run++; \
         return; \
     } \
     engine_set_node_state(node, EN_UNCHANGED); \
@@ -352,4 +364,10 @@  static void en_##DB_NAME##_##TBL_NAME##_cleanup(void *data OVS_UNUSED) \
 #define ENGINE_NODE_OVS(TBL_NAME, TBL_NAME_STR) \
     ENGINE_NODE_OVSDB(ovs, "OVS", TBL_NAME, TBL_NAME_STR);
 
+
+void engine_dump_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                       const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED);
+void engine_clear_stats(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                        const char *argv[] OVS_UNUSED, void *arg OVS_UNUSED);
+
 #endif /* lib/inc-proc-eng.h */