[ovs-dev,RFC] dpif-netdev: Add ovs-appctl dpif-netdev/subtable-show.
diff mbox series

Message ID 1573226624-10167-1-git-send-email-emma.finn@intel.com
State New
Headers show
Series
  • [ovs-dev,RFC] dpif-netdev: Add ovs-appctl dpif-netdev/subtable-show.
Related show

Commit Message

Emma Finn Nov. 8, 2019, 3:23 p.m. UTC
Add an ovs-appctl command to iterate through the dpcls
and for each subtable output the miniflow bits for any
existing table.

$ ovs-appctl dpif-netdev/subtable-show
pmd thread numa_id 0
  dpcls port 2:
    subtable:
      unit_0: 4 (0x4)
      unit_1: 2 (0x2)
pmd thread numa_id 1
  dpcls port 3:
    subtable:
      unit_0: 4 (0x3)
      unit_1: 2 (0x5)

Signed-off-by: Emma Finn <emma.finn@intel.com>
---
 NEWS                        |  2 ++
 lib/dpif-netdev-unixctl.man |  4 ++++
 lib/dpif-netdev.c           | 54 ++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 59 insertions(+), 1 deletion(-)

Comments

Ilya Maximets Nov. 19, 2019, 7:35 p.m. UTC | #1
On 08.11.2019 16:23, Emma Finn wrote:
> Add an ovs-appctl command to iterate through the dpcls
> and for each subtable output the miniflow bits for any
> existing table.

Hi Emma,

First of all, thanks for working on this.  This might be
useful for developers to determine the optimization points.

Regarding this RFC:
Do we really need to disturb a running datapath to get this
information?  Given the datapath flow it should be possible
to calculate the number of miniflow bits and we don't need
to look at the real subtables for that.  I mean, that it might
be useful to calculate this from the flow dump offline using
a special utility.  Flow dumps is a common thing and any
performance analysis includes looking at them.  This command
is for developers, not for the users.  For developer/someone
who troubleshoots performance issue it might be easy to ask
for a flow dump or get one from the output of ovs-bugtool and
feed it to some ovs-parse-miniflow-bits application that will
show required information from the flow dump.

What do you think?


Another thought is "why subtables?".
I mean, we need to know numbers of bits in miniflows and we
don't really care in which subtables they are.  So, we could
iterate over flows in a flow_table without disturbing classifiers.
You have flow stats if you need to find hottest flows.

Some comments for the current patch inline.

Best regards, Ilya Maximets.

> 
> $ ovs-appctl dpif-netdev/subtable-show
> pmd thread numa_id 0
>   dpcls port 2:
>     subtable:
>       unit_0: 4 (0x4)
>       unit_1: 2 (0x2)
> pmd thread numa_id 1
>   dpcls port 3:
>     subtable:
>       unit_0: 4 (0x3)
>       unit_1: 2 (0x5)

Why numbers are different?  Isn't it the same number but in hex?

> 
> Signed-off-by: Emma Finn <emma.finn@intel.com>
> ---
>  NEWS                        |  2 ++
>  lib/dpif-netdev-unixctl.man |  4 ++++
>  lib/dpif-netdev.c           | 54 ++++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 59 insertions(+), 1 deletion(-)
> 
> diff --git a/NEWS b/NEWS
> index 88b8189..c01c100 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -10,6 +10,8 @@ Post-v2.12.0
>         if supported by libbpf.
>       * Add option to enable, disable and query TCP sequence checking in
>         conntrack.
> +     * New "ovs-appctl dpif-netdev/subtable-show" command for userspace
> +       datapath to show subtable miniflow bits.
>  
>  v2.12.0 - 03 Sep 2019
>  ---------------------
> diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man
> index 6c54f6f..c443465 100644
> --- a/lib/dpif-netdev-unixctl.man
> +++ b/lib/dpif-netdev-unixctl.man
> @@ -217,3 +217,7 @@ with port names, which this thread polls.
>  .
>  .IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
>  Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.
> +.
> +.IP "\fBdpif-netdev/subtable-show\fR [\fB-pmd\fR \fIcore\fR] [\fIdp\fR]"
> +For one or all pmd threads of the datapath \fIdp\fR show the list of miniflow
> +bits for each subtable in the datapath classifier.
> \ No newline at end of file
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 4720ba1..7ae422e 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -857,6 +857,8 @@ static inline bool
>  pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd);
>  static void queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
>                                    struct dp_netdev_flow *flow);
> +static void pmd_info_show_subtable(struct ds *reply,
> +                                   struct dp_netdev_pmd_thread *pmd);
>  
>  static void
>  emc_cache_init(struct emc_cache *flow_cache)
> @@ -979,6 +981,7 @@ enum pmd_info_type {
>      PMD_INFO_CLEAR_STATS, /* Set the cycles count to 0. */
>      PMD_INFO_SHOW_RXQ,    /* Show poll lists of pmd threads. */
>      PMD_INFO_PERF_SHOW,   /* Show pmd performance details. */
> +    PMD_INFO_SHOW_SUBTABLE, /* Show subtable miniflow bits. */
>  };
>  
>  static void
> @@ -1334,6 +1337,8 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
>              pmd_info_show_stats(&reply, pmd);
>          } else if (type == PMD_INFO_PERF_SHOW) {
>              pmd_info_show_perf(&reply, pmd, (struct pmd_perf_params *)aux);
> +        } else if (type == PMD_INFO_SHOW_SUBTABLE) {
> +            pmd_info_show_subtable(&reply, pmd);
>          }
>      }
>      free(pmd_list);
> @@ -1391,7 +1396,8 @@ dpif_netdev_init(void)
>  {
>      static enum pmd_info_type show_aux = PMD_INFO_SHOW_STATS,
>                                clear_aux = PMD_INFO_CLEAR_STATS,
> -                              poll_aux = PMD_INFO_SHOW_RXQ;
> +                              poll_aux = PMD_INFO_SHOW_RXQ,
> +                              subtable_aux = PMD_INFO_SHOW_SUBTABLE;
>  
>      unixctl_command_register("dpif-netdev/pmd-stats-show", "[-pmd core] [dp]",
>                               0, 3, dpif_netdev_pmd_info,
> @@ -1416,6 +1422,9 @@ dpif_netdev_init(void)
>                               "[-us usec] [-q qlen]",
>                               0, 10, pmd_perf_log_set_cmd,
>                               NULL);
> +    unixctl_command_register("dpif-netdev/subtable-show", "[-pmd core] [dp]",
> +                             0, 3, dpif_netdev_pmd_info,
> +                             (void *)&subtable_aux);
>      return 0;
>  }
>  
> @@ -8036,3 +8045,46 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
>      }
>      return false;
>  }
> +
> +/* Iterate through all dpcls instances and dump out all subtable
> + * miniflow bits. */
> +static void
> +pmd_info_show_subtable(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
> +{
> +    if (pmd->core_id != NON_PMD_CORE_ID) {
> +        struct rxq_poll *list;
> +        size_t n_rxq;
> +        struct dpcls *cls;
> +        struct dpcls_subtable *subtable;
> +
> +        ovs_mutex_lock(&pmd->port_mutex);
> +        sorted_poll_list(pmd, &list, &n_rxq);
> +        for (int i = 0; i < n_rxq; i++) {
> +            struct dp_netdev_rxq *rxq = list[i].rxq;
> +            odp_port_t in_port = rxq->port->port_no;

Same classifier will be dumped several times if PMD polls several
queues of the same port.  Not sure if this is convenient.

> +            cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
> +            if (!cls) {
> +                continue;
> +            } else {
> +                struct pvector *pvec = &cls->subtables;
> +
> +                PVECTOR_FOR_EACH (subtable, pvec) {
> +                    ds_put_format(reply, "pmd thread numa_id %d "
> +                                  "core_id %u: \n",
> +                                  pmd->numa_id, pmd->core_id);
> +                    ds_put_format(reply, "  dpcls port %d: \n",cls->in_port);
> +                    ds_put_format(reply, "    subtable: \n ");
> +                    ds_put_format(reply,
> +                                  "     unit_0: %d (0x%x)\n"

Please, fix spaces.

> +                                  "      unit_1: %d (0x%x)\n",
> +                                  subtable->mf_bits_set_unit0,
> +                                  subtable->mf_bits_set_unit0,
> +                                  subtable->mf_bits_set_unit1,
> +                                  subtable->mf_bits_set_unit1);

What is the reason of printing same number in hex?
Number of bits in hex doesn't make any sense.

> +                }
> +            }
> +        }
> +    ovs_mutex_unlock(&pmd->port_mutex);
> +    free(list);

Incorrect indentation.

> +    }
> +}
> \ No newline at end of file

This needs to be fixed.
William Tu Nov. 19, 2019, 10:47 p.m. UTC | #2
On Fri, Nov 08, 2019 at 03:23:44PM +0000, Emma Finn wrote:
> Add an ovs-appctl command to iterate through the dpcls
> and for each subtable output the miniflow bits for any
> existing table.
> 
> $ ovs-appctl dpif-netdev/subtable-show
> pmd thread numa_id 0
>   dpcls port 2:
>     subtable:
>       unit_0: 4 (0x4)
>       unit_1: 2 (0x2)
> pmd thread numa_id 1
>   dpcls port 3:
>     subtable:
>       unit_0: 4 (0x3)
>       unit_1: 2 (0x5)
> 
> Signed-off-by: Emma Finn <emma.finn@intel.com>
> ---
>  NEWS                        |  2 ++
>  lib/dpif-netdev-unixctl.man |  4 ++++
>  lib/dpif-netdev.c           | 54 ++++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 59 insertions(+), 1 deletion(-)
> 
> diff --git a/NEWS b/NEWS
> index 88b8189..c01c100 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -10,6 +10,8 @@ Post-v2.12.0
>         if supported by libbpf.
>       * Add option to enable, disable and query TCP sequence checking in
>         conntrack.
> +     * New "ovs-appctl dpif-netdev/subtable-show" command for userspace
> +       datapath to show subtable miniflow bits.
>  
>  v2.12.0 - 03 Sep 2019
>  ---------------------
> diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man
> index 6c54f6f..c443465 100644
> --- a/lib/dpif-netdev-unixctl.man
> +++ b/lib/dpif-netdev-unixctl.man
> @@ -217,3 +217,7 @@ with port names, which this thread polls.
>  .
>  .IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
>  Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.
> +.
> +.IP "\fBdpif-netdev/subtable-show\fR [\fB-pmd\fR \fIcore\fR] [\fIdp\fR]"
> +For one or all pmd threads of the datapath \fIdp\fR show the list of miniflow
> +bits for each subtable in the datapath classifier.
> \ No newline at end of file
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 4720ba1..7ae422e 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -857,6 +857,8 @@ static inline bool
>  pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd);
>  static void queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
>                                    struct dp_netdev_flow *flow);
> +static void pmd_info_show_subtable(struct ds *reply,
> +                                   struct dp_netdev_pmd_thread *pmd);
>  
>  static void
>  emc_cache_init(struct emc_cache *flow_cache)
> @@ -979,6 +981,7 @@ enum pmd_info_type {
>      PMD_INFO_CLEAR_STATS, /* Set the cycles count to 0. */
>      PMD_INFO_SHOW_RXQ,    /* Show poll lists of pmd threads. */
>      PMD_INFO_PERF_SHOW,   /* Show pmd performance details. */
> +    PMD_INFO_SHOW_SUBTABLE, /* Show subtable miniflow bits. */
>  };
>  
>  static void
> @@ -1334,6 +1337,8 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
>              pmd_info_show_stats(&reply, pmd);
>          } else if (type == PMD_INFO_PERF_SHOW) {
>              pmd_info_show_perf(&reply, pmd, (struct pmd_perf_params *)aux);
> +        } else if (type == PMD_INFO_SHOW_SUBTABLE) {
> +            pmd_info_show_subtable(&reply, pmd);
>          }
>      }
>      free(pmd_list);
> @@ -1391,7 +1396,8 @@ dpif_netdev_init(void)
>  {
>      static enum pmd_info_type show_aux = PMD_INFO_SHOW_STATS,
>                                clear_aux = PMD_INFO_CLEAR_STATS,
> -                              poll_aux = PMD_INFO_SHOW_RXQ;
> +                              poll_aux = PMD_INFO_SHOW_RXQ,
> +                              subtable_aux = PMD_INFO_SHOW_SUBTABLE;
>  
>      unixctl_command_register("dpif-netdev/pmd-stats-show", "[-pmd core] [dp]",
>                               0, 3, dpif_netdev_pmd_info,
> @@ -1416,6 +1422,9 @@ dpif_netdev_init(void)
>                               "[-us usec] [-q qlen]",
>                               0, 10, pmd_perf_log_set_cmd,
>                               NULL);
> +    unixctl_command_register("dpif-netdev/subtable-show", "[-pmd core] [dp]",
> +                             0, 3, dpif_netdev_pmd_info,
> +                             (void *)&subtable_aux);
>      return 0;
>  }
>  
> @@ -8036,3 +8045,46 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
>      }
>      return false;
>  }
> +
> +/* Iterate through all dpcls instances and dump out all subtable
> + * miniflow bits. */
> +static void
> +pmd_info_show_subtable(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
> +{
> +    if (pmd->core_id != NON_PMD_CORE_ID) {
> +        struct rxq_poll *list;
> +        size_t n_rxq;
> +        struct dpcls *cls;
> +        struct dpcls_subtable *subtable;
> +
> +        ovs_mutex_lock(&pmd->port_mutex);
> +        sorted_poll_list(pmd, &list, &n_rxq);
> +        for (int i = 0; i < n_rxq; i++) {
> +            struct dp_netdev_rxq *rxq = list[i].rxq;
> +            odp_port_t in_port = rxq->port->port_no;
> +            cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
> +            if (!cls) {
> +                continue;
> +            } else {
> +                struct pvector *pvec = &cls->subtables;
> +
> +                PVECTOR_FOR_EACH (subtable, pvec) {
> +                    ds_put_format(reply, "pmd thread numa_id %d "
> +                                  "core_id %u: \n",
> +                                  pmd->numa_id, pmd->core_id);
> +                    ds_put_format(reply, "  dpcls port %d: \n",cls->in_port);

I think the above 2 lines should be placed before
the PVECTOR_FOR_EACH (subtable, pvec) loop?
So in the case of multiple subtables, it will print

pmd thread numa_id 0
   dpcls port 2:
     subtable:
       unit_0: 4 (0x4)
       unit_1: 2 (0x2)
     subtable:
       unit_0: 4 (0x4)
       unit_1: 2 (0x2)
     subtable:
       ...

I guess you're debugging the performance of packets
looked up in multiple subtables?
So is it better if you print something like this 
   dpcls port 2:
     subtable:
       fields: nw_src, nw_dst
     subtable:
       fields: pkt_mark, dp_hash
     subtable:
       ....

So translate the miniflow bitmap to field name.

--William

> +                    ds_put_format(reply, "    subtable: \n ");
> +                    ds_put_format(reply,
> +                                  "     unit_0: %d (0x%x)\n"
> +                                  "      unit_1: %d (0x%x)\n",
> +                                  subtable->mf_bits_set_unit0,
> +                                  subtable->mf_bits_set_unit0,
> +                                  subtable->mf_bits_set_unit1,
> +                                  subtable->mf_bits_set_unit1);
> +                }
> +            }
> +        }
> +    ovs_mutex_unlock(&pmd->port_mutex);
> +    free(list);
> +    }
> +}
> \ No newline at end of file
> -- 
> 2.7.4
> 
> --------------------------------------------------------------
> Intel Research and Development Ireland Limited
> Registered in Ireland
> Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
> Registered Number: 308263
> 
> 
> This e-mail and any attachments may contain confidential material for the sole
> use of the intended recipient(s). Any review or distribution by others is
> strictly prohibited. If you are not the intended recipient, please contact the
> sender and delete all copies.
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Patch
diff mbox series

diff --git a/NEWS b/NEWS
index 88b8189..c01c100 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,8 @@  Post-v2.12.0
        if supported by libbpf.
      * Add option to enable, disable and query TCP sequence checking in
        conntrack.
+     * New "ovs-appctl dpif-netdev/subtable-show" command for userspace
+       datapath to show subtable miniflow bits.
 
 v2.12.0 - 03 Sep 2019
 ---------------------
diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man
index 6c54f6f..c443465 100644
--- a/lib/dpif-netdev-unixctl.man
+++ b/lib/dpif-netdev-unixctl.man
@@ -217,3 +217,7 @@  with port names, which this thread polls.
 .
 .IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
 Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.
+.
+.IP "\fBdpif-netdev/subtable-show\fR [\fB-pmd\fR \fIcore\fR] [\fIdp\fR]"
+For one or all pmd threads of the datapath \fIdp\fR show the list of miniflow
+bits for each subtable in the datapath classifier.
\ No newline at end of file
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 4720ba1..7ae422e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -857,6 +857,8 @@  static inline bool
 pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd);
 static void queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
                                   struct dp_netdev_flow *flow);
+static void pmd_info_show_subtable(struct ds *reply,
+                                   struct dp_netdev_pmd_thread *pmd);
 
 static void
 emc_cache_init(struct emc_cache *flow_cache)
@@ -979,6 +981,7 @@  enum pmd_info_type {
     PMD_INFO_CLEAR_STATS, /* Set the cycles count to 0. */
     PMD_INFO_SHOW_RXQ,    /* Show poll lists of pmd threads. */
     PMD_INFO_PERF_SHOW,   /* Show pmd performance details. */
+    PMD_INFO_SHOW_SUBTABLE, /* Show subtable miniflow bits. */
 };
 
 static void
@@ -1334,6 +1337,8 @@  dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
             pmd_info_show_stats(&reply, pmd);
         } else if (type == PMD_INFO_PERF_SHOW) {
             pmd_info_show_perf(&reply, pmd, (struct pmd_perf_params *)aux);
+        } else if (type == PMD_INFO_SHOW_SUBTABLE) {
+            pmd_info_show_subtable(&reply, pmd);
         }
     }
     free(pmd_list);
@@ -1391,7 +1396,8 @@  dpif_netdev_init(void)
 {
     static enum pmd_info_type show_aux = PMD_INFO_SHOW_STATS,
                               clear_aux = PMD_INFO_CLEAR_STATS,
-                              poll_aux = PMD_INFO_SHOW_RXQ;
+                              poll_aux = PMD_INFO_SHOW_RXQ,
+                              subtable_aux = PMD_INFO_SHOW_SUBTABLE;
 
     unixctl_command_register("dpif-netdev/pmd-stats-show", "[-pmd core] [dp]",
                              0, 3, dpif_netdev_pmd_info,
@@ -1416,6 +1422,9 @@  dpif_netdev_init(void)
                              "[-us usec] [-q qlen]",
                              0, 10, pmd_perf_log_set_cmd,
                              NULL);
+    unixctl_command_register("dpif-netdev/subtable-show", "[-pmd core] [dp]",
+                             0, 3, dpif_netdev_pmd_info,
+                             (void *)&subtable_aux);
     return 0;
 }
 
@@ -8036,3 +8045,46 @@  dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
     }
     return false;
 }
+
+/* Iterate through all dpcls instances and dump out all subtable
+ * miniflow bits. */
+static void
+pmd_info_show_subtable(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
+{
+    if (pmd->core_id != NON_PMD_CORE_ID) {
+        struct rxq_poll *list;
+        size_t n_rxq;
+        struct dpcls *cls;
+        struct dpcls_subtable *subtable;
+
+        ovs_mutex_lock(&pmd->port_mutex);
+        sorted_poll_list(pmd, &list, &n_rxq);
+        for (int i = 0; i < n_rxq; i++) {
+            struct dp_netdev_rxq *rxq = list[i].rxq;
+            odp_port_t in_port = rxq->port->port_no;
+            cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
+            if (!cls) {
+                continue;
+            } else {
+                struct pvector *pvec = &cls->subtables;
+
+                PVECTOR_FOR_EACH (subtable, pvec) {
+                    ds_put_format(reply, "pmd thread numa_id %d "
+                                  "core_id %u: \n",
+                                  pmd->numa_id, pmd->core_id);
+                    ds_put_format(reply, "  dpcls port %d: \n",cls->in_port);
+                    ds_put_format(reply, "    subtable: \n ");
+                    ds_put_format(reply,
+                                  "     unit_0: %d (0x%x)\n"
+                                  "      unit_1: %d (0x%x)\n",
+                                  subtable->mf_bits_set_unit0,
+                                  subtable->mf_bits_set_unit0,
+                                  subtable->mf_bits_set_unit1,
+                                  subtable->mf_bits_set_unit1);
+                }
+            }
+        }
+    ovs_mutex_unlock(&pmd->port_mutex);
+    free(list);
+    }
+}
\ No newline at end of file