[ovs-dev,v1] dpif-netdev: Add ovs-appctl dpif-netdev/subtable-show.
diff mbox series

Message ID 1576855703-125194-1-git-send-email-emma.finn@intel.com
State Changes Requested
Headers show
Series
  • [ovs-dev,v1] dpif-netdev: Add ovs-appctl dpif-netdev/subtable-show.
Related show

Commit Message

Finn, Emma Dec. 20, 2019, 3:28 p.m. UTC
Add an ovs-appctl command to iterate through the dpcls
and for each subtable output the miniflow bits for any
existing table.

$ ovs-appctl dpif-netdev/subtable-show
pmd thread numa_id 0
  dpcls port 2:
    subtable:
      unit_0: 2 (0x5)
      unit_1: 1 (0x1)
pmd thread numa_id 1
  dpcls port 3:
    subtable:
      unit_0: 2 (0x5)
      unit_1: 1 (0x1)

Signed-off-by: Emma Finn <emma.finn@intel.com>

---

RFC -> v1

* Changed revision from RFC to v1
* Reformatted based on comments
* Fixed same classifier being dumped multiple times
  flagged by Ilya
* Fixed print of subtables flagged by William
* Updated print count of bits as well as bits themselves
---
 NEWS                        |  2 ++
 lib/dpif-netdev-unixctl.man |  4 ++++
 lib/dpif-netdev.c           | 53 ++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 58 insertions(+), 1 deletion(-)

Comments

Ilya Maximets Jan. 2, 2020, 12:27 p.m. UTC | #1
On 20.12.2019 16:28, Emma Finn wrote:
> Add an ovs-appctl command to iterate through the dpcls
> and for each subtable output the miniflow bits for any
> existing table.
> 
> $ ovs-appctl dpif-netdev/subtable-show
> pmd thread numa_id 0
>   dpcls port 2:
>     subtable:
>       unit_0: 2 (0x5)
>       unit_1: 1 (0x1)
> pmd thread numa_id 1
>   dpcls port 3:
>     subtable:
>       unit_0: 2 (0x5)
>       unit_1: 1 (0x1)
> 
> Signed-off-by: Emma Finn <emma.finn@intel.com>
> 
> ---

So, what about my suggestions and thoughts about alternative solutions
that I posted in reply to RFC?  It's still unclear why we need to disturb
the running datapath to get this information, especially if we could get
it "offline" from the flow dump.

Best regards, Ilya Maximets.
Stokes, Ian Jan. 7, 2020, 10:21 a.m. UTC | #2
On 12/20/2019 3:28 PM, Emma Finn wrote:
> Add an ovs-appctl command to iterate through the dpcls
> and for each subtable output the miniflow bits for any
> existing table.
> 
> $ ovs-appctl dpif-netdev/subtable-show
> pmd thread numa_id 0
>    dpcls port 2:
>      subtable:
>        unit_0: 2 (0x5)
>        unit_1: 1 (0x1)
> pmd thread numa_id 1
>    dpcls port 3:
>      subtable:
>        unit_0: 2 (0x5)
>        unit_1: 1 (0x1)
> 
> Signed-off-by: Emma Finn <emma.finn@intel.com>
> 

Hi Emma,

thanks for the patch, comments below.


> ---
> 
> RFC -> v1
> 
> * Changed revision from RFC to v1
> * Reformatted based on comments
> * Fixed same classifier being dumped multiple times
>    flagged by Ilya
> * Fixed print of subtables flagged by William
> * Updated print count of bits as well as bits themselves
> ---
>   NEWS                        |  2 ++
>   lib/dpif-netdev-unixctl.man |  4 ++++
>   lib/dpif-netdev.c           | 53 ++++++++++++++++++++++++++++++++++++++++++++-
>   3 files changed, 58 insertions(+), 1 deletion(-)
> 
> diff --git a/NEWS b/NEWS
> index 2acde3f..2b7b0cc 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -26,6 +26,8 @@ Post-v2.12.0
>        * DPDK ring ports (dpdkr) are deprecated and will be removed in next
>          releases.
>        * Add support for DPDK 19.11.
> +     * New "ovs-appctl dpif-netdev/subtable-show" command for userspace
> +       datapath to show subtable miniflow bits.

Will need to rebase to master as new items have been added in NEWS. Also 
I don't think this belongs under the DPDK section, rather the userspace 
header should be added and used i.e.

+   - Userspace datapath:
+     * New "ovs-appctl dpif-netdev/subtable-show" command for userspace
+       datapath to show subtable miniflow bits.

>   
>   v2.12.0 - 03 Sep 2019
>   ---------------------
> diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man
> index 6c54f6f..c443465 100644
> --- a/lib/dpif-netdev-unixctl.man
> +++ b/lib/dpif-netdev-unixctl.man
> @@ -217,3 +217,7 @@ with port names, which this thread polls.
>   .
>   .IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
>   Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.
> +.
> +.IP "\fBdpif-netdev/subtable-show\fR [\fB-pmd\fR \fIcore\fR] [\fIdp\fR]"
> +For one or all pmd threads of the datapath \fIdp\fR show the list of miniflow
> +bits for each subtable in the datapath classifier.
> \ No newline at end of file
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 8485b54..a166b9e 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -857,6 +857,8 @@ static inline bool
>   pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd);
>   static void queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
>                                     struct dp_netdev_flow *flow);
> +static void pmd_info_show_subtable(struct ds *reply,
> +                                   struct dp_netdev_pmd_thread *pmd);
>   
>   static void
>   emc_cache_init(struct emc_cache *flow_cache)
> @@ -979,6 +981,7 @@ enum pmd_info_type {
>       PMD_INFO_CLEAR_STATS, /* Set the cycles count to 0. */
>       PMD_INFO_SHOW_RXQ,    /* Show poll lists of pmd threads. */
>       PMD_INFO_PERF_SHOW,   /* Show pmd performance details. */
> +    PMD_INFO_SHOW_SUBTABLE, /* Show subtable miniflow bits. */
>   };
>   
>   static void
> @@ -1334,6 +1337,8 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
>               pmd_info_show_stats(&reply, pmd);
>           } else if (type == PMD_INFO_PERF_SHOW) {
>               pmd_info_show_perf(&reply, pmd, (struct pmd_perf_params *)aux);
> +        } else if (type == PMD_INFO_SHOW_SUBTABLE) {
> +            pmd_info_show_subtable(&reply, pmd);
>           }
>       }
>       free(pmd_list);
> @@ -1391,7 +1396,8 @@ dpif_netdev_init(void)
>   {
>       static enum pmd_info_type show_aux = PMD_INFO_SHOW_STATS,
>                                 clear_aux = PMD_INFO_CLEAR_STATS,
> -                              poll_aux = PMD_INFO_SHOW_RXQ;
> +                              poll_aux = PMD_INFO_SHOW_RXQ,
> +                              subtable_aux = PMD_INFO_SHOW_SUBTABLE;
>   
>       unixctl_command_register("dpif-netdev/pmd-stats-show", "[-pmd core] [dp]",
>                                0, 3, dpif_netdev_pmd_info,
> @@ -1416,6 +1422,9 @@ dpif_netdev_init(void)
>                                "[-us usec] [-q qlen]",
>                                0, 10, pmd_perf_log_set_cmd,
>                                NULL);
> +    unixctl_command_register("dpif-netdev/subtable-show", "[-pmd core] [dp]",
> +                             0, 3, dpif_netdev_pmd_info,
> +                             (void *)&subtable_aux);
>       return 0;
>   }
>   
> @@ -8139,3 +8148,45 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
>       }
>       return false;
>   }
> +
> +/* Iterate through all dpcls instances and dump out all subtable
> + * miniflow bits. */
> +static void
> +pmd_info_show_subtable(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
> +    OVS_REQUIRES(dp_netdev_mutex)
> +{
> +     if (pmd->core_id != NON_PMD_CORE_ID) {
> +        struct dp_netdev_port *port = NULL;
> +        struct dp_netdev *dp = pmd->dp;
Alignment for the structs above, should be 4 spaces indented from where 
the if statement is declared, currently it's only 3.
> +
> +         ovs_mutex_lock(&dp->port_mutex);
> +         HMAP_FOR_EACH (port, node, &dp->ports) {
> +             odp_port_t in_port = port->port_no;
> +
 From here onwards the alignment seems to be out by 1 space, should be 4 
spaces in from HMAP_FOR_EACH i.e. inline with where odp_port_t is 
declared. It's worth re-checking that all indention alignments are 4 
spaces for the rest of the function.

> +            struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
> +            if (!cls) {
> +                 continue;
> +            } else {
> +                struct pvector *pvec = &cls->subtables;
> +                ds_put_format(reply, "pmd thread numa_id %d "
> +                              "core_id %u: \n",
> +                              pmd->numa_id, pmd->core_id);
> +                 ds_put_format(reply, "  dpcls port %d: \n",cls->in_port);


Not sure if it was discussed previously, I'm wondering for verbosity 
should the name of the port be included here also, there could be many 
ports and at first glimpse it may not be obvious which port number 
corresponds to which port in a deployment.

This info is available already via port->netdev->name so should be easy 
to add as part of the else block (it wont be needed if theres no cls 
available.

+            } else {
+                struct pvector *pvec = &cls->subtables;
+                const char *name = netdev_get_name(port->netdev);
+
+                ds_put_format(reply, "pmd thread numa_id %d "
+                              "core_id %u: \n",
+                              pmd->numa_id, pmd->core_id);
+                ds_put_format(reply, "  port: %s \n", name);
+                ds_put_format(reply, "  dpcls port %d: \n",cls->in_port);


What do you think?

Thanks
Ian
> +
> +                struct dpcls_subtable *subtable;
> +                PVECTOR_FOR_EACH (subtable, pvec) {
> +                     ds_put_format(reply, "    subtable: \n");
> +                     ds_put_format(reply,
> +                                   "     unit_0: %d (0x%x)\n"
> +                                   "     unit_1: %d (0x%x)\n",
> +                                   count_1bits(subtable->mf_bits_set_unit0),
> +                                   subtable->mf_bits_set_unit0,
> +                                   count_1bits(subtable->mf_bits_set_unit1),
> +                                   subtable->mf_bits_set_unit1);
> +                }
> +            }
> +        }
> +        ovs_mutex_unlock(&dp->port_mutex);
> +    }
> +}
> +
>
Stokes, Ian Jan. 7, 2020, 10:51 a.m. UTC | #3
On 1/2/2020 12:27 PM, Ilya Maximets wrote:
> On 20.12.2019 16:28, Emma Finn wrote:
>> Add an ovs-appctl command to iterate through the dpcls
>> and for each subtable output the miniflow bits for any
>> existing table.
>>
>> $ ovs-appctl dpif-netdev/subtable-show
>> pmd thread numa_id 0
>>    dpcls port 2:
>>      subtable:
>>        unit_0: 2 (0x5)
>>        unit_1: 1 (0x1)
>> pmd thread numa_id 1
>>    dpcls port 3:
>>      subtable:
>>        unit_0: 2 (0x5)
>>        unit_1: 1 (0x1)
>>
>> Signed-off-by: Emma Finn <emma.finn@intel.com>
>>
>> ---
> 
> So, what about my suggestions and thoughts about alternative solutions
> that I posted in reply to RFC?  It's still unclear why we need to disturb
> the running datapath to get this information, especially if we could get
> it "offline" from the flow dump.

Hi Ilya,

apologies I've only spotted this post now.

I guess to my mind there are a few reasons why this is being added as a 
new command and not a separate application to process/parse the flow 
dumps of a datapath.

(i) Ease of implementation, it seems straight forward enough to follow 
the existing commands structure such as dpif-netdev/pmd-rxq-show to 
implement this command. It had the required elements (required data 
structures etc.) so minimum plumbing was required to get access to that 
info and it will be familiar to any other developers who have already 
worked or will work in that area of the code in the future.

I agree this could be done offline without the need to lock the 
datapath, but from my understanding I don't think the intention here is 
to run this command at a frequent or high interval so I would think that 
the lock should not be an issue unless the command is being executed 
continually (again similar to pmd-rxq-show, it would be called when 
needed only).

The concern of adding a new separate application for parsing the dump 
flow as you suggested came down to it being another separate app within 
OVS to maintain as well as the work required to plumb or parse all 
required info.

After posting the RFC we had a number of users already applying the 
patch and using it in their deployments, we spoke about it at the 
conference and didn't hear any objections so I this is why the patch has 
continued with this approach for the 2.13 release.

Best Regards
Ian
Finn, Emma Jan. 8, 2020, 11:38 a.m. UTC | #4
> -----Original Message-----
> From: Stokes, Ian <ian.stokes@intel.com>
> Sent: Tuesday 7 January 2020 10:21
> To: Finn, Emma <emma.finn@intel.com>; u9012063@gmail.com;
> i.maximets@ovn.org; ovs-dev@openvswitch.org
> Subject: Re: [ovs-dev] [v1] dpif-netdev: Add ovs-appctl dpif-
> netdev/subtable-show.
> 
> 
> 
> On 12/20/2019 3:28 PM, Emma Finn wrote:
> > Add an ovs-appctl command to iterate through the dpcls and for each
> > subtable output the miniflow bits for any existing table.
> >
> > $ ovs-appctl dpif-netdev/subtable-show pmd thread numa_id 0
> >    dpcls port 2:
> >      subtable:
> >        unit_0: 2 (0x5)
> >        unit_1: 1 (0x1)
> > pmd thread numa_id 1
> >    dpcls port 3:
> >      subtable:
> >        unit_0: 2 (0x5)
> >        unit_1: 1 (0x1)
> >
> > Signed-off-by: Emma Finn <emma.finn@intel.com>
> >
> 
> Hi Emma,
> 
> thanks for the patch, comments below.
> 
> 
> > ---
> >
> > RFC -> v1
> >
> > * Changed revision from RFC to v1
> > * Reformatted based on comments
> > * Fixed same classifier being dumped multiple times
> >    flagged by Ilya
> > * Fixed print of subtables flagged by William
> > * Updated print count of bits as well as bits themselves
> > ---
> >   NEWS                        |  2 ++
> >   lib/dpif-netdev-unixctl.man |  4 ++++
> >   lib/dpif-netdev.c           | 53
> ++++++++++++++++++++++++++++++++++++++++++++-
> >   3 files changed, 58 insertions(+), 1 deletion(-)
> >
> > diff --git a/NEWS b/NEWS
> > index 2acde3f..2b7b0cc 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -26,6 +26,8 @@ Post-v2.12.0
> >        * DPDK ring ports (dpdkr) are deprecated and will be removed in next
> >          releases.
> >        * Add support for DPDK 19.11.
> > +     * New "ovs-appctl dpif-netdev/subtable-show" command for
> userspace
> > +       datapath to show subtable miniflow bits.
> 
> Will need to rebase to master as new items have been added in NEWS. Also I
> don't think this belongs under the DPDK section, rather the userspace
> header should be added and used i.e.
> 
> +   - Userspace datapath:
> +     * New "ovs-appctl dpif-netdev/subtable-show" command for userspace
> +       datapath to show subtable miniflow bits.
> 
> >
> >   v2.12.0 - 03 Sep 2019
> >   ---------------------
> > diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man
> > index 6c54f6f..c443465 100644
> > --- a/lib/dpif-netdev-unixctl.man
> > +++ b/lib/dpif-netdev-unixctl.man
> > @@ -217,3 +217,7 @@ with port names, which this thread polls.
> >   .
> >   .IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
> >   Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current
> usage.
> > +.
> > +.IP "\fBdpif-netdev/subtable-show\fR [\fB-pmd\fR \fIcore\fR] [\fIdp\fR]"
> > +For one or all pmd threads of the datapath \fIdp\fR show the list of
> > +miniflow bits for each subtable in the datapath classifier.
> > \ No newline at end of file
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index
> > 8485b54..a166b9e 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -857,6 +857,8 @@ static inline bool
> >   pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd);
> >   static void queue_netdev_flow_del(struct dp_netdev_pmd_thread
> *pmd,
> >                                     struct dp_netdev_flow *flow);
> > +static void pmd_info_show_subtable(struct ds *reply,
> > +                                   struct dp_netdev_pmd_thread *pmd);
> >
> >   static void
> >   emc_cache_init(struct emc_cache *flow_cache) @@ -979,6 +981,7 @@
> > enum pmd_info_type {
> >       PMD_INFO_CLEAR_STATS, /* Set the cycles count to 0. */
> >       PMD_INFO_SHOW_RXQ,    /* Show poll lists of pmd threads. */
> >       PMD_INFO_PERF_SHOW,   /* Show pmd performance details. */
> > +    PMD_INFO_SHOW_SUBTABLE, /* Show subtable miniflow bits. */
> >   };
> >
> >   static void
> > @@ -1334,6 +1337,8 @@ dpif_netdev_pmd_info(struct unixctl_conn
> *conn, int argc, const char *argv[],
> >               pmd_info_show_stats(&reply, pmd);
> >           } else if (type == PMD_INFO_PERF_SHOW) {
> >               pmd_info_show_perf(&reply, pmd, (struct pmd_perf_params
> > *)aux);
> > +        } else if (type == PMD_INFO_SHOW_SUBTABLE) {
> > +            pmd_info_show_subtable(&reply, pmd);
> >           }
> >       }
> >       free(pmd_list);
> > @@ -1391,7 +1396,8 @@ dpif_netdev_init(void)
> >   {
> >       static enum pmd_info_type show_aux = PMD_INFO_SHOW_STATS,
> >                                 clear_aux = PMD_INFO_CLEAR_STATS,
> > -                              poll_aux = PMD_INFO_SHOW_RXQ;
> > +                              poll_aux = PMD_INFO_SHOW_RXQ,
> > +                              subtable_aux = PMD_INFO_SHOW_SUBTABLE;
> >
> >       unixctl_command_register("dpif-netdev/pmd-stats-show", "[-pmd
> core] [dp]",
> >                                0, 3, dpif_netdev_pmd_info, @@ -1416,6
> > +1422,9 @@ dpif_netdev_init(void)
> >                                "[-us usec] [-q qlen]",
> >                                0, 10, pmd_perf_log_set_cmd,
> >                                NULL);
> > +    unixctl_command_register("dpif-netdev/subtable-show", "[-pmd core]
> [dp]",
> > +                             0, 3, dpif_netdev_pmd_info,
> > +                             (void *)&subtable_aux);
> >       return 0;
> >   }
> >
> > @@ -8139,3 +8148,45 @@ dpcls_lookup(struct dpcls *cls, const struct
> netdev_flow_key *keys[],
> >       }
> >       return false;
> >   }
> > +
> > +/* Iterate through all dpcls instances and dump out all subtable
> > + * miniflow bits. */
> > +static void
> > +pmd_info_show_subtable(struct ds *reply, struct
> dp_netdev_pmd_thread *pmd)
> > +    OVS_REQUIRES(dp_netdev_mutex)
> > +{
> > +     if (pmd->core_id != NON_PMD_CORE_ID) {
> > +        struct dp_netdev_port *port = NULL;
> > +        struct dp_netdev *dp = pmd->dp;
> Alignment for the structs above, should be 4 spaces indented from where
> the if statement is declared, currently it's only 3.
> > +
> > +         ovs_mutex_lock(&dp->port_mutex);
> > +         HMAP_FOR_EACH (port, node, &dp->ports) {
> > +             odp_port_t in_port = port->port_no;
> > +
>  From here onwards the alignment seems to be out by 1 space, should be 4
> spaces in from HMAP_FOR_EACH i.e. inline with where odp_port_t is
> declared. It's worth re-checking that all indention alignments are 4 spaces for
> the rest of the function.
> 
> > +            struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
> > +            if (!cls) {
> > +                 continue;
> > +            } else {
> > +                struct pvector *pvec = &cls->subtables;
> > +                ds_put_format(reply, "pmd thread numa_id %d "
> > +                              "core_id %u: \n",
> > +                              pmd->numa_id, pmd->core_id);
> > +                 ds_put_format(reply, "  dpcls port %d:
> > + \n",cls->in_port);
> 
> 
> Not sure if it was discussed previously, I'm wondering for verbosity should
> the name of the port be included here also, there could be many ports and at
> first glimpse it may not be obvious which port number corresponds to which
> port in a deployment.
> 
> This info is available already via port->netdev->name so should be easy to
> add as part of the else block (it wont be needed if theres no cls available.
> 
> +            } else {
> +                struct pvector *pvec = &cls->subtables;
> +                const char *name = netdev_get_name(port->netdev);
> +
> +                ds_put_format(reply, "pmd thread numa_id %d "
> +                              "core_id %u: \n",
> +                              pmd->numa_id, pmd->core_id);
> +                ds_put_format(reply, "  port: %s \n", name);
> +                ds_put_format(reply, "  dpcls port %d:
> + \n",cls->in_port);
> 
> 
> What do you think?
> 
> Thanks
> Ian

Thanks for the feedback.
I agree for clarity the port name should probably be included.
I will make the above changes and submit a v2.

Thanks, 
Emma
> > +
> > +                struct dpcls_subtable *subtable;
> > +                PVECTOR_FOR_EACH (subtable, pvec) {
> > +                     ds_put_format(reply, "    subtable: \n");
> > +                     ds_put_format(reply,
> > +                                   "     unit_0: %d (0x%x)\n"
> > +                                   "     unit_1: %d (0x%x)\n",
> > +                                   count_1bits(subtable->mf_bits_set_unit0),
> > +                                   subtable->mf_bits_set_unit0,
> > +                                   count_1bits(subtable->mf_bits_set_unit1),
> > +                                   subtable->mf_bits_set_unit1);
> > +                }
> > +            }
> > +        }
> > +        ovs_mutex_unlock(&dp->port_mutex);
> > +    }
> > +}
> > +
> >
Ilya Maximets Jan. 8, 2020, 4:38 p.m. UTC | #5
On 07.01.2020 11:51, Stokes, Ian wrote:
> 
> 
> On 1/2/2020 12:27 PM, Ilya Maximets wrote:
>> On 20.12.2019 16:28, Emma Finn wrote:
>>> Add an ovs-appctl command to iterate through the dpcls
>>> and for each subtable output the miniflow bits for any
>>> existing table.
>>>
>>> $ ovs-appctl dpif-netdev/subtable-show
>>> pmd thread numa_id 0
>>>    dpcls port 2:
>>>      subtable:
>>>        unit_0: 2 (0x5)
>>>        unit_1: 1 (0x1)
>>> pmd thread numa_id 1
>>>    dpcls port 3:
>>>      subtable:
>>>        unit_0: 2 (0x5)
>>>        unit_1: 1 (0x1)
>>>
>>> Signed-off-by: Emma Finn <emma.finn@intel.com>
>>>
>>> ---
>>
>> So, what about my suggestions and thoughts about alternative solutions
>> that I posted in reply to RFC?  It's still unclear why we need to disturb
>> the running datapath to get this information, especially if we could get
>> it "offline" from the flow dump.
> 
> Hi Ilya,
> 
> apologies I've only spotted this post now.
> 
> I guess to my mind there are a few reasons why this is being added as a new command and not a separate application to process/parse the flow dumps of a datapath.
> 
> (i) Ease of implementation, it seems straight forward enough to follow the existing commands structure such as dpif-netdev/pmd-rxq-show to implement this command. It had the required elements (required data structures etc.) so minimum plumbing was required to get access to that info and it will be familiar to any other developers who have already worked or will work in that area of the code in the future.
> 
> I agree this could be done offline without the need to lock the datapath, but from my understanding I don't think the intention here is to run this command at a frequent or high interval so I would think that the lock should not be an issue unless the command is being executed continually (again similar to pmd-rxq-show, it would be called when needed only).
> 
> The concern of adding a new separate application for parsing the dump flow as you suggested came down to it being another separate app within OVS to maintain as well as the work required to plumb or parse all required info.

It's not hard to parse.  Just a few function calls.  Most of the required
functionality exists in 'lib' code.

If you don't like a separate app or script, this information could be printed
in a flow-dump in a some special field named like 'dp-extra-info' or whatever.
Like this:
  recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), dp-extra-info:miniflow_bits(0x5, 0x1), actions:hash(sym_l4(0)),recirc(0x1)

'dp-extra-info' might be printed with verbosity enabled if dpif provided this
information.

I really feel that this information (miniflow bits) should be bonded with flow-dump
somehow just because it belongs there.

> 
> After posting the RFC we had a number of users already applying the patch and using it in their deployments, we spoke about it at the conference and didn't hear any objections so I this is why the patch has continued with this approach for the 2.13 release.

They didn't have any alternatives to use. =)

Best regards, Ilya Maximets.
Stokes, Ian Jan. 8, 2020, 5:37 p.m. UTC | #6
On 1/8/2020 4:38 PM, Ilya Maximets wrote:
> On 07.01.2020 11:51, Stokes, Ian wrote:
>>
>>
>> On 1/2/2020 12:27 PM, Ilya Maximets wrote:
>>> On 20.12.2019 16:28, Emma Finn wrote:
>>>> Add an ovs-appctl command to iterate through the dpcls
>>>> and for each subtable output the miniflow bits for any
>>>> existing table.
>>>>
>>>> $ ovs-appctl dpif-netdev/subtable-show
>>>> pmd thread numa_id 0
>>>>     dpcls port 2:
>>>>       subtable:
>>>>         unit_0: 2 (0x5)
>>>>         unit_1: 1 (0x1)
>>>> pmd thread numa_id 1
>>>>     dpcls port 3:
>>>>       subtable:
>>>>         unit_0: 2 (0x5)
>>>>         unit_1: 1 (0x1)
>>>>
>>>> Signed-off-by: Emma Finn <emma.finn@intel.com>
>>>>
>>>> ---
>>>
>>> So, what about my suggestions and thoughts about alternative solutions
>>> that I posted in reply to RFC?  It's still unclear why we need to disturb
>>> the running datapath to get this information, especially if we could get
>>> it "offline" from the flow dump.
>>
>> Hi Ilya,
>>
>> apologies I've only spotted this post now.
>>
>> I guess to my mind there are a few reasons why this is being added as a new command and not a separate application to process/parse the flow dumps of a datapath.
>>
>> (i) Ease of implementation, it seems straight forward enough to follow the existing commands structure such as dpif-netdev/pmd-rxq-show to implement this command. It had the required elements (required data structures etc.) so minimum plumbing was required to get access to that info and it will be familiar to any other developers who have already worked or will work in that area of the code in the future.
>>
>> I agree this could be done offline without the need to lock the datapath, but from my understanding I don't think the intention here is to run this command at a frequent or high interval so I would think that the lock should not be an issue unless the command is being executed continually (again similar to pmd-rxq-show, it would be called when needed only).
>>
>> The concern of adding a new separate application for parsing the dump flow as you suggested came down to it being another separate app within OVS to maintain as well as the work required to plumb or parse all required info.
> 
> It's not hard to parse.  Just a few function calls.  Most of the required
> functionality exists in 'lib' code.
> 
> If you don't like a separate app or script, this information could be printed

I think it would be better to keep it in OVS rather than a separate 
app/script.

> in a flow-dump in a some special field named like 'dp-extra-info' or whatever.
> Like this:
>    recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), dp-extra-info:miniflow_bits(0x5, 0x1), actions:hash(sym_l4(0)),recirc(0x1)
> 

This is interesting approach, but are we missing info such as the in 
ports, numa nodes etc? Maybe Harry can comment to this as it was his 
idea originally. It seems with either approach you will be missing some 
info depending on how you want to approach the debug usecase.

> 'dp-extra-info' might be printed with verbosity enabled if dpif provided this
> information.

I haven't looked at ovs-ofctl myself, not sure how much work would be 
needed to access this info or if you are back to parsing, again I think 
the idea here is we have this info available in the current approach as 
is why parse and break out these values?

Would it even be acceptable for 2.13 as well? it would be a new v1 and 
we're past the soft freeze.

> 
> I really feel that this information (miniflow bits) should be bonded with flow-dump
> somehow just because it belongs there.

I guess we're looking at this from different points of view, don't shoot 
me but would it make sense to have both approaches? :-D
> 
>>
>> After posting the RFC we had a number of users already applying the patch and using it in their deployments, we spoke about it at the conference and didn't hear any objections so I this is why the patch has continued with this approach for the 2.13 release.
> 
> They didn't have any alternatives to use. =)

Agree, but the choice provided worked well so it may not have been a 
problem :D.

Best Regards
Ian
> 
> Best regards, Ilya Maximets.
>
Ilya Maximets Jan. 8, 2020, 5:56 p.m. UTC | #7
On 08.01.2020 18:37, Stokes, Ian wrote:
> 
> 
> On 1/8/2020 4:38 PM, Ilya Maximets wrote:
>> On 07.01.2020 11:51, Stokes, Ian wrote:
>>>
>>>
>>> On 1/2/2020 12:27 PM, Ilya Maximets wrote:
>>>> On 20.12.2019 16:28, Emma Finn wrote:
>>>>> Add an ovs-appctl command to iterate through the dpcls
>>>>> and for each subtable output the miniflow bits for any
>>>>> existing table.
>>>>>
>>>>> $ ovs-appctl dpif-netdev/subtable-show
>>>>> pmd thread numa_id 0
>>>>>     dpcls port 2:
>>>>>       subtable:
>>>>>         unit_0: 2 (0x5)
>>>>>         unit_1: 1 (0x1)
>>>>> pmd thread numa_id 1
>>>>>     dpcls port 3:
>>>>>       subtable:
>>>>>         unit_0: 2 (0x5)
>>>>>         unit_1: 1 (0x1)
>>>>>
>>>>> Signed-off-by: Emma Finn <emma.finn@intel.com>
>>>>>
>>>>> ---
>>>>
>>>> So, what about my suggestions and thoughts about alternative solutions
>>>> that I posted in reply to RFC?  It's still unclear why we need to disturb
>>>> the running datapath to get this information, especially if we could get
>>>> it "offline" from the flow dump.
>>>
>>> Hi Ilya,
>>>
>>> apologies I've only spotted this post now.
>>>
>>> I guess to my mind there are a few reasons why this is being added as a new command and not a separate application to process/parse the flow dumps of a datapath.
>>>
>>> (i) Ease of implementation, it seems straight forward enough to follow the existing commands structure such as dpif-netdev/pmd-rxq-show to implement this command. It had the required elements (required data structures etc.) so minimum plumbing was required to get access to that info and it will be familiar to any other developers who have already worked or will work in that area of the code in the future.
>>>
>>> I agree this could be done offline without the need to lock the datapath, but from my understanding I don't think the intention here is to run this command at a frequent or high interval so I would think that the lock should not be an issue unless the command is being executed continually (again similar to pmd-rxq-show, it would be called when needed only).
>>>
>>> The concern of adding a new separate application for parsing the dump flow as you suggested came down to it being another separate app within OVS to maintain as well as the work required to plumb or parse all required info.
>>
>> It's not hard to parse.  Just a few function calls.  Most of the required
>> functionality exists in 'lib' code.
>>
>> If you don't like a separate app or script, this information could be printed
> 
> I think it would be better to keep it in OVS rather than a separate app/script.
> 
>> in a flow-dump in a some special field named like 'dp-extra-info' or whatever.
>> Like this:
>>    recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), dp-extra-info:miniflow_bits(0x5, 0x1), actions:hash(sym_l4(0)),recirc(0x1)
>>
> 
> This is interesting approach, but are we missing info such as the in ports, numa nodes etc? Maybe Harry can comment to this as it was his idea originally. It seems with either approach you will be missing some info depending on how you want to approach the debug usecase.

We're not missing core info since flow dumps are printed per-PMD:

flow-dump from the main thread:
<...>
flow-dump from the pmd thread on core X:
<...>

numa could be easily checked by by the core number and I'm not sure
if we really need this information here.

Flow dumps always contains 'in_port' fileld and the port name will
be printed instead of port number if you'll pass --names to the dump command.

> 
>> 'dp-extra-info' might be printed with verbosity enabled if dpif provided this
>> information.
> 
> I haven't looked at ovs-ofctl myself, not sure how much work would be needed to access this info or if you are back to parsing, again I think the idea here is we have this info available in the current approach as is why parse and break out these values?

Not ovs-ofctl - It dumps OF tables, not the datapath flow tables - but
'ovs-dpctl dump-flows' or 'ovs-appctl dpctl/dump-flows'.  dpctl and appctl
are using same code from lib/dpctl.c which is fairly simple.  I'm suggesting
to add this information to dpif_flow.attrs and dpif-netdev will fill this
information while sending dumped flow in dp_netdev_flow_to_dpif_flow().
Should not be hard.
It's not parsing and breaking out, we have a netdev_flow while dumping
and we only need to store a bit more information from it into dpif_flow.

> 
> Would it even be acceptable for 2.13 as well? it would be a new v1 and we're past the soft freeze.

Technically, soft freeze wasn't announced yet. =)
And we also could make it a v3.

> 
>>
>> I really feel that this information (miniflow bits) should be bonded with flow-dump
>> somehow just because it belongs there.
> 
> I guess we're looking at this from different points of view, don't shoot me but would it make sense to have both approaches? :-D

Not sure about that.  I'd like to not have both.

>>
>>>
>>> After posting the RFC we had a number of users already applying the patch and using it in their deployments, we spoke about it at the conference and didn't hear any objections so I this is why the patch has continued with this approach for the 2.13 release.
>>
>> They didn't have any alternatives to use. =)
> 
> Agree, but the choice provided worked well so it may not have been a problem :D.
> 
> Best Regards
> Ian
>>
>> Best regards, Ilya Maximets.
>>
Stokes, Ian Jan. 8, 2020, 6:25 p.m. UTC | #8
On 1/8/2020 5:56 PM, Ilya Maximets wrote:
> On 08.01.2020 18:37, Stokes, Ian wrote:
>>
>>
>> On 1/8/2020 4:38 PM, Ilya Maximets wrote:
>>> On 07.01.2020 11:51, Stokes, Ian wrote:
>>>>
>>>>
>>>> On 1/2/2020 12:27 PM, Ilya Maximets wrote:
>>>>> On 20.12.2019 16:28, Emma Finn wrote:
>>>>>> Add an ovs-appctl command to iterate through the dpcls
>>>>>> and for each subtable output the miniflow bits for any
>>>>>> existing table.
>>>>>>
>>>>>> $ ovs-appctl dpif-netdev/subtable-show
>>>>>> pmd thread numa_id 0
>>>>>>      dpcls port 2:
>>>>>>        subtable:
>>>>>>          unit_0: 2 (0x5)
>>>>>>          unit_1: 1 (0x1)
>>>>>> pmd thread numa_id 1
>>>>>>      dpcls port 3:
>>>>>>        subtable:
>>>>>>          unit_0: 2 (0x5)
>>>>>>          unit_1: 1 (0x1)
>>>>>>
>>>>>> Signed-off-by: Emma Finn <emma.finn@intel.com>
>>>>>>
>>>>>> ---
>>>>>
>>>>> So, what about my suggestions and thoughts about alternative solutions
>>>>> that I posted in reply to RFC?  It's still unclear why we need to disturb
>>>>> the running datapath to get this information, especially if we could get
>>>>> it "offline" from the flow dump.
>>>>
>>>> Hi Ilya,
>>>>
>>>> apologies I've only spotted this post now.
>>>>
>>>> I guess to my mind there are a few reasons why this is being added as a new command and not a separate application to process/parse the flow dumps of a datapath.
>>>>
>>>> (i) Ease of implementation, it seems straight forward enough to follow the existing commands structure such as dpif-netdev/pmd-rxq-show to implement this command. It had the required elements (required data structures etc.) so minimum plumbing was required to get access to that info and it will be familiar to any other developers who have already worked or will work in that area of the code in the future.
>>>>
>>>> I agree this could be done offline without the need to lock the datapath, but from my understanding I don't think the intention here is to run this command at a frequent or high interval so I would think that the lock should not be an issue unless the command is being executed continually (again similar to pmd-rxq-show, it would be called when needed only).
>>>>
>>>> The concern of adding a new separate application for parsing the dump flow as you suggested came down to it being another separate app within OVS to maintain as well as the work required to plumb or parse all required info.
>>>
>>> It's not hard to parse.  Just a few function calls.  Most of the required
>>> functionality exists in 'lib' code.
>>>
>>> If you don't like a separate app or script, this information could be printed
>>
>> I think it would be better to keep it in OVS rather than a separate app/script.
>>
>>> in a flow-dump in a some special field named like 'dp-extra-info' or whatever.
>>> Like this:
>>>     recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), dp-extra-info:miniflow_bits(0x5, 0x1), actions:hash(sym_l4(0)),recirc(0x1)
>>>
>>
>> This is interesting approach, but are we missing info such as the in ports, numa nodes etc? Maybe Harry can comment to this as it was his idea originally. It seems with either approach you will be missing some info depending on how you want to approach the debug usecase.
> 
> We're not missing core info since flow dumps are printed per-PMD:
> 
> flow-dump from the main thread:
> <...>
> flow-dump from the pmd thread on core X:
> <...>
> 
> numa could be easily checked by by the core number and I'm not sure
> if we really need this information here.
> 
> Flow dumps always contains 'in_port' fileld and the port name will
> be printed instead of port number if you'll pass --names to the dump command.
> 
>>
>>> 'dp-extra-info' might be printed with verbosity enabled if dpif provided this
>>> information.
>>
>> I haven't looked at ovs-ofctl myself, not sure how much work would be needed to access this info or if you are back to parsing, again I think the idea here is we have this info available in the current approach as is why parse and break out these values?
> 
> Not ovs-ofctl - It dumps OF tables, not the datapath flow tables - but
> 'ovs-dpctl dump-flows' or 'ovs-appctl dpctl/dump-flows'.  dpctl and appctl
> are using same code from lib/dpctl.c which is fairly simple.  I'm suggesting
> to add this information to dpif_flow.attrs and dpif-netdev will fill this
> information while sending dumped flow in dp_netdev_flow_to_dpif_flow().
> Should not be hard.
> It's not parsing and breaking out, we have a netdev_flow while dumping
> and we only need to store a bit more information from it into dpif_flow.

Now I'm on the same page as you, apologies, previously thought you were 
referring to ofctl. This approach seems more appropriate.

> 
>>
>> Would it even be acceptable for 2.13 as well? it would be a new v1 and we're past the soft freeze.
> 
> Technically, soft freeze wasn't announced yet. =)
> And we also could make it a v3.

+1 :D

> 
>>
>>>
>>> I really feel that this information (miniflow bits) should be bonded with flow-dump
>>> somehow just because it belongs there.
>>
>> I guess we're looking at this from different points of view, don't shoot me but would it make sense to have both approaches? :-D
> 
> Not sure about that.  I'd like to not have both.
> 

No worries, I understand what you meant better now so agree it will be 
fine in one place.

Best Regards
Ian
>>>
>>>>
>>>> After posting the RFC we had a number of users already applying the patch and using it in their deployments, we spoke about it at the conference and didn't hear any objections so I this is why the patch has continued with this approach for the 2.13 release.
>>>
>>> They didn't have any alternatives to use. =)
>>
>> Agree, but the choice provided worked well so it may not have been a problem :D.
>>
>> Best Regards
>> Ian
>>>
>>> Best regards, Ilya Maximets.
>>>

Patch
diff mbox series

diff --git a/NEWS b/NEWS
index 2acde3f..2b7b0cc 100644
--- a/NEWS
+++ b/NEWS
@@ -26,6 +26,8 @@  Post-v2.12.0
      * DPDK ring ports (dpdkr) are deprecated and will be removed in next
        releases.
      * Add support for DPDK 19.11.
+     * New "ovs-appctl dpif-netdev/subtable-show" command for userspace
+       datapath to show subtable miniflow bits.
 
 v2.12.0 - 03 Sep 2019
 ---------------------
diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man
index 6c54f6f..c443465 100644
--- a/lib/dpif-netdev-unixctl.man
+++ b/lib/dpif-netdev-unixctl.man
@@ -217,3 +217,7 @@  with port names, which this thread polls.
 .
 .IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
 Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.
+.
+.IP "\fBdpif-netdev/subtable-show\fR [\fB-pmd\fR \fIcore\fR] [\fIdp\fR]"
+For one or all pmd threads of the datapath \fIdp\fR show the list of miniflow
+bits for each subtable in the datapath classifier.
\ No newline at end of file
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 8485b54..a166b9e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -857,6 +857,8 @@  static inline bool
 pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd);
 static void queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
                                   struct dp_netdev_flow *flow);
+static void pmd_info_show_subtable(struct ds *reply,
+                                   struct dp_netdev_pmd_thread *pmd);
 
 static void
 emc_cache_init(struct emc_cache *flow_cache)
@@ -979,6 +981,7 @@  enum pmd_info_type {
     PMD_INFO_CLEAR_STATS, /* Set the cycles count to 0. */
     PMD_INFO_SHOW_RXQ,    /* Show poll lists of pmd threads. */
     PMD_INFO_PERF_SHOW,   /* Show pmd performance details. */
+    PMD_INFO_SHOW_SUBTABLE, /* Show subtable miniflow bits. */
 };
 
 static void
@@ -1334,6 +1337,8 @@  dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
             pmd_info_show_stats(&reply, pmd);
         } else if (type == PMD_INFO_PERF_SHOW) {
             pmd_info_show_perf(&reply, pmd, (struct pmd_perf_params *)aux);
+        } else if (type == PMD_INFO_SHOW_SUBTABLE) {
+            pmd_info_show_subtable(&reply, pmd);
         }
     }
     free(pmd_list);
@@ -1391,7 +1396,8 @@  dpif_netdev_init(void)
 {
     static enum pmd_info_type show_aux = PMD_INFO_SHOW_STATS,
                               clear_aux = PMD_INFO_CLEAR_STATS,
-                              poll_aux = PMD_INFO_SHOW_RXQ;
+                              poll_aux = PMD_INFO_SHOW_RXQ,
+                              subtable_aux = PMD_INFO_SHOW_SUBTABLE;
 
     unixctl_command_register("dpif-netdev/pmd-stats-show", "[-pmd core] [dp]",
                              0, 3, dpif_netdev_pmd_info,
@@ -1416,6 +1422,9 @@  dpif_netdev_init(void)
                              "[-us usec] [-q qlen]",
                              0, 10, pmd_perf_log_set_cmd,
                              NULL);
+    unixctl_command_register("dpif-netdev/subtable-show", "[-pmd core] [dp]",
+                             0, 3, dpif_netdev_pmd_info,
+                             (void *)&subtable_aux);
     return 0;
 }
 
@@ -8139,3 +8148,45 @@  dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
     }
     return false;
 }
+
+/* Iterate through all dpcls instances and dump out all subtable
+ * miniflow bits. */
+static void
+pmd_info_show_subtable(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
+    OVS_REQUIRES(dp_netdev_mutex)
+{
+     if (pmd->core_id != NON_PMD_CORE_ID) {
+        struct dp_netdev_port *port = NULL;
+        struct dp_netdev *dp = pmd->dp;
+
+         ovs_mutex_lock(&dp->port_mutex);
+         HMAP_FOR_EACH (port, node, &dp->ports) {
+             odp_port_t in_port = port->port_no;
+
+            struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
+            if (!cls) {
+                 continue;
+            } else {
+                struct pvector *pvec = &cls->subtables;
+                ds_put_format(reply, "pmd thread numa_id %d "
+                              "core_id %u: \n",
+                              pmd->numa_id, pmd->core_id);
+                 ds_put_format(reply, "  dpcls port %d: \n",cls->in_port);
+
+                struct dpcls_subtable *subtable;
+                PVECTOR_FOR_EACH (subtable, pvec) {
+                     ds_put_format(reply, "    subtable: \n");
+                     ds_put_format(reply,
+                                   "     unit_0: %d (0x%x)\n"
+                                   "     unit_1: %d (0x%x)\n",
+                                   count_1bits(subtable->mf_bits_set_unit0),
+                                   subtable->mf_bits_set_unit0,
+                                   count_1bits(subtable->mf_bits_set_unit1),
+                                   subtable->mf_bits_set_unit1);
+                }
+            }
+        }
+        ovs_mutex_unlock(&dp->port_mutex);
+    }
+}
+