diff mbox series

[ovs-dev,v12,08/16] dpif-netdev: Add a partial HWOL PMD statistic.

Message ID 20210517140434.59555-9-cian.ferriter@intel.com
State Changes Requested
Headers show
Series DPIF Framework + Optimizations | expand

Commit Message

Ferriter, Cian May 17, 2021, 2:04 p.m. UTC
It is possible for packets traversing the userspace datapath to match a
flow before hitting on EMC by using a mark ID provided by a NIC. Add a
PMD statistic for this hit.

Signed-off-by: Cian Ferriter <cian.ferriter@intel.com>
---
 NEWS                     | 2 ++
 lib/dpif-netdev-avx512.c | 3 +++
 lib/dpif-netdev-perf.c   | 3 +++
 lib/dpif-netdev-perf.h   | 1 +
 lib/dpif-netdev.c        | 9 +++++++--
 tests/pmd.at             | 6 ++++--
 6 files changed, 20 insertions(+), 4 deletions(-)

Comments

Stokes, Ian June 2, 2021, 6:09 p.m. UTC | #1
> It is possible for packets traversing the userspace datapath to match a
> flow before hitting on EMC by using a mark ID provided by a NIC. Add a
> PMD statistic for this hit.
> 
> Signed-off-by: Cian Ferriter <cian.ferriter@intel.com>

Thanks for the Patch Cian, comments inline.

So I have a general query here. In the bigger  picture why is the patch required for the DPIF work?
Is it purely to track packet hits that have touched the software datapath (unlike a full offload usecase)?

I also think it would be good to get some input from other involved on the HWOL work to date as they may have opinions on this so I've included some folks from Nvidia and Broadcom in case they have input.
The implementation seems straight forward enough but good to gauge in case this goes against ongoing HWOL work.

Thanks
Ian
> ---
>  NEWS                     | 2 ++
>  lib/dpif-netdev-avx512.c | 3 +++
>  lib/dpif-netdev-perf.c   | 3 +++
>  lib/dpif-netdev-perf.h   | 1 +
>  lib/dpif-netdev.c        | 9 +++++++--
>  tests/pmd.at             | 6 ++++--
>  6 files changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/NEWS b/NEWS
> index 402ce5969..3eab5f4fa 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -5,6 +5,8 @@ Post-v2.15.0
>     - Userspace datapath:
>       * Auto load balancing of PMDs now partially supports cross-NUMA polling
>         cases, e.g if all PMD threads are running on the same NUMA node.
> +     * Add a partial HWOL PMD statistic counting hits similar to existing
> +     * EMC/SMC/DPCLS stats.

No need for the * on the second line above, only the first line.

>     - ovs-ctl:
>       * New option '--no-record-hostname' to disable hostname configuration
>         in ovsdb on startup.
> diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
> index e255c9d17..5c6f54799 100644
> --- a/lib/dpif-netdev-avx512.c
> +++ b/lib/dpif-netdev-avx512.c
> @@ -114,6 +114,7 @@ dp_netdev_input_outer_avx512(struct
> dp_netdev_pmd_thread *pmd,
>      const uint32_t emc_enabled = pmd->ctx.emc_insert_min != 0;
>      const uint32_t smc_enabled = pmd->ctx.smc_enable_db;
> 
> +    uint32_t phwol_hits = 0;

Minor nit, I'd expect new variables to be added after the existing variables below.

>      uint32_t emc_hits = 0;
>      uint32_t smc_hits = 0;
> 
> @@ -147,6 +148,7 @@ dp_netdev_input_outer_avx512(struct
> dp_netdev_pmd_thread *pmd,
>                  pkt_meta[i].tcp_flags = parse_tcp_flags(packet);
> 
>                  pkt_meta[i].bytes = dp_packet_size(packet);
> +                phwol_hits++;
>                  hwol_emc_smc_hitmask |= (1 << i);
>                  continue;
>              }
> @@ -235,6 +237,7 @@ dp_netdev_input_outer_avx512(struct
> dp_netdev_pmd_thread *pmd,
> 
>      /* At this point we don't return error anymore, so commit stats here. */
>      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_RECV,
> batch_size);
> +    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_PHWOL_HIT,
> phwol_hits);
>      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT,
> emc_hits);
>      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT,
> smc_hits);
>      pmd_perf_update_counter(&pmd->perf_stats,
> PMD_STAT_MASKED_HIT,
> diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c
> index 9560e7c3c..7103a2d4d 100644
> --- a/lib/dpif-netdev-perf.c
> +++ b/lib/dpif-netdev-perf.c
> @@ -246,6 +246,7 @@ pmd_perf_format_overall_stats(struct ds *str, struct
> pmd_perf_stats *s,
>          ds_put_format(str,
>              "  Rx packets:        %12"PRIu64"  (%.0f Kpps, %.0f cycles/pkt)\n"
>              "  Datapath passes:   %12"PRIu64"  (%.2f passes/pkt)\n"
> +            "  - PHWOL hits:      %12"PRIu64"  (%5.1f %%)\n"
>              "  - EMC hits:        %12"PRIu64"  (%5.1f %%)\n"
>              "  - SMC hits:        %12"PRIu64"  (%5.1f %%)\n"
>              "  - Megaflow hits:   %12"PRIu64"  (%5.1f %%, %.2f "
> @@ -255,6 +256,8 @@ pmd_perf_format_overall_stats(struct ds *str, struct
> pmd_perf_stats *s,
>              rx_packets, (rx_packets / duration) / 1000,
>              1.0 * stats[PMD_CYCLES_ITER_BUSY] / rx_packets,
>              passes, rx_packets ? 1.0 * passes / rx_packets : 0,
> +            stats[PMD_STAT_PHWOL_HIT],
> +            100.0 * stats[PMD_STAT_PHWOL_HIT] / passes,
>              stats[PMD_STAT_EXACT_HIT],
>              100.0 * stats[PMD_STAT_EXACT_HIT] / passes,
>              stats[PMD_STAT_SMC_HIT],
> diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
> index 72645b6b3..8b1a52387 100644
> --- a/lib/dpif-netdev-perf.h
> +++ b/lib/dpif-netdev-perf.h
> @@ -56,6 +56,7 @@ extern "C" {
>  /* Set of counter types maintained in pmd_perf_stats. */
> 
>  enum pmd_stat_type {
> +    PMD_STAT_PHWOL_HIT,     /* Packets that had a partial HWOL hit
> (phwol). */
>      PMD_STAT_EXACT_HIT,     /* Packets that had an exact match (emc). */
>      PMD_STAT_SMC_HIT,       /* Packets that had a sig match hit (SMC). */
>      PMD_STAT_MASKED_HIT,    /* Packets that matched in the flow table. */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index b35ccbe3b..f161ae7f5 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -660,6 +660,7 @@ pmd_info_show_stats(struct ds *reply,
>                    "  packets received: %"PRIu64"\n"
>                    "  packet recirculations: %"PRIu64"\n"
>                    "  avg. datapath passes per packet: %.02f\n"
> +                  "  phwol hits: %"PRIu64"\n"
>                    "  emc hits: %"PRIu64"\n"
>                    "  smc hits: %"PRIu64"\n"
>                    "  megaflow hits: %"PRIu64"\n"
> @@ -668,7 +669,8 @@ pmd_info_show_stats(struct ds *reply,
>                    "  miss with failed upcall: %"PRIu64"\n"
>                    "  avg. packets per output batch: %.02f\n",
>                    total_packets, stats[PMD_STAT_RECIRC],
> -                  passes_per_pkt, stats[PMD_STAT_EXACT_HIT],
> +                  passes_per_pkt, stats[PMD_STAT_PHWOL_HIT],
> +                  stats[PMD_STAT_EXACT_HIT],
>                    stats[PMD_STAT_SMC_HIT],
>                    stats[PMD_STAT_MASKED_HIT], lookups_per_hit,
>                    stats[PMD_STAT_MISS], stats[PMD_STAT_LOST],
> @@ -1685,6 +1687,7 @@ dpif_netdev_get_stats(const struct dpif *dpif,
> struct dpif_dp_stats *stats)
>      CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>          stats->n_flows += cmap_count(&pmd->flow_table);
>          pmd_perf_read_counters(&pmd->perf_stats, pmd_stats);
> +        stats->n_hit += pmd_stats[PMD_STAT_PHWOL_HIT];
>          stats->n_hit += pmd_stats[PMD_STAT_EXACT_HIT];
>          stats->n_hit += pmd_stats[PMD_STAT_SMC_HIT];
>          stats->n_hit += pmd_stats[PMD_STAT_MASKED_HIT];
> @@ -6697,7 +6700,7 @@ dfc_processing(struct dp_netdev_pmd_thread
> *pmd,
>                 bool md_is_valid, odp_port_t port_no)
>  {
>      struct netdev_flow_key *key = &keys[0];
> -    size_t n_missed = 0, n_emc_hit = 0;
> +    size_t n_missed = 0, n_emc_hit = 0, n_phwol_hit = 0;
>      struct dfc_cache *cache = &pmd->flow_cache;
>      struct dp_packet *packet;
>      const size_t cnt = dp_packet_batch_size(packets_);
> @@ -6739,6 +6742,7 @@ dfc_processing(struct dp_netdev_pmd_thread
> *pmd,
>              flow = mark_to_flow_find(pmd, mark);
>              if (OVS_LIKELY(flow)) {
>                  tcp_flags = parse_tcp_flags(packet);
> +                n_phwol_hit++;
>                  if (OVS_LIKELY(batch_enable)) {
>                      dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
>                                              n_batches);
> @@ -6801,6 +6805,7 @@ dfc_processing(struct dp_netdev_pmd_thread
> *pmd,
>      /* Count of packets which are not flow batched. */
>      *n_flows = map_cnt;
> 
> +    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_PHWOL_HIT,
> n_phwol_hit);
>      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT,
> n_emc_hit);
> 
>      if (!smc_enable_db) {
> diff --git a/tests/pmd.at b/tests/pmd.at
> index cc5371d5a..34a59d502 100644
> --- a/tests/pmd.at
> +++ b/tests/pmd.at
> @@ -202,11 +202,12 @@ dummy@ovs-dummy: hit:0 missed:0
>      p0 7/1: (dummy-pmd: configured_rx_queues=4,
> configured_tx_queues=<cleared>, requested_rx_queues=4,
> requested_tx_queues=<cleared>)
>  ])
> 
> -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed
> SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
> +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed
> SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
>  pmd thread numa_id <cleared> core_id <cleared>:
>    packets received: 0
>    packet recirculations: 0
>    avg. datapath passes per packet: 0.00
> +  phwol hits: 0
>    emc hits: 0
>    smc hits: 0
>    megaflow hits: 0
> @@ -233,11 +234,12 @@ AT_CHECK([cat ovs-vswitchd.log |
> filter_flow_install | strip_xout], [0], [dnl
> 
> recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:77,dst
> =50:54:00:00:01:78),eth_type(0x0800),ipv4(frag=no), actions: <del>
>  ])
> 
> -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed
> SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
> +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed
> SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
>  pmd thread numa_id <cleared> core_id <cleared>:
>    packets received: 20
>    packet recirculations: 0
>    avg. datapath passes per packet: 1.00
> +  phwol hits: 0
>    emc hits: 19
>    smc hits: 0
>    megaflow hits: 0
> --
> 2.31.1
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Ferriter, Cian June 10, 2021, 2:46 p.m. UTC | #2
Hi Ian,

Thanks for the review. My responses are inline.

Also, one thing I spotted while looking at your later reviews was that we should update the "lib/dpif-netdev-unixctl.man" file since it prints an example of the PMD statistics.
I'll fix that in the next version.

> -----Original Message-----
> From: Stokes, Ian <ian.stokes@intel.com>
> Sent: Wednesday 2 June 2021 19:10
> To: Ferriter, Cian <cian.ferriter@intel.com>; ovs-dev@openvswitch.org; Van Haaren, Harry <harry.van.haaren@intel.com>; Gaetan
> Rivet <gaetanr@nvidia.com>; Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
> Cc: i.maximets@ovn.org
> Subject: RE: [ovs-dev] [v12 08/16] dpif-netdev: Add a partial HWOL PMD statistic.
> 
> > It is possible for packets traversing the userspace datapath to match a
> > flow before hitting on EMC by using a mark ID provided by a NIC. Add a
> > PMD statistic for this hit.
> >
> > Signed-off-by: Cian Ferriter <cian.ferriter@intel.com>
> 
> Thanks for the Patch Cian, comments inline.
> 
> So I have a general query here. In the bigger  picture why is the patch required for the DPIF work?
> Is it purely to track packet hits that have touched the software datapath (unlike a full offload usecase)?
> 

This patch is not strictly required for the DPIF work, but we found it much easier to test the functionality of HWOL in the DPIF with hit stats. As we added EMC, SMC and DPCLS capabilities to the AVX512 DPIF, we would test and could use the PMD stats to see if these lookup levels were being hit as we expected. When we tested HWOL, it just seemed like a missing feature, and adding it helped for our testing. We think it might be useful for others. 

> I also think it would be good to get some input from other involved on the HWOL work to date as they may have opinions on this so
> I've included some folks from Nvidia and Broadcom in case they have input.
> The implementation seems straight forward enough but good to gauge in case this goes against ongoing HWOL work.
> 

Good idea. Input from others involved in HWOL is much appreciated.

> Thanks
> Ian
> > ---
> >  NEWS                     | 2 ++
> >  lib/dpif-netdev-avx512.c | 3 +++
> >  lib/dpif-netdev-perf.c   | 3 +++
> >  lib/dpif-netdev-perf.h   | 1 +
> >  lib/dpif-netdev.c        | 9 +++++++--
> >  tests/pmd.at             | 6 ++++--
> >  6 files changed, 20 insertions(+), 4 deletions(-)
> >
> > diff --git a/NEWS b/NEWS
> > index 402ce5969..3eab5f4fa 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -5,6 +5,8 @@ Post-v2.15.0
> >     - Userspace datapath:
> >       * Auto load balancing of PMDs now partially supports cross-NUMA polling
> >         cases, e.g if all PMD threads are running on the same NUMA node.
> > +     * Add a partial HWOL PMD statistic counting hits similar to existing
> > +     * EMC/SMC/DPCLS stats.
> 
> No need for the * on the second line above, only the first line.
> 

Good catch, this is a typo. I'll fix in the next version.

> >     - ovs-ctl:
> >       * New option '--no-record-hostname' to disable hostname configuration
> >         in ovsdb on startup.
> > diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
> > index e255c9d17..5c6f54799 100644
> > --- a/lib/dpif-netdev-avx512.c
> > +++ b/lib/dpif-netdev-avx512.c
> > @@ -114,6 +114,7 @@ dp_netdev_input_outer_avx512(struct
> > dp_netdev_pmd_thread *pmd,
> >      const uint32_t emc_enabled = pmd->ctx.emc_insert_min != 0;
> >      const uint32_t smc_enabled = pmd->ctx.smc_enable_db;
> >
> > +    uint32_t phwol_hits = 0;
> 
> Minor nit, I'd expect new variables to be added after the existing variables below.
> 

I'll add "phwol_hits" below the other *_hits variables. I was thinking from the OVS datapath pipeline POV (HWOL->EMC->SMC).

I'll fix this in the next version.

> >      uint32_t emc_hits = 0;
> >      uint32_t smc_hits = 0;
> >
> > @@ -147,6 +148,7 @@ dp_netdev_input_outer_avx512(struct
> > dp_netdev_pmd_thread *pmd,
> >                  pkt_meta[i].tcp_flags = parse_tcp_flags(packet);
> >
> >                  pkt_meta[i].bytes = dp_packet_size(packet);
> > +                phwol_hits++;
> >                  hwol_emc_smc_hitmask |= (1 << i);
> >                  continue;
> >              }
> > @@ -235,6 +237,7 @@ dp_netdev_input_outer_avx512(struct
> > dp_netdev_pmd_thread *pmd,
> >
> >      /* At this point we don't return error anymore, so commit stats here. */
> >      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_RECV,
> > batch_size);
> > +    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_PHWOL_HIT,
> > phwol_hits);
> >      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT,
> > emc_hits);
> >      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT,
> > smc_hits);
> >      pmd_perf_update_counter(&pmd->perf_stats,
> > PMD_STAT_MASKED_HIT,
> > diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c
> > index 9560e7c3c..7103a2d4d 100644
> > --- a/lib/dpif-netdev-perf.c
> > +++ b/lib/dpif-netdev-perf.c
> > @@ -246,6 +246,7 @@ pmd_perf_format_overall_stats(struct ds *str, struct
> > pmd_perf_stats *s,
> >          ds_put_format(str,
> >              "  Rx packets:        %12"PRIu64"  (%.0f Kpps, %.0f cycles/pkt)\n"
> >              "  Datapath passes:   %12"PRIu64"  (%.2f passes/pkt)\n"
> > +            "  - PHWOL hits:      %12"PRIu64"  (%5.1f %%)\n"
> >              "  - EMC hits:        %12"PRIu64"  (%5.1f %%)\n"
> >              "  - SMC hits:        %12"PRIu64"  (%5.1f %%)\n"
> >              "  - Megaflow hits:   %12"PRIu64"  (%5.1f %%, %.2f "
> > @@ -255,6 +256,8 @@ pmd_perf_format_overall_stats(struct ds *str, struct
> > pmd_perf_stats *s,
> >              rx_packets, (rx_packets / duration) / 1000,
> >              1.0 * stats[PMD_CYCLES_ITER_BUSY] / rx_packets,
> >              passes, rx_packets ? 1.0 * passes / rx_packets : 0,
> > +            stats[PMD_STAT_PHWOL_HIT],
> > +            100.0 * stats[PMD_STAT_PHWOL_HIT] / passes,
> >              stats[PMD_STAT_EXACT_HIT],
> >              100.0 * stats[PMD_STAT_EXACT_HIT] / passes,
> >              stats[PMD_STAT_SMC_HIT],
> > diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
> > index 72645b6b3..8b1a52387 100644
> > --- a/lib/dpif-netdev-perf.h
> > +++ b/lib/dpif-netdev-perf.h
> > @@ -56,6 +56,7 @@ extern "C" {
> >  /* Set of counter types maintained in pmd_perf_stats. */
> >
> >  enum pmd_stat_type {
> > +    PMD_STAT_PHWOL_HIT,     /* Packets that had a partial HWOL hit
> > (phwol). */
> >      PMD_STAT_EXACT_HIT,     /* Packets that had an exact match (emc). */
> >      PMD_STAT_SMC_HIT,       /* Packets that had a sig match hit (SMC). */
> >      PMD_STAT_MASKED_HIT,    /* Packets that matched in the flow table. */
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index b35ccbe3b..f161ae7f5 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -660,6 +660,7 @@ pmd_info_show_stats(struct ds *reply,
> >                    "  packets received: %"PRIu64"\n"
> >                    "  packet recirculations: %"PRIu64"\n"
> >                    "  avg. datapath passes per packet: %.02f\n"
> > +                  "  phwol hits: %"PRIu64"\n"
> >                    "  emc hits: %"PRIu64"\n"
> >                    "  smc hits: %"PRIu64"\n"
> >                    "  megaflow hits: %"PRIu64"\n"
> > @@ -668,7 +669,8 @@ pmd_info_show_stats(struct ds *reply,
> >                    "  miss with failed upcall: %"PRIu64"\n"
> >                    "  avg. packets per output batch: %.02f\n",
> >                    total_packets, stats[PMD_STAT_RECIRC],
> > -                  passes_per_pkt, stats[PMD_STAT_EXACT_HIT],
> > +                  passes_per_pkt, stats[PMD_STAT_PHWOL_HIT],
> > +                  stats[PMD_STAT_EXACT_HIT],
> >                    stats[PMD_STAT_SMC_HIT],
> >                    stats[PMD_STAT_MASKED_HIT], lookups_per_hit,
> >                    stats[PMD_STAT_MISS], stats[PMD_STAT_LOST],
> > @@ -1685,6 +1687,7 @@ dpif_netdev_get_stats(const struct dpif *dpif,
> > struct dpif_dp_stats *stats)
> >      CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
> >          stats->n_flows += cmap_count(&pmd->flow_table);
> >          pmd_perf_read_counters(&pmd->perf_stats, pmd_stats);
> > +        stats->n_hit += pmd_stats[PMD_STAT_PHWOL_HIT];
> >          stats->n_hit += pmd_stats[PMD_STAT_EXACT_HIT];
> >          stats->n_hit += pmd_stats[PMD_STAT_SMC_HIT];
> >          stats->n_hit += pmd_stats[PMD_STAT_MASKED_HIT];
> > @@ -6697,7 +6700,7 @@ dfc_processing(struct dp_netdev_pmd_thread
> > *pmd,
> >                 bool md_is_valid, odp_port_t port_no)
> >  {
> >      struct netdev_flow_key *key = &keys[0];
> > -    size_t n_missed = 0, n_emc_hit = 0;
> > +    size_t n_missed = 0, n_emc_hit = 0, n_phwol_hit = 0;
> >      struct dfc_cache *cache = &pmd->flow_cache;
> >      struct dp_packet *packet;
> >      const size_t cnt = dp_packet_batch_size(packets_);
> > @@ -6739,6 +6742,7 @@ dfc_processing(struct dp_netdev_pmd_thread
> > *pmd,
> >              flow = mark_to_flow_find(pmd, mark);
> >              if (OVS_LIKELY(flow)) {
> >                  tcp_flags = parse_tcp_flags(packet);
> > +                n_phwol_hit++;
> >                  if (OVS_LIKELY(batch_enable)) {
> >                      dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
> >                                              n_batches);
> > @@ -6801,6 +6805,7 @@ dfc_processing(struct dp_netdev_pmd_thread
> > *pmd,
> >      /* Count of packets which are not flow batched. */
> >      *n_flows = map_cnt;
> >
> > +    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_PHWOL_HIT,
> > n_phwol_hit);
> >      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT,
> > n_emc_hit);
> >
> >      if (!smc_enable_db) {
> > diff --git a/tests/pmd.at b/tests/pmd.at
> > index cc5371d5a..34a59d502 100644
> > --- a/tests/pmd.at
> > +++ b/tests/pmd.at
> > @@ -202,11 +202,12 @@ dummy@ovs-dummy: hit:0 missed:0
> >      p0 7/1: (dummy-pmd: configured_rx_queues=4,
> > configured_tx_queues=<cleared>, requested_rx_queues=4,
> > requested_tx_queues=<cleared>)
> >  ])
> >
> > -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed
> > SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
> > +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed
> > SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
> >  pmd thread numa_id <cleared> core_id <cleared>:
> >    packets received: 0
> >    packet recirculations: 0
> >    avg. datapath passes per packet: 0.00
> > +  phwol hits: 0
> >    emc hits: 0
> >    smc hits: 0
> >    megaflow hits: 0
> > @@ -233,11 +234,12 @@ AT_CHECK([cat ovs-vswitchd.log |
> > filter_flow_install | strip_xout], [0], [dnl
> >
> > recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:77,dst
> > =50:54:00:00:01:78),eth_type(0x0800),ipv4(frag=no), actions: <del>
> >  ])
> >
> > -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed
> > SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
> > +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed
> > SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
> >  pmd thread numa_id <cleared> core_id <cleared>:
> >    packets received: 20
> >    packet recirculations: 0
> >    avg. datapath passes per packet: 1.00
> > +  phwol hits: 0
> >    emc hits: 19
> >    smc hits: 0
> >    megaflow hits: 0
> > --
> > 2.31.1
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Stokes, Ian June 16, 2021, 11:34 a.m. UTC | #3
> Hi Ian,
> 
> Thanks for the review. My responses are inline.
> 
> Also, one thing I spotted while looking at your later reviews was that we should
> update the "lib/dpif-netdev-unixctl.man" file since it prints an example of the
> PMD statistics.
> I'll fix that in the next version.
> 
> > -----Original Message-----
> > From: Stokes, Ian <ian.stokes@intel.com>
> > Sent: Wednesday 2 June 2021 19:10
> > To: Ferriter, Cian <cian.ferriter@intel.com>; ovs-dev@openvswitch.org; Van
> Haaren, Harry <harry.van.haaren@intel.com>; Gaetan
> > Rivet <gaetanr@nvidia.com>; Sriharsha Basavapatna
> <sriharsha.basavapatna@broadcom.com>
> > Cc: i.maximets@ovn.org
> > Subject: RE: [ovs-dev] [v12 08/16] dpif-netdev: Add a partial HWOL PMD
> statistic.
> >
> > > It is possible for packets traversing the userspace datapath to match a
> > > flow before hitting on EMC by using a mark ID provided by a NIC. Add a
> > > PMD statistic for this hit.
> > >
> > > Signed-off-by: Cian Ferriter <cian.ferriter@intel.com>
> >
> > Thanks for the Patch Cian, comments inline.
> >
> > So I have a general query here. In the bigger  picture why is the patch required
> for the DPIF work?
> > Is it purely to track packet hits that have touched the software datapath
> (unlike a full offload usecase)?
> >
> 
> This patch is not strictly required for the DPIF work, but we found it much easier
> to test the functionality of HWOL in the DPIF with hit stats. As we added EMC,
> SMC and DPCLS capabilities to the AVX512 DPIF, we would test and could use
> the PMD stats to see if these lookup levels were being hit as we expected. When
> we tested HWOL, it just seemed like a missing feature, and adding it helped for
> our testing. We think it might be useful for others.
> 

Understood, if it helps in testing then I think it should be a welcome feature, would like to hear from some of the other HWOL folks if there are any objections?

> > I also think it would be good to get some input from other involved on the
> HWOL work to date as they may have opinions on this so
> > I've included some folks from Nvidia and Broadcom in case they have input.
> > The implementation seems straight forward enough but good to gauge in case
> this goes against ongoing HWOL work.
> >
> 
> Good idea. Input from others involved in HWOL is much appreciated.
> 
> > Thanks
> > Ian
> > > ---
> > >  NEWS                     | 2 ++
> > >  lib/dpif-netdev-avx512.c | 3 +++
> > >  lib/dpif-netdev-perf.c   | 3 +++
> > >  lib/dpif-netdev-perf.h   | 1 +
> > >  lib/dpif-netdev.c        | 9 +++++++--
> > >  tests/pmd.at             | 6 ++++--
> > >  6 files changed, 20 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/NEWS b/NEWS
> > > index 402ce5969..3eab5f4fa 100644
> > > --- a/NEWS
> > > +++ b/NEWS
> > > @@ -5,6 +5,8 @@ Post-v2.15.0
> > >     - Userspace datapath:
> > >       * Auto load balancing of PMDs now partially supports cross-NUMA
> polling
> > >         cases, e.g if all PMD threads are running on the same NUMA node.
> > > +     * Add a partial HWOL PMD statistic counting hits similar to existing
> > > +     * EMC/SMC/DPCLS stats.
> >
> > No need for the * on the second line above, only the first line.
> >
> 
> Good catch, this is a typo. I'll fix in the next version.
> 
> > >     - ovs-ctl:
> > >       * New option '--no-record-hostname' to disable hostname configuration
> > >         in ovsdb on startup.
> > > diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
> > > index e255c9d17..5c6f54799 100644
> > > --- a/lib/dpif-netdev-avx512.c
> > > +++ b/lib/dpif-netdev-avx512.c
> > > @@ -114,6 +114,7 @@ dp_netdev_input_outer_avx512(struct
> > > dp_netdev_pmd_thread *pmd,
> > >      const uint32_t emc_enabled = pmd->ctx.emc_insert_min != 0;
> > >      const uint32_t smc_enabled = pmd->ctx.smc_enable_db;
> > >
> > > +    uint32_t phwol_hits = 0;
> >
> > Minor nit, I'd expect new variables to be added after the existing variables
> below.
> >
> 
> I'll add "phwol_hits" below the other *_hits variables. I was thinking from the
> OVS datapath pipeline POV (HWOL->EMC->SMC).
> 
> I'll fix this in the next version.
> 

Thanks

> > >      uint32_t emc_hits = 0;
> > >      uint32_t smc_hits = 0;
> > >
> > > @@ -147,6 +148,7 @@ dp_netdev_input_outer_avx512(struct
> > > dp_netdev_pmd_thread *pmd,
> > >                  pkt_meta[i].tcp_flags = parse_tcp_flags(packet);
> > >
> > >                  pkt_meta[i].bytes = dp_packet_size(packet);
> > > +                phwol_hits++;
> > >                  hwol_emc_smc_hitmask |= (1 << i);
> > >                  continue;
> > >              }
> > > @@ -235,6 +237,7 @@ dp_netdev_input_outer_avx512(struct
> > > dp_netdev_pmd_thread *pmd,
> > >
> > >      /* At this point we don't return error anymore, so commit stats here. */
> > >      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_RECV,
> > > batch_size);
> > > +    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_PHWOL_HIT,
> > > phwol_hits);
> > >      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT,
> > > emc_hits);
> > >      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT,
> > > smc_hits);
> > >      pmd_perf_update_counter(&pmd->perf_stats,
> > > PMD_STAT_MASKED_HIT,
> > > diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c
> > > index 9560e7c3c..7103a2d4d 100644
> > > --- a/lib/dpif-netdev-perf.c
> > > +++ b/lib/dpif-netdev-perf.c
> > > @@ -246,6 +246,7 @@ pmd_perf_format_overall_stats(struct ds *str, struct
> > > pmd_perf_stats *s,
> > >          ds_put_format(str,
> > >              "  Rx packets:        %12"PRIu64"  (%.0f Kpps, %.0f cycles/pkt)\n"
> > >              "  Datapath passes:   %12"PRIu64"  (%.2f passes/pkt)\n"
> > > +            "  - PHWOL hits:      %12"PRIu64"  (%5.1f %%)\n"
> > >              "  - EMC hits:        %12"PRIu64"  (%5.1f %%)\n"
> > >              "  - SMC hits:        %12"PRIu64"  (%5.1f %%)\n"
> > >              "  - Megaflow hits:   %12"PRIu64"  (%5.1f %%, %.2f "
> > > @@ -255,6 +256,8 @@ pmd_perf_format_overall_stats(struct ds *str, struct
> > > pmd_perf_stats *s,
> > >              rx_packets, (rx_packets / duration) / 1000,
> > >              1.0 * stats[PMD_CYCLES_ITER_BUSY] / rx_packets,
> > >              passes, rx_packets ? 1.0 * passes / rx_packets : 0,
> > > +            stats[PMD_STAT_PHWOL_HIT],
> > > +            100.0 * stats[PMD_STAT_PHWOL_HIT] / passes,
> > >              stats[PMD_STAT_EXACT_HIT],
> > >              100.0 * stats[PMD_STAT_EXACT_HIT] / passes,
> > >              stats[PMD_STAT_SMC_HIT],
> > > diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
> > > index 72645b6b3..8b1a52387 100644
> > > --- a/lib/dpif-netdev-perf.h
> > > +++ b/lib/dpif-netdev-perf.h
> > > @@ -56,6 +56,7 @@ extern "C" {
> > >  /* Set of counter types maintained in pmd_perf_stats. */
> > >
> > >  enum pmd_stat_type {
> > > +    PMD_STAT_PHWOL_HIT,     /* Packets that had a partial HWOL hit
> > > (phwol). */
> > >      PMD_STAT_EXACT_HIT,     /* Packets that had an exact match (emc). */
> > >      PMD_STAT_SMC_HIT,       /* Packets that had a sig match hit (SMC). */
> > >      PMD_STAT_MASKED_HIT,    /* Packets that matched in the flow table. */
> > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > > index b35ccbe3b..f161ae7f5 100644
> > > --- a/lib/dpif-netdev.c
> > > +++ b/lib/dpif-netdev.c
> > > @@ -660,6 +660,7 @@ pmd_info_show_stats(struct ds *reply,
> > >                    "  packets received: %"PRIu64"\n"
> > >                    "  packet recirculations: %"PRIu64"\n"
> > >                    "  avg. datapath passes per packet: %.02f\n"
> > > +                  "  phwol hits: %"PRIu64"\n"
> > >                    "  emc hits: %"PRIu64"\n"
> > >                    "  smc hits: %"PRIu64"\n"
> > >                    "  megaflow hits: %"PRIu64"\n"
> > > @@ -668,7 +669,8 @@ pmd_info_show_stats(struct ds *reply,
> > >                    "  miss with failed upcall: %"PRIu64"\n"
> > >                    "  avg. packets per output batch: %.02f\n",
> > >                    total_packets, stats[PMD_STAT_RECIRC],
> > > -                  passes_per_pkt, stats[PMD_STAT_EXACT_HIT],
> > > +                  passes_per_pkt, stats[PMD_STAT_PHWOL_HIT],
> > > +                  stats[PMD_STAT_EXACT_HIT],
> > >                    stats[PMD_STAT_SMC_HIT],
> > >                    stats[PMD_STAT_MASKED_HIT], lookups_per_hit,
> > >                    stats[PMD_STAT_MISS], stats[PMD_STAT_LOST],
> > > @@ -1685,6 +1687,7 @@ dpif_netdev_get_stats(const struct dpif *dpif,
> > > struct dpif_dp_stats *stats)
> > >      CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
> > >          stats->n_flows += cmap_count(&pmd->flow_table);
> > >          pmd_perf_read_counters(&pmd->perf_stats, pmd_stats);
> > > +        stats->n_hit += pmd_stats[PMD_STAT_PHWOL_HIT];
> > >          stats->n_hit += pmd_stats[PMD_STAT_EXACT_HIT];
> > >          stats->n_hit += pmd_stats[PMD_STAT_SMC_HIT];
> > >          stats->n_hit += pmd_stats[PMD_STAT_MASKED_HIT];
> > > @@ -6697,7 +6700,7 @@ dfc_processing(struct dp_netdev_pmd_thread
> > > *pmd,
> > >                 bool md_is_valid, odp_port_t port_no)
> > >  {
> > >      struct netdev_flow_key *key = &keys[0];
> > > -    size_t n_missed = 0, n_emc_hit = 0;
> > > +    size_t n_missed = 0, n_emc_hit = 0, n_phwol_hit = 0;
> > >      struct dfc_cache *cache = &pmd->flow_cache;
> > >      struct dp_packet *packet;
> > >      const size_t cnt = dp_packet_batch_size(packets_);
> > > @@ -6739,6 +6742,7 @@ dfc_processing(struct dp_netdev_pmd_thread
> > > *pmd,
> > >              flow = mark_to_flow_find(pmd, mark);
> > >              if (OVS_LIKELY(flow)) {
> > >                  tcp_flags = parse_tcp_flags(packet);
> > > +                n_phwol_hit++;
> > >                  if (OVS_LIKELY(batch_enable)) {
> > >                      dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
> > >                                              n_batches);
> > > @@ -6801,6 +6805,7 @@ dfc_processing(struct dp_netdev_pmd_thread
> > > *pmd,
> > >      /* Count of packets which are not flow batched. */
> > >      *n_flows = map_cnt;
> > >
> > > +    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_PHWOL_HIT,
> > > n_phwol_hit);
> > >      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT,
> > > n_emc_hit);
> > >
> > >      if (!smc_enable_db) {
> > > diff --git a/tests/pmd.at b/tests/pmd.at
> > > index cc5371d5a..34a59d502 100644
> > > --- a/tests/pmd.at
> > > +++ b/tests/pmd.at
> > > @@ -202,11 +202,12 @@ dummy@ovs-dummy: hit:0 missed:0
> > >      p0 7/1: (dummy-pmd: configured_rx_queues=4,
> > > configured_tx_queues=<cleared>, requested_rx_queues=4,
> > > requested_tx_queues=<cleared>)
> > >  ])
> > >
> > > -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed
> > > SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
> > > +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed
> > > SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
> > >  pmd thread numa_id <cleared> core_id <cleared>:
> > >    packets received: 0
> > >    packet recirculations: 0
> > >    avg. datapath passes per packet: 0.00
> > > +  phwol hits: 0
> > >    emc hits: 0
> > >    smc hits: 0
> > >    megaflow hits: 0
> > > @@ -233,11 +234,12 @@ AT_CHECK([cat ovs-vswitchd.log |
> > > filter_flow_install | strip_xout], [0], [dnl
> > >
> > > recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:77,dst
> > > =50:54:00:00:01:78),eth_type(0x0800),ipv4(frag=no), actions: <del>
> > >  ])
> > >
> > > -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed
> > > SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
> > > +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed
> > > SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
> > >  pmd thread numa_id <cleared> core_id <cleared>:
> > >    packets received: 20
> > >    packet recirculations: 0
> > >    avg. datapath passes per packet: 1.00
> > > +  phwol hits: 0
> > >    emc hits: 19
> > >    smc hits: 0
> > >    megaflow hits: 0
> > > --
> > > 2.31.1
> > >
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index 402ce5969..3eab5f4fa 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,8 @@  Post-v2.15.0
    - Userspace datapath:
      * Auto load balancing of PMDs now partially supports cross-NUMA polling
        cases, e.g if all PMD threads are running on the same NUMA node.
+     * Add a partial HWOL PMD statistic counting hits similar to existing
+     * EMC/SMC/DPCLS stats.
    - ovs-ctl:
      * New option '--no-record-hostname' to disable hostname configuration
        in ovsdb on startup.
diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
index e255c9d17..5c6f54799 100644
--- a/lib/dpif-netdev-avx512.c
+++ b/lib/dpif-netdev-avx512.c
@@ -114,6 +114,7 @@  dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
     const uint32_t emc_enabled = pmd->ctx.emc_insert_min != 0;
     const uint32_t smc_enabled = pmd->ctx.smc_enable_db;
 
+    uint32_t phwol_hits = 0;
     uint32_t emc_hits = 0;
     uint32_t smc_hits = 0;
 
@@ -147,6 +148,7 @@  dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
                 pkt_meta[i].tcp_flags = parse_tcp_flags(packet);
 
                 pkt_meta[i].bytes = dp_packet_size(packet);
+                phwol_hits++;
                 hwol_emc_smc_hitmask |= (1 << i);
                 continue;
             }
@@ -235,6 +237,7 @@  dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
 
     /* At this point we don't return error anymore, so commit stats here. */
     pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_RECV, batch_size);
+    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_PHWOL_HIT, phwol_hits);
     pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT, emc_hits);
     pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, smc_hits);
     pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c
index 9560e7c3c..7103a2d4d 100644
--- a/lib/dpif-netdev-perf.c
+++ b/lib/dpif-netdev-perf.c
@@ -246,6 +246,7 @@  pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
         ds_put_format(str,
             "  Rx packets:        %12"PRIu64"  (%.0f Kpps, %.0f cycles/pkt)\n"
             "  Datapath passes:   %12"PRIu64"  (%.2f passes/pkt)\n"
+            "  - PHWOL hits:      %12"PRIu64"  (%5.1f %%)\n"
             "  - EMC hits:        %12"PRIu64"  (%5.1f %%)\n"
             "  - SMC hits:        %12"PRIu64"  (%5.1f %%)\n"
             "  - Megaflow hits:   %12"PRIu64"  (%5.1f %%, %.2f "
@@ -255,6 +256,8 @@  pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
             rx_packets, (rx_packets / duration) / 1000,
             1.0 * stats[PMD_CYCLES_ITER_BUSY] / rx_packets,
             passes, rx_packets ? 1.0 * passes / rx_packets : 0,
+            stats[PMD_STAT_PHWOL_HIT],
+            100.0 * stats[PMD_STAT_PHWOL_HIT] / passes,
             stats[PMD_STAT_EXACT_HIT],
             100.0 * stats[PMD_STAT_EXACT_HIT] / passes,
             stats[PMD_STAT_SMC_HIT],
diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
index 72645b6b3..8b1a52387 100644
--- a/lib/dpif-netdev-perf.h
+++ b/lib/dpif-netdev-perf.h
@@ -56,6 +56,7 @@  extern "C" {
 /* Set of counter types maintained in pmd_perf_stats. */
 
 enum pmd_stat_type {
+    PMD_STAT_PHWOL_HIT,     /* Packets that had a partial HWOL hit (phwol). */
     PMD_STAT_EXACT_HIT,     /* Packets that had an exact match (emc). */
     PMD_STAT_SMC_HIT,       /* Packets that had a sig match hit (SMC). */
     PMD_STAT_MASKED_HIT,    /* Packets that matched in the flow table. */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index b35ccbe3b..f161ae7f5 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -660,6 +660,7 @@  pmd_info_show_stats(struct ds *reply,
                   "  packets received: %"PRIu64"\n"
                   "  packet recirculations: %"PRIu64"\n"
                   "  avg. datapath passes per packet: %.02f\n"
+                  "  phwol hits: %"PRIu64"\n"
                   "  emc hits: %"PRIu64"\n"
                   "  smc hits: %"PRIu64"\n"
                   "  megaflow hits: %"PRIu64"\n"
@@ -668,7 +669,8 @@  pmd_info_show_stats(struct ds *reply,
                   "  miss with failed upcall: %"PRIu64"\n"
                   "  avg. packets per output batch: %.02f\n",
                   total_packets, stats[PMD_STAT_RECIRC],
-                  passes_per_pkt, stats[PMD_STAT_EXACT_HIT],
+                  passes_per_pkt, stats[PMD_STAT_PHWOL_HIT],
+                  stats[PMD_STAT_EXACT_HIT],
                   stats[PMD_STAT_SMC_HIT],
                   stats[PMD_STAT_MASKED_HIT], lookups_per_hit,
                   stats[PMD_STAT_MISS], stats[PMD_STAT_LOST],
@@ -1685,6 +1687,7 @@  dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
     CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
         stats->n_flows += cmap_count(&pmd->flow_table);
         pmd_perf_read_counters(&pmd->perf_stats, pmd_stats);
+        stats->n_hit += pmd_stats[PMD_STAT_PHWOL_HIT];
         stats->n_hit += pmd_stats[PMD_STAT_EXACT_HIT];
         stats->n_hit += pmd_stats[PMD_STAT_SMC_HIT];
         stats->n_hit += pmd_stats[PMD_STAT_MASKED_HIT];
@@ -6697,7 +6700,7 @@  dfc_processing(struct dp_netdev_pmd_thread *pmd,
                bool md_is_valid, odp_port_t port_no)
 {
     struct netdev_flow_key *key = &keys[0];
-    size_t n_missed = 0, n_emc_hit = 0;
+    size_t n_missed = 0, n_emc_hit = 0, n_phwol_hit = 0;
     struct dfc_cache *cache = &pmd->flow_cache;
     struct dp_packet *packet;
     const size_t cnt = dp_packet_batch_size(packets_);
@@ -6739,6 +6742,7 @@  dfc_processing(struct dp_netdev_pmd_thread *pmd,
             flow = mark_to_flow_find(pmd, mark);
             if (OVS_LIKELY(flow)) {
                 tcp_flags = parse_tcp_flags(packet);
+                n_phwol_hit++;
                 if (OVS_LIKELY(batch_enable)) {
                     dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
                                             n_batches);
@@ -6801,6 +6805,7 @@  dfc_processing(struct dp_netdev_pmd_thread *pmd,
     /* Count of packets which are not flow batched. */
     *n_flows = map_cnt;
 
+    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_PHWOL_HIT, n_phwol_hit);
     pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT, n_emc_hit);
 
     if (!smc_enable_db) {
diff --git a/tests/pmd.at b/tests/pmd.at
index cc5371d5a..34a59d502 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -202,11 +202,12 @@  dummy@ovs-dummy: hit:0 missed:0
     p0 7/1: (dummy-pmd: configured_rx_queues=4, configured_tx_queues=<cleared>, requested_rx_queues=4, requested_tx_queues=<cleared>)
 ])
 
-AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
+AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
 pmd thread numa_id <cleared> core_id <cleared>:
   packets received: 0
   packet recirculations: 0
   avg. datapath passes per packet: 0.00
+  phwol hits: 0
   emc hits: 0
   smc hits: 0
   megaflow hits: 0
@@ -233,11 +234,12 @@  AT_CHECK([cat ovs-vswitchd.log | filter_flow_install | strip_xout], [0], [dnl
 recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:77,dst=50:54:00:00:01:78),eth_type(0x0800),ipv4(frag=no), actions: <del>
 ])
 
-AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
+AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
 pmd thread numa_id <cleared> core_id <cleared>:
   packets received: 20
   packet recirculations: 0
   avg. datapath passes per packet: 1.00
+  phwol hits: 0
   emc hits: 19
   smc hits: 0
   megaflow hits: 0