
[ovs-dev] dpif-netdev: display EMC used entries for PMDs

Message ID: 20210114131903.37382-1-pvalerio@redhat.com
State: Changes Requested
Series: [ovs-dev] dpif-netdev: display EMC used entries for PMDs

Commit Message

Paolo Valerio Jan. 14, 2021, 1:19 p.m. UTC
adds "emc entries" to "ovs-appctl dpif-netdev/pmd-stats-show" in order
to show the number of alive entries.

Signed-off-by: Paolo Valerio <pvalerio@redhat.com>
---
 NEWS                   |  2 ++
 lib/dpif-netdev-perf.h |  2 ++
 lib/dpif-netdev.c      | 76 +++++++++++++++++++++++++++++++-----------
 tests/pmd.at           |  6 ++--
 4 files changed, 65 insertions(+), 21 deletions(-)
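
For reference, the new counter appears in the per-PMD section of the stats output; a short sample (numa/core ids are illustrative, counts taken from the tests/pmd.at expectations in the patch below):

  $ ovs-appctl dpif-netdev/pmd-stats-show
  pmd thread numa_id 0 core_id 2:
    ...
    emc hits: 19
    emc entries: 1 (0.01%)
    smc hits: 0
    ...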

Comments

Kevin Traynor Feb. 5, 2021, 7:01 p.m. UTC | #1
Hi Paolo,

On 14/01/2021 13:19, Paolo Valerio wrote:
> adds "emc entries" to "ovs-appctl dpif-netdev/pmd-stats-show" in order
> to show the number of alive entries.
> 

Thanks for working on this. Not a full review, but a few high level
comments. I would like to ask about the motivation - is it so a user can
judge the effectiveness of using the EMC to decide to disable it? or to
tune the EMC insertion rate?
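
(For context on the knobs being discussed: if I remember correctly, EMC insertion is tuned per datapath via the other_config:emc-insert-inv-prob option. The default of 100 means an entry is inserted for roughly 1 in every 100 candidate packets, 1 inserts always, and 0 disables EMC insertion entirely. The two experiments would then look something like the sketch below.)

  # Sketch, assuming the emc-insert-inv-prob knob; values are illustrative.
  $ ovs-vsctl set Open_vSwitch . other_config:emc-insert-inv-prob=20   # insert more often (1 in 20)
  $ ovs-vsctl set Open_vSwitch . other_config:emc-insert-inv-prob=0    # never insert, i.e. EMC effectively off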

If I run this and increase the flows, I see:
  emc hits: 1961314
  emc entries: 6032 (73.63%)
  smc hits: 0
  megaflow hits: 688946

If I look at pmd-perf-stats (below), it shows the % split between emc
and megaflow and I think I would use that to judge the effectiveness of
the emc. I'm not too fussed if the emc occupancy is high or low, I just
want to know if it is being effective at getting hits for my pkts, so
I'm not sure that 'emc entries: 6032 (73.63%)' helps me decide to
enable/disable it.

  - EMC hits:             1972766  ( 74.0 %)
  - SMC hits:                   0  (  0.0 %)
  - Megaflow hits:         693022  ( 26.0 %, 1.00 subtbl lookups/hit)

Of course it doesn't account for the differing cost between them, but if
emc hits were a low %, I'd want to experiment with disabling it.
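
(The breakdown above comes from the separate perf command rather than pmd-stats-show; if I recall the name correctly:)

  $ ovs-appctl dpif-netdev/pmd-perf-show   # per-PMD hit breakdown plus cycles/packet and iteration stats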

Depending on your motivation, you might also want to take a look at the
really nice fdb stats that Eelco did, it might give some ideas.
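
(If anyone wants to try those: my recollection is that the commands are fdb/stats-show and fdb/stats-clear, but the authoritative list is easy to check:)

  $ ovs-appctl list-commands | grep fdb
  $ ovs-appctl fdb/stats-show br0          # assumed command name and bridge argument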

Kevin.

Paolo Valerio Feb. 8, 2021, 2:34 p.m. UTC | #2
Hi Kevin,

thank you for your feedback

Kevin Traynor <ktraynor@redhat.com> writes:

> Hi Paolo,
>
> On 14/01/2021 13:19, Paolo Valerio wrote:
>> adds "emc entries" to "ovs-appctl dpif-netdev/pmd-stats-show" in order
>> to show the number of alive entries.
>> 
>
> Thanks for working on this. Not a full review, but a few high level
> comments. I would like to ask about the motivation - is it so a user can
> judge the effectiveness of using the EMC to decide to disable it? or to
> tune the EMC insertion rate?
>

the purpose is more like giving an extra bit of information that of course
can be used for tuning.

> If I run this and increase the flows, I see:
>   emc hits: 1961314
>   emc entries: 6032 (73.63%)
>   smc hits: 0
>   megaflow hits: 688946
>
> If I look at pmd-perf-stats (below), it shows the % split between emc
> and megaflow and I think I would use that to judge the effectiveness of
> the emc. I'm not too fussed if the emc occupancy is high or low, I just
> want to know if it is being effective at getting hits for my pkts, so
> I'm not sure that 'emc entries: 6032 (73.63%)' helps me decide to
> enable/disable it.
>
>   - EMC hits:             1972766  ( 74.0 %)
>   - SMC hits:                   0  (  0.0 %)
>   - Megaflow hits:         693022  ( 26.0 %, 1.00 subtbl lookups/hit)
>
> Of course it doesn't account for the differing cost between them, but if
> emc hits were a low %, I'd want to experiment with disabling it.
>

When you are approaching a reasonably high number of alive entries,
and you expect for any reason that the number of flows for that PMD will
grow significantly, it gives you an insight that the hit rate may
decrease.

> Depending on your motivation, you might also want to take a look at the
> really nice fdb stats that Eelco did, it might give some ideas.
>

Thanks for the heads up.

Paolo

Kevin Traynor Feb. 11, 2021, 4:56 p.m. UTC | #3
On 08/02/2021 14:34, Paolo Valerio wrote:
> Hi Kevin,
> 
> thank you for your feedback
> 
> Kevin Traynor <ktraynor@redhat.com> writes:
> 
>> Hi Paolo,
>>
>> On 14/01/2021 13:19, Paolo Valerio wrote:
>>> adds "emc entries" to "ovs-appctl dpif-netdev/pmd-stats-show" in order
>>> to show the number of alive entries.
>>>
>>
>> Thanks for working on this. Not a full review, but a few high level
>> comments. I would like to ask about the motivation - is it so a user can
>> judge the effectiveness of using the EMC to decide to disable it? or to
>> tune the EMC insertion rate?
>>
> 
> the purpose is more like giving an extra bit of information that of course
> can be used for tuning.
> 
>> If I run this and increase the flows, I see:
>>   emc hits: 1961314
>>   emc entries: 6032 (73.63%)
>>   smc hits: 0
>>   megaflow hits: 688946
>>
>> If I look at pmd-perf-stats (below), it shows the % split between emc
>> and megaflow and I think I would use that to judge the effectiveness of
>> the emc. I'm not too fussed if the emc occupancy is high or low, I just
>> want to know if it is being effective at getting hits for my pkts, so
>> I'm not sure that 'emc entries: 6032 (73.63%)' helps me decide to
>> enable/disable it.
>>
>>   - EMC hits:             1972766  ( 74.0 %)
>>   - SMC hits:                   0  (  0.0 %)
>>   - Megaflow hits:         693022  ( 26.0 %, 1.00 subtbl lookups/hit)
>>
>> Of course it doesn't account for the differing cost between them, but if
>> emc hits were a low %, I'd want to experiment with disabling it.
>>
> 
> When you are approaching a reasonably high number of alive entries,
> and you expect for any reason that the number of flows for that PMD will
> grow significantly, it gives you an insight that the hit rate may
> decrease.
> 

I agree it is a hint, but it would be hard to predict how many new flows
would get their own slots or just cause more evictions. Actually, the
fact you are displaying the max size could be a good hint in this case,
as it might be clear for someone who was going to add 10s of thousands
of flows that an 8K emc would not be effective.

I think it is more useful to know how the emc is performing right now
and hits/misses/evictions look to be the most useful IMHO. I'm not against
showing occupancy as well; maybe it gives more insight or helps with debugging.

The fdb stats give both occupancy and evictions, but it's a different
case as it's not a set-associative cache and the size can be configured by
the user.
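
(For reference, the "8K" and the set-associativity mentioned above come from compile-time constants in lib/dpif-netdev.c; quoted roughly from memory, so treat this as a sketch. They also explain the percentage in the new output: 6032/8192 = 73.63%.)

  /* Approximate definitions in lib/dpif-netdev.c (from memory, for illustration). */
  #define EM_FLOW_HASH_SHIFT   13
  #define EM_FLOW_HASH_ENTRIES (1u << EM_FLOW_HASH_SHIFT)   /* 8192 slots in total. */
  #define EM_FLOW_HASH_SEGS    2    /* Each flow hash maps to 2 candidate slots. */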


Patch

diff --git a/NEWS b/NEWS
index 617fe8e6a..e5d53a83b 100644
--- a/NEWS
+++ b/NEWS
@@ -20,6 +20,8 @@  Post-v2.14.0
      * Add generic IP protocol support to conntrack. With this change, all
        none UDP, TCP, and ICMP traffic will be treated as general L3
        traffic, i.e. using 3 tupples.
+     * EMC alive entries counter has been added to command
+       "ovs-appctl dpif-netdev/pmd-stats-show"
    - The environment variable OVS_UNBOUND_CONF, if set, is now used
      as the DNS resolver's (unbound) configuration file.
    - Linux datapath:
diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
index 72645b6b3..80f50eae9 100644
--- a/lib/dpif-netdev-perf.h
+++ b/lib/dpif-netdev-perf.h
@@ -157,6 +157,8 @@  struct pmd_perf_stats {
     uint64_t last_tsc;
     /* Used to space certain checks in time. */
     uint64_t next_check_tsc;
+    /* Exact Match Cache used entries counter. */
+    atomic_uint32_t emc_n_entries;
     /* If non-NULL, outermost cycle timer currently running in PMD. */
     struct cycle_timer *cur_timer;
     /* Set of PMD counters with their zero offsets. */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 300861ca5..3eb70ccd5 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -913,7 +913,7 @@  dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
                            odp_port_t in_port);
 
 static inline bool emc_entry_alive(struct emc_entry *ce);
-static void emc_clear_entry(struct emc_entry *ce);
+static void emc_clear_entry(struct emc_entry *ce, struct pmd_perf_stats *s);
 static void smc_clear_entry(struct smc_bucket *b, int idx);
 
 static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
@@ -955,13 +955,16 @@  dfc_cache_init(struct dfc_cache *flow_cache)
 }
 
 static void
-emc_cache_uninit(struct emc_cache *flow_cache)
+emc_cache_uninit(struct emc_cache *flow_cache, struct pmd_perf_stats *s)
 {
     int i;
 
+
     for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
-        emc_clear_entry(&flow_cache->entries[i]);
+        emc_clear_entry(&flow_cache->entries[i], s);
     }
+
+    atomic_store_relaxed(&s->emc_n_entries, 0);
 }
 
 static void
@@ -977,21 +980,21 @@  smc_cache_uninit(struct smc_cache *smc)
 }
 
 static void
-dfc_cache_uninit(struct dfc_cache *flow_cache)
+dfc_cache_uninit(struct dfc_cache *flow_cache, struct pmd_perf_stats *s)
 {
     smc_cache_uninit(&flow_cache->smc_cache);
-    emc_cache_uninit(&flow_cache->emc_cache);
+    emc_cache_uninit(&flow_cache->emc_cache, s);
 }
 
 /* Check and clear dead flow references slowly (one entry at each
  * invocation).  */
 static void
-emc_cache_slow_sweep(struct emc_cache *flow_cache)
+emc_cache_slow_sweep(struct emc_cache *flow_cache, struct pmd_perf_stats *s)
 {
     struct emc_entry *entry = &flow_cache->entries[flow_cache->sweep_idx];
 
     if (!emc_entry_alive(entry)) {
-        emc_clear_entry(entry);
+        emc_clear_entry(entry, s);
     }
     flow_cache->sweep_idx = (flow_cache->sweep_idx + 1) & EM_FLOW_HASH_MASK;
 }
@@ -1093,15 +1096,26 @@  pmd_info_show_stats(struct ds *reply,
                   "  packets received: %"PRIu64"\n"
                   "  packet recirculations: %"PRIu64"\n"
                   "  avg. datapath passes per packet: %.02f\n"
-                  "  emc hits: %"PRIu64"\n"
+                  "  emc hits: %"PRIu64"\n",
+                  total_packets, stats[PMD_STAT_RECIRC],
+                  passes_per_pkt, stats[PMD_STAT_EXACT_HIT]
+                  );
+
+    if (pmd->core_id != NON_PMD_CORE_ID) {
+        uint32_t emc_entries;
+        atomic_read_relaxed(&pmd->perf_stats.emc_n_entries, &emc_entries);
+        ds_put_format(reply, "  emc entries: %"PRIu32" (%.02f%%)\n",
+                      emc_entries,
+                      100.0 * emc_entries / EM_FLOW_HASH_ENTRIES);
+    }
+
+    ds_put_format(reply,
                   "  smc hits: %"PRIu64"\n"
                   "  megaflow hits: %"PRIu64"\n"
                   "  avg. subtable lookups per megaflow hit: %.02f\n"
                   "  miss with success upcall: %"PRIu64"\n"
                   "  miss with failed upcall: %"PRIu64"\n"
                   "  avg. packets per output batch: %.02f\n",
-                  total_packets, stats[PMD_STAT_RECIRC],
-                  passes_per_pkt, stats[PMD_STAT_EXACT_HIT],
                   stats[PMD_STAT_SMC_HIT],
                   stats[PMD_STAT_MASKED_HIT], lookups_per_hit,
                   stats[PMD_STAT_MISS], stats[PMD_STAT_LOST],
@@ -2004,6 +2018,26 @@  non_atomic_ullong_add(atomic_ullong *var, unsigned long long n)
     atomic_store_relaxed(var, tmp);
 }
 
+static void
+non_atomic_dec(atomic_uint32_t *var)
+{
+    unsigned int tmp;
+
+    atomic_read_relaxed(var, &tmp);
+    tmp--;
+    atomic_store_relaxed(var, tmp);
+}
+
+static void
+non_atomic_inc(atomic_uint32_t *var)
+{
+    unsigned int tmp;
+
+    atomic_read_relaxed(var, &tmp);
+    tmp++;
+    atomic_store_relaxed(var, tmp);
+}
+
 static int
 dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
 {
@@ -3070,25 +3104,28 @@  emc_entry_alive(struct emc_entry *ce)
 }
 
 static void
-emc_clear_entry(struct emc_entry *ce)
+emc_clear_entry(struct emc_entry *ce, struct pmd_perf_stats *s)
 {
     if (ce->flow) {
         dp_netdev_flow_unref(ce->flow);
         ce->flow = NULL;
+        non_atomic_dec(&s->emc_n_entries);
     }
 }
 
 static inline void
 emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
-                 const struct netdev_flow_key *key)
+                 const struct netdev_flow_key *key, struct pmd_perf_stats *s)
 {
     if (ce->flow != flow) {
         if (ce->flow) {
             dp_netdev_flow_unref(ce->flow);
+            non_atomic_dec(&s->emc_n_entries);
         }
 
         if (dp_netdev_flow_ref(flow)) {
             ce->flow = flow;
+            non_atomic_inc(&s->emc_n_entries);
         } else {
             ce->flow = NULL;
         }
@@ -3100,7 +3137,7 @@  emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
 
 static inline void
 emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
-           struct dp_netdev_flow *flow)
+           struct dp_netdev_flow *flow, struct pmd_perf_stats *s)
 {
     struct emc_entry *to_be_replaced = NULL;
     struct emc_entry *current_entry;
@@ -3108,7 +3145,7 @@  emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
     EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) {
         if (netdev_flow_key_equal(&current_entry->key, key)) {
             /* We found the entry with the 'mf' miniflow */
-            emc_change_entry(current_entry, flow, NULL);
+            emc_change_entry(current_entry, flow, NULL, s);
             return;
         }
 
@@ -3124,7 +3161,7 @@  emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
     /* We didn't find the miniflow in the cache.
      * The 'to_be_replaced' entry is where the new flow will be stored */
 
-    emc_change_entry(to_be_replaced, flow, key);
+    emc_change_entry(to_be_replaced, flow, key, s);
 }
 
 static inline void
@@ -3139,7 +3176,7 @@  emc_probabilistic_insert(struct dp_netdev_pmd_thread *pmd,
     uint32_t min = pmd->ctx.emc_insert_min;
 
     if (min && random_uint32() <= min) {
-        emc_insert(&(pmd->flow_cache).emc_cache, key, flow);
+        emc_insert(&(pmd->flow_cache).emc_cache, key, flow, &pmd->perf_stats);
     }
 }
 
@@ -6014,7 +6051,8 @@  reload:
             coverage_try_clear();
             dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
             if (!ovsrcu_try_quiesce()) {
-                emc_cache_slow_sweep(&((pmd->flow_cache).emc_cache));
+                emc_cache_slow_sweep(&((pmd->flow_cache).emc_cache),
+                                     &pmd->perf_stats);
                 pmd->next_rcu_quiesce =
                     pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
             }
@@ -6058,7 +6096,7 @@  reload:
     }
 
     pmd_free_static_tx_qid(pmd);
-    dfc_cache_uninit(&pmd->flow_cache);
+    dfc_cache_uninit(&pmd->flow_cache, &pmd->perf_stats);
     free(poll_list);
     pmd_free_cached_ports(pmd);
     return NULL;
@@ -6537,7 +6575,7 @@  dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd)
      * but extra cleanup is necessary */
     if (pmd->core_id == NON_PMD_CORE_ID) {
         ovs_mutex_lock(&dp->non_pmd_mutex);
-        dfc_cache_uninit(&pmd->flow_cache);
+        dfc_cache_uninit(&pmd->flow_cache, &pmd->perf_stats);
         pmd_free_cached_ports(pmd);
         pmd_free_static_tx_qid(pmd);
         ovs_mutex_unlock(&dp->non_pmd_mutex);
diff --git a/tests/pmd.at b/tests/pmd.at
index cc5371d5a..45c69563c 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -202,12 +202,13 @@  dummy@ovs-dummy: hit:0 missed:0
     p0 7/1: (dummy-pmd: configured_rx_queues=4, configured_tx_queues=<cleared>, requested_rx_queues=4, requested_tx_queues=<cleared>)
 ])
 
-AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
+AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
 pmd thread numa_id <cleared> core_id <cleared>:
   packets received: 0
   packet recirculations: 0
   avg. datapath passes per packet: 0.00
   emc hits: 0
+  emc entries: 0 (0.00%)
   smc hits: 0
   megaflow hits: 0
   avg. subtable lookups per megaflow hit: 0.00
@@ -233,12 +234,13 @@  AT_CHECK([cat ovs-vswitchd.log | filter_flow_install | strip_xout], [0], [dnl
 recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:77,dst=50:54:00:00:01:78),eth_type(0x0800),ipv4(frag=no), actions: <del>
 ])
 
-AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
+AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
 pmd thread numa_id <cleared> core_id <cleared>:
   packets received: 20
   packet recirculations: 0
   avg. datapath passes per packet: 1.00
   emc hits: 19
+  emc entries: 1 (0.01%)
   smc hits: 0
   megaflow hits: 0
   avg. subtable lookups per megaflow hit: 0.00