diff mbox series

[ovs-dev,1/2] dpctl: dpif: add kernel datapath cache hit output

Message ID 159904155642.95561.4879858074655028502.stgit@ebuild
State Superseded, archived
Headers show
Series dpctl: cache visibility/configuration enhancements | expand

Commit Message

Eelco Chaudron Sept. 2, 2020, 10:12 a.m. UTC
This patch adds cache usage statistics to the output:

$ ovs-dpctl show                                                                                                                                                                                                                                                ovnhostvm: Tue Jul 21 15:25:14 2020
system@ovs-system:
  lookups: hit:24 missed:71 lost:0
  flows: 0
  masks: hit:334 total:0 hit/pkt:3.52
  cache: hit:4 hit rate:4.2105%
  port 0: ovs-system (internal)
  port 1: genev_sys_6081 (geneve: packet_type=ptap)
  port 2: br-int (internal)
  port 3: br-ex (internal)
  port 4: eth2
  port 5: sw1p1 (internal)
  port 6: sw0p4 (internal)

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
 datapath/linux/compat/include/linux/openvswitch.h |    2 +-
 lib/dpctl.c                                       |    9 +++++++++
 lib/dpif-netdev.c                                 |    1 +
 lib/dpif-netlink.c                                |    3 +++
 lib/dpif.h                                        |    2 ++
 5 files changed, 16 insertions(+), 1 deletion(-)

Comments

Flavio Leitner March 1, 2021, 9:04 p.m. UTC | #1
Hi Eelco,


Thanks for the patch. I added few comments in line below.


On Wed, Sep 02, 2020 at 12:12:36PM +0200, Eelco Chaudron wrote:
> This patch adds cache usage statistics to the output:
> 
> $ ovs-dpctl show                                                                                                                                                                                                                                                ovnhostvm: Tue Jul 21 15:25:14 2020


There are some extra spaces above

> system@ovs-system:
>   lookups: hit:24 missed:71 lost:0
>   flows: 0
>   masks: hit:334 total:0 hit/pkt:3.52
>   cache: hit:4 hit rate:4.2105%
>   port 0: ovs-system (internal)
>   port 1: genev_sys_6081 (geneve: packet_type=ptap)
>   port 2: br-int (internal)
>   port 3: br-ex (internal)
>   port 4: eth2
>   port 5: sw1p1 (internal)
>   port 6: sw0p4 (internal)
> 
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
>  datapath/linux/compat/include/linux/openvswitch.h |    2 +-
>  lib/dpctl.c                                       |    9 +++++++++
>  lib/dpif-netdev.c                                 |    1 +
>  lib/dpif-netlink.c                                |    3 +++
>  lib/dpif.h                                        |    2 ++
>  5 files changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
> index cc41bbea4..fb73bfa90 100644
> --- a/datapath/linux/compat/include/linux/openvswitch.h
> +++ b/datapath/linux/compat/include/linux/openvswitch.h
> @@ -122,8 +122,8 @@ struct ovs_dp_megaflow_stats {
>  	__u64 n_mask_hit;	 /* Number of masks used for flow lookups. */
>  	__u32 n_masks;		 /* Number of masks for the datapath. */
>  	__u32 pad0;		 /* Pad for future expension. */
> +	__u64 n_cache_hit;       /* Number of cache matches for flow lookups. */
>  	__u64 pad1;		 /* Pad for future expension. */
> -	__u64 pad2;		 /* Pad for future expension. */
>  };
>
>  struct ovs_vport_stats {
> diff --git a/lib/dpctl.c b/lib/dpctl.c
> index db2b1f896..dac350fb5 100644
> --- a/lib/dpctl.c
> +++ b/lib/dpctl.c
> @@ -605,6 +605,15 @@ show_dpif(struct dpif *dpif, struct dpctl_params *dpctl_p)
>              dpctl_print(dpctl_p, "  masks: hit:%"PRIu64" total:%"PRIu32
>                                   " hit/pkt:%.2f\n",
>                          stats.n_mask_hit, stats.n_masks, avg);
> +
> +            if (stats.n_cache_hit > 0 && stats.n_cache_hit != UINT64_MAX) {

The kernel uses memset() to zero all the struct stats, so unfortunately
we don't know if zero means unsupported/unused or just no hit yet.

One thing that troubles me is that when we start OVS and the stats
is zero, then there is no line and that might be unexpected to scripts
or users. Other stats show zero stats.

I am on the fence here because showing zero as well might confuse
users using old kernels without support, though it's a matter of time.

In any case, stats zero should be valid from dpctl point of view, so
this situation should be handled in the specific implementation which
in this case is netlink. Does that make sense? See below.


> +                double avg_hits = n_pkts ?
> +                    (double) stats.n_cache_hit / n_pkts * 100 : 0.0;
> +
> +                dpctl_print(dpctl_p,
> +                            "  cache: hit:%"PRIu64" hit rate:%.4f%%\n",

That's an interesting precision.
I'd say 2 decimal digits is enough. ;)

> +                            stats.n_cache_hit, avg_hits);
> +            }
>          }
>      }
>  
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 2aad24511..4e3bd7519 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -2023,6 +2023,7 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
>      }
>      stats->n_masks = UINT32_MAX;
>      stats->n_mask_hit = UINT64_MAX;
> +    stats->n_cache_hit = UINT64_MAX;
>  
>      return 0;
>  }
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 7da4fb54d..07f15ac65 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -672,9 +672,12 @@ dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
>              stats->n_masks = dp.megaflow_stats->n_masks;
>              stats->n_mask_hit = get_32aligned_u64(
>                  &dp.megaflow_stats->n_mask_hit);
> +            stats->n_cache_hit = get_32aligned_u64(
> +                &dp.megaflow_stats->n_cache_hit);

I think we should work around the kernel issue here by checking
if it is zero and switch that to UINT64_MAX to disable it:

               if (!stats->n_cache_hit) {
                    /* Old kernels don't use this field and always
                     * report zero instead. Disable this stat. */
                    stats->n_cache_hit = UINT64_MAX;
               }


>          } else {
>              stats->n_masks = UINT32_MAX;
>              stats->n_mask_hit = UINT64_MAX;
> +            stats->n_cache_hit = UINT64_MAX;
>          }
>          ofpbuf_delete(buf);
>      }
> diff --git a/lib/dpif.h b/lib/dpif.h
> index 2d52f0186..43c1ab998 100644
> --- a/lib/dpif.h
> +++ b/lib/dpif.h
> @@ -429,6 +429,8 @@ struct dpif_dp_stats {
>      uint64_t n_missed;          /* Number of flow table misses. */
>      uint64_t n_lost;            /* Number of misses not sent to userspace. */
>      uint64_t n_flows;           /* Number of flows present. */
> +    uint64_t n_cache_hit;       /* Number of mega flow mask cache hits for
> +                                   flow table matches. */
>      uint64_t n_mask_hit;        /* Number of mega flow masks visited for
>                                     flow table matches. */
>      uint32_t n_masks;           /* Number of mega flow masks. */
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Eelco Chaudron March 2, 2021, 9:06 a.m. UTC | #2
On 1 Mar 2021, at 22:04, Flavio Leitner wrote:

> Hi Eelco,
>
>
> Thanks for the patch. I added few comments in line below.
>
>
> On Wed, Sep 02, 2020 at 12:12:36PM +0200, Eelco Chaudron wrote:
>> This patch adds cache usage statistics to the output:
>>
>> $ ovs-dpctl show                                                      
>>                                                                                                                                                                                           ovnhostvm: 
>> Tue Jul 21 15:25:14 2020
>
>
> There are some extra spaces above

Thanks for reviewing this Flavio! I copied this from a TMUX session 
halfway last year I see :) Will remove the timestamp from the commit 
messageā€¦

>> system@ovs-system:
>>   lookups: hit:24 missed:71 lost:0
>>   flows: 0
>>   masks: hit:334 total:0 hit/pkt:3.52
>>   cache: hit:4 hit rate:4.2105%
>>   port 0: ovs-system (internal)
>>   port 1: genev_sys_6081 (geneve: packet_type=ptap)
>>   port 2: br-int (internal)
>>   port 3: br-ex (internal)
>>   port 4: eth2
>>   port 5: sw1p1 (internal)
>>   port 6: sw0p4 (internal)
>>
>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>> ---
>>  datapath/linux/compat/include/linux/openvswitch.h |    2 +-
>>  lib/dpctl.c                                       |    9 +++++++++
>>  lib/dpif-netdev.c                                 |    1 +
>>  lib/dpif-netlink.c                                |    3 +++
>>  lib/dpif.h                                        |    2 ++
>>  5 files changed, 16 insertions(+), 1 deletion(-)
>>
>> diff --git a/datapath/linux/compat/include/linux/openvswitch.h 
>> b/datapath/linux/compat/include/linux/openvswitch.h
>> index cc41bbea4..fb73bfa90 100644
>> --- a/datapath/linux/compat/include/linux/openvswitch.h
>> +++ b/datapath/linux/compat/include/linux/openvswitch.h
>> @@ -122,8 +122,8 @@ struct ovs_dp_megaflow_stats {
>>  	__u64 n_mask_hit;	 /* Number of masks used for flow lookups. */
>>  	__u32 n_masks;		 /* Number of masks for the datapath. */
>>  	__u32 pad0;		 /* Pad for future expension. */
>> +	__u64 n_cache_hit;       /* Number of cache matches for flow 
>> lookups. */
>>  	__u64 pad1;		 /* Pad for future expension. */
>> -	__u64 pad2;		 /* Pad for future expension. */
>>  };
>>
>>  struct ovs_vport_stats {
>> diff --git a/lib/dpctl.c b/lib/dpctl.c
>> index db2b1f896..dac350fb5 100644
>> --- a/lib/dpctl.c
>> +++ b/lib/dpctl.c
>> @@ -605,6 +605,15 @@ show_dpif(struct dpif *dpif, struct dpctl_params 
>> *dpctl_p)
>>              dpctl_print(dpctl_p, "  masks: hit:%"PRIu64" 
>> total:%"PRIu32
>>                                   " hit/pkt:%.2f\n",
>>                          stats.n_mask_hit, stats.n_masks, avg);
>> +
>> +            if (stats.n_cache_hit > 0 && stats.n_cache_hit != 
>> UINT64_MAX) {
>
> The kernel uses memset() to zero all the struct stats, so 
> unfortunately
> we don't know if zero means unsupported/unused or just no hit yet.
>
> One thing that troubles me is that when we start OVS and the stats
> is zero, then there is no line and that might be unexpected to scripts
> or users. Other stats show zero stats.
>
> I am on the fence here because showing zero as well might confuse
> users using old kernels without support, though it's a matter of time.
>
> In any case, stats zero should be valid from dpctl point of view, so
> this situation should be handled in the specific implementation which
> in this case is netlink. Does that make sense? See below.

My preference for this was not showing it when zero for backward 
compatibility, assuming the cache hits will quickly be >1.
But I do agree it should be handled at the netlink layer, so will pick 
your solution below in the v2.

>> +                double avg_hits = n_pkts ?
>> +                    (double) stats.n_cache_hit / n_pkts * 100 : 0.0;
>> +
>> +                dpctl_print(dpctl_p,
>> +                            "  cache: hit:%"PRIu64" hit 
>> rate:%.4f%%\n",
>
> That's an interesting precision.
> I'd say 2 decimal digits is enough. ;)

ACK will change it in the v2.

>> +                            stats.n_cache_hit, avg_hits);
>> +            }
>>          }
>>      }
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index 2aad24511..4e3bd7519 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -2023,6 +2023,7 @@ dpif_netdev_get_stats(const struct dpif *dpif, 
>> struct dpif_dp_stats *stats)
>>      }
>>      stats->n_masks = UINT32_MAX;
>>      stats->n_mask_hit = UINT64_MAX;
>> +    stats->n_cache_hit = UINT64_MAX;
>>
>>      return 0;
>>  }
>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>> index 7da4fb54d..07f15ac65 100644
>> --- a/lib/dpif-netlink.c
>> +++ b/lib/dpif-netlink.c
>> @@ -672,9 +672,12 @@ dpif_netlink_get_stats(const struct dpif *dpif_, 
>> struct dpif_dp_stats *stats)
>>              stats->n_masks = dp.megaflow_stats->n_masks;
>>              stats->n_mask_hit = get_32aligned_u64(
>>                  &dp.megaflow_stats->n_mask_hit);
>> +            stats->n_cache_hit = get_32aligned_u64(
>> +                &dp.megaflow_stats->n_cache_hit);
>
> I think we should work around the kernel issue here by checking
> if it is zero and switch that to UINT64_MAX to disable it:
>
>                if (!stats->n_cache_hit) {
>                     /* Old kernels don't use this field and always
>                      * report zero instead. Disable this stat. */
>                     stats->n_cache_hit = UINT64_MAX;
>                }

Ack

>>          } else {
>>              stats->n_masks = UINT32_MAX;
>>              stats->n_mask_hit = UINT64_MAX;
>> +            stats->n_cache_hit = UINT64_MAX;
>>          }
>>          ofpbuf_delete(buf);
>>      }
>> diff --git a/lib/dpif.h b/lib/dpif.h
>> index 2d52f0186..43c1ab998 100644
>> --- a/lib/dpif.h
>> +++ b/lib/dpif.h
>> @@ -429,6 +429,8 @@ struct dpif_dp_stats {
>>      uint64_t n_missed;          /* Number of flow table misses. */
>>      uint64_t n_lost;            /* Number of misses not sent to 
>> userspace. */
>>      uint64_t n_flows;           /* Number of flows present. */
>> +    uint64_t n_cache_hit;       /* Number of mega flow mask cache 
>> hits for
>> +                                   flow table matches. */
>>      uint64_t n_mask_hit;        /* Number of mega flow masks visited 
>> for
>>                                     flow table matches. */
>>      uint32_t n_masks;           /* Number of mega flow masks. */
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
> -- 
> fbl
diff mbox series

Patch

diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index cc41bbea4..fb73bfa90 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -122,8 +122,8 @@  struct ovs_dp_megaflow_stats {
 	__u64 n_mask_hit;	 /* Number of masks used for flow lookups. */
 	__u32 n_masks;		 /* Number of masks for the datapath. */
 	__u32 pad0;		 /* Pad for future expension. */
+	__u64 n_cache_hit;       /* Number of cache matches for flow lookups. */
 	__u64 pad1;		 /* Pad for future expension. */
-	__u64 pad2;		 /* Pad for future expension. */
 };
 
 struct ovs_vport_stats {
diff --git a/lib/dpctl.c b/lib/dpctl.c
index db2b1f896..dac350fb5 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -605,6 +605,15 @@  show_dpif(struct dpif *dpif, struct dpctl_params *dpctl_p)
             dpctl_print(dpctl_p, "  masks: hit:%"PRIu64" total:%"PRIu32
                                  " hit/pkt:%.2f\n",
                         stats.n_mask_hit, stats.n_masks, avg);
+
+            if (stats.n_cache_hit > 0 && stats.n_cache_hit != UINT64_MAX) {
+                double avg_hits = n_pkts ?
+                    (double) stats.n_cache_hit / n_pkts * 100 : 0.0;
+
+                dpctl_print(dpctl_p,
+                            "  cache: hit:%"PRIu64" hit rate:%.4f%%\n",
+                            stats.n_cache_hit, avg_hits);
+            }
         }
     }
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2aad24511..4e3bd7519 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2023,6 +2023,7 @@  dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
     }
     stats->n_masks = UINT32_MAX;
     stats->n_mask_hit = UINT64_MAX;
+    stats->n_cache_hit = UINT64_MAX;
 
     return 0;
 }
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 7da4fb54d..07f15ac65 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -672,9 +672,12 @@  dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
             stats->n_masks = dp.megaflow_stats->n_masks;
             stats->n_mask_hit = get_32aligned_u64(
                 &dp.megaflow_stats->n_mask_hit);
+            stats->n_cache_hit = get_32aligned_u64(
+                &dp.megaflow_stats->n_cache_hit);
         } else {
             stats->n_masks = UINT32_MAX;
             stats->n_mask_hit = UINT64_MAX;
+            stats->n_cache_hit = UINT64_MAX;
         }
         ofpbuf_delete(buf);
     }
diff --git a/lib/dpif.h b/lib/dpif.h
index 2d52f0186..43c1ab998 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -429,6 +429,8 @@  struct dpif_dp_stats {
     uint64_t n_missed;          /* Number of flow table misses. */
     uint64_t n_lost;            /* Number of misses not sent to userspace. */
     uint64_t n_flows;           /* Number of flows present. */
+    uint64_t n_cache_hit;       /* Number of mega flow mask cache hits for
+                                   flow table matches. */
     uint64_t n_mask_hit;        /* Number of mega flow masks visited for
                                    flow table matches. */
     uint32_t n_masks;           /* Number of mega flow masks. */