[ovs-dev,V4,08/17] dpif-netdev: Update offloaded flows statistics

Message ID 20191216151047.5967-9-elibr@mellanox.com
State Changes Requested
Delegated to: Ilya Maximets
Series netdev datapath actions offload

Commit Message

Eli Britstein Dec. 16, 2019, 3:10 p.m. UTC
From: Ophir Munk <ophirmu@mellanox.com>

In case a flow is HW offloaded, packets do not reach the SW and thus
are not counted in the statistics. Use the netdev flow get API to update
flow statistics with the HW statistics.

Co-authored-by: Eli Britstein <elibr@mellanox.com>
Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
Signed-off-by: Eli Britstein <elibr@mellanox.com>
---
 lib/dpif-netdev.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 68 insertions(+), 13 deletions(-)
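
For illustration, the reported stats amount to merging the HW counters
into the flow's SW counters, roughly as in this sketch ('merge_flow_stats'
is a hypothetical helper name; the patch below open-codes the
packets/bytes/used part, and the incremental patch in the comments also
folds in tcp_flags):

    /* Combine software counters ('sw', from the PMD threads) with
     * hardware counters ('hw', obtained via netdev_flow_get()). */
    static void
    merge_flow_stats(struct dpif_flow_stats *sw,
                     const struct dpif_flow_stats *hw)
    {
        sw->n_packets += hw->n_packets;
        sw->n_bytes += hw->n_bytes;
        sw->used = MAX(sw->used, hw->used);  /* Later timestamp wins. */
        sw->tcp_flags |= hw->tcp_flags;
    }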

Comments

Ilya Maximets Dec. 16, 2019, 6:47 p.m. UTC | #1
On 16.12.2019 16:10, Eli Britstein wrote:
> From: Ophir Munk <ophirmu@mellanox.com>
> 
> In case a flow is HW offloaded, packets do not reach the SW and thus
> are not counted in the statistics. Use the netdev flow get API to update
> flow statistics with the HW statistics.
> 
> Co-authored-by: Eli Britstein <elibr@mellanox.com>
> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
> Signed-off-by: Eli Britstein <elibr@mellanox.com>
> ---
>  lib/dpif-netdev.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 68 insertions(+), 13 deletions(-)

Some issues with this patch:

1. Stats are now duplicated in dpif-netdev.  We already have them
   in netdev-offload-dpdk, so we don't need to store them here too.

2. dpif_flow_attrs should be filled by the offload provider, not by
   the datapath.  netdev-offload-tc fills this information and it
   will be lost in the current implementation of dpif-netdev.

3. New 'dp_layer' types have to be documented in the dpctl man page.
   Also, 'in_hw' doesn't look like a datapath layer name.
   I suggest using 'dpdk', as netdev-offload-tc uses 'tc'.
   We could also return a specific dp_layer for partially offloaded
   flows, i.e. 'ovs-partial'.
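
For illustration, the dpctl man page entry could then describe the layers
roughly like this (wording illustrative only, not actual man page text):

    dp:ovs    Flow is handled in the main software datapath.
    dp:tc     Flow is handled in the Linux TC datapath.
    dp:dpdk   Flow is fully offloaded to hardware via the DPDK rte_flow API.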

Suggesting the following incremental patch, where I tried to address the
above issues (didn't update the man page, only compile tested, made on top
of the full set):
-----
Subject: [PATCH] dpif-netdev: Cleanup for stats/attrs of offloaded flows.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
---
 lib/dpif-netdev.c         | 146 +++++++++++++++++++-------------------
 lib/netdev-offload-dpdk.c |  35 +++++----
 2 files changed, 94 insertions(+), 87 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 91fefb877..892b2d0f5 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -530,7 +530,6 @@ struct dp_netdev_flow {
 
     /* Statistics. */
     struct dp_netdev_flow_stats stats;
-    struct dp_netdev_flow_stats hw_stats;
 
     /* Actions. */
     OVSRCU_TYPE(struct dp_netdev_actions *) actions;
@@ -3011,11 +3010,49 @@ dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd,
     return NULL;
 }
 
+static bool
+dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp,
+                                    const struct dp_netdev_flow *netdev_flow,
+                                    struct dpif_flow_stats *stats,
+                                    struct dpif_flow_attrs *attrs)
+{
+    struct nlattr *actions;
+    struct ofpbuf wbuffer;
+    struct netdev *netdev;
+    struct match match;
+
+    int ret = 0;
+
+    if (!netdev_is_flow_api_enabled()) {
+        return false;
+    }
+
+    netdev = netdev_ports_get(netdev_flow->flow.in_port.odp_port, dp->class);
+    if (!netdev) {
+        return false;
+    }
+    /* Taking a global 'port_mutex' to fulfill thread safety
+     * restrictions for the netdev-offload-dpdk module. */
+    ovs_mutex_lock(&dp->port_mutex);
+    ret = netdev_flow_get(netdev, &match, &actions, &netdev_flow->mega_ufid,
+                          stats, attrs, &wbuffer);
+    ovs_mutex_unlock(&dp->port_mutex);
+    netdev_close(netdev);
+    if (ret) {
+        return false;
+    }
+
+    return true;
+}
+
 static void
-get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_,
-                    struct dpif_flow_stats *stats, bool hw)
+get_dpif_flow_status(const struct dp_netdev *dp,
+                     const struct dp_netdev_flow *netdev_flow_,
+                     struct dpif_flow_stats *stats,
+                     struct dpif_flow_attrs *attrs)
 {
-    struct dp_netdev_flow_stats *flow_stats;
+    struct dpif_flow_stats offload_stats;
+    struct dpif_flow_attrs offload_attrs;
     struct dp_netdev_flow *netdev_flow;
     unsigned long long n;
     long long used;
@@ -3023,16 +3060,29 @@ get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_,
 
     netdev_flow = CONST_CAST(struct dp_netdev_flow *, netdev_flow_);
 
-    flow_stats = (hw) ? &netdev_flow->hw_stats : &netdev_flow->stats;
-
-    atomic_read_relaxed(&flow_stats->packet_count, &n);
+    atomic_read_relaxed(&netdev_flow->stats.packet_count, &n);
     stats->n_packets = n;
-    atomic_read_relaxed(&flow_stats->byte_count, &n);
+    atomic_read_relaxed(&netdev_flow->stats.byte_count, &n);
     stats->n_bytes = n;
-    atomic_read_relaxed(&flow_stats->used, &used);
+    atomic_read_relaxed(&netdev_flow->stats.used, &used);
     stats->used = used;
-    atomic_read_relaxed(&flow_stats->tcp_flags, &flags);
+    atomic_read_relaxed(&netdev_flow->stats.tcp_flags, &flags);
     stats->tcp_flags = flags;
+
+    if (dpif_netdev_get_flow_offload_status(dp, netdev_flow,
+                                            &offload_stats, &offload_attrs)) {
+        stats->n_packets += offload_stats.n_packets;
+        stats->n_bytes += offload_stats.n_bytes;
+        stats->used = MAX(stats->used, offload_stats.used);
+        stats->tcp_flags |= offload_stats.tcp_flags;
+        if (attrs) {
+            attrs->offloaded = offload_attrs.offloaded;
+            attrs->dp_layer = offload_attrs.dp_layer;
+        }
+    } else if (attrs) {
+        attrs->offloaded = false;
+        attrs->dp_layer = "ovs";
+    }
 }
 
 /* Converts to the dpif_flow format, using 'key_buf' and 'mask_buf' for
@@ -3040,12 +3090,11 @@ get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_,
  * 'mask_buf'. Actions will be returned without copying, by relying on RCU to
  * protect them. */
 static void
-dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow,
+dp_netdev_flow_to_dpif_flow(const struct dp_netdev *dp,
+                            const struct dp_netdev_flow *netdev_flow,
                             struct ofpbuf *key_buf, struct ofpbuf *mask_buf,
-                            struct dpif_flow *flow, bool terse, bool offloaded)
+                            struct dpif_flow *flow, bool terse)
 {
-    struct dpif_flow_stats hw_stats;
-
     if (terse) {
         memset(flow, 0, sizeof *flow);
     } else {
@@ -3085,14 +3134,8 @@ dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow,
     flow->ufid = netdev_flow->ufid;
     flow->ufid_present = true;
     flow->pmd_id = netdev_flow->pmd_id;
-    get_dpif_flow_stats(netdev_flow, &flow->stats, false);
-    get_dpif_flow_stats(netdev_flow, &hw_stats, true);
-    flow->stats.n_packets += hw_stats.n_packets;
-    flow->stats.n_bytes += hw_stats.n_bytes;
-    flow->stats.used = MAX(flow->stats.used, hw_stats.used);
 
-    flow->attrs.offloaded = offloaded;
-    flow->attrs.dp_layer = flow->attrs.offloaded ? "in_hw" : "ovs";
+    get_dpif_flow_status(dp, netdev_flow, &flow->stats, &flow->attrs);
 }
 
 static int
@@ -3162,44 +3205,6 @@ dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
     return 0;
 }
 
-static bool
-dpif_netdev_offload_stats(struct dp_netdev_flow *netdev_flow,
-                          struct dp_netdev *dp)
-{
-    struct dpif_flow_stats stats;
-    struct dpif_flow_attrs attrs;
-    struct nlattr *actions;
-    struct ofpbuf wbuffer;
-    struct netdev *netdev;
-    struct match match;
-
-    int ret = 0;
-
-    if (!netdev_is_flow_api_enabled()) {
-        return false;
-    }
-
-    netdev = netdev_ports_get(netdev_flow->flow.in_port.odp_port, dp->class);
-    if (!netdev) {
-        return false;
-    }
-    /* Taking a global 'port_mutex' to fulfill thread safety
-     * restrictions for the netdev-offload-dpdk module. */
-    ovs_mutex_lock(&dp->port_mutex);
-    ret = netdev_flow_get(netdev, &match, &actions, &netdev_flow->mega_ufid,
-                          &stats, &attrs, &wbuffer);
-    ovs_mutex_unlock(&dp->port_mutex);
-    netdev_close(netdev);
-    if (ret) {
-        return false;
-    }
-    atomic_store_relaxed(&netdev_flow->hw_stats.packet_count, stats.n_packets);
-    atomic_store_relaxed(&netdev_flow->hw_stats.byte_count, stats.n_bytes);
-    atomic_store_relaxed(&netdev_flow->hw_stats.used, stats.used);
-
-    return true;
-}
-
 static int
 dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get)
 {
@@ -3233,9 +3238,8 @@ dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get)
         netdev_flow = dp_netdev_pmd_find_flow(pmd, get->ufid, get->key,
                                               get->key_len);
         if (netdev_flow) {
-            bool offloaded = dpif_netdev_offload_stats(netdev_flow, pmd->dp);
-            dp_netdev_flow_to_dpif_flow(netdev_flow, get->buffer, get->buffer,
-                                        get->flow, false, offloaded);
+            dp_netdev_flow_to_dpif_flow(dp, netdev_flow, get->buffer,
+                                        get->buffer, get->flow, false);
             error = 0;
             break;
         } else {
@@ -3298,7 +3302,6 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
     /* Do not allocate extra space. */
     flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len);
     memset(&flow->stats, 0, sizeof flow->stats);
-    memset(&flow->hw_stats, 0, sizeof flow->hw_stats);
     flow->dead = false;
     flow->batch = NULL;
     flow->mark = INVALID_FLOW_MARK;
@@ -3411,7 +3414,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
                                   put->actions, put->actions_len);
 
             if (stats) {
-                get_dpif_flow_stats(netdev_flow, stats, false);
+                get_dpif_flow_status(pmd->dp, netdev_flow, stats, NULL);
             }
             if (put->flags & DPIF_FP_ZERO_STATS) {
                 /* XXX: The userspace datapath uses thread local statistics
@@ -3530,7 +3533,7 @@ flow_del_on_pmd(struct dp_netdev_pmd_thread *pmd,
                                           del->key_len);
     if (netdev_flow) {
         if (stats) {
-            get_dpif_flow_stats(netdev_flow, stats, false);
+            get_dpif_flow_status(pmd->dp, netdev_flow, stats, NULL);
         }
         dp_netdev_pmd_remove_flow(pmd, netdev_flow);
     } else {
@@ -3664,16 +3667,13 @@ dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
         = dpif_netdev_flow_dump_thread_cast(thread_);
     struct dpif_netdev_flow_dump *dump = thread->dump;
     struct dp_netdev_flow *netdev_flows[FLOW_DUMP_MAX_BATCH];
-    bool offloaded[FLOW_DUMP_MAX_BATCH];
+    struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif);
+    struct dp_netdev *dp = get_dp_netdev(&dpif->dpif);
     int n_flows = 0;
     int i;
 
-    memset(offloaded, 0, sizeof offloaded);
-
     ovs_mutex_lock(&dump->mutex);
     if (!dump->status) {
-        struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif);
-        struct dp_netdev *dp = get_dp_netdev(&dpif->dpif);
         struct dp_netdev_pmd_thread *pmd = dump->cur_pmd;
         int flow_limit = MIN(max_flows, FLOW_DUMP_MAX_BATCH);
 
@@ -3699,8 +3699,6 @@ dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
                 netdev_flows[n_flows] = CONTAINER_OF(node,
                                                      struct dp_netdev_flow,
                                                      node);
-                offloaded[n_flows] =
-                    dpif_netdev_offload_stats(netdev_flows[n_flows], pmd->dp);
             }
             /* When finishing dumping the current pmd thread, moves to
              * the next. */
@@ -3732,8 +3730,8 @@ dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
 
         ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
         ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
-        dp_netdev_flow_to_dpif_flow(netdev_flow, &key, &mask, f,
-                                    dump->up.terse, offloaded[i]);
+        dp_netdev_flow_to_dpif_flow(dp, netdev_flow, &key, &mask, f,
+                                    dump->up.terse);
     }
 
     return n_flows;
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 0ec840ce1..d1c066992 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -57,6 +57,7 @@ struct ufid_to_rte_flow_data {
     ovs_u128 ufid;
     struct rte_flow *rte_flow;
     struct dpif_flow_stats stats;
+    bool actions_offloaded;
 };
 
 /* Find rte_flow with @ufid. */
@@ -77,7 +78,7 @@ ufid_to_rte_flow_data_find(const ovs_u128 *ufid)
 
 static inline void
 ufid_to_rte_flow_associate(const ovs_u128 *ufid,
-                           struct rte_flow *rte_flow)
+                           struct rte_flow *rte_flow, bool actions_offloaded)
 {
     size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
     struct ufid_to_rte_flow_data *data = xzalloc(sizeof *data);
@@ -96,6 +97,7 @@ ufid_to_rte_flow_associate(const ovs_u128 *ufid,
 
     data->ufid = *ufid;
     data->rte_flow = rte_flow;
+    data->actions_offloaded = actions_offloaded;
 
     cmap_insert(&ufid_to_rte_flow,
                 CONST_CAST(struct cmap_node *, &data->node), hash);
@@ -1130,6 +1132,7 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
                              struct offload_info *info)
 {
     struct flow_patterns patterns = { .items = NULL, .cnt = 0 };
+    bool actions_offloaded = true;
     struct rte_flow *flow;
     int ret = 0;
 
@@ -1145,13 +1148,14 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
          * actions.
          */
         flow = netdev_offload_dpdk_mark_rss(&patterns, netdev, info->flow_mark);
+        actions_offloaded = false;
     }
 
     if (!flow) {
         ret = -1;
         goto out;
     }
-    ufid_to_rte_flow_associate(ufid, flow);
+    ufid_to_rte_flow_associate(ufid, flow, actions_offloaded);
     VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n",
              netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid));
 
@@ -1317,7 +1321,7 @@ netdev_offload_dpdk_flow_get(struct netdev *netdev,
                              struct nlattr **actions OVS_UNUSED,
                              const ovs_u128 *ufid,
                              struct dpif_flow_stats *stats,
-                             struct dpif_flow_attrs *attrs OVS_UNUSED,
+                             struct dpif_flow_attrs *attrs,
                              struct ofpbuf *buf OVS_UNUSED)
 {
     struct rte_flow_query_count query = { .reset = 1 };
@@ -1331,18 +1335,23 @@ netdev_offload_dpdk_flow_get(struct netdev *netdev,
         goto out;
     }
 
-    ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow,
-                                           &query, &error);
-    if (ret) {
-        goto out;
-    }
-    rte_flow_data->stats.n_packets += (query.hits_set) ? query.hits : 0;
-    rte_flow_data->stats.n_bytes += (query.bytes_set) ? query.bytes : 0;
-    if (query.hits_set && query.hits) {
-        rte_flow_data->stats.used = time_usec() / 1000;
+    attrs->offloaded = true;
+    if (rte_flow_data->actions_offloaded) {
+        attrs->dp_layer = "dpdk";
+        ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow,
+                                               &query, &error);
+        if (ret) {
+            goto out;
+        }
+        rte_flow_data->stats.n_packets += (query.hits_set) ? query.hits : 0;
+        rte_flow_data->stats.n_bytes += (query.bytes_set) ? query.bytes : 0;
+        if (query.hits_set && query.hits) {
+            rte_flow_data->stats.used = time_msec();
+        }
+    } else {
+        attrs->dp_layer = "ovs-partial";
     }
     memcpy(stats, &rte_flow_data->stats, sizeof *stats);
-
 out:
     return ret;
 }
Eli Britstein Dec. 17, 2019, 5:38 p.m. UTC | #2
On 12/16/2019 8:47 PM, Ilya Maximets wrote:
> 3. New 'dp_layer' types have to be documented in the dpctl man page.
>     Also, 'in_hw' doesn't look like a datapath layer name.
>     I suggest using 'dpdk', as netdev-offload-tc uses 'tc'.
>     We could also return a specific dp_layer for partially offloaded
>     flows, i.e. 'ovs-partial'.

Thanks for the patch. I applied and tested it. It works well.

Re-thinking about dp_layer and offloaded, I think this approach is wrong.

The 'dp_layer' should be "ovs-dpdk", to differentiate it from the kernel
module datapath "ovs" and to denote that the datapath is implemented by
DPDK.

This is true for any kind of offloading.

This can be done in dp_netdev_flow_to_dpif_flow to handle the case where
offloading is off, and filled in netdev-offload-dpdk (with the same value).

The "offloaded" flag can be enhanced to 3 states instead of just a boolean,
as below (the code would of course need to be adapted throughout).

So, we will be able to show "partial" or "yes" for "offloaded" in
dpctl/dump-flows.

Please comment.


diff --git a/lib/dpif.h b/lib/dpif.h
index d96f854a3..dba2f3cbf 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -508,8 +508,14 @@ struct dpif_flow_detailed_stats {
      uint16_t tcp_flags;
  };

+enum ol_state {
+    OFFLOADED_STATE_OFF,
+    OFFLOADED_STATE_ON,
+    OFFLOADED_STATE_PARTIAL,
+};
+
  struct dpif_flow_attrs {
-    bool offloaded;         /* True if flow is offloaded to HW. */
+    enum ol_state offloaded;
      const char *dp_layer;   /* DP layer the flow is handled in. */
  };
Ilya Maximets Dec. 17, 2019, 7:07 p.m. UTC | #3
On 17.12.2019 18:38, Eli Britstein wrote:
> 
> On 12/16/2019 8:47 PM, Ilya Maximets wrote:
>> 3. New 'dp_layer' types have to be documented in the dpctl man page.
>>     Also, 'in_hw' doesn't look like a datapath layer name.
>>     I suggest using 'dpdk', as netdev-offload-tc uses 'tc'.
>>     We could also return a specific dp_layer for partially offloaded
>>     flows, i.e. 'ovs-partial'.
> 
> Thanks for the patch. I applied and tested it. It works well.
> 
> Re-thinking about dp_layer and offloaded, I think this approach is wrong.
> 
> The 'dp_layer' should be "ovs-dpdk", to differentiate it from the kernel
> module datapath "ovs" and to denote that the datapath is implemented by
> DPDK.

I don't think we need to differentiate userspace and kernel datapaths
like this just because they are kind of the same datapath layer; also,
you always dump flows from a specific datapath/bridge, i.e. you
know what your datapath type is.  The second point is that it definitely
should not be called 'ovs-dpdk' because there might be no DPDK at all.
My suggestion is to have 'ovs' as the layer for all the usual non-offloaded
flows, 'tc' for flows offloaded to Linux TC via netdev-offload-tc,
and 'dpdk' for fully offloaded flows via netdev-offload-dpdk.

> 
> This is true for any kind of offloading.
> 
> This can be done in dp_netdev_flow_to_dpif_flow to handle the case where
> offloading is off, and filled in netdev-offload-dpdk (with the same value).
> 
> The "offloaded" flag can be enhanced to 3 states instead of just a boolean,
> as below (the code would of course need to be adapted throughout).
> 
> So, we will be able to show "partial" or "yes" for "offloaded" in
> dpctl/dump-flows.

Yes, I think there was such an idea in an early discussion with Roni this
year, so we could implement this.  For partially offloaded flows it might
make sense to have dp_layer = "ovs", since most of the processing happens
in the main userspace datapath.

> 
> Please comment.
> 
> 
> diff --git a/lib/dpif.h b/lib/dpif.h
> index d96f854a3..dba2f3cbf 100644
> --- a/lib/dpif.h
> +++ b/lib/dpif.h
> @@ -508,8 +508,14 @@ struct dpif_flow_detailed_stats {
>       uint16_t tcp_flags;
>   };
> 
> +enum ol_state {
> +    OFFLOADED_STATE_OFF,
> +    OFFLOADED_STATE_ON,
> +    OFFLOADED_STATE_PARTIAL,

How about:

enum dpif_flow_ol_state {
    DPIF_FLOW_OL_STATE_NOT_OFFLOADED, /* Flow fully handled in software. */
    DPIF_FLOW_OL_STATE_OFFLOADED,     /* Flow fully handled in hardware. */
    DPIF_FLOW_OL_STATE_PARTIAL,       /* Flow matching handled in hardware. */
};

> +};
> +
>   struct dpif_flow_attrs {
> -    bool offloaded;         /* True if flow is offloaded to HW. */
> +    enum ol_state offloaded;

Some minimal comment would be nice here.
    enum dpif_flow_ol_state ol_state;  /* Status of HW offloading. */

>       const char *dp_layer;   /* DP layer the flow is handled in. */
>   };
>
Eli Britstein Dec. 17, 2019, 8:56 p.m. UTC | #4
On 12/17/2019 9:07 PM, Ilya Maximets wrote:
> I don't think we need to differentiate userspace and kernel datapaths
> like this just because they are kind of the same datapath layer; also,
> you always dump flows from a specific datapath/bridge, i.e. you
> know what your datapath type is.  The second point is that it definitely
> should not be called 'ovs-dpdk' because there might be no DPDK at all.
> My suggestion is to have 'ovs' as the layer for all the usual non-offloaded
> flows, 'tc' for flows offloaded to linux tc via netdev-offload-tc,
> and 'dpdk' for fully offloaded flows via netdev-offload-dpdk.

I think it's not very clear that the DP is changed because of offloading.
If we go ahead with this, then there is no point in making the offloaded
flag 3-state, as partial offload can be noted by "ovs" and "offloaded=yes".
It can just be formatted this way in dump-flows (offloaded=partial, dp: ovs).
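
For illustration, a sketch of how the dump-flows formatting could derive
the displayed state from the existing two attributes
('offload_state_to_string' is a hypothetical helper, not actual OVS
formatting code; it assumes the 'struct dpif_flow_attrs' fields shown
earlier in the thread):

    /* A flow with offloaded == true but dp_layer == "ovs" has only its
     * match offloaded, so it is displayed as "partial".
     * Assumes <string.h> and 'struct dpif_flow_attrs' from lib/dpif.h. */
    static const char *
    offload_state_to_string(const struct dpif_flow_attrs *attrs)
    {
        if (!attrs->offloaded) {
            return "";
        }
        return !strcmp(attrs->dp_layer, "ovs") ? "partial" : "yes";
    }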

Ilya Maximets Dec. 17, 2019, 9:34 p.m. UTC | #5
On 17.12.2019 21:56, Eli Britstein wrote:
> I think it's not very clear that the DP is changed because of offloading.
> If we go ahead with this, then there is no point in making the offloaded
> flag 3-state, as partial offload can be noted by "ovs" and "offloaded=yes".
> It can just be formatted this way in dump-flows (offloaded=partial, dp: ovs).

Good point.  This might be easier to implement.

Oz Shlomo Dec. 18, 2019, 10:02 a.m. UTC | #6
Hi Ilya,


On 12/17/2019 11:34 PM, Ilya Maximets wrote:
> On 17.12.2019 21:56, Eli Britstein wrote:
>> I think it's not very clear that the DP is changed because of offloading.
>> If we go ahead with this, then there is no point in making the offloaded
>> flag 3-state, as partial offload can be noted by "ovs" and "offloaded=yes".
>> It can just be formatted this way in dump-flows (offloaded=partial, dp: ovs).
> Good point.  This might be easier to implement.

So we agree that (offloaded=partial, dp: ovs) will be added to indicate
that the dp runs in the ovs layer with the HW providing match acceleration.

We are discussing two alternatives to indicate that a flow is fully
offloaded by DPDK:

1. (dp: dpdk, offloaded=yes)

2. (dp: ovs, offloaded=yes)

The thing is that there is no real "dpdk" data plane, as DPDK's software
data plane is ovs, unlike TC, which is a kernel software data plane.

In addition, (dp: dpdk, offloaded="") is not a valid system permutation.

Therefore, I vote for option 2, giving the following permutations:

(dp: ovs, offloaded="")  - dp runs in the ovs layer (kernel/dpdk)

(dp: ovs, offloaded="yes")  - dp runs in hardware (controlled by OVS)

(dp: ovs, offloaded="partial")  - dp runs in the ovs layer with the HW
providing match acceleration

(dp: tc, offloaded="")  - dp runs in the tc kernel datapath

(dp: tc, offloaded="yes")  - dp runs in hardware (controlled by TC)

What do you think?
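
For reference, roughly how the permutations above could look in
'ovs-appctl dpctl/dump-flows' output (illustrative lines, not from a real
run; the match and actions are placeholders):

    in_port(2),eth_type(0x0800),ipv4(frag=no), packets:42, bytes:2520, used:0.500s, offloaded:partial, dp:ovs, actions:3
    in_port(2),eth_type(0x0800),ipv4(frag=no), packets:42, bytes:2520, used:0.500s, offloaded:yes, dp:tc, actions:3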


Ilya Maximets Dec. 18, 2019, 11:04 a.m. UTC | #7
On 18.12.2019 11:02, Oz Shlomo wrote:
> So we agree that (offloaded=partial, dp: ovs) will be added to indicate
> that the dp runs in the ovs layer with the HW providing match acceleration.
> 
> We are discussing two alternatives to indicate that a flow is fully
> offloaded by DPDK:
> 
> 1. (dp: dpdk, offloaded=yes)
> 
> 2. (dp: ovs, offloaded=yes)
> 
> The thing is that there is no real "dpdk" data plane, as DPDK's software
> data plane is ovs, unlike TC, which is a kernel software data plane.
> 
> In addition, (dp: dpdk, offloaded="") is not a valid system permutation.
> 
> Therefore, I vote for option 2, giving the following permutations:
> 
> (dp: ovs, offloaded="")  - dp runs in the ovs layer (kernel/dpdk)
> 
> (dp: ovs, offloaded="yes")  - dp runs in hardware (controlled by OVS)
> 
> (dp: ovs, offloaded="partial")  - dp runs in the ovs layer with the HW
> providing match acceleration
> 
> (dp: tc, offloaded="")  - dp runs in the tc kernel datapath
> 
> (dp: tc, offloaded="yes")  - dp runs in hardware (controlled by TC)
> 
> What do you think?

I'd still prefer option #1, i.e. {dp:dpdk offloaded="yes"}, to represent
flows that are offloaded by netdev-offload-dpdk, for two reasons:

1. This is in line with netdev-offload-tc.

2. You can't be sure that a flow offloaded via rte_flow is actually in
   hardware.  For example, the rte_flow implementation for tap interfaces
   works via Linux TC.  So, it'll be hard to correctly formulate the
   documentation.  And this is actually a case where (dp: dpdk,
   offloaded="") is a valid combination, even if we can't detect it with
   the current DPDK rte_flow API.

Ilya Maximets Dec. 18, 2019, 1:48 p.m. UTC | #8
On 18.12.2019 12:04, Ilya Maximets wrote:
> I'd still prefer option #1, i.e. {dp:dpdk offloaded="yes"}, to represent
> flows that are offloaded by netdev-offload-dpdk, for two reasons:
> 
> 1. This is in line with netdev-offload-tc.
> 
> 2. You can't be sure that a flow offloaded via rte_flow is actually in
>    hardware.  For example, the rte_flow implementation for tap interfaces
>    works via Linux TC.  So, it'll be hard to correctly formulate the
>    documentation.  And this is actually a case where (dp: dpdk,
>    offloaded="") is a valid combination, even if we can't detect it with
>    the current DPDK rte_flow API.

CC: Simon, as he might be interested in this topic.


Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 7ebf4ef87..be3205a7c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -530,6 +530,7 @@  struct dp_netdev_flow {
 
     /* Statistics. */
     struct dp_netdev_flow_stats stats;
+    struct dp_netdev_flow_stats hw_stats;
 
     /* Actions. */
     OVSRCU_TYPE(struct dp_netdev_actions *) actions;
@@ -3002,8 +3003,9 @@  dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd,
 
 static void
 get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_,
-                    struct dpif_flow_stats *stats)
+                    struct dpif_flow_stats *stats, bool hw)
 {
+    struct dp_netdev_flow_stats *flow_stats;
     struct dp_netdev_flow *netdev_flow;
     unsigned long long n;
     long long used;
@@ -3011,13 +3013,15 @@  get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_,
 
     netdev_flow = CONST_CAST(struct dp_netdev_flow *, netdev_flow_);
 
-    atomic_read_relaxed(&netdev_flow->stats.packet_count, &n);
+    flow_stats = (hw) ? &netdev_flow->hw_stats : &netdev_flow->stats;
+
+    atomic_read_relaxed(&flow_stats->packet_count, &n);
     stats->n_packets = n;
-    atomic_read_relaxed(&netdev_flow->stats.byte_count, &n);
+    atomic_read_relaxed(&flow_stats->byte_count, &n);
     stats->n_bytes = n;
-    atomic_read_relaxed(&netdev_flow->stats.used, &used);
+    atomic_read_relaxed(&flow_stats->used, &used);
     stats->used = used;
-    atomic_read_relaxed(&netdev_flow->stats.tcp_flags, &flags);
+    atomic_read_relaxed(&flow_stats->tcp_flags, &flags);
     stats->tcp_flags = flags;
 }
 
@@ -3028,8 +3032,10 @@  get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_,
 static void
 dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow,
                             struct ofpbuf *key_buf, struct ofpbuf *mask_buf,
-                            struct dpif_flow *flow, bool terse)
+                            struct dpif_flow *flow, bool terse, bool offloaded)
 {
+    struct dpif_flow_stats hw_stats;
+
     if (terse) {
         memset(flow, 0, sizeof *flow);
     } else {
@@ -3069,10 +3075,14 @@  dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow,
     flow->ufid = netdev_flow->ufid;
     flow->ufid_present = true;
     flow->pmd_id = netdev_flow->pmd_id;
-    get_dpif_flow_stats(netdev_flow, &flow->stats);
+    get_dpif_flow_stats(netdev_flow, &flow->stats, false);
+    get_dpif_flow_stats(netdev_flow, &hw_stats, true);
+    flow->stats.n_packets += hw_stats.n_packets;
+    flow->stats.n_bytes += hw_stats.n_bytes;
+    flow->stats.used = MAX(flow->stats.used, hw_stats.used);
 
-    flow->attrs.offloaded = false;
-    flow->attrs.dp_layer = "ovs";
+    flow->attrs.offloaded = offloaded;
+    flow->attrs.dp_layer = flow->attrs.offloaded ? "in_hw" : "ovs";
 }
 
 static int
@@ -3142,6 +3152,44 @@  dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
     return 0;
 }
 
+static bool
+dpif_netdev_offload_stats(struct dp_netdev_flow *netdev_flow,
+                          struct dp_netdev *dp)
+{
+    struct dpif_flow_stats stats;
+    struct dpif_flow_attrs attrs;
+    struct nlattr *actions;
+    struct ofpbuf wbuffer;
+    struct netdev *netdev;
+    struct match match;
+
+    int ret = 0;
+
+    if (!netdev_is_flow_api_enabled()) {
+        return false;
+    }
+
+    netdev = netdev_ports_get(netdev_flow->flow.in_port.odp_port, dp->class);
+    if (!netdev) {
+        return false;
+    }
+    /* Taking a global 'port_mutex' to fulfill thread safety
+     * restrictions for the netdev-offload-dpdk module. */
+    ovs_mutex_lock(&dp->port_mutex);
+    ret = netdev_flow_get(netdev, &match, &actions, &netdev_flow->mega_ufid,
+                          &stats, &attrs, &wbuffer);
+    ovs_mutex_unlock(&dp->port_mutex);
+    netdev_close(netdev);
+    if (ret) {
+        return false;
+    }
+    atomic_store_relaxed(&netdev_flow->hw_stats.packet_count, stats.n_packets);
+    atomic_store_relaxed(&netdev_flow->hw_stats.byte_count, stats.n_bytes);
+    atomic_store_relaxed(&netdev_flow->hw_stats.used, stats.used);
+
+    return true;
+}
+
 static int
 dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get)
 {
@@ -3175,8 +3223,9 @@  dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get)
         netdev_flow = dp_netdev_pmd_find_flow(pmd, get->ufid, get->key,
                                               get->key_len);
         if (netdev_flow) {
+            bool offloaded = dpif_netdev_offload_stats(netdev_flow, pmd->dp);
             dp_netdev_flow_to_dpif_flow(netdev_flow, get->buffer, get->buffer,
-                                        get->flow, false);
+                                        get->flow, false, offloaded);
             error = 0;
             break;
         } else {
@@ -3239,6 +3288,7 @@  dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
     /* Do not allocate extra space. */
     flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len);
     memset(&flow->stats, 0, sizeof flow->stats);
+    memset(&flow->hw_stats, 0, sizeof flow->hw_stats);
     flow->dead = false;
     flow->batch = NULL;
     flow->mark = INVALID_FLOW_MARK;
@@ -3351,7 +3401,7 @@  flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
                                   put->actions, put->actions_len);
 
             if (stats) {
-                get_dpif_flow_stats(netdev_flow, stats);
+                get_dpif_flow_stats(netdev_flow, stats, false);
             }
             if (put->flags & DPIF_FP_ZERO_STATS) {
                 /* XXX: The userspace datapath uses thread local statistics
@@ -3470,7 +3520,7 @@  flow_del_on_pmd(struct dp_netdev_pmd_thread *pmd,
                                           del->key_len);
     if (netdev_flow) {
         if (stats) {
-            get_dpif_flow_stats(netdev_flow, stats);
+            get_dpif_flow_stats(netdev_flow, stats, false);
         }
         dp_netdev_pmd_remove_flow(pmd, netdev_flow);
     } else {
@@ -3604,9 +3654,12 @@  dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
         = dpif_netdev_flow_dump_thread_cast(thread_);
     struct dpif_netdev_flow_dump *dump = thread->dump;
     struct dp_netdev_flow *netdev_flows[FLOW_DUMP_MAX_BATCH];
+    bool offloaded[FLOW_DUMP_MAX_BATCH];
     int n_flows = 0;
     int i;
 
+    memset(offloaded, 0, sizeof offloaded);
+
     ovs_mutex_lock(&dump->mutex);
     if (!dump->status) {
         struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif);
@@ -3636,6 +3689,8 @@  dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
                 netdev_flows[n_flows] = CONTAINER_OF(node,
                                                      struct dp_netdev_flow,
                                                      node);
+                offloaded[n_flows] =
+                    dpif_netdev_offload_stats(netdev_flows[n_flows], pmd->dp);
             }
             /* When finishing dumping the current pmd thread, moves to
              * the next. */
@@ -3668,7 +3723,7 @@  dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
         ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
         ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
         dp_netdev_flow_to_dpif_flow(netdev_flow, &key, &mask, f,
-                                    dump->up.terse);
+                                    dump->up.terse, offloaded[i]);
     }
 
     return n_flows;