| Message ID | 20191216151047.5967-9-elibr@mellanox.com |
| --- | --- |
| State | Changes Requested |
| Delegated to | Ilya Maximets |
| Headers | show |
| Series | netdev datapath actions offload (expand) |
On 16.12.2019 16:10, Eli Britstein wrote: > From: Ophir Munk <ophirmu@mellanox.com> > > In case a flow is HW offloaded, packets do not reach the SW, thus not > counted for statistics. Use netdev flow get API in order to update the > statistics of flows by the HW statistics. > > Co-authored-by: Eli Britstein <elibr@mellanox.com> > Signed-off-by: Ophir Munk <ophirmu@mellanox.com> > Reviewed-by: Oz Shlomo <ozsh@mellanox.com> > Signed-off-by: Eli Britstein <elibr@mellanox.com> > --- > lib/dpif-netdev.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++--------- > 1 file changed, 68 insertions(+), 13 deletions(-) Some issues of this patch: 1. stats are duplicated now in dpif-netdev. We already have them in netdev-offload-dpdk, so we don't need to store them here too. 2. dpif_flow_attrs should be filled by the offload provider, not by the datapath. netdev-offload-tc fills this information and it will be lost in current implementation of dpif-netdev. 3. New 'dp_layer' types has to be documented in dpctl man. Also, 'in_hw' doesn't look like a datapath layer name. Suggesting to use 'dpdk' as netdev-offload-tc uses 'tc'. We also could return specific dp_layer for partially offloaded flows, i.e. 'ovs-partial'. Suggesting following incremental patch where I tried to address above issues (didn't update man, only compile tested, made on top of the full set): ----- Subject: [PATCH] dpif-netdev: Cleanup for stats/attrs of offloaded flows. Signed-off-by: Ilya Maximets <i.maximets@ovn.org> --- lib/dpif-netdev.c | 146 +++++++++++++++++++------------------- lib/netdev-offload-dpdk.c | 35 +++++---- 2 files changed, 94 insertions(+), 87 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 91fefb877..892b2d0f5 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -530,7 +530,6 @@ struct dp_netdev_flow { /* Statistics. */ struct dp_netdev_flow_stats stats; - struct dp_netdev_flow_stats hw_stats; /* Actions. 
*/ OVSRCU_TYPE(struct dp_netdev_actions *) actions; @@ -3011,11 +3010,49 @@ dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd, return NULL; } +static bool +dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp, + const struct dp_netdev_flow *netdev_flow, + struct dpif_flow_stats *stats, + struct dpif_flow_attrs *attrs) +{ + struct nlattr *actions; + struct ofpbuf wbuffer; + struct netdev *netdev; + struct match match; + + int ret = 0; + + if (!netdev_is_flow_api_enabled()) { + return false; + } + + netdev = netdev_ports_get(netdev_flow->flow.in_port.odp_port, dp->class); + if (!netdev) { + return false; + } + /* Taking a global 'port_mutex' to fulfill thread safety + * restrictions for the netdev-offload-dpdk module. */ + ovs_mutex_lock(&dp->port_mutex); + ret = netdev_flow_get(netdev, &match, &actions, &netdev_flow->mega_ufid, + stats, attrs, &wbuffer); + ovs_mutex_unlock(&dp->port_mutex); + netdev_close(netdev); + if (ret) { + return false; + } + + return true; +} + static void -get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_, - struct dpif_flow_stats *stats, bool hw) +get_dpif_flow_status(const struct dp_netdev *dp, + const struct dp_netdev_flow *netdev_flow_, + struct dpif_flow_stats *stats, + struct dpif_flow_attrs *attrs) { - struct dp_netdev_flow_stats *flow_stats; + struct dpif_flow_stats offload_stats; + struct dpif_flow_attrs offload_attrs; struct dp_netdev_flow *netdev_flow; unsigned long long n; long long used; @@ -3023,16 +3060,29 @@ get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_, netdev_flow = CONST_CAST(struct dp_netdev_flow *, netdev_flow_); - flow_stats = (hw) ? 
&netdev_flow->hw_stats : &netdev_flow->stats; - - atomic_read_relaxed(&flow_stats->packet_count, &n); + atomic_read_relaxed(&netdev_flow->stats.packet_count, &n); stats->n_packets = n; - atomic_read_relaxed(&flow_stats->byte_count, &n); + atomic_read_relaxed(&netdev_flow->stats.byte_count, &n); stats->n_bytes = n; - atomic_read_relaxed(&flow_stats->used, &used); + atomic_read_relaxed(&netdev_flow->stats.used, &used); stats->used = used; - atomic_read_relaxed(&flow_stats->tcp_flags, &flags); + atomic_read_relaxed(&netdev_flow->stats.tcp_flags, &flags); stats->tcp_flags = flags; + + if (dpif_netdev_get_flow_offload_status(dp, netdev_flow, + &offload_stats, &offload_attrs)) { + stats->n_packets += offload_stats.n_packets; + stats->n_bytes += offload_stats.n_bytes; + stats->used = MAX(stats->used, offload_stats.used); + stats->tcp_flags |= offload_stats.tcp_flags; + if (attrs) { + attrs->offloaded = offload_attrs.offloaded; + attrs->dp_layer = offload_attrs.dp_layer; + } + } else if (attrs) { + attrs->offloaded = false; + attrs->dp_layer = "ovs"; + } } /* Converts to the dpif_flow format, using 'key_buf' and 'mask_buf' for @@ -3040,12 +3090,11 @@ get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_, * 'mask_buf'. Actions will be returned without copying, by relying on RCU to * protect them. 
*/ static void -dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow, +dp_netdev_flow_to_dpif_flow(const struct dp_netdev *dp, + const struct dp_netdev_flow *netdev_flow, struct ofpbuf *key_buf, struct ofpbuf *mask_buf, - struct dpif_flow *flow, bool terse, bool offloaded) + struct dpif_flow *flow, bool terse) { - struct dpif_flow_stats hw_stats; - if (terse) { memset(flow, 0, sizeof *flow); } else { @@ -3085,14 +3134,8 @@ dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow, flow->ufid = netdev_flow->ufid; flow->ufid_present = true; flow->pmd_id = netdev_flow->pmd_id; - get_dpif_flow_stats(netdev_flow, &flow->stats, false); - get_dpif_flow_stats(netdev_flow, &hw_stats, true); - flow->stats.n_packets += hw_stats.n_packets; - flow->stats.n_bytes += hw_stats.n_bytes; - flow->stats.used = MAX(flow->stats.used, hw_stats.used); - flow->attrs.offloaded = offloaded; - flow->attrs.dp_layer = flow->attrs.offloaded ? "in_hw" : "ovs"; + get_dpif_flow_status(dp, netdev_flow, &flow->stats, &flow->attrs); } static int @@ -3162,44 +3205,6 @@ dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len, return 0; } -static bool -dpif_netdev_offload_stats(struct dp_netdev_flow *netdev_flow, - struct dp_netdev *dp) -{ - struct dpif_flow_stats stats; - struct dpif_flow_attrs attrs; - struct nlattr *actions; - struct ofpbuf wbuffer; - struct netdev *netdev; - struct match match; - - int ret = 0; - - if (!netdev_is_flow_api_enabled()) { - return false; - } - - netdev = netdev_ports_get(netdev_flow->flow.in_port.odp_port, dp->class); - if (!netdev) { - return false; - } - /* Taking a global 'port_mutex' to fulfill thread safety - * restrictions for the netdev-offload-dpdk module. 
*/ - ovs_mutex_lock(&dp->port_mutex); - ret = netdev_flow_get(netdev, &match, &actions, &netdev_flow->mega_ufid, - &stats, &attrs, &wbuffer); - ovs_mutex_unlock(&dp->port_mutex); - netdev_close(netdev); - if (ret) { - return false; - } - atomic_store_relaxed(&netdev_flow->hw_stats.packet_count, stats.n_packets); - atomic_store_relaxed(&netdev_flow->hw_stats.byte_count, stats.n_bytes); - atomic_store_relaxed(&netdev_flow->hw_stats.used, stats.used); - - return true; -} - static int dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get) { @@ -3233,9 +3238,8 @@ dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get) netdev_flow = dp_netdev_pmd_find_flow(pmd, get->ufid, get->key, get->key_len); if (netdev_flow) { - bool offloaded = dpif_netdev_offload_stats(netdev_flow, pmd->dp); - dp_netdev_flow_to_dpif_flow(netdev_flow, get->buffer, get->buffer, - get->flow, false, offloaded); + dp_netdev_flow_to_dpif_flow(dp, netdev_flow, get->buffer, + get->buffer, get->flow, false); error = 0; break; } else { @@ -3298,7 +3302,6 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, /* Do not allocate extra space. 
*/ flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len); memset(&flow->stats, 0, sizeof flow->stats); - memset(&flow->hw_stats, 0, sizeof flow->hw_stats); flow->dead = false; flow->batch = NULL; flow->mark = INVALID_FLOW_MARK; @@ -3411,7 +3414,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, put->actions, put->actions_len); if (stats) { - get_dpif_flow_stats(netdev_flow, stats, false); + get_dpif_flow_status(pmd->dp, netdev_flow, stats, NULL); } if (put->flags & DPIF_FP_ZERO_STATS) { /* XXX: The userspace datapath uses thread local statistics @@ -3530,7 +3533,7 @@ flow_del_on_pmd(struct dp_netdev_pmd_thread *pmd, del->key_len); if (netdev_flow) { if (stats) { - get_dpif_flow_stats(netdev_flow, stats, false); + get_dpif_flow_status(pmd->dp, netdev_flow, stats, NULL); } dp_netdev_pmd_remove_flow(pmd, netdev_flow); } else { @@ -3664,16 +3667,13 @@ dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_, = dpif_netdev_flow_dump_thread_cast(thread_); struct dpif_netdev_flow_dump *dump = thread->dump; struct dp_netdev_flow *netdev_flows[FLOW_DUMP_MAX_BATCH]; - bool offloaded[FLOW_DUMP_MAX_BATCH]; + struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif); + struct dp_netdev *dp = get_dp_netdev(&dpif->dpif); int n_flows = 0; int i; - memset(offloaded, 0, sizeof offloaded); - ovs_mutex_lock(&dump->mutex); if (!dump->status) { - struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif); - struct dp_netdev *dp = get_dp_netdev(&dpif->dpif); struct dp_netdev_pmd_thread *pmd = dump->cur_pmd; int flow_limit = MIN(max_flows, FLOW_DUMP_MAX_BATCH); @@ -3699,8 +3699,6 @@ dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_, netdev_flows[n_flows] = CONTAINER_OF(node, struct dp_netdev_flow, node); - offloaded[n_flows] = - dpif_netdev_offload_stats(netdev_flows[n_flows], pmd->dp); } /* When finishing dumping the current pmd thread, moves to * the next. 
*/ @@ -3732,8 +3730,8 @@ dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_, ofpbuf_use_stack(&key, keybuf, sizeof *keybuf); ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf); - dp_netdev_flow_to_dpif_flow(netdev_flow, &key, &mask, f, - dump->up.terse, offloaded[i]); + dp_netdev_flow_to_dpif_flow(dp, netdev_flow, &key, &mask, f, + dump->up.terse); } return n_flows; diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 0ec840ce1..d1c066992 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -57,6 +57,7 @@ struct ufid_to_rte_flow_data { ovs_u128 ufid; struct rte_flow *rte_flow; struct dpif_flow_stats stats; + bool actions_offloaded; }; /* Find rte_flow with @ufid. */ @@ -77,7 +78,7 @@ ufid_to_rte_flow_data_find(const ovs_u128 *ufid) static inline void ufid_to_rte_flow_associate(const ovs_u128 *ufid, - struct rte_flow *rte_flow) + struct rte_flow *rte_flow, bool actions_offloaded) { size_t hash = hash_bytes(ufid, sizeof *ufid, 0); struct ufid_to_rte_flow_data *data = xzalloc(sizeof *data); @@ -96,6 +97,7 @@ ufid_to_rte_flow_associate(const ovs_u128 *ufid, data->ufid = *ufid; data->rte_flow = rte_flow; + data->actions_offloaded = actions_offloaded; cmap_insert(&ufid_to_rte_flow, CONST_CAST(struct cmap_node *, &data->node), hash); @@ -1130,6 +1132,7 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev, struct offload_info *info) { struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; + bool actions_offloaded = true; struct rte_flow *flow; int ret = 0; @@ -1145,13 +1148,14 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev, * actions. 
*/ flow = netdev_offload_dpdk_mark_rss(&patterns, netdev, info->flow_mark); + actions_offloaded = false; } if (!flow) { ret = -1; goto out; } - ufid_to_rte_flow_associate(ufid, flow); + ufid_to_rte_flow_associate(ufid, flow, actions_offloaded); VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid)); @@ -1317,7 +1321,7 @@ netdev_offload_dpdk_flow_get(struct netdev *netdev, struct nlattr **actions OVS_UNUSED, const ovs_u128 *ufid, struct dpif_flow_stats *stats, - struct dpif_flow_attrs *attrs OVS_UNUSED, + struct dpif_flow_attrs *attrs, struct ofpbuf *buf OVS_UNUSED) { struct rte_flow_query_count query = { .reset = 1 }; @@ -1331,18 +1335,23 @@ netdev_offload_dpdk_flow_get(struct netdev *netdev, goto out; } - ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow, - &query, &error); - if (ret) { - goto out; - } - rte_flow_data->stats.n_packets += (query.hits_set) ? query.hits : 0; - rte_flow_data->stats.n_bytes += (query.bytes_set) ? query.bytes : 0; - if (query.hits_set && query.hits) { - rte_flow_data->stats.used = time_usec() / 1000; + attrs->offloaded = true; + if (rte_flow_data->actions_offloaded) { + attrs->dp_layer = "dpdk"; + ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow, + &query, &error); + if (ret) { + goto out; + } + rte_flow_data->stats.n_packets += (query.hits_set) ? query.hits : 0; + rte_flow_data->stats.n_bytes += (query.bytes_set) ? query.bytes : 0; + if (query.hits_set && query.hits) { + rte_flow_data->stats.used = time_msec(); + } + } else { + attrs->dp_layer = "ovs-partial"; } memcpy(stats, &rte_flow_data->stats, sizeof *stats); - out: return ret; }
On 12/16/2019 8:47 PM, Ilya Maximets wrote: > 3. New 'dp_layer' types has to be documented in dpctl man. > Also, 'in_hw' doesn't look like a datapath layer name. > Suggesting to use 'dpdk' as netdev-offload-tc uses 'tc'. > We also could return specific dp_layer for partially offloaded > flows, i.e. 'ovs-partial'. Thanks for the patch. I applied and tested. It works well. Re-thinking about the dp_layer and offloaded, I think it's wrong. The 'dp_layer' should be "ovs-dpdk" to differentiate from kernel module datapath "ovs", and denote the datapath is by dpdk. It is true for any kind of offloading. This can be done in dp_netdev_flow_to_dpif_flow to handle offloading is off, and filled in netdev-offload-dpdk (to the same value). The "offloaded" flag can be enhanced to 3 states, instead of just boolean, as below (of course need to adapt throughout the code). So, we will be able to show "partial" or "yes" for "offloaded", in dpctl/dump-flows. Please comment. diff --git a/lib/dpif.h b/lib/dpif.h index d96f854a3..dba2f3cbf 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -508,8 +508,14 @@ struct dpif_flow_detailed_stats { uint16_t tcp_flags; }; +enum ol_state { + OFFLOADED_STATE_OFF, + OFFLOADED_STATE_ON, + OFFLOADED_STATE_PARTIAL, +}; + struct dpif_flow_attrs { - bool offloaded; /* True if flow is offloaded to HW. */ + enum ol_state offloaded; const char *dp_layer; /* DP layer the flow is handled in. */ };
On 17.12.2019 18:38, Eli Britstein wrote: > > On 12/16/2019 8:47 PM, Ilya Maximets wrote: >> 3. New 'dp_layer' types has to be documented in dpctl man. >> Also, 'in_hw' doesn't look like a datapath layer name. >> Suggesting to use 'dpdk' as netdev-offload-tc uses 'tc'. >> We also could return specific dp_layer for partially offloaded >> flows, i.e. 'ovs-partial'. > > Thanks for the patch. I applied and tested. It works well. > > Re-thinking about the dp_layer and offloaded, I think it's wrong. > > The 'dp_layer' should be "ovs-dpdk" to differentiate from kernel module > datapath "ovs", and denote the datapath is by dpdk. I don't think we need to differentiate userspace and kernel datapaths like this just because they are kind of same datapath layer, also, you're dumping flows always from the specific datapath/bridge, i.e. you know what is your datapath type. Second point is that it definitely should not be called as 'ovs-dpdk' because there might be no DPDK at all. My suggestion is to have 'ovs' layer for all the usual non-offloaded flows, 'tc' for flows offloaded to linux tc via netdev-offload-tc and 'dpdk' for fully offloaded flows via netdev-offload-dpdk. > > It is true for any kind of offloading. > > This can be done in dp_netdev_flow_to_dpif_flow to handle offloading is > off, and filled in netdev-offload-dpdk (to the same value). > > The "offloaded" flag can be enhanced to 3 states, instead of just > boolean, as below (of course need to adapt throughout the code). > > So, we will be able to show "partial" or "yes" for "offloaded", in > dpctl/dump-flows. Yes, I think there was such an idea in early discussion with Roni this year. So, we could implement this. For partially offloaded flows it might make sense to have dp_layer = "ovs" since most of the processing happens in the main userspace datapath. > > Please comment. 
> > > diff --git a/lib/dpif.h b/lib/dpif.h > index d96f854a3..dba2f3cbf 100644 > --- a/lib/dpif.h > +++ b/lib/dpif.h > @@ -508,8 +508,14 @@ struct dpif_flow_detailed_stats { > uint16_t tcp_flags; > }; > > +enum ol_state { > + OFFLOADED_STATE_OFF, > + OFFLOADED_STATE_ON, > + OFFLOADED_STATE_PARTIAL, How about: enum dpif_flow_ol_state { DPIF_FLOW_OL_STATE_NOT_OFFLOADED, /* Flow fully handled in software. */ DPIF_FLOW_OL_STATE_OFFLOADED, /* Flow fully handled in hardware. */ DPIF_FLOW_OL_STATE_PARTIAL, /* Flow matching handled in hardware. */ }; > +}; > + > struct dpif_flow_attrs { > - bool offloaded; /* True if flow is offloaded to HW. */ > + enum ol_state offloaded; Some minimal comment would be nice here. enum dpif_flow_ol_state ol_state; /* Status of HW offloading. */ > const char *dp_layer; /* DP layer the flow is handled in. */ > }; >
On 12/17/2019 9:07 PM, Ilya Maximets wrote: > On 17.12.2019 18:38, Eli Britstein wrote: >> On 12/16/2019 8:47 PM, Ilya Maximets wrote: >>> 3. New 'dp_layer' types has to be documented in dpctl man. >>> Also, 'in_hw' doesn't look like a datapath layer name. >>> Suggesting to use 'dpdk' as netdev-offload-tc uses 'tc'. >>> We also could return specific dp_layer for partially offloaded >>> flows, i.e. 'ovs-partial'. >> Thanks for the patch. I applied and tested. It works well. >> >> Re-thinking about the dp_layer and offloaded, I think it's wrong. >> >> The 'dp_layer' should be "ovs-dpdk" to differentiate from kernel module >> datapath "ovs", and denote the datapath is by dpdk. > I don't think we need to differentiate userspace and kernel datapaths > like this just because they are kind of same datapath layer, also, > you're dumping flows always from the specific datapath/bridge, i.e. you > know what is your datapath type. Second point is that it definitely > should not be called as 'ovs-dpdk' because there might be no DPDK at all. > My suggestion is to have 'ovs' layer for all the usual non-offloadded > flows, 'tc' for flows offloaded to linux tc via netdev-offload-tc > and 'dpdk' for fully offloaded flows via netdev-offload-dpdk. I think it's not very clear that the DP is changed because of offloading. If we go ahead with this, so there is no point make the offloaded flag with 3 states, as partial offloaded can be noted by "ovs" and "offloaded=yes". It can just be formatted this way in dump-flows (offloaded=partial, dp: ovs). > >> It is true for any kind of offloading. >> >> This can be done in dp_netdev_flow_to_dpif_flow to handle offloading is >> off, and filled in netdev-offload-dpdk (to the same value). >> >> The "offloaded" flag can be enhanced to 3 states, instead of just >> boolean, as below (of course need to adapt throughout the code). >> >> So, we will be able to show "partial" or "yes" for "offloaded", in >> dpctl/dump-flows. 
> Yes, I think there were such idea in early discussion with Roni this year. > So, we could implement this. For partially offloaded flows it might > make sense to have dp_layer = "ovs" since most of the processing happens > in the main userspace datapath. > >> Please comment. >> >> >> diff --git a/lib/dpif.h b/lib/dpif.h >> index d96f854a3..dba2f3cbf 100644 >> --- a/lib/dpif.h >> +++ b/lib/dpif.h >> @@ -508,8 +508,14 @@ struct dpif_flow_detailed_stats { >> uint16_t tcp_flags; >> }; >> >> +enum ol_state { >> + OFFLOADED_STATE_OFF, >> + OFFLOADED_STATE_ON, >> + OFFLOADED_STATE_PARTIAL, > How about: > > enum dpif_flow_ol_state { > DPIF_FLOW_OL_STATE_NOT_OFFLOADED, /* Flow fully handled in software. */ > DPIF_FLOW_OL_STATE_OFFLOADED, /* Flow fully handled in hardware. */ > DPIF_FLOW_OL_STATE_PARTIAL, /* Flow matching handled in hardware. */ > }; > >> +}; >> + >> struct dpif_flow_attrs { >> - bool offloaded; /* True if flow is offloaded to HW. */ >> + enum ol_state offloaded; > Some minimal comment would be nice here. > enum dpif_flow_ol_state ol_state; /* Status of HW offloading. */ > >> const char *dp_layer; /* DP layer the flow is handled in. */ >> }; >>
On 17.12.2019 21:56, Eli Britstein wrote: > > On 12/17/2019 9:07 PM, Ilya Maximets wrote: >> On 17.12.2019 18:38, Eli Britstein wrote: >>> On 12/16/2019 8:47 PM, Ilya Maximets wrote: >>>> 3. New 'dp_layer' types has to be documented in dpctl man. >>>> Also, 'in_hw' doesn't look like a datapath layer name. >>>> Suggesting to use 'dpdk' as netdev-offload-tc uses 'tc'. >>>> We also could return specific dp_layer for partially offloaded >>>> flows, i.e. 'ovs-partial'. >>> Thanks for the patch. I applied and tested. It works well. >>> >>> Re-thinking about the dp_layer and offloaded, I think it's wrong. >>> >>> The 'dp_layer' should be "ovs-dpdk" to differentiate from kernel module >>> datapath "ovs", and denote the datapath is by dpdk. >> I don't think we need to differentiate userspace and kernel datapaths >> like this just because they are kind of same datapath layer, also, >> you're dumping flows always from the specific datapath/bridge, i.e. you >> know what is your datapath type. Second point is that it definitely >> should not be called as 'ovs-dpdk' because there might be no DPDK at all. >> My suggestion is to have 'ovs' layer for all the usual non-offloadded >> flows, 'tc' for flows offloaded to linux tc via netdev-offload-tc >> and 'dpdk' for fully offloaded flows via netdev-offload-dpdk. > > I think it's not very clear that the DP is changed because of > offloading. If we go ahead with this, so > > there is no point make the offloaded flag with 3 states, as partial > offloaded can be noted by "ovs" > > and "offloaded=yes". It can just be formatted this way in dump-flows > (offloaded=partial, dp: ovs). Good point. This might be easier to implement. > >> >>> It is true for any kind of offloading. >>> >>> This can be done in dp_netdev_flow_to_dpif_flow to handle offloading is >>> off, and filled in netdev-offload-dpdk (to the same value). 
>>> >>> The "offloaded" flag can be enhanced to 3 states, instead of just >>> boolean, as below (of course need to adapt throughout the code). >>> >>> So, we will be able to show "partial" or "yes" for "offloaded", in >>> dpctl/dump-flows. >> Yes, I think there were such idea in early discussion with Roni this year. >> So, we could implement this. For partially offloaded flows it might >> make sense to have dp_layer = "ovs" since most of the processing happens >> in the main userspace datapath. >> >>> Please comment. >>> >>> >>> diff --git a/lib/dpif.h b/lib/dpif.h >>> index d96f854a3..dba2f3cbf 100644 >>> --- a/lib/dpif.h >>> +++ b/lib/dpif.h >>> @@ -508,8 +508,14 @@ struct dpif_flow_detailed_stats { >>> uint16_t tcp_flags; >>> }; >>> >>> +enum ol_state { >>> + OFFLOADED_STATE_OFF, >>> + OFFLOADED_STATE_ON, >>> + OFFLOADED_STATE_PARTIAL, >> How about: >> >> enum dpif_flow_ol_state { >> DPIF_FLOW_OL_STATE_NOT_OFFLOADED, /* Flow fully handled in software. */ >> DPIF_FLOW_OL_STATE_OFFLOADED, /* Flow fully handled in hardware. */ >> DPIF_FLOW_OL_STATE_PARTIAL, /* Flow matching handled in hardware. */ >> }; >> >>> +}; >>> + >>> struct dpif_flow_attrs { >>> - bool offloaded; /* True if flow is offloaded to HW. */ >>> + enum ol_state offloaded; >> Some minimal comment would be nice here. >> enum dpif_flow_ol_state ol_state; /* Status of HW offloading. */ >> >>> const char *dp_layer; /* DP layer the flow is handled in. */ >>> }; >>>
Hi Ilya, On 12/17/2019 11:34 PM, Ilya Maximets wrote: > On 17.12.2019 21:56, Eli Britstein wrote: >> On 12/17/2019 9:07 PM, Ilya Maximets wrote: >>> On 17.12.2019 18:38, Eli Britstein wrote: >>>> On 12/16/2019 8:47 PM, Ilya Maximets wrote: >>>>> 3. New 'dp_layer' types has to be documented in dpctl man. >>>>> Also, 'in_hw' doesn't look like a datapath layer name. >>>>> Suggesting to use 'dpdk' as netdev-offload-tc uses 'tc'. >>>>> We also could return specific dp_layer for partially offloaded >>>>> flows, i.e. 'ovs-partial'. >>>> Thanks for the patch. I applied and tested. It works well. >>>> >>>> Re-thinking about the dp_layer and offloaded, I think it's wrong. >>>> >>>> The 'dp_layer' should be "ovs-dpdk" to differentiate from kernel module >>>> datapath "ovs", and denote the datapath is by dpdk. >>> I don't think we need to differentiate userspace and kernel datapaths >>> like this just because they are kind of same datapath layer, also, >>> you're dumping flows always from the specific datapath/bridge, i.e. you >>> know what is your datapath type. Second point is that it definitely >>> should not be called as 'ovs-dpdk' because there might be no DPDK at all. >>> My suggestion is to have 'ovs' layer for all the usual non-offloadded >>> flows, 'tc' for flows offloaded to linux tc via netdev-offload-tc >>> and 'dpdk' for fully offloaded flows via netdev-offload-dpdk. >> I think it's not very clear that the DP is changed because of >> offloading. If we go ahead with this, so >> >> there is no point make the offloaded flag with 3 states, as partial >> offloaded can be noted by "ovs" >> >> and "offloaded=yes". It can just be formatted this way in dump-flows >> (offloaded=partial, dp: ovs). > Good point. This might be easier to implement. So we agree that (offloaded=partial, dp: ovs) will be added to indicate that the dp runs in ovs layer with the HW providing match acceleration. We are discussing two alternatives to indicate that a flow is fully offloaded by DPDK: 1. 
(dp: dpdk, offloaded=yes) 2. (dp: ovs, offloaded=yes) The thing is that there is no real "dpdk" data plane as DPDK's software data plane is ovs, unlike TC which is a kernel software data plane. In addition, the (dp: dpdk, offloaded="") is not a valid system permutation. Therefore, I vote for option 2, giving the following permutations: (dp: ovs, offloaded="") - dp runs in ovs layer (kernel/dpdk) (dp: ovs, offloaded="yes") - dp runs in hardware (controlled by OVS) (dp: ovs, offloaded="partial") - dp runs in ovs layer with HW providing match acceleration (dp: tc, offloaded="") - dp runs in tc kernel (dp: tc, offloaded="yes") - dp runs in hardware (controlled by TC) What do you think? >>>> It is true for any kind of offloading. >>>> >>>> This can be done in dp_netdev_flow_to_dpif_flow to handle offloading is >>>> off, and filled in netdev-offload-dpdk (to the same value). >>>> >>>> The "offloaded" flag can be enhanced to 3 states, instead of just >>>> boolean, as below (of course need to adapt throughout the code). >>>> >>>> So, we will be able to show "partial" or "yes" for "offloaded", in >>>> dpctl/dump-flows. >>> Yes, I think there were such idea in early discussion with Roni this year. >>> So, we could implement this. For partially offloaded flows it might >>> make sense to have dp_layer = "ovs" since most of the processing happens >>> in the main userspace datapath. >>> >>>> Please comment. >>>> >>>> >>>> diff --git a/lib/dpif.h b/lib/dpif.h >>>> index d96f854a3..dba2f3cbf 100644 >>>> --- a/lib/dpif.h >>>> +++ b/lib/dpif.h >>>> @@ -508,8 +508,14 @@ struct dpif_flow_detailed_stats { >>>> uint16_t tcp_flags; >>>> }; >>>> >>>> +enum ol_state { >>>> + OFFLOADED_STATE_OFF, >>>> + OFFLOADED_STATE_ON, >>>> + OFFLOADED_STATE_PARTIAL, >>> How about: >>> >>> enum dpif_flow_ol_state { >>> DPIF_FLOW_OL_STATE_NOT_OFFLOADED, /* Flow fully handled in software. */ >>> DPIF_FLOW_OL_STATE_OFFLOADED, /* Flow fully handled in hardware. 
*/ >>> DPIF_FLOW_OL_STATE_PARTIAL, /* Flow matching handled in hardware. */ >>> }; >>> >>>> +}; >>>> + >>>> struct dpif_flow_attrs { >>>> - bool offloaded; /* True if flow is offloaded to HW. */ >>>> + enum ol_state offloaded; >>> Some minimal comment would be nice here. >>> enum dpif_flow_ol_state ol_state; /* Status of HW offloading. */ >>> >>>> const char *dp_layer; /* DP layer the flow is handled in. */ >>>> }; >>>>
On 18.12.2019 11:02, Oz Shlomo wrote: > Hi Ilya, > > > On 12/17/2019 11:34 PM, Ilya Maximets wrote: >> On 17.12.2019 21:56, Eli Britstein wrote: >>> On 12/17/2019 9:07 PM, Ilya Maximets wrote: >>>> On 17.12.2019 18:38, Eli Britstein wrote: >>>>> On 12/16/2019 8:47 PM, Ilya Maximets wrote: >>>>>> 3. New 'dp_layer' types has to be documented in dpctl man. >>>>>> Also, 'in_hw' doesn't look like a datapath layer name. >>>>>> Suggesting to use 'dpdk' as netdev-offload-tc uses 'tc'. >>>>>> We also could return specific dp_layer for partially offloaded >>>>>> flows, i.e. 'ovs-partial'. >>>>> Thanks for the patch. I applied and tested. It works well. >>>>> >>>>> Re-thinking about the dp_layer and offloaded, I think it's wrong. >>>>> >>>>> The 'dp_layer' should be "ovs-dpdk" to differentiate from kernel module >>>>> datapath "ovs", and denote the datapath is by dpdk. >>>> I don't think we need to differentiate userspace and kernel datapaths >>>> like this just because they are kind of same datapath layer, also, >>>> you're dumping flows always from the specific datapath/bridge, i.e. you >>>> know what is your datapath type. Second point is that it definitely >>>> should not be called as 'ovs-dpdk' because there might be no DPDK at all. >>>> My suggestion is to have 'ovs' layer for all the usual non-offloadded >>>> flows, 'tc' for flows offloaded to linux tc via netdev-offload-tc >>>> and 'dpdk' for fully offloaded flows via netdev-offload-dpdk. >>> I think it's not very clear that the DP is changed because of >>> offloading. If we go ahead with this, so >>> >>> there is no point make the offloaded flag with 3 states, as partial >>> offloaded can be noted by "ovs" >>> >>> and "offloaded=yes". It can just be formatted this way in dump-flows >>> (offloaded=partial, dp: ovs). >> Good point. This might be easier to implement. > > So we agree that (offloaded=partial, dp: ovs) will be added to indicate > that the dp runs in ovs layer with the HW providing match acceleration. 
> > We are discussing two alternatives to indicate that a flow is fully > offloaded by DPDK: > > 1. (dp: dpdk, offloaded=yes) > > 2. (dp: ovs, offloaded=yes) > > The thing is that there is no real "dpdk" data plane as DPDK's software > data plane is ovs, unlike TC which is a kernel software data plane. > > In addition, the (dp: dpdk, offloaded="") is not a valid system permutation. > > Therefore, I vote for option 2, giving the following permutations: > > (dp: ovs, offloaded="") - dp runs in ovs layer (kernel/dpdk) > > (dp: ovs, offloaded="yes") - dp runs in hardware (controlled by OVS) > > (dp: ovs, offloaded="partial") - dp runs in ovs layer with HW providing > match acceleration > > (dp: tc, offloaded="") - dp runs in tc kernel > > (dp: tc, offloaded="yes") - dp runs in hardware (controlled by TC) > > What do you think? I'd still prefer option #1, i.e. {dp:dpdk offloaded="yes"} to represent flows that offloaded by netdev-offload-dpdk for two reasons: 1. This is in line with netdev-offload-tc. 2. You can't be sure that flow offloaded by rte_flow is actually in hardware. For example, rte_flow implementation for tap interfaces works via linux TC. So, it'll be hard to correctly formulate the documentation. And this is actually the case where (dp: dpdk, offloaded="") is a valid combination even if we can't detect this case with current DPDK rte_flow API. > > >>>>> It is true for any kind of offloading. >>>>> >>>>> This can be done in dp_netdev_flow_to_dpif_flow to handle offloading is >>>>> off, and filled in netdev-offload-dpdk (to the same value). >>>>> >>>>> The "offloaded" flag can be enhanced to 3 states, instead of just >>>>> boolean, as below (of course need to adapt throughout the code). >>>>> >>>>> So, we will be able to show "partial" or "yes" for "offloaded", in >>>>> dpctl/dump-flows. >>>> Yes, I think there were such idea in early discussion with Roni this year. >>>> So, we could implement this. 
For partially offloaded flows it might >>>> make sense to have dp_layer = "ovs" since most of the processing happens >>>> in the main userspace datapath. >>>> >>>>> Please comment. >>>>> >>>>> >>>>> diff --git a/lib/dpif.h b/lib/dpif.h >>>>> index d96f854a3..dba2f3cbf 100644 >>>>> --- a/lib/dpif.h >>>>> +++ b/lib/dpif.h >>>>> @@ -508,8 +508,14 @@ struct dpif_flow_detailed_stats { >>>>> uint16_t tcp_flags; >>>>> }; >>>>> >>>>> +enum ol_state { >>>>> + OFFLOADED_STATE_OFF, >>>>> + OFFLOADED_STATE_ON, >>>>> + OFFLOADED_STATE_PARTIAL, >>>> How about: >>>> >>>> enum dpif_flow_ol_state { >>>> DPIF_FLOW_OL_STATE_NOT_OFFLOADED, /* Flow fully handled in software. */ >>>> DPIF_FLOW_OL_STATE_OFFLOADED, /* Flow fully handled in hardware. */ >>>> DPIF_FLOW_OL_STATE_PARTIAL, /* Flow matching handled in hardware. */ >>>> }; >>>> >>>>> +}; >>>>> + >>>>> struct dpif_flow_attrs { >>>>> - bool offloaded; /* True if flow is offloaded to HW. */ >>>>> + enum ol_state offloaded; >>>> Some minimal comment would be nice here. >>>> enum dpif_flow_ol_state ol_state; /* Status of HW offloading. */ >>>> >>>>> const char *dp_layer; /* DP layer the flow is handled in. */ >>>>> }; >>>>>
On 18.12.2019 12:04, Ilya Maximets wrote: > On 18.12.2019 11:02, Oz Shlomo wrote: >> Hi Ilya, >> >> >> On 12/17/2019 11:34 PM, Ilya Maximets wrote: >>> On 17.12.2019 21:56, Eli Britstein wrote: >>>> On 12/17/2019 9:07 PM, Ilya Maximets wrote: >>>>> On 17.12.2019 18:38, Eli Britstein wrote: >>>>>> On 12/16/2019 8:47 PM, Ilya Maximets wrote: >>>>>>> 3. New 'dp_layer' types has to be documented in dpctl man. >>>>>>> Also, 'in_hw' doesn't look like a datapath layer name. >>>>>>> Suggesting to use 'dpdk' as netdev-offload-tc uses 'tc'. >>>>>>> We also could return specific dp_layer for partially offloaded >>>>>>> flows, i.e. 'ovs-partial'. >>>>>> Thanks for the patch. I applied and tested. It works well. >>>>>> >>>>>> Re-thinking about the dp_layer and offloaded, I think it's wrong. >>>>>> >>>>>> The 'dp_layer' should be "ovs-dpdk" to differentiate from kernel module >>>>>> datapath "ovs", and denote the datapath is by dpdk. >>>>> I don't think we need to differentiate userspace and kernel datapaths >>>>> like this just because they are kind of same datapath layer, also, >>>>> you're dumping flows always from the specific datapath/bridge, i.e. you >>>>> know what is your datapath type. Second point is that it definitely >>>>> should not be called as 'ovs-dpdk' because there might be no DPDK at all. >>>>> My suggestion is to have 'ovs' layer for all the usual non-offloadded >>>>> flows, 'tc' for flows offloaded to linux tc via netdev-offload-tc >>>>> and 'dpdk' for fully offloaded flows via netdev-offload-dpdk. >>>> I think it's not very clear that the DP is changed because of >>>> offloading. If we go ahead with this, so >>>> >>>> there is no point make the offloaded flag with 3 states, as partial >>>> offloaded can be noted by "ovs" >>>> >>>> and "offloaded=yes". It can just be formatted this way in dump-flows >>>> (offloaded=partial, dp: ovs). >>> Good point. This might be easier to implement. 
>> >> So we agree that (offloaded=partial, dp: ovs) will be added to indicate >> that the dp runs in ovs layer with the HW providing match acceleration. >> >> We are discussing two alternatives to indicate that a flow is fully >> offloaded by DPDK: >> >> 1. (dp: dpdk, offloaded=yes) >> >> 2. (dp: ovs, offloaded=yes) >> >> The thing is that there is no real "dpdk" data plane as DPDK's software >> data plane is ovs, unlike TC which is a kernel software data plane. >> >> In addition, the (dp: dpdk, offloaded="") is not a valid system permutation. >> >> Therefore, I vote for option 2, giving the following permutations: >> >> (dp: ovs, offloaded="") - dp runs in ovs layer (kernel/dpdk) >> >> (dp: ovs, offloaded="yes") - dp runs in hardware (controlled by OVS) >> >> (dp: ovs, offloaded="partial") - dp runs in ovs layer with HW providing >> match acceleration >> >> (dp: tc, offloaded="") - dp runs in tc kernel >> >> (dp: tc, offloaded="yes") - dp runs in hardware (controlled by TC) >> >> What do you think? > > I'd still prefer option #1, i.e. {dp:dpdk offloaded="yes"} to represent > flows that offloaded by netdev-offload-dpdk for two reasons: > > 1. This is in line with netdev-offload-tc. > > 2. You can't be sure that flow offloaded by rte_flow is actually in hardware. > For example, rte_flow implementation for tap interfaces works via linux TC. > So, it'll be hard to correctly formulate the documentation. And this is > actually the case where (dp: dpdk, offloaded="") is a valid combination > even if we can't detect this case with current DPDK rte_flow API. > CC: Simon as he might be interested in that topic. >> >> >>>>>> It is true for any kind of offloading. >>>>>> >>>>>> This can be done in dp_netdev_flow_to_dpif_flow to handle offloading is >>>>>> off, and filled in netdev-offload-dpdk (to the same value). >>>>>> >>>>>> The "offloaded" flag can be enhanced to 3 states, instead of just >>>>>> boolean, as below (of course need to adapt throughout the code). 
>>>>>> >>>>>> So, we will be able to show "partial" or "yes" for "offloaded", in >>>>>> dpctl/dump-flows. >>>>> Yes, I think there were such idea in early discussion with Roni this year. >>>>> So, we could implement this. For partially offloaded flows it might >>>>> make sense to have dp_layer = "ovs" since most of the processing happens >>>>> in the main userspace datapath. >>>>> >>>>>> Please comment. >>>>>> >>>>>> >>>>>> diff --git a/lib/dpif.h b/lib/dpif.h >>>>>> index d96f854a3..dba2f3cbf 100644 >>>>>> --- a/lib/dpif.h >>>>>> +++ b/lib/dpif.h >>>>>> @@ -508,8 +508,14 @@ struct dpif_flow_detailed_stats { >>>>>> uint16_t tcp_flags; >>>>>> }; >>>>>> >>>>>> +enum ol_state { >>>>>> + OFFLOADED_STATE_OFF, >>>>>> + OFFLOADED_STATE_ON, >>>>>> + OFFLOADED_STATE_PARTIAL, >>>>> How about: >>>>> >>>>> enum dpif_flow_ol_state { >>>>> DPIF_FLOW_OL_STATE_NOT_OFFLOADED, /* Flow fully handled in software. */ >>>>> DPIF_FLOW_OL_STATE_OFFLOADED, /* Flow fully handled in hardware. */ >>>>> DPIF_FLOW_OL_STATE_PARTIAL, /* Flow matching handled in hardware. */ >>>>> }; >>>>> >>>>>> +}; >>>>>> + >>>>>> struct dpif_flow_attrs { >>>>>> - bool offloaded; /* True if flow is offloaded to HW. */ >>>>>> + enum ol_state offloaded; >>>>> Some minimal comment would be nice here. >>>>> enum dpif_flow_ol_state ol_state; /* Status of HW offloading. */ >>>>> >>>>>> const char *dp_layer; /* DP layer the flow is handled in. */ >>>>>> }; >>>>>>
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 7ebf4ef87..be3205a7c 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -530,6 +530,7 @@ struct dp_netdev_flow { /* Statistics. */ struct dp_netdev_flow_stats stats; + struct dp_netdev_flow_stats hw_stats; /* Actions. */ OVSRCU_TYPE(struct dp_netdev_actions *) actions; @@ -3002,8 +3003,9 @@ dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd, static void get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_, - struct dpif_flow_stats *stats) + struct dpif_flow_stats *stats, bool hw) { + struct dp_netdev_flow_stats *flow_stats; struct dp_netdev_flow *netdev_flow; unsigned long long n; long long used; @@ -3011,13 +3013,15 @@ get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_, netdev_flow = CONST_CAST(struct dp_netdev_flow *, netdev_flow_); - atomic_read_relaxed(&netdev_flow->stats.packet_count, &n); + flow_stats = (hw) ? &netdev_flow->hw_stats : &netdev_flow->stats; + + atomic_read_relaxed(&flow_stats->packet_count, &n); stats->n_packets = n; - atomic_read_relaxed(&netdev_flow->stats.byte_count, &n); + atomic_read_relaxed(&flow_stats->byte_count, &n); stats->n_bytes = n; - atomic_read_relaxed(&netdev_flow->stats.used, &used); + atomic_read_relaxed(&flow_stats->used, &used); stats->used = used; - atomic_read_relaxed(&netdev_flow->stats.tcp_flags, &flags); + atomic_read_relaxed(&flow_stats->tcp_flags, &flags); stats->tcp_flags = flags; } @@ -3028,8 +3032,10 @@ get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_, static void dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow, struct ofpbuf *key_buf, struct ofpbuf *mask_buf, - struct dpif_flow *flow, bool terse) + struct dpif_flow *flow, bool terse, bool offloaded) { + struct dpif_flow_stats hw_stats; + if (terse) { memset(flow, 0, sizeof *flow); } else { @@ -3069,10 +3075,14 @@ dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow, flow->ufid = netdev_flow->ufid; flow->ufid_present = 
true; flow->pmd_id = netdev_flow->pmd_id; - get_dpif_flow_stats(netdev_flow, &flow->stats); + get_dpif_flow_stats(netdev_flow, &flow->stats, false); + get_dpif_flow_stats(netdev_flow, &hw_stats, true); + flow->stats.n_packets += hw_stats.n_packets; + flow->stats.n_bytes += hw_stats.n_bytes; + flow->stats.used = MAX(flow->stats.used, hw_stats.used); - flow->attrs.offloaded = false; - flow->attrs.dp_layer = "ovs"; + flow->attrs.offloaded = offloaded; + flow->attrs.dp_layer = flow->attrs.offloaded ? "in_hw" : "ovs"; } static int @@ -3142,6 +3152,44 @@ dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len, return 0; } +static bool +dpif_netdev_offload_stats(struct dp_netdev_flow *netdev_flow, + struct dp_netdev *dp) +{ + struct dpif_flow_stats stats; + struct dpif_flow_attrs attrs; + struct nlattr *actions; + struct ofpbuf wbuffer; + struct netdev *netdev; + struct match match; + + int ret = 0; + + if (!netdev_is_flow_api_enabled()) { + return false; + } + + netdev = netdev_ports_get(netdev_flow->flow.in_port.odp_port, dp->class); + if (!netdev) { + return false; + } + /* Taking a global 'port_mutex' to fulfill thread safety + * restrictions for the netdev-offload-dpdk module. 
*/ + ovs_mutex_lock(&dp->port_mutex); + ret = netdev_flow_get(netdev, &match, &actions, &netdev_flow->mega_ufid, + &stats, &attrs, &wbuffer); + ovs_mutex_unlock(&dp->port_mutex); + netdev_close(netdev); + if (ret) { + return false; + } + atomic_store_relaxed(&netdev_flow->hw_stats.packet_count, stats.n_packets); + atomic_store_relaxed(&netdev_flow->hw_stats.byte_count, stats.n_bytes); + atomic_store_relaxed(&netdev_flow->hw_stats.used, stats.used); + + return true; +} + static int dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get) { @@ -3175,8 +3223,9 @@ dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get) netdev_flow = dp_netdev_pmd_find_flow(pmd, get->ufid, get->key, get->key_len); if (netdev_flow) { + bool offloaded = dpif_netdev_offload_stats(netdev_flow, pmd->dp); dp_netdev_flow_to_dpif_flow(netdev_flow, get->buffer, get->buffer, - get->flow, false); + get->flow, false, offloaded); error = 0; break; } else { @@ -3239,6 +3288,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, /* Do not allocate extra space. 
*/ flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len); memset(&flow->stats, 0, sizeof flow->stats); + memset(&flow->hw_stats, 0, sizeof flow->hw_stats); flow->dead = false; flow->batch = NULL; flow->mark = INVALID_FLOW_MARK; @@ -3351,7 +3401,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, put->actions, put->actions_len); if (stats) { - get_dpif_flow_stats(netdev_flow, stats); + get_dpif_flow_stats(netdev_flow, stats, false); } if (put->flags & DPIF_FP_ZERO_STATS) { /* XXX: The userspace datapath uses thread local statistics @@ -3470,7 +3520,7 @@ flow_del_on_pmd(struct dp_netdev_pmd_thread *pmd, del->key_len); if (netdev_flow) { if (stats) { - get_dpif_flow_stats(netdev_flow, stats); + get_dpif_flow_stats(netdev_flow, stats, false); } dp_netdev_pmd_remove_flow(pmd, netdev_flow); } else { @@ -3604,9 +3654,12 @@ dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_, = dpif_netdev_flow_dump_thread_cast(thread_); struct dpif_netdev_flow_dump *dump = thread->dump; struct dp_netdev_flow *netdev_flows[FLOW_DUMP_MAX_BATCH]; + bool offloaded[FLOW_DUMP_MAX_BATCH]; int n_flows = 0; int i; + memset(offloaded, 0, sizeof offloaded); + ovs_mutex_lock(&dump->mutex); if (!dump->status) { struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif); @@ -3636,6 +3689,8 @@ dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_, netdev_flows[n_flows] = CONTAINER_OF(node, struct dp_netdev_flow, node); + offloaded[n_flows] = + dpif_netdev_offload_stats(netdev_flows[n_flows], pmd->dp); } /* When finishing dumping the current pmd thread, moves to * the next. */ @@ -3668,7 +3723,7 @@ dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_, ofpbuf_use_stack(&key, keybuf, sizeof *keybuf); ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf); dp_netdev_flow_to_dpif_flow(netdev_flow, &key, &mask, f, - dump->up.terse); + dump->up.terse, offloaded[i]); } return n_flows;