[ovs-dev,V6,05/18] netdev-dpdk: Introduce rte flow query function
diff mbox series

Message ID 20191219115436.10354-6-elibr@mellanox.com
State Changes Requested
Delegated to: Ilya Maximets
Headers show
Series
  • netdev datapath actions offload
Related show

Commit Message

Eli Britstein Dec. 19, 2019, 11:54 a.m. UTC
Introduce a rte flow query function as a pre-step towards reading HW
statistics of fully offloaded flows.

Signed-off-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
---
 lib/netdev-dpdk.c | 30 ++++++++++++++++++++++++++++++
 lib/netdev-dpdk.h |  6 ++++++
 2 files changed, 36 insertions(+)

Comments

贺鹏 Dec. 28, 2019, 8:27 a.m. UTC | #1
Hi,

there is a potential memory leak in ovs-dpdk hw offload, when remove a
dpdk port in datapath, the dpif will call dpif_port_del and will
eventually remove the netdev
from the map *port_to_netdev* (in netdev-offload). Meanwhile, a
resulted datapath reconfiguration will flush all the marked flows by
calling *flow_mark_flush*, however, this function only put the offload
requests on the queue, it does not wait for the flow deletion
finishes. So there could be a case that the offload thread tries to
delete a hw flow, however, since the netdev is removed, it will fail
at *netdev_ports_get* function, thus, will not call *netdev_flow_del*
thus not free the flow entry (ufid_to_rte_flow_data), which will cause
memory leak.

In fact, since any port removal will hold dp->port_mutex, when calling
*flow_mark_flush*, no flow deletion will really happen since the
offload thread cannot get the *dp->port_mutex* to call
*netdev_flow_del*.

To fix this, I guess we'd better design a wait-done mechanism to wait the
offload process to finish all the deletion before calling *netdev_ports_remove*.
But, first, the holding of dp->port_mutex in offload thread which
prevents the real removal of the offloaded
flows should also be considered.

Also, this patch includes querying the hw stats also
depends on the dp->port_mutex to be thread safe, these also needs to be fixed.

Eli Britstein <elibr@mellanox.com> 于2019年12月19日周四 下午7:54写道:
>
> Introduce a rte flow query function as a pre-step towards reading HW
> statistics of fully offloaded flows.
>
> Signed-off-by: Eli Britstein <elibr@mellanox.com>
> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
> ---
>  lib/netdev-dpdk.c | 30 ++++++++++++++++++++++++++++++
>  lib/netdev-dpdk.h |  6 ++++++
>  2 files changed, 36 insertions(+)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 663eef5b8..74fc9a99c 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -4527,6 +4527,36 @@ netdev_dpdk_rte_flow_create(struct netdev *netdev,
>      return flow;
>  }
>
> +int
> +netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
> +                                 struct rte_flow *rte_flow,
> +                                 struct rte_flow_query_count *query,
> +                                 struct rte_flow_error *error)
> +{
> +    struct rte_flow_action_count count = { .shared = 0, .id = 0 };
> +    const struct rte_flow_action actions[] = {
> +        {
> +            .type = RTE_FLOW_ACTION_TYPE_COUNT,
> +            .conf = &count,
> +        },
> +        {
> +            .type = RTE_FLOW_ACTION_TYPE_END,
> +        },
> +    };
> +    struct netdev_dpdk *dev;
> +    int ret;
> +
> +    if (!is_dpdk_class(netdev->netdev_class)) {
> +        return -1;
> +    }
> +
> +    dev = netdev_dpdk_cast(netdev);
> +    ovs_mutex_lock(&dev->mutex);
> +    ret = rte_flow_query(dev->port_id, rte_flow, actions, query, error);
> +    ovs_mutex_unlock(&dev->mutex);
> +    return ret;
> +}
> +
>  #define NETDEV_DPDK_CLASS_COMMON                            \
>      .is_pmd = true,                                         \
>      .alloc = netdev_dpdk_alloc,                             \
> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
> index 60631c4f0..59919a89a 100644
> --- a/lib/netdev-dpdk.h
> +++ b/lib/netdev-dpdk.h
> @@ -31,6 +31,7 @@ struct rte_flow_error;
>  struct rte_flow_attr;
>  struct rte_flow_item;
>  struct rte_flow_action;
> +struct rte_flow_query_count;
>
>  void netdev_dpdk_register(void);
>  void free_dpdk_buf(struct dp_packet *);
> @@ -47,6 +48,11 @@ netdev_dpdk_rte_flow_create(struct netdev *netdev,
>                              const struct rte_flow_item *items,
>                              const struct rte_flow_action *actions,
>                              struct rte_flow_error *error);
> +int
> +netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
> +                                 struct rte_flow *rte_flow,
> +                                 struct rte_flow_query_count *query,
> +                                 struct rte_flow_error *error);
>
>  #else
>
> --
> 2.14.5
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Eli Britstein Jan. 6, 2020, 9:22 a.m. UTC | #2
On 12/28/2019 10:27 AM, 贺鹏 wrote:
> Hi,
>
> there is a potential memory leak in ovs-dpdk hw offload, when remove a
> dpdk port in datapath, the dpif will call dpif_port_del and will
> eventually remove the netdev
> from the map *port_to_netdev* (in netdev-offload). Meanwhile, a
> resulted datapath reconfiguration will flush all the marked flows by
> calling *flow_mark_flush*, however, this function only put the offload
> requests on the queue, it does not wait for the flow deletion
> finishes. So there could be a case that the offload thread tries to
> delete a hw flow, however, since the netdev is removed, it will fail
> at *netdev_ports_get* function, thus, will not call *netdev_flow_del*
> thus not free the flow entry (ufid_to_rte_flow_data), which will cause
> memory leak.
>
> In fact, since any port removal will hold dp->port_mutex, when calling
> *flow_mark_flush*, no flow deletion will really happen since the
> offload thread cannot get the *dp->port_mutex* to call
> *netdev_flow_del*.
OK, but this issue is not related to this patch-set.
>
> To fix this, I guess we'd better design a wait-done mechanism to wait the
> offload process to finish all the deletion before calling *netdev_ports_remove*.
> But, first, the holding of dp->port_mutex in offload thread which
> prevents the real removal of the offloaded
> flows should also be considered.
>
> Also, this patch includes querying the hw stats also
> depends on the dp->port_mutex to be thread safe, these also needs to be fixed.
For querying the HW stats there is not such problem. There is no crash 
or another memory leak if querying a flow that was already deleted.
>
> Eli Britstein <elibr@mellanox.com> 于2019年12月19日周四 下午7:54写道:
>> Introduce a rte flow query function as a pre-step towards reading HW
>> statistics of fully offloaded flows.
>>
>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
>> Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
>> ---
>>   lib/netdev-dpdk.c | 30 ++++++++++++++++++++++++++++++
>>   lib/netdev-dpdk.h |  6 ++++++
>>   2 files changed, 36 insertions(+)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index 663eef5b8..74fc9a99c 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -4527,6 +4527,36 @@ netdev_dpdk_rte_flow_create(struct netdev *netdev,
>>       return flow;
>>   }
>>
>> +int
>> +netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
>> +                                 struct rte_flow *rte_flow,
>> +                                 struct rte_flow_query_count *query,
>> +                                 struct rte_flow_error *error)
>> +{
>> +    struct rte_flow_action_count count = { .shared = 0, .id = 0 };
>> +    const struct rte_flow_action actions[] = {
>> +        {
>> +            .type = RTE_FLOW_ACTION_TYPE_COUNT,
>> +            .conf = &count,
>> +        },
>> +        {
>> +            .type = RTE_FLOW_ACTION_TYPE_END,
>> +        },
>> +    };
>> +    struct netdev_dpdk *dev;
>> +    int ret;
>> +
>> +    if (!is_dpdk_class(netdev->netdev_class)) {
>> +        return -1;
>> +    }
>> +
>> +    dev = netdev_dpdk_cast(netdev);
>> +    ovs_mutex_lock(&dev->mutex);
>> +    ret = rte_flow_query(dev->port_id, rte_flow, actions, query, error);
>> +    ovs_mutex_unlock(&dev->mutex);
>> +    return ret;
>> +}
>> +
>>   #define NETDEV_DPDK_CLASS_COMMON                            \
>>       .is_pmd = true,                                         \
>>       .alloc = netdev_dpdk_alloc,                             \
>> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
>> index 60631c4f0..59919a89a 100644
>> --- a/lib/netdev-dpdk.h
>> +++ b/lib/netdev-dpdk.h
>> @@ -31,6 +31,7 @@ struct rte_flow_error;
>>   struct rte_flow_attr;
>>   struct rte_flow_item;
>>   struct rte_flow_action;
>> +struct rte_flow_query_count;
>>
>>   void netdev_dpdk_register(void);
>>   void free_dpdk_buf(struct dp_packet *);
>> @@ -47,6 +48,11 @@ netdev_dpdk_rte_flow_create(struct netdev *netdev,
>>                               const struct rte_flow_item *items,
>>                               const struct rte_flow_action *actions,
>>                               struct rte_flow_error *error);
>> +int
>> +netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
>> +                                 struct rte_flow *rte_flow,
>> +                                 struct rte_flow_query_count *query,
>> +                                 struct rte_flow_error *error);
>>
>>   #else
>>
>> --
>> 2.14.5
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&amp;data=02%7C01%7Celibr%40mellanox.com%7C9791d35f44c14c678c6b08d78b6fc941%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637131184556908673&amp;sdata=Tg4pIv0sti9xKWSS7g%2F2PWR9qrZ12tF1Wx%2F%2Bpg0yDlk%3D&amp;reserved=0
>
>

Patch
diff mbox series

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 663eef5b8..74fc9a99c 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -4527,6 +4527,36 @@  netdev_dpdk_rte_flow_create(struct netdev *netdev,
     return flow;
 }
 
+int
+netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
+                                 struct rte_flow *rte_flow,
+                                 struct rte_flow_query_count *query,
+                                 struct rte_flow_error *error)
+{
+    struct rte_flow_action_count count = { .shared = 0, .id = 0 };
+    const struct rte_flow_action actions[] = {
+        {
+            .type = RTE_FLOW_ACTION_TYPE_COUNT,
+            .conf = &count,
+        },
+        {
+            .type = RTE_FLOW_ACTION_TYPE_END,
+        },
+    };
+    struct netdev_dpdk *dev;
+    int ret;
+
+    if (!is_dpdk_class(netdev->netdev_class)) {
+        return -1;
+    }
+
+    dev = netdev_dpdk_cast(netdev);
+    ovs_mutex_lock(&dev->mutex);
+    ret = rte_flow_query(dev->port_id, rte_flow, actions, query, error);
+    ovs_mutex_unlock(&dev->mutex);
+    return ret;
+}
+
 #define NETDEV_DPDK_CLASS_COMMON                            \
     .is_pmd = true,                                         \
     .alloc = netdev_dpdk_alloc,                             \
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
index 60631c4f0..59919a89a 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -31,6 +31,7 @@  struct rte_flow_error;
 struct rte_flow_attr;
 struct rte_flow_item;
 struct rte_flow_action;
+struct rte_flow_query_count;
 
 void netdev_dpdk_register(void);
 void free_dpdk_buf(struct dp_packet *);
@@ -47,6 +48,11 @@  netdev_dpdk_rte_flow_create(struct netdev *netdev,
                             const struct rte_flow_item *items,
                             const struct rte_flow_action *actions,
                             struct rte_flow_error *error);
+int
+netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
+                                 struct rte_flow *rte_flow,
+                                 struct rte_flow_query_count *query,
+                                 struct rte_flow_error *error);
 
 #else