diff mbox series

[ovs-dev,v2,1/3] netdev-dpdk: Expose flow creation/destruction calls

Message ID 1550750870-25004-2-git-send-email-ophirmu@mellanox.com
State Superseded
Headers show
Series Move offloading-code into a new file | expand

Commit Message

Ophir Munk Feb. 21, 2019, 12:07 p.m. UTC
From: Roni Bar Yanai <roniba@mellanox.com>

Before offloading-code was added to the netdev-dpdk.c file (MARK and
RSS actions) the only DPDK RTE calls in use were rte_flow_create() and
rte_flow_destroy(). In preparation for splitting the offloading-code
from the netdev-dpdk.c file to a separate file, it is required
to embed these RTE calls into a global netdev-dpdk-* API so that
they can be called from the new file. An example for this requirement
can be seen in the handling of dpdk_mutex, which should be encapsulated
inside netdev-dpdk class (netdev-dpdk.c file), and should be unknown
to the outside callers. This commit embeds the rte_flow_create() call
inside the netdev_dpdk_flow_create() API and the rte_flow_destroy()
call inside the netdev_dpdk_rte_flow_destroy() API.

Reviewed-by: Asaf Penso <asafp@mellanox.com>
Signed-off-by: Roni Bar Yanai <roniba@mellanox.com>
Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
---
 lib/netdev-dpdk.c | 51 +++++++++++++++++++++++++++++++++++++++------------
 lib/netdev-dpdk.h | 14 ++++++++++++++
 2 files changed, 53 insertions(+), 12 deletions(-)

Comments

0-day Robot Feb. 21, 2019, 1 p.m. UTC | #1
Bleep bloop.  Greetings Ophir Munk, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Unexpected sign-offs from developers who are not authors or co-authors or committers: Ophir Munk <ophirmu@mellanox.com>
Lines checked: 155, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email aconole@bytheb.org

Thanks,
0-day Robot
Ilya Maximets Feb. 21, 2019, 3:27 p.m. UTC | #2
On 21.02.2019 15:07, Ophir Munk wrote:
> From: Roni Bar Yanai <roniba@mellanox.com>
> 
> Before offloading-code was added to the netdev-dpdk.c file (MARK and
> RSS actions) the only DPDK RTE calls in use were rte_flow_create() and
> rte_flow_destroy(). In preparation for splitting the offloading-code
> from the netdev-dpdk.c file to a separate file, it is required
> to embed these RTE calls into a global netdev-dpdk-* API so that
> they can be called from the new file. An example for this requirement
> can be seen in the handling of dpdk_mutex, which should be encapsulated

You probably meant dev->mutex.

> inside netdev-dpdk class (netdev-dpdk.c file), and should be unknown
> to the outside callers. This commit embeds the rte_flow_create() call
> inside the netdev_dpdk_flow_create() API and the rte_flow_destroy()
> call inside the netdev_dpdk_rte_flow_destroy() API.
> 
> Reviewed-by: Asaf Penso <asafp@mellanox.com>
> Signed-off-by: Roni Bar Yanai <roniba@mellanox.com>
> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> ---
>  lib/netdev-dpdk.c | 51 +++++++++++++++++++++++++++++++++++++++------------
>  lib/netdev-dpdk.h | 14 ++++++++++++++
>  2 files changed, 53 insertions(+), 12 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 32a6ffd..163d4ec 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -4203,6 +4203,42 @@ unlock:
>      return err;
>  }
>  
> +int
> +netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
> +                             struct rte_flow *rte_flow,
> +                             struct rte_flow_error *error)
> +{
> +    if (!is_dpdk_class(netdev->netdev_class)) {
> +        return -1;
> +    }

Not sure if this check is needed, because we're registering
this offload API only for DPDK devices. However, it's not
correct anyway, because 'is_dpdk_class' will return 'true'
for vhost netdevs which are not rte_ethdev devices, i.e. has
no offloading API.

> +
> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +    int ret;
> +
> +    ovs_mutex_lock(&dev->mutex);
> +    ret = rte_flow_destroy(dev->port_id, rte_flow, error);
> +    ovs_mutex_unlock(&dev->mutex);
> +    return ret;
> +}
> +
> +struct rte_flow *
> +netdev_dpdk_rte_flow_create(struct netdev *netdev,
> +                            const struct rte_flow_attr *attr,
> +                            const struct rte_flow_item *items,
> +                            const struct rte_flow_action *actions,
> +                            struct rte_flow_error *error)
> +{
> +    if (!is_dpdk_class(netdev->netdev_class)) {
> +        return NULL;
> +    }

Same here.

> +
> +    struct rte_flow *flow;
> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

It's better to have an empty line between declarations and code.

> +    ovs_mutex_lock(&dev->mutex);
> +    flow = rte_flow_create(dev->port_id, attr, items, actions, error);
> +    ovs_mutex_unlock(&dev->mutex);
> +    return flow;
> +}
>  
>  /* Find rte_flow with @ufid */
>  static struct rte_flow *
> @@ -4554,7 +4590,6 @@ netdev_dpdk_add_rte_flow_offload(struct netdev *netdev,
>                                   size_t actions_len OVS_UNUSED,
>                                   const ovs_u128 *ufid,
>                                   struct offload_info *info) {
> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      const struct rte_flow_attr flow_attr = {
>          .group = 0,
>          .priority = 0,
> @@ -4759,15 +4794,12 @@ end_proto_check:
>      mark.id = info->flow_mark;
>      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark);
>  
> -    ovs_mutex_lock(&dev->mutex);
>  
>      rss = add_flow_rss_action(&actions, netdev);

This doesn't look right. 'add_flow_rss_action' Uses 'netdev->n_rxq'
that should not be accessed without 'dev->mutex'. Otherwise you may
mess up with reconfiguration and segfault here.

This could be avoided if dpif-netdev will really wait for completion
of all the offloading operations for the device before reconfiguring it,
but this is not yet implemented and will probably require changes in
netdev offloading API.


And this is, probably, one of the examples of atomicity, but it's not
the atomicity of few subsequent rte_flow calls, but the atomicity of
work with netdev fields. Because even if this will not crash, we'll
try to offload rss action with incorrect number of queues.

>      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>  
> -    flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
> -                           actions.actions, &error);
> -
> -    ovs_mutex_unlock(&dev->mutex);
> +    flow = netdev_dpdk_rte_flow_create(netdev, &flow_attr,patterns.items,
> +                            actions.actions, &error);

Please, keep the indents for function arguments. i.e. it's better to align
arguments to the first parenthesis.
This comment is for many other places in code.

>  
>      free(rss);
>      if (!flow) {
> @@ -4861,13 +4893,9 @@ static int
>  netdev_dpdk_destroy_rte_flow(struct netdev *netdev,
>                               const ovs_u128 *ufid,
>                               struct rte_flow *rte_flow) {
> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      struct rte_flow_error error;
> -    int ret;
> +    int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error);
>  
> -    ovs_mutex_lock(&dev->mutex);
> -
> -    ret = rte_flow_destroy(dev->port_id, rte_flow, &error);
>      if (ret == 0) {
>          ufid_to_rte_flow_disassociate(ufid);
>          VLOG_DBG("%s: removed rte flow %p associated with ufid " UUID_FMT "\n",
> @@ -4878,7 +4906,6 @@ netdev_dpdk_destroy_rte_flow(struct netdev *netdev,
>                   netdev_get_name(netdev), error.type, error.message);
>      }
>  
> -    ovs_mutex_unlock(&dev->mutex);
>      return ret;
>  }
>  
> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
> index b7d02a7..82d2828 100644
> --- a/lib/netdev-dpdk.h
> +++ b/lib/netdev-dpdk.h
> @@ -22,11 +22,25 @@
>  #include "openvswitch/compiler.h"
>  
>  struct dp_packet;
> +struct netdev;
> +struct rte_flow;
> +struct rte_flow_error;
> +struct rte_flow_attr;
> +struct rte_flow_item;
> +struct rte_flow_action;
>  
>  #ifdef DPDK_NETDEV
>  
>  void netdev_dpdk_register(void);
>  void free_dpdk_buf(struct dp_packet *);
> +int netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
> +                             struct rte_flow *rte_flow,
> +                             struct rte_flow_error *error);
> +struct rte_flow *netdev_dpdk_rte_flow_create(struct netdev *netdev,
> +                            const struct rte_flow_attr *attr,
> +                            const struct rte_flow_item *items,
> +                            const struct rte_flow_action *actions,
> +                            struct rte_flow_error *error);

Alignments.

>  
>  #else
>  
>
Roni Bar Yanai Feb. 21, 2019, 4:37 p.m. UTC | #3
> -----Original Message-----
> From: Ilya Maximets <i.maximets@samsung.com>
> Sent: Thursday, February 21, 2019 5:27 PM
> To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
> Cc: Ian Stokes <ian.stokes@intel.com>; Olga Shern <olgas@mellanox.com>;
> Kevin Traynor <ktraynor@redhat.com>; Asaf Penso <asafp@mellanox.com>;
> Roni Bar Yanai <roniba@mellanox.com>; Flavio Leitner <fbl@sysclose.org>
> Subject: Re: [PATCH v2 1/3] netdev-dpdk: Expose flow creation/destruction
> calls
> 
> On 21.02.2019 15:07, Ophir Munk wrote:
> > From: Roni Bar Yanai <roniba@mellanox.com>
> >
> > Before offloading-code was added to the netdev-dpdk.c file (MARK and
> > RSS actions) the only DPDK RTE calls in use were rte_flow_create() and
> > rte_flow_destroy(). In preparation for splitting the offloading-code
> > from the netdev-dpdk.c file to a separate file, it is required
> > to embed these RTE calls into a global netdev-dpdk-* API so that
> > they can be called from the new file. An example for this requirement
> > can be seen in the handling of dpdk_mutex, which should be encapsulated
> 
> You probably meant dev->mutex.
> 
> > inside netdev-dpdk class (netdev-dpdk.c file), and should be unknown
> > to the outside callers. This commit embeds the rte_flow_create() call
> > inside the netdev_dpdk_flow_create() API and the rte_flow_destroy()
> > call inside the netdev_dpdk_rte_flow_destroy() API.
> >
> > Reviewed-by: Asaf Penso <asafp@mellanox.com>
> > Signed-off-by: Roni Bar Yanai <roniba@mellanox.com>
> > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> > ---
> >  lib/netdev-dpdk.c | 51
> +++++++++++++++++++++++++++++++++++++++------------
> >  lib/netdev-dpdk.h | 14 ++++++++++++++
> >  2 files changed, 53 insertions(+), 12 deletions(-)
> >
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> > index 32a6ffd..163d4ec 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -4203,6 +4203,42 @@ unlock:
> >      return err;
> >  }
> >
> > +int
> > +netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
> > +                             struct rte_flow *rte_flow,
> > +                             struct rte_flow_error *error)
> > +{
> > +    if (!is_dpdk_class(netdev->netdev_class)) {
> > +        return -1;
> > +    }
> 
> Not sure if this check is needed, because we're registering
> this offload API only for DPDK devices. However, it's not
> correct anyway, because 'is_dpdk_class' will return 'true'
> for vhost netdevs which are not rte_ethdev devices, i.e. has
> no offloading API.
> 
> > +
> > +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> > +    int ret;
> > +
> > +    ovs_mutex_lock(&dev->mutex);
> > +    ret = rte_flow_destroy(dev->port_id, rte_flow, error);
> > +    ovs_mutex_unlock(&dev->mutex);
> > +    return ret;
> > +}
> > +
> > +struct rte_flow *
> > +netdev_dpdk_rte_flow_create(struct netdev *netdev,
> > +                            const struct rte_flow_attr *attr,
> > +                            const struct rte_flow_item *items,
> > +                            const struct rte_flow_action *actions,
> > +                            struct rte_flow_error *error)
> > +{
> > +    if (!is_dpdk_class(netdev->netdev_class)) {
> > +        return NULL;
> > +    }
> 
> Same here.
> 
> > +
> > +    struct rte_flow *flow;
> > +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> 
> It's better to have an empty line between declarations and code.
> 
> > +    ovs_mutex_lock(&dev->mutex);
> > +    flow = rte_flow_create(dev->port_id, attr, items, actions, error);
> > +    ovs_mutex_unlock(&dev->mutex);
> > +    return flow;
> > +}
> >
> >  /* Find rte_flow with @ufid */
> >  static struct rte_flow *
> > @@ -4554,7 +4590,6 @@ netdev_dpdk_add_rte_flow_offload(struct
> netdev *netdev,
> >                                   size_t actions_len OVS_UNUSED,
> >                                   const ovs_u128 *ufid,
> >                                   struct offload_info *info) {
> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >      const struct rte_flow_attr flow_attr = {
> >          .group = 0,
> >          .priority = 0,
> > @@ -4759,15 +4794,12 @@ end_proto_check:
> >      mark.id = info->flow_mark;
> >      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark);
> >
> > -    ovs_mutex_lock(&dev->mutex);
> >
> >      rss = add_flow_rss_action(&actions, netdev);
> 
Just wondering what will happen if rss action is added with current n_rxq, and immediately after
there is a reconfiguration. The result will be exactly the same, rss flow has wrong
configuration. The lock doesn't solve this issue.

> This doesn't look right. 'add_flow_rss_action' Uses 'netdev->n_rxq'
> that should not be accessed without 'dev->mutex'. Otherwise you may
> mess up with reconfiguration and segfault here.
> 
> This could be avoided if dpif-netdev will really wait for completion
> of all the offloading operations for the device before reconfiguring it,
> but this is not yet implemented and will probably require changes in
> netdev offloading API.
> 
> 
> And this is, probably, one of the examples of atomicity, but it's not
> the atomicity of few subsequent rte_flow calls, but the atomicity of
> work with netdev fields. Because even if this will not crash, we'll
> try to offload rss action with incorrect number of queues.
> 
> >      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
> >
> > -    flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
> > -                           actions.actions, &error);
> > -
> > -    ovs_mutex_unlock(&dev->mutex);
> > +    flow = netdev_dpdk_rte_flow_create(netdev,
> &flow_attr,patterns.items,
> > +                            actions.actions, &error);
> 
> Please, keep the indents for function arguments. i.e. it's better to align
> arguments to the first parenthesis.
> This comment is for many other places in code.
> 
> >
> >      free(rss);
> >      if (!flow) {
> > @@ -4861,13 +4893,9 @@ static int
> >  netdev_dpdk_destroy_rte_flow(struct netdev *netdev,
> >                               const ovs_u128 *ufid,
> >                               struct rte_flow *rte_flow) {
> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >      struct rte_flow_error error;
> > -    int ret;
> > +    int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error);
> >
> > -    ovs_mutex_lock(&dev->mutex);
> > -
> > -    ret = rte_flow_destroy(dev->port_id, rte_flow, &error);
> >      if (ret == 0) {
> >          ufid_to_rte_flow_disassociate(ufid);
> >          VLOG_DBG("%s: removed rte flow %p associated with ufid "
> UUID_FMT "\n",
> > @@ -4878,7 +4906,6 @@ netdev_dpdk_destroy_rte_flow(struct netdev
> *netdev,
> >                   netdev_get_name(netdev), error.type, error.message);
> >      }
> >
> > -    ovs_mutex_unlock(&dev->mutex);
> >      return ret;
> >  }
> >
> > diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
> > index b7d02a7..82d2828 100644
> > --- a/lib/netdev-dpdk.h
> > +++ b/lib/netdev-dpdk.h
> > @@ -22,11 +22,25 @@
> >  #include "openvswitch/compiler.h"
> >
> >  struct dp_packet;
> > +struct netdev;
> > +struct rte_flow;
> > +struct rte_flow_error;
> > +struct rte_flow_attr;
> > +struct rte_flow_item;
> > +struct rte_flow_action;
> >
> >  #ifdef DPDK_NETDEV
> >
> >  void netdev_dpdk_register(void);
> >  void free_dpdk_buf(struct dp_packet *);
> > +int netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
> > +                             struct rte_flow *rte_flow,
> > +                             struct rte_flow_error *error);
> > +struct rte_flow *netdev_dpdk_rte_flow_create(struct netdev *netdev,
> > +                            const struct rte_flow_attr *attr,
> > +                            const struct rte_flow_item *items,
> > +                            const struct rte_flow_action *actions,
> > +                            struct rte_flow_error *error);
> 
> Alignments.
> 
> >
> >  #else
> >
> >
Ilya Maximets Feb. 22, 2019, 11:26 a.m. UTC | #4
On 21.02.2019 19:37, Roni Bar Yanai wrote:
> 
> 
>> -----Original Message-----
>> From: Ilya Maximets <i.maximets@samsung.com>
>> Sent: Thursday, February 21, 2019 5:27 PM
>> To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
>> Cc: Ian Stokes <ian.stokes@intel.com>; Olga Shern <olgas@mellanox.com>;
>> Kevin Traynor <ktraynor@redhat.com>; Asaf Penso <asafp@mellanox.com>;
>> Roni Bar Yanai <roniba@mellanox.com>; Flavio Leitner <fbl@sysclose.org>
>> Subject: Re: [PATCH v2 1/3] netdev-dpdk: Expose flow creation/destruction
>> calls
>>
>> On 21.02.2019 15:07, Ophir Munk wrote:
>>> From: Roni Bar Yanai <roniba@mellanox.com>
>>>
>>> Before offloading-code was added to the netdev-dpdk.c file (MARK and
>>> RSS actions) the only DPDK RTE calls in use were rte_flow_create() and
>>> rte_flow_destroy(). In preparation for splitting the offloading-code
>>> from the netdev-dpdk.c file to a separate file, it is required
>>> to embed these RTE calls into a global netdev-dpdk-* API so that
>>> they can be called from the new file. An example for this requirement
>>> can be seen in the handling of dpdk_mutex, which should be encapsulated
>>
>> You probably meant dev->mutex.
>>
>>> inside netdev-dpdk class (netdev-dpdk.c file), and should be unknown
>>> to the outside callers. This commit embeds the rte_flow_create() call
>>> inside the netdev_dpdk_flow_create() API and the rte_flow_destroy()
>>> call inside the netdev_dpdk_rte_flow_destroy() API.
>>>
>>> Reviewed-by: Asaf Penso <asafp@mellanox.com>
>>> Signed-off-by: Roni Bar Yanai <roniba@mellanox.com>
>>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
>>> ---
>>>  lib/netdev-dpdk.c | 51
>> +++++++++++++++++++++++++++++++++++++++------------
>>>  lib/netdev-dpdk.h | 14 ++++++++++++++
>>>  2 files changed, 53 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>>> index 32a6ffd..163d4ec 100644
>>> --- a/lib/netdev-dpdk.c
>>> +++ b/lib/netdev-dpdk.c
>>> @@ -4203,6 +4203,42 @@ unlock:
>>>      return err;
>>>  }
>>>
>>> +int
>>> +netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
>>> +                             struct rte_flow *rte_flow,
>>> +                             struct rte_flow_error *error)
>>> +{
>>> +    if (!is_dpdk_class(netdev->netdev_class)) {
>>> +        return -1;
>>> +    }
>>
>> Not sure if this check is needed, because we're registering
>> this offload API only for DPDK devices. However, it's not
>> correct anyway, because 'is_dpdk_class' will return 'true'
>> for vhost netdevs which are not rte_ethdev devices, i.e. has
>> no offloading API.
>>
>>> +
>>> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>> +    int ret;
>>> +
>>> +    ovs_mutex_lock(&dev->mutex);
>>> +    ret = rte_flow_destroy(dev->port_id, rte_flow, error);
>>> +    ovs_mutex_unlock(&dev->mutex);
>>> +    return ret;
>>> +}
>>> +
>>> +struct rte_flow *
>>> +netdev_dpdk_rte_flow_create(struct netdev *netdev,
>>> +                            const struct rte_flow_attr *attr,
>>> +                            const struct rte_flow_item *items,
>>> +                            const struct rte_flow_action *actions,
>>> +                            struct rte_flow_error *error)
>>> +{
>>> +    if (!is_dpdk_class(netdev->netdev_class)) {
>>> +        return NULL;
>>> +    }
>>
>> Same here.
>>
>>> +
>>> +    struct rte_flow *flow;
>>> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>
>> It's better to have an empty line between declarations and code.
>>
>>> +    ovs_mutex_lock(&dev->mutex);
>>> +    flow = rte_flow_create(dev->port_id, attr, items, actions, error);
>>> +    ovs_mutex_unlock(&dev->mutex);
>>> +    return flow;
>>> +}
>>>
>>>  /* Find rte_flow with @ufid */
>>>  static struct rte_flow *
>>> @@ -4554,7 +4590,6 @@ netdev_dpdk_add_rte_flow_offload(struct
>> netdev *netdev,
>>>                                   size_t actions_len OVS_UNUSED,
>>>                                   const ovs_u128 *ufid,
>>>                                   struct offload_info *info) {
>>> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>>      const struct rte_flow_attr flow_attr = {
>>>          .group = 0,
>>>          .priority = 0,
>>> @@ -4759,15 +4794,12 @@ end_proto_check:
>>>      mark.id = info->flow_mark;
>>>      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark);
>>>
>>> -    ovs_mutex_lock(&dev->mutex);
>>>
>>>      rss = add_flow_rss_action(&actions, netdev);
>>
> Just wondering what will happen if rss action is added with current n_rxq, and immediately after
> there is a reconfiguration. The result will be exactly the same, rss flow has wrong
> configuration. The lock doesn't solve this issue.

And I'm periodically forgetting about the fact that all offloading
operations are guarded by datapath port_mutex. So, the race below
is not really possible. However, IMHO, having everything under the
datapath port mutex is a bad design decision which is suitable only
for experimental feature. The worst part here is that offloading code
tries to make impression of the thread-safety by using concurrent
hash maps (but it doesn't even protect the insertion). Moving the
code to a separate module is one more step that tries to simulate the
independence. I'm not telling that we don't need that because this
could be a step in a way to make it really independent from the
dpif-netdev or netdev-dpdk. However, for now, I think that we need
a huge red banner at the top of each file that says that the code here
is *not thread safe* and even more it *must* be executed only with
*datapath port mutex held*. And every rte API call should be protected
anyway by dev->mutex to prevent races with pmd threads and some
stats/info calls from the main/other thread.

Issue 1: Current implementation of rte_flow netdev-offload api strictly
         depends on implementation of dpif-netdev.

Issue 2: Current implementation of rte_flow netdev-offload api tries to
         make an illusion that Issue 1 doesn't exist.

There are issues in current design even with the datapath port mutex held.
For example, reconfiguration code requests to delete all the offloaded
flows before reconfiguring the device. But this happens under the port_mutex
and flows will be removed only after the reconfiguration. This seems
dangerous, because device configuration could be significantly changed.
I'm not even sure that it's allowed to reconfigure device in this condition.
For example, in case of port deletion, port will be destroyed before we
unlock the port_mutex, i.e. flows will remain in HW probably keeping it
in an inconsistent state. Even more, we'll try to remove this flow from
another device if it'll be added fast with the same odp_port number.

Issue 3: Bad synchronisation between main and offloading threads inside
         dpif-netdev.

BTW, All above issues are not the issues of this patch-set and this
patch-set doesn't try solve them (However it makes Issue 2 a bit worse).

> 
>> This doesn't look right. 'add_flow_rss_action' Uses 'netdev->n_rxq'
>> that should not be accessed without 'dev->mutex'. Otherwise you may
>> mess up with reconfiguration and segfault here.
>>
>> This could be avoided if dpif-netdev will really wait for completion
>> of all the offloading operations for the device before reconfiguring it,
>> but this is not yet implemented and will probably require changes in
>> netdev offloading API.
>>
>>
>> And this is, probably, one of the examples of atomicity, but it's not
>> the atomicity of few subsequent rte_flow calls, but the atomicity of
>> work with netdev fields. Because even if this will not crash, we'll
>> try to offload rss action with incorrect number of queues.
>>
>>>      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>>>
>>> -    flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
>>> -                           actions.actions, &error);
>>> -
>>> -    ovs_mutex_unlock(&dev->mutex);
>>> +    flow = netdev_dpdk_rte_flow_create(netdev,
>> &flow_attr,patterns.items,
>>> +                            actions.actions, &error);
>>
>> Please, keep the indents for function arguments. i.e. it's better to align
>> arguments to the first parenthesis.
>> This comment is for many other places in code.
>>
>>>
>>>      free(rss);
>>>      if (!flow) {
>>> @@ -4861,13 +4893,9 @@ static int
>>>  netdev_dpdk_destroy_rte_flow(struct netdev *netdev,
>>>                               const ovs_u128 *ufid,
>>>                               struct rte_flow *rte_flow) {
>>> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>>      struct rte_flow_error error;
>>> -    int ret;
>>> +    int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error);
>>>
>>> -    ovs_mutex_lock(&dev->mutex);
>>> -
>>> -    ret = rte_flow_destroy(dev->port_id, rte_flow, &error);
>>>      if (ret == 0) {
>>>          ufid_to_rte_flow_disassociate(ufid);
>>>          VLOG_DBG("%s: removed rte flow %p associated with ufid "
>> UUID_FMT "\n",
>>> @@ -4878,7 +4906,6 @@ netdev_dpdk_destroy_rte_flow(struct netdev
>> *netdev,
>>>                   netdev_get_name(netdev), error.type, error.message);
>>>      }
>>>
>>> -    ovs_mutex_unlock(&dev->mutex);
>>>      return ret;
>>>  }
>>>
>>> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
>>> index b7d02a7..82d2828 100644
>>> --- a/lib/netdev-dpdk.h
>>> +++ b/lib/netdev-dpdk.h
>>> @@ -22,11 +22,25 @@
>>>  #include "openvswitch/compiler.h"
>>>
>>>  struct dp_packet;
>>> +struct netdev;
>>> +struct rte_flow;
>>> +struct rte_flow_error;
>>> +struct rte_flow_attr;
>>> +struct rte_flow_item;
>>> +struct rte_flow_action;
>>>
>>>  #ifdef DPDK_NETDEV
>>>
>>>  void netdev_dpdk_register(void);
>>>  void free_dpdk_buf(struct dp_packet *);
>>> +int netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
>>> +                             struct rte_flow *rte_flow,
>>> +                             struct rte_flow_error *error);
>>> +struct rte_flow *netdev_dpdk_rte_flow_create(struct netdev *netdev,
>>> +                            const struct rte_flow_attr *attr,
>>> +                            const struct rte_flow_item *items,
>>> +                            const struct rte_flow_action *actions,
>>> +                            struct rte_flow_error *error);
>>
>> Alignments.
>>
>>>
>>>  #else
>>>
>>>
Roni Bar Yanai Feb. 22, 2019, 1:23 p.m. UTC | #5
> -----Original Message-----
> From: Ilya Maximets <i.maximets@samsung.com>
> Sent: Friday, February 22, 2019 1:26 PM
> To: Roni Bar Yanai <roniba@mellanox.com>; Ophir Munk
> <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
> Cc: Ian Stokes <ian.stokes@intel.com>; Olga Shern <olgas@mellanox.com>;
> Kevin Traynor <ktraynor@redhat.com>; Asaf Penso <asafp@mellanox.com>;
> Flavio Leitner <fbl@sysclose.org>
> Subject: Re: [PATCH v2 1/3] netdev-dpdk: Expose flow creation/destruction
> calls
> 
> On 21.02.2019 19:37, Roni Bar Yanai wrote:
> >
> >
> >> -----Original Message-----
> >> From: Ilya Maximets <i.maximets@samsung.com>
> >> Sent: Thursday, February 21, 2019 5:27 PM
> >> To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
> >> Cc: Ian Stokes <ian.stokes@intel.com>; Olga Shern
> <olgas@mellanox.com>;
> >> Kevin Traynor <ktraynor@redhat.com>; Asaf Penso
> <asafp@mellanox.com>;
> >> Roni Bar Yanai <roniba@mellanox.com>; Flavio Leitner
> <fbl@sysclose.org>
> >> Subject: Re: [PATCH v2 1/3] netdev-dpdk: Expose flow
> creation/destruction
> >> calls
> >>
> >> On 21.02.2019 15:07, Ophir Munk wrote:
> >>> From: Roni Bar Yanai <roniba@mellanox.com>
> >>>
> >>> Before offloading-code was added to the netdev-dpdk.c file (MARK and
> >>> RSS actions) the only DPDK RTE calls in use were rte_flow_create() and
> >>> rte_flow_destroy(). In preparation for splitting the offloading-code
> >>> from the netdev-dpdk.c file to a separate file, it is required
> >>> to embed these RTE calls into a global netdev-dpdk-* API so that
> >>> they can be called from the new file. An example for this requirement
> >>> can be seen in the handling of dpdk_mutex, which should be
> encapsulated
> >>
> >> You probably meant dev->mutex.
> >>
> >>> inside netdev-dpdk class (netdev-dpdk.c file), and should be unknown
> >>> to the outside callers. This commit embeds the rte_flow_create() call
> >>> inside the netdev_dpdk_flow_create() API and the rte_flow_destroy()
> >>> call inside the netdev_dpdk_rte_flow_destroy() API.
> >>>
> >>> Reviewed-by: Asaf Penso <asafp@mellanox.com>
> >>> Signed-off-by: Roni Bar Yanai <roniba@mellanox.com>
> >>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> >>> ---
> >>>  lib/netdev-dpdk.c | 51
> >> +++++++++++++++++++++++++++++++++++++++------------
> >>>  lib/netdev-dpdk.h | 14 ++++++++++++++
> >>>  2 files changed, 53 insertions(+), 12 deletions(-)
> >>>
> >>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> >>> index 32a6ffd..163d4ec 100644
> >>> --- a/lib/netdev-dpdk.c
> >>> +++ b/lib/netdev-dpdk.c
> >>> @@ -4203,6 +4203,42 @@ unlock:
> >>>      return err;
> >>>  }
> >>>
> >>> +int
> >>> +netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
> >>> +                             struct rte_flow *rte_flow,
> >>> +                             struct rte_flow_error *error)
> >>> +{
> >>> +    if (!is_dpdk_class(netdev->netdev_class)) {
> >>> +        return -1;
> >>> +    }
> >>
> >> Not sure if this check is needed, because we're registering
> >> this offload API only for DPDK devices. However, it's not
> >> correct anyway, because 'is_dpdk_class' will return 'true'
> >> for vhost netdevs which are not rte_ethdev devices, i.e. has
> >> no offloading API.
> >>
> >>> +
> >>> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >>> +    int ret;
> >>> +
> >>> +    ovs_mutex_lock(&dev->mutex);
> >>> +    ret = rte_flow_destroy(dev->port_id, rte_flow, error);
> >>> +    ovs_mutex_unlock(&dev->mutex);
> >>> +    return ret;
> >>> +}
> >>> +
> >>> +struct rte_flow *
> >>> +netdev_dpdk_rte_flow_create(struct netdev *netdev,
> >>> +                            const struct rte_flow_attr *attr,
> >>> +                            const struct rte_flow_item *items,
> >>> +                            const struct rte_flow_action *actions,
> >>> +                            struct rte_flow_error *error)
> >>> +{
> >>> +    if (!is_dpdk_class(netdev->netdev_class)) {
> >>> +        return NULL;
> >>> +    }
> >>
> >> Same here.
> >>
> >>> +
> >>> +    struct rte_flow *flow;
> >>> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >>
> >> It's better to have an empty line between declarations and code.
> >>
> >>> +    ovs_mutex_lock(&dev->mutex);
> >>> +    flow = rte_flow_create(dev->port_id, attr, items, actions, error);
> >>> +    ovs_mutex_unlock(&dev->mutex);
> >>> +    return flow;
> >>> +}
> >>>
> >>>  /* Find rte_flow with @ufid */
> >>>  static struct rte_flow *
> >>> @@ -4554,7 +4590,6 @@ netdev_dpdk_add_rte_flow_offload(struct
> >> netdev *netdev,
> >>>                                   size_t actions_len OVS_UNUSED,
> >>>                                   const ovs_u128 *ufid,
> >>>                                   struct offload_info *info) {
> >>> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >>>      const struct rte_flow_attr flow_attr = {
> >>>          .group = 0,
> >>>          .priority = 0,
> >>> @@ -4759,15 +4794,12 @@ end_proto_check:
> >>>      mark.id = info->flow_mark;
> >>>      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK,
> &mark);
> >>>
> >>> -    ovs_mutex_lock(&dev->mutex);
> >>>
> >>>      rss = add_flow_rss_action(&actions, netdev);
> >>
> > Just wondering what will happen if rss action is added with current n_rxq,
> and immediately after
> > there is a reconfiguration. The result will be exactly the same, rss flow has
> wrong
> > configuration. The lock doesn't solve this issue.
> 
> And I'm periodically forgetting about the fact that all offloading
> operations are guarded by datapath port_mutex. So, the race below
> is not really possible. However, IMHO, having everything under the
> datapath port mutex is a bad design decision which is suitable only
> for experimental feature. The worst part here is that offloading code
> tries to make impression of the thread-safety by using concurrent
> hash maps (but it doesn't even protect the insertion). Moving the
> code to a separate module is one more step that tries to simulate the
> independence. I'm not telling that we don't need that because this
> could be a step in a way to make it really independent from the
> dpif-netdev or netdev-dpdk. However, for now, I think that we need
> a huge red banner at the top of each file that says that the code here
> is *not thread safe* and even more it *must* be executed only with
> *datapath port mutex held*. And every rte API call should be protected
> anyway by dev->mutex to prevent races with pmd threads and some
> stats/info calls from the main/other thread.
> 
> Issue 1: Current implementation of rte_flow netdev-offload api strictly
>          depends on implementation of dpif-netdev.
> 
> Issue 2: Current implementation of rte_flow netdev-offload api tries to
>          make an illusion that Issue 1 doesn't exist.
> 
> There are issues in current design even with the datapath port mutex held.
> For example, reconfiguration code requests to delete all the offloaded
> flows before reconfiguring the device. But this happens under the
> port_mutex
> and flows will be removed only after the reconfiguration. This seems
> dangerous, because device configuration could be significantly changed.
> I'm not even sure that it's allowed to reconfigure device in this condition.
> For example, in case of port deletion, port will be destroyed before we
> unlock the port_mutex, i.e. flows will remain in HW probably keeping it
> in an inconsistent state. Even more, we'll try to remove this flow from
> another device if it'll be added fast with the same odp_port number.
> 
> Issue 3: Bad synchronisation between main and offloading threads inside
>          dpif-netdev.
> 
> BTW, All above issues are not the issues of this patch-set and this
> patch-set doesn't try solve them (However it makes Issue 2 a bit worse).
> 

I must agree with you on all. Those are good points. It might be that there
are still offload messages in the queue for the old port, maybe even flow_put,
if new port uses same odp_port some non-relevant flows can be configured
on the new port, and this can break correctness. just a thought, maybe the offload layer
should take care of the entire path, namely flow_put messages will be handled by rte-offload
module. On reconfigure for example you call rte-offload port reconfigure
that will block put/del of flows, will remove all existing flows, and can
mark the incoming queue (maybe dump), so non relevant message will not be
handled (probably need to release ref count and delete). This way you can also benefit 
from HW optimization such as clear all HW rules.
In such model, if concurrency will be needed you can add it more easily and 
independent form dpif. No port_mutex will be required as well, if need all will be
done internally in rte-module.

> >
> >> This doesn't look right. 'add_flow_rss_action' Uses 'netdev->n_rxq'
> >> that should not be accessed without 'dev->mutex'. Otherwise you may
> >> mess up with reconfiguration and segfault here.
> >>
> >> This could be avoided if dpif-netdev will really wait for completion
> >> of all the offloading operations for the device before reconfiguring it,
> >> but this is not yet implemented and will probably require changes in
> >> netdev offloading API.
> >>
> >>
> >> And this is, probably, one of the examples of atomicity, but it's not
> >> the atomicity of few subsequent rte_flow calls, but the atomicity of
> >> work with netdev fields. Because even if this will not crash, we'll
> >> try to offload rss action with incorrect number of queues.
> >>
> >>>      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
> >>>
> >>> -    flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
> >>> -                           actions.actions, &error);
> >>> -
> >>> -    ovs_mutex_unlock(&dev->mutex);
> >>> +    flow = netdev_dpdk_rte_flow_create(netdev,
> >> &flow_attr,patterns.items,
> >>> +                            actions.actions, &error);
> >>
> >> Please, keep the indents for function arguments. i.e. it's better to align
> >> arguments to the first parenthesis.
> >> This comment is for many other places in code.
> >>
> >>>
> >>>      free(rss);
> >>>      if (!flow) {
> >>> @@ -4861,13 +4893,9 @@ static int
> >>>  netdev_dpdk_destroy_rte_flow(struct netdev *netdev,
> >>>                               const ovs_u128 *ufid,
> >>>                               struct rte_flow *rte_flow) {
> >>> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >>>      struct rte_flow_error error;
> >>> -    int ret;
> >>> +    int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error);
> >>>
> >>> -    ovs_mutex_lock(&dev->mutex);
> >>> -
> >>> -    ret = rte_flow_destroy(dev->port_id, rte_flow, &error);
> >>>      if (ret == 0) {
> >>>          ufid_to_rte_flow_disassociate(ufid);
> >>>          VLOG_DBG("%s: removed rte flow %p associated with ufid "
> >> UUID_FMT "\n",
> >>> @@ -4878,7 +4906,6 @@ netdev_dpdk_destroy_rte_flow(struct netdev
> >> *netdev,
> >>>                   netdev_get_name(netdev), error.type, error.message);
> >>>      }
> >>>
> >>> -    ovs_mutex_unlock(&dev->mutex);
> >>>      return ret;
> >>>  }
> >>>
> >>> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
> >>> index b7d02a7..82d2828 100644
> >>> --- a/lib/netdev-dpdk.h
> >>> +++ b/lib/netdev-dpdk.h
> >>> @@ -22,11 +22,25 @@
> >>>  #include "openvswitch/compiler.h"
> >>>
> >>>  struct dp_packet;
> >>> +struct netdev;
> >>> +struct rte_flow;
> >>> +struct rte_flow_error;
> >>> +struct rte_flow_attr;
> >>> +struct rte_flow_item;
> >>> +struct rte_flow_action;
> >>>
> >>>  #ifdef DPDK_NETDEV
> >>>
> >>>  void netdev_dpdk_register(void);
> >>>  void free_dpdk_buf(struct dp_packet *);
> >>> +int netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
> >>> +                             struct rte_flow *rte_flow,
> >>> +                             struct rte_flow_error *error);
> >>> +struct rte_flow *netdev_dpdk_rte_flow_create(struct netdev
> *netdev,
> >>> +                            const struct rte_flow_attr *attr,
> >>> +                            const struct rte_flow_item *items,
> >>> +                            const struct rte_flow_action *actions,
> >>> +                            struct rte_flow_error *error);
> >>
> >> Alignments.
> >>
> >>>
> >>>  #else
> >>>
> >>>
Ophir Munk Feb. 27, 2019, 6:51 p.m. UTC | #6
> -----Original Message-----
> From: Ilya Maximets <i.maximets@samsung.com>
> Sent: Friday, February 22, 2019 1:26 PM
> To: Roni Bar Yanai <roniba@mellanox.com>; Ophir Munk
> <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
> Cc: Ian Stokes <ian.stokes@intel.com>; Olga Shern <olgas@mellanox.com>;
> Kevin Traynor <ktraynor@redhat.com>; Asaf Penso <asafp@mellanox.com>;
> Flavio Leitner <fbl@sysclose.org>
> Subject: Re: [PATCH v2 1/3] netdev-dpdk: Expose flow creation/destruction
> calls
> 
> On 21.02.2019 19:37, Roni Bar Yanai wrote:
> >
> >
> >> -----Original Message-----
> >> From: Ilya Maximets <i.maximets@samsung.com>
> >> Sent: Thursday, February 21, 2019 5:27 PM
> >> To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
> >> Cc: Ian Stokes <ian.stokes@intel.com>; Olga Shern
> >> <olgas@mellanox.com>; Kevin Traynor <ktraynor@redhat.com>; Asaf
> Penso
> >> <asafp@mellanox.com>; Roni Bar Yanai <roniba@mellanox.com>; Flavio
> >> Leitner <fbl@sysclose.org>
> >> Subject: Re: [PATCH v2 1/3] netdev-dpdk: Expose flow
> >> creation/destruction calls
> >>
> >> On 21.02.2019 15:07, Ophir Munk wrote:
> >>> From: Roni Bar Yanai <roniba@mellanox.com>
> >>>
> >>> Before offloading-code was added to the netdev-dpdk.c file (MARK and
> >>> RSS actions) the only DPDK RTE calls in use were rte_flow_create()
> >>> and rte_flow_destroy(). In preparation for splitting the
> >>> offloading-code from the netdev-dpdk.c file to a separate file, it
> >>> is required to embed these RTE calls into a global netdev-dpdk-* API
> >>> so that they can be called from the new file. An example for this
> >>> requirement can be seen in the handling of dpdk_mutex, which should
> >>> be encapsulated
> >>
> >> You probably meant dev->mutex.

Fixed in v3

> >>
> >>> inside netdev-dpdk class (netdev-dpdk.c file), and should be unknown
> >>> to the outside callers. This commit embeds the rte_flow_create()
> >>> call inside the netdev_dpdk_flow_create() API and the
> >>> rte_flow_destroy() call inside the netdev_dpdk_rte_flow_destroy() API.
> >>>
> >>> Reviewed-by: Asaf Penso <asafp@mellanox.com>
> >>> Signed-off-by: Roni Bar Yanai <roniba@mellanox.com>
> >>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> >>> ---
> >>>  lib/netdev-dpdk.c | 51
> >> +++++++++++++++++++++++++++++++++++++++------------
> >>>  lib/netdev-dpdk.h | 14 ++++++++++++++
> >>>  2 files changed, 53 insertions(+), 12 deletions(-)
> >>>
> >>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
> >>> 32a6ffd..163d4ec 100644
> >>> --- a/lib/netdev-dpdk.c
> >>> +++ b/lib/netdev-dpdk.c
> >>> @@ -4203,6 +4203,42 @@ unlock:
> >>>      return err;
> >>>  }
> >>>
> >>> +int
> >>> +netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
> >>> +                             struct rte_flow *rte_flow,
> >>> +                             struct rte_flow_error *error) {
> >>> +    if (!is_dpdk_class(netdev->netdev_class)) {
> >>> +        return -1;
> >>> +    }
> >>
> >> Not sure if this check is needed, because we're registering this
> >> offload API only for DPDK devices. However, it's not correct anyway,
> >> because 'is_dpdk_class' will return 'true'
> >> for vhost netdevs which are not rte_ethdev devices, i.e. has no
> >> offloading API.

Removed in v3

> >>
> >>> +
> >>> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >>> +    int ret;
> >>> +
> >>> +    ovs_mutex_lock(&dev->mutex);
> >>> +    ret = rte_flow_destroy(dev->port_id, rte_flow, error);
> >>> +    ovs_mutex_unlock(&dev->mutex);
> >>> +    return ret;
> >>> +}
> >>> +
> >>> +struct rte_flow *
> >>> +netdev_dpdk_rte_flow_create(struct netdev *netdev,
> >>> +                            const struct rte_flow_attr *attr,
> >>> +                            const struct rte_flow_item *items,
> >>> +                            const struct rte_flow_action *actions,
> >>> +                            struct rte_flow_error *error) {
> >>> +    if (!is_dpdk_class(netdev->netdev_class)) {
> >>> +        return NULL;
> >>> +    }
> >>
> >> Same here.

Removed in v3

> >>
> >>> +
> >>> +    struct rte_flow *flow;
> >>> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >>
> >> It's better to have an empty line between declarations and code.

Fixed in v3

> >>
> >>> +    ovs_mutex_lock(&dev->mutex);
> >>> +    flow = rte_flow_create(dev->port_id, attr, items, actions, error);
> >>> +    ovs_mutex_unlock(&dev->mutex);
> >>> +    return flow;
> >>> +}
> >>>
> >>>  /* Find rte_flow with @ufid */
> >>>  static struct rte_flow *
> >>> @@ -4554,7 +4590,6 @@ netdev_dpdk_add_rte_flow_offload(struct
> >> netdev *netdev,
> >>>                                   size_t actions_len OVS_UNUSED,
> >>>                                   const ovs_u128 *ufid,
> >>>                                   struct offload_info *info) {
> >>> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >>>      const struct rte_flow_attr flow_attr = {
> >>>          .group = 0,
> >>>          .priority = 0,
> >>> @@ -4759,15 +4794,12 @@ end_proto_check:
> >>>      mark.id = info->flow_mark;
> >>>      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK,
> &mark);
> >>>
> >>> -    ovs_mutex_lock(&dev->mutex);
> >>>
> >>>      rss = add_flow_rss_action(&actions, netdev);
> >>
> > Just wondering what will happen if rss action is added with current
> > n_rxq, and immediately after there is a reconfiguration. The result
> > will be exactly the same, rss flow has wrong configuration. The lock doesn't
> solve this issue.
> 
> And I'm periodically forgetting about the fact that all offloading operations
> are guarded by datapath port_mutex. So, the race below is not really
> possible. However, IMHO, having everything under the datapath port mutex
> is a bad design decision which is suitable only for experimental feature. The
> worst part here is that offloading code tries to make impression of the
> thread-safety by using concurrent hash maps (but it doesn't even protect the
> insertion). Moving the code to a separate module is one more step that tries
> to simulate the independence. I'm not telling that we don't need that
> because this could be a step in a way to make it really independent from the
> dpif-netdev or netdev-dpdk. However, for now, I think that we need a huge
> red banner at the top of each file that says that the code here is *not thread
> safe* and even more it *must* be executed only with *datapath port mutex
> held*. And every rte API call should be protected anyway by dev->mutex to
> prevent races with pmd threads and some stats/info calls from the
> main/other thread.
> 
> Issue 1: Current implementation of rte_flow netdev-offload api strictly
>          depends on implementation of dpif-netdev.
> 
> Issue 2: Current implementation of rte_flow netdev-offload api tries to
>          make an illusion that Issue 1 doesn't exist.
> 
> There are issues in current design even with the datapath port mutex held.
> For example, reconfiguration code requests to delete all the offloaded flows
> before reconfiguring the device. But this happens under the port_mutex and
> flows will be removed only after the reconfiguration. This seems dangerous,
> because device configuration could be significantly changed.
> I'm not even sure that it's allowed to reconfigure device in this condition.
> For example, in case of port deletion, port will be destroyed before we
> unlock the port_mutex, i.e. flows will remain in HW probably keeping it in an
> inconsistent state. Even more, we'll try to remove this flow from another
> device if it'll be added fast with the same odp_port number.
> 
> Issue 3: Bad synchronisation between main and offloading threads inside
>          dpif-netdev.
> 
> BTW, All above issues are not the issues of this patch-set and this patch-set
> doesn't try solve them (However it makes Issue 2 a bit worse).
> 

Having said that I wish to conclude this patch-set and having another task to handle the issues mentioned above.

> >
> >> This doesn't look right. 'add_flow_rss_action' Uses 'netdev->n_rxq'
> >> that should not be accessed without 'dev->mutex'. Otherwise you may
> >> mess up with reconfiguration and segfault here.
> >>
> >> This could be avoided if dpif-netdev will really wait for completion
> >> of all the offloading operations for the device before reconfiguring
> >> it, but this is not yet implemented and will probably require changes
> >> in netdev offloading API.
> >>
> >>
> >> And this is, probably, one of the examples of atomicity, but it's not
> >> the atomicity of few subsequent rte_flow calls, but the atomicity of
> >> work with netdev fields. Because even if this will not crash, we'll
> >> try to offload rss action with incorrect number of queues.
> >>
> >>>      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
> >>>
> >>> -    flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
> >>> -                           actions.actions, &error);
> >>> -
> >>> -    ovs_mutex_unlock(&dev->mutex);
> >>> +    flow = netdev_dpdk_rte_flow_create(netdev,
> >> &flow_attr,patterns.items,
> >>> +                            actions.actions, &error);
> >>
> >> Please, keep the indents for function arguments. i.e. it's better to
> >> align arguments to the first parenthesis.
> >> This comment is for many other places in code.

Fixed in v3

> >>
> >>>
> >>>      free(rss);
> >>>      if (!flow) {
> >>> @@ -4861,13 +4893,9 @@ static int
> >>>  netdev_dpdk_destroy_rte_flow(struct netdev *netdev,
> >>>                               const ovs_u128 *ufid,
> >>>                               struct rte_flow *rte_flow) {
> >>> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >>>      struct rte_flow_error error;
> >>> -    int ret;
> >>> +    int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow,
> >>> + &error);
> >>>
> >>> -    ovs_mutex_lock(&dev->mutex);
> >>> -
> >>> -    ret = rte_flow_destroy(dev->port_id, rte_flow, &error);
> >>>      if (ret == 0) {
> >>>          ufid_to_rte_flow_disassociate(ufid);
> >>>          VLOG_DBG("%s: removed rte flow %p associated with ufid "
> >> UUID_FMT "\n",
> >>> @@ -4878,7 +4906,6 @@ netdev_dpdk_destroy_rte_flow(struct netdev
> >> *netdev,
> >>>                   netdev_get_name(netdev), error.type, error.message);
> >>>      }
> >>>
> >>> -    ovs_mutex_unlock(&dev->mutex);
> >>>      return ret;
> >>>  }
> >>>
> >>> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h index
> >>> b7d02a7..82d2828 100644
> >>> --- a/lib/netdev-dpdk.h
> >>> +++ b/lib/netdev-dpdk.h
> >>> @@ -22,11 +22,25 @@
> >>>  #include "openvswitch/compiler.h"
> >>>
> >>>  struct dp_packet;
> >>> +struct netdev;
> >>> +struct rte_flow;
> >>> +struct rte_flow_error;
> >>> +struct rte_flow_attr;
> >>> +struct rte_flow_item;
> >>> +struct rte_flow_action;
> >>>
> >>>  #ifdef DPDK_NETDEV
> >>>
> >>>  void netdev_dpdk_register(void);
> >>>  void free_dpdk_buf(struct dp_packet *);
> >>> +int netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
> >>> +                             struct rte_flow *rte_flow,
> >>> +                             struct rte_flow_error *error); struct
> >>> +rte_flow *netdev_dpdk_rte_flow_create(struct netdev *netdev,
> >>> +                            const struct rte_flow_attr *attr,
> >>> +                            const struct rte_flow_item *items,
> >>> +                            const struct rte_flow_action *actions,
> >>> +                            struct rte_flow_error *error);
> >>
> >> Alignments.

Fixed in v3

> >>
> >>>
> >>>  #else
> >>>
> >>>
diff mbox series

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 32a6ffd..163d4ec 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -4203,6 +4203,42 @@  unlock:
     return err;
 }
 
+int
+netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
+                             struct rte_flow *rte_flow,
+                             struct rte_flow_error *error)
+{
+    if (!is_dpdk_class(netdev->netdev_class)) {
+        return -1;
+    }
+
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    int ret;
+
+    ovs_mutex_lock(&dev->mutex);
+    ret = rte_flow_destroy(dev->port_id, rte_flow, error);
+    ovs_mutex_unlock(&dev->mutex);
+    return ret;
+}
+
+struct rte_flow *
+netdev_dpdk_rte_flow_create(struct netdev *netdev,
+                            const struct rte_flow_attr *attr,
+                            const struct rte_flow_item *items,
+                            const struct rte_flow_action *actions,
+                            struct rte_flow_error *error)
+{
+    if (!is_dpdk_class(netdev->netdev_class)) {
+        return NULL;
+    }
+
+    struct rte_flow *flow;
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    ovs_mutex_lock(&dev->mutex);
+    flow = rte_flow_create(dev->port_id, attr, items, actions, error);
+    ovs_mutex_unlock(&dev->mutex);
+    return flow;
+}
 
 /* Find rte_flow with @ufid */
 static struct rte_flow *
@@ -4554,7 +4590,6 @@  netdev_dpdk_add_rte_flow_offload(struct netdev *netdev,
                                  size_t actions_len OVS_UNUSED,
                                  const ovs_u128 *ufid,
                                  struct offload_info *info) {
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     const struct rte_flow_attr flow_attr = {
         .group = 0,
         .priority = 0,
@@ -4759,15 +4794,12 @@  end_proto_check:
     mark.id = info->flow_mark;
     add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark);
 
-    ovs_mutex_lock(&dev->mutex);
 
     rss = add_flow_rss_action(&actions, netdev);
     add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
 
-    flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
-                           actions.actions, &error);
-
-    ovs_mutex_unlock(&dev->mutex);
+    flow = netdev_dpdk_rte_flow_create(netdev, &flow_attr,patterns.items,
+                            actions.actions, &error);
 
     free(rss);
     if (!flow) {
@@ -4861,13 +4893,9 @@  static int
 netdev_dpdk_destroy_rte_flow(struct netdev *netdev,
                              const ovs_u128 *ufid,
                              struct rte_flow *rte_flow) {
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     struct rte_flow_error error;
-    int ret;
+    int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error);
 
-    ovs_mutex_lock(&dev->mutex);
-
-    ret = rte_flow_destroy(dev->port_id, rte_flow, &error);
     if (ret == 0) {
         ufid_to_rte_flow_disassociate(ufid);
         VLOG_DBG("%s: removed rte flow %p associated with ufid " UUID_FMT "\n",
@@ -4878,7 +4906,6 @@  netdev_dpdk_destroy_rte_flow(struct netdev *netdev,
                  netdev_get_name(netdev), error.type, error.message);
     }
 
-    ovs_mutex_unlock(&dev->mutex);
     return ret;
 }
 
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
index b7d02a7..82d2828 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -22,11 +22,25 @@ 
 #include "openvswitch/compiler.h"
 
 struct dp_packet;
+struct netdev;
+struct rte_flow;
+struct rte_flow_error;
+struct rte_flow_attr;
+struct rte_flow_item;
+struct rte_flow_action;
 
 #ifdef DPDK_NETDEV
 
 void netdev_dpdk_register(void);
 void free_dpdk_buf(struct dp_packet *);
+int netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
+                             struct rte_flow *rte_flow,
+                             struct rte_flow_error *error);
+struct rte_flow *netdev_dpdk_rte_flow_create(struct netdev *netdev,
+                            const struct rte_flow_attr *attr,
+                            const struct rte_flow_item *items,
+                            const struct rte_flow_action *actions,
+                            struct rte_flow_error *error);
 
 #else