diff mbox

[ovs-dev,patch_v1] dpdk: Fix device cleanup.

Message ID 1496737849-50926-1-git-send-email-dlu998@gmail.com
State Superseded
Headers show

Commit Message

Darrell Ball June 6, 2017, 8:30 a.m. UTC
On port deletion, device resources cannot be cleaned
up if the device was attached when vswitchd was started.
The issue is mainly due to introduction of the 'attached'
field for the netdev_dpdk device context.  The logic
setting the 'attached' field did not include startup
handling.  The use of an extra 'attached' field was
introduced to guard against detaching a device that could
not be detached, but should not be necessary as the rte and
eal keep the attached state, as they must, and also filter
trying to detach a device that is not detachable.  If
there were any bugs in this regard, which I did not find
with basic testing, then they should be fixed in rte/eal.

Fixes: 5dcde09c80a8 ("netdev-dpdk: Fix device leak on port deletion.")
CC: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Darrell Ball <dlu998@gmail.com>
---
 lib/netdev-dpdk.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

Comments

Ilya Maximets June 6, 2017, 12:46 p.m. UTC | #1
On 06.06.2017 11:30, Darrell Ball wrote:
> On port deletion, device resources cannot be cleaned
> up if the device was attached when vswitchd was started.
> The issue is mainly due to introduction of the 'attached'
> field for the netdev_dpdk device context.  The logic
> setting the 'attached' field did not include startup
> handling.  The use of an extra 'attached' field was
> introduced to guard against detaching a device that could
> not be detached, but should not be necessary as the rte and
> eal keep the attached state, as they must, and also filter
> trying to detach a device that is not detachable.  If
> there were any bugs in this regard, which I did not find
> with basic testing, then they should be fixed in rte/eal.

Why you want to free resources of these devices?
It's, actually, was a default behaviour of OVS for years.
If you want to detach hotplugable devices than you may just not
attach them using cmdline. Not hotplugable devices will not
be detached anyway. 

From the other side this patch will allow following scenario
for not detachable devices:

	- add_port(05.00.0)
	- configure()
	- stop()
	- close()
	- detach(05.00.0) -----> Failure
	- add_port(05.00.0)
	- configure()
	- start()

At this point OvS will get closed device by name and will
try to configure it and start again.

My concern is that we can't be sure that it will continue
work properly after close.

According to DPDK API, *device can not be started back after
close*. Unfortunately, there is no documentation about
trying to reinitialize closed device. So, we can't
rely on your assumption that device will work properly
after that.

> Fixes: 5dcde09c80a8 ("netdev-dpdk: Fix device leak on port deletion.")
> CC: Ilya Maximets <i.maximets@samsung.com>
> Signed-off-by: Darrell Ball <dlu998@gmail.com>
> ---
>  lib/netdev-dpdk.c | 14 +++-----------
>  1 file changed, 3 insertions(+), 11 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index b770b70..753345d 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -365,9 +365,6 @@ struct netdev_dpdk {
>      /* Device arguments for dpdk ports */
>      char *devargs;
>  
> -    /* If true, device was attached by rte_eth_dev_attach(). */
> -    bool attached;
> -
>      /* In dpdk_list. */
>      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
>  
> @@ -862,7 +859,6 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no,
>      dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>      ovsrcu_index_init(&dev->vid, -1);
>      dev->vhost_reconfigured = false;
> -    dev->attached = false;
>  
>      ovsrcu_init(&dev->qos_conf, NULL);
>  
> @@ -1016,7 +1012,7 @@ netdev_dpdk_destruct(struct netdev *netdev)
>  
>      rte_eth_dev_stop(dev->port_id);
>  
> -    if (dev->attached) {
> +    if (rte_eth_dev_is_valid_port(dev->port_id)) {
>          rte_eth_dev_close(dev->port_id);
>          if (rte_eth_dev_detach(dev->port_id, devname) < 0) {
>              VLOG_ERR("Device '%s' can not be detached", dev->devargs);
> @@ -1136,8 +1132,7 @@ netdev_dpdk_lookup_by_port_id(dpdk_port_t port_id)
>  }
>  
>  static dpdk_port_t
> -netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
> -                            const char *devargs, char **errp)
> +netdev_dpdk_process_devargs(const char *devargs, char **errp)
>  {
>      /* Get the name up to the first comma. */
>      char *name = xmemdup0(devargs, strcspn(devargs, ","));
> @@ -1148,8 +1143,6 @@ netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
>              || !rte_eth_dev_is_valid_port(new_port_id)) {
>          /* Device not found in DPDK, attempt to attach it */
>          if (!rte_eth_dev_attach(devargs, &new_port_id)) {
> -            /* Attach successful */
> -            dev->attached = true;
>              VLOG_INFO("Device '%s' attached to DPDK", devargs);
>          } else {
>              /* Attach unsuccessful */
> @@ -1236,8 +1229,7 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args,
>           * is valid */
>          if (!(dev->devargs && !strcmp(dev->devargs, new_devargs)
>                 && rte_eth_dev_is_valid_port(dev->port_id))) {
> -            dpdk_port_t new_port_id = netdev_dpdk_process_devargs(dev,
> -                                                                  new_devargs,
> +            dpdk_port_t new_port_id = netdev_dpdk_process_devargs(new_devargs,
>                                                                    errp);
>              if (!rte_eth_dev_is_valid_port(new_port_id)) {
>                  err = EINVAL;
>
Darrell Ball June 7, 2017, 1:46 a.m. UTC | #2
On 6/6/17, 5:46 AM, "ovs-dev-bounces@openvswitch.org on behalf of Ilya Maximets" <ovs-dev-bounces@openvswitch.org on behalf of i.maximets@samsung.com> wrote:

    On 06.06.2017 11:30, Darrell Ball wrote:
    > On port deletion, device resources cannot be cleaned

    > up if the device was attached when vswitchd was started.

    > The issue is mainly due to introduction of the 'attached'

    > field for the netdev_dpdk device context.  The logic

    > setting the 'attached' field did not include startup

    > handling.  The use of an extra 'attached' field was

    > introduced to guard against detaching a device that could

    > not be detached, but should not be necessary as the rte and

    > eal keep the attached state, as they must, and also filter

    > trying to detach a device that is not detachable.  If

    > there were any bugs in this regard, which I did not find

    > with basic testing, then they should be fixed in rte/eal.

    
    Why you want to free resources of these devices?
    It's, actually, was a default behaviour of OVS for years.
    If you want to detach hotplugable devices than you may just not
    attach them using cmdline.

Some testing:

1) Using a hotpluggable device; it is using the uio_pci_generic
driver.
a) I restart OVS with the device already bound.
When I do del-port to remove it, the code skips resource cleanup,
namely *_close and *_detach, bcoz the new internal OVS variable ‘attached’ is
not true, even though rte/eal considers it ‘attached.’ 

b) If I use the exact same device, but hot plug attach it this time and then do del-port,
the device resources are cleaned up.

I would expect ‘a’ to do cleanup and also be consistent with ‘b’.

Moreover, this worked before
5dcde09c80a8 ("netdev-dpdk: Fix device leak on port deletion."), 
as netdev_dpdk_detach() could be used to do the cleanup in the exact same way
for both ‘a’ and ‘b’, as this new patch can do, without the old API.


2) Now, let us move to the second point.


 Not hotplugable devices will not
    be detached anyway. 

Correct, I mentioned in the commit message that the rte/eal code should block this
and it does.
    
    From the other side this patch will allow following scenario
    for not detachable devices:

    
    	- add_port(05.00.0)
    	- configure()

    	- stop()
    	- close()
    	- detach(05.00.0) -----> Failure

Correct/expected.


    	- add_port(05.00.0)
    	- configure()
    	- start()
    
    At this point OvS will get closed device by name and will
    try to configure it and start again.
    
    My concern is that we can't be sure that it will continue
    work properly after close.
    
    According to DPDK API, *device can not be started back after
    close*. Unfortunately, there is no documentation about
    trying to reinitialize closed device. So, we can't
    rely on your assumption that device will work properly
    after that.


Using my patch with VFIO and 16.11; VFIO is not detachable in this environment.
There is no problem seen; packet rates are the same before and after del-port/re-add-port. 

Using your previous patch, this is what I see:
In this case, ‘attached’ will be false and on del-port, no resources will be freed -
_close will not be called and resources leaked.
So, you want to achieve blocking the _close call to free resources, in case we want
to add the port back and because we are not sure if the device can be restarted
after _close?
Since we deleted the port, it is likely we won’t use it again, but we will want to use
other ports, presumably.

Also, this would be a bug in rte/eal, if it was an issue, since we should be able to free
resources on deleting a port, right ? Freeing unused resources should not make it
impossible to reuse the device later by reattaching it, right ?
However, my earlier test shows this works with VFIO.



    
    > Fixes: 5dcde09c80a8 ("netdev-dpdk: Fix device leak on port deletion.")

    > CC: Ilya Maximets <i.maximets@samsung.com>

    > Signed-off-by: Darrell Ball dlu998@gmail.comf	ter

    > ---

    >  lib/netdev-dpdk.c | 14 +++-----------

    >  1 file changed, 3 insertions(+), 11 deletions(-)

    > 

    > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c

    > index b770b70..753345d 100644

    > --- a/lib/netdev-dpdk.c

    > +++ b/lib/netdev-dpdk.c

    > @@ -365,9 +365,6 @@ struct netdev_dpdk {

    >      /* Device arguments for dpdk ports */

    >      char *devargs;

    >  

    > -    /* If true, device was attached by rte_eth_dev_attach(). */

    > -    bool attached;

    > -

    >      /* In dpdk_list. */

    >      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);

    >  

    > @@ -862,7 +859,6 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no,

    >      dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);

    >      ovsrcu_index_init(&dev->vid, -1);

    >      dev->vhost_reconfigured = false;

    > -    dev->attached = false;

    >  

    >      ovsrcu_init(&dev->qos_conf, NULL);

    >  

    > @@ -1016,7 +1012,7 @@ netdev_dpdk_destruct(struct netdev *netdev)

    >  

    >      rte_eth_dev_stop(dev->port_id);

    >  

    > -    if (dev->attached) {

    > +    if (rte_eth_dev_is_valid_port(dev->port_id)) {

    >          rte_eth_dev_close(dev->port_id);

    >          if (rte_eth_dev_detach(dev->port_id, devname) < 0) {

    >              VLOG_ERR("Device '%s' can not be detached", dev->devargs);

    > @@ -1136,8 +1132,7 @@ netdev_dpdk_lookup_by_port_id(dpdk_port_t port_id)

    >  }

    >  

    >  static dpdk_port_t

    > -netdev_dpdk_process_devargs(struct netdev_dpdk *dev,

    > -                            const char *devargs, char **errp)

    > +netdev_dpdk_process_devargs(const char *devargs, char **errp)

    >  {

    >      /* Get the name up to the first comma. */

    >      char *name = xmemdup0(devargs, strcspn(devargs, ","));

    > @@ -1148,8 +1143,6 @@ netdev_dpdk_process_devargs(struct netdev_dpdk *dev,

    >              || !rte_eth_dev_is_valid_port(new_port_id)) {

    >          /* Device not found in DPDK, attempt to attach it */

    >          if (!rte_eth_dev_attach(devargs, &new_port_id)) {

    > -            /* Attach successful */

    > -            dev->attached = true;

    >              VLOG_INFO("Device '%s' attached to DPDK", devargs);

    >          } else {

    >              /* Attach unsuccessful */

    > @@ -1236,8 +1229,7 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args,

    >           * is valid */

    >          if (!(dev->devargs && !strcmp(dev->devargs, new_devargs)

    >                 && rte_eth_dev_is_valid_port(dev->port_id))) {

    > -            dpdk_port_t new_port_id = netdev_dpdk_process_devargs(dev,

    > -                                                                  new_devargs,

    > +            dpdk_port_t new_port_id = netdev_dpdk_process_devargs(new_devargs,

    >                                                                    errp);

    >              if (!rte_eth_dev_is_valid_port(new_port_id)) {

    >                  err = EINVAL;

    > 

    _______________________________________________
    dev mailing list
    dev@openvswitch.org
    https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=G8QfrUVNm6ggIVwO3aMkZCC7YjM9GV0vFsv3waiJ0C0&s=cXxznk6aRq6m4xUyF4hzRow5hhSTQPC7gj0ahmI7LZs&e=
Ilya Maximets June 7, 2017, 8:23 a.m. UTC | #3
On 07.06.2017 04:46, Darrell Ball wrote:
> 
> 
> On 6/6/17, 5:46 AM, "ovs-dev-bounces@openvswitch.org on behalf of Ilya Maximets" <ovs-dev-bounces@openvswitch.org on behalf of i.maximets@samsung.com> wrote:
> 
>     On 06.06.2017 11:30, Darrell Ball wrote:
>     > On port deletion, device resources cannot be cleaned
>     > up if the device was attached when vswitchd was started.
>     > The issue is mainly due to introduction of the 'attached'
>     > field for the netdev_dpdk device context.  The logic
>     > setting the 'attached' field did not include startup
>     > handling.  The use of an extra 'attached' field was
>     > introduced to guard against detaching a device that could
>     > not be detached, but should not be necessary as the rte and
>     > eal keep the attached state, as they must, and also filter
>     > trying to detach a device that is not detachable.  If
>     > there were any bugs in this regard, which I did not find
>     > with basic testing, then they should be fixed in rte/eal.
>     
>     Why you want to free resources of these devices?
>     It's, actually, was a default behaviour of OVS for years.
>     If you want to detach hotplugable devices than you may just not
>     attach them using cmdline.
> 
> Some testing:
> 
> 1) Using a hotpluggable device; it is using the uio_pci_generic
> driver.
> a) I restart OVS with the device already bound.
> When I do del-port to remove it, the code skips resource cleanup,
> namely *_close and *_detach, bcoz the new internal OVS variable ‘attached’ is
> not true, even though rte/eal considers it ‘attached.’

At first, I already told in discussion of my previous patch that
'dev->attached' is not the copy of internal state of device inside dpdk.
They are completely different.

About your testing.
I tried this:
	1. Start ovs-vswitchd
	2. ovs-vsctl add-port br1 dpdk0 -- set Interface dpdk0 type=dpdk options:dpdk-devargs=0000:88:00.0
	   --> |netdev_dpdk|INFO|Device '0000:88:00.0' attached to DPDK
	3. Restart ovs-vswitchd
	4. ovs-vsctl del-port dpdk0
	   --> |netdev_dpdk|INFO|Device '0000:88:00.0' detached

So, all resources freed as expected.
What I'm doing wrong?
Or port was bound using whitelist in 'other_ocnfig:dpdk-extra' ? In this case
it's expected behaviour that it will not be detached.
It's reasonable because changes in 'dpdk-extra' requires restart and device
will not be attached after restart if removed from list in 'dpdk-extra'.

> b) If I use the exact same device, but hot plug attach it this time and then do del-port,
> the device resources are cleaned up.
> 
> I would expect ‘a’ to do cleanup and also be consistent with ‘b’.
> 
> Moreover, this worked before
> 5dcde09c80a8 ("netdev-dpdk: Fix device leak on port deletion."), 
> as netdev_dpdk_detach() could be used to do the cleanup in the exact same way
> for both ‘a’ and ‘b’, as this new patch can do, without the old API.
> 
> 
> 2) Now, let us move to the second point.
> 
> 
>  Not hotplugable devices will not
>     be detached anyway. 
> 
> Correct, I mentioned in the commit message that the rte/eal code should block this
> and it does.
>     
>     From the other side this patch will allow following scenario
>     for not detachable devices:
> 
>     
>     	- add_port(05.00.0)
>     	- configure()
> 
>     	- stop()
>     	- close()
>     	- detach(05.00.0) -----> Failure
> 
> Correct/expected.
> 
> 
>     	- add_port(05.00.0)
>     	- configure()
>     	- start()
>     
>     At this point OvS will get closed device by name and will
>     try to configure it and start again.
>     
>     My concern is that we can't be sure that it will continue
>     work properly after close.
>     
>     According to DPDK API, *device can not be started back after
>     close*. Unfortunately, there is no documentation about
>     trying to reinitialize closed device. So, we can't
>     rely on your assumption that device will work properly
>     after that.
> 
> 
> Using my patch with VFIO and 16.11; VFIO is not detachable in this environment.
> There is no problem seen; packet rates are the same before and after del-port/re-add-port. 
> 
> Using your previous patch, this is what I see:
> In this case, ‘attached’ will be false and on del-port, no resources will be freed -
> _close will not be called and resources leaked.

Resources are not actually leaked because you're able to reopen the device.

> So, you want to achieve blocking the _close call to free resources, in case we want
> to add the port back and because we are not sure if the device can be restarted
> after _close?

Yes. I found at least one device that doesn't support hotplug and will not work
after close.
It is 'Chelsio T5' NIC (cxgbe). On close it will call 't4_fw_bye' to close
connection with NICs firmware and it will not open it back because 't4_fw_hello'
can be called only on 'eth_cxgbe_pci_probe'. Such behaviour reasonable for this
driver because of some unusual NIC architecture (It has only 1 pci address for
all it's ports).

So, with this patch applied it will be impossible to use 'Chelsio T5' NIC with OvS.

> Since we deleted the port, it is likely we won’t use it again, but we will want to use
> other ports, presumably.

That is not true because there are some actions that leads to removing and subsequent
re-adding ports to the datapath. For example, setting the 'ofport_request':

ovs-vsctl set Interface dpdk4 ofport_request=42
---> |bridge|INFO|bridge br10_4f0: deleted interface dpdk4 on port 1
     |netdev_dpdk|INFO|Device '0000:88:00.0' detached
     |netdev_dpdk|INFO|Device '0000:88:00.0' attached to DPDK
     |bridge|INFO|bridge br10_4f0: added interface dpdk4 on port 42

> 
> Also, this would be a bug in rte/eal, if it was an issue, since we should be able to free
> resources on deleting a port, right ?

I don't think so because we passed this device as cmdline argument and we, actually,
need to restart application to change cmdline arguments. And we have no issues
with resource freeing if device wasn't passed via cmdline.

> Freeing unused resources should not make it
> impossible to reuse the device later by reattaching it, right ?

The key word here is 'reattaching', but we're not reattaching it.
You're just tries to reconfigure closed device while DPDK API
says *"The device cannot be restarted!"*.

> However, my earlier test shows this works with VFIO.

At least 'cxgbe' will not work. I have no such NIC for testing, but it's
clear from the code.
 
>     
>     > Fixes: 5dcde09c80a8 ("netdev-dpdk: Fix device leak on port deletion.")
>     > CC: Ilya Maximets <i.maximets@samsung.com>
>     > Signed-off-by: Darrell Ball dlu998@gmail.comf	ter
>     > ---
>     >  lib/netdev-dpdk.c | 14 +++-----------
>     >  1 file changed, 3 insertions(+), 11 deletions(-)
>     > 
>     > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>     > index b770b70..753345d 100644
>     > --- a/lib/netdev-dpdk.c
>     > +++ b/lib/netdev-dpdk.c
>     > @@ -365,9 +365,6 @@ struct netdev_dpdk {
>     >      /* Device arguments for dpdk ports */
>     >      char *devargs;
>     >  
>     > -    /* If true, device was attached by rte_eth_dev_attach(). */
>     > -    bool attached;
>     > -
>     >      /* In dpdk_list. */
>     >      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
>     >  
>     > @@ -862,7 +859,6 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no,
>     >      dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>     >      ovsrcu_index_init(&dev->vid, -1);
>     >      dev->vhost_reconfigured = false;
>     > -    dev->attached = false;
>     >  
>     >      ovsrcu_init(&dev->qos_conf, NULL);
>     >  
>     > @@ -1016,7 +1012,7 @@ netdev_dpdk_destruct(struct netdev *netdev)
>     >  
>     >      rte_eth_dev_stop(dev->port_id);
>     >  
>     > -    if (dev->attached) {
>     > +    if (rte_eth_dev_is_valid_port(dev->port_id)) {
>     >          rte_eth_dev_close(dev->port_id);
>     >          if (rte_eth_dev_detach(dev->port_id, devname) < 0) {
>     >              VLOG_ERR("Device '%s' can not be detached", dev->devargs);
>     > @@ -1136,8 +1132,7 @@ netdev_dpdk_lookup_by_port_id(dpdk_port_t port_id)
>     >  }
>     >  
>     >  static dpdk_port_t
>     > -netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
>     > -                            const char *devargs, char **errp)
>     > +netdev_dpdk_process_devargs(const char *devargs, char **errp)
>     >  {
>     >      /* Get the name up to the first comma. */
>     >      char *name = xmemdup0(devargs, strcspn(devargs, ","));
>     > @@ -1148,8 +1143,6 @@ netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
>     >              || !rte_eth_dev_is_valid_port(new_port_id)) {
>     >          /* Device not found in DPDK, attempt to attach it */
>     >          if (!rte_eth_dev_attach(devargs, &new_port_id)) {
>     > -            /* Attach successful */
>     > -            dev->attached = true;
>     >              VLOG_INFO("Device '%s' attached to DPDK", devargs);
>     >          } else {
>     >              /* Attach unsuccessful */
>     > @@ -1236,8 +1229,7 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args,
>     >           * is valid */
>     >          if (!(dev->devargs && !strcmp(dev->devargs, new_devargs)
>     >                 && rte_eth_dev_is_valid_port(dev->port_id))) {
>     > -            dpdk_port_t new_port_id = netdev_dpdk_process_devargs(dev,
>     > -                                                                  new_devargs,
>     > +            dpdk_port_t new_port_id = netdev_dpdk_process_devargs(new_devargs,
>     >                                                                    errp);
>     >              if (!rte_eth_dev_is_valid_port(new_port_id)) {
>     >                  err = EINVAL;
>     > 
>     _______________________________________________
>     dev mailing list
>     dev@openvswitch.org
Darrell Ball June 8, 2017, 4:50 a.m. UTC | #4
On 6/7/17, 1:23 AM, "Ilya Maximets" <i.maximets@samsung.com> wrote:

    On 07.06.2017 04:46, Darrell Ball wrote:
    > 

    > 

    > On 6/6/17, 5:46 AM, "ovs-dev-bounces@openvswitch.org on behalf of Ilya Maximets" <ovs-dev-bounces@openvswitch.org on behalf of i.maximets@samsung.com> wrote:

    > 

    >     On 06.06.2017 11:30, Darrell Ball wrote:

    >     > On port deletion, device resources cannot be cleaned

    >     > up if the device was attached when vswitchd was started.

    >     > The issue is mainly due to introduction of the 'attached'

    >     > field for the netdev_dpdk device context.  The logic

    >     > setting the 'attached' field did not include startup

    >     > handling.  The use of an extra 'attached' field was

    >     > introduced to guard against detaching a device that could

    >     > not be detached, but should not be necessary as the rte and

    >     > eal keep the attached state, as they must, and also filter

    >     > trying to detach a device that is not detachable.  If

    >     > there were any bugs in this regard, which I did not find

    >     > with basic testing, then they should be fixed in rte/eal.

    >     

    >     Why you want to free resources of these devices?

    >     It's, actually, was a default behaviour of OVS for years.

    >     If you want to detach hotplugable devices than you may just not

    >     attach them using cmdline.

    > 

    > Some testing:

    > 

    > 1) Using a hotpluggable device; it is using the uio_pci_generic

    > driver.

    > a) I restart OVS with the device already bound.

    > When I do del-port to remove it, the code skips resource cleanup,

    > namely *_close and *_detach, bcoz the new internal OVS variable ‘attached’ is

    > not true, even though rte/eal considers it ‘attached.’

    
    At first, I already told in discussion of my previous patch that
    'dev->attached' is not the copy of internal state of device inside dpdk.
    They are completely different.
    
    About your testing.
    I tried this:
    	1. Start ovs-vswitchd
    	2. ovs-vsctl add-port br1 dpdk0 -- set Interface dpdk0 type=dpdk options:dpdk-devargs=0000:88:00.0
    	   --> |netdev_dpdk|INFO|Device '0000:88:00.0' attached to DPDK
    	3. Restart ovs-vswitchd
    	4. ovs-vsctl del-port dpdk0
    	   --> |netdev_dpdk|INFO|Device '0000:88:00.0' detached
    
    So, all resources freed as expected.
    What I'm doing wrong?


Here is an example of the order of events with code annotations:

1) vswitchd is not started and I have bound some devices to dpdk (see example below)

Network devices using DPDK-compatible driver
============================================
0000:04:00.0 'Ethernet Controller 10-Gigabit X540-AT2' drv=uio_pci_generic unused=ixgbe,vfio-pci
0000:04:00.1 'Ethernet Controller 10-Gigabit X540-AT2' drv=uio_pci_generic unused=ixgbe,vfio-pci

2) I start vswitchd
3) I do add-port for a couple ports previously bound to dpdk, plus the usual general config.
4) So at this point, the below outer ‘if’ condition will be false because
a) rte_eth_dev_count will be non-zero
b) rte_eth_dev_get_port_by_name() will return 0 (success)
c) rte_eth_dev_is_valid_port() will return 1, meaning valid
So, this means dev->attached will remain ‘false’ since this is the only place where we
set it to ‘true’.

    if (!rte_eth_dev_count()
            || rte_eth_dev_get_port_by_name(name, &new_port_id)
            || !rte_eth_dev_is_valid_port(new_port_id)) {
        /* Device not found in DPDK, attempt to attach it */
        if (!rte_eth_dev_attach(devargs, &new_port_id)) {
            /* Attach successful */
            dev->attached = true;
            VLOG_INFO("Device '%s' attached to DPDK", devargs);
        } else {
            /* Attach unsuccessful */
            new_port_id = DPDK_ETH_PORT_ID_INVALID;
            VLOG_WARN_BUF(errp, "Error attaching device '%s' to DPDK",
                          devargs);
        }
    }
5) Send some traffic and verify expected rates
6) Now, I do del-port, but the ‘if (dev->attached)’ check does NOT pass because dev->attached is ‘false’ 
So rte_eth_dev_close and rte_eth_dev_detach are NOT called; these functions clean up
resources – rte/eal general and device specific resources.

    if (dev->attached) {
        rte_eth_dev_close(dev->port_id);
        if (rte_eth_dev_detach(dev->port_id, devname) < 0) {
            VLOG_ERR("Device '%s' can not be detached", dev->devargs);
        } else {
            VLOG_INFO("Device '%s' detached", devname);
        }
    }

As I stated earlier, this worked before 5dcde09c80a8 ("netdev-dpdk: Fix device leak on port deletion."),
by calling netdev_dpdk_detach().


    Or port was bound using whitelist in 'other_ocnfig:dpdk-extra' ? In this case
    it's expected behaviour that it will not be detached.
    It's reasonable because changes in 'dpdk-extra' requires restart and device
    will not be attached after restart if removed from list in 'dpdk-extra'.

No, see above.
    
    > b) If I use the exact same device, but hot plug attach it this time and then do del-port,

    > the device resources are cleaned up.

    > 

    > I would expect ‘a’ to do cleanup and also be consistent with ‘b’.

    > 

    > Moreover, this worked before

    > 5dcde09c80a8 ("netdev-dpdk: Fix device leak on port deletion."), 

    > as netdev_dpdk_detach() could be used to do the cleanup in the exact same way

    > for both ‘a’ and ‘b’, as this new patch can do, without the old API.

    > 

    > 

    > 2) Now, let us move to the second point.

    > 

    > 

    >  Not hotplugable devices will not

    >     be detached anyway. 

    > 

    > Correct, I mentioned in the commit message that the rte/eal code should block this

    > and it does.

    >     

    >     From the other side this patch will allow following scenario

    >     for not detachable devices:

    > 

    >     

    >     	- add_port(05.00.0)

    >     	- configure()

    > 

    >     	- stop()

    >     	- close()

    >     	- detach(05.00.0) -----> Failure

    > 

    > Correct/expected.

    > 

    > 

    >     	- add_port(05.00.0)

    >     	- configure()

    >     	- start()

    >     

    >     At this point OvS will get closed device by name and will

    >     try to configure it and start again.

    >     

    >     My concern is that we can't be sure that it will continue

    >     work properly after close.

    >     

    >     According to DPDK API, *device can not be started back after

    >     close*. Unfortunately, there is no documentation about

    >     trying to reinitialize closed device. So, we can't

    >     rely on your assumption that device will work properly

    >     after that.

    > 

    > 

    > Using my patch with VFIO and 16.11; VFIO is not detachable in this environment.

    > There is no problem seen; packet rates are the same before and after del-port/re-add-port. 

    > 

    > Using your previous patch, this is what I see:

    > In this case, ‘attached’ will be false and on del-port, no resources will be freed -

    > _close will not be called and resources leaked.

    
    Resources are not actually leaked because you're able to reopen the device.

If that port is not restarted, resources will be leaked.
If the same port is restarted, some resources will be reused I believe, although I did
not trace all the resources.

    
    > So, you want to achieve blocking the _close call to free resources, in case we want

    > to add the port back and because we are not sure if the device can be restarted

    > after _close?

    
    Yes. I found at least one device that doesn't support hotplug and will not work
    after close.
    It is 'Chelsio T5' NIC (cxgbe). On close it will call 't4_fw_bye' to close
    connection with NICs firmware and it will not open it back because 't4_fw_hello'
    can be called only on 'eth_cxgbe_pci_probe'. Such behaviour reasonable for this
    driver because of some unusual NIC architecture (It has only 1 pci address for
    all it's ports).
    
    So, with this patch applied it will be impossible to use 'Chelsio T5' NIC with OvS.


ic, so ‘dev->attached’ in OVS is about supporting a particular ‘NIC’, or rather ‘ASIC’ in this case.
I checked the driver code; this could be a driver bug as ‘close’ will not provide lots
of benefit in this case if restart is impossible or maybe rte needs a CLOSABLE flag 
similar to the DETACHABLE flag.
Handling this kind of device specific behavior at the application layer (i.e. OVS) is 
not maintainable from OVS POV, dpdk POV or even the NIC/ASIC vendor POV.

    
    > Since we deleted the port, it is likely we won’t use it again, but we will want to use

    > other ports, presumably.

    
    That is not true because there are some actions that leads to removing and subsequent
    re-adding ports to the datapath. For example, setting the 'ofport_request':
    
    ovs-vsctl set Interface dpdk4 ofport_request=42
    ---> |bridge|INFO|bridge br10_4f0: deleted interface dpdk4 on port 1
         |netdev_dpdk|INFO|Device '0000:88:00.0' detached
         |netdev_dpdk|INFO|Device '0000:88:00.0' attached to DPDK
         |bridge|INFO|bridge br10_4f0: added interface dpdk4 on port 42

The point I was trying to make is that we don’t necessarily reuse the same device.
I should not say how ‘likely’ it is to re-add-port. 
    
    > 

    > Also, this would be a bug in rte/eal, if it was an issue, since we should be able to free

    > resources on deleting a port, right ?

    
    I don't think so because we passed this device as cmdline argument and we, actually,
    need to restart application to change cmdline arguments. And we have no issues
    with resource freeing if device wasn't passed via cmdline.

I have no idea what you are saying with this paragraph; I don’t need to restart OVS
to del-port and re-add-port, so maybe you can expand with a concrete example to 
explain what you want to communicate.

    
    > Freeing unused resources should not make it

    > impossible to reuse the device later by reattaching it, right ?

    
    The key word here is 'reattaching', but we're not reattaching it.
    You're just tries to reconfigure closed device while DPDK API
    says *"The device cannot be restarted!"*.

It is just a typo; I meant ‘restart’, as in the previous paragraph.
    
    > However, my earlier test shows this works with VFIO.

    
    At least 'cxgbe' will not work. I have no such NIC for testing, but it's
    clear from the code.
     
    >     

    >     > Fixes: 5dcde09c80a8 ("netdev-dpdk: Fix device leak on port deletion.")

    >     > CC: Ilya Maximets <i.maximets@samsung.com>

    >     > Signed-off-by: Darrell Ball dlu998@gmail.comf	ter

    >     > ---

    >     >  lib/netdev-dpdk.c | 14 +++-----------

    >     >  1 file changed, 3 insertions(+), 11 deletions(-)

    >     > 

    >     > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c

    >     > index b770b70..753345d 100644

    >     > --- a/lib/netdev-dpdk.c

    >     > +++ b/lib/netdev-dpdk.c

    >     > @@ -365,9 +365,6 @@ struct netdev_dpdk {

    >     >      /* Device arguments for dpdk ports */

    >     >      char *devargs;

    >     >  

    >     > -    /* If true, device was attached by rte_eth_dev_attach(). */

    >     > -    bool attached;

    >     > -

    >     >      /* In dpdk_list. */

    >     >      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);

    >     >  

    >     > @@ -862,7 +859,6 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no,

    >     >      dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);

    >     >      ovsrcu_index_init(&dev->vid, -1);

    >     >      dev->vhost_reconfigured = false;

    >     > -    dev->attached = false;

    >     >  

    >     >      ovsrcu_init(&dev->qos_conf, NULL);

    >     >  

    >     > @@ -1016,7 +1012,7 @@ netdev_dpdk_destruct(struct netdev *netdev)

    >     >  

    >     >      rte_eth_dev_stop(dev->port_id);

    >     >  

    >     > -    if (dev->attached) {

    >     > +    if (rte_eth_dev_is_valid_port(dev->port_id)) {

    >     >          rte_eth_dev_close(dev->port_id);

    >     >          if (rte_eth_dev_detach(dev->port_id, devname) < 0) {

    >     >              VLOG_ERR("Device '%s' can not be detached", dev->devargs);

    >     > @@ -1136,8 +1132,7 @@ netdev_dpdk_lookup_by_port_id(dpdk_port_t port_id)

    >     >  }

    >     >  

    >     >  static dpdk_port_t

    >     > -netdev_dpdk_process_devargs(struct netdev_dpdk *dev,

    >     > -                            const char *devargs, char **errp)

    >     > +netdev_dpdk_process_devargs(const char *devargs, char **errp)

    >     >  {

    >     >      /* Get the name up to the first comma. */

    >     >      char *name = xmemdup0(devargs, strcspn(devargs, ","));

    >     > @@ -1148,8 +1143,6 @@ netdev_dpdk_process_devargs(struct netdev_dpdk *dev,

    >     >              || !rte_eth_dev_is_valid_port(new_port_id)) {

    >     >          /* Device not found in DPDK, attempt to attach it */

    >     >          if (!rte_eth_dev_attach(devargs, &new_port_id)) {

    >     > -            /* Attach successful */

    >     > -            dev->attached = true;

    >     >              VLOG_INFO("Device '%s' attached to DPDK", devargs);

    >     >          } else {

    >     >              /* Attach unsuccessful */

    >     > @@ -1236,8 +1229,7 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args,

    >     >           * is valid */

    >     >          if (!(dev->devargs && !strcmp(dev->devargs, new_devargs)

    >     >                 && rte_eth_dev_is_valid_port(dev->port_id))) {

    >     > -            dpdk_port_t new_port_id = netdev_dpdk_process_devargs(dev,

    >     > -                                                                  new_devargs,

    >     > +            dpdk_port_t new_port_id = netdev_dpdk_process_devargs(new_devargs,

    >     >                                                                    errp);

    >     >              if (!rte_eth_dev_is_valid_port(new_port_id)) {

    >     >                  err = EINVAL;

    >     > 

    >     _______________________________________________

    >     dev mailing list

    >     dev@openvswitch.org
diff mbox

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index b770b70..753345d 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -365,9 +365,6 @@  struct netdev_dpdk {
     /* Device arguments for dpdk ports */
     char *devargs;
 
-    /* If true, device was attached by rte_eth_dev_attach(). */
-    bool attached;
-
     /* In dpdk_list. */
     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
 
@@ -862,7 +859,6 @@  common_construct(struct netdev *netdev, dpdk_port_t port_no,
     dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
     ovsrcu_index_init(&dev->vid, -1);
     dev->vhost_reconfigured = false;
-    dev->attached = false;
 
     ovsrcu_init(&dev->qos_conf, NULL);
 
@@ -1016,7 +1012,7 @@  netdev_dpdk_destruct(struct netdev *netdev)
 
     rte_eth_dev_stop(dev->port_id);
 
-    if (dev->attached) {
+    if (rte_eth_dev_is_valid_port(dev->port_id)) {
         rte_eth_dev_close(dev->port_id);
         if (rte_eth_dev_detach(dev->port_id, devname) < 0) {
             VLOG_ERR("Device '%s' can not be detached", dev->devargs);
@@ -1136,8 +1132,7 @@  netdev_dpdk_lookup_by_port_id(dpdk_port_t port_id)
 }
 
 static dpdk_port_t
-netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
-                            const char *devargs, char **errp)
+netdev_dpdk_process_devargs(const char *devargs, char **errp)
 {
     /* Get the name up to the first comma. */
     char *name = xmemdup0(devargs, strcspn(devargs, ","));
@@ -1148,8 +1143,6 @@  netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
             || !rte_eth_dev_is_valid_port(new_port_id)) {
         /* Device not found in DPDK, attempt to attach it */
         if (!rte_eth_dev_attach(devargs, &new_port_id)) {
-            /* Attach successful */
-            dev->attached = true;
             VLOG_INFO("Device '%s' attached to DPDK", devargs);
         } else {
             /* Attach unsuccessful */
@@ -1236,8 +1229,7 @@  netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args,
          * is valid */
         if (!(dev->devargs && !strcmp(dev->devargs, new_devargs)
                && rte_eth_dev_is_valid_port(dev->port_id))) {
-            dpdk_port_t new_port_id = netdev_dpdk_process_devargs(dev,
-                                                                  new_devargs,
+            dpdk_port_t new_port_id = netdev_dpdk_process_devargs(new_devargs,
                                                                   errp);
             if (!rte_eth_dev_is_valid_port(new_port_id)) {
                 err = EINVAL;