diff mbox series

[net-next,5/6] net/ncsi: Reset channel state in ncsi_start_dev()

Message ID 20181018035917.19413-6-sam@mendozajonas.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series net/ncsi: Allow enabling multiple packages & channels | expand

Commit Message

Sam Mendoza-Jonas Oct. 18, 2018, 3:59 a.m. UTC
When the NCSI driver is stopped with ncsi_stop_dev() the channel
monitors are stopped and the state set to "inactive". However the
channels are still configured and active from the perspective of the
network controller. We should suspend each active channel but in the
context of ncsi_stop_dev() the transmit queue has been or is about to be
stopped so we won't have time to do so.

Instead when ncsi_start_dev() is called if the NCSI topology has already
been probed then call ncsi_reset_dev() to suspend any channels that were
previously active. This resets the network controller to a known state,
provides an up to date view of channel link state, and makes sure that
mode flags such as NCSI_MODE_TX_ENABLE are properly reset.

In addition to ncsi_start_dev() use ncsi_reset_dev() in ncsi-netlink.c
to update the channel configuration more cleanly.

Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
---
 net/ncsi/internal.h     |  2 ++
 net/ncsi/ncsi-manage.c  | 59 +++++++++++++++++++++++++++++++++++++----
 net/ncsi/ncsi-netlink.c | 10 +++----
 3 files changed, 60 insertions(+), 11 deletions(-)

Comments

Justin.Lee1@Dell.com Oct. 30, 2018, 9:26 p.m. UTC | #1
> +int ncsi_reset_dev(struct ncsi_dev *nd)
> +{
> +	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
> +	struct ncsi_channel *nc, *active;
> +	struct ncsi_package *np;
> +	unsigned long flags;
> +	bool enabled;
> +	int state;
> +
> +	active = NULL;
> +	NCSI_FOR_EACH_PACKAGE(ndp, np) {
> +		NCSI_FOR_EACH_CHANNEL(np, nc) {
> +			spin_lock_irqsave(&nc->lock, flags);
> +			enabled = nc->monitor.enabled;
> +			state = nc->state;
> +			spin_unlock_irqrestore(&nc->lock, flags);
> +
> +			if (enabled)
> +				ncsi_stop_channel_monitor(nc);
> +			if (state == NCSI_CHANNEL_ACTIVE) {
> +				active = nc;
> +				break;

Is the original intention to process the channel one by one?
If it is the case, there are two loops and we might need to use
"goto found" instead.

> +			}
> +		}
> +	}
> +

found: ?

> +	if (!active) {
> +		/* Done */
> +		spin_lock_irqsave(&ndp->lock, flags);
> +		ndp->flags &= ~NCSI_DEV_RESET;
> +		spin_unlock_irqrestore(&ndp->lock, flags);
> +		return ncsi_choose_active_channel(ndp);
> +	}
> +
> +	spin_lock_irqsave(&ndp->lock, flags);
> +	ndp->flags |= NCSI_DEV_RESET;
> +	ndp->active_channel = active;
> +	ndp->active_package = active->package;
> +	spin_unlock_irqrestore(&ndp->lock, flags);
> +
> +	nd->state = ncsi_dev_state_suspend;
> +	schedule_work(&ndp->work);
> +	return 0;
> +}


Also similar issue in ncsi_choose_active_channel() function below.

> @@ -916,32 +1045,49 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
>  
>  			ncm = &nc->modes[NCSI_MODE_LINK];
>  			if (ncm->data[2] & 0x1) {
> -				spin_unlock_irqrestore(&nc->lock, flags);
>  				found = nc;
> -				goto out;
> +				with_link = true;
>  			}
>  
> -			spin_unlock_irqrestore(&nc->lock, flags);
> +			/* If multi_channel is enabled configure all valid
> +			 * channels whether or not they currently have link
> +			 * so they will have AENs enabled.
> +			 */
> +			if (with_link || np->multi_channel) {

I notice that there is a case that we will misconfigure the interface.
For example below, multi-channel is not enable for package 1.
But we enable the channel for ncsi2 below (package 1 channel 0) as that interface is the first
channel for that package with link.

cat /sys/kernel/debug/ncsi_protocol/ncsi_device_
IFIDX IFNAME NAME   PID CID RX TX MP MC WP WC PC CS PS LS RU CR NQ HA
=====================================================================
  2   eth2   ncsi0  000 000 1  1  1  1  1  1  0  2  1  1  1  1  0  1
  2   eth2   ncsi1  000 001 1  0  1  1  1  1  0  2  1  1  1  1  0  1
  2   eth2   ncsi2  001 000 1  0  1  0  1  1  0  2  1  1  1  1  0  1
  2   eth2   ncsi3  001 001 0  0  1  0  1  1  0  1  0  1  1  1  0  1
=====================================================================
MP: Multi-mode Package     WP: Whitelist Package
MC: Multi-mode Channel     WC: Whitelist Channel
PC: Primary Channel        CS: Channel State IA/A/IV 1/2/3
PS: Poll Status            LS: Link Status
RU: Running                CR: Carrier OK
NQ: Queue Stopped          HA: Hardware Arbitration

I temporally change to the following to avoid that.
			if ((with_link &&
			     !np->multi_channel &&
			     list_empty(&ndp->channel_queue)) || np->multi_channel) {

> +				spin_lock_irqsave(&ndp->lock, flags);
> +				list_add_tail_rcu(&nc->link,
> +						  &ndp->channel_queue);
> +				spin_unlock_irqrestore(&ndp->lock, flags);
> +
> +				netdev_dbg(ndp->ndev.dev,
> +					   "NCSI: Channel %u added to queue (link %s)\n",
> +					   nc->id,
> +					   ncm->data[2] & 0x1 ? "up" : "down");
> +			}
> +
> +			spin_unlock_irqrestore(&nc->lock, cflags);
> +
> +			if (with_link && !np->multi_channel)
> +				break;

Similar issue here. As we are using break, so each package will configure one active TX.

>  		}
> +		if (with_link && !ndp->multi_package)
> +			break;
>  	}
>  
> -	if (!found) {
> +	if (list_empty(&ndp->channel_queue) && found) {
> +		netdev_info(ndp->ndev.dev,
> +			    "NCSI: No channel with link found, configuring channel %u\n",
> +			    found->id);
> +		spin_lock_irqsave(&ndp->lock, flags);
> +		list_add_tail_rcu(&found->link, &ndp->channel_queue);
> +		spin_unlock_irqrestore(&ndp->lock, flags);
> +	} else if (!found) {
>  		netdev_warn(ndp->ndev.dev,
> -			    "NCSI: No channel found with link\n");
> +			    "NCSI: No channel found to configure!\n");
>  		ncsi_report_link(ndp, true);
>  		return -ENODEV;
>  	}


Also, for deselect package handler function, do we want to set to inactive here?
If we just change the state, the cached data still keeps the old value. If the new 
ncsi_reset_dev() function is handling one by one, can we skip this part?

static int ncsi_rsp_handler_dp(struct ncsi_request *nr)
{
	struct ncsi_rsp_pkt *rsp;
	struct ncsi_dev_priv *ndp = nr->ndp;
	struct ncsi_package *np;
	struct ncsi_channel *nc;
	unsigned long flags;

	/* Find the package */
	rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
	ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
				      &np, NULL);
	if (!np)
		return -ENODEV;

	/* Change state of all channels attached to the package */
	NCSI_FOR_EACH_CHANNEL(np, nc) {
		spin_lock_irqsave(&nc->lock, flags);
		nc->state = NCSI_CHANNEL_INACTIVE;

		spin_unlock_irqrestore(&nc->lock, flags);
	}

	return 0;
}
Sam Mendoza-Jonas Nov. 1, 2018, 10:53 p.m. UTC | #2
On Tue, 2018-10-30 at 21:26 +0000, Justin.Lee1@Dell.com wrote:
> > +int ncsi_reset_dev(struct ncsi_dev *nd)
> > +{
> > +	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
> > +	struct ncsi_channel *nc, *active;
> > +	struct ncsi_package *np;
> > +	unsigned long flags;
> > +	bool enabled;
> > +	int state;
> > +
> > +	active = NULL;
> > +	NCSI_FOR_EACH_PACKAGE(ndp, np) {
> > +		NCSI_FOR_EACH_CHANNEL(np, nc) {
> > +			spin_lock_irqsave(&nc->lock, flags);
> > +			enabled = nc->monitor.enabled;
> > +			state = nc->state;
> > +			spin_unlock_irqrestore(&nc->lock, flags);
> > +
> > +			if (enabled)
> > +				ncsi_stop_channel_monitor(nc);
> > +			if (state == NCSI_CHANNEL_ACTIVE) {
> > +				active = nc;
> > +				break;
> 
> Is the original intention to process the channel one by one?
> If it is the case, there are two loops and we might need to use
> "goto found" instead.

Yes we'll need to break out of the package loop here as well.

> 
> > +			}
> > +		}
> > +	}
> > +
> 
> found: ?
> 
> > +	if (!active) {
> > +		/* Done */
> > +		spin_lock_irqsave(&ndp->lock, flags);
> > +		ndp->flags &= ~NCSI_DEV_RESET;
> > +		spin_unlock_irqrestore(&ndp->lock, flags);
> > +		return ncsi_choose_active_channel(ndp);
> > +	}
> > +
> > +	spin_lock_irqsave(&ndp->lock, flags);
> > +	ndp->flags |= NCSI_DEV_RESET;
> > +	ndp->active_channel = active;
> > +	ndp->active_package = active->package;
> > +	spin_unlock_irqrestore(&ndp->lock, flags);
> > +
> > +	nd->state = ncsi_dev_state_suspend;
> > +	schedule_work(&ndp->work);
> > +	return 0;
> > +}
> 
> Also similar issue in ncsi_choose_active_channel() function below.
> 
> > @@ -916,32 +1045,49 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
> >  
> >  			ncm = &nc->modes[NCSI_MODE_LINK];
> >  			if (ncm->data[2] & 0x1) {
> > -				spin_unlock_irqrestore(&nc->lock, flags);
> >  				found = nc;
> > -				goto out;
> > +				with_link = true;
> >  			}
> >  
> > -			spin_unlock_irqrestore(&nc->lock, flags);
> > +			/* If multi_channel is enabled configure all valid
> > +			 * channels whether or not they currently have link
> > +			 * so they will have AENs enabled.
> > +			 */
> > +			if (with_link || np->multi_channel) {
> 
> I notice that there is a case that we will misconfigure the interface.
> For example below, multi-channel is not enable for package 1.
> But we enable the channel for ncsi2 below (package 1 channel 0) as that interface is the first
> channel for that package with link.

I don't think I see the issue here; multi-channel is not set on package
1, but both channels are in the channel whitelist. Channel 0 is
configured since it's the first found on package 1, and channel 1 is not
since channel 0 is already found. Are you expecting something different?
 
> 
> cat /sys/kernel/debug/ncsi_protocol/ncsi_device_
> IFIDX IFNAME NAME   PID CID RX TX MP MC WP WC PC CS PS LS RU CR NQ HA
> =====================================================================
>   2   eth2   ncsi0  000 000 1  1  1  1  1  1  0  2  1  1  1  1  0  1
>   2   eth2   ncsi1  000 001 1  0  1  1  1  1  0  2  1  1  1  1  0  1
>   2   eth2   ncsi2  001 000 1  0  1  0  1  1  0  2  1  1  1  1  0  1
>   2   eth2   ncsi3  001 001 0  0  1  0  1  1  0  1  0  1  1  1  0  1
> =====================================================================
> MP: Multi-mode Package     WP: Whitelist Package
> MC: Multi-mode Channel     WC: Whitelist Channel
> PC: Primary Channel        CS: Channel State IA/A/IV 1/2/3
> PS: Poll Status            LS: Link Status
> RU: Running                CR: Carrier OK
> NQ: Queue Stopped          HA: Hardware Arbitration
> 
> I temporally change to the following to avoid that.
> 			if ((with_link &&
> 			     !np->multi_channel &&
> 			     list_empty(&ndp->channel_queue)) || np->multi_channel) {
> 
> > +				spin_lock_irqsave(&ndp->lock, flags);
> > +				list_add_tail_rcu(&nc->link,
> > +						  &ndp->channel_queue);
> > +				spin_unlock_irqrestore(&ndp->lock, flags);
> > +
> > +				netdev_dbg(ndp->ndev.dev,
> > +					   "NCSI: Channel %u added to queue (link %s)\n",
> > +					   nc->id,
> > +					   ncm->data[2] & 0x1 ? "up" : "down");
> > +			}
> > +
> > +			spin_unlock_irqrestore(&nc->lock, cflags);
> > +
> > +			if (with_link && !np->multi_channel)
> > +				break;
> 
> Similar issue here. As we are using break, so each package will configure one active TX.
> 

I believe this is handled properly in ncsi_channel_is_tx() in the most
recent revision.

> >  		}
> > +		if (with_link && !ndp->multi_package)
> > +			break;
> >  	}
> >  
> > -	if (!found) {
> > +	if (list_empty(&ndp->channel_queue) && found) {
> > +		netdev_info(ndp->ndev.dev,
> > +			    "NCSI: No channel with link found, configuring channel %u\n",
> > +			    found->id);
> > +		spin_lock_irqsave(&ndp->lock, flags);
> > +		list_add_tail_rcu(&found->link, &ndp->channel_queue);
> > +		spin_unlock_irqrestore(&ndp->lock, flags);
> > +	} else if (!found) {
> >  		netdev_warn(ndp->ndev.dev,
> > -			    "NCSI: No channel found with link\n");
> > +			    "NCSI: No channel found to configure!\n");
> >  		ncsi_report_link(ndp, true);
> >  		return -ENODEV;
> >  	}
> 
> Also, for deselect package handler function, do we want to set to inactive here?
> If we just change the state, the cached data still keeps the old value. If the new 
> ncsi_reset_dev() function is handling one by one, can we skip this part?

Technically yes we could skip the state change here since
ncsi_reset_dev() will have already done it. However if we send a DP
command via some other means then it is probably best to ensure we treat
all channels on that package as inactive.

> 
> static int ncsi_rsp_handler_dp(struct ncsi_request *nr)
> {
> 	struct ncsi_rsp_pkt *rsp;
> 	struct ncsi_dev_priv *ndp = nr->ndp;
> 	struct ncsi_package *np;
> 	struct ncsi_channel *nc;
> 	unsigned long flags;
> 
> 	/* Find the package */
> 	rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
> 	ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
> 				      &np, NULL);
> 	if (!np)
> 		return -ENODEV;
> 
> 	/* Change state of all channels attached to the package */
> 	NCSI_FOR_EACH_CHANNEL(np, nc) {
> 		spin_lock_irqsave(&nc->lock, flags);
> 		nc->state = NCSI_CHANNEL_INACTIVE;
> 
> 		spin_unlock_irqrestore(&nc->lock, flags);
> 	}
> 
> 	return 0;
> }
> 
>
Justin.Lee1@Dell.com Nov. 5, 2018, 6:01 p.m. UTC | #3
> On Tue, 2018-10-30 at 21:26 +0000, Justin.Lee1@Dell.com wrote:
> > > +int ncsi_reset_dev(struct ncsi_dev *nd)
> > > +{
> > > +	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
> > > +	struct ncsi_channel *nc, *active;
> > > +	struct ncsi_package *np;
> > > +	unsigned long flags;
> > > +	bool enabled;
> > > +	int state;
> > > +
> > > +	active = NULL;
> > > +	NCSI_FOR_EACH_PACKAGE(ndp, np) {
> > > +		NCSI_FOR_EACH_CHANNEL(np, nc) {
> > > +			spin_lock_irqsave(&nc->lock, flags);
> > > +			enabled = nc->monitor.enabled;
> > > +			state = nc->state;
> > > +			spin_unlock_irqrestore(&nc->lock, flags);
> > > +
> > > +			if (enabled)
> > > +				ncsi_stop_channel_monitor(nc);
> > > +			if (state == NCSI_CHANNEL_ACTIVE) {
> > > +				active = nc;
> > > +				break;
> > 
> > Is the original intention to process the channel one by one?
> > If it is the case, there are two loops and we might need to use
> > "goto found" instead.
> 
> Yes we'll need to break out of the package loop here as well.
> 
> > 
> > > +			}
> > > +		}
> > > +	}
> > > +
> > 
> > found: ?
> > 
> > > +	if (!active) {
> > > +		/* Done */
> > > +		spin_lock_irqsave(&ndp->lock, flags);
> > > +		ndp->flags &= ~NCSI_DEV_RESET;
> > > +		spin_unlock_irqrestore(&ndp->lock, flags);
> > > +		return ncsi_choose_active_channel(ndp);
> > > +	}
> > > +
> > > +	spin_lock_irqsave(&ndp->lock, flags);
> > > +	ndp->flags |= NCSI_DEV_RESET;
> > > +	ndp->active_channel = active;
> > > +	ndp->active_package = active->package;
> > > +	spin_unlock_irqrestore(&ndp->lock, flags);
> > > +
> > > +	nd->state = ncsi_dev_state_suspend;
> > > +	schedule_work(&ndp->work);
> > > +	return 0;
> > > +}
> > 
> > Also similar issue in ncsi_choose_active_channel() function below.
> > 
> > > @@ -916,32 +1045,49 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
> > >  
> > >  			ncm = &nc->modes[NCSI_MODE_LINK];
> > >  			if (ncm->data[2] & 0x1) {
> > > -				spin_unlock_irqrestore(&nc->lock, flags);
> > >  				found = nc;
> > > -				goto out;
> > > +				with_link = true;
> > >  			}
> > >  
> > > -			spin_unlock_irqrestore(&nc->lock, flags);
> > > +			/* If multi_channel is enabled configure all valid
> > > +			 * channels whether or not they currently have link
> > > +			 * so they will have AENs enabled.
> > > +			 */
> > > +			if (with_link || np->multi_channel) {
> > 
> > I notice that there is a case that we will misconfigure the interface.
> > For example below, multi-channel is not enable for package 1.
> > But we enable the channel for ncsi2 below (package 1 channel 0) as that interface is the first
> > channel for that package with link.
> 
> I don't think I see the issue here; multi-channel is not set on package
> 1, but both channels are in the channel whitelist. Channel 0 is
> configured since it's the first found on package 1, and channel 1 is not
> since channel 0 is already found. Are you expecting something different?
>  

The setting is that multi-package is enable for both package 0 and 1.
Multi-channel is only enabled for package 0.

> > 
> > cat /sys/kernel/debug/ncsi_protocol/ncsi_device_
> > IFIDX IFNAME NAME   PID CID RX TX MP MC WP WC PC CS PS LS RU CR NQ HA
> > =====================================================================
> >   2   eth2   ncsi0  000 000 1  1  1  1  1  1  0  2  1  1  1  1  0  1
> >   2   eth2   ncsi1  000 001 1  0  1  1  1  1  0  2  1  1  1  1  0  1
> >   2   eth2   ncsi2  001 000 1  0  1  0  1  1  0  2  1  1  1  1  0  1

I was replying to the wrong old email and it might cause a bit confusion.
The first 1 meaning channel is enabled for package 1 channel 0 (ncsi2). 
For eth2, we already has ncsi0 as the active channel with TX enable.
I would think that package doesn't have the multi-channel enabled and
we should not enable the channel for ncsi2. The problem is that package 1 doesn't
enable the multi-channel and it believes it needs to enable one channel for its package 
but it doesn't aware that the other package already has one active channel.

> >   2   eth2   ncsi3  001 001 0  0  1  0  1  1  0  1  0  1  1  1  0  1
> > =====================================================================
> > MP: Multi-mode Package     WP: Whitelist Package
> > MC: Multi-mode Channel     WC: Whitelist Channel
> > PC: Primary Channel        CS: Channel State IA/A/IV 1/2/3
> > PS: Poll Status            LS: Link Status
> > RU: Running                CR: Carrier OK
> > NQ: Queue Stopped          HA: Hardware Arbitration
> > 
> > I temporally change to the following to avoid that.
> > 			if ((with_link &&
> > 			     !np->multi_channel &&
> > 			     list_empty(&ndp->channel_queue)) || np->multi_channel) {
> > 
> > > +				spin_lock_irqsave(&ndp->lock, flags);
> > > +				list_add_tail_rcu(&nc->link,
> > > +						  &ndp->channel_queue);
> > > +				spin_unlock_irqrestore(&ndp->lock, flags);
> > > +
> > > +				netdev_dbg(ndp->ndev.dev,
> > > +					   "NCSI: Channel %u added to queue (link %s)\n",
> > > +					   nc->id,
> > > +					   ncm->data[2] & 0x1 ? "up" : "down");
> > > +			}
> > > +
> > > +			spin_unlock_irqrestore(&nc->lock, cflags);
> > > +
> > > +			if (with_link && !np->multi_channel)
> > > +				break;
> > 
> > Similar issue here. As we are using break, so each package will configure one active TX.
> > 
> 
> I believe this is handled properly in ncsi_channel_is_tx() in the most
> recent revision.

I saw this issue with the last revision. I was using the wrong email to reply.

> 
> > >  		}
> > > +		if (with_link && !ndp->multi_package)
> > > +			break;
> > >  	}
> > >  
> > > -	if (!found) {
> > > +	if (list_empty(&ndp->channel_queue) && found) {
> > > +		netdev_info(ndp->ndev.dev,
> > > +			    "NCSI: No channel with link found, configuring channel %u\n",
> > > +			    found->id);
> > > +		spin_lock_irqsave(&ndp->lock, flags);
> > > +		list_add_tail_rcu(&found->link, &ndp->channel_queue);
> > > +		spin_unlock_irqrestore(&ndp->lock, flags);
> > > +	} else if (!found) {
> > >  		netdev_warn(ndp->ndev.dev,
> > > -			    "NCSI: No channel found with link\n");
> > > +			    "NCSI: No channel found to configure!\n");
> > >  		ncsi_report_link(ndp, true);
> > >  		return -ENODEV;
> > >  	}
> > 
> > Also, for deselect package handler function, do we want to set to inactive here?
> > If we just change the state, the cached data still keeps the old value. If the new 
> > ncsi_reset_dev() function is handling one by one, can we skip this part?
> 
> Technically yes we could skip the state change here since
> ncsi_reset_dev() will have already done it. However if we send a DP
> command via some other means then it is probably best to ensure we treat
> all channels on that package as inactive.

When I tested, if I didn't comment out the state change in response handler,
ncsi_reset_dev() function will not handle properly and some channels got into
invisible state and at the end we lost those selectable channels.

> 
> > 
> > static int ncsi_rsp_handler_dp(struct ncsi_request *nr)
> > {
> > 	struct ncsi_rsp_pkt *rsp;
> > 	struct ncsi_dev_priv *ndp = nr->ndp;
> > 	struct ncsi_package *np;
> > 	struct ncsi_channel *nc;
> > 	unsigned long flags;
> > 
> > 	/* Find the package */
> > 	rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
> > 	ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
> > 				      &np, NULL);
> > 	if (!np)
> > 		return -ENODEV;
> > 
> > 	/* Change state of all channels attached to the package */
> > 	NCSI_FOR_EACH_CHANNEL(np, nc) {
> > 		spin_lock_irqsave(&nc->lock, flags);
> > 		nc->state = NCSI_CHANNEL_INACTIVE;
> > 
> > 		spin_unlock_irqrestore(&nc->lock, flags);
> > 	}
> > 
> > 	return 0;
> > }
> > 
> >
Sam Mendoza-Jonas Nov. 6, 2018, 12:28 a.m. UTC | #4
On Mon, 2018-11-05 at 18:01 +0000, Justin.Lee1@Dell.com wrote:
> > On Tue, 2018-10-30 at 21:26 +0000, Justin.Lee1@Dell.com wrote:
> > > > +int ncsi_reset_dev(struct ncsi_dev *nd)
> > > > +{
> > > > +	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
> > > > +	struct ncsi_channel *nc, *active;
> > > > +	struct ncsi_package *np;
> > > > +	unsigned long flags;
> > > > +	bool enabled;
> > > > +	int state;
> > > > +
> > > > +	active = NULL;
> > > > +	NCSI_FOR_EACH_PACKAGE(ndp, np) {
> > > > +		NCSI_FOR_EACH_CHANNEL(np, nc) {
> > > > +			spin_lock_irqsave(&nc->lock, flags);
> > > > +			enabled = nc->monitor.enabled;
> > > > +			state = nc->state;
> > > > +			spin_unlock_irqrestore(&nc->lock, flags);
> > > > +
> > > > +			if (enabled)
> > > > +				ncsi_stop_channel_monitor(nc);
> > > > +			if (state == NCSI_CHANNEL_ACTIVE) {
> > > > +				active = nc;
> > > > +				break;
> > > 
> > > Is the original intention to process the channel one by one?
> > > If it is the case, there are two loops and we might need to use
> > > "goto found" instead.
> > 
> > Yes we'll need to break out of the package loop here as well.
> > 
> > > > +			}
> > > > +		}
> > > > +	}
> > > > +
> > > 
> > > found: ?
> > > 
> > > > +	if (!active) {
> > > > +		/* Done */
> > > > +		spin_lock_irqsave(&ndp->lock, flags);
> > > > +		ndp->flags &= ~NCSI_DEV_RESET;
> > > > +		spin_unlock_irqrestore(&ndp->lock, flags);
> > > > +		return ncsi_choose_active_channel(ndp);
> > > > +	}
> > > > +
> > > > +	spin_lock_irqsave(&ndp->lock, flags);
> > > > +	ndp->flags |= NCSI_DEV_RESET;
> > > > +	ndp->active_channel = active;
> > > > +	ndp->active_package = active->package;
> > > > +	spin_unlock_irqrestore(&ndp->lock, flags);
> > > > +
> > > > +	nd->state = ncsi_dev_state_suspend;
> > > > +	schedule_work(&ndp->work);
> > > > +	return 0;
> > > > +}
> > > 
> > > Also similar issue in ncsi_choose_active_channel() function below.
> > > 
> > > > @@ -916,32 +1045,49 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
> > > >  
> > > >  			ncm = &nc->modes[NCSI_MODE_LINK];
> > > >  			if (ncm->data[2] & 0x1) {
> > > > -				spin_unlock_irqrestore(&nc->lock, flags);
> > > >  				found = nc;
> > > > -				goto out;
> > > > +				with_link = true;
> > > >  			}
> > > >  
> > > > -			spin_unlock_irqrestore(&nc->lock, flags);
> > > > +			/* If multi_channel is enabled configure all valid
> > > > +			 * channels whether or not they currently have link
> > > > +			 * so they will have AENs enabled.
> > > > +			 */
> > > > +			if (with_link || np->multi_channel) {
> > > 
> > > I notice that there is a case that we will misconfigure the interface.
> > > For example below, multi-channel is not enable for package 1.
> > > But we enable the channel for ncsi2 below (package 1 channel 0) as that interface is the first
> > > channel for that package with link.
> > 
> > I don't think I see the issue here; multi-channel is not set on package
> > 1, but both channels are in the channel whitelist. Channel 0 is
> > configured since it's the first found on package 1, and channel 1 is not
> > since channel 0 is already found. Are you expecting something different?
> >  
> 
> The setting is that multi-package is enable for both package 0 and 1.
> Multi-channel is only enabled for package 0.
> 
> > > cat /sys/kernel/debug/ncsi_protocol/ncsi_device_
> > > IFIDX IFNAME NAME   PID CID RX TX MP MC WP WC PC CS PS LS RU CR NQ HA
> > > =====================================================================
> > >   2   eth2   ncsi0  000 000 1  1  1  1  1  1  0  2  1  1  1  1  0  1
> > >   2   eth2   ncsi1  000 001 1  0  1  1  1  1  0  2  1  1  1  1  0  1
> > >   2   eth2   ncsi2  001 000 1  0  1  0  1  1  0  2  1  1  1  1  0  1
> 
> I was replying to the wrong old email and it might cause a bit confusion.
> The first 1 meaning channel is enabled for package 1 channel 0 (ncsi2). 
> For eth2, we already has ncsi0 as the active channel with TX enable.
> I would think that package doesn't have the multi-channel enabled and
> we should not enable the channel for ncsi2. The problem is that package 1 doesn't
> enable the multi-channel and it believes it needs to enable one channel for its package 
> but it doesn't aware that the other package already has one active channel.

Ah, maybe the confusion here is that multi_channel is a per-package
setting; it determines what a package does with its own channels.

So you have package 0 with multi-channel enabled so it enables channels 0
& 1.
Then you have package 1 without multi-channel so it enables only channel
0.
There is still only one Tx channel (package 0, channel 0).

Does that sound right, or have I missed something?

> 
> > >   2   eth2   ncsi3  001 001 0  0  1  0  1  1  0  1  0  1  1  1  0  1
> > > =====================================================================
> > > MP: Multi-mode Package     WP: Whitelist Package
> > > MC: Multi-mode Channel     WC: Whitelist Channel
> > > PC: Primary Channel        CS: Channel State IA/A/IV 1/2/3
> > > PS: Poll Status            LS: Link Status
> > > RU: Running                CR: Carrier OK
> > > NQ: Queue Stopped          HA: Hardware Arbitration
> > > 
> > > I temporally change to the following to avoid that.
> > > 			if ((with_link &&
> > > 			     !np->multi_channel &&
> > > 			     list_empty(&ndp->channel_queue)) || np->multi_channel) {
> > > 
> > > > +				spin_lock_irqsave(&ndp->lock, flags);
> > > > +				list_add_tail_rcu(&nc->link,
> > > > +						  &ndp->channel_queue);
> > > > +				spin_unlock_irqrestore(&ndp->lock, flags);
> > > > +
> > > > +				netdev_dbg(ndp->ndev.dev,
> > > > +					   "NCSI: Channel %u added to queue (link %s)\n",
> > > > +					   nc->id,
> > > > +					   ncm->data[2] & 0x1 ? "up" : "down");
> > > > +			}
> > > > +
> > > > +			spin_unlock_irqrestore(&nc->lock, cflags);
> > > > +
> > > > +			if (with_link && !np->multi_channel)
> > > > +				break;
> > > 
> > > Similar issue here. As we are using break, so each package will configure one active TX.
> > > 
> > 
> > I believe this is handled properly in ncsi_channel_is_tx() in the most
> > recent revision.
> 
> I saw this issue with the last revision. I was using the wrong email to reply.
> 
> > > >  		}
> > > > +		if (with_link && !ndp->multi_package)
> > > > +			break;
> > > >  	}
> > > >  
> > > > -	if (!found) {
> > > > +	if (list_empty(&ndp->channel_queue) && found) {
> > > > +		netdev_info(ndp->ndev.dev,
> > > > +			    "NCSI: No channel with link found, configuring channel %u\n",
> > > > +			    found->id);
> > > > +		spin_lock_irqsave(&ndp->lock, flags);
> > > > +		list_add_tail_rcu(&found->link, &ndp->channel_queue);
> > > > +		spin_unlock_irqrestore(&ndp->lock, flags);
> > > > +	} else if (!found) {
> > > >  		netdev_warn(ndp->ndev.dev,
> > > > -			    "NCSI: No channel found with link\n");
> > > > +			    "NCSI: No channel found to configure!\n");
> > > >  		ncsi_report_link(ndp, true);
> > > >  		return -ENODEV;
> > > >  	}
> > > 
> > > Also, for deselect package handler function, do we want to set to inactive here?
> > > If we just change the state, the cached data still keeps the old value. If the new 
> > > ncsi_reset_dev() function is handling one by one, can we skip this part?
> > 
> > Technically yes we could skip the state change here since
> > ncsi_reset_dev() will have already done it. However if we send a DP
> > command via some other means then it is probably best to ensure we treat
> > all channels on that package as inactive.
> 
> When I tested, if I didn't comment out the state change in response handler,
> ncsi_reset_dev() function will not handle properly and some channels got into
> invisible state and at the end we lost those selectable channels.

Well that's not good :)
The deselect package command should only be sent once all channels on a
package have become inactive, so I wouldn't expect it to interfere with
ncsi_reset_dev(), but perhaps this gets combined with the issues sending
netlink commands close together. I'll investigate, if I'm lucky this will
be resolved with the same fixes.

> 
> > > static int ncsi_rsp_handler_dp(struct ncsi_request *nr)
> > > {
> > > 	struct ncsi_rsp_pkt *rsp;
> > > 	struct ncsi_dev_priv *ndp = nr->ndp;
> > > 	struct ncsi_package *np;
> > > 	struct ncsi_channel *nc;
> > > 	unsigned long flags;
> > > 
> > > 	/* Find the package */
> > > 	rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
> > > 	ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
> > > 				      &np, NULL);
> > > 	if (!np)
> > > 		return -ENODEV;
> > > 
> > > 	/* Change state of all channels attached to the package */
> > > 	NCSI_FOR_EACH_CHANNEL(np, nc) {
> > > 		spin_lock_irqsave(&nc->lock, flags);
> > > 		nc->state = NCSI_CHANNEL_INACTIVE;
> > > 
> > > 		spin_unlock_irqrestore(&nc->lock, flags);
> > > 	}
> > > 
> > > 	return 0;
> > > }
> > > 
> > > 
> 
>
Justin.Lee1@Dell.com Nov. 6, 2018, 5:27 p.m. UTC | #5
> On Mon, 2018-11-05 at 18:01 +0000, Justin.Lee1@Dell.com wrote:
> > > On Tue, 2018-10-30 at 21:26 +0000, Justin.Lee1@Dell.com wrote:
> > > > > +int ncsi_reset_dev(struct ncsi_dev *nd)
> > > > > +{
> > > > > +	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
> > > > > +	struct ncsi_channel *nc, *active;
> > > > > +	struct ncsi_package *np;
> > > > > +	unsigned long flags;
> > > > > +	bool enabled;
> > > > > +	int state;
> > > > > +
> > > > > +	active = NULL;
> > > > > +	NCSI_FOR_EACH_PACKAGE(ndp, np) {
> > > > > +		NCSI_FOR_EACH_CHANNEL(np, nc) {
> > > > > +			spin_lock_irqsave(&nc->lock, flags);
> > > > > +			enabled = nc->monitor.enabled;
> > > > > +			state = nc->state;
> > > > > +			spin_unlock_irqrestore(&nc->lock, flags);
> > > > > +
> > > > > +			if (enabled)
> > > > > +				ncsi_stop_channel_monitor(nc);
> > > > > +			if (state == NCSI_CHANNEL_ACTIVE) {
> > > > > +				active = nc;
> > > > > +				break;
> > > > 
> > > > Is the original intention to process the channel one by one?
> > > > If it is the case, there are two loops and we might need to use
> > > > "goto found" instead.
> > > 
> > > Yes we'll need to break out of the package loop here as well.
> > > 
> > > > > +			}
> > > > > +		}
> > > > > +	}
> > > > > +
> > > > 
> > > > found: ?
> > > > 
> > > > > +	if (!active) {
> > > > > +		/* Done */
> > > > > +		spin_lock_irqsave(&ndp->lock, flags);
> > > > > +		ndp->flags &= ~NCSI_DEV_RESET;
> > > > > +		spin_unlock_irqrestore(&ndp->lock, flags);
> > > > > +		return ncsi_choose_active_channel(ndp);
> > > > > +	}
> > > > > +
> > > > > +	spin_lock_irqsave(&ndp->lock, flags);
> > > > > +	ndp->flags |= NCSI_DEV_RESET;
> > > > > +	ndp->active_channel = active;
> > > > > +	ndp->active_package = active->package;
> > > > > +	spin_unlock_irqrestore(&ndp->lock, flags);
> > > > > +
> > > > > +	nd->state = ncsi_dev_state_suspend;
> > > > > +	schedule_work(&ndp->work);
> > > > > +	return 0;
> > > > > +}
> > > > 
> > > > Also similar issue in ncsi_choose_active_channel() function below.
> > > > 
> > > > > @@ -916,32 +1045,49 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
> > > > >  
> > > > >  			ncm = &nc->modes[NCSI_MODE_LINK];
> > > > >  			if (ncm->data[2] & 0x1) {
> > > > > -				spin_unlock_irqrestore(&nc->lock, flags);
> > > > >  				found = nc;
> > > > > -				goto out;
> > > > > +				with_link = true;
> > > > >  			}
> > > > >  
> > > > > -			spin_unlock_irqrestore(&nc->lock, flags);
> > > > > +			/* If multi_channel is enabled configure all valid
> > > > > +			 * channels whether or not they currently have link
> > > > > +			 * so they will have AENs enabled.
> > > > > +			 */
> > > > > +			if (with_link || np->multi_channel) {
> > > > 
> > > > I notice that there is a case that we will misconfigure the interface.
> > > > For example below, multi-channel is not enable for package 1.
> > > > But we enable the channel for ncsi2 below (package 1 channel 0) as that interface is the first
> > > > channel for that package with link.
> > > 
> > > I don't think I see the issue here; multi-channel is not set on package
> > > 1, but both channels are in the channel whitelist. Channel 0 is
> > > configured since it's the first found on package 1, and channel 1 is not
> > > since channel 0 is already found. Are you expecting something different?
> > >  
> > 
> > The setting is that multi-package is enable for both package 0 and 1.
> > Multi-channel is only enabled for package 0.
> > 
> > > > cat /sys/kernel/debug/ncsi_protocol/ncsi_device_
> > > > IFIDX IFNAME NAME   PID CID RX TX MP MC WP WC PC CS PS LS RU CR NQ HA
> > > > =====================================================================
> > > >   2   eth2   ncsi0  000 000 1  1  1  1  1  1  0  2  1  1  1  1  0  1
> > > >   2   eth2   ncsi1  000 001 1  0  1  1  1  1  0  2  1  1  1  1  0  1
> > > >   2   eth2   ncsi2  001 000 1  0  1  0  1  1  0  2  1  1  1  1  0  1
> > 
> > I was replying to the wrong old email and it might cause a bit confusion.
> > The first 1 meaning channel is enabled for package 1 channel 0 (ncsi2). 
> > For eth2, we already has ncsi0 as the active channel with TX enable.
> > I would think that package doesn't have the multi-channel enabled and
> > we should not enable the channel for ncsi2. The problem is that package 1 doesn't
> > enable the multi-channel and it believes it needs to enable one channel for its package 
> > but it doesn't aware that the other package already has one active channel.
> 
> Ah, maybe the confusion here is that multi_channel is a per-package
> setting; it determines what a package does with its own channels.
> 
> So you have package 0 with multi-channel enabled so it enables channels 0
> & 1.
> Then you have package 1 without multi-channel so it enables only channel
> 0.
> There is still only one Tx channel (package 0, channel 0).
> 
> Does that sound right, or have I missed something?

Yes, you are right. There is only one TX enabled. 
If we can hold off a few seconds before applying, then we will not see 
these configuration changes in between the back to back netlink commands.

Thanks, 
Justin
diff mbox series

Patch

diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 592e3cd40728..ed68ca677e41 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -279,6 +279,7 @@  struct ncsi_dev_priv {
 #define NCSI_DEV_PROBED		1            /* Finalized NCSI topology    */
 #define NCSI_DEV_HWA		2            /* Enabled HW arbitration     */
 #define NCSI_DEV_RESHUFFLE	4
+#define NCSI_DEV_RESET		8            /* Reset state of NC          */
 	spinlock_t          lock;            /* Protect the NCSI device    */
 #if IS_ENABLED(CONFIG_IPV6)
 	unsigned int        inet6_addr_num;  /* Number of IPv6 addresses   */
@@ -333,6 +334,7 @@  extern spinlock_t ncsi_dev_lock;
 	list_for_each_entry_rcu(nc, &np->channels, node)
 
 /* Resources */
+int ncsi_reset_dev(struct ncsi_dev *nd);
 void ncsi_start_channel_monitor(struct ncsi_channel *nc);
 void ncsi_stop_channel_monitor(struct ncsi_channel *nc);
 struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 632caeea672f..af23e2cc6c1d 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -550,8 +550,10 @@  static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
 		spin_lock_irqsave(&nc->lock, flags);
 		nc->state = NCSI_CHANNEL_INACTIVE;
 		spin_unlock_irqrestore(&nc->lock, flags);
-		ncsi_process_next_channel(ndp);
-
+		if (ndp->flags & NCSI_DEV_RESET)
+			ncsi_reset_dev(nd);
+		else
+			ncsi_process_next_channel(ndp);
 		break;
 	default:
 		netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n",
@@ -1472,7 +1474,7 @@  int ncsi_start_dev(struct ncsi_dev *nd)
 		return 0;
 	}
 
-	return ncsi_choose_active_channel(nd);
+	return ncsi_reset_dev(nd);
 }
 EXPORT_SYMBOL_GPL(ncsi_start_dev);
 
@@ -1485,7 +1487,10 @@  void ncsi_stop_dev(struct ncsi_dev *nd)
 	int old_state;
 	unsigned long flags;
 
-	/* Stop the channel monitor and reset channel's state */
+	/* Stop the channel monitor on any active channels. Don't reset the
+	 * channel state so we know which were active when ncsi_start_dev()
+	 * is next called.
+	 */
 	NCSI_FOR_EACH_PACKAGE(ndp, np) {
 		NCSI_FOR_EACH_CHANNEL(np, nc) {
 			ncsi_stop_channel_monitor(nc);
@@ -1493,7 +1498,6 @@  void ncsi_stop_dev(struct ncsi_dev *nd)
 			spin_lock_irqsave(&nc->lock, flags);
 			chained = !list_empty(&nc->link);
 			old_state = nc->state;
-			nc->state = NCSI_CHANNEL_INACTIVE;
 			spin_unlock_irqrestore(&nc->lock, flags);
 
 			WARN_ON_ONCE(chained ||
@@ -1506,6 +1510,51 @@  void ncsi_stop_dev(struct ncsi_dev *nd)
 }
 EXPORT_SYMBOL_GPL(ncsi_stop_dev);
 
+int ncsi_reset_dev(struct ncsi_dev *nd)
+{
+	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
+	struct ncsi_channel *nc, *active;
+	struct ncsi_package *np;
+	unsigned long flags;
+	bool enabled;
+	int state;
+
+	active = NULL;
+	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		NCSI_FOR_EACH_CHANNEL(np, nc) {
+			spin_lock_irqsave(&nc->lock, flags);
+			enabled = nc->monitor.enabled;
+			state = nc->state;
+			spin_unlock_irqrestore(&nc->lock, flags);
+
+			if (enabled)
+				ncsi_stop_channel_monitor(nc);
+			if (state == NCSI_CHANNEL_ACTIVE) {
+				active = nc;
+				break;
+			}
+		}
+	}
+
+	if (!active) {
+		/* Done */
+		spin_lock_irqsave(&ndp->lock, flags);
+		ndp->flags &= ~NCSI_DEV_RESET;
+		spin_unlock_irqrestore(&ndp->lock, flags);
+		return ncsi_choose_active_channel(ndp);
+	}
+
+	spin_lock_irqsave(&ndp->lock, flags);
+	ndp->flags |= NCSI_DEV_RESET;
+	ndp->active_channel = active;
+	ndp->active_package = active->package;
+	spin_unlock_irqrestore(&ndp->lock, flags);
+
+	nd->state = ncsi_dev_state_suspend;
+	schedule_work(&ndp->work);
+	return 0;
+}
+
 void ncsi_unregister_dev(struct ncsi_dev *nd)
 {
 	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 33314381b4f5..06bb8bc2c798 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -330,9 +330,8 @@  static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
 		    package_id, channel_id,
 		    channel_id == NCSI_RESERVED_CHANNEL ? " (any)" : "");
 
-	/* Bounce the NCSI channel to set changes */
-	ncsi_stop_dev(&ndp->ndev);
-	ncsi_start_dev(&ndp->ndev);
+	/* Update channel configuration */
+	ncsi_reset_dev(&ndp->ndev);
 
 	return 0;
 }
@@ -360,9 +359,8 @@  static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
 	spin_unlock_irqrestore(&ndp->lock, flags);
 	netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n");
 
-	/* Bounce the NCSI channel to set changes */
-	ncsi_stop_dev(&ndp->ndev);
-	ncsi_start_dev(&ndp->ndev);
+	/* Update channel configuration */
+	ncsi_reset_dev(&ndp->ndev);
 
 	return 0;
 }