[RFC] net/ncsi: Reset state if no channels are available

Message ID 20171121055003.22039-1-sam@mendozajonas.com
State RFC, archived
Headers show
Series
  • [RFC] net/ncsi: Reset state if no channels are available
Related show

Commit Message

Samuel Mendoza-Jonas Nov. 21, 2017, 5:50 a.m.
A number of times a machine has been found in a state where the NCSI
interface never comes up because no channels are available. Once this
state is reached, however, it is impossible to leave it, as the NCSI
driver never re-checks link or topology.
If this state is recognised, reset the driver to a pre-probed state so
that the next time ncsi_start_dev() is called it will interrogate the
remote NCSI interface again.

Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
---
This is RFC at the moment mostly due to difficulty testing - this is
specifically aimed at BMCs that fail to bring network up, eg:

[   35.960000] ftgmac100 1e660000.ethernet eth0: NCSI: No channel found with link
[   35.970000] ftgmac100 1e660000.ethernet eth0: NCSI interface down

We don't have a solid way to recreate that problem; however,
ncsi_reset_dev(), which this patch implements, only triggers in the odd
case where we have no channels. I'll continue to try to recreate it with
this patch.

 include/net/ncsi.h     |  5 +++++
 net/ncsi/ncsi-manage.c | 39 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 41 insertions(+), 3 deletions(-)

Patch

diff --git a/include/net/ncsi.h b/include/net/ncsi.h
index fbefe80361ee..33ac21008680 100644
--- a/include/net/ncsi.h
+++ b/include/net/ncsi.h
@@ -34,6 +34,7 @@  int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid);
 struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 				   void (*notifier)(struct ncsi_dev *nd));
 int ncsi_start_dev(struct ncsi_dev *nd);
+void ncsi_reset_dev(struct ncsi_dev *nd);
 void ncsi_stop_dev(struct ncsi_dev *nd);
 void ncsi_unregister_dev(struct ncsi_dev *nd);
 #else /* !CONFIG_NET_NCSI */
@@ -58,6 +59,10 @@  static inline int ncsi_start_dev(struct ncsi_dev *nd)
 	return -ENOTTY;
 }
 
+static inline void ncsi_reset_dev(struct ncsi_dev *nd)
+{
+}
+
 static void ncsi_stop_dev(struct ncsi_dev *nd)
 {
 }
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 0fcf5f3fa9f9..df227a1a9b48 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -356,6 +356,9 @@  static void ncsi_remove_channel(struct ncsi_channel *nc)
 	}
 
 	nc->state = NCSI_CHANNEL_INACTIVE;
+	/* Make sure this channel is not in the channel queue */
+	if (!list_empty(&nc->link))
+		list_del_init(&nc->link);
 	spin_unlock_irqrestore(&nc->lock, flags);
 	ncsi_stop_channel_monitor(nc);
 
@@ -984,11 +987,18 @@  static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
 	hot_nc = ndp->hot_channel;
 	spin_unlock_irqrestore(&ndp->lock, flags);
 
+	if (ndp->package_num == 0)
+		netdev_warn(ndp->ndev.dev, "NCSI: No available packages!\n");
+
 	/* The search is done once an inactive channel with up
 	 * link is found.
 	 */
 	found = NULL;
 	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		if (np->channel_num == 0)
+			netdev_warn(ndp->ndev.dev,
+				    "NCSI: Package %u has no available channels!\n",
+				    np->id);
 		NCSI_FOR_EACH_CHANNEL(np, nc) {
 			spin_lock_irqsave(&nc->lock, flags);
 
@@ -1016,9 +1026,13 @@  static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
 	}
 
 	if (!found) {
-		netdev_warn(ndp->ndev.dev,
-			    "NCSI: No channel found with link\n");
-		ncsi_report_link(ndp, true);
+		/* No free channels were found. NCSI can't do anything right
+		 * now, but switch off NCSI_DEV_PROBED in case we haven't found
+		 * any channels yet - NCSI will re-probe the NCSI topology next
+		 * time the interface is brought up
+		 */
+		netdev_warn(ndp->ndev.dev, "NCSI: No free channels found!\n");
+		ncsi_reset_dev(&ndp->ndev);
 		return -ENODEV;
 	}
 
@@ -1666,6 +1680,25 @@  void ncsi_stop_dev(struct ncsi_dev *nd)
 }
 EXPORT_SYMBOL_GPL(ncsi_stop_dev);
 
+/* Drop all packages and channels and reset NCSI to a pre-probed state */
+void ncsi_reset_dev(struct ncsi_dev *nd)
+{
+	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
+	struct ncsi_package *np, *tmp;
+	unsigned long flags;
+
+	cancel_work_sync(&ndp->work);
+	ncsi_stop_dev(nd);
+
+	list_for_each_entry_safe(np, tmp, &ndp->packages, node)
+		ncsi_remove_package(np);
+	/* Clear only NCSI_DEV_PROBED (bitwise ~); keep all other flags */
+	spin_lock_irqsave(&ndp->lock, flags);
+	ndp->flags &= ~NCSI_DEV_PROBED;
+	spin_unlock_irqrestore(&ndp->lock, flags);
+}
+EXPORT_SYMBOL_GPL(ncsi_reset_dev);
+
 void ncsi_unregister_dev(struct ncsi_dev *nd)
 {
 	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);