diff mbox series

[net-next,13/13] nfp: abm: restructure Qdisc handling

Message ID 20181112225819.29823-14-jakub.kicinski@netronome.com
State Accepted, archived
Delegated to: David Miller
Headers show
Series nfp: abm: track all Qdiscs | expand

Commit Message

Jakub Kicinski Nov. 12, 2018, 10:58 p.m. UTC
In preparation of handling more Qdisc types switch to a different
offload strategy.  We have now recreated the Qdisc hierarchy in
the driver.  Every time the hierarchy changes parse it, and update
the configuration of the HW accordingly.

While at it drop the support of pretending that we can instantiate
a single queue on a multi-queue device in HW/FW.  MQ is now required,
and each queue will have its own instance of RED.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: John Hurley <john.hurley@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/abm/ctrl.c |  73 ---
 drivers/net/ethernet/netronome/nfp/abm/main.c |  14 +-
 drivers/net/ethernet/netronome/nfp/abm/main.h |  57 +-
 .../net/ethernet/netronome/nfp/abm/qdisc.c    | 548 ++++++++++--------
 4 files changed, 340 insertions(+), 352 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
index 564427e8a6e8..1629b07f727b 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
@@ -50,27 +50,6 @@  nfp_abm_ctrl_stat(struct nfp_abm_link *alink, const struct nfp_rtsym *sym,
 	return 0;
 }
 
-static int
-nfp_abm_ctrl_stat_all(struct nfp_abm_link *alink, const struct nfp_rtsym *sym,
-		      unsigned int stride, unsigned int offset, bool is_u64,
-		      u64 *res)
-{
-	u64 val, sum = 0;
-	unsigned int i;
-	int err;
-
-	for (i = 0; i < alink->vnic->max_rx_rings; i++) {
-		err = nfp_abm_ctrl_stat(alink, sym, stride, offset, i,
-					is_u64, &val);
-		if (err)
-			return err;
-		sum += val;
-	}
-
-	*res = sum;
-	return 0;
-}
-
 int __nfp_abm_ctrl_set_q_lvl(struct nfp_abm *abm, unsigned int id, u32 val)
 {
 	struct nfp_cpp *cpp = abm->app->cpp;
@@ -155,42 +134,6 @@  int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int i,
 				 i, true, &stats->overlimits);
 }
 
-int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
-			    struct nfp_alink_stats *stats)
-{
-	u64 pkts = 0, bytes = 0;
-	int i, err;
-
-	for (i = 0; i < alink->vnic->max_rx_rings; i++) {
-		pkts += nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i));
-		bytes += nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i) + 8);
-	}
-	stats->tx_pkts = pkts;
-	stats->tx_bytes = bytes;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->q_lvls,
-				    NFP_QLVL_STRIDE, NFP_QLVL_BLOG_BYTES,
-				    false, &stats->backlog_bytes);
-	if (err)
-		return err;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->q_lvls,
-				    NFP_QLVL_STRIDE, NFP_QLVL_BLOG_PKTS,
-				    false, &stats->backlog_pkts);
-	if (err)
-		return err;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				    NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
-				    true, &stats->drops);
-	if (err)
-		return err;
-
-	return nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				     NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
-				     true, &stats->overlimits);
-}
-
 int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
 			       struct nfp_alink_xstats *xstats)
 {
@@ -207,22 +150,6 @@  int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
 				 i, true, &xstats->ecn_marked);
 }
 
-int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
-			     struct nfp_alink_xstats *xstats)
-{
-	int err;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				    NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
-				    true, &xstats->pdrop);
-	if (err)
-		return err;
-
-	return nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				     NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
-				     true, &xstats->ecn_marked);
-}
-
 int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm)
 {
 	return nfp_mbox_cmd(abm->app->pf, NFP_MBOX_PCIE_ABM_ENABLE,
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index da5394886a78..a5732d3bd1b7 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -7,6 +7,7 @@ 
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
 #include <linux/slab.h>
 
 #include "../nfpcore/nfp.h"
@@ -310,22 +311,14 @@  nfp_abm_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
 	alink->abm = abm;
 	alink->vnic = nn;
 	alink->id = id;
-	alink->parent = TC_H_ROOT;
 	alink->total_queues = alink->vnic->max_rx_rings;
-	alink->red_qdiscs = kvcalloc(alink->total_queues,
-				     sizeof(*alink->red_qdiscs),
-				     GFP_KERNEL);
-	if (!alink->red_qdiscs) {
-		err = -ENOMEM;
-		goto err_free_alink;
-	}
 
 	/* This is a multi-host app, make sure MAC/PHY is up, but don't
 	 * make the MAC/PHY state follow the state of any of the ports.
 	 */
 	err = nfp_eth_set_configured(app->cpp, eth_port->index, true);
 	if (err < 0)
-		goto err_free_qdiscs;
+		goto err_free_alink;
 
 	netif_keep_dst(nn->dp.netdev);
 
@@ -335,8 +328,6 @@  nfp_abm_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
 
 	return 0;
 
-err_free_qdiscs:
-	kvfree(alink->red_qdiscs);
 err_free_alink:
 	kfree(alink);
 	return err;
@@ -348,7 +339,6 @@  static void nfp_abm_vnic_free(struct nfp_app *app, struct nfp_net *nn)
 
 	nfp_abm_kill_reprs(alink->abm, alink);
 	WARN(!radix_tree_empty(&alink->qdiscs), "left over qdiscs\n");
-	kvfree(alink->red_qdiscs);
 	kfree(alink);
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
index d0d85f82bd1c..240e2c8683fe 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.h
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.h
@@ -9,6 +9,11 @@ 
 #include <net/devlink.h>
 #include <net/pkt_cls.h>
 
+/* Dump of 64 PRIOs and 256 REDs seems to take 850us on Xeon v4 @ 2.20GHz;
+ * 2.5ms / 400Hz seems more than sufficient for stats resolution.
+ */
+#define NFP_ABM_STATS_REFRESH_IVAL	(2500 * 1000) /* ns */
+
 #define NFP_ABM_LVL_INFINITY		S32_MAX
 
 struct nfp_app;
@@ -91,9 +96,19 @@  enum nfp_qdisc_type {
  * @children:		pointers to children
  *
  * @params_ok:		parameters of this Qdisc are OK for offload
+ * @offload_mark:	offload refresh state - selected for offload
+ * @offloaded:		Qdisc is currently offloaded to the HW
+ *
+ * @mq:			MQ Qdisc specific parameters and state
+ * @mq.stats:		current stats of the MQ Qdisc
+ * @mq.prev_stats:	previously reported @mq.stats
  *
  * @red:		RED Qdisc specific parameters and state
  * @red.threshold:	ECN marking threshold
+ * @red.stats:		current stats of the RED Qdisc
+ * @red.prev_stats:	previously reported @red.stats
+ * @red.xstats:		extended stats for RED - current
+ * @red.prev_xstats:	extended stats for RED - previously reported
  */
 struct nfp_qdisc {
 	struct net_device *netdev;
@@ -105,29 +120,26 @@  struct nfp_qdisc {
 	struct nfp_qdisc **children;
 
 	bool params_ok;
+	bool offload_mark;
+	bool offloaded;
 
 	union {
+		/* NFP_QDISC_MQ */
+		struct {
+			struct nfp_alink_stats stats;
+			struct nfp_alink_stats prev_stats;
+		} mq;
 		/* TC_SETUP_QDISC_RED */
 		struct {
 			u32 threshold;
+			struct nfp_alink_stats stats;
+			struct nfp_alink_stats prev_stats;
+			struct nfp_alink_xstats xstats;
+			struct nfp_alink_xstats prev_xstats;
 		} red;
 	};
 };
 
-/**
- * struct nfp_red_qdisc - representation of single RED Qdisc
- * @handle:	handle of currently offloaded RED Qdisc
- * @threshold:	marking threshold of this Qdisc
- * @stats:	statistics from last refresh
- * @xstats:	base of extended statistics
- */
-struct nfp_red_qdisc {
-	u32 handle;
-	u32 threshold;
-	struct nfp_alink_stats stats;
-	struct nfp_alink_xstats xstats;
-};
-
 /**
  * struct nfp_abm_link - port tuple of a ABM NIC
  * @abm:	back pointer to nfp_abm
@@ -135,9 +147,9 @@  struct nfp_red_qdisc {
  * @id:		id of the data vNIC
  * @queue_base:	id of base to host queue within PCIe (not QC idx)
  * @total_queues:	number of PF queues
- * @parent:	handle of expected parent, i.e. handle of MQ, or TC_H_ROOT
- * @num_qdiscs:	number of currently used qdiscs
- * @red_qdiscs:	array of qdiscs
+ *
+ * @last_stats_update:	ktime of last stats update
+ *
  * @root_qdisc:	pointer to the current root of the Qdisc hierarchy
  * @qdiscs:	all qdiscs recorded by major part of the handle
  */
@@ -147,13 +159,14 @@  struct nfp_abm_link {
 	unsigned int id;
 	unsigned int queue_base;
 	unsigned int total_queues;
-	u32 parent;
-	unsigned int num_qdiscs;
-	struct nfp_red_qdisc *red_qdiscs;
+
+	u64 last_stats_update;
+
 	struct nfp_qdisc *root_qdisc;
 	struct radix_tree_root qdiscs;
 };
 
+void nfp_abm_qdisc_offload_update(struct nfp_abm_link *alink);
 int nfp_abm_setup_root(struct net_device *netdev, struct nfp_abm_link *alink,
 		       struct tc_root_qopt_offload *opt);
 int nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
@@ -166,12 +179,8 @@  int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm);
 int __nfp_abm_ctrl_set_q_lvl(struct nfp_abm *abm, unsigned int id, u32 val);
 int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int queue,
 			   u32 val);
-int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
-			    struct nfp_alink_stats *stats);
 int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int i,
 			      struct nfp_alink_stats *stats);
-int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
-			     struct nfp_alink_xstats *xstats);
 int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
 			       struct nfp_alink_xstats *xstats);
 u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int i);
diff --git a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
index fb68038ec1da..16c4afe3a37f 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
@@ -13,6 +13,11 @@ 
 #include "../nfp_port.h"
 #include "main.h"
 
+static bool nfp_abm_qdisc_is_red(struct nfp_qdisc *qdisc)
+{
+	return qdisc->type == NFP_QDISC_RED;
+}
+
 static bool nfp_abm_qdisc_child_valid(struct nfp_qdisc *qdisc, unsigned int id)
 {
 	return qdisc->children[id] &&
@@ -24,6 +29,74 @@  static void *nfp_abm_qdisc_tree_deref_slot(void __rcu **slot)
 	return rtnl_dereference(*slot);
 }
 
+static void
+nfp_abm_stats_propagate(struct nfp_alink_stats *parent,
+			struct nfp_alink_stats *child)
+{
+	parent->tx_pkts		+= child->tx_pkts;
+	parent->tx_bytes	+= child->tx_bytes;
+	parent->backlog_pkts	+= child->backlog_pkts;
+	parent->backlog_bytes	+= child->backlog_bytes;
+	parent->overlimits	+= child->overlimits;
+	parent->drops		+= child->drops;
+}
+
+static void
+nfp_abm_stats_update_red(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc,
+			 unsigned int queue)
+{
+	struct nfp_cpp *cpp = alink->abm->app->cpp;
+	int err;
+
+	if (!qdisc->offloaded)
+		return;
+
+	err = nfp_abm_ctrl_read_q_stats(alink, queue, &qdisc->red.stats);
+	if (err)
+		nfp_err(cpp, "RED stats (%d) read failed with error %d\n",
+			queue, err);
+
+	err = nfp_abm_ctrl_read_q_xstats(alink, queue, &qdisc->red.xstats);
+	if (err)
+		nfp_err(cpp, "RED xstats (%d) read failed with error %d\n",
+			queue, err);
+}
+
+static void
+nfp_abm_stats_update_mq(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc)
+{
+	unsigned int i;
+
+	if (qdisc->type != NFP_QDISC_MQ)
+		return;
+
+	for (i = 0; i < alink->total_queues; i++)
+		if (nfp_abm_qdisc_child_valid(qdisc, i))
+			nfp_abm_stats_update_red(alink, qdisc->children[i], i);
+}
+
+static void __nfp_abm_stats_update(struct nfp_abm_link *alink, u64 time_now)
+{
+	alink->last_stats_update = time_now;
+	if (alink->root_qdisc)
+		nfp_abm_stats_update_mq(alink, alink->root_qdisc);
+}
+
+static void nfp_abm_stats_update(struct nfp_abm_link *alink)
+{
+	u64 now;
+
+	/* Limit the frequency of updates - stats of non-leaf qdiscs are a sum
+	 * of all their leafs, so we would read the same stat multiple times
+	 * for every dump.
+	 */
+	now = ktime_get();
+	if (now - alink->last_stats_update < NFP_ABM_STATS_REFRESH_IVAL)
+		return;
+
+	__nfp_abm_stats_update(alink, now);
+}
+
 static void
 nfp_abm_qdisc_unlink_children(struct nfp_qdisc *qdisc,
 			      unsigned int start, unsigned int end)
@@ -38,89 +111,174 @@  nfp_abm_qdisc_unlink_children(struct nfp_qdisc *qdisc,
 }
 
 static void
-nfp_abm_qdisc_clear_mq(struct net_device *netdev, struct nfp_abm_link *alink,
-		       struct nfp_qdisc *qdisc)
+nfp_abm_qdisc_offload_stop(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc)
 {
-	struct radix_tree_iter iter;
-	unsigned int mq_refs = 0;
-	void __rcu **slot;
+	/* Don't complain when qdisc is getting unlinked */
+	if (qdisc->use_cnt)
+		nfp_warn(alink->abm->app->cpp, "Offload of '%08x' stopped\n",
+			 qdisc->handle);
 
-	if (!qdisc->use_cnt)
+	if (!nfp_abm_qdisc_is_red(qdisc))
 		return;
-	/* MQ doesn't notify well on destruction, we need special handling of
-	 * MQ's children.
+
+	qdisc->red.stats.backlog_pkts = 0;
+	qdisc->red.stats.backlog_bytes = 0;
+}
+
+static int
+__nfp_abm_stats_init(struct nfp_abm_link *alink,
+		     unsigned int queue, struct nfp_alink_stats *prev_stats,
+		     struct nfp_alink_xstats *prev_xstats)
+{
+	u64 backlog_pkts, backlog_bytes;
+	int err;
+
+	/* Don't touch the backlog, backlog can only be reset after it has
+	 * been reported back to the tc qdisc stats.
 	 */
-	if (qdisc->type == NFP_QDISC_MQ &&
-	    qdisc == alink->root_qdisc &&
-	    netdev->reg_state == NETREG_UNREGISTERING)
-		return;
+	backlog_pkts = prev_stats->backlog_pkts;
+	backlog_bytes = prev_stats->backlog_bytes;
 
-	/* Count refs held by MQ instances and clear pointers */
-	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
-		struct nfp_qdisc *mq = nfp_abm_qdisc_tree_deref_slot(slot);
-		unsigned int i;
+	err = nfp_abm_ctrl_read_q_stats(alink, queue, prev_stats);
+	if (err) {
+		nfp_err(alink->abm->app->cpp,
+			"RED stats init (%d) failed with error %d\n",
+			queue, err);
+		return err;
+	}
 
-		if (mq->type != NFP_QDISC_MQ || mq->netdev != netdev)
-			continue;
-		for (i = 0; i < mq->num_children; i++)
-			if (mq->children[i] == qdisc) {
-				mq->children[i] = NULL;
-				mq_refs++;
-			}
+	err = nfp_abm_ctrl_read_q_xstats(alink, queue, prev_xstats);
+	if (err) {
+		nfp_err(alink->abm->app->cpp,
+			"RED xstats init (%d) failed with error %d\n",
+			queue, err);
+		return err;
 	}
 
-	WARN(qdisc->use_cnt != mq_refs, "non-zero qdisc use count: %d (- %d)\n",
-	     qdisc->use_cnt, mq_refs);
+	prev_stats->backlog_pkts = backlog_pkts;
+	prev_stats->backlog_bytes = backlog_bytes;
+	return 0;
+}
+
+static int
+nfp_abm_stats_init(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc,
+		   unsigned int queue)
+{
+	return __nfp_abm_stats_init(alink, queue,
+				    &qdisc->red.prev_stats,
+				    &qdisc->red.prev_xstats);
 }
 
 static void
-nfp_abm_offload_compile_red(struct nfp_abm_link *alink,
-			    struct nfp_red_qdisc *qdisc, unsigned int queue)
+nfp_abm_offload_compile_red(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc,
+			    unsigned int queue)
 {
-	if (!qdisc->handle)
+	qdisc->offload_mark = qdisc->type == NFP_QDISC_RED &&
+			      qdisc->params_ok &&
+			      qdisc->use_cnt == 1 &&
+			      !qdisc->children[0];
+
+	/* If we are starting offload init prev_stats */
+	if (qdisc->offload_mark && !qdisc->offloaded)
+		if (nfp_abm_stats_init(alink, qdisc, queue))
+			qdisc->offload_mark = false;
+
+	if (!qdisc->offload_mark)
 		return;
 
-	nfp_abm_ctrl_set_q_lvl(alink, queue, qdisc->threshold);
+	nfp_abm_ctrl_set_q_lvl(alink, queue, qdisc->red.threshold);
 }
 
-static void nfp_abm_offload_compile_one(struct nfp_abm_link *alink)
+static void
+nfp_abm_offload_compile_mq(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc)
 {
 	unsigned int i;
-	bool is_mq;
 
-	is_mq = alink->num_qdiscs > 1;
+	qdisc->offload_mark = qdisc->type == NFP_QDISC_MQ;
+	if (!qdisc->offload_mark)
+		return;
 
 	for (i = 0; i < alink->total_queues; i++) {
-		struct nfp_red_qdisc *next;
+		struct nfp_qdisc *child = qdisc->children[i];
 
-		if (is_mq && !alink->red_qdiscs[i].handle)
+		if (!nfp_abm_qdisc_child_valid(qdisc, i))
 			continue;
 
-		next = is_mq ? &alink->red_qdiscs[i] : &alink->red_qdiscs[0];
-		nfp_abm_offload_compile_red(alink, next, i);
+		nfp_abm_offload_compile_red(alink, child, i);
 	}
 }
 
-static void nfp_abm_offload_update(struct nfp_abm *abm)
+void nfp_abm_qdisc_offload_update(struct nfp_abm_link *alink)
 {
-	struct nfp_abm_link *alink = NULL;
-	struct nfp_pf *pf = abm->app->pf;
-	struct nfp_net *nn;
+	struct nfp_abm *abm = alink->abm;
+	struct radix_tree_iter iter;
+	struct nfp_qdisc *qdisc;
+	void __rcu **slot;
 	size_t i;
 
 	/* Mark all thresholds as unconfigured */
-	__bitmap_set(abm->threshold_undef, 0, abm->num_thresholds);
+	__bitmap_set(abm->threshold_undef,
+		     alink->queue_base, alink->total_queues);
 
-	/* Configure all offloads */
-	list_for_each_entry(nn, &pf->vnics, vnic_list) {
-		alink = nn->app_priv;
-		nfp_abm_offload_compile_one(alink);
+	/* Clear offload marks */
+	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
+		qdisc = nfp_abm_qdisc_tree_deref_slot(slot);
+		qdisc->offload_mark = false;
+	}
+
+	if (alink->root_qdisc)
+		nfp_abm_offload_compile_mq(alink, alink->root_qdisc);
+
+	/* Refresh offload status */
+	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
+		qdisc = nfp_abm_qdisc_tree_deref_slot(slot);
+		if (!qdisc->offload_mark && qdisc->offloaded)
+			nfp_abm_qdisc_offload_stop(alink, qdisc);
+		qdisc->offloaded = qdisc->offload_mark;
 	}
 
 	/* Reset the unconfigured thresholds */
 	for (i = 0; i < abm->num_thresholds; i++)
 		if (test_bit(i, abm->threshold_undef))
 			__nfp_abm_ctrl_set_q_lvl(abm, i, NFP_ABM_LVL_INFINITY);
+
+	__nfp_abm_stats_update(alink, ktime_get());
+}
+
+static void
+nfp_abm_qdisc_clear_mq(struct net_device *netdev, struct nfp_abm_link *alink,
+		       struct nfp_qdisc *qdisc)
+{
+	struct radix_tree_iter iter;
+	unsigned int mq_refs = 0;
+	void __rcu **slot;
+
+	if (!qdisc->use_cnt)
+		return;
+	/* MQ doesn't notify well on destruction, we need special handling of
+	 * MQ's children.
+	 */
+	if (qdisc->type == NFP_QDISC_MQ &&
+	    qdisc == alink->root_qdisc &&
+	    netdev->reg_state == NETREG_UNREGISTERING)
+		return;
+
+	/* Count refs held by MQ instances and clear pointers */
+	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
+		struct nfp_qdisc *mq = nfp_abm_qdisc_tree_deref_slot(slot);
+		unsigned int i;
+
+		if (mq->type != NFP_QDISC_MQ || mq->netdev != netdev)
+			continue;
+		for (i = 0; i < mq->num_children; i++)
+			if (mq->children[i] == qdisc) {
+				mq->children[i] = NULL;
+				mq_refs++;
+			}
+	}
+
+	WARN(qdisc->use_cnt != mq_refs, "non-zero qdisc use count: %d (- %d)\n",
+	     qdisc->use_cnt, mq_refs);
 }
 
 static void
@@ -221,8 +379,13 @@  nfp_abm_qdisc_destroy(struct net_device *netdev, struct nfp_abm_link *alink,
 	nfp_abm_qdisc_unlink_children(qdisc, 0, qdisc->num_children);
 	nfp_abm_qdisc_free(netdev, alink, qdisc);
 
-	if (alink->root_qdisc == qdisc)
+	if (alink->root_qdisc == qdisc) {
 		alink->root_qdisc = NULL;
+		/* Only root change matters, other changes are acted upon on
+		 * the graft notification.
+		 */
+		nfp_abm_qdisc_offload_update(alink);
+	}
 }
 
 static int
@@ -249,66 +412,73 @@  nfp_abm_qdisc_graft(struct nfp_abm_link *alink, u32 handle, u32 child_handle,
 		child = NFP_QDISC_UNTRACKED;
 	parent->children[id] = child;
 
+	nfp_abm_qdisc_offload_update(alink);
+
 	return 0;
 }
 
 static void
-__nfp_abm_reset_root(struct net_device *netdev, struct nfp_abm_link *alink,
-		     u32 handle, unsigned int qs, u32 init_val)
+nfp_abm_stats_calculate(struct nfp_alink_stats *new,
+			struct nfp_alink_stats *old,
+			struct gnet_stats_basic_packed *bstats,
+			struct gnet_stats_queue *qstats)
 {
-	memset(alink->red_qdiscs, 0,
-	       sizeof(*alink->red_qdiscs) * alink->num_qdiscs);
-
-	alink->parent = handle;
-	alink->num_qdiscs = qs;
+	_bstats_update(bstats, new->tx_bytes - old->tx_bytes,
+		       new->tx_pkts - old->tx_pkts);
+	qstats->qlen += new->backlog_pkts - old->backlog_pkts;
+	qstats->backlog += new->backlog_bytes - old->backlog_bytes;
+	qstats->overlimits += new->overlimits - old->overlimits;
+	qstats->drops += new->drops - old->drops;
 }
 
 static void
-nfp_abm_reset_root(struct net_device *netdev, struct nfp_abm_link *alink,
-		   u32 handle, unsigned int qs)
+nfp_abm_stats_red_calculate(struct nfp_alink_xstats *new,
+			    struct nfp_alink_xstats *old,
+			    struct red_stats *stats)
 {
-	__nfp_abm_reset_root(netdev, alink, handle, qs, NFP_ABM_LVL_INFINITY);
+	stats->forced_mark += new->ecn_marked - old->ecn_marked;
+	stats->pdrop += new->pdrop - old->pdrop;
 }
 
 static int
-nfp_abm_red_find(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
+nfp_abm_red_xstats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
 {
-	unsigned int i = TC_H_MIN(opt->parent) - 1;
+	struct nfp_qdisc *qdisc;
 
-	if (opt->parent == TC_H_ROOT)
-		i = 0;
-	else if (TC_H_MAJ(alink->parent) == TC_H_MAJ(opt->parent))
-		i = TC_H_MIN(opt->parent) - 1;
-	else
-		return -EOPNOTSUPP;
+	nfp_abm_stats_update(alink);
 
-	if (i >= alink->num_qdiscs ||
-	    opt->handle != alink->red_qdiscs[i].handle)
+	qdisc = nfp_abm_qdisc_find(alink, opt->handle);
+	if (!qdisc || !qdisc->offloaded)
 		return -EOPNOTSUPP;
 
-	return i;
+	nfp_abm_stats_red_calculate(&qdisc->red.xstats,
+				    &qdisc->red.prev_xstats,
+				    opt->xstats);
+	qdisc->red.prev_xstats = qdisc->red.xstats;
+	return 0;
 }
 
-static void
-nfp_abm_red_destroy(struct net_device *netdev, struct nfp_abm_link *alink,
-		    u32 handle)
+static int
+nfp_abm_red_stats(struct nfp_abm_link *alink, u32 handle,
+		  struct tc_qopt_offload_stats *stats)
 {
-	unsigned int i;
+	struct nfp_qdisc *qdisc;
 
-	nfp_abm_qdisc_destroy(netdev, alink, handle);
+	nfp_abm_stats_update(alink);
 
-	for (i = 0; i < alink->num_qdiscs; i++)
-		if (handle == alink->red_qdiscs[i].handle)
-			break;
-	if (i == alink->num_qdiscs)
-		return;
+	qdisc = nfp_abm_qdisc_find(alink, handle);
+	if (!qdisc)
+		return -EOPNOTSUPP;
+	/* If the qdisc offload has stopped we may need to adjust the backlog
+	 * counters back so carry on even if qdisc is not currently offloaded.
+	 */
 
-	if (alink->parent == TC_H_ROOT)
-		nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 0);
-	else
-		memset(&alink->red_qdiscs[i], 0, sizeof(*alink->red_qdiscs));
+	nfp_abm_stats_calculate(&qdisc->red.stats,
+				&qdisc->red.prev_stats,
+				stats->bstats, stats->qstats);
+	qdisc->red.prev_stats = qdisc->red.stats;
 
-	nfp_abm_offload_update(alink->abm);
+	return qdisc->offloaded ? 0 : -EOPNOTSUPP;
 }
 
 static bool
@@ -347,20 +517,12 @@  nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
 		    struct tc_red_qopt_offload *opt)
 {
 	struct nfp_qdisc *qdisc;
-	bool existing;
-	int i, err;
 	int ret;
 
 	ret = nfp_abm_qdisc_replace(netdev, alink, NFP_QDISC_RED, opt->parent,
 				    opt->handle, 1, &qdisc);
-
-	i = nfp_abm_red_find(alink, opt);
-	existing = i >= 0;
-
-	if (ret < 0) {
-		err = ret;
-		goto err_destroy;
-	}
+	if (ret < 0)
+		return ret;
 
 	/* If limit != 0 child gets reset */
 	if (opt->set.limit) {
@@ -376,127 +538,11 @@  nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
 	}
 
 	qdisc->params_ok = nfp_abm_red_check_params(alink, opt);
-	if (qdisc->params_ok) {
+	if (qdisc->params_ok)
 		qdisc->red.threshold = opt->set.min;
-	} else {
-		err = -EINVAL;
-		goto err_destroy;
-	}
-
-	if (existing) {
-		nfp_abm_offload_update(alink->abm);
-		return 0;
-	}
-
-	if (opt->parent == TC_H_ROOT) {
-		i = 0;
-		__nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 1, opt->set.min);
-	} else if (TC_H_MAJ(alink->parent) == TC_H_MAJ(opt->parent)) {
-		i = TC_H_MIN(opt->parent) - 1;
-	} else {
-		return -EINVAL;
-	}
-	alink->red_qdiscs[i].handle = opt->handle;
-
-	if (opt->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_stats(alink,
-					      &alink->red_qdiscs[i].stats);
-	else
-		err = nfp_abm_ctrl_read_q_stats(alink, i,
-						&alink->red_qdiscs[i].stats);
-	if (err)
-		goto err_destroy;
-
-	if (opt->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_xstats(alink,
-					       &alink->red_qdiscs[i].xstats);
-	else
-		err = nfp_abm_ctrl_read_q_xstats(alink, i,
-						 &alink->red_qdiscs[i].xstats);
-	if (err)
-		goto err_destroy;
-
-	alink->red_qdiscs[i].threshold = opt->set.min;
-	alink->red_qdiscs[i].stats.backlog_pkts = 0;
-	alink->red_qdiscs[i].stats.backlog_bytes = 0;
-
-	nfp_abm_offload_update(alink->abm);
-
-	return 0;
-err_destroy:
-	/* If the qdisc keeps on living, but we can't offload undo changes */
-	if (existing) {
-		opt->set.qstats->qlen -=
-			alink->red_qdiscs[i].stats.backlog_pkts;
-		opt->set.qstats->backlog -=
-			alink->red_qdiscs[i].stats.backlog_bytes;
-	}
-	nfp_abm_red_destroy(netdev, alink, opt->handle);
-
-	return err;
-}
-
-static void
-nfp_abm_update_stats(struct nfp_alink_stats *new, struct nfp_alink_stats *old,
-		     struct tc_qopt_offload_stats *stats)
-{
-	_bstats_update(stats->bstats, new->tx_bytes - old->tx_bytes,
-		       new->tx_pkts - old->tx_pkts);
-	stats->qstats->qlen += new->backlog_pkts - old->backlog_pkts;
-	stats->qstats->backlog += new->backlog_bytes - old->backlog_bytes;
-	stats->qstats->overlimits += new->overlimits - old->overlimits;
-	stats->qstats->drops += new->drops - old->drops;
-}
-
-static int
-nfp_abm_red_stats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
-{
-	struct nfp_alink_stats *prev_stats;
-	struct nfp_alink_stats stats;
-	int i, err;
-
-	i = nfp_abm_red_find(alink, opt);
-	if (i < 0)
-		return i;
-	prev_stats = &alink->red_qdiscs[i].stats;
-
-	if (alink->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_stats(alink, &stats);
-	else
-		err = nfp_abm_ctrl_read_q_stats(alink, i, &stats);
-	if (err)
-		return err;
-
-	nfp_abm_update_stats(&stats, prev_stats, &opt->stats);
 
-	*prev_stats = stats;
-
-	return 0;
-}
-
-static int
-nfp_abm_red_xstats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
-{
-	struct nfp_alink_xstats *prev_xstats;
-	struct nfp_alink_xstats xstats;
-	int i, err;
-
-	i = nfp_abm_red_find(alink, opt);
-	if (i < 0)
-		return i;
-	prev_xstats = &alink->red_qdiscs[i].xstats;
-
-	if (alink->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_xstats(alink, &xstats);
-	else
-		err = nfp_abm_ctrl_read_q_xstats(alink, i, &xstats);
-	if (err)
-		return err;
-
-	opt->xstats->forced_mark += xstats.ecn_marked - prev_xstats->ecn_marked;
-	opt->xstats->pdrop += xstats.pdrop - prev_xstats->pdrop;
-
-	*prev_xstats = xstats;
+	if (qdisc->use_cnt == 1)
+		nfp_abm_qdisc_offload_update(alink);
 
 	return 0;
 }
@@ -508,10 +554,10 @@  int nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
 	case TC_RED_REPLACE:
 		return nfp_abm_red_replace(netdev, alink, opt);
 	case TC_RED_DESTROY:
-		nfp_abm_red_destroy(netdev, alink, opt->handle);
+		nfp_abm_qdisc_destroy(netdev, alink, opt->handle);
 		return 0;
 	case TC_RED_STATS:
-		return nfp_abm_red_stats(alink, opt);
+		return nfp_abm_red_stats(alink, opt->handle, &opt->stats);
 	case TC_RED_XSTATS:
 		return nfp_abm_red_xstats(alink, opt);
 	case TC_RED_GRAFT:
@@ -522,28 +568,6 @@  int nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
 	}
 }
 
-static int
-nfp_abm_mq_stats(struct nfp_abm_link *alink, struct tc_mq_qopt_offload *opt)
-{
-	struct nfp_alink_stats stats;
-	unsigned int i;
-	int err;
-
-	for (i = 0; i < alink->num_qdiscs; i++) {
-		if (alink->red_qdiscs[i].handle == TC_H_UNSPEC)
-			continue;
-
-		err = nfp_abm_ctrl_read_q_stats(alink, i, &stats);
-		if (err)
-			return err;
-
-		nfp_abm_update_stats(&stats, &alink->red_qdiscs[i].stats,
-				     &opt->stats);
-	}
-
-	return 0;
-}
-
 static int
 nfp_abm_mq_create(struct net_device *netdev, struct nfp_abm_link *alink,
 		  struct tc_mq_qopt_offload *opt)
@@ -556,27 +580,63 @@  nfp_abm_mq_create(struct net_device *netdev, struct nfp_abm_link *alink,
 				    &qdisc);
 	if (ret < 0)
 		return ret;
+
+	qdisc->params_ok = true;
+	qdisc->offloaded = true;
+	nfp_abm_qdisc_offload_update(alink);
 	return 0;
 }
 
+static int
+nfp_abm_mq_stats(struct nfp_abm_link *alink, u32 handle,
+		 struct tc_qopt_offload_stats *stats)
+{
+	struct nfp_qdisc *qdisc, *red;
+	unsigned int i;
+
+	qdisc = nfp_abm_qdisc_find(alink, handle);
+	if (!qdisc)
+		return -EOPNOTSUPP;
+
+	nfp_abm_stats_update(alink);
+
+	/* MQ stats are summed over the children in the core, so we need
+	 * to add up the unreported child values.
+	 */
+	memset(&qdisc->mq.stats, 0, sizeof(qdisc->mq.stats));
+	memset(&qdisc->mq.prev_stats, 0, sizeof(qdisc->mq.prev_stats));
+
+	for (i = 0; i < qdisc->num_children; i++) {
+		if (!nfp_abm_qdisc_child_valid(qdisc, i))
+			continue;
+
+		if (!nfp_abm_qdisc_is_red(qdisc->children[i]))
+			continue;
+		red = qdisc->children[i];
+
+		nfp_abm_stats_propagate(&qdisc->mq.stats,
+					&red->red.stats);
+		nfp_abm_stats_propagate(&qdisc->mq.prev_stats,
+					&red->red.prev_stats);
+	}
+
+	nfp_abm_stats_calculate(&qdisc->mq.stats, &qdisc->mq.prev_stats,
+				stats->bstats, stats->qstats);
+
+	return qdisc->offloaded ? 0 : -EOPNOTSUPP;
+}
+
 int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink,
 			struct tc_mq_qopt_offload *opt)
 {
 	switch (opt->command) {
 	case TC_MQ_CREATE:
-		nfp_abm_reset_root(netdev, alink, opt->handle,
-				   alink->total_queues);
-		nfp_abm_offload_update(alink->abm);
 		return nfp_abm_mq_create(netdev, alink, opt);
 	case TC_MQ_DESTROY:
 		nfp_abm_qdisc_destroy(netdev, alink, opt->handle);
-		if (opt->handle == alink->parent) {
-			nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 0);
-			nfp_abm_offload_update(alink->abm);
-		}
 		return 0;
 	case TC_MQ_STATS:
-		return nfp_abm_mq_stats(alink, opt);
+		return nfp_abm_mq_stats(alink, opt->handle, &opt->stats);
 	case TC_MQ_GRAFT:
 		return nfp_abm_qdisc_graft(alink, opt->handle,
 					   opt->graft_params.child_handle,
@@ -597,5 +657,7 @@  int nfp_abm_setup_root(struct net_device *netdev, struct nfp_abm_link *alink,
 	if (alink->root_qdisc)
 		alink->root_qdisc->use_cnt++;
 
+	nfp_abm_qdisc_offload_update(alink);
+
 	return 0;
 }