diff mbox

[NET] Add proc file to display the state of all qdiscs.

Message ID 4AA070BA.9000105@trash.net
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Patrick McHardy Sept. 4, 2009, 1:43 a.m. UTC
Patrick McHardy wrote:
> David Miller wrote:
>   
>> You mean just like Intel's multiqueue scheduler and multiqueue
>> classifiers which we already have in the tree :-
>>     
>
> No, actually integrated in the current infrastructure :) The patches
> add a virtual root qdisc, so we still have a single root from userspace's
> POV. This qdisc is only used for sch_api purposes and doesn't necessarily
> match the real root qdiscs. When a "mq" qdisc is attached (done by
> default for multiqueue devices), the queues are exposed as child
> classes of this qdisc and can be individually grafted with different
> qdiscs.
>   

Since its getting late and I won't finish this tonight, here are the
patches I'm currently working on in case someone already wants to have
a look. Its working as intended and should be testable. Patch 1 is good
shape IMO, patch 3 still needs some work. Userspace needs no changes.
diff mbox

Patch

commit 7f8e95c412644cc23173c9c5673df794fb9b65f7
Author: Patrick McHardy <kaber@trash.net>
Date:   Fri Sep 4 03:24:28 2009 +0200

    net_sched: add classful multiqueue dummy scheduler
    
    This patch adds a classful dummy scheduler which can be used as root qdisc
    for multiqueue devices and exposes each device queue as a child class.
    
    This allows to address queues individually and graft them similar to regular
    classes. Additionally it presents an accumulated view of the statistics of
    all real root qdiscs in the dummy root.
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a1a3f3f..b76e4fa 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -122,6 +122,7 @@  struct Qdisc_ops
 	void			(*reset)(struct Qdisc *);
 	void			(*destroy)(struct Qdisc *);
 	int			(*change)(struct Qdisc *, struct nlattr *arg);
+	void			(*attach)(struct Qdisc *);
 
 	int			(*dump)(struct Qdisc *, struct sk_buff *);
 	int			(*dump_stats)(struct Qdisc *, struct gnet_dump *);
@@ -255,6 +256,8 @@  static inline void sch_tree_unlock(struct Qdisc *q)
 
 extern struct Qdisc noop_qdisc;
 extern struct Qdisc_ops noop_qdisc_ops;
+extern struct Qdisc_ops pfifo_fast_ops;
+extern struct Qdisc_ops mq_qdisc_ops;
 
 struct Qdisc_class_common
 {
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 54d950c..f14e71b 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -2,7 +2,7 @@ 
 # Makefile for the Linux Traffic Control Unit.
 #
 
-obj-y	:= sch_generic.o
+obj-y	:= sch_generic.o sch_mq.o
 
 obj-$(CONFIG_NET_SCHED)		+= sch_api.o sch_blackhole.o
 obj-$(CONFIG_NET_CLS)		+= cls_api.o
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c0d2704..dc41cb1 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -678,6 +678,11 @@  static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 		if (dev->flags & IFF_UP)
 			dev_deactivate(dev);
 
+		if (new && new->ops->attach) {
+			new->ops->attach(new);
+			num_q = 0;
+		}
+
 		for (i = 0; i < num_q; i++) {
 			struct netdev_queue *dev_queue = &dev->rx_queue;
 
@@ -692,7 +697,7 @@  static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 		}
 
 		notify_and_destroy(skb, n, classid, dev->qdisc, new);
-		if (new)
+		if (new && !new->ops->attach)
 			atomic_inc(&new->refcnt);
 		dev->qdisc = new ? : &noop_qdisc;
 
@@ -1670,6 +1675,7 @@  static int __init pktsched_init(void)
 {
 	register_qdisc(&pfifo_qdisc_ops);
 	register_qdisc(&bfifo_qdisc_ops);
+	register_qdisc(&mq_qdisc_ops);
 	proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
 
 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e7c47ce..4ae6aa5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -514,7 +514,7 @@  static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
 	return 0;
 }
 
-static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
+struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.id		=	"pfifo_fast",
 	.priv_size	=	sizeof(struct pfifo_fast_priv),
 	.enqueue	=	pfifo_fast_enqueue,
@@ -670,6 +670,26 @@  static void attach_one_default_qdisc(struct net_device *dev,
 	dev_queue->qdisc_sleeping = qdisc;
 }
 
+static void attach_default_qdiscs(struct net_device *dev)
+{
+	struct netdev_queue *txq;
+	struct Qdisc *qdisc;
+
+	txq = netdev_get_tx_queue(dev, 0);
+
+	if (!netif_is_multiqueue(dev) || dev->tx_queue_len == 0) {
+		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
+		dev->qdisc = txq->qdisc_sleeping;
+		atomic_inc(&dev->qdisc->refcnt);
+	} else {
+		qdisc = qdisc_create_dflt(dev, txq, &mq_qdisc_ops, TC_H_ROOT);
+		if (qdisc) {
+			qdisc->ops->attach(qdisc);
+			dev->qdisc = qdisc;
+		}
+	}
+}
+
 static void transition_one_qdisc(struct net_device *dev,
 				 struct netdev_queue *dev_queue,
 				 void *_need_watchdog)
@@ -689,7 +709,6 @@  static void transition_one_qdisc(struct net_device *dev,
 
 void dev_activate(struct net_device *dev)
 {
-	struct netdev_queue *txq;
 	int need_watchdog;
 
 	/* No queueing discipline is attached to device;
@@ -698,13 +717,8 @@  void dev_activate(struct net_device *dev)
 	   virtual interfaces
 	 */
 
-	if (dev->qdisc == &noop_qdisc) {
-		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
-
-		txq = netdev_get_tx_queue(dev, 0);
-		dev->qdisc = txq->qdisc_sleeping;
-		atomic_inc(&dev->qdisc->refcnt);
-	}
+	if (dev->qdisc == &noop_qdisc)
+		attach_default_qdiscs(dev);
 
 	if (!netif_carrier_ok(dev))
 		/* Delay activation until next carrier-on event */
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
new file mode 100644
index 0000000..fadaa6d
--- /dev/null
+++ b/net/sched/sch_mq.c
@@ -0,0 +1,244 @@ 
+/*
+ * net/sched/sch_mq.c		Classful multiqueue dummy scheduler
+ *
+ * Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+struct mq_sched {
+	struct Qdisc		**qdiscs;
+};
+
+static int mq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mq_sched *priv = qdisc_priv(sch);
+	struct netdev_queue *dev_queue;
+	struct Qdisc *qdisc;
+	unsigned int ntx;
+
+	if (sch->parent != TC_H_ROOT)
+		return -EOPNOTSUPP;
+
+	if (!netif_is_multiqueue(dev))
+		return -EOPNOTSUPP;
+
+	/* pre-allocate qdiscs, attachment can't fail */
+	priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
+			       GFP_KERNEL);
+	if (priv->qdiscs == NULL)
+		return -ENOMEM;
+
+	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+		dev_queue = netdev_get_tx_queue(dev, ntx);
+		qdisc = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops,
+					  TC_H_MAKE(TC_H_MAJ(sch->handle),
+						    TC_H_MIN(ntx + 1)));
+		if (qdisc == NULL)
+			goto err;
+		qdisc->flags |= TCQ_F_CAN_BYPASS;
+		priv->qdiscs[ntx] = qdisc;
+	}
+
+	return 0;
+
+err:
+	while (ntx > 0)
+		qdisc_destroy(priv->qdiscs[--ntx]);
+	kfree(priv->qdiscs);
+	return -ENOMEM;
+}
+
+static void mq_attach(struct Qdisc *sch)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mq_sched *priv = qdisc_priv(sch);
+	struct Qdisc *qdisc;
+	unsigned int ntx;
+
+	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+		qdisc = priv->qdiscs[ntx];
+		qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+		if (qdisc)
+			qdisc_destroy(qdisc);
+	}
+	kfree(priv->qdiscs);
+}
+
+static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct Qdisc *qdisc;
+	unsigned int ntx;
+
+	sch->q.qlen = 0;
+	memset(&sch->bstats, 0, sizeof(sch->bstats));
+	memset(&sch->qstats, 0, sizeof(sch->qstats));
+
+	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc;
+		sch->q.qlen		+= qdisc->q.qlen;
+		sch->bstats.bytes	+= qdisc->bstats.bytes;
+		sch->bstats.packets	+= qdisc->bstats.packets;
+		sch->qstats.qlen	+= qdisc->qstats.qlen;
+		sch->qstats.backlog	+= qdisc->qstats.backlog;
+		sch->qstats.drops	+= qdisc->qstats.drops;
+		sch->qstats.requeues	+= qdisc->qstats.requeues;
+		sch->qstats.overlimits	+= qdisc->qstats.overlimits;
+	}
+	return 0;
+}
+
+static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long arg)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned long ntx = arg - 1;
+
+	if (ntx >= dev->num_tx_queues)
+		return NULL;
+	return netdev_get_tx_queue(dev, ntx);
+}
+
+static int mq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+		    struct Qdisc **old)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct netdev_queue *dev_queue;
+
+	dev_queue = mq_queue_get(sch, arg);
+	if (dev_queue == NULL)
+		return -ENOENT;
+
+	if (dev->flags & IFF_UP)
+		dev_deactivate(dev);
+
+	*old = dev_graft_qdisc(dev_queue, new);
+
+	if (dev->flags & IFF_UP)
+		dev_activate(dev);
+	return 0;
+}
+
+static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct netdev_queue *dev_queue;
+
+	dev_queue = mq_queue_get(sch, arg);
+	if (dev_queue == NULL)
+		return NULL;
+
+	return dev_queue->qdisc;
+}
+
+static unsigned long mq_get(struct Qdisc *sch, u32 classid)
+{
+	unsigned int ntx = TC_H_MIN(classid);
+
+	if (!mq_queue_get(sch, ntx))
+		return 0;
+	return ntx;
+}
+
+static void mq_put(struct Qdisc *sch, unsigned long cl)
+{
+	return;
+}
+
+static int mq_change(struct Qdisc *sch, u32 handle, u32 parent,
+		     struct nlattr **tca, unsigned long *arg)
+{
+	if (!mq_queue_get(sch, *arg))
+		return -ENOENT;
+	return 0;
+}
+
+static int mq_delete(struct Qdisc *sch, unsigned long cl)
+{
+	return -EOPNOTSUPP;
+}
+
+static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
+			 struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct netdev_queue *dev_queue;
+
+	dev_queue = mq_queue_get(sch, cl);
+	if (dev_queue == NULL)
+		return -ENOENT;
+
+	tcm->tcm_parent = TC_H_ROOT;
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	if (dev_queue->qdisc)
+		tcm->tcm_info = dev_queue->qdisc->handle;
+	return 0;
+}
+
+static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+			       struct gnet_dump *d)
+{
+	struct netdev_queue *dev_queue;
+
+	dev_queue = mq_queue_get(sch, cl);
+	sch = dev_queue->qdisc_sleeping;
+
+	if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
+	    gnet_stats_copy_queue(d, &sch->qstats) < 0)
+		return -1;
+	return 0;
+}
+
+static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned int ntx;
+
+	if (arg->stop)
+		return;
+
+	arg->count = arg->skip;
+	for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
+		if (arg->fn(sch, ntx + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static const struct Qdisc_class_ops mq_class_ops = {
+	.graft		= mq_graft,
+	.leaf		= mq_leaf,
+	.get		= mq_get,
+	.put		= mq_put,
+	.change		= mq_change,
+	.delete		= mq_delete,
+	.walk		= mq_walk,
+	.tcf_chain	= NULL,
+	.bind_tcf	= NULL,
+	.unbind_tcf	= NULL,
+	.dump		= mq_dump_class,
+	.dump_stats	= mq_dump_class_stats,
+};
+
+struct Qdisc_ops mq_qdisc_ops __read_mostly = {
+	.cl_ops		= &mq_class_ops,
+	.id		= "mq",
+	.priv_size	= sizeof(struct mq_sched),
+	.init		= mq_init,
+	//.reset	= mq_reset,
+	//.destroy	= mq_destroy,
+	.attach		= mq_attach,
+	.dump		= mq_dump,
+	.owner		= THIS_MODULE,
+};