diff mbox

[NET] Add proc file to display the state of all qdiscs.

Message ID alpine.DEB.1.10.0909011943430.12066@V090114053VZO-1
State Rejected, archived
Delegated to: David Miller
Headers show

Commit Message

Christoph Lameter Sept. 1, 2009, 11:52 p.m. UTC
[NET] Add proc file to display the state of all qdiscs

TC is a complicated tool and it currently does not allow the display of all
qdisc states. It does not support multiple tx queues and also not
localhost, nor does it display the current operating state of the queues.

This functionality could be added to tc / netlink but the tool is already
complex to handle. The simple proc file here allows easy scanning by
scripts and other tools. However, tc still needs to be updated to allow
the modifications of multiqueue TX settings. tc's main focus is the
configuration of qdiscs. The qdisc_stats file just shows the current
state.

This patch adds

	/proc/net/qdisc_stats

which displays the current state of all qdiscs on the system.

F.e.

$ cat /proc/net/qdisc_stats
Queue    Device  State   Bytes  Packets Qlen Blog   Drops Requeue Overlimit
TX0/root     lo   -          0        0    0    0       0       0       0
 RX/root     lo   -          0        0    0    0       0       0       0
TX0/root   eth0   -       5518       60    0    0       0       0       0
TX1/root   eth0   -       2549       37    0    0       0       0       0
TX2/root   eth0   -      63625      272    0    0       0       0       0
TX3/root   eth0   -       1580       21    0    0       0       0       0
TX4/root   eth0   R   88979440   260183    0 3532   43176    2111       0
TX5/root   eth0   -       4698       56    0    0       0       0       0
TX6/root   eth0   - 3598883129 10523140    0    0       0       0       0
TX7/root   eth0   -       1750       21    0    0       0       0       0


Signed-off-by: Christoph Lameter <cl@linux-foundation.org>

---
 net/sched/sch_api.c |  130 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jarek Poplawski Sept. 2, 2009, 8:14 a.m. UTC | #1
[Resent with fixed netdev@ address]

On 02-09-2009 01:52, Christoph Lameter wrote:
> [NET] Add proc file to display the state of all qdiscs
> 
> TC is a complicated tool and it currently does not allow the display of all
> qdisc states. It does not support multiple tx queues and also not
> localhost, nor does it display the current operating state of the queues.

I think, tc should've no problem with displaying summary stats of
multiqueue qdiscs or even all of them separately, as mentioned by
Patrick. And, maybe I still miss something, but there should be
nothing special with tc vs. localhost either.

> 
> This functionality could be added to tc / netlink but the tool is already
> complex to handle. The simple proc file here allows easy scanning by
> scripts and other tools. However, tc still needs to be updated to allow
> the modifications of multiqueue TX settings. tc's main focus is the
> configuration of qdiscs. The qdisc_stats file just shows the current
> state.
> 
> This patch adds
> 
> 	/proc/net/qdisc_stats
> 
> which displays the current state of all qdiscs on the system.
> 
> F.e.
> 
> $ cat /proc/net/qdisc_stats
> Queue    Device  State   Bytes  Packets Qlen Blog   Drops Requeue Overlimit
> TX0/root     lo   -          0        0    0    0       0       0       0
>  RX/root     lo   -          0        0    0    0       0       0       0
> TX0/root   eth0   -       5518       60    0    0       0       0       0
> TX1/root   eth0   -       2549       37    0    0       0       0       0
> TX2/root   eth0   -      63625      272    0    0       0       0       0
> TX3/root   eth0   -       1580       21    0    0       0       0       0
> TX4/root   eth0   R   88979440   260183    0 3532   43176    2111       0
> TX5/root   eth0   -       4698       56    0    0       0       0       0
> TX6/root   eth0   - 3598883129 10523140    0    0       0       0       0
> TX7/root   eth0   -       1750       21    0    0       0       0       0

As I wrote earlier, it's a nice work, but since it makes a new API I
guess we shouldn't hurry with merging this patch, and publish it as an
RFC first.

Then my humble suggestions would be to reserve more space for most of
the columns to make it readable not only for scripts when more TX#,
bytes, packets etc. Users of non-default qdiscs would also miss things
like: q->ops->id, q->handle, and q->parent at least. Plus, as I
mentioned earlier, q->qstats.qlen update with q->q.qlen (or using it
directly) is needed.

Jarek P.

> 
> Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
> 
> ---
>  net/sched/sch_api.c |  130 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 130 insertions(+)
> 
> Index: linux-2.6/net/sched/sch_api.c
> ===================================================================
> --- linux-2.6.orig/net/sched/sch_api.c	2009-09-01 12:27:24.000000000 -0500
> +++ linux-2.6/net/sched/sch_api.c	2009-09-01 14:39:27.000000000 -0500
> @@ -1699,6 +1699,135 @@ static const struct file_operations psch
>  	.llseek = seq_lseek,
>  	.release = single_release,
>  };
> +
> +static void dump_qdisc(struct seq_file *seq, struct net_device *dev,
> +				struct Qdisc *q, char *inout, char *text)
> +{
> +	char state[5];
> +	char *p = state;
> +
> +	if (test_bit(__QDISC_STATE_RUNNING, &q->state))
> +		*p++ = 'R';
> +	if (test_bit(__QDISC_STATE_SCHED, &q->state))
> +		*p++ = 'S';
> +	if (test_bit(__QDISC_STATE_DEACTIVATED, &q->state))
> +		*p++ = 'D';
> +	if (q->state == 0)
> +		*p++ = '-';
> +	*p = 0;
> +
> +	seq_printf(seq, "%3s/%2s %6s %3s %10lld %8d %4d %4d %7d %7d %7d\n",
> +		inout, text, dev->name, state,
> +		q->bstats.bytes, q->bstats.packets,
> +		q->qstats.qlen, q->qstats.backlog, q->qstats.drops,
> +		q->qstats.requeues, q->qstats.overlimits);
> +}
> +
> +static void dump_qdisc_root(struct seq_file *seq, struct net_device *dev,
> +					 struct Qdisc *root, char *inout)
> +{
> +	struct Qdisc *q;
> +	int n = 0;
> +
> +	if (!root)
> +		return;
> +
> +	dump_qdisc(seq, dev, root, inout, "root");
> +
> +	list_for_each_entry(q, &root->list, list) {
> +		char buffer[10];
> +
> +		sprintf(buffer,"Q%d", ++n);
> +		dump_qdisc(seq, dev, q, inout, buffer);
> +	}
> +}
> +
> +
> +static void qdisc_seq_out(struct seq_file *seq, struct net_device *dev)
> +{
> +	struct netdev_queue *dev_queue;
> +	int i;
> +
> +	for (i = 0; i < dev->real_num_tx_queues; i++) {
> +		char buffer[10];
> +
> +		dev_queue = netdev_get_tx_queue(dev, i);
> +		sprintf(buffer, "TX%d", i);
> +		dump_qdisc_root(seq, dev, dev_queue->qdisc_sleeping, buffer);
> +	}
> +	dev_queue = &dev->rx_queue;
> +	dump_qdisc_root(seq, dev, dev_queue->qdisc_sleeping, "RX");
> +}
> +
> +static int qdisc_seq_show(struct seq_file *seq, void *v)
> +{
> +	if (v == SEQ_START_TOKEN) {
> +		seq_printf(seq, "Queue    Device  State   Bytes  Packets "
> +			"Qlen Blog   Drops Requeue Overlimit\n");
> +	} else
> +		qdisc_seq_out(seq, v);
> +
> +	return 0;
> +}
> +
> +static void *qdisc_seq_start(struct seq_file *seq, loff_t *pos)
> +	__acquires(dev_base_lock)
> +{
> +	struct net *net = seq_file_net(seq);
> +	loff_t off;
> +	struct net_device *dev;
> +
> +	read_lock(&dev_base_lock);
> +
> +	if (!*pos)
> +		return SEQ_START_TOKEN;
> +
> +	off = 1;
> +
> +	for_each_netdev(net, dev)
> +		if (off++ == *pos)
> +			return dev;
> +
> +	return NULL;
> +}
> +
> +static void *qdisc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> +{
> +	struct net *net = seq_file_net(seq);
> +
> +	++*pos;
> +	return v == SEQ_START_TOKEN ?
> +		first_net_device(net) : next_net_device((struct net_device *)v);
> +}
> +
> +static void qdisc_seq_stop(struct seq_file *seq, void *v)
> +	__releases(dev_base_lock)
> +{
> +	read_unlock(&dev_base_lock);
> +}
> +
> +
> +static const struct seq_operations qdisc_seq_ops = {
> +	.start	= qdisc_seq_start,
> +	.next	= qdisc_seq_next,
> +	.stop	= qdisc_seq_stop,
> +	.show	= qdisc_seq_show,
> +};
> +
> +static int qdisc_open(struct inode *inode, struct file *file)
> +{
> +	return seq_open_net(inode, file, &qdisc_seq_ops,
> +			sizeof(struct seq_net_private));
> +}
> +
> +
> +static const struct file_operations qdisc_fops = {
> +	.owner = THIS_MODULE,
> +	.open = qdisc_open,
> +	.read = seq_read,
> +	.llseek = seq_lseek,
> +	.release = seq_release,
> +};
>  #endif
> 
>  static int __init pktsched_init(void)
> @@ -1706,6 +1835,7 @@ static int __init pktsched_init(void)
>  	register_qdisc(&pfifo_qdisc_ops);
>  	register_qdisc(&bfifo_qdisc_ops);
>  	proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
> +	proc_net_fops_create(&init_net, "qdisc_stats", 0, &qdisc_fops);
> 
>  	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
>  	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Sept. 2, 2009, 8:28 a.m. UTC | #2
Jarek Poplawski a écrit :
> [Resent with fixed netdev@ address]
> 
> On 02-09-2009 01:52, Christoph Lameter wrote:
>> [NET] Add proc file to display the state of all qdiscs
>>
>> TC is a complicated tool and it currently does not allow the display of all
>> qdisc states. It does not support multiple tx queues and also not
>> localhost, nor does it display the current operating state of the queues.
> 
> I think, tc should've no problem with displaying summary stats of
> multiqueue qdiscs or even all of them separately, as mentioned by
> Patrick. And, maybe I still miss something, but there should be
> nothing special with tc vs. localhost either.
>

I made a patch, but for a 8 queue device (bnx2), here is the "tc -s -d qdisc" result :

$ tc -s -d qdisc show
qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
 Sent 51814 bytes 459 pkt (dropped 0, overlimits 0 requeues 0)
 rate 0bit 0pps backlog 0b 0p requeues 0
qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 rate 0bit 0pps backlog 0b 0p requeues 0
qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 rate 0bit 0pps backlog 0b 0p requeues 0
qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 rate 0bit 0pps backlog 0b 0p requeues 0
qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 rate 0bit 0pps backlog 0b 0p requeues 0
qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 rate 0bit 0pps backlog 0b 0p requeues 0
qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 rate 0bit 0pps backlog 0b 0p requeues 0
qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 rate 0bit 0pps backlog 0b 0p requeues 0


Same name "eth0" is displayed, that might confuse parsers...

What naming convention should we choose for multiqueue devices ?

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Sept. 2, 2009, 8:30 a.m. UTC | #3
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 02 Sep 2009 10:28:55 +0200

> What naming convention should we choose for multiqueue devices ?

We could give an index field to multiple root qdiscs assigned
to a device.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Sept. 2, 2009, 9:18 a.m. UTC | #4
On Wed, Sep 02, 2009 at 10:28:55AM +0200, Eric Dumazet wrote:
> Jarek Poplawski a écrit :
> > [Resent with fixed netdev@ address]
> > 
> > On 02-09-2009 01:52, Christoph Lameter wrote:
> >> [NET] Add proc file to display the state of all qdiscs
> >>
> >> TC is a complicated tool and it currently does not allow the display of all
> >> qdisc states. It does not support multiple tx queues and also not
> >> localhost, nor does it display the current operating state of the queues.
> > 
> > I think, tc should've no problem with displaying summary stats of
> > multiqueue qdiscs or even all of them separately, as mentioned by
> > Patrick. And, maybe I still miss something, but there should be
> > nothing special with tc vs. localhost either.
> >
> 
> I made a patch, but for a 8 queue device (bnx2), here is the "tc -s -d qdisc" result :
> 
> $ tc -s -d qdisc show
> qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 51814 bytes 459 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
> qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
> qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
> qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
> qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
> qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
> qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
> qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
> 
> 
> Same name "eth0" is displayed, that might confuse parsers...
> 
> What naming convention should we choose for multiqueue devices ?
> 

Hmm... anything could break here something for somebody, so there is
still a (Patrick's) question if not sum it all? Otherwise, I wonder
about using the qdisc handle (tcm_handle>>16): there would be at
least one "pfifo_fast 0:" looking like proper root for somebody...

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Sept. 2, 2009, 9:33 a.m. UTC | #5
On Wed, Sep 02, 2009 at 09:18:54AM +0000, Jarek Poplawski wrote:
> On Wed, Sep 02, 2009 at 10:28:55AM +0200, Eric Dumazet wrote:
...
> > qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
> >  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
> >  rate 0bit 0pps backlog 0b 0p requeues 0
> > 
> > 
> > Same name "eth0" is displayed, that might confuse parsers...
> > 
> > What naming convention should we choose for multiqueue devices ?
> > 
> 
> Hmm... anything could break here something for somebody, so there is
> still a (Patrick's) question if not sum it all? Otherwise, I wonder
> about using the qdisc handle (tcm_handle>>16): there would be at
> least one "pfifo_fast 0:" looking like proper root for somebody...

I meant "proper" for pfifo_fast. On the other hand, I wonder why these
multiqueue qdisc handles can't be really given such unique per dev
(instead of per queue) handles?

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Sept. 2, 2009, 9:37 a.m. UTC | #6
On Wed, Sep 02, 2009 at 09:33:29AM +0000, Jarek Poplawski wrote:
> On Wed, Sep 02, 2009 at 09:18:54AM +0000, Jarek Poplawski wrote:
> > On Wed, Sep 02, 2009 at 10:28:55AM +0200, Eric Dumazet wrote:
> ...
> > > qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
> > >  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
> > >  rate 0bit 0pps backlog 0b 0p requeues 0
> > > 
> > > 
> > > Same name "eth0" is displayed, that might confuse parsers...
> > > 
> > > What naming convention should we choose for multiqueue devices ?
> > > 
> > 
> > Hmm... anything could break here something for somebody, so there is
> > still a (Patrick's) question if not sum it all? Otherwise, I wonder
> > about using the qdisc handle (tcm_handle>>16): there would be at
> > least one "pfifo_fast 0:" looking like proper root for somebody...
> 
> I meant "proper" for pfifo_fast. On the other hand, I wonder why these
> multiqueue qdisc handles can't be really given such unique per dev

should be:
multiqueue qdiscs can't be really given such unique per dev

> (instead of per queue) handles?
> 
> Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Patrick McHardy Sept. 2, 2009, 12:44 p.m. UTC | #7
Jarek Poplawski wrote:
> On Wed, Sep 02, 2009 at 09:33:29AM +0000, Jarek Poplawski wrote:
>> On Wed, Sep 02, 2009 at 09:18:54AM +0000, Jarek Poplawski wrote:
>>> On Wed, Sep 02, 2009 at 10:28:55AM +0200, Eric Dumazet wrote:
>> ...
>>>> qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>>>>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>>>>  rate 0bit 0pps backlog 0b 0p requeues 0
>>>>
>>>>
>>>> Same name "eth0" is displayed, that might confuse parsers...
>>>>
>>>> What naming convention should we choose for multiqueue devices ?
>>>>
>>> Hmm... anything could break here something for somebody, so there is
>>> still a (Patrick's) question if not sum it all? Otherwise, I wonder
>>> about using the qdisc handle (tcm_handle>>16): there would be at
>>> least one "pfifo_fast 0:" looking like proper root for somebody...
>> I meant "proper" for pfifo_fast. On the other hand, I wonder why these
>> multiqueue qdisc handles can't be really given such unique per dev
> 
> should be:
> multiqueue qdiscs can't be really given such unique per dev
> 
>> (instead of per queue) handles?

In my opinion the best way would be to sum up the stats and display
them for the "main" qdisc to avoid any compatibility problems and
additionally dump each queue for informational purposes.

This raises a few more questions however. First of all, there's no
"main" qdisc, so if we just use the first one for the summed up stats,
the question is whether the parameters of all root qdiscs are the same.
Currently they should be since pfifo_fast doesn't support changing
parameters, but this might change in the future, in which case we
might be displaying "incorrect" parameters.

The next question would be whether and how to support changing
parameters of individual multiq qdiscs. Similar to dumps, when
changing qdisc parameters we always pick queue number 0. It we want
to support changing parameters of different queues, we could
replicate changes to all queues, which would avoid the problem
stated above, or we could add an optional identifier for the
queue number, or we could use a combination of both which only
replicates settings when no queue number is specified.

In the second and third case, one possibility to get around the
incorrect parameters for the "main" qdisc would be to display
a virtual (non-existant) qdisc as the root, which only contains
the stats. I don't think the default choice of root qdisc type
should be counted as belonging to the API and userspace should
be prepared to deal with this.

And finally, the TC API used to be symetrical in the sense that
you could replay a dump to the kernel to get the same configuration.
Dumping the individual queues would break that property unless
we also support creating and configuring them in a symetrical
fashion.

So here's something of a possible solution, assuming that at some
point we do want to support changing parameters for individual root
qdiscs and taking the above into account:

- when creating a qdisc on a multiqueue-device through the TC
  API, we keep the current behaviour and share it between all
  queues. Configuration changes don't need to be replicated since
  the qdisc is shared. Dumps only contain a single instance of
  the qdisc. This is exactly what is done today.

- for non-shared root qdiscs on a multiqueue device like the
  pfifo_fast qdiscs created by default, we dump a virtual root
  qdisc (multiqueue) which only contains the number of bands
  and the statistics and dump the real root qdiscs as children
  of that qdisc.

- to create or configure a qdisc for a specific queue number, we
  require the user to create the virtual root qdisc (which only
  sets a flag or something like that) and then address each queue
  number. When the virtual qdisc has been created, we don't
  replicate any changes. This restores symetry with dumps and
  allows to address individual queues.

This is just a first collection of thoughts and probably can be
improved. Comments welcome :)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Lameter Sept. 2, 2009, 6:12 p.m. UTC | #8
On Wed, 2 Sep 2009, Jarek Poplawski wrote:

> I think, tc should've no problem with displaying summary stats of
> multiqueue qdiscs or even all of them separately, as mentioned by
> Patrick. And, maybe I still miss something, but there should be
> nothing special with tc vs. localhost either.

Ok. Can you come up with a patch? net/sched/sch_api.c can likely be
patches with the loop logic that Eric suggested earlier.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Lameter Sept. 2, 2009, 6:13 p.m. UTC | #9
On Wed, 2 Sep 2009, Eric Dumazet wrote:

> Same name "eth0" is displayed, that might confuse parsers...
>
> What naming convention should we choose for multiqueue devices ?

eth0/tx<number> ?


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Sept. 2, 2009, 7:49 p.m. UTC | #10
On Wed, Sep 02, 2009 at 01:12:55PM -0500, Christoph Lameter wrote:
> 
> On Wed, 2 Sep 2009, Jarek Poplawski wrote:
> 
> > I think, tc should've no problem with displaying summary stats of
> > multiqueue qdiscs or even all of them separately, as mentioned by
> > Patrick. And, maybe I still miss something, but there should be
> > nothing special with tc vs. localhost either.
> 
> Ok. Can you come up with a patch? net/sched/sch_api.c can likely be
> patches with the loop logic that Eric suggested earlier.

Yes, it's nice Eric and Patrick found time to fix it most properly.
 
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Sept. 2, 2009, 9:27 p.m. UTC | #11
Christoph Lameter wrote, On 09/02/2009 01:52 AM:

> [NET] Add proc file to display the state of all qdiscs
> 
> TC is a complicated tool and it currently does not allow the display of all
> qdisc states. It does not support multiple tx queues and also not
> localhost, nor does it display the current operating state of the queues.
> 
> This functionality could be added to tc / netlink but the tool is already
> complex to handle. The simple proc file here allows easy scanning by
> scripts and other tools. However, tc still needs to be updated to allow
> the modifications of multiqueue TX settings. tc's main focus is the
> configuration of qdiscs. The qdisc_stats file just shows the current
> state.
> 
> This patch adds
> 
> 	/proc/net/qdisc_stats
> 
> which displays the current state of all qdiscs on the system.
> 
> F.e.
> 
> $ cat /proc/net/qdisc_stats
> Queue    Device  State   Bytes  Packets Qlen Blog   Drops Requeue Overlimit
> TX0/root     lo   -          0        0    0    0       0       0       0
>  RX/root     lo   -          0        0    0    0       0       0       0
> TX0/root   eth0   -       5518       60    0    0       0       0       0
> TX1/root   eth0   -       2549       37    0    0       0       0       0
> TX2/root   eth0   -      63625      272    0    0       0       0       0
> TX3/root   eth0   -       1580       21    0    0       0       0       0
> TX4/root   eth0   R   88979440   260183    0 3532   43176    2111       0
> TX5/root   eth0   -       4698       56    0    0       0       0       0
> TX6/root   eth0   - 3598883129 10523140    0    0       0       0       0
> TX7/root   eth0   -       1750       21    0    0       0       0       0
> 
> 
> Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
> 
> ---
>  net/sched/sch_api.c |  130 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 130 insertions(+)
> 
> Index: linux-2.6/net/sched/sch_api.c
> ===================================================================
> --- linux-2.6.orig/net/sched/sch_api.c	2009-09-01 12:27:24.000000000 -0500
> +++ linux-2.6/net/sched/sch_api.c	2009-09-01 14:39:27.000000000 -0500
...
> +static void qdisc_seq_out(struct seq_file *seq, struct net_device *dev)
> +{
> +	struct netdev_queue *dev_queue;
> +	int i;
> +
> +	for (i = 0; i < dev->real_num_tx_queues; i++) {
> +		char buffer[10];
> +
> +		dev_queue = netdev_get_tx_queue(dev, i);
> +		sprintf(buffer, "TX%d", i);
> +		dump_qdisc_root(seq, dev, dev_queue->qdisc_sleeping, buffer);
> +	}
...

Btw, Patrick's comments reminded me this is probably not what you want
in case of non-default qdiscs: the root qdisc like prio will be
repeated for each tx queue with the same stats. I guess you need to do
here an additional query e.g. by comparing dev_queue->qdisc_sleeping
with that of i = 0.

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jesper Dangaard Brouer Sept. 3, 2009, 2:19 p.m. UTC | #12
On Wed, 2 Sep 2009, Christoph Lameter wrote:
> On Wed, 2 Sep 2009, Eric Dumazet wrote:
>
>> Same name "eth0" is displayed, that might confuse parsers...
>>
>> What naming convention should we choose for multiqueue devices ?
>
> eth0/tx<number> ?

Remember that we already have a naming convention in /proc/interrupts

  eth0-tx-<number>

Lets not introduce too many new once ;-)

Cheers,
   Jesper Brouer

--
-------------------------------------------------------------------
MSc. Master of Computer Science
Dept. of Computer Science, University of Copenhagen
Author of http://www.adsl-optimizer.dk
-------------------------------------------------------------------
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Patrick McHardy Sept. 3, 2009, 2:29 p.m. UTC | #13
Jesper Dangaard Brouer wrote:
> 
> On Wed, 2 Sep 2009, Christoph Lameter wrote:
>> On Wed, 2 Sep 2009, Eric Dumazet wrote:
>>
>>> Same name "eth0" is displayed, that might confuse parsers...
>>>
>>> What naming convention should we choose for multiqueue devices ?
>>
>> eth0/tx<number> ?
> 
> Remember that we already have a naming convention in /proc/interrupts
> 
>  eth0-tx-<number>
> 
> Lets not introduce too many new once ;-)

The approach I'm currently working on will present multiqueue root
qdiscs as children of a dummy classful qdisc. This avoids handle
clashes and the need for new identifiers and allows to address each
qdisc seperately, similar to how it works with other classful qdiscs:

qdisc mq 1: root refcnt 2
 Sent 126 bytes 3 pkt (dropped 0, overlimits 0 requeues 0)
 rate 0bit 0pps backlog 0b 0p requeues 0

class mq 1:1 root leaf 8001:
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
class mq 1:2 root leaf 8002:
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
class mq 1:3 root leaf 8003:
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
class mq 1:4 root leaf 8004:
 Sent 126 bytes 3 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0

Its a bit tricky though so I'm likely going to need a few more hours.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Sept. 3, 2009, 2:43 p.m. UTC | #14
Patrick McHardy a écrit :
> Jesper Dangaard Brouer wrote:
>> On Wed, 2 Sep 2009, Christoph Lameter wrote:
>>> On Wed, 2 Sep 2009, Eric Dumazet wrote:
>>>
>>>> Same name "eth0" is displayed, that might confuse parsers...
>>>>
>>>> What naming convention should we choose for multiqueue devices ?
>>> eth0/tx<number> ?
>> Remember that we already have a naming convention in /proc/interrupts
>>
>>  eth0-tx-<number>
>>
>> Lets not introduce too many new once ;-)
> 
> The approach I'm currently working on will present multiqueue root
> qdiscs as children of a dummy classful qdisc. This avoids handle
> clashes and the need for new identifiers and allows to address each
> qdisc seperately, similar to how it works with other classful qdiscs:
> 
> qdisc mq 1: root refcnt 2
>  Sent 126 bytes 3 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
> 
> class mq 1:1 root leaf 8001:
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> class mq 1:2 root leaf 8002:
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> class mq 1:3 root leaf 8003:
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> class mq 1:4 root leaf 8004:
>  Sent 126 bytes 3 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> 
> Its a bit tricky though so I'm likely going to need a few more hours.

Seems pretty cool, thanks Patrick.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jesper Dangaard Brouer Sept. 3, 2009, 5:30 p.m. UTC | #15
On Thu, 3 Sep 2009, Patrick McHardy wrote:

> Jesper Dangaard Brouer wrote:
>>
>> On Wed, 2 Sep 2009, Christoph Lameter wrote:
>>> On Wed, 2 Sep 2009, Eric Dumazet wrote:
>>>
>>>> Same name "eth0" is displayed, that might confuse parsers...
>>>>
>>>> What naming convention should we choose for multiqueue devices ?
>>>
>>> eth0/tx<number> ?
>>
>> Remember that we already have a naming convention in /proc/interrupts
>>
>>  eth0-tx-<number>
>>
>> Lets not introduce too many new once ;-)
>
> The approach I'm currently working on will present multiqueue root
> qdiscs as children of a dummy classful qdisc. This avoids handle
> clashes and the need for new identifiers and allows to address each
> qdisc seperately, similar to how it works with other classful qdiscs:

I like your approach. Its well suited for the qdiscs :-)

I especially like the possibility to access each qdisc seperately.  Does 
it then support having seperate qdisc per TX queue?  (I'm toying with the 
idea of transmitting our multicast traffic into/via a seperate TX hardware 
queue, and making a special qdisc for IPTV MPEG2-TS shaping)

Cheers,
   Jesper Brouer

--
-------------------------------------------------------------------
MSc. Master of Computer Science
Dept. of Computer Science, University of Copenhagen
Author of http://www.adsl-optimizer.dk
-------------------------------------------------------------------
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Patrick McHardy Sept. 3, 2009, 5:56 p.m. UTC | #16
Jesper Dangaard Brouer wrote:
> 
> On Thu, 3 Sep 2009, Patrick McHardy wrote:
> 
>> The approach I'm currently working on will present multiqueue root
>> qdiscs as children of a dummy classful qdisc. This avoids handle
>> clashes and the need for new identifiers and allows to address each
>> qdisc seperately, similar to how it works with other classful qdiscs:
> 
> I like your approach. Its well suited for the qdiscs :-)
> 
> I especially like the possibility to access each qdisc seperately.  Does
> it then support having seperate qdisc per TX queue?  (I'm toying with
> the idea of transmitting our multicast traffic into/via a seperate TX
> hardware queue, and making a special qdisc for IPTV MPEG2-TS shaping)

Yes, you can attach qdiscs to the classes representing the queues.
At least it should work :) It would probably also be possible to
use TC classifiers for queue selection.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Lameter Sept. 3, 2009, 6:03 p.m. UTC | #17
On Wed, 2 Sep 2009, Jarek Poplawski wrote:

> Btw, Patrick's comments reminded me this is probably not what you want
> in case of non-default qdiscs: the root qdisc like prio will be
> repeated for each tx queue with the same stats. I guess you need to do
> here an additional query e.g. by comparing dev_queue->qdisc_sleeping
> with that of i = 0.

Hmmm.. Maybe I better leave it to the experts then.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Sept. 3, 2009, 7:35 p.m. UTC | #18
On Thu, Sep 03, 2009 at 01:03:37PM -0500, Christoph Lameter wrote:
> On Wed, 2 Sep 2009, Jarek Poplawski wrote:
> 
> > Btw, Patrick's comments reminded me this is probably not what you want
> > in case of non-default qdiscs: the root qdisc like prio will be
> > repeated for each tx queue with the same stats. I guess you need to do
> > here an additional query e.g. by comparing dev_queue->qdisc_sleeping
> > with that of i = 0.
> 
> Hmmm.. Maybe I better leave it to the experts then.
> 

This is certainly not what I wanted... The experts missed this problem
long enough. It might only need to wait a bit for Patrick's changes.

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Sept. 3, 2009, 7:38 p.m. UTC | #19
Jarek Poplawski a écrit :
> On Thu, Sep 03, 2009 at 01:03:37PM -0500, Christoph Lameter wrote:
>> On Wed, 2 Sep 2009, Jarek Poplawski wrote:
>>
>>> Btw, Patrick's comments reminded me this is probably not what you want
>>> in case of non-default qdiscs: the root qdisc like prio will be
>>> repeated for each tx queue with the same stats. I guess you need to do
>>> here an additional query e.g. by comparing dev_queue->qdisc_sleeping
>>> with that of i = 0.
>> Hmmm.. Maybe I better leave it to the experts then.
>>
> 
> This is certainly not what I wanted... The experts missed this problem
> long enough. It might only need to wait a bit for Patrick's changes.

The experts ? Anyway, you can say "the expert", there is only one :)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Sept. 3, 2009, 7:54 p.m. UTC | #20
On Thu, Sep 03, 2009 at 09:38:54PM +0200, Eric Dumazet wrote:
> Jarek Poplawski a écrit :
> > On Thu, Sep 03, 2009 at 01:03:37PM -0500, Christoph Lameter wrote:
> >> On Wed, 2 Sep 2009, Jarek Poplawski wrote:
> >>
> >>> Btw, Patrick's comments reminded me this is probably not what you want
> >>> in case of non-default qdiscs: the root qdisc like prio will be
> >>> repeated for each tx queue with the same stats. I guess you need to do
> >>> here an additional query e.g. by comparing dev_queue->qdisc_sleeping
> >>> with that of i = 0.
> >> Hmmm.. Maybe I better leave it to the experts then.
> >>
> > 
> > This is certainly not what I wanted... The experts missed this problem
> > long enough. It might only need to wait a bit for Patrick's changes.
> 
> The experts ? Anyway, you can say "the expert", there is only one :)

Right! I admire Davem too... ;-)

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Sept. 3, 2009, 11:22 p.m. UTC | #21
From: Jesper Dangaard Brouer <hawk@diku.dk>
Date: Thu, 3 Sep 2009 16:19:44 +0200 (CEST)

> 
> On Wed, 2 Sep 2009, Christoph Lameter wrote:
>> On Wed, 2 Sep 2009, Eric Dumazet wrote:
>>
>>> Same name "eth0" is displayed, that might confuse parsers...
>>>
>>> What naming convention should we choose for multiqueue devices ?
>>
>> eth0/tx<number> ?
> 
> Remember that we already have a naming convention in /proc/interrupts
> 
>  eth0-tx-<number>
> 
> Lets not introduce too many new once ;-)

Yes, please :-)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Sept. 3, 2009, 11:28 p.m. UTC | #22
From: Jesper Dangaard Brouer <hawk@diku.dk>
Date: Thu, 3 Sep 2009 19:30:00 +0200 (CEST)

> I especially like the possibility to access each qdisc seperately.
> Does it then support having seperate qdisc per TX queue?  (I'm toying
> with the idea of transmitting our multicast traffic into/via a
> seperate TX hardware queue, and making a special qdisc for IPTV
> MPEG2-TS shaping)

Indeed it sounds interesting.

But I want to warn everyone against adding support for anything
that adds a way to do operations "on every device qdisc" at the
kernel level.

I tried to do that and the error unwinding complexity is unacceptable.

We can make the tools support things like this.  Keep the kernel
simple and allow it to only operate on one queue at a time for
a given individual config request.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Sept. 3, 2009, 11:31 p.m. UTC | #23
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 03 Sep 2009 19:56:33 +0200

> It would probably also be possible to use TC classifiers for queue
> selection.

You mean just like Intel's multiqueue scheduler and multiqueue
classifiers which we already have in the tree :-)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Patrick McHardy Sept. 4, 2009, 1:36 a.m. UTC | #24
David Miller wrote:
> From: Patrick McHardy <kaber@trash.net>
> Date: Thu, 03 Sep 2009 19:56:33 +0200
>
>   
>> It would probably also be possible to use TC classifiers for queue
>> selection.
>>     
>
> You mean just like Intel's multiqueue scheduler and multiqueue
> classifiers which we already have in the tree :-

No, actually integrated in the current infrastructure :) The patches
add a virtual root qdisc, so we still have a single root from userspace's
POV. This qdisc is only used for sch_api purposes and doesn't necessarily
match the real root qdiscs. When a "mq" qdisc is attached (done by
default for multiqueue devices), the queues are exposed as child
classes of this qdisc and can be individually grafted with different
qdiscs.

Since the virtual root is global per device, what we could do is allow
to attach classifiers and execute them in dev_pick_tx() to select the
queue. But currently this would require global locking, so this would
have to be fixed previously. Probably not an easy thing to do.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

Index: linux-2.6/net/sched/sch_api.c
===================================================================
--- linux-2.6.orig/net/sched/sch_api.c	2009-09-01 12:27:24.000000000 -0500
+++ linux-2.6/net/sched/sch_api.c	2009-09-01 14:39:27.000000000 -0500
@@ -1699,6 +1699,135 @@  static const struct file_operations psch
 	.llseek = seq_lseek,
 	.release = single_release,
 };
+
+static void dump_qdisc(struct seq_file *seq, struct net_device *dev,
+				struct Qdisc *q, char *inout, char *text)
+{
+	char state[5];
+	char *p = state;
+
+	if (test_bit(__QDISC_STATE_RUNNING, &q->state))
+		*p++ = 'R';
+	if (test_bit(__QDISC_STATE_SCHED, &q->state))
+		*p++ = 'S';
+	if (test_bit(__QDISC_STATE_DEACTIVATED, &q->state))
+		*p++ = 'D';
+	if (q->state == 0)
+		*p++ = '-';
+	*p = 0;
+
+	seq_printf(seq, "%3s/%2s %6s %3s %10lld %8d %4d %4d %7d %7d %7d\n",
+		inout, text, dev->name, state,
+		q->bstats.bytes, q->bstats.packets,
+		q->qstats.qlen, q->qstats.backlog, q->qstats.drops,
+		q->qstats.requeues, q->qstats.overlimits);
+}
+
+static void dump_qdisc_root(struct seq_file *seq, struct net_device *dev,
+					 struct Qdisc *root, char *inout)
+{
+	struct Qdisc *q;
+	int n = 0;
+
+	if (!root)
+		return;
+
+	dump_qdisc(seq, dev, root, inout, "root");
+
+	list_for_each_entry(q, &root->list, list) {
+		char buffer[10];
+
+		sprintf(buffer,"Q%d", ++n);
+		dump_qdisc(seq, dev, q, inout, buffer);
+	}
+}
+
+
+static void qdisc_seq_out(struct seq_file *seq, struct net_device *dev)
+{
+	struct netdev_queue *dev_queue;
+	int i;
+
+	for (i = 0; i < dev->real_num_tx_queues; i++) {
+		char buffer[10];
+
+		dev_queue = netdev_get_tx_queue(dev, i);
+		sprintf(buffer, "TX%d", i);
+		dump_qdisc_root(seq, dev, dev_queue->qdisc_sleeping, buffer);
+	}
+	dev_queue = &dev->rx_queue;
+	dump_qdisc_root(seq, dev, dev_queue->qdisc_sleeping, "RX");
+}
+
+static int qdisc_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "Queue    Device  State   Bytes  Packets "
+			"Qlen Blog   Drops Requeue Overlimit\n");
+	} else
+		qdisc_seq_out(seq, v);
+
+	return 0;
+}
+
+static void *qdisc_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(dev_base_lock)
+{
+	struct net *net = seq_file_net(seq);
+	loff_t off;
+	struct net_device *dev;
+
+	read_lock(&dev_base_lock);
+
+	if (!*pos)
+		return SEQ_START_TOKEN;
+
+	off = 1;
+
+	for_each_netdev(net, dev)
+		if (off++ == *pos)
+			return dev;
+
+	return NULL;
+}
+
+static void *qdisc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct net *net = seq_file_net(seq);
+
+	++*pos;
+	return v == SEQ_START_TOKEN ?
+		first_net_device(net) : next_net_device((struct net_device *)v);
+}
+
+static void qdisc_seq_stop(struct seq_file *seq, void *v)
+	__releases(dev_base_lock)
+{
+	read_unlock(&dev_base_lock);
+}
+
+
+static const struct seq_operations qdisc_seq_ops = {
+	.start	= qdisc_seq_start,
+	.next	= qdisc_seq_next,
+	.stop	= qdisc_seq_stop,
+	.show	= qdisc_seq_show,
+};
+
+static int qdisc_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &qdisc_seq_ops,
+			sizeof(struct seq_net_private));
+}
+
+
+static const struct file_operations qdisc_fops = {
+	.owner = THIS_MODULE,
+	.open = qdisc_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
 #endif

 static int __init pktsched_init(void)
@@ -1706,6 +1835,7 @@  static int __init pktsched_init(void)
 	register_qdisc(&pfifo_qdisc_ops);
 	register_qdisc(&bfifo_qdisc_ops);
 	proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
+	proc_net_fops_create(&init_net, "qdisc_stats", 0, &qdisc_fops);

 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);