Patch Detail
get:
Show a patch.
patch:
Partially update a patch.
put:
Update a patch.
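These endpoints can be exercised from a short script. The sketch below is illustrative only: it assumes the third-party Python requests library, a placeholder API token, and example update values; the endpoint path and field names are taken from the response shown further down.

import requests

API = "http://patchwork.ozlabs.org/api"
TOKEN = "0123456789abcdef"  # hypothetical token; write operations typically require one

headers = {"Authorization": "Token " + TOKEN}

# get: show a patch
resp = requests.get(API + "/patches/490/", headers=headers)
resp.raise_for_status()
patch = resp.json()
print(patch["name"], patch["state"], patch["check"])

# patch: partially update a patch ("state" and "archived" are fields shown in
# the response below; "accepted" is an assumed example value)
resp = requests.patch(API + "/patches/490/",
                      headers=headers,
                      json={"state": "accepted", "archived": True})
resp.raise_for_status()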
GET /api/patches/490/?format=api
{ "id": 490, "url": "http://patchwork.ozlabs.org/api/patches/490/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netdev/patch/20080918063036.27934.91273.stgit@localhost.localdomain/", "project": { "id": 7, "url": "http://patchwork.ozlabs.org/api/projects/7/?format=api", "name": "Linux network development", "link_name": "netdev", "list_id": "netdev.vger.kernel.org", "list_email": "netdev@vger.kernel.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20080918063036.27934.91273.stgit@localhost.localdomain>", "list_archive_url": null, "date": "2008-09-18T06:43:02", "name": "[RFC] sched: only dequeue if packet can be queued to hardware queue.", "commit_ref": null, "pull_url": null, "state": "rfc", "archived": true, "hash": "6a7d1488e68d69261e6a83f18d690c8b804e2f01", "submitter": { "id": 251, "url": "http://patchwork.ozlabs.org/api/people/251/?format=api", "name": "Duyck, Alexander H", "email": "alexander.h.duyck@intel.com" }, "delegate": { "id": 34, "url": "http://patchwork.ozlabs.org/api/users/34/?format=api", "username": "davem", "first_name": "David", "last_name": "Miller", "email": "davem@davemloft.net" }, "mbox": "http://patchwork.ozlabs.org/project/netdev/patch/20080918063036.27934.91273.stgit@localhost.localdomain/mbox/", "series": [], "comments": "http://patchwork.ozlabs.org/api/patches/490/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/490/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<netdev-owner@vger.kernel.org>", "X-Original-To": "patchwork-incoming@ozlabs.org", "Delivered-To": "patchwork-incoming@ozlabs.org", "Received": [ "from vger.kernel.org (vger.kernel.org [209.132.176.167])\n\tby ozlabs.org (Postfix) with ESMTP id 6A7B0DDE17\n\tfor <patchwork-incoming@ozlabs.org>;\n\tThu, 18 Sep 2008 16:43:20 +1000 (EST)", "(majordomo@vger.kernel.org) by vger.kernel.org via listexpand\n\tid S1752628AbYIRGnM (ORCPT <rfc822;patchwork-incoming@ozlabs.org>);\n\tThu, 18 Sep 2008 02:43:12 -0400", "(majordomo@vger.kernel.org) by vger.kernel.org id S1752503AbYIRGnL\n\t(ORCPT <rfc822; netdev-outgoing>); Thu, 18 Sep 2008 02:43:11 -0400", "from mga11.intel.com ([192.55.52.93]:63296 \"EHLO mga11.intel.com\"\n\trhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP\n\tid S1752333AbYIRGnI (ORCPT <rfc822;netdev@vger.kernel.org>);\n\tThu, 18 Sep 2008 02:43:08 -0400", "from fmsmga002.fm.intel.com ([10.253.24.26])\n\tby fmsmga102.fm.intel.com with ESMTP; 17 Sep 2008 23:40:17 -0700", "from orsmsx334.amr.corp.intel.com (HELO orsmsx334.jf.intel.com)\n\t([10.22.226.45])\n\tby fmsmga002.fm.intel.com with ESMTP; 17 Sep 2008 23:40:05 -0700", "from localhost.localdomain ([10.23.35.60]) by\n\torsmsx334.jf.intel.com with Microsoft SMTPSVC(6.0.3790.1830); \n\tWed, 17 Sep 2008 23:43:05 -0700", "from localhost.localdomain (gitlad [127.0.0.1])\n\tby localhost.localdomain (8.14.2/8.14.2) with ESMTP id m8I6h25J028036;\n\tWed, 17 Sep 2008 23:43:02 -0700" ], "X-ExtLoop1": "1", "X-IronPort-AV": "E=Sophos;i=\"4.32,419,1217833200\"; d=\"scan'208\";a=\"381631412\"", "From": "Alexander Duyck <alexander.h.duyck@intel.com>", "Subject": "[RFC PATCH] sched: only dequeue if packet can be queued to hardware\n\tqueue.", "To": "netdev@vger.kernel.org", "Cc": "jarkao2@gmail.com, herbert@gondor.apana.org.au, davem@daveloft.net,\n\tkaber@trash.net", "Date": "Wed, 17 Sep 2008 23:43:02 -0700", "Message-ID": "<20080918063036.27934.91273.stgit@localhost.localdomain>", 
"User-Agent": "StGIT/0.14.2", "MIME-Version": "1.0", "Content-Type": "text/plain; charset=\"utf-8\"", "Content-Transfer-Encoding": "7bit", "X-OriginalArrivalTime": "18 Sep 2008 06:43:05.0915 (UTC)\n\tFILETIME=[CE3540B0:01C91959]", "Sender": "netdev-owner@vger.kernel.org", "Precedence": "bulk", "List-ID": "<netdev.vger.kernel.org>", "X-Mailing-List": "netdev@vger.kernel.org" }, "content": "This this patch is mangled I appologize, this is my first try sending\na patch directly to netdev.\n\nThe patch below is my attempt to resolve the issue found with qdisc_run \nonly checking the state of queue zero before running. This approach \nessentially makes the qdisc layer smart enough to do it's own check to \nsee if a hw queue is stopped instead of relying on other calls to check \nbeforehand.\n\nI have been able to verify functionality for most qdiscs with the\nexceptions of netem, red, sfq, and tbf. I am not familiar with the\noperation of these qdiscs and so I am not certain how to avoid the high \ndrop rate I am currently seeing when using these qdiscs.\n\nThe main advantages of this patch can be seen using a netperf UDP_STREAM\ntest to a slow interface with multiple queues and a qdisc such as pfifo,\nbfifo, or prio. For my testing I used an 82575 with 4 queues on a\nsystem with 8 cpus. When any queue other than 0 was used in the old \nmethod the cpu utilization for one core would go to 100%, using this new \napproach the cpu utilization for all queues was at the same level queue \n0 was with the old approach.", "diff": "diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h\nindex b786a5b..4082f39 100644\n--- a/include/net/pkt_sched.h\n+++ b/include/net/pkt_sched.h\n@@ -90,10 +90,7 @@ extern void __qdisc_run(struct Qdisc *q);\n \n static inline void qdisc_run(struct Qdisc *q)\n {\n-\tstruct netdev_queue *txq = q->dev_queue;\n-\n-\tif (!netif_tx_queue_stopped(txq) &&\n-\t !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))\n+\tif (!test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))\n \t\t__qdisc_run(q);\n }\n \ndiff --git a/include/net/sch_generic.h b/include/net/sch_generic.h\nindex e556962..4400a18 100644\n--- a/include/net/sch_generic.h\n+++ b/include/net/sch_generic.h\n@@ -45,6 +45,7 @@ struct Qdisc\n #define TCQ_F_BUILTIN\t1\n #define TCQ_F_THROTTLED\t2\n #define TCQ_F_INGRESS\t4\n+#define TCQ_F_STOPPED\t8\n \tint\t\t\tpadded;\n \tstruct Qdisc_ops\t*ops;\n \tstruct qdisc_size_table\t*stab;\n@@ -110,6 +111,7 @@ struct Qdisc_ops\n \n \tint \t\t\t(*enqueue)(struct sk_buff *, struct Qdisc *);\n \tstruct sk_buff *\t(*dequeue)(struct Qdisc *);\n+\tstruct sk_buff *\t(*smart_dequeue)(struct Qdisc *);\n \tint \t\t\t(*requeue)(struct sk_buff *, struct Qdisc *);\n \tunsigned int\t\t(*drop)(struct Qdisc *);\n \n@@ -399,6 +401,31 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch)\n \treturn __qdisc_enqueue_tail(skb, sch, &sch->q);\n }\n \n+static inline struct sk_buff *__qdisc_smart_dequeue(struct Qdisc *sch,\n+\t\t\t\t\t\t struct sk_buff_head *list)\n+{\n+\tstruct sk_buff *skb = skb_peek(list);\n+\tstruct netdev_queue *txq;\n+\n+\tif (!skb)\n+\t\treturn NULL;\n+\n+\ttxq = netdev_get_tx_queue(qdisc_dev(sch), skb_get_queue_mapping(skb));\n+\tif (netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq)) {\n+\t\tsch->flags |= TCQ_F_STOPPED;\n+\t\treturn NULL;\n+\t}\n+\t__skb_unlink(skb, list);\n+\tsch->qstats.backlog -= qdisc_pkt_len(skb);\n+\tsch->flags &= ~TCQ_F_STOPPED;\n+\treturn skb;\n+}\n+\n+static inline struct sk_buff *qdisc_smart_dequeue(struct Qdisc 
*sch)\n+{\n+\treturn __qdisc_smart_dequeue(sch, &sch->q);\n+}\n+\n static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch,\n \t\t\t\t\t\t struct sk_buff_head *list)\n {\ndiff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c\nindex 43d3725..91a40b2 100644\n--- a/net/sched/sch_atm.c\n+++ b/net/sched/sch_atm.c\n@@ -516,12 +516,31 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)\n \n \tpr_debug(\"atm_tc_dequeue(sch %p,[qdisc %p])\\n\", sch, p);\n \ttasklet_schedule(&p->task);\n-\tskb = p->link.q->dequeue(p->link.q);\n+\tskb = p->link.q->ops->dequeue(p->link.q);\n \tif (skb)\n \t\tsch->q.qlen--;\n \treturn skb;\n }\n \n+static struct sk_buff *atm_tc_smart_dequeue(struct Qdisc *sch)\n+{\n+\tstruct atm_qdisc_data *p = qdisc_priv(sch);\n+\tstruct sk_buff *skb;\n+\n+\tpr_debug(\"atm_tc_smart_dequeue(sch %p,[qdisc %p])\\n\", sch, p);\n+\ttasklet_schedule(&p->task);\n+\tskb = p->link.q->dequeue(p->link.q);\n+\tif (skb) {\n+\t\tsch->q.qlen--;\n+\t\tsch->flags &= ~TCQ_F_STOPPED;\n+\t} else {\n+\t\tif (p->link.q->flags & TCQ_F_STOPPED)\n+\t\t\tsch->flags |= TCQ_F_STOPPED;\n+\t}\n+\n+\treturn skb;\n+}\n+\n static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)\n {\n \tstruct atm_qdisc_data *p = qdisc_priv(sch);\n@@ -694,6 +713,7 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = {\n \t.priv_size\t= sizeof(struct atm_qdisc_data),\n \t.enqueue\t= atm_tc_enqueue,\n \t.dequeue\t= atm_tc_dequeue,\n+\t.smart_dequeue\t= atm_tc_smart_dequeue,\n \t.requeue\t= atm_tc_requeue,\n \t.drop\t\t= atm_tc_drop,\n \t.init\t\t= atm_tc_init,\ndiff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c\nindex 507fb48..48e6909 100644\n--- a/net/sched/sch_blackhole.c\n+++ b/net/sched/sch_blackhole.c\n@@ -33,6 +33,7 @@ static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = {\n \t.priv_size\t= 0,\n \t.enqueue\t= blackhole_enqueue,\n \t.dequeue\t= blackhole_dequeue,\n+\t.smart_dequeue\t= blackhole_dequeue,\n \t.owner\t\t= THIS_MODULE,\n };\n \ndiff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c\nindex 8b06fa9..5ec6040 100644\n--- a/net/sched/sch_cbq.c\n+++ b/net/sched/sch_cbq.c\n@@ -851,7 +851,7 @@ cbq_under_limit(struct cbq_class *cl)\n }\n \n static __inline__ struct sk_buff *\n-cbq_dequeue_prio(struct Qdisc *sch, int prio)\n+cbq_dequeue_prio(struct Qdisc *sch, int prio, int *stopped)\n {\n \tstruct cbq_sched_data *q = qdisc_priv(sch);\n \tstruct cbq_class *cl_tail, *cl_prev, *cl;\n@@ -881,7 +881,10 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)\n \t\t\t\tgoto next_class;\n \t\t\t}\n \n-\t\t\tskb = cl->q->dequeue(cl->q);\n+\t\t\tif (stopped)\n+\t\t\t\tskb = cl->q->dequeue(cl->q);\n+\t\t\telse\n+\t\t\t\tskb = cl->q->ops->dequeue(cl->q);\n \n \t\t\t/* Class did not give us any skb :-(\n \t\t\t It could occur even if cl->q->q.qlen != 0\n@@ -912,6 +915,11 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)\n \t\t\treturn skb;\n \n skip_class:\n+\t\t\tif (stopped && (cl->q->flags & TCQ_F_STOPPED)) {\n+\t\t\t\t*stopped = true;\n+\t\t\t\treturn NULL;\n+\t\t\t}\n+\n \t\t\tif (cl->q->q.qlen == 0 || prio != cl->cpriority) {\n \t\t\t\t/* Class is empty or penalized.\n \t\t\t\t Unlink it from active chain.\n@@ -964,7 +972,7 @@ cbq_dequeue_1(struct Qdisc *sch)\n \twhile (activemask) {\n \t\tint prio = ffz(~activemask);\n \t\tactivemask &= ~(1<<prio);\n-\t\tskb = cbq_dequeue_prio(sch, prio);\n+\t\tskb = cbq_dequeue_prio(sch, prio, NULL);\n \t\tif (skb)\n \t\t\treturn skb;\n \t}\n@@ -1048,6 +1056,109 @@ cbq_dequeue(struct Qdisc *sch)\n \treturn NULL;\n }\n \n+static __inline__ struct 
sk_buff *\n+cbq_smart_dequeue_1(struct Qdisc *sch)\n+{\n+\tstruct cbq_sched_data *q = qdisc_priv(sch);\n+\tstruct sk_buff *skb;\n+\tunsigned activemask;\n+\tint stopped = false;\n+\n+\tactivemask = q->activemask&0xFF;\n+\twhile (activemask) {\n+\t\tint prio = ffz(~activemask);\n+\t\tactivemask &= ~(1<<prio);\n+\t\tskb = cbq_dequeue_prio(sch, prio, &stopped);\n+\t\tif (skb)\n+\t\t\treturn skb;\n+\t\tif (stopped) {\n+\t\t\tsch->flags |= TCQ_F_STOPPED;\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\treturn NULL;\n+}\n+\n+static struct sk_buff *\n+cbq_smart_dequeue(struct Qdisc *sch)\n+{\n+\tstruct sk_buff *skb;\n+\tstruct cbq_sched_data *q = qdisc_priv(sch);\n+\tpsched_time_t now;\n+\tpsched_tdiff_t incr;\n+\n+\tnow = psched_get_time();\n+\tincr = now - q->now_rt;\n+\n+\tif (q->tx_class) {\n+\t\tpsched_tdiff_t incr2;\n+\t\t/* Time integrator. We calculate EOS time\n+\t\t by adding expected packet transmission time.\n+\t\t If real time is greater, we warp artificial clock,\n+\t\t so that:\n+\n+\t\t cbq_time = max(real_time, work);\n+\t\t */\n+\t\tincr2 = L2T(&q->link, q->tx_len);\n+\t\tq->now += incr2;\n+\t\tcbq_update(q);\n+\t\tincr -= incr2;\n+\t\tif (incr < 0)\n+\t\t\tincr = 0;\n+\t}\n+\tq->now += incr;\n+\tq->now_rt = now;\n+\n+\tfor (;;) {\n+\t\tq->wd_expires = 0;\n+\n+\t\tskb = cbq_smart_dequeue_1(sch);\n+\t\tif (skb) {\n+\t\t\tsch->q.qlen--;\n+\t\t\tsch->flags &= ~(TCQ_F_THROTTLED | TCQ_F_STOPPED);\n+\t\t\treturn skb;\n+\t\t}\n+\n+\t\tif (sch->flags & TCQ_F_STOPPED)\n+\t\t\treturn NULL;\n+\n+\t\t/* All the classes are overlimit.\n+\n+\t\t It is possible, if:\n+\n+\t\t 1. Scheduler is empty.\n+\t\t 2. Toplevel cutoff inhibited borrowing.\n+\t\t 3. Root class is overlimit.\n+\n+\t\t Reset 2d and 3d conditions and retry.\n+\n+\t\t Note, that NS and cbq-2.0 are buggy, peeking\n+\t\t an arbitrary class is appropriate for ancestor-only\n+\t\t sharing, but not for toplevel algorithm.\n+\n+\t\t Our version is better, but slower, because it requires\n+\t\t two passes, but it is unavoidable with top-level sharing.\n+\t\t*/\n+\n+\t\tif (q->toplevel == TC_CBQ_MAXLEVEL &&\n+\t\t q->link.undertime == PSCHED_PASTPERFECT)\n+\t\t\tbreak;\n+\n+\t\tq->toplevel = TC_CBQ_MAXLEVEL;\n+\t\tq->link.undertime = PSCHED_PASTPERFECT;\n+\t}\n+\n+\t/* No packets in scheduler or nobody wants to give them to us :-(\n+\t Sigh... start watchdog timer in the last case. 
*/\n+\n+\tif (sch->q.qlen) {\n+\t\tsch->qstats.overlimits++;\n+\t\tif (q->wd_expires)\n+\t\t\tqdisc_watchdog_schedule(&q->watchdog,\n+\t\t\t\t\t\tnow + q->wd_expires);\n+\t}\n+\treturn NULL;\n+}\n /* CBQ class maintanance routines */\n \n static void cbq_adjust_levels(struct cbq_class *this)\n@@ -2065,6 +2176,7 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {\n \t.priv_size\t=\tsizeof(struct cbq_sched_data),\n \t.enqueue\t=\tcbq_enqueue,\n \t.dequeue\t=\tcbq_dequeue,\n+\t.smart_dequeue\t=\tcbq_smart_dequeue,\n \t.requeue\t=\tcbq_requeue,\n \t.drop\t\t=\tcbq_drop,\n \t.init\t\t=\tcbq_init,\ndiff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c\nindex edd1298..21da7af 100644\n--- a/net/sched/sch_dsmark.c\n+++ b/net/sched/sch_dsmark.c\n@@ -313,6 +313,52 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)\n \treturn skb;\n }\n \n+static struct sk_buff *dsmark_smart_dequeue(struct Qdisc *sch)\n+{\n+\tstruct dsmark_qdisc_data *p = qdisc_priv(sch);\n+\tstruct sk_buff *skb;\n+\tu32 index;\n+\n+\tpr_debug(\"dsmark_smart_dequeue(sch %p,[qdisc %p])\\n\", sch, p);\n+\n+\tskb = p->q->dequeue(p->q);\n+\tif (skb == NULL) {\n+\t\tif (p->q->flags & TCQ_F_STOPPED)\n+\t\t\tsch->flags |= TCQ_F_STOPPED;\n+\t\treturn NULL;\n+\t}\n+\n+\tsch->q.qlen--;\n+\tsch->flags &= ~TCQ_F_STOPPED;\n+\n+\tindex = skb->tc_index & (p->indices - 1);\n+\tpr_debug(\"index %d->%d\\n\", skb->tc_index, index);\n+\n+\tswitch (skb->protocol) {\n+\tcase __constant_htons(ETH_P_IP):\n+\t\tipv4_change_dsfield(ip_hdr(skb), p->mask[index],\n+\t\t\t\t p->value[index]);\n+\t\t\tbreak;\n+\tcase __constant_htons(ETH_P_IPV6):\n+\t\tipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],\n+\t\t\t\t p->value[index]);\n+\t\t\tbreak;\n+\tdefault:\n+\t\t/*\n+\t\t * Only complain if a change was actually attempted.\n+\t\t * This way, we can send non-IP traffic through dsmark\n+\t\t * and don't need yet another qdisc as a bypass.\n+\t\t */\n+\t\tif (p->mask[index] != 0xff || p->value[index])\n+\t\t\tprintk(KERN_WARNING\n+\t\t\t \"dsmark_smart_dequeue: unsupported protocol %d\"\n+\t\t\t \"\\n\", ntohs(skb->protocol));\n+\t\tbreak;\n+\t}\n+\n+\treturn skb;\n+}\n+\n static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)\n {\n \tstruct dsmark_qdisc_data *p = qdisc_priv(sch);\n@@ -496,6 +542,7 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {\n \t.priv_size\t=\tsizeof(struct dsmark_qdisc_data),\n \t.enqueue\t=\tdsmark_enqueue,\n \t.dequeue\t=\tdsmark_dequeue,\n+\t.smart_dequeue\t=\tdsmark_smart_dequeue,\n \t.requeue\t=\tdsmark_requeue,\n \t.drop\t\t=\tdsmark_drop,\n \t.init\t\t=\tdsmark_init,\ndiff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c\nindex 23d258b..15f28f6 100644\n--- a/net/sched/sch_fifo.c\n+++ b/net/sched/sch_fifo.c\n@@ -83,6 +83,7 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {\n \t.priv_size\t=\tsizeof(struct fifo_sched_data),\n \t.enqueue\t=\tpfifo_enqueue,\n \t.dequeue\t=\tqdisc_dequeue_head,\n+\t.smart_dequeue\t=\tqdisc_smart_dequeue,\n \t.requeue\t=\tqdisc_requeue,\n \t.drop\t\t=\tqdisc_queue_drop,\n \t.init\t\t=\tfifo_init,\n@@ -98,6 +99,7 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {\n \t.priv_size\t=\tsizeof(struct fifo_sched_data),\n \t.enqueue\t=\tbfifo_enqueue,\n \t.dequeue\t=\tqdisc_dequeue_head,\n+\t.smart_dequeue\t=\tqdisc_smart_dequeue,\n \t.requeue\t=\tqdisc_requeue,\n \t.drop\t\t=\tqdisc_queue_drop,\n \t.init\t\t=\tfifo_init,\ndiff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c\nindex ec0a083..f32cb83 100644\n--- a/net/sched/sch_generic.c\n+++ 
b/net/sched/sch_generic.c\n@@ -135,8 +135,7 @@ static inline int qdisc_restart(struct Qdisc *q)\n \ttxq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));\n \n \tHARD_TX_LOCK(dev, txq, smp_processor_id());\n-\tif (!netif_tx_queue_stopped(txq) &&\n-\t !netif_tx_queue_frozen(txq))\n+\tif (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))\n \t\tret = dev_hard_start_xmit(skb, dev, txq);\n \tHARD_TX_UNLOCK(dev, txq);\n \n@@ -163,10 +162,6 @@ static inline int qdisc_restart(struct Qdisc *q)\n \t\tbreak;\n \t}\n \n-\tif (ret && (netif_tx_queue_stopped(txq) ||\n-\t\t netif_tx_queue_frozen(txq)))\n-\t\tret = 0;\n-\n \treturn ret;\n }\n \n@@ -313,6 +308,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {\n \t.priv_size\t=\t0,\n \t.enqueue\t=\tnoop_enqueue,\n \t.dequeue\t=\tnoop_dequeue,\n+\t.smart_dequeue\t=\tnoop_dequeue,\n \t.requeue\t=\tnoop_requeue,\n \t.owner\t\t=\tTHIS_MODULE,\n };\n@@ -337,6 +333,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {\n \t.priv_size\t=\t0,\n \t.enqueue\t=\tnoop_enqueue,\n \t.dequeue\t=\tnoop_dequeue,\n+\t.smart_dequeue\t=\tnoop_dequeue,\n \t.requeue\t=\tnoop_requeue,\n \t.owner\t\t=\tTHIS_MODULE,\n };\n@@ -400,6 +397,24 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)\n \treturn NULL;\n }\n \n+static struct sk_buff *pfifo_fast_smart_dequeue(struct Qdisc* qdisc)\n+{\n+\tint prio;\n+\tstruct sk_buff_head *list = qdisc_priv(qdisc);\n+\tstruct sk_buff *skb;\n+\n+\tfor (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {\n+\t\tif (!skb_queue_empty(list + prio)) {\n+\t\t\tskb = __qdisc_smart_dequeue(qdisc, list + prio);\n+\t\t\tif (skb != NULL)\n+\t\t\t\tqdisc->q.qlen--;\n+\t\t\treturn skb;\n+\t\t}\n+\t}\n+\n+\treturn NULL;\n+}\n+\n static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)\n {\n \tqdisc->q.qlen++;\n@@ -446,6 +461,7 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = {\n \t.priv_size\t=\tPFIFO_FAST_BANDS * sizeof(struct sk_buff_head),\n \t.enqueue\t=\tpfifo_fast_enqueue,\n \t.dequeue\t=\tpfifo_fast_dequeue,\n+\t.smart_dequeue\t=\tpfifo_fast_smart_dequeue,\n \t.requeue\t=\tpfifo_fast_requeue,\n \t.init\t\t=\tpfifo_fast_init,\n \t.reset\t\t=\tpfifo_fast_reset,\n@@ -475,7 +491,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,\n \tskb_queue_head_init(&sch->q);\n \tsch->ops = ops;\n \tsch->enqueue = ops->enqueue;\n-\tsch->dequeue = ops->dequeue;\n+\tsch->dequeue = ops->smart_dequeue;\n \tsch->dev_queue = dev_queue;\n \tdev_hold(qdisc_dev(sch));\n \tatomic_set(&sch->refcnt, 1);\ndiff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c\nindex c1ad6b8..5d1654f 100644\n--- a/net/sched/sch_gred.c\n+++ b/net/sched/sch_gred.c\n@@ -292,6 +292,39 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)\n \treturn NULL;\n }\n \n+static struct sk_buff *gred_smart_dequeue(struct Qdisc* sch)\n+{\n+\tstruct sk_buff *skb;\n+\tstruct gred_sched *t = qdisc_priv(sch);\n+\n+\tskb = qdisc_smart_dequeue(sch);\n+\n+\tif (skb) {\n+\t\tstruct gred_sched_data *q;\n+\t\tu16 dp = tc_index_to_dp(skb);\n+\n+\t\tif (dp >= t->DPs || (q = t->tab[dp]) == NULL) {\n+\t\t\tif (net_ratelimit())\n+\t\t\t\tprintk(KERN_WARNING \"GRED: Unable to relocate \"\n+\t\t\t\t \"VQ 0x%x after dequeue, screwing up \"\n+\t\t\t\t \"backlog.\\n\", tc_index_to_dp(skb));\n+\t\t} else {\n+\t\t\tq->backlog -= qdisc_pkt_len(skb);\n+\n+\t\t\tif (!q->backlog && !gred_wred_mode(t))\n+\t\t\t\tred_start_of_idle_period(&q->parms);\n+\t\t}\n+\n+\t\treturn skb;\n+\t}\n+\n+\tif (!(sch->flags & TCQ_F_STOPPED) && gred_wred_mode(t) &&\n+\t 
!red_is_idling(&t->wred_set))\n+\t\tred_start_of_idle_period(&t->wred_set);\n+\n+\treturn NULL;\n+}\n+\n static unsigned int gred_drop(struct Qdisc* sch)\n {\n \tstruct sk_buff *skb;\n@@ -602,6 +635,7 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = {\n \t.priv_size\t=\tsizeof(struct gred_sched),\n \t.enqueue\t=\tgred_enqueue,\n \t.dequeue\t=\tgred_dequeue,\n+\t.smart_dequeue\t=\tgred_smart_dequeue,\n \t.requeue\t=\tgred_requeue,\n \t.drop\t\t=\tgred_drop,\n \t.init\t\t=\tgred_init,\ndiff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c\nindex c1e77da..2060250 100644\n--- a/net/sched/sch_hfsc.c\n+++ b/net/sched/sch_hfsc.c\n@@ -889,7 +889,7 @@ qdisc_peek_len(struct Qdisc *sch)\n \tstruct sk_buff *skb;\n \tunsigned int len;\n \n-\tskb = sch->dequeue(sch);\n+\tskb = sch->ops->dequeue(sch);\n \tif (skb == NULL) {\n \t\tif (net_ratelimit())\n \t\t\tprintk(\"qdisc_peek_len: non work-conserving qdisc ?\\n\");\n@@ -1642,7 +1642,7 @@ hfsc_dequeue(struct Qdisc *sch)\n \t\t}\n \t}\n \n-\tskb = cl->qdisc->dequeue(cl->qdisc);\n+\tskb = cl->qdisc->ops->dequeue(cl->qdisc);\n \tif (skb == NULL) {\n \t\tif (net_ratelimit())\n \t\t\tprintk(\"HFSC: Non-work-conserving qdisc ?\\n\");\n@@ -1674,6 +1674,87 @@ hfsc_dequeue(struct Qdisc *sch)\n \treturn skb;\n }\n \n+static struct sk_buff *\n+hfsc_smart_dequeue(struct Qdisc *sch)\n+{\n+\tstruct hfsc_sched *q = qdisc_priv(sch);\n+\tstruct hfsc_class *cl;\n+\tstruct sk_buff *skb;\n+\tu64 cur_time;\n+\tunsigned int next_len;\n+\tint realtime = 0;\n+\n+\tif (sch->q.qlen == 0)\n+\t\treturn NULL;\n+\tskb = skb_peek(&q->requeue);\n+\tif (skb) {\n+\t\tstruct netdev_queue *txq;\n+\t\ttxq = netdev_get_tx_queue(qdisc_dev(sch),\n+\t\t skb_get_queue_mapping(skb));\n+\t\tif (netif_tx_queue_stopped(txq) ||\n+\t\t netif_tx_queue_frozen(txq)) {\n+\t\t\tsch->flags |= TCQ_F_STOPPED;\n+\t\t\treturn NULL;\n+\t\t}\n+\t\t__skb_unlink(skb, &q->requeue);\n+\t\tgoto out;\n+\t}\n+\n+\tcur_time = psched_get_time();\n+\n+\t/*\n+\t * if there are eligible classes, use real-time criteria.\n+\t * find the class with the minimum deadline among\n+\t * the eligible classes.\n+\t */\n+\tcl = eltree_get_mindl(q, cur_time);\n+\tif (cl != NULL) {\n+\t\trealtime = 1;\n+\t} else {\n+\t\t/*\n+\t\t * use link-sharing criteria\n+\t\t * get the class with the minimum vt in the hierarchy\n+\t\t */\n+\t\tcl = vttree_get_minvt(&q->root, cur_time);\n+\t\tif (cl == NULL) {\n+\t\t\tsch->qstats.overlimits++;\n+\t\t\thfsc_schedule_watchdog(sch);\n+\t\t\treturn NULL;\n+\t\t}\n+\t}\n+\n+\tskb = cl->qdisc->dequeue(cl->qdisc);\n+\tif (skb == NULL) {\n+\t\tif (net_ratelimit())\n+\t\t\tprintk(\"HFSC: Non-work-conserving qdisc ?\\n\");\n+\t\treturn NULL;\n+\t}\n+\n+\tupdate_vf(cl, qdisc_pkt_len(skb), cur_time);\n+\tif (realtime)\n+\t\tcl->cl_cumul += qdisc_pkt_len(skb);\n+\n+\tif (cl->qdisc->q.qlen != 0) {\n+\t\tif (cl->cl_flags & HFSC_RSC) {\n+\t\t\t/* update ed */\n+\t\t\tnext_len = qdisc_peek_len(cl->qdisc);\n+\t\t\tif (realtime)\n+\t\t\t\tupdate_ed(cl, next_len);\n+\t\t\telse\n+\t\t\t\tupdate_d(cl, next_len);\n+\t\t}\n+\t} else {\n+\t\t/* the class becomes passive */\n+\t\tset_passive(cl);\n+\t}\n+\n+ out:\n+\tsch->flags &= ~(TCQ_F_THROTTLED | TCQ_F_STOPPED);\n+\tsch->q.qlen--;\n+\n+\treturn skb;\n+}\n+\n static int\n hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch)\n {\n@@ -1735,6 +1816,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {\n \t.dump\t\t= hfsc_dump_qdisc,\n \t.enqueue\t= hfsc_enqueue,\n \t.dequeue\t= hfsc_dequeue,\n+\t.smart_dequeue\t= hfsc_smart_dequeue,\n \t.requeue\t= 
hfsc_requeue,\n \t.drop\t\t= hfsc_drop,\n \t.cl_ops\t\t= &hfsc_class_ops,\ndiff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c\nindex d14f020..4da1a85 100644\n--- a/net/sched/sch_htb.c\n+++ b/net/sched/sch_htb.c\n@@ -803,7 +803,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,\n /* dequeues packet at given priority and level; call only if\n you are sure that there is active class at prio/level */\n static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,\n-\t\t\t\t\tint level)\n+\t\t\t\t\tint level, int *stopped)\n {\n \tstruct sk_buff *skb = NULL;\n \tstruct htb_class *cl, *start;\n@@ -840,9 +840,17 @@ next:\n \t\t\tgoto next;\n \t\t}\n \n-\t\tskb = cl->un.leaf.q->dequeue(cl->un.leaf.q);\n+\t\tif (stopped)\n+\t\t\tskb = cl->un.leaf.q->dequeue(cl->un.leaf.q);\n+\t\telse\n+\t\t\tskb = cl->un.leaf.q->ops->dequeue(cl->un.leaf.q);\n+\n \t\tif (likely(skb != NULL))\n \t\t\tbreak;\n+\t\tif (stopped && (cl->un.leaf.q->flags & TCQ_F_STOPPED)) {\n+\t\t\t*stopped = true;\n+\t\t\tbreak;\n+\t\t}\n \t\tif (!cl->warned) {\n \t\t\tprintk(KERN_WARNING\n \t\t\t \"htb: class %X isn't work conserving ?!\\n\",\n@@ -915,7 +923,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)\n \t\twhile (m != (int)(-1)) {\n \t\t\tint prio = ffz(m);\n \t\t\tm |= 1 << prio;\n-\t\t\tskb = htb_dequeue_tree(q, prio, level);\n+\t\t\tskb = htb_dequeue_tree(q, prio, level, NULL);\n \t\t\tif (likely(skb != NULL)) {\n \t\t\t\tsch->q.qlen--;\n \t\t\t\tsch->flags &= ~TCQ_F_THROTTLED;\n@@ -929,6 +937,73 @@ fin:\n \treturn skb;\n }\n \n+static struct sk_buff *htb_smart_dequeue(struct Qdisc *sch)\n+{\n+\tstruct sk_buff *skb = NULL;\n+\tstruct htb_sched *q = qdisc_priv(sch);\n+\tint level, stopped = false;\n+\tpsched_time_t next_event;\n+\n+\t/* try to dequeue direct packets as high prio (!) 
to minimize cpu work */\n+\tskb = skb_peek(&q->direct_queue);\n+\tif (skb) {\n+\t\tstruct netdev_queue *txq;\n+\t\ttxq = netdev_get_tx_queue(qdisc_dev(sch),\n+\t\t skb_get_queue_mapping(skb));\n+\t\tif (netif_tx_queue_stopped(txq) ||\n+\t\t netif_tx_queue_frozen(txq)) {\n+\t\t\tsch->flags |= TCQ_F_STOPPED;\n+\t\t\treturn NULL;\n+\t\t}\n+\t\t__skb_unlink(skb, &q->direct_queue);\n+\t\tsch->flags &= ~(TCQ_F_THROTTLED | TCQ_F_STOPPED);\n+\t\tsch->q.qlen--;\n+\t\treturn skb;\n+\t}\n+\n+\tif (!sch->q.qlen)\n+\t\tgoto fin;\n+\tq->now = psched_get_time();\n+\n+\tnext_event = q->now + 5 * PSCHED_TICKS_PER_SEC;\n+\tq->nwc_hit = 0;\n+\tfor (level = 0; level < TC_HTB_MAXDEPTH; level++) {\n+\t\t/* common case optimization - skip event handler quickly */\n+\t\tint m;\n+\t\tpsched_time_t event;\n+\n+\t\tif (q->now >= q->near_ev_cache[level]) {\n+\t\t\tevent = htb_do_events(q, level);\n+\t\t\tif (!event)\n+\t\t\t\tevent = q->now + PSCHED_TICKS_PER_SEC;\n+\t\t\tq->near_ev_cache[level] = event;\n+\t\t} else\n+\t\t\tevent = q->near_ev_cache[level];\n+\n+\t\tif (event && next_event > event)\n+\t\t\tnext_event = event;\n+\n+\t\tm = ~q->row_mask[level];\n+\t\twhile (m != (int)(-1)) {\n+\t\t\tint prio = ffz(m);\n+\t\t\tm |= 1 << prio;\n+\t\t\tskb = htb_dequeue_tree(q, prio, level, &stopped);\n+\t\t\tif (likely(skb != NULL)) {\n+\t\t\t\tsch->q.qlen--;\n+\t\t\t\tsch->flags &= ~(TCQ_F_THROTTLED |\n+\t\t\t\t TCQ_F_STOPPED);\n+\t\t\t\tgoto fin;\n+\t\t\t}\n+\t\t\tif (stopped)\n+\t\t\t\tgoto fin;\n+\t\t}\n+\t}\n+\tsch->qstats.overlimits++;\n+\tqdisc_watchdog_schedule(&q->watchdog, next_event);\n+fin:\n+\treturn skb;\n+}\n+\n /* try to drop from each class (by prio) until one succeed */\n static unsigned int htb_drop(struct Qdisc *sch)\n {\n@@ -1565,6 +1640,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {\n \t.priv_size\t=\tsizeof(struct htb_sched),\n \t.enqueue\t=\thtb_enqueue,\n \t.dequeue\t=\thtb_dequeue,\n+\t.smart_dequeue\t=\thtb_smart_dequeue,\n \t.requeue\t=\thtb_requeue,\n \t.drop\t\t=\thtb_drop,\n \t.init\t\t=\thtb_init,\ndiff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c\nindex 7f4dbf0..e201171 100644\n--- a/net/sched/sch_multiq.c\n+++ b/net/sched/sch_multiq.c\n@@ -142,15 +142,45 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)\n \t\t/* Check that target subqueue is available before\n \t\t * pulling an skb to avoid excessive requeues\n \t\t */\n-\t\tif (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {\n-\t\t\tqdisc = q->queues[q->curband];\n-\t\t\tskb = qdisc->dequeue(qdisc);\n-\t\t\tif (skb) {\n-\t\t\t\tsch->q.qlen--;\n-\t\t\t\treturn skb;\n-\t\t\t}\n+\t\tqdisc = q->queues[q->curband];\n+\t\tskb = qdisc->ops->dequeue(qdisc);\n+\t\tif (skb) {\n+\t\t\tsch->q.qlen--;\n+\t\t\treturn skb;\n+\t\t}\n+\t}\n+\treturn NULL;\n+\n+}\n+\n+static struct sk_buff *multiq_smart_dequeue(struct Qdisc *sch)\n+{\n+\tstruct multiq_sched_data *q = qdisc_priv(sch);\n+\tstruct Qdisc *qdisc;\n+\tstruct sk_buff *skb;\n+\tint band, stopped = 0;\n+\n+\tfor (band = 0; band < q->bands; band++) {\n+\t\t/* cycle through bands to ensure fairness */\n+\t\tq->curband++;\n+\t\tif (q->curband >= q->bands)\n+\t\t\tq->curband = 0;\n+\n+\t\t/* Check that target subqueue is available before\n+\t\t * pulling an skb to avoid excessive requeues\n+\t\t */\n+\t\tqdisc = q->queues[q->curband];\n+\t\tskb = qdisc->dequeue(qdisc);\n+\t\tif (skb) {\n+\t\t\tsch->q.qlen--;\n+\t\t\tsch->flags &= ~TCQ_F_STOPPED;\n+\t\t\treturn skb;\n \t\t}\n+\t\tif (qdisc->flags & TCQ_F_STOPPED)\n+\t\t\tstopped++;\n \t}\n+\tif 
(stopped)\n+\t\tsch->flags |= TCQ_F_STOPPED;\n \treturn NULL;\n \n }\n@@ -448,6 +478,7 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {\n \t.priv_size\t=\tsizeof(struct multiq_sched_data),\n \t.enqueue\t=\tmultiq_enqueue,\n \t.dequeue\t=\tmultiq_dequeue,\n+\t.smart_dequeue\t=\tmultiq_smart_dequeue,\n \t.requeue\t=\tmultiq_requeue,\n \t.drop\t\t=\tmultiq_drop,\n \t.init\t\t=\tmultiq_init,\ndiff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c\nindex a119599..47dfe8e 100644\n--- a/net/sched/sch_netem.c\n+++ b/net/sched/sch_netem.c\n@@ -283,7 +283,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)\n \tif (sch->flags & TCQ_F_THROTTLED)\n \t\treturn NULL;\n \n-\tskb = q->qdisc->dequeue(q->qdisc);\n+\tskb = q->qdisc->ops->dequeue(q->qdisc);\n \tif (skb) {\n \t\tconst struct netem_skb_cb *cb = netem_skb_cb(skb);\n \t\tpsched_time_t now = psched_get_time();\n@@ -308,6 +308,42 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)\n \treturn NULL;\n }\n \n+static struct sk_buff *netem_smart_dequeue(struct Qdisc *sch)\n+{\n+\tstruct netem_sched_data *q = qdisc_priv(sch);\n+\tstruct sk_buff *skb;\n+\n+\tsmp_mb();\n+\tif (sch->flags & TCQ_F_THROTTLED)\n+\t\treturn NULL;\n+\n+\tskb = q->qdisc->dequeue(q->qdisc);\n+\tif (skb) {\n+\t\tconst struct netem_skb_cb *cb = netem_skb_cb(skb);\n+\t\tpsched_time_t now = psched_get_time();\n+\n+\t\t/* if more time remaining? */\n+\t\tif (cb->time_to_send <= now) {\n+\t\t\tpr_debug(\"netem_dequeue: return skb=%p\\n\", skb);\n+\t\t\tsch->q.qlen--;\n+\t\t\tsch->flags &= ~TCQ_F_STOPPED;\n+\t\t\treturn skb;\n+\t\t}\n+\n+\t\tif (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) {\n+\t\t\tqdisc_tree_decrease_qlen(q->qdisc, 1);\n+\t\t\tsch->qstats.drops++;\n+\t\t\tprintk(KERN_ERR \"netem: %s could not requeue\\n\",\n+\t\t\t q->qdisc->ops->id);\n+\t\t}\n+\n+\t\tqdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);\n+\t} else if (q->qdisc->flags & TCQ_F_STOPPED) {\n+\t\tsch->flags |= TCQ_F_STOPPED;\n+\t}\n+\n+\treturn NULL;\n+}\n static void netem_reset(struct Qdisc *sch)\n {\n \tstruct netem_sched_data *q = qdisc_priv(sch);\n@@ -541,6 +577,7 @@ static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {\n \t.priv_size\t=\tsizeof(struct fifo_sched_data),\n \t.enqueue\t=\ttfifo_enqueue,\n \t.dequeue\t=\tqdisc_dequeue_head,\n+\t.smart_dequeue\t=\tqdisc_smart_dequeue,\n \t.requeue\t=\tqdisc_requeue,\n \t.drop\t\t=\tqdisc_queue_drop,\n \t.init\t\t=\ttfifo_init,\n@@ -716,6 +753,7 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = {\n \t.priv_size\t=\tsizeof(struct netem_sched_data),\n \t.enqueue\t=\tnetem_enqueue,\n \t.dequeue\t=\tnetem_dequeue,\n+\t.smart_dequeue\t=\tnetem_smart_dequeue,\n \t.requeue\t=\tnetem_requeue,\n \t.drop\t\t=\tnetem_drop,\n \t.init\t\t=\tnetem_init,\ndiff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c\nindex 504a78c..f085dbe 100644\n--- a/net/sched/sch_prio.c\n+++ b/net/sched/sch_prio.c\n@@ -128,11 +128,33 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch)\n \n \tfor (prio = 0; prio < q->bands; prio++) {\n \t\tstruct Qdisc *qdisc = q->queues[prio];\n+\t\tstruct sk_buff *skb = qdisc->ops->dequeue(qdisc);\n+\t\tif (skb) {\n+\t\t\tsch->q.qlen--;\n+\t\t\treturn skb;\n+\t\t}\n+\t}\n+\treturn NULL;\n+\n+}\n+\n+static struct sk_buff *prio_smart_dequeue(struct Qdisc* sch)\n+{\n+\tstruct prio_sched_data *q = qdisc_priv(sch);\n+\tint prio;\n+\n+\tfor (prio = 0; prio < q->bands; prio++) {\n+\t\tstruct Qdisc *qdisc = q->queues[prio];\n \t\tstruct sk_buff *skb = qdisc->dequeue(qdisc);\n \t\tif 
(skb) {\n \t\t\tsch->q.qlen--;\n+\t\t\tsch->flags &= ~TCQ_F_STOPPED;\n \t\t\treturn skb;\n \t\t}\n+\t\tif (qdisc->flags & TCQ_F_STOPPED) {\n+\t\t\tsch->flags |= TCQ_F_STOPPED;\n+\t\t\treturn NULL;\n+\t\t}\n \t}\n \treturn NULL;\n \n@@ -421,6 +443,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {\n \t.priv_size\t=\tsizeof(struct prio_sched_data),\n \t.enqueue\t=\tprio_enqueue,\n \t.dequeue\t=\tprio_dequeue,\n+\t.smart_dequeue\t=\tprio_smart_dequeue,\n \t.requeue\t=\tprio_requeue,\n \t.drop\t\t=\tprio_drop,\n \t.init\t\t=\tprio_init,\ndiff --git a/net/sched/sch_red.c b/net/sched/sch_red.c\nindex 5da0583..b8247cb 100644\n--- a/net/sched/sch_red.c\n+++ b/net/sched/sch_red.c\n@@ -131,7 +131,7 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch)\n \tstruct red_sched_data *q = qdisc_priv(sch);\n \tstruct Qdisc *child = q->qdisc;\n \n-\tskb = child->dequeue(child);\n+\tskb = child->ops->dequeue(child);\n \tif (skb)\n \t\tsch->q.qlen--;\n \telse if (!red_is_idling(&q->parms))\n@@ -140,6 +140,25 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch)\n \treturn skb;\n }\n \n+static struct sk_buff * red_smart_dequeue(struct Qdisc* sch)\n+{\n+\tstruct sk_buff *skb;\n+\tstruct red_sched_data *q = qdisc_priv(sch);\n+\tstruct Qdisc *child = q->qdisc;\n+\n+\tskb = child->dequeue(child);\n+\tif (skb) {\n+\t\tsch->q.qlen--;\n+\t\tsch->flags &= ~TCQ_F_STOPPED;\n+\t} else {\n+\t\tif (child->flags & TCQ_F_STOPPED)\n+\t\t\tsch->flags |= TCQ_F_STOPPED;\n+\t\telse if (!red_is_idling(&q->parms))\n+\t\t\tred_start_of_idle_period(&q->parms);\n+\t}\n+\n+\treturn skb;\n+}\n static unsigned int red_drop(struct Qdisc* sch)\n {\n \tstruct red_sched_data *q = qdisc_priv(sch);\n@@ -361,6 +380,7 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = {\n \t.cl_ops\t\t=\t&red_class_ops,\n \t.enqueue\t=\tred_enqueue,\n \t.dequeue\t=\tred_dequeue,\n+\t.smart_dequeue\t=\tred_smart_dequeue,\n \t.requeue\t=\tred_requeue,\n \t.drop\t\t=\tred_drop,\n \t.init\t\t=\tred_init,\ndiff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c\nindex 6e041d1..2a7ba8e 100644\n--- a/net/sched/sch_sfq.c\n+++ b/net/sched/sch_sfq.c\n@@ -391,9 +391,6 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)\n \treturn NET_XMIT_CN;\n }\n \n-\n-\n-\n static struct sk_buff *\n sfq_dequeue(struct Qdisc *sch)\n {\n@@ -431,6 +428,48 @@ sfq_dequeue(struct Qdisc *sch)\n \treturn skb;\n }\n \n+static struct sk_buff *\n+sfq_smart_dequeue(struct Qdisc *sch)\n+{\n+\tstruct sfq_sched_data *q = qdisc_priv(sch);\n+\tstruct sk_buff *skb;\n+\tsfq_index a, old_a;\n+\tstruct netdev_queue *txq;\n+\n+\t/* No active slots */\n+\tif (q->tail == SFQ_DEPTH)\n+\t\treturn NULL;\n+\n+\ta = old_a = q->next[q->tail];\n+\n+\t/* Grab packet */\n+\tskb = __qdisc_smart_dequeue(sch, &q->qs[a]);\n+\n+\tif (!skb && (sch->flags & TCQ_F_STOPPED))\n+\t\treturn NULL;\n+\n+\tsfq_dec(q, a);\n+\tsch->q.qlen--;\n+\n+\t/* Is the slot empty? 
*/\n+\tif (q->qs[a].qlen == 0) {\n+\t\tq->ht[q->hash[a]] = SFQ_DEPTH;\n+\t\ta = q->next[a];\n+\t\tif (a == old_a) {\n+\t\t\tq->tail = SFQ_DEPTH;\n+\t\t\treturn skb;\n+\t\t}\n+\t\tq->next[q->tail] = a;\n+\t\tq->allot[a] += q->quantum;\n+\t} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {\n+\t\tq->tail = a;\n+\t\ta = q->next[a];\n+\t\tq->allot[a] += q->quantum;\n+\t}\n+\tsch->flags &= ~TCQ_F_STOPPED;\n+\treturn skb;\n+}\n+\n static void\n sfq_reset(struct Qdisc *sch)\n {\n@@ -624,6 +663,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {\n \t.priv_size\t=\tsizeof(struct sfq_sched_data),\n \t.enqueue\t=\tsfq_enqueue,\n \t.dequeue\t=\tsfq_dequeue,\n+\t.smart_dequeue\t=\tsfq_smart_dequeue,\n \t.requeue\t=\tsfq_requeue,\n \t.drop\t\t=\tsfq_drop,\n \t.init\t\t=\tsfq_init,\ndiff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c\nindex 94c6159..f65204c 100644\n--- a/net/sched/sch_tbf.c\n+++ b/net/sched/sch_tbf.c\n@@ -169,6 +169,67 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)\n \tstruct tbf_sched_data *q = qdisc_priv(sch);\n \tstruct sk_buff *skb;\n \n+\tskb = q->qdisc->ops->dequeue(q->qdisc);\n+\n+\tif (skb) {\n+\t\tpsched_time_t now;\n+\t\tlong toks;\n+\t\tlong ptoks = 0;\n+\t\tunsigned int len = qdisc_pkt_len(skb);\n+\n+\t\tnow = psched_get_time();\n+\t\ttoks = psched_tdiff_bounded(now, q->t_c, q->buffer);\n+\n+\t\tif (q->P_tab) {\n+\t\t\tptoks = toks + q->ptokens;\n+\t\t\tif (ptoks > (long)q->mtu)\n+\t\t\t\tptoks = q->mtu;\n+\t\t\tptoks -= L2T_P(q, len);\n+\t\t}\n+\t\ttoks += q->tokens;\n+\t\tif (toks > (long)q->buffer)\n+\t\t\ttoks = q->buffer;\n+\t\ttoks -= L2T(q, len);\n+\n+\t\tif ((toks|ptoks) >= 0) {\n+\t\t\tq->t_c = now;\n+\t\t\tq->tokens = toks;\n+\t\t\tq->ptokens = ptoks;\n+\t\t\tsch->q.qlen--;\n+\t\t\tsch->flags &= ~TCQ_F_THROTTLED;\n+\t\t\treturn skb;\n+\t\t}\n+\n+\t\tqdisc_watchdog_schedule(&q->watchdog,\n+\t\t\t\t\tnow + max_t(long, -toks, -ptoks));\n+\n+\t\t/* Maybe we have a shorter packet in the queue,\n+\t\t which can be sent now. It sounds cool,\n+\t\t but, however, this is wrong in principle.\n+\t\t We MUST NOT reorder packets under these circumstances.\n+\n+\t\t Really, if we split the flow into independent\n+\t\t subflows, it would be a very good solution.\n+\t\t This is the main idea of all FQ algorithms\n+\t\t (cf. 
CSZ, HPFQ, HFSC)\n+\t\t */\n+\n+\t\tif (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {\n+\t\t\t/* When requeue fails skb is dropped */\n+\t\t\tqdisc_tree_decrease_qlen(q->qdisc, 1);\n+\t\t\tsch->qstats.drops++;\n+\t\t}\n+\n+\t\tsch->qstats.overlimits++;\n+\t}\n+\treturn NULL;\n+}\n+\n+static struct sk_buff *tbf_smart_dequeue(struct Qdisc* sch)\n+{\n+\tstruct tbf_sched_data *q = qdisc_priv(sch);\n+\tstruct sk_buff *skb;\n+\n \tskb = q->qdisc->dequeue(q->qdisc);\n \n \tif (skb) {\n@@ -179,6 +240,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)\n \n \t\tnow = psched_get_time();\n \t\ttoks = psched_tdiff_bounded(now, q->t_c, q->buffer);\n+\t\tsch->flags &= ~TCQ_F_STOPPED;\n \n \t\tif (q->P_tab) {\n \t\t\tptoks = toks + q->ptokens;\n@@ -221,7 +283,10 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)\n \t\t}\n \n \t\tsch->qstats.overlimits++;\n+\t} else if (q->qdisc->flags & TCQ_F_STOPPED) {\n+\t\tsch->flags |= TCQ_F_STOPPED;\n \t}\n+\n \treturn NULL;\n }\n \n@@ -469,6 +534,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {\n \t.priv_size\t=\tsizeof(struct tbf_sched_data),\n \t.enqueue\t=\ttbf_enqueue,\n \t.dequeue\t=\ttbf_dequeue,\n+\t.smart_dequeue\t=\ttbf_smart_dequeue,\n \t.requeue\t=\ttbf_requeue,\n \t.drop\t\t=\ttbf_drop,\n \t.init\t\t=\ttbf_init,\ndiff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c\nindex d35ef05..fecb3f8 100644\n--- a/net/sched/sch_teql.c\n+++ b/net/sched/sch_teql.c\n@@ -123,6 +123,40 @@ teql_dequeue(struct Qdisc* sch)\n \treturn skb;\n }\n \n+static struct sk_buff *\n+teql_smart_dequeue(struct Qdisc* sch)\n+{\n+\tstruct teql_sched_data *dat = qdisc_priv(sch);\n+\tstruct netdev_queue *dat_queue;\n+\tstruct sk_buff *skb;\n+\tstruct netdev_queue *txq;\n+\n+\tskb = skb_peek(&dat->q);\n+\tif (skb) {\n+\t\ttxq = netdev_get_tx_queue(qdisc_dev(sch),\n+\t\t skb_get_queue_mapping(skb));\n+\t\tif (netif_tx_queue_stopped(txq) ||\n+\t\t netif_tx_queue_frozen(txq)) {\n+\t\t\tsch->flags |= TCQ_F_STOPPED;\n+\t\t\treturn NULL;\n+\t\t}\n+\t\t__skb_unlink(skb, &dat->q);\n+\t}\n+\tdat_queue = netdev_get_tx_queue(dat->m->dev, 0);\n+\tif (skb == NULL) {\n+\t\tstruct net_device *m = qdisc_dev(dat_queue->qdisc);\n+\t\tif (m) {\n+\t\t\tdat->m->slaves = sch;\n+\t\t\tnetif_wake_queue(m);\n+\t\t}\n+\t} else {\n+\t\tsch->flags &= ~TCQ_F_STOPPED;\n+\t}\n+\tsch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;\n+\n+\treturn skb;\n+}\n+\n static __inline__ void\n teql_neigh_release(struct neighbour *n)\n {\n@@ -431,13 +465,14 @@ static __init void teql_master_setup(struct net_device *dev)\n \tmaster->dev\t= dev;\n \tops->priv_size = sizeof(struct teql_sched_data);\n \n-\tops->enqueue\t=\tteql_enqueue;\n-\tops->dequeue\t=\tteql_dequeue;\n-\tops->requeue\t=\tteql_requeue;\n-\tops->init\t=\tteql_qdisc_init;\n-\tops->reset\t=\tteql_reset;\n-\tops->destroy\t=\tteql_destroy;\n-\tops->owner\t=\tTHIS_MODULE;\n+\tops->enqueue\t\t= teql_enqueue;\n+\tops->dequeue\t\t= teql_dequeue;\n+\tops->smart_dequeue\t= teql_smart_dequeue;\n+\tops->requeue\t\t= teql_requeue;\n+\tops->init\t\t= teql_qdisc_init;\n+\tops->reset\t\t= teql_reset;\n+\tops->destroy\t\t= teql_destroy;\n+\tops->owner\t\t= THIS_MODULE;\n \n \tdev->open\t\t= teql_master_open;\n \tdev->hard_start_xmit\t= teql_master_xmit;\n", "prefixes": [ "RFC" ] }
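The response also links the raw patch through its "mbox" URL and per-patch checks through the "checks" URL. A minimal sketch of fetching the mbox for local use, again assuming the requests library and an arbitrary output filename:

import requests

MBOX = ("http://patchwork.ozlabs.org/project/netdev/patch/"
        "20080918063036.27934.91273.stgit@localhost.localdomain/mbox/")

resp = requests.get(MBOX)
resp.raise_for_status()
with open("490.mbox", "wb") as fh:
    fh.write(resp.content)

# The saved file can then be applied locally with, for example, "git am 490.mbox".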