
[RFC] Avoid enqueuing skb for default qdiscs

Message ID OF104CB719.2A67F34E-ON65257608.001D0048-65257608.0026016D@in.ibm.com
State RFC, archived
Delegated to: David Miller

Commit Message

Krishna Kumar Aug. 4, 2009, 6:55 a.m. UTC
Jarek Poplawski <jarkao2@gmail.com> wrote on 08/04/2009 01:08:47 AM:

Hi Jarek,

> Since this quoted part looks very similar, I'm using it here, so
> forget my note if something has changed.

Thanks for your feedback. Yes, the inline patch is the same as the
attached one.

> >> @@ -306,6 +312,9 @@ extern struct Qdisc *qdisc_create_dflt(s
> >>                     struct Qdisc_ops *ops, u32 parentid);
> >>  extern void qdisc_calculate_pkt_len(struct sk_buff *skb,
> >>                 struct qdisc_size_table *stab);
> >> +extern int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
> >> +            struct net_device *dev, struct netdev_queue *txq,
> >> +            spinlock_t *root_lock);
>
> Probably this would look better in pkt_sched.h around __qdisc_run()?

I had it originally in pkt_sched.h; for some reason I moved it to
sch_generic.h. I will move it back.

> >> +static inline int __dev_hw_xmit(struct sk_buff *skb, struct Qdisc *q,
>
> I guess, we don't touch "hw" in this function yet?

Right - this was due to lack of a good name. Maybe I can call it
__dev_xmit_skb?

> >> -   kfree_skb(qdisc->gso_skb);
> >> -   qdisc->gso_skb = NULL;
> >> +   if (qdisc->gso_skb) {
> >> +      kfree_skb(qdisc->gso_skb);
> >> +      qdisc->gso_skb = NULL;
> >> +      qdisc->q.qlen--;
>
> You don't need this here: qdiscs should zero it in ops->reset().

I will keep it around, as you said in your other mail.

> >> +      /* Can by-pass the queue discipline for default qdisc */
> >> +      qdisc->flags = TCQ_F_CAN_BYPASS;
>
> "|=" looks nicer to me.

Agreed - and required if qdisc_create_dflt (and the functions it calls)
is changed to set the flags.
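Just to illustrate the difference (this snippet is for illustration only,
it is not part of the patch):

        qdisc->flags = TCQ_F_CAN_BYPASS;        /* clobbers any flags already set */
        qdisc->flags |= TCQ_F_CAN_BYPASS;       /* only adds the new bit */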

> >>     } else {
> >>        qdisc =  &noqueue_qdisc;
> >>     }
>
> I wonder if we shouldn't update bstats yet. Btw, try checkpatch.

The rate estimator isn't used for the default qdisc, so is anything
required? Also, checkpatch suggested changing the original code:
        "if (unlikely((skb = dequeue_skb(q)) == NULL))"
since it appears as "new code"; I hope that change is fine.
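For reference, the checkpatch-clean form used in the patch below simply
moves the assignment out of the condition:

        /* Dequeue packet */
        skb = dequeue_skb(q);
        if (unlikely(!skb))
                return 0;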

The patch after incorporating the review comments is both attached
and inlined, since my mailer is still not working (I sent this mail
about an hour ago, but it didn't seem to make it, while one of my
later mails did).

Thanks,

- KK

 (See attached file: patch4)

----------------------------------------------------------------------
 include/net/pkt_sched.h   |    3 +
 include/net/sch_generic.h |    6 ++
 net/core/dev.c            |   46 ++++++++++++-----
 net/sched/sch_generic.c   |   93 ++++++++++++++++++++++--------------
 4 files changed, 99 insertions(+), 49 deletions(-)

                skb = q->dequeue(q);
@@ -103,44 +101,23 @@ static inline int handle_dev_cpu_collisi
 }

 /*
- * NOTE: Called under qdisc_lock(q) with locally disabled BH.
- *
- * __QDISC_STATE_RUNNING guarantees only one CPU can process
- * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
- * this queue.
- *
- *  netif_tx_lock serializes accesses to device driver.
- *
- *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
- *  if one is grabbed, another must be free.
- *
- * Note, that this procedure can be called by a watchdog timer
+ * Transmit one skb, and handle the return status as required. Holding the
+ * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this
+ * function.
  *
  * Returns to the caller:
  *                             0  - queue is empty or throttled.
  *                             >0 - queue is not empty.
- *
  */
-static inline int qdisc_restart(struct Qdisc *q)
+int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+                   struct net_device *dev, struct netdev_queue *txq,
+                   spinlock_t *root_lock)
 {
-       struct netdev_queue *txq;
        int ret = NETDEV_TX_BUSY;
-       struct net_device *dev;
-       spinlock_t *root_lock;
-       struct sk_buff *skb;
-
-       /* Dequeue packet */
-       if (unlikely((skb = dequeue_skb(q)) == NULL))
-               return 0;
-
-       root_lock = qdisc_lock(q);

        /* And release qdisc */
        spin_unlock(root_lock);

-       dev = qdisc_dev(q);
-       txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-
        HARD_TX_LOCK(dev, txq, smp_processor_id());
        if (!netif_tx_queue_stopped(txq) &&
            !netif_tx_queue_frozen(txq))
@@ -177,6 +154,44 @@ static inline int qdisc_restart(struct Q
        return ret;
 }

+/*
+ * NOTE: Called under qdisc_lock(q) with locally disabled BH.
+ *
+ * __QDISC_STATE_RUNNING guarantees only one CPU can process
+ * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
+ * this queue.
+ *
+ *  netif_tx_lock serializes accesses to device driver.
+ *
+ *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
+ *  if one is grabbed, another must be free.
+ *
+ * Note, that this procedure can be called by a watchdog timer
+ *
+ * Returns to the caller:
+ *                             0  - queue is empty or throttled.
+ *                             >0 - queue is not empty.
+ *
+ */
+static inline int qdisc_restart(struct Qdisc *q)
+{
+       struct netdev_queue *txq;
+       struct net_device *dev;
+       spinlock_t *root_lock;
+       struct sk_buff *skb;
+
+       /* Dequeue packet */
+       skb = dequeue_skb(q);
+       if (unlikely(!skb))
+               return 0;
+
+       root_lock = qdisc_lock(q);
+       dev = qdisc_dev(q);
+       txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
+       return sch_direct_xmit(skb, q, dev, txq, root_lock);
+}
+
 void __qdisc_run(struct Qdisc *q)
 {
        unsigned long start_time = jiffies;
@@ -547,8 +562,11 @@ void qdisc_reset(struct Qdisc *qdisc)
        if (ops->reset)
                ops->reset(qdisc);

-       kfree_skb(qdisc->gso_skb);
-       qdisc->gso_skb = NULL;
+       if (qdisc->gso_skb) {
+               kfree_skb(qdisc->gso_skb);
+               qdisc->gso_skb = NULL;
+               qdisc->q.qlen--;
+       }
 }
 EXPORT_SYMBOL(qdisc_reset);

@@ -605,6 +623,9 @@ static void attach_one_default_qdisc(str
                        printk(KERN_INFO "%s: activation failed\n", dev->name);
                        return;
                }
+
+               /* Can by-pass the queue discipline for default qdisc */
+               qdisc->flags |= TCQ_F_CAN_BYPASS;
        } else {
                qdisc =  &noqueue_qdisc;
        }

Comments

Jarek Poplawski Aug. 4, 2009, 7:22 a.m. UTC | #1
On Tue, Aug 04, 2009 at 12:25:07PM +0530, Krishna Kumar2 wrote:
> Jarek Poplawski <jarkao2@gmail.com> wrote on 08/04/2009 01:08:47 AM:
> 
> Hi Jarek,
> 
> > Since this quoted part looks very similar, I'm using it here, so
> > forget my note if something has changed.
> 
> Thanks for your feedback. Yes, the inline patch is the same as the
> attached one.
> 
> > >> @@ -306,6 +312,9 @@ extern struct Qdisc *qdisc_create_dflt(s
> > >>                     struct Qdisc_ops *ops, u32 parentid);
> > >>  extern void qdisc_calculate_pkt_len(struct sk_buff *skb,
> > >>                 struct qdisc_size_table *stab);
> > >> +extern int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
> > >> +            struct net_device *dev, struct netdev_queue *txq,
> > >> +            spinlock_t *root_lock);
> >
> > Probably this would look better in pkt_sched.h around __qdisc_run()?
> 
> I had it originally in pkt_sched.h; for some reason I moved it to
> sch_generic.h. I will move it back.
> 
> > >> +static inline int __dev_hw_xmit(struct sk_buff *skb, struct Qdisc *q,
> >
> > I guess, we don't touch "hw" in this function yet?
> 
> Right - this was due to lack of a good name. Maybe I can call it
> __dev_xmit_skb?

Looks better to me.

> 
> > >> -   kfree_skb(qdisc->gso_skb);
> > >> -   qdisc->gso_skb = NULL;
> > >> +   if (qdisc->gso_skb) {
> > >> +      kfree_skb(qdisc->gso_skb);
> > >> +      qdisc->gso_skb = NULL;
> > >> +      qdisc->q.qlen--;
> >
> > You don't need this here: qdiscs should zero it in ops->reset().
> 
> I will keep it around, as you said in your other mail.

Actually I meant:
-      qdisc->q.qlen--;
+      qdisc->q.qlen = 0;
because qdisc_reset() can run when qdisc->q.qlen is already 0 (ops->reset()
zeroes it), so decrementing could make it go negative.
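In other words, a rough sketch of what I have in mind for qdisc_reset()
(illustration only):

        if (qdisc->gso_skb) {
                kfree_skb(qdisc->gso_skb);
                qdisc->gso_skb = NULL;
                qdisc->q.qlen = 0;      /* "= 0" rather than "--" */
        }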

> 
> > >> +      /* Can by-pass the queue discipline for default qdisc */
> > >> +      qdisc->flags = TCQ_F_CAN_BYPASS;
> >
> > "|=" looks nicer to me.
> 
> Agreed - and required if qdisc_create_dflt (and the functions it calls)
> is changed to set the flags.
> 
> > >>     } else {
> > >>        qdisc =  &noqueue_qdisc;
> > >>     }
> >
> > I wonder if we shouldn't update bstats yet. Btw, try checkpatch.
> 
> The rate estimator isn't used for the default qdisc, so is anything
> required?

I meant "Sent" stats from "tc -s qdisc sh" could be misleading.
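Just as a rough sketch (not something I am asking for in this patch), the
bypass branch of __dev_xmit_skb() could account the skb before sending it,
so those counters stay in sync:

                /* sketch only: update bstats even when the qdisc is bypassed */
                q->bstats.bytes += skb->len;
                q->bstats.packets++;
                if (sch_direct_xmit(skb, q, dev, txq, root_lock))
                        __qdisc_run(q);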

> Also, checkpatch suggested changing the original code:
>         "if (unlikely((skb = dequeue_skb(q)) == NULL))"
> since it appears as "new code"; I hope that change is fine.

I mainly meant a whitespace warning.

> 
> The patch after incorporating the review comments is both attached
> and inlined, since my mailer is still not working (I sent this mail
> about an hour ago, but it didn't seem to make it, while one of my
> later mails did).

I got 2 very similar messages this time, several minutes ago ;-)

Thanks,
Jarek P.


Patch

diff -ruNp org/include/net/pkt_sched.h new/include/net/pkt_sched.h
--- org/include/net/pkt_sched.h 2009-08-04 11:41:21.000000000 +0530
+++ new/include/net/pkt_sched.h 2009-08-04 11:42:18.000000000 +0530
@@ -87,6 +87,9 @@  extern struct qdisc_rate_table *qdisc_ge
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
 extern void qdisc_put_stab(struct qdisc_size_table *tab);
 extern void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc);
+extern int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+                          struct net_device *dev, struct netdev_queue *txq,
+                          spinlock_t *root_lock);

 extern void __qdisc_run(struct Qdisc *q);

diff -ruNp org/include/net/sch_generic.h new/include/net/sch_generic.h
--- org/include/net/sch_generic.h       2009-03-24 08:54:16.000000000 +0530
+++ new/include/net/sch_generic.h       2009-08-04 11:02:39.000000000 +0530
@@ -45,6 +45,7 @@  struct Qdisc
 #define TCQ_F_BUILTIN          1
 #define TCQ_F_THROTTLED                2
 #define TCQ_F_INGRESS          4
+#define TCQ_F_CAN_BYPASS       8
 #define TCQ_F_WARN_NONWC       (1 << 16)
        int                     padded;
        struct Qdisc_ops        *ops;
@@ -182,6 +183,11 @@  struct qdisc_skb_cb {
        char                    data[];
 };

+static inline int qdisc_qlen(struct Qdisc *q)
+{
+       return q->q.qlen;
+}
+
 static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
 {
        return (struct qdisc_skb_cb *)skb->cb;
diff -ruNp org/net/core/dev.c new/net/core/dev.c
--- org/net/core/dev.c  2009-07-27 09:08:24.000000000 +0530
+++ new/net/core/dev.c  2009-08-04 11:44:57.000000000 +0530
@@ -1786,6 +1786,38 @@  static struct netdev_queue *dev_pick_tx(
        return netdev_get_tx_queue(dev, queue_index);
 }

+static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
+                                struct net_device *dev,
+                                struct netdev_queue *txq)
+{
+       spinlock_t *root_lock = qdisc_lock(q);
+       int rc;
+
+       spin_lock(root_lock);
+       if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+               kfree_skb(skb);
+               rc = NET_XMIT_DROP;
+       } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
+                  !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
+               /*
+                * This is a work-conserving queue; there are no old skbs
+                * waiting to be sent out; and the qdisc is not running -
+                * xmit the skb directly.
+                */
+               if (sch_direct_xmit(skb, q, dev, txq, root_lock))
+                       __qdisc_run(q);
+               else
+                       clear_bit(__QDISC_STATE_RUNNING, &q->state);
+               rc = NET_XMIT_SUCCESS;
+       } else {
+               rc = qdisc_enqueue_root(skb, q);
+               qdisc_run(q);
+       }
+       spin_unlock(root_lock);
+
+       return rc;
+}
+
 /**
  *     dev_queue_xmit - transmit a buffer
  *     @skb: buffer to transmit
@@ -1859,19 +1891,7 @@  gso:
        skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
        if (q->enqueue) {
-               spinlock_t *root_lock = qdisc_lock(q);
-
-               spin_lock(root_lock);
-
-               if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-                       kfree_skb(skb);
-                       rc = NET_XMIT_DROP;
-               } else {
-                       rc = qdisc_enqueue_root(skb, q);
-                       qdisc_run(q);
-               }
-               spin_unlock(root_lock);
-
+               rc = __dev_xmit_skb(skb, q, dev, txq);
                goto out;
        }

diff -ruNp org/net/sched/sch_generic.c new/net/sched/sch_generic.c
--- org/net/sched/sch_generic.c 2009-05-25 07:48:07.000000000 +0530
+++ new/net/sched/sch_generic.c 2009-08-04 11:00:38.000000000 +0530
@@ -37,15 +37,11 @@ 
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */

-static inline int qdisc_qlen(struct Qdisc *q)
-{
-       return q->q.qlen;
-}
-
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
        q->gso_skb = skb;
        q->qstats.requeues++;
+       q->q.qlen++;    /* it's still part of the queue */
        __netif_schedule(q);

        return 0;
@@ -61,9 +57,11 @@  static inline struct sk_buff *dequeue_sk

                /* check the reason of requeuing without tx lock first */
                txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-               if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+               if (!netif_tx_queue_stopped(txq) &&
+                   !netif_tx_queue_frozen(txq)) {
                        q->gso_skb = NULL;
-               else
+                       q->q.qlen--;
+               } else
                        skb = NULL;
        } else {