diff mbox

[RFC] Avoid enqueuing skb for default qdiscs

Message ID 20090804155135.26922.12356.sendpatchset@localhost.localdomain
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Krishna Kumar Aug. 4, 2009, 3:51 p.m. UTC
> Jarek Poplawski <jarkao2@gmail.com> wrote on 08/04/2009 12:52:05 PM:

Hi Jarek,

(Sorry that you are getting this mail twice since the mail didn't go out to
others, but this time I have fixed my mailer)

> Actually I meant:
> -      qdisc->q.qlen--;
> +      qdisc->q.qlen = 0;
> because you can do it after another qdisc->q.qlen = 0.

You are right - I saw some of the sched resets setting it to zero (while others
don't touch it). I will reset to zero.

> > > I wonder if we shouldn't update bstats yet. Btw, try checkpatch.
> >
> > Rate estimator isn't used for default qdisc, so is anything
> > required?
>
> I meant "Sent" stats from "tc -s qdisc sh" could be misleading.

You are right - for a single netperf run, the stats magically show 0
packets/bytes sent!

> > Also, checkpatch suggested to change the original code:
> >         "if (unlikely((skb = dequeue_skb(q)) == NULL))"
> > since it appears as "new code", hope that change is fine.
>
> I mainly meant a whitespace warning.

Yes, I had fixed that also.

A compile and single-netperf-run tested patch is inlined below - the only
two changes from the last patch are setting qlen=0 and updating bstats. Now
the stats show:

qdisc pfifo_fast 0: root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
 Sent 10170288228 bytes 157522 pkt (dropped 0, overlimits 0 requeues 0) 
 rate 0bit 0pps backlog 0b 0p requeues 0 

On the original kernel, I got:
qdisc pfifo_fast 0: root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
 Sent 10157266798 bytes 177539 pkt (dropped 0, overlimits 0 requeues 0) 
 rate 0bit 0pps backlog 0b 0p requeues 0 

Thanks,

- KK

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
 include/net/pkt_sched.h   |    3 +
 include/net/sch_generic.h |   15 +++++
 net/core/dev.c            |   51 ++++++++++++++-----
 net/sched/sch_generic.c   |   93 ++++++++++++++++++++++--------------
 4 files changed, 111 insertions(+), 51 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Krishna Kumar Aug. 4, 2009, 4:21 p.m. UTC | #1
I will send out a final version after any more comments with a fix for a
typo:

-static inline int __qdisc_update_bstats(struct Qdisc *sch, unsigned int
len)
+static inline void __qdisc_update_bstats(struct Qdisc *sch, unsigned int
len)

Thanks,

- KK

Krishna Kumar2/India/IBM@IBMIN wrote on 08/04/2009 09:21:35 PM:
> Hi Jarek,
>
> (Sorry that you are getting this mail twice since the mail didn't go out
to
> others, but this time I have fixed my mailer)
>
> > Actually I meant:
> > -      qdisc->q.qlen--;
> > +      qdisc->q.qlen = 0;
> > because you can do it after another qdisc->q.qlen = 0.
>
> You are right - I saw some of the sched resets setting it to zero (while
others
> dont touch it). I will reset to zero.
>
> > > > I wonder if we shouldn't update bstats yet. Btw, try checkpatch.
> > >
> > > Rate estimator isn't used for default qdisc, so is anything
> > > required?
> >
> > I meant "Sent" stats from "tc -s qdisc sh" could be misleading.
>
> You are right - for a single netperf run, the stats magically show 0
> packets/bytes sent!
>
> > > Also, checkpatch suggested to change the original code:
> > >         "if (unlikely((skb = dequeue_skb(q)) == NULL))"
> > > since it appears as "new code", hope that change is fine.
> >
> > I mainly meant a whitespace warning.
>
> Yes, I had fixed that also.
>
> A compile and single-netperf-run tested patch is inlined below - the only
> two changes from last patch is setting qlen=0 and updating bstats. Now
> the stats show:
>
> qdisc pfifo_fast 0: root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 10170288228 bytes 157522 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
>
> On the original kernel, I got:
> qdisc pfifo_fast 0: root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 10157266798 bytes 177539 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
>
> Thanks,
>
> - KK
>
> Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
> ---
>  include/net/pkt_sched.h   |    3 +
>  include/net/sch_generic.h |   15 +++++
>  net/core/dev.c            |   51 ++++++++++++++-----
>  net/sched/sch_generic.c   |   93 ++++++++++++++++++++++--------------
>  4 files changed, 111 insertions(+), 51 deletions(-)
>
> diff -ruNp org2/include/net/pkt_sched.h new5/include/net/pkt_sched.h
> --- org2/include/net/pkt_sched.h   2009-08-04 11:41:21.000000000 +0530
> +++ new5/include/net/pkt_sched.h   2009-08-04 18:47:16.000000000 +0530
> @@ -87,6 +87,9 @@ extern struct qdisc_rate_table *qdisc_ge
>  extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
>  extern void qdisc_put_stab(struct qdisc_size_table *tab);
>  extern void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc);
> +extern int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
> +            struct net_device *dev, struct netdev_queue *txq,
> +            spinlock_t *root_lock);
>
>  extern void __qdisc_run(struct Qdisc *q);
>
> diff -ruNp org2/include/net/sch_generic.h new5/include/net/sch_generic.h
> --- org2/include/net/sch_generic.h   2009-03-24 08:54:16.000000000 +0530
> +++ new5/include/net/sch_generic.h   2009-08-04 19:58:26.000000000 +0530
> @@ -45,6 +45,7 @@ struct Qdisc
>  #define TCQ_F_BUILTIN      1
>  #define TCQ_F_THROTTLED      2
>  #define TCQ_F_INGRESS      4
> +#define TCQ_F_CAN_BYPASS   8
>  #define TCQ_F_WARN_NONWC   (1 << 16)
>     int         padded;
>     struct Qdisc_ops   *ops;
> @@ -182,6 +183,11 @@ struct qdisc_skb_cb {
>     char         data[];
>  };
>
> +static inline int qdisc_qlen(struct Qdisc *q)
> +{
> +   return q->q.qlen;
> +}
> +
>  static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
>  {
>     return (struct qdisc_skb_cb *)skb->cb;
> @@ -387,13 +393,18 @@ static inline int qdisc_enqueue_root(str
>     return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
>  }
>
> +static inline int __qdisc_update_bstats(struct Qdisc *sch, unsigned int
len)
> +{
> +   sch->bstats.bytes += len;
> +   sch->bstats.packets++;
> +}
> +
>  static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc
*sch,
>                     struct sk_buff_head *list)
>  {
>     __skb_queue_tail(list, skb);
>     sch->qstats.backlog += qdisc_pkt_len(skb);
> -   sch->bstats.bytes += qdisc_pkt_len(skb);
> -   sch->bstats.packets++;
> +   __qdisc_update_bstats(sch, qdisc_pkt_len(skb));
>
>     return NET_XMIT_SUCCESS;
>  }
> diff -ruNp org2/net/core/dev.c new5/net/core/dev.c
> --- org2/net/core/dev.c   2009-07-27 09:08:24.000000000 +0530
> +++ new5/net/core/dev.c   2009-08-04 20:26:57.000000000 +0530
> @@ -1786,6 +1786,43 @@ static struct netdev_queue *dev_pick_tx(
>     return netdev_get_tx_queue(dev, queue_index);
>  }
>
> +static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
> +             struct net_device *dev,
> +             struct netdev_queue *txq)
> +{
> +   spinlock_t *root_lock = qdisc_lock(q);
> +   int rc;
> +
> +   spin_lock(root_lock);
> +   if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
> +      kfree_skb(skb);
> +      rc = NET_XMIT_DROP;
> +   } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
> +         !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
> +      unsigned int len = skb->len;
> +
> +      /*
> +       * This is a work-conserving queue; there are no old skbs
> +       * waiting to be sent out; and the qdisc is not running -
> +       * xmit the skb directly.
> +       */
> +      if (sch_direct_xmit(skb, q, dev, txq, root_lock))
> +         __qdisc_run(q);
> +      else
> +         clear_bit(__QDISC_STATE_RUNNING, &q->state);
> +
> +      __qdisc_update_bstats(q, len);
> +
> +      rc = NET_XMIT_SUCCESS;
> +   } else {
> +      rc = qdisc_enqueue_root(skb, q);
> +      qdisc_run(q);
> +   }
> +   spin_unlock(root_lock);
> +
> +   return rc;
> +}
> +
>  /**
>   *   dev_queue_xmit - transmit a buffer
>   *   @skb: buffer to transmit
> @@ -1859,19 +1896,7 @@ gso:
>     skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
>  #endif
>     if (q->enqueue) {
> -      spinlock_t *root_lock = qdisc_lock(q);
> -
> -      spin_lock(root_lock);
> -
> -      if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
> -         kfree_skb(skb);
> -         rc = NET_XMIT_DROP;
> -      } else {
> -         rc = qdisc_enqueue_root(skb, q);
> -         qdisc_run(q);
> -      }
> -      spin_unlock(root_lock);
> -
> +      rc = __dev_xmit_skb(skb, q, dev, txq);
>        goto out;
>     }
>
> diff -ruNp org2/net/sched/sch_generic.c new5/net/sched/sch_generic.c
> --- org2/net/sched/sch_generic.c   2009-05-25 07:48:07.000000000 +0530
> +++ new5/net/sched/sch_generic.c   2009-08-04 19:38:27.000000000 +0530
> @@ -37,15 +37,11 @@
>   * - updates to tree and tree walking are only done under the rtnl
mutex.
>   */
>
> -static inline int qdisc_qlen(struct Qdisc *q)
> -{
> -   return q->q.qlen;
> -}
> -
>  static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
>  {
>     q->gso_skb = skb;
>     q->qstats.requeues++;
> +   q->q.qlen++;   /* it's still part of the queue */
>     __netif_schedule(q);
>
>     return 0;
> @@ -61,9 +57,11 @@ static inline struct sk_buff *dequeue_sk
>
>        /* check the reason of requeuing without tx lock first */
>        txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
> -      if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
> +      if (!netif_tx_queue_stopped(txq) &&
> +          !netif_tx_queue_frozen(txq)) {
>           q->gso_skb = NULL;
> -      else
> +         q->q.qlen--;
> +      } else
>           skb = NULL;
>     } else {
>        skb = q->dequeue(q);
> @@ -103,44 +101,23 @@ static inline int handle_dev_cpu_collisi
>  }
>
>  /*
> - * NOTE: Called under qdisc_lock(q) with locally disabled BH.
> - *
> - * __QDISC_STATE_RUNNING guarantees only one CPU can process
> - * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
> - * this queue.
> - *
> - *  netif_tx_lock serializes accesses to device driver.
> - *
> - *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
> - *  if one is grabbed, another must be free.
> - *
> - * Note, that this procedure can be called by a watchdog timer
> + * Transmit one skb, and handle the return status as required. Holding
the
> + * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute
this
> + * function.
>   *
>   * Returns to the caller:
>   *            0  - queue is empty or throttled.
>   *            >0 - queue is not empty.
> - *
>   */
> -static inline int qdisc_restart(struct Qdisc *q)
> +int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
> +          struct net_device *dev, struct netdev_queue *txq,
> +          spinlock_t *root_lock)
>  {
> -   struct netdev_queue *txq;
>     int ret = NETDEV_TX_BUSY;
> -   struct net_device *dev;
> -   spinlock_t *root_lock;
> -   struct sk_buff *skb;
> -
> -   /* Dequeue packet */
> -   if (unlikely((skb = dequeue_skb(q)) == NULL))
> -      return 0;
> -
> -   root_lock = qdisc_lock(q);
>
>     /* And release qdisc */
>     spin_unlock(root_lock);
>
> -   dev = qdisc_dev(q);
> -   txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
> -
>     HARD_TX_LOCK(dev, txq, smp_processor_id());
>     if (!netif_tx_queue_stopped(txq) &&
>         !netif_tx_queue_frozen(txq))
> @@ -177,6 +154,44 @@ static inline int qdisc_restart(struct Q
>     return ret;
>  }
>
> +/*
> + * NOTE: Called under qdisc_lock(q) with locally disabled BH.
> + *
> + * __QDISC_STATE_RUNNING guarantees only one CPU can process
> + * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
> + * this queue.
> + *
> + *  netif_tx_lock serializes accesses to device driver.
> + *
> + *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
> + *  if one is grabbed, another must be free.
> + *
> + * Note, that this procedure can be called by a watchdog timer
> + *
> + * Returns to the caller:
> + *            0  - queue is empty or throttled.
> + *            >0 - queue is not empty.
> + *
> + */
> +static inline int qdisc_restart(struct Qdisc *q)
> +{
> +   struct netdev_queue *txq;
> +   struct net_device *dev;
> +   spinlock_t *root_lock;
> +   struct sk_buff *skb;
> +
> +   /* Dequeue packet */
> +   skb = dequeue_skb(q);
> +   if (unlikely(!skb))
> +      return 0;
> +
> +   root_lock = qdisc_lock(q);
> +   dev = qdisc_dev(q);
> +   txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
> +
> +   return sch_direct_xmit(skb, q, dev, txq, root_lock);
> +}
> +
>  void __qdisc_run(struct Qdisc *q)
>  {
>     unsigned long start_time = jiffies;
> @@ -547,8 +562,11 @@ void qdisc_reset(struct Qdisc *qdisc)
>     if (ops->reset)
>        ops->reset(qdisc);
>
> -   kfree_skb(qdisc->gso_skb);
> -   qdisc->gso_skb = NULL;
> +   if (qdisc->gso_skb) {
> +      kfree_skb(qdisc->gso_skb);
> +      qdisc->gso_skb = NULL;
> +      qdisc->q.qlen = 0;
> +   }
>  }
>  EXPORT_SYMBOL(qdisc_reset);
>
> @@ -605,6 +623,9 @@ static void attach_one_default_qdisc(str
>           printk(KERN_INFO "%s: activation failed\n", dev->name);
>           return;
>        }
> +
> +      /* Can by-pass the queue discipline for default qdisc */
> +      qdisc->flags |= TCQ_F_CAN_BYPASS;
>     } else {
>        qdisc =  &noqueue_qdisc;
>     }

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jarek Poplawski Aug. 4, 2009, 8:23 p.m. UTC | #2
On Tue, Aug 04, 2009 at 09:51:10PM +0530, Krishna Kumar2 wrote:
> I will send out a final version after any more comments with a fix for a
> typo:
> 
> -static inline int __qdisc_update_bstats(struct Qdisc *sch, unsigned int
> len)
> +static inline void __qdisc_update_bstats(struct Qdisc *sch, unsigned int
> len)
> 
> Thanks,
> 
> - KK

Yes, except this typo this patch looks OK to me.

Thanks,
Jarek P.

> 
> Krishna Kumar2/India/IBM@IBMIN wrote on 08/04/2009 09:21:35 PM:
> > Hi Jarek,
> >
> > (Sorry that you are getting this mail twice since the mail didn't go out
> to
> > others, but this time I have fixed my mailer)
> >
> > > Actually I meant:
> > > -      qdisc->q.qlen--;
> > > +      qdisc->q.qlen = 0;
> > > because you can do it after another qdisc->q.qlen = 0.
> >
> > You are right - I saw some of the sched resets setting it to zero (while
> others
> > dont touch it). I will reset to zero.
> >
> > > > > I wonder if we shouldn't update bstats yet. Btw, try checkpatch.
> > > >
> > > > Rate estimator isn't used for default qdisc, so is anything
> > > > required?
> > >
> > > I meant "Sent" stats from "tc -s qdisc sh" could be misleading.
> >
> > You are right - for a single netperf run, the stats magically show 0
> > packets/bytes sent!
> >
> > > > Also, checkpatch suggested to change the original code:
> > > >         "if (unlikely((skb = dequeue_skb(q)) == NULL))"
> > > > since it appears as "new code", hope that change is fine.
> > >
> > > I mainly meant a whitespace warning.
> >
> > Yes, I had fixed that also.
> >
> > A compile and single-netperf-run tested patch is inlined below - the only
> > two changes from last patch is setting qlen=0 and updating bstats. Now
> > the stats show:
> >
> > qdisc pfifo_fast 0: root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
> >  Sent 10170288228 bytes 157522 pkt (dropped 0, overlimits 0 requeues 0)
> >  rate 0bit 0pps backlog 0b 0p requeues 0
> >
> > On the original kernel, I got:
> > qdisc pfifo_fast 0: root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
> >  Sent 10157266798 bytes 177539 pkt (dropped 0, overlimits 0 requeues 0)
> >  rate 0bit 0pps backlog 0b 0p requeues 0
> >
> > Thanks,
> >
> > - KK
> >
> > Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
> > ---
> >  include/net/pkt_sched.h   |    3 +
> >  include/net/sch_generic.h |   15 +++++
> >  net/core/dev.c            |   51 ++++++++++++++-----
> >  net/sched/sch_generic.c   |   93 ++++++++++++++++++++++--------------
> >  4 files changed, 111 insertions(+), 51 deletions(-)
> >
> > diff -ruNp org2/include/net/pkt_sched.h new5/include/net/pkt_sched.h
> > --- org2/include/net/pkt_sched.h   2009-08-04 11:41:21.000000000 +0530
> > +++ new5/include/net/pkt_sched.h   2009-08-04 18:47:16.000000000 +0530
> > @@ -87,6 +87,9 @@ extern struct qdisc_rate_table *qdisc_ge
> >  extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
> >  extern void qdisc_put_stab(struct qdisc_size_table *tab);
> >  extern void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc);
> > +extern int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
> > +            struct net_device *dev, struct netdev_queue *txq,
> > +            spinlock_t *root_lock);
> >
> >  extern void __qdisc_run(struct Qdisc *q);
> >
> > diff -ruNp org2/include/net/sch_generic.h new5/include/net/sch_generic.h
> > --- org2/include/net/sch_generic.h   2009-03-24 08:54:16.000000000 +0530
> > +++ new5/include/net/sch_generic.h   2009-08-04 19:58:26.000000000 +0530
> > @@ -45,6 +45,7 @@ struct Qdisc
> >  #define TCQ_F_BUILTIN      1
> >  #define TCQ_F_THROTTLED      2
> >  #define TCQ_F_INGRESS      4
> > +#define TCQ_F_CAN_BYPASS   8
> >  #define TCQ_F_WARN_NONWC   (1 << 16)
> >     int         padded;
> >     struct Qdisc_ops   *ops;
> > @@ -182,6 +183,11 @@ struct qdisc_skb_cb {
> >     char         data[];
> >  };
> >
> > +static inline int qdisc_qlen(struct Qdisc *q)
> > +{
> > +   return q->q.qlen;
> > +}
> > +
> >  static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
> >  {
> >     return (struct qdisc_skb_cb *)skb->cb;
> > @@ -387,13 +393,18 @@ static inline int qdisc_enqueue_root(str
> >     return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
> >  }
> >
> > +static inline int __qdisc_update_bstats(struct Qdisc *sch, unsigned int
> len)
> > +{
> > +   sch->bstats.bytes += len;
> > +   sch->bstats.packets++;
> > +}
> > +
> >  static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc
> *sch,
> >                     struct sk_buff_head *list)
> >  {
> >     __skb_queue_tail(list, skb);
> >     sch->qstats.backlog += qdisc_pkt_len(skb);
> > -   sch->bstats.bytes += qdisc_pkt_len(skb);
> > -   sch->bstats.packets++;
> > +   __qdisc_update_bstats(sch, qdisc_pkt_len(skb));
> >
> >     return NET_XMIT_SUCCESS;
> >  }
> > diff -ruNp org2/net/core/dev.c new5/net/core/dev.c
> > --- org2/net/core/dev.c   2009-07-27 09:08:24.000000000 +0530
> > +++ new5/net/core/dev.c   2009-08-04 20:26:57.000000000 +0530
> > @@ -1786,6 +1786,43 @@ static struct netdev_queue *dev_pick_tx(
> >     return netdev_get_tx_queue(dev, queue_index);
> >  }
> >
> > +static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
> > +             struct net_device *dev,
> > +             struct netdev_queue *txq)
> > +{
> > +   spinlock_t *root_lock = qdisc_lock(q);
> > +   int rc;
> > +
> > +   spin_lock(root_lock);
> > +   if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
> > +      kfree_skb(skb);
> > +      rc = NET_XMIT_DROP;
> > +   } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
> > +         !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
> > +      unsigned int len = skb->len;
> > +
> > +      /*
> > +       * This is a work-conserving queue; there are no old skbs
> > +       * waiting to be sent out; and the qdisc is not running -
> > +       * xmit the skb directly.
> > +       */
> > +      if (sch_direct_xmit(skb, q, dev, txq, root_lock))
> > +         __qdisc_run(q);
> > +      else
> > +         clear_bit(__QDISC_STATE_RUNNING, &q->state);
> > +
> > +      __qdisc_update_bstats(q, len);
> > +
> > +      rc = NET_XMIT_SUCCESS;
> > +   } else {
> > +      rc = qdisc_enqueue_root(skb, q);
> > +      qdisc_run(q);
> > +   }
> > +   spin_unlock(root_lock);
> > +
> > +   return rc;
> > +}
> > +
> >  /**
> >   *   dev_queue_xmit - transmit a buffer
> >   *   @skb: buffer to transmit
> > @@ -1859,19 +1896,7 @@ gso:
> >     skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
> >  #endif
> >     if (q->enqueue) {
> > -      spinlock_t *root_lock = qdisc_lock(q);
> > -
> > -      spin_lock(root_lock);
> > -
> > -      if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
> > -         kfree_skb(skb);
> > -         rc = NET_XMIT_DROP;
> > -      } else {
> > -         rc = qdisc_enqueue_root(skb, q);
> > -         qdisc_run(q);
> > -      }
> > -      spin_unlock(root_lock);
> > -
> > +      rc = __dev_xmit_skb(skb, q, dev, txq);
> >        goto out;
> >     }
> >
> > diff -ruNp org2/net/sched/sch_generic.c new5/net/sched/sch_generic.c
> > --- org2/net/sched/sch_generic.c   2009-05-25 07:48:07.000000000 +0530
> > +++ new5/net/sched/sch_generic.c   2009-08-04 19:38:27.000000000 +0530
> > @@ -37,15 +37,11 @@
> >   * - updates to tree and tree walking are only done under the rtnl
> mutex.
> >   */
> >
> > -static inline int qdisc_qlen(struct Qdisc *q)
> > -{
> > -   return q->q.qlen;
> > -}
> > -
> >  static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
> >  {
> >     q->gso_skb = skb;
> >     q->qstats.requeues++;
> > +   q->q.qlen++;   /* it's still part of the queue */
> >     __netif_schedule(q);
> >
> >     return 0;
> > @@ -61,9 +57,11 @@ static inline struct sk_buff *dequeue_sk
> >
> >        /* check the reason of requeuing without tx lock first */
> >        txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
> > -      if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
> > +      if (!netif_tx_queue_stopped(txq) &&
> > +          !netif_tx_queue_frozen(txq)) {
> >           q->gso_skb = NULL;
> > -      else
> > +         q->q.qlen--;
> > +      } else
> >           skb = NULL;
> >     } else {
> >        skb = q->dequeue(q);
> > @@ -103,44 +101,23 @@ static inline int handle_dev_cpu_collisi
> >  }
> >
> >  /*
> > - * NOTE: Called under qdisc_lock(q) with locally disabled BH.
> > - *
> > - * __QDISC_STATE_RUNNING guarantees only one CPU can process
> > - * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
> > - * this queue.
> > - *
> > - *  netif_tx_lock serializes accesses to device driver.
> > - *
> > - *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
> > - *  if one is grabbed, another must be free.
> > - *
> > - * Note, that this procedure can be called by a watchdog timer
> > + * Transmit one skb, and handle the return status as required. Holding
> the
> > + * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute
> this
> > + * function.
> >   *
> >   * Returns to the caller:
> >   *            0  - queue is empty or throttled.
> >   *            >0 - queue is not empty.
> > - *
> >   */
> > -static inline int qdisc_restart(struct Qdisc *q)
> > +int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
> > +          struct net_device *dev, struct netdev_queue *txq,
> > +          spinlock_t *root_lock)
> >  {
> > -   struct netdev_queue *txq;
> >     int ret = NETDEV_TX_BUSY;
> > -   struct net_device *dev;
> > -   spinlock_t *root_lock;
> > -   struct sk_buff *skb;
> > -
> > -   /* Dequeue packet */
> > -   if (unlikely((skb = dequeue_skb(q)) == NULL))
> > -      return 0;
> > -
> > -   root_lock = qdisc_lock(q);
> >
> >     /* And release qdisc */
> >     spin_unlock(root_lock);
> >
> > -   dev = qdisc_dev(q);
> > -   txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
> > -
> >     HARD_TX_LOCK(dev, txq, smp_processor_id());
> >     if (!netif_tx_queue_stopped(txq) &&
> >         !netif_tx_queue_frozen(txq))
> > @@ -177,6 +154,44 @@ static inline int qdisc_restart(struct Q
> >     return ret;
> >  }
> >
> > +/*
> > + * NOTE: Called under qdisc_lock(q) with locally disabled BH.
> > + *
> > + * __QDISC_STATE_RUNNING guarantees only one CPU can process
> > + * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
> > + * this queue.
> > + *
> > + *  netif_tx_lock serializes accesses to device driver.
> > + *
> > + *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
> > + *  if one is grabbed, another must be free.
> > + *
> > + * Note, that this procedure can be called by a watchdog timer
> > + *
> > + * Returns to the caller:
> > + *            0  - queue is empty or throttled.
> > + *            >0 - queue is not empty.
> > + *
> > + */
> > +static inline int qdisc_restart(struct Qdisc *q)
> > +{
> > +   struct netdev_queue *txq;
> > +   struct net_device *dev;
> > +   spinlock_t *root_lock;
> > +   struct sk_buff *skb;
> > +
> > +   /* Dequeue packet */
> > +   skb = dequeue_skb(q);
> > +   if (unlikely(!skb))
> > +      return 0;
> > +
> > +   root_lock = qdisc_lock(q);
> > +   dev = qdisc_dev(q);
> > +   txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
> > +
> > +   return sch_direct_xmit(skb, q, dev, txq, root_lock);
> > +}
> > +
> >  void __qdisc_run(struct Qdisc *q)
> >  {
> >     unsigned long start_time = jiffies;
> > @@ -547,8 +562,11 @@ void qdisc_reset(struct Qdisc *qdisc)
> >     if (ops->reset)
> >        ops->reset(qdisc);
> >
> > -   kfree_skb(qdisc->gso_skb);
> > -   qdisc->gso_skb = NULL;
> > +   if (qdisc->gso_skb) {
> > +      kfree_skb(qdisc->gso_skb);
> > +      qdisc->gso_skb = NULL;
> > +      qdisc->q.qlen = 0;
> > +   }
> >  }
> >  EXPORT_SYMBOL(qdisc_reset);
> >
> > @@ -605,6 +623,9 @@ static void attach_one_default_qdisc(str
> >           printk(KERN_INFO "%s: activation failed\n", dev->name);
> >           return;
> >        }
> > +
> > +      /* Can by-pass the queue discipline for default qdisc */
> > +      qdisc->flags |= TCQ_F_CAN_BYPASS;
> >     } else {
> >        qdisc =  &noqueue_qdisc;
> >     }
> 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff -ruNp org2/include/net/pkt_sched.h new5/include/net/pkt_sched.h
--- org2/include/net/pkt_sched.h	2009-08-04 11:41:21.000000000 +0530
+++ new5/include/net/pkt_sched.h	2009-08-04 18:47:16.000000000 +0530
@@ -87,6 +87,9 @@  extern struct qdisc_rate_table *qdisc_ge
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
 extern void qdisc_put_stab(struct qdisc_size_table *tab);
 extern void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc);
+extern int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+			   struct net_device *dev, struct netdev_queue *txq,
+			   spinlock_t *root_lock);
 
 extern void __qdisc_run(struct Qdisc *q);
 
diff -ruNp org2/include/net/sch_generic.h new5/include/net/sch_generic.h
--- org2/include/net/sch_generic.h	2009-03-24 08:54:16.000000000 +0530
+++ new5/include/net/sch_generic.h	2009-08-04 19:58:26.000000000 +0530
@@ -45,6 +45,7 @@  struct Qdisc
 #define TCQ_F_BUILTIN		1
 #define TCQ_F_THROTTLED		2
 #define TCQ_F_INGRESS		4
+#define TCQ_F_CAN_BYPASS	8
 #define TCQ_F_WARN_NONWC	(1 << 16)
 	int			padded;
 	struct Qdisc_ops	*ops;
@@ -182,6 +183,11 @@  struct qdisc_skb_cb {
 	char			data[];
 };
 
+static inline int qdisc_qlen(struct Qdisc *q)
+{
+	return q->q.qlen;
+}
+
 static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
 {
 	return (struct qdisc_skb_cb *)skb->cb;
@@ -387,13 +393,18 @@  static inline int qdisc_enqueue_root(str
 	return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
 }
 
+static inline int __qdisc_update_bstats(struct Qdisc *sch, unsigned int len)
+{
+	sch->bstats.bytes += len;
+	sch->bstats.packets++;
+}
+
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
 {
 	__skb_queue_tail(list, skb);
 	sch->qstats.backlog += qdisc_pkt_len(skb);
-	sch->bstats.bytes += qdisc_pkt_len(skb);
-	sch->bstats.packets++;
+	__qdisc_update_bstats(sch, qdisc_pkt_len(skb));
 
 	return NET_XMIT_SUCCESS;
 }
diff -ruNp org2/net/core/dev.c new5/net/core/dev.c
--- org2/net/core/dev.c	2009-07-27 09:08:24.000000000 +0530
+++ new5/net/core/dev.c	2009-08-04 20:26:57.000000000 +0530
@@ -1786,6 +1786,43 @@  static struct netdev_queue *dev_pick_tx(
 	return netdev_get_tx_queue(dev, queue_index);
 }
 
+static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
+				 struct net_device *dev,
+				 struct netdev_queue *txq)
+{
+	spinlock_t *root_lock = qdisc_lock(q);
+	int rc;
+
+	spin_lock(root_lock);
+	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+		kfree_skb(skb);
+		rc = NET_XMIT_DROP;
+	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
+		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
+		unsigned int len = skb->len;
+
+		/*
+		 * This is a work-conserving queue; there are no old skbs
+		 * waiting to be sent out; and the qdisc is not running -
+		 * xmit the skb directly.
+		 */
+		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
+			__qdisc_run(q);
+		else
+			clear_bit(__QDISC_STATE_RUNNING, &q->state);
+
+		__qdisc_update_bstats(q, len);
+
+		rc = NET_XMIT_SUCCESS;
+	} else {
+		rc = qdisc_enqueue_root(skb, q);
+		qdisc_run(q);
+	}
+	spin_unlock(root_lock);
+
+	return rc;
+}
+
 /**
  *	dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
@@ -1859,19 +1896,7 @@  gso:
 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
 	if (q->enqueue) {
-		spinlock_t *root_lock = qdisc_lock(q);
-
-		spin_lock(root_lock);
-
-		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-			kfree_skb(skb);
-			rc = NET_XMIT_DROP;
-		} else {
-			rc = qdisc_enqueue_root(skb, q);
-			qdisc_run(q);
-		}
-		spin_unlock(root_lock);
-
+		rc = __dev_xmit_skb(skb, q, dev, txq);
 		goto out;
 	}
 
diff -ruNp org2/net/sched/sch_generic.c new5/net/sched/sch_generic.c
--- org2/net/sched/sch_generic.c	2009-05-25 07:48:07.000000000 +0530
+++ new5/net/sched/sch_generic.c	2009-08-04 19:38:27.000000000 +0530
@@ -37,15 +37,11 @@ 
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
 
-static inline int qdisc_qlen(struct Qdisc *q)
-{
-	return q->q.qlen;
-}
-
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
 	q->gso_skb = skb;
 	q->qstats.requeues++;
+	q->q.qlen++;	/* it's still part of the queue */
 	__netif_schedule(q);
 
 	return 0;
@@ -61,9 +57,11 @@  static inline struct sk_buff *dequeue_sk
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+		if (!netif_tx_queue_stopped(txq) &&
+		    !netif_tx_queue_frozen(txq)) {
 			q->gso_skb = NULL;
-		else
+			q->q.qlen--;
+		} else
 			skb = NULL;
 	} else {
 		skb = q->dequeue(q);
@@ -103,44 +101,23 @@  static inline int handle_dev_cpu_collisi
 }
 
 /*
- * NOTE: Called under qdisc_lock(q) with locally disabled BH.
- *
- * __QDISC_STATE_RUNNING guarantees only one CPU can process
- * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
- * this queue.
- *
- *  netif_tx_lock serializes accesses to device driver.
- *
- *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
- *  if one is grabbed, another must be free.
- *
- * Note, that this procedure can be called by a watchdog timer
+ * Transmit one skb, and handle the return status as required. Holding the
+ * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this
+ * function.
  *
  * Returns to the caller:
  *				0  - queue is empty or throttled.
  *				>0 - queue is not empty.
- *
  */
-static inline int qdisc_restart(struct Qdisc *q)
+int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+		    struct net_device *dev, struct netdev_queue *txq,
+		    spinlock_t *root_lock)
 {
-	struct netdev_queue *txq;
 	int ret = NETDEV_TX_BUSY;
-	struct net_device *dev;
-	spinlock_t *root_lock;
-	struct sk_buff *skb;
-
-	/* Dequeue packet */
-	if (unlikely((skb = dequeue_skb(q)) == NULL))
-		return 0;
-
-	root_lock = qdisc_lock(q);
 
 	/* And release qdisc */
 	spin_unlock(root_lock);
 
-	dev = qdisc_dev(q);
-	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_tx_queue_stopped(txq) &&
 	    !netif_tx_queue_frozen(txq))
@@ -177,6 +154,44 @@  static inline int qdisc_restart(struct Q
 	return ret;
 }
 
+/*
+ * NOTE: Called under qdisc_lock(q) with locally disabled BH.
+ *
+ * __QDISC_STATE_RUNNING guarantees only one CPU can process
+ * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
+ * this queue.
+ *
+ *  netif_tx_lock serializes accesses to device driver.
+ *
+ *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
+ *  if one is grabbed, another must be free.
+ *
+ * Note, that this procedure can be called by a watchdog timer
+ *
+ * Returns to the caller:
+ *				0  - queue is empty or throttled.
+ *				>0 - queue is not empty.
+ *
+ */
+static inline int qdisc_restart(struct Qdisc *q)
+{
+	struct netdev_queue *txq;
+	struct net_device *dev;
+	spinlock_t *root_lock;
+	struct sk_buff *skb;
+
+	/* Dequeue packet */
+	skb = dequeue_skb(q);
+	if (unlikely(!skb))
+		return 0;
+
+	root_lock = qdisc_lock(q);
+	dev = qdisc_dev(q);
+	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
+	return sch_direct_xmit(skb, q, dev, txq, root_lock);
+}
+
 void __qdisc_run(struct Qdisc *q)
 {
 	unsigned long start_time = jiffies;
@@ -547,8 +562,11 @@  void qdisc_reset(struct Qdisc *qdisc)
 	if (ops->reset)
 		ops->reset(qdisc);
 
-	kfree_skb(qdisc->gso_skb);
-	qdisc->gso_skb = NULL;
+	if (qdisc->gso_skb) {
+		kfree_skb(qdisc->gso_skb);
+		qdisc->gso_skb = NULL;
+		qdisc->q.qlen = 0;
+	}
 }
 EXPORT_SYMBOL(qdisc_reset);
 
@@ -605,6 +623,9 @@  static void attach_one_default_qdisc(str
 			printk(KERN_INFO "%s: activation failed\n", dev->name);
 			return;
 		}
+
+		/* Can by-pass the queue discipline for default qdisc */
+		qdisc->flags |= TCQ_F_CAN_BYPASS;
 	} else {
 		qdisc =  &noqueue_qdisc;
 	}