| Message ID | 20090804155135.26922.12356.sendpatchset@localhost.localdomain |
|---|---|
| State | RFC, archived |
| Delegated to | David Miller |
I will send out a final version after any more comments, with a fix for a
typo:

-static inline int __qdisc_update_bstats(struct Qdisc *sch, unsigned int len)
+static inline void __qdisc_update_bstats(struct Qdisc *sch, unsigned int len)

Thanks,

- KK

Krishna Kumar2/India/IBM@IBMIN wrote on 08/04/2009 09:21:35 PM:

> Hi Jarek,
>
> (Sorry that you are getting this mail twice since the mail didn't go out
> to others, but this time I have fixed my mailer)
>
> > Actually I meant:
> > -	qdisc->q.qlen--;
> > +	qdisc->q.qlen = 0;
> > because you can do it after another qdisc->q.qlen = 0.
>
> You are right - I saw some of the sched resets setting it to zero (while
> others don't touch it). I will reset it to zero.
>
> > > > I wonder if we shouldn't update bstats yet. Btw, try checkpatch.
> > >
> > > Rate estimator isn't used for default qdisc, so is anything
> > > required?
> >
> > I meant "Sent" stats from "tc -s qdisc sh" could be misleading.
>
> You are right - for a single netperf run, the stats magically show 0
> packets/bytes sent!
>
> > > Also, checkpatch suggested to change the original code:
> > > "if (unlikely((skb = dequeue_skb(q)) == NULL))"
> > > since it appears as "new code", hope that change is fine.
> >
> > I mainly meant a whitespace warning.
>
> Yes, I had fixed that also.
>
> A compile and single-netperf-run tested patch is inlined below - the only
> two changes from the last patch are setting qlen=0 and updating bstats.
> Now the stats show:
>
> qdisc pfifo_fast 0: root bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 10170288228 bytes 157522 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
>
> On the original kernel, I got:
> qdisc pfifo_fast 0: root bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 10157266798 bytes 177539 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
>
> Thanks,
>
> - KK
>
> Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
> ---
>  include/net/pkt_sched.h   |    3 +
>  include/net/sch_generic.h |   15 +++++
>  net/core/dev.c            |   51 ++++++++++++++-----
>  net/sched/sch_generic.c   |   93 ++++++++++++++++++++++--------------
>  4 files changed, 111 insertions(+), 51 deletions(-)
>
> [...]
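The fast path added in __dev_xmit_skb() above hinges on three cheap checks: the qdisc opted in via TCQ_F_CAN_BYPASS, its queue is currently empty, and the caller wins the __QDISC_STATE_RUNNING bit. Here is a minimal userspace C model of that decision (hypothetical names; atomic_flag stands in for the kernel's test_and_set_bit) - a sketch, not the kernel implementation:

/*
 * Userspace sketch of the bypass decision in __dev_xmit_skb().
 * An skb may be transmitted directly only if the qdisc allows
 * bypass, holds no earlier packets, and the caller is the one
 * who atomically sets the RUNNING flag.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define TCQ_F_CAN_BYPASS 8

struct model_qdisc {
	unsigned int flags;
	int qlen;		/* queued packets, including a requeued gso_skb */
	atomic_flag running;	/* stands in for __QDISC_STATE_RUNNING */
};

/* Returns true if the caller may transmit directly; it then owns RUNNING. */
static bool try_bypass(struct model_qdisc *q)
{
	if (!(q->flags & TCQ_F_CAN_BYPASS) || q->qlen > 0)
		return false;
	return !atomic_flag_test_and_set(&q->running); /* test_and_set_bit() */
}

int main(void)
{
	struct model_qdisc q = {
		.flags = TCQ_F_CAN_BYPASS,
		.qlen = 0,
		.running = ATOMIC_FLAG_INIT,
	};

	if (try_bypass(&q)) {
		printf("direct xmit\n");
		atomic_flag_clear(&q.running);	/* clear_bit() analogue */
	} else {
		printf("enqueue + qdisc_run\n");
	}
	return 0;
}

In the actual patch, the RUNNING bit is cleared directly when sch_direct_xmit() reports an empty queue, and otherwise by the __qdisc_run() machinery; the model only shows the winner/loser split.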
On Tue, Aug 04, 2009 at 09:51:10PM +0530, Krishna Kumar2 wrote:
> I will send out a final version after any more comments, with a fix for a
> typo:
>
> -static inline int __qdisc_update_bstats(struct Qdisc *sch, unsigned int len)
> +static inline void __qdisc_update_bstats(struct Qdisc *sch, unsigned int len)
>
> Thanks,
>
> - KK

Yes, except for this typo the patch looks OK to me.

Thanks,
Jarek P.

> [...]
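The subtle point behind this ack is the qlen accounting: once dev_requeue_skb() stashes a packet in gso_skb, that packet must still count toward q.qlen, or the new bypass check (!qdisc_qlen(q)) could send a fresh skb ahead of it and reorder traffic. A small self-contained C sketch of that invariant (hypothetical names, not kernel code):

#include <assert.h>
#include <stddef.h>

/* Toy queue: one "requeued" slot plus a length counter, as in the patch. */
struct model_q {
	void *gso_skb;	/* packet the driver refused; still logically queued */
	int qlen;
};

static void requeue(struct model_q *q, void *skb)
{
	q->gso_skb = skb;
	q->qlen++;		/* it's still part of the queue */
}

static void *dequeue(struct model_q *q)
{
	void *skb = q->gso_skb;

	if (skb) {
		q->gso_skb = NULL;
		q->qlen--;	/* balances the increment in requeue() */
	}
	return skb;
}

int main(void)
{
	struct model_q q = { NULL, 0 };
	int pkt;

	requeue(&q, &pkt);
	/* qlen != 0, so a bypass check would now fail: no reordering. */
	assert(q.qlen == 1);
	assert(dequeue(&q) == &pkt);
	assert(q.qlen == 0);
	return 0;
}

This is also why qdisc_reset() in the patch zeroes q.qlen when it frees gso_skb: the counter and the stashed skb must stay in step.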
diff -ruNp org2/include/net/pkt_sched.h new5/include/net/pkt_sched.h
--- org2/include/net/pkt_sched.h	2009-08-04 11:41:21.000000000 +0530
+++ new5/include/net/pkt_sched.h	2009-08-04 18:47:16.000000000 +0530
@@ -87,6 +87,9 @@ extern struct qdisc_rate_table *qdisc_ge
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
 extern void qdisc_put_stab(struct qdisc_size_table *tab);
 extern void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc);
+extern int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+			   struct net_device *dev, struct netdev_queue *txq,
+			   spinlock_t *root_lock);
 
 extern void __qdisc_run(struct Qdisc *q);
 
diff -ruNp org2/include/net/sch_generic.h new5/include/net/sch_generic.h
--- org2/include/net/sch_generic.h	2009-03-24 08:54:16.000000000 +0530
+++ new5/include/net/sch_generic.h	2009-08-04 19:58:26.000000000 +0530
@@ -45,6 +45,7 @@ struct Qdisc
 #define TCQ_F_BUILTIN		1
 #define TCQ_F_THROTTLED		2
 #define TCQ_F_INGRESS		4
+#define TCQ_F_CAN_BYPASS	8
 #define TCQ_F_WARN_NONWC	(1 << 16)
 	int			padded;
 	struct Qdisc_ops	*ops;
@@ -182,6 +183,11 @@ struct qdisc_skb_cb {
 	char			data[];
 };
 
+static inline int qdisc_qlen(struct Qdisc *q)
+{
+	return q->q.qlen;
+}
+
 static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
 {
 	return (struct qdisc_skb_cb *)skb->cb;
@@ -387,13 +393,18 @@ static inline int qdisc_enqueue_root(str
 	return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
 }
 
+static inline int __qdisc_update_bstats(struct Qdisc *sch, unsigned int len)
+{
+	sch->bstats.bytes += len;
+	sch->bstats.packets++;
+}
+
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
 {
 	__skb_queue_tail(list, skb);
 	sch->qstats.backlog += qdisc_pkt_len(skb);
-	sch->bstats.bytes += qdisc_pkt_len(skb);
-	sch->bstats.packets++;
+	__qdisc_update_bstats(sch, qdisc_pkt_len(skb));
 
 	return NET_XMIT_SUCCESS;
 }
diff -ruNp org2/net/core/dev.c new5/net/core/dev.c
--- org2/net/core/dev.c	2009-07-27 09:08:24.000000000 +0530
+++ new5/net/core/dev.c	2009-08-04 20:26:57.000000000 +0530
@@ -1786,6 +1786,43 @@ static struct netdev_queue *dev_pick_tx(
 	return netdev_get_tx_queue(dev, queue_index);
 }
 
+static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
+				 struct net_device *dev,
+				 struct netdev_queue *txq)
+{
+	spinlock_t *root_lock = qdisc_lock(q);
+	int rc;
+
+	spin_lock(root_lock);
+	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+		kfree_skb(skb);
+		rc = NET_XMIT_DROP;
+	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
+		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
+		unsigned int len = skb->len;
+
+		/*
+		 * This is a work-conserving queue; there are no old skbs
+		 * waiting to be sent out; and the qdisc is not running -
+		 * xmit the skb directly.
+		 */
+		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
+			__qdisc_run(q);
+		else
+			clear_bit(__QDISC_STATE_RUNNING, &q->state);
+
+		__qdisc_update_bstats(q, len);
+
+		rc = NET_XMIT_SUCCESS;
+	} else {
+		rc = qdisc_enqueue_root(skb, q);
+		qdisc_run(q);
+	}
+	spin_unlock(root_lock);
+
+	return rc;
+}
+
 /**
  *	dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
@@ -1859,19 +1896,7 @@ gso:
 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
 	if (q->enqueue) {
-		spinlock_t *root_lock = qdisc_lock(q);
-
-		spin_lock(root_lock);
-
-		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-			kfree_skb(skb);
-			rc = NET_XMIT_DROP;
-		} else {
-			rc = qdisc_enqueue_root(skb, q);
-			qdisc_run(q);
-		}
-		spin_unlock(root_lock);
-
+		rc = __dev_xmit_skb(skb, q, dev, txq);
 		goto out;
 	}
 
diff -ruNp org2/net/sched/sch_generic.c new5/net/sched/sch_generic.c
--- org2/net/sched/sch_generic.c	2009-05-25 07:48:07.000000000 +0530
+++ new5/net/sched/sch_generic.c	2009-08-04 19:38:27.000000000 +0530
@@ -37,15 +37,11 @@
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
 
-static inline int qdisc_qlen(struct Qdisc *q)
-{
-	return q->q.qlen;
-}
-
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
 	q->gso_skb = skb;
 	q->qstats.requeues++;
+	q->q.qlen++;	/* it's still part of the queue */
 	__netif_schedule(q);
 
 	return 0;
@@ -61,9 +57,11 @@ static inline struct sk_buff *dequeue_sk
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+		if (!netif_tx_queue_stopped(txq) &&
+		    !netif_tx_queue_frozen(txq)) {
 			q->gso_skb = NULL;
-		else
+			q->q.qlen--;
+		} else
 			skb = NULL;
 	} else {
 		skb = q->dequeue(q);
@@ -103,44 +101,23 @@ static inline int handle_dev_cpu_collisi
 }
 
 /*
- * NOTE: Called under qdisc_lock(q) with locally disabled BH.
- *
- * __QDISC_STATE_RUNNING guarantees only one CPU can process
- * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
- * this queue.
- *
- * netif_tx_lock serializes accesses to device driver.
- *
- * qdisc_lock(q) and netif_tx_lock are mutually exclusive,
- * if one is grabbed, another must be free.
- *
- * Note, that this procedure can be called by a watchdog timer
+ * Transmit one skb, and handle the return status as required. Holding the
+ * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this
+ * function.
  *
  * Returns to the caller:
  *				0  - queue is empty or throttled.
  *				>0 - queue is not empty.
- *
  */
-static inline int qdisc_restart(struct Qdisc *q)
+int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+		    struct net_device *dev, struct netdev_queue *txq,
+		    spinlock_t *root_lock)
 {
-	struct netdev_queue *txq;
 	int ret = NETDEV_TX_BUSY;
-	struct net_device *dev;
-	spinlock_t *root_lock;
-	struct sk_buff *skb;
-
-	/* Dequeue packet */
-	if (unlikely((skb = dequeue_skb(q)) == NULL))
-		return 0;
-
-	root_lock = qdisc_lock(q);
 
 	/* And release qdisc */
 	spin_unlock(root_lock);
 
-	dev = qdisc_dev(q);
-	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_tx_queue_stopped(txq) &&
 	    !netif_tx_queue_frozen(txq))
@@ -177,6 +154,44 @@ static inline int qdisc_restart(struct Q
 	return ret;
 }
 
+/*
+ * NOTE: Called under qdisc_lock(q) with locally disabled BH.
+ *
+ * __QDISC_STATE_RUNNING guarantees only one CPU can process
+ * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
+ * this queue.
+ *
+ * netif_tx_lock serializes accesses to device driver.
+ *
+ * qdisc_lock(q) and netif_tx_lock are mutually exclusive,
+ * if one is grabbed, another must be free.
+ *
+ * Note, that this procedure can be called by a watchdog timer
+ *
+ * Returns to the caller:
+ *				0  - queue is empty or throttled.
+ *				>0 - queue is not empty.
+ *
+ */
+static inline int qdisc_restart(struct Qdisc *q)
+{
+	struct netdev_queue *txq;
+	struct net_device *dev;
+	spinlock_t *root_lock;
+	struct sk_buff *skb;
+
+	/* Dequeue packet */
+	skb = dequeue_skb(q);
+	if (unlikely(!skb))
+		return 0;
+
+	root_lock = qdisc_lock(q);
+	dev = qdisc_dev(q);
+	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
+	return sch_direct_xmit(skb, q, dev, txq, root_lock);
+}
+
 void __qdisc_run(struct Qdisc *q)
 {
 	unsigned long start_time = jiffies;
@@ -547,8 +562,11 @@ void qdisc_reset(struct Qdisc *qdisc)
 	if (ops->reset)
 		ops->reset(qdisc);
 
-	kfree_skb(qdisc->gso_skb);
-	qdisc->gso_skb = NULL;
+	if (qdisc->gso_skb) {
+		kfree_skb(qdisc->gso_skb);
+		qdisc->gso_skb = NULL;
+		qdisc->q.qlen = 0;
+	}
 }
 EXPORT_SYMBOL(qdisc_reset);
 
@@ -605,6 +623,9 @@ static void attach_one_default_qdisc(str
 		printk(KERN_INFO "%s: activation failed\n", dev->name);
 		return;
 	}
+
+	/* Can by-pass the queue discipline for default qdisc */
+	qdisc->flags |= TCQ_F_CAN_BYPASS;
 	} else {
 		qdisc = &noqueue_qdisc;
 	}
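One detail of the bypass path worth spelling out: the driver may free the skb as soon as sch_direct_xmit() succeeds, so __dev_xmit_skb() copies skb->len into a local before transmitting and only then updates bstats. Below is a userspace sketch of that ordering (illustrative names; update_bstats() models the corrected void-returning __qdisc_update_bstats() from the typo fix above):

#include <stdio.h>
#include <stdlib.h>

struct model_skb { unsigned int len; };
struct model_bstats { unsigned long long bytes; unsigned long packets; };

/* Models __qdisc_update_bstats() with the corrected void return type. */
static void update_bstats(struct model_bstats *b, unsigned int len)
{
	b->bytes += len;
	b->packets++;
}

/* Models a successful driver transmit, which consumes (frees) the skb. */
static void direct_xmit(struct model_skb *skb)
{
	free(skb);
}

int main(void)
{
	struct model_bstats bstats = { 0, 0 };
	struct model_skb *skb = malloc(sizeof(*skb));

	if (!skb)
		return 1;
	skb->len = 1500;

	unsigned int len = skb->len;	/* cache before the skb can vanish */

	direct_xmit(skb);		/* skb must not be touched after this */
	update_bstats(&bstats, len);	/* safe: uses the cached length */

	printf("Sent %llu bytes %lu pkt\n", bstats.bytes, bstats.packets);
	return 0;
}

Without this update, the "Sent" counters in "tc -s qdisc show" would miss every bypassed packet - exactly the zero-stats symptom Krishna reported for a single netperf run.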