diff mbox

sch_htb: fix the deficit overflows

Message ID 4B0F8A5D.1040806@gmail.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Changli Gao Nov. 27, 2009, 8:14 a.m. UTC
fix the deficit overflows.

HTB uses the WDRR (Weighted Deficit Round Robin) algorithm to schedule the spare bandwidth, but it doesn't check whether the deficit is big enough for the skb when dequeuing an skb from a class. In some cases (when the quantum is smaller than the packet size), the deficit is decreased even when it is already below zero. Eventually the deficit overflows and becomes MAX_INT.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 sch_htb.c |   25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jarek Poplawski Nov. 28, 2009, 12:04 a.m. UTC | #1
Changli Gao wrote, On 11/27/2009 09:14 AM:

> fix the deficit overflows.
> 
> HTB uses WDRR(Weighted Deficit Round Robin) algorithm to schedule the spare
> bandwidth, but it doesn't check if the deficit is big enough for the skb when
> dequeuing skb from a class. In some case(the quantum is smaller than the
> packet size), the deficit will be decreased, even when it is smaller than
> ZERO. At last, the deficit will overflows, and become MAX_INT.

This case of the quantum being smaller than the packet size should be treated
as a broken config, so I don't think it's worth making such a deep change,
with additional delays and CPU cycles for everyone, to fix it. A warning or
a lower limit should be enough (if necessary at all).

Jarek P.

> 
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ----
>  sch_htb.c |   25 ++++++++++++++++++++-----
>  1 file changed, 20 insertions(+), 5 deletions(-)
> diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
> index 2e38d1a..293983e 100644
> --- a/net/sched/sch_htb.c
> +++ b/net/sched/sch_htb.c
> @@ -783,6 +783,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
>  {
>  	struct sk_buff *skb = NULL;
>  	struct htb_class *cl, *start;
> +	unsigned int len;
>  	/* look initial class up in the row */
>  	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
>  				     q->ptr[level] + prio,
> @@ -815,9 +816,23 @@ next:
>  			goto next;
>  		}
>  
> -		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
> -		if (likely(skb != NULL))
> -			break;
> +		skb = cl->un.leaf.q->ops->peek(cl->un.leaf.q);
> +		if (likely(skb != NULL)) {
> +			len = qdisc_pkt_len(skb);
> +			if (len <= cl->un.leaf.deficit[level]) {
> +				skb = qdisc_dequeue_peeked(cl->un.leaf.q);
> +				break;
> +			}
> +			skb = NULL;
> +			cl->un.leaf.deficit[level] += cl->quantum;
> +			htb_next_rb_node((level ? cl->parent->un.inner.ptr :
> +					  q->ptr[0]) + prio);
> +			cl = htb_lookup_leaf(q->row[level] + prio, prio,
> +					     q->ptr[level] + prio,
> +					     q->last_ptr_id[level] + prio);
> +			start = cl;
> +			goto next;
> +		}
>  
>  		qdisc_warn_nonwc("htb", cl->un.leaf.q);
>  		htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
> @@ -829,8 +844,8 @@ next:
>  	} while (cl != start);
>  
>  	if (likely(skb != NULL)) {
> -		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
> -		if (cl->un.leaf.deficit[level] < 0) {
> +		cl->un.leaf.deficit[level] -= len;
> +		if (cl->un.leaf.deficit[level] <= 0) {
>  			cl->un.leaf.deficit[level] += cl->quantum;
>  			htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
>  					  ptr[0]) + prio);
> 
> --
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 2e38d1a..293983e 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -783,6 +783,7 @@  static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
 {
 	struct sk_buff *skb = NULL;
 	struct htb_class *cl, *start;
+	unsigned int len;
 	/* look initial class up in the row */
 	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
 				     q->ptr[level] + prio,
@@ -815,9 +816,23 @@  next:
 			goto next;
 		}
 
-		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
-		if (likely(skb != NULL))
-			break;
+		skb = cl->un.leaf.q->ops->peek(cl->un.leaf.q);
+		if (likely(skb != NULL)) {
+			len = qdisc_pkt_len(skb);
+			if (len <= cl->un.leaf.deficit[level]) {
+				skb = qdisc_dequeue_peeked(cl->un.leaf.q);
+				break;
+			}
+			skb = NULL;
+			cl->un.leaf.deficit[level] += cl->quantum;
+			htb_next_rb_node((level ? cl->parent->un.inner.ptr :
+					  q->ptr[0]) + prio);
+			cl = htb_lookup_leaf(q->row[level] + prio, prio,
+					     q->ptr[level] + prio,
+					     q->last_ptr_id[level] + prio);
+			start = cl;
+			goto next;
+		}
 
 		qdisc_warn_nonwc("htb", cl->un.leaf.q);
 		htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
@@ -829,8 +844,8 @@  next:
 	} while (cl != start);
 
 	if (likely(skb != NULL)) {
-		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
-		if (cl->un.leaf.deficit[level] < 0) {
+		cl->un.leaf.deficit[level] -= len;
+		if (cl->un.leaf.deficit[level] <= 0) {
 			cl->un.leaf.deficit[level] += cl->quantum;
 			htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
 					  ptr[0]) + prio);