Patchwork CHOKe flow scheduler (0.8)

login
register
mail settings
Submitter Eric Dumazet
Date Jan. 14, 2011, 3:29 a.m.
Message ID <1294975793.3403.117.camel@edumazet-laptop>
Download mbox | patch
Permalink /patch/78860/
State RFC
Delegated to: David Miller
Headers show

Comments

Eric Dumazet - Jan. 14, 2011, 3:29 a.m.
Le jeudi 13 janvier 2011 à 15:34 -0800, Stephen Hemminger a écrit :
> CHOKe ("CHOose and Kill" or "CHOose and Keep") is an alternative
> packet scheduler based on the Random Exponential Drop (RED) algorithm.
> 
> The core idea is:
>   For every packet arrival:
>   	Calculate Qave
> 	if (Qave < minth) 
> 	     Queue the new packet
> 	else 
> 	     Select randomly a packet from the queue 
> 	     if (both packets from same flow)
> 	     then Drop both the packets
> 	     else if (Qave > maxth)
> 	          Drop packet
> 	     else
> 	       	  Admit packet with probability P (same as RED)
> 
> See also:
>   Rong Pan, Balaji Prabhakar, Konstantinos Psounis, "CHOKe: a stateless active
>    queue management scheme for approximating fair bandwidth allocation", 
>   Proceeding of INFOCOM'2000, March 2000.
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> ---
> 0.8 change queue length and holes account.
>     keep sch->q.qlen updated, and holes counter not needed.
> 
> ---
>  net/sched/Kconfig     |   11 +
>  net/sched/Makefile    |    1 
>  net/sched/sch_choke.c |  536 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 548 insertions(+)
> 

Hi Stephen

Your diffstat was an old one, here the right one.

 include/linux/pkt_sched.h |   29 +
 net/sched/Kconfig         |   11 
 net/sched/Makefile        |    1 
 net/sched/sch_choke.c     |  552 ++++++++++++++++++++++++++++++++++++


I tested v8 and found several serious problems, please find a diff of my
latest changes :

- wrong oskb/skb used in choke_enqueue()
- choke_zap_head_holes() is called from choke_dequeue() and crash if we
dequeued last packet. (!!!)
- out of bound access in choke_zap_tail_holes()
- choke_dequeue() can be shorter
- choke_change() must dequeue/drop in excess packets or risk new array
overfill (if we reduce queue limit by tc qdisc change ...)
- inline is not needed, space errors in include file

Thanks !

 include/linux/pkt_sched.h |    8 +++---
 net/sched/sch_choke.c     |   43 ++++++++++++++++++++++--------------
 2 files changed, 31 insertions(+), 20 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet - Jan. 14, 2011, 3:34 a.m.
Le vendredi 14 janvier 2011 à 04:29 +0100, Eric Dumazet a écrit :
> Le jeudi 13 janvier 2011 à 15:34 -0800, Stephen Hemminger a écrit :
> > CHOKe ("CHOose and Kill" or "CHOose and Keep") is an alternative
> > packet scheduler based on the Random Exponential Drop (RED) algorithm.
> > 
> > The core idea is:
> >   For every packet arrival:
> >   	Calculate Qave
> > 	if (Qave < minth) 
> > 	     Queue the new packet
> > 	else 
> > 	     Select randomly a packet from the queue 
> > 	     if (both packets from same flow)
> > 	     then Drop both the packets
> > 	     else if (Qave > maxth)
> > 	          Drop packet
> > 	     else
> > 	       	  Admit packet with probability P (same as RED)
> > 
> > See also:
> >   Rong Pan, Balaji Prabhakar, Konstantinos Psounis, "CHOKe: a stateless active
> >    queue management scheme for approximating fair bandwidth allocation", 
> >   Proceeding of INFOCOM'2000, March 2000.
> > 
> > Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> > 
> > ---
> > 0.8 change queue length and holes account.
> >     keep sch->q.qlen updated, and holes counter not needed.
> > 
> > ---
> >  net/sched/Kconfig     |   11 +
> >  net/sched/Makefile    |    1 
> >  net/sched/sch_choke.c |  536 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 548 insertions(+)
> > 
> 
> Hi Stephen
> 
> Your diffstat was an old one, here the right one.
> 
>  include/linux/pkt_sched.h |   29 +
>  net/sched/Kconfig         |   11 
>  net/sched/Makefile        |    1 
>  net/sched/sch_choke.c     |  552 ++++++++++++++++++++++++++++++++++++
> 
> 
> I tested v8 and found several serious problems, please find a diff of my
> latest changes :
> 
> - wrong oskb/skb used in choke_enqueue()
> - choke_zap_head_holes() is called from choke_dequeue() and crash if we
> dequeued last packet. (!!!)
> - out of bound access in choke_zap_tail_holes()
> - choke_dequeue() can be shorter
> - choke_change() must dequeue/drop in excess packets or risk new array
> overfill (if we reduce queue limit by tc qdisc change ...)
> - inline is not needed, space errors in include file
> 
> Thanks !

Hmm, please wait a bit, I had another crash when I stopped my
bench/stress



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 83bac92..498c798 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -269,10 +269,10 @@  struct tc_choke_qopt {
 };
 
 struct tc_choke_xstats {
-	__u32           early;          /* Early drops */
-	__u32           pdrop;          /* Drops due to queue limits */
-	__u32           other;          /* Drops due to drop() calls */
-	__u32           marked;         /* Marked packets */
+	__u32		early;          /* Early drops */
+	__u32		pdrop;          /* Drops due to queue limits */
+	__u32		other;          /* Drops due to drop() calls */
+	__u32		marked;         /* Marked packets */
 	__u32		matched;	/* Drops due to flow match */
 };
 
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 136d4e5..c710c57 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -74,7 +74,7 @@  struct choke_sched_data {
 };
 
 /* deliver a random number between 0 and N - 1 */
-static inline u32 random_N(unsigned int N)
+static u32 random_N(unsigned int N)
 {
 	return reciprocal_divide(random32(), N);
 }
@@ -99,13 +99,13 @@  static struct sk_buff *choke_peek_random(struct Qdisc *sch,
 }
 
 /* Is ECN parameter configured */
-static inline int use_ecn(const struct choke_sched_data *q)
+static int use_ecn(const struct choke_sched_data *q)
 {
 	return q->flags & TC_RED_ECN;
 }
 
 /* Should packets over max just be dropped (versus marked) */
-static inline int use_harddrop(const struct choke_sched_data *q)
+static int use_harddrop(const struct choke_sched_data *q)
 {
 	return q->flags & TC_RED_HARDDROP;
 }
@@ -113,20 +113,21 @@  static inline int use_harddrop(const struct choke_sched_data *q)
 /* Move head pointer forward to skip over holes */
 static void choke_zap_head_holes(struct choke_sched_data *q)
 {
-	while (q->tab[q->head] == NULL) {
+	do {
 		q->head = (q->head + 1) & q->tab_mask;
-
-		BUG_ON(q->head == q->tail);
-	}
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->head] == NULL);
 }
 
 /* Move tail pointer backwards to reuse holes */
 static void choke_zap_tail_holes(struct choke_sched_data *q)
 {
-	while (q->tab[q->tail - 1] == NULL) {
+	do {
 		q->tail = (q->tail - 1) & q->tab_mask;
-		BUG_ON(q->head == q->tail);
-	}
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->tail] == NULL);
 }
 
 /* Drop packet from queue array by creating a "hole" */
@@ -145,7 +146,7 @@  static void choke_drop_by_idx(struct choke_sched_data *q, unsigned int idx)
    2. fast internal classification
    3. use TC filter based classification
 */
-static inline unsigned int choke_classify(struct sk_buff *skb,
+static unsigned int choke_classify(struct sk_buff *skb,
 					  struct Qdisc *sch, int *qerr)
 
 {
@@ -218,7 +219,7 @@  static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			/* Drop both packets */
 			q->stats.matched++;
 			choke_drop_by_idx(q, idx);
-			sch->qstats.backlog -= qdisc_pkt_len(skb);
+			sch->qstats.backlog -= qdisc_pkt_len(oskb);
 			--sch->q.qlen;
 			qdisc_drop(oskb, sch);
 			goto congestion_drop;
@@ -285,8 +286,7 @@  static struct sk_buff *choke_dequeue(struct Qdisc *sch)
 	}
 
 	skb = q->tab[q->head];
-	q->tab[q->head] = NULL; /* not really needed */
-	q->head = (q->head + 1) & q->tab_mask;
+	q->tab[q->head] = NULL;
 	choke_zap_head_holes(q);
 	--sch->q.qlen;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
@@ -371,12 +371,23 @@  static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 		sch_tree_lock(sch);
 		old = q->tab;
 		if (old) {
-			unsigned int tail = 0;
+			unsigned int oqlen = sch->q.qlen, tail = 0;
 
 			while (q->head != q->tail) {
-				ntab[tail++] = q->tab[q->head];
+				struct sk_buff *skb = q->tab[q->head];
+
 				q->head = (q->head + 1) & q->tab_mask;
+				if (!skb)
+					continue;
+				if (tail < mask) {
+					ntab[tail++] = skb;
+					continue;
+				}
+				sch->qstats.backlog -= qdisc_pkt_len(skb);
+				--sch->q.qlen;
+				qdisc_drop(skb, sch);
 			}
+			qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
 			q->head = 0;
 			q->tail = tail;
 		}