Patchwork CHOKe flow scheduler (0.8)

login
register
mail settings
Submitter Eric Dumazet
Date Jan. 14, 2011, 3:58 a.m.
Message ID <1294977498.3403.127.camel@edumazet-laptop>
Download mbox | patch
Permalink /patch/78861/
State RFC
Delegated to: David Miller
Headers show

Comments

Eric Dumazet - Jan. 14, 2011, 3:58 a.m.
Le vendredi 14 janvier 2011 à 04:34 +0100, Eric Dumazet a écrit :

> Hmm, please wait a bit, I had another crash when I stopped my
> bench/stress

I am not sure p->qavg is correctly computed.

Crash happened because choke_peek_random() was called while no packet
was in queue.

With my params (min=10833 max=32500 burst=18055 limit=130000) this
implies qavg was very big while qlen==0 !

qdisc choke 11: dev ifb0 parent 1:11 limit 130000b min 10833b max 32500b ewma 13 Plog 21 Scell_log 30
 Sent 200857857 bytes 365183 pkt (dropped 1010937, overlimits 557577 requeues 0) 
 rate 32253Kbit 7330pps backlog 17875996b 32505p requeues 0 
  marked 0 early 557577 pdrop 0 other 0 matched 226680


Here is latest diff :

 include/linux/pkt_sched.h |    8 +++----
 net/sched/sch_choke.c     |   50 +++++++++++++++++++++++++++++-----------------
 2 files changed, 36 insertions(+), 22 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet - Jan. 14, 2011, 11:32 a.m.
Le vendredi 14 janvier 2011 à 04:58 +0100, Eric Dumazet a écrit :
> Le vendredi 14 janvier 2011 à 04:34 +0100, Eric Dumazet a écrit :
> 
> > Hmm, please wait a bit, I had another crash when I stopped my
> > bench/stress
> 
> I am not sure p->qavg is correctly computed.
> 
> Crash happened because choke_peek_random() was called while no packet
> was in queue.
> 
> With my params (min=10833 max=32500 burst=18055 limit=130000) this
> implies qavg was very big while qlen==0 !
> 
> qdisc choke 11: dev ifb0 parent 1:11 limit 130000b min 10833b max 32500b ewma 13 Plog 21 Scell_log 30
>  Sent 200857857 bytes 365183 pkt (dropped 1010937, overlimits 557577 requeues 0) 
>  rate 32253Kbit 7330pps backlog 17875996b 32505p requeues 0 
>   marked 0 early 557577 pdrop 0 other 0 matched 226680

Moving the qdisc_bstats_update(sch, skb); out of choke_enqueue() to
choke_dequeue(), I get nicer rate values (because packets that are
enqueued but CHOKed don't artificially raise the packet/byte rates)

Now, rate properly matches my 10Mbit CBQ bandwidth :

qdisc choke 11: parent 1:11 limit 130000b min 10833b max 32500b ewma 13 Plog 21 Scell_log 30
 Sent 86470970 bytes 157418 pkt (dropped 127451, overlimits 48275 requeues 0) 
 rate 9947Kbit 2264pps backlog 17759368b 32288p requeues 0 
  marked 0 early 48275 pdrop 0 other 0 matched 39588


For other qdiscs, it is less easy because qdisc_bstats_update() call is
integrated in __qdisc_enqueue_tail() / qdisc_enqueue_tail(), so all
users shall be updated at once.





--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 83bac92..498c798 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -269,10 +269,10 @@  struct tc_choke_qopt {
 };
 
 struct tc_choke_xstats {
-	__u32           early;          /* Early drops */
-	__u32           pdrop;          /* Drops due to queue limits */
-	__u32           other;          /* Drops due to drop() calls */
-	__u32           marked;         /* Marked packets */
+	__u32		early;          /* Early drops */
+	__u32		pdrop;          /* Drops due to queue limits */
+	__u32		other;          /* Drops due to drop() calls */
+	__u32		marked;         /* Marked packets */
 	__u32		matched;	/* Drops due to flow match */
 };
 
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 136d4e5..2f94dad 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -74,7 +74,7 @@  struct choke_sched_data {
 };
 
 /* deliver a random number between 0 and N - 1 */
-static inline u32 random_N(unsigned int N)
+static u32 random_N(unsigned int N)
 {
 	return reciprocal_divide(random32(), N);
 }
@@ -94,18 +94,20 @@  static struct sk_buff *choke_peek_random(struct Qdisc *sch,
 			return skb;
 	} while (--retrys > 0);
 
-	/* queue is has lots of holes use the head which is known to exist */
+	/* queue is has lots of holes use the head which is known to exist
+	 * Note : result can still be NULL if q->head == q->tail
+	 */
 	return q->tab[*pidx = q->head];
 }
 
 /* Is ECN parameter configured */
-static inline int use_ecn(const struct choke_sched_data *q)
+static int use_ecn(const struct choke_sched_data *q)
 {
 	return q->flags & TC_RED_ECN;
 }
 
 /* Should packets over max just be dropped (versus marked) */
-static inline int use_harddrop(const struct choke_sched_data *q)
+static int use_harddrop(const struct choke_sched_data *q)
 {
 	return q->flags & TC_RED_HARDDROP;
 }
@@ -113,20 +115,21 @@  static inline int use_harddrop(const struct choke_sched_data *q)
 /* Move head pointer forward to skip over holes */
 static void choke_zap_head_holes(struct choke_sched_data *q)
 {
-	while (q->tab[q->head] == NULL) {
+	do {
 		q->head = (q->head + 1) & q->tab_mask;
-
-		BUG_ON(q->head == q->tail);
-	}
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->head] == NULL);
 }
 
 /* Move tail pointer backwards to reuse holes */
 static void choke_zap_tail_holes(struct choke_sched_data *q)
 {
-	while (q->tab[q->tail - 1] == NULL) {
+	do {
 		q->tail = (q->tail - 1) & q->tab_mask;
-		BUG_ON(q->head == q->tail);
-	}
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->tail] == NULL);
 }
 
 /* Drop packet from queue array by creating a "hole" */
@@ -145,7 +148,7 @@  static void choke_drop_by_idx(struct choke_sched_data *q, unsigned int idx)
    2. fast internal classification
    3. use TC filter based classification
 */
-static inline unsigned int choke_classify(struct sk_buff *skb,
+static unsigned int choke_classify(struct sk_buff *skb,
 					  struct Qdisc *sch, int *qerr)
 
 {
@@ -214,11 +217,12 @@  static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		oskb = choke_peek_random(sch, &idx);
 
 		/* Both packets from same flow ? */
-		if (*(unsigned int *)(qdisc_skb_cb(oskb)->data) == hash) {
+		if (oskb &&
+		    *(unsigned int *)(qdisc_skb_cb(oskb)->data) == hash) {
 			/* Drop both packets */
 			q->stats.matched++;
 			choke_drop_by_idx(q, idx);
-			sch->qstats.backlog -= qdisc_pkt_len(skb);
+			sch->qstats.backlog -= qdisc_pkt_len(oskb);
 			--sch->q.qlen;
 			qdisc_drop(oskb, sch);
 			goto congestion_drop;
@@ -285,8 +289,7 @@  static struct sk_buff *choke_dequeue(struct Qdisc *sch)
 	}
 
 	skb = q->tab[q->head];
-	q->tab[q->head] = NULL; /* not really needed */
-	q->head = (q->head + 1) & q->tab_mask;
+	q->tab[q->head] = NULL;
 	choke_zap_head_holes(q);
 	--sch->q.qlen;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
@@ -371,12 +374,23 @@  static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 		sch_tree_lock(sch);
 		old = q->tab;
 		if (old) {
-			unsigned int tail = 0;
+			unsigned int oqlen = sch->q.qlen, tail = 0;
 
 			while (q->head != q->tail) {
-				ntab[tail++] = q->tab[q->head];
+				struct sk_buff *skb = q->tab[q->head];
+
 				q->head = (q->head + 1) & q->tab_mask;
+				if (!skb)
+					continue;
+				if (tail < mask) {
+					ntab[tail++] = skb;
+					continue;
+				}
+				sch->qstats.backlog -= qdisc_pkt_len(skb);
+				--sch->q.qlen;
+				qdisc_drop(skb, sch);
 			}
+			qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
 			q->head = 0;
 			q->tail = tail;
 		}