diff mbox

[net-next] netem: add ECN capability

Message ID 1335863465.11396.45.camel@edumazet-glaptop
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet May 1, 2012, 9:11 a.m. UTC
From: Eric Dumazet <edumazet@google.com>

Add ECN (Explicit Congestion Notification) marking capability to netem

tc qdisc add dev eth0 root netem drop 0.5 ecn

Instead of dropping packets, try to ECN mark them.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Cc: Stephen Hemminger <shemminger@vyatta.com>
---
 include/linux/pkt_sched.h |    1 +
 net/sched/sch_netem.c     |   18 +++++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Hagen Paul Pfeifer May 1, 2012, 9:28 a.m. UTC | #1
Nice!


* Eric Dumazet | 2012-05-01 11:11:05 [+0200]:

> 	u32 loss;
>+	u32 ecn;

Why no bool here Eric?

> 	u32 limit;
> 	u32 counter;
> 	u32 gap;
>@@ -374,9 +376,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
> 		++count;
> 
> 	/* Drop packet? */
>-	if (loss_event(q))
>-		--count;
>-
>+	if (loss_event(q)) {
>+		if (q->ecn && INET_ECN_set_ce(skb))
>+			sch->qstats.drops++; /* mark packet */
>+		else
>+			--count;
>+	}
> 	if (count == 0) {
> 		sch->qstats.drops++;
> 		kfree_skb(skb);
>@@ -706,6 +711,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
> 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
> 	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
> 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
>+	[TCA_NETEM_ECN]		= { .type = NLA_U32 },

This netem API, what a heck ... Ideal we would extend TCA_NETEM_LOSS
somehow[tm], but yes ... ;(

Acked-by: Hagen Paul Pfeifer <hagen@jauu.net>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Taht May 1, 2012, 10:14 a.m. UTC | #2
On Tue, May 1, 2012 at 2:28 AM, Hagen Paul Pfeifer <hagen@jauu.net> wrote:
> Nice!

Applause as well! While this is something of a fork of this
discussion, I would rather like to resurrect a variant of the old
'time in queue' patch...

http://patchwork.ozlabs.org/patch/125329/

As best as I recall we'd run into trouble with combining things like
netem and complex qdiscs, because we had basically overrun the size of
the control block.

And then there was a long discussion of all that and I forget if any
conclusion was reached.

An alternative to reducing the length of the control block was to find
some way to leverage this sanely in the qdisc layer...

#define HAVE_HW_TIME_STAMP

/**
 * struct skb_shared_hwtstamps - hardware time stamps
 * @hwtstamp:   hardware time stamp transformed into duration
 *              since arbitrary point in time
 * @syststamp:  hwtstamp transformed to system time base
 *
David Miller May 1, 2012, 1:40 p.m. UTC | #3
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 01 May 2012 11:11:05 +0200

> From: Eric Dumazet <edumazet@google.com>
> 
> Add ECN (Explicit Congestion Notification) marking capability to netem
> 
> tc qdisc add dev eth0 root netem drop 0.5 ecn
> 
> Instead of dropping packets, try to ECN mark them.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Applied.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger May 1, 2012, 4:59 p.m. UTC | #4
On Tue, 01 May 2012 11:11:05 +0200
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> From: Eric Dumazet <edumazet@google.com>
> 
> Add ECN (Explicit Congestion Notification) marking capability to netem
> 
> tc qdisc add dev eth0 root netem drop 0.5 ecn
> 
> Instead of dropping packets, try to ECN mark them.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Neal Cardwell <ncardwell@google.com>
> Cc: Tom Herbert <therbert@google.com>
> Cc: Hagen Paul Pfeifer <hagen@jauu.net>
> Cc: Stephen Hemminger <shemminger@vyatta.com>
> ---
>  include/linux/pkt_sched.h |    1 +
>  net/sched/sch_netem.c     |   18 +++++++++++++++---
>  2 files changed, 16 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
> index 410b33d..ffe975c 100644
> --- a/include/linux/pkt_sched.h
> +++ b/include/linux/pkt_sched.h
> @@ -509,6 +509,7 @@ enum {
>  	TCA_NETEM_CORRUPT,
>  	TCA_NETEM_LOSS,
>  	TCA_NETEM_RATE,
> +	TCA_NETEM_ECN,
>  	__TCA_NETEM_MAX,
>  };
>  
> diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
> index 1109731..231cd11 100644
> --- a/net/sched/sch_netem.c
> +++ b/net/sched/sch_netem.c
> @@ -26,6 +26,7 @@
>  
>  #include <net/netlink.h>
>  #include <net/pkt_sched.h>
> +#include <net/inet_ecn.h>
>  
>  #define VERSION "1.3"
>  
> @@ -78,6 +79,7 @@ struct netem_sched_data {
>  	psched_tdiff_t jitter;
>  
>  	u32 loss;
> +	u32 ecn;
>  	u32 limit;
>  	u32 counter;
>  	u32 gap;
> @@ -374,9 +376,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
>  		++count;
>  
>  	/* Drop packet? */
> -	if (loss_event(q))
> -		--count;
> -
> +	if (loss_event(q)) {
> +		if (q->ecn && INET_ECN_set_ce(skb))
> +			sch->qstats.drops++; /* mark packet */
> +		else
> +			--count;
> +	}
>  	if (count == 0) {
>  		sch->qstats.drops++;
>  		kfree_skb(skb);
> @@ -706,6 +711,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
>  	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
>  	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
>  	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
> +	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
>  };
>  
>  static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
> @@ -776,6 +782,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
>  	if (tb[TCA_NETEM_RATE])
>  		get_rate(sch, tb[TCA_NETEM_RATE]);
>  
> +	if (tb[TCA_NETEM_ECN])
> +		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
> +
>  	q->loss_model = CLG_RANDOM;
>  	if (tb[TCA_NETEM_LOSS])
>  		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
> @@ -902,6 +911,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
>  	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
>  		goto nla_put_failure;
>  
> +	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
> +		goto nla_put_failure;
> +
>  	if (dump_loss_model(q, skb) != 0)
>  		goto nla_put_failure;

The concept is fine, but a couple of questions.
 1. Why a whole u32 for boolean?
 2. The logic in this part of netem is setup to handle case of random duplication
    combined with random loss. With ecn option set, will this code correctly
    handled a duplication combined with a loss and send one packet?
    It looks like the new code would change that behaviour.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger May 1, 2012, 4:59 p.m. UTC | #5
On Tue, 01 May 2012 09:40:51 -0400 (EDT)
David Miller <davem@davemloft.net> wrote:

> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Tue, 01 May 2012 11:11:05 +0200
> 
> > From: Eric Dumazet <edumazet@google.com>
> > 
> > Add ECN (Explicit Congestion Notification) marking capability to netem
> > 
> > tc qdisc add dev eth0 root netem drop 0.5 ecn
> > 
> > Instead of dropping packets, try to ECN mark them.
> > 
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> 
> Applied.

At least give a day of review, rather than premature acceptance
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet May 1, 2012, 5:10 p.m. UTC | #6
On Tue, 2012-05-01 at 09:59 -0700, Stephen Hemminger wrote:

> The concept is fine, but a couple of questions.
>  1. Why a whole u32 for boolean?

a boolean in this structure wont save space, and this file is full of
u32. Why bother ?

IMHO boolean are fine for function arguments, but in a structure, not
very helpful.

>  2. The logic in this part of netem is setup to handle case of random duplication
>     combined with random loss. With ecn option set, will this code correctly
>     handled a duplication combined with a loss and send one packet?
>     It looks like the new code would change that behaviour.

Hmm, I'll take a look.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller May 1, 2012, 5:17 p.m. UTC | #7
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 1 May 2012 09:59:38 -0700

> On Tue, 01 May 2012 09:40:51 -0400 (EDT)
> David Miller <davem@davemloft.net> wrote:
> 
>> From: Eric Dumazet <eric.dumazet@gmail.com>
>> Date: Tue, 01 May 2012 11:11:05 +0200
>> 
>> > From: Eric Dumazet <edumazet@google.com>
>> > 
>> > Add ECN (Explicit Congestion Notification) marking capability to netem
>> > 
>> > tc qdisc add dev eth0 root netem drop 0.5 ecn
>> > 
>> > Instead of dropping packets, try to ECN mark them.
>> > 
>> > Signed-off-by: Eric Dumazet <edumazet@google.com>
>> 
>> Applied.
> 
> At least give a day of review, rather than premature acceptance

Yeah, because Eric Dumazet is going to fall off the face of the
planet and not fix whatever problems you have with his work.

Get real Stephen.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Taht May 1, 2012, 6:49 p.m. UTC | #8
On Tue, May 1, 2012 at 10:17 AM, David Miller <davem@davemloft.net> wrote:
> From: Stephen Hemminger <shemminger@vyatta.com>
> Date: Tue, 1 May 2012 09:59:38 -0700
>
>> On Tue, 01 May 2012 09:40:51 -0400 (EDT)
>> David Miller <davem@davemloft.net> wrote:
>>
>>> From: Eric Dumazet <eric.dumazet@gmail.com>
>>> Date: Tue, 01 May 2012 11:11:05 +0200
>>>
>>> > From: Eric Dumazet <edumazet@google.com>
>>> >
>>> > Add ECN (Explicit Congestion Notification) marking capability to netem
>>> >
>>> > tc qdisc add dev eth0 root netem drop 0.5 ecn
>>> >
>>> > Instead of dropping packets, try to ECN mark them.
>>> >
>>> > Signed-off-by: Eric Dumazet <edumazet@google.com>
>>>
>>> Applied.
>>
>> At least give a day of review, rather than premature acceptance
>
> Yeah, because Eric Dumazet is going to fall off the face of the
> planet and not fix whatever problems you have with his work.
>
> Get real Stephen.

I'd be more concerned about wayward buses, or a team of elite
ninja's hired by outraged BSD and windows hackers sent to slow
eric down.

As I think about it, I think he could handle the ninja problem, tho.
David Miller May 1, 2012, 7:13 p.m. UTC | #9
From: Dave Taht <dave.taht@gmail.com>
Date: Tue, 1 May 2012 11:49:51 -0700

> As I think about it, I think he could handle the ninja problem, tho.

Eric is the Networking Ninja.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 410b33d..ffe975c 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -509,6 +509,7 @@  enum {
 	TCA_NETEM_CORRUPT,
 	TCA_NETEM_LOSS,
 	TCA_NETEM_RATE,
+	TCA_NETEM_ECN,
 	__TCA_NETEM_MAX,
 };
 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1109731..231cd11 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -26,6 +26,7 @@ 
 
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
 
 #define VERSION "1.3"
 
@@ -78,6 +79,7 @@  struct netem_sched_data {
 	psched_tdiff_t jitter;
 
 	u32 loss;
+	u32 ecn;
 	u32 limit;
 	u32 counter;
 	u32 gap;
@@ -374,9 +376,12 @@  static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		++count;
 
 	/* Drop packet? */
-	if (loss_event(q))
-		--count;
-
+	if (loss_event(q)) {
+		if (q->ecn && INET_ECN_set_ce(skb))
+			sch->qstats.drops++; /* mark packet */
+		else
+			--count;
+	}
 	if (count == 0) {
 		sch->qstats.drops++;
 		kfree_skb(skb);
@@ -706,6 +711,7 @@  static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
 	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
+	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
 };
 
 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -776,6 +782,9 @@  static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (tb[TCA_NETEM_RATE])
 		get_rate(sch, tb[TCA_NETEM_RATE]);
 
+	if (tb[TCA_NETEM_ECN])
+		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
+
 	q->loss_model = CLG_RANDOM;
 	if (tb[TCA_NETEM_LOSS])
 		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
@@ -902,6 +911,9 @@  static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
 		goto nla_put_failure;
 
+	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
+		goto nla_put_failure;
+
 	if (dump_loss_model(q, skb) != 0)
 		goto nla_put_failure;