diff mbox

[5/5] net: add skb.old_queue_mapping

Message ID 1292251414-5154-5-git-send-email-xiaosuo@gmail.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Changli Gao Dec. 13, 2010, 2:43 p.m. UTC
For the skbs returned from ifb, we should use the queue_mapping
saved before ifb.

We save old queue_mapping in old_queue_mapping just before calling 
dev_queue_xmit, and restore the old_queue_mapping to queue_mapping
just before reinjecting the skb.

dev_pick_tx() use the current queue_mapping for the skbs reinjected
by ifb.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
 drivers/net/ifb.c      |    1 +
 include/linux/skbuff.h |    3 +++
 net/core/dev.c         |    5 +++++
 net/sched/act_mirred.c |    1 +
 4 files changed, 10 insertions(+)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Eric Dumazet Dec. 13, 2010, 4:56 p.m. UTC | #1
Le lundi 13 décembre 2010 à 22:43 +0800, Changli Gao a écrit :
> For the skbs returned from ifb, we should use the queue_mapping
> saved before ifb.
> 
> We save old queue_mapping in old_queue_mapping just before calling 
> dev_queue_xmit, and restore the old_queue_mapping to queue_mapping
> just before reinjecting the skb.
> 
> dev_pick_tx() use the current queue_mapping for the skbs reinjected
> by ifb.
> 
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ---
>  drivers/net/ifb.c      |    1 +
>  include/linux/skbuff.h |    3 +++
>  net/core/dev.c         |    5 +++++
>  net/sched/act_mirred.c |    1 +
>  4 files changed, 10 insertions(+)
> diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
> index 16c767b..481e4b1 100644
> --- a/drivers/net/ifb.c
> +++ b/drivers/net/ifb.c
> @@ -95,6 +95,7 @@ static void ri_tasklet(unsigned long _dev)
>  		u64_stats_update_end(&qp->syncp);
>  		skb->skb_iif = dev->ifindex;
>  
> +		skb->queue_mapping = skb->old_queue_mapping;
>  		if (from & AT_EGRESS) {
>  			dev_queue_xmit(skb);
>  		} else if (from & AT_INGRESS) {
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 19f37a6..2ce2a96 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -403,6 +403,9 @@ struct sk_buff {
>  	};
>  
>  	__u16			vlan_tci;
> +#ifdef CONFIG_NET_CLS_ACT
> +	__u16			old_queue_mapping;
> +#endif
>  

Are you sure we need this field here ? Why not using cb[] for example ?


>  	sk_buff_data_t		transport_header;
>  	sk_buff_data_t		network_header;
> diff --git a/net/core/dev.c b/net/core/dev.c
> index d28b3a0..8e97cfd 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2190,6 +2190,11 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
>  	int queue_index;
>  	const struct net_device_ops *ops = dev->netdev_ops;
>  
> +#ifdef CONFIG_NET_CLS_ACT
> +	if (skb->tc_verd & TC_NCLS)
> +		queue_index = skb_get_queue_mapping(skb);
> +	else
> +#endif
>  	if (dev->real_num_tx_queues == 1)
>  		queue_index = 0;
>  	else if (ops->ndo_select_queue) {
> diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
> index 0c311be..853eb30 100644
> --- a/net/sched/act_mirred.c
> +++ b/net/sched/act_mirred.c
> @@ -197,6 +197,7 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
>  
>  	skb2->skb_iif = skb->dev->ifindex;
>  	skb2->dev = dev;
> +	skb2->old_queue_mapping = skb->queue_mapping;
>  	dev_queue_xmit(skb2);
>  	err = 0;
>  


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Dec. 13, 2010, 5:58 p.m. UTC | #2
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 13 Dec 2010 17:56:52 +0100

> Le lundi 13 décembre 2010 à 22:43 +0800, Changli Gao a écrit :
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index 19f37a6..2ce2a96 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -403,6 +403,9 @@ struct sk_buff {
>>  	};
>>  
>>  	__u16			vlan_tci;
>> +#ifdef CONFIG_NET_CLS_ACT
>> +	__u16			old_queue_mapping;
>> +#endif
>>  
> 
> Are you sure we need this field here ? Why not using cb[] for example ?

Agreed, we should be removing sk_buff members not adding new ones.

We should especially not be adding new members to this critical
structure for more obscure facilities like ifb.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Changli Gao Dec. 13, 2010, 11:58 p.m. UTC | #3
On Tue, Dec 14, 2010 at 12:56 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le lundi 13 décembre 2010 à 22:43 +0800, Changli Gao a écrit :
>> For the skbs returned from ifb, we should use the queue_mapping
>> saved before ifb.
>>
>> We save old queue_mapping in old_queue_mapping just before calling
>> dev_queue_xmit, and restore the old_queue_mapping to queue_mapping
>> just before reinjecting the skb.
>>
>> dev_pick_tx() use the current queue_mapping for the skbs reinjected
>> by ifb.
>>
>> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
>> ---
>>  drivers/net/ifb.c      |    1 +
>>  include/linux/skbuff.h |    3 +++
>>  net/core/dev.c         |    5 +++++
>>  net/sched/act_mirred.c |    1 +
>>  4 files changed, 10 insertions(+)
>> diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
>> index 16c767b..481e4b1 100644
>> --- a/drivers/net/ifb.c
>> +++ b/drivers/net/ifb.c
>> @@ -95,6 +95,7 @@ static void ri_tasklet(unsigned long _dev)
>>               u64_stats_update_end(&qp->syncp);
>>               skb->skb_iif = dev->ifindex;
>>
>> +             skb->queue_mapping = skb->old_queue_mapping;
>>               if (from & AT_EGRESS) {
>>                       dev_queue_xmit(skb);
>>               } else if (from & AT_INGRESS) {
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index 19f37a6..2ce2a96 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -403,6 +403,9 @@ struct sk_buff {
>>       };
>>
>>       __u16                   vlan_tci;
>> +#ifdef CONFIG_NET_CLS_ACT
>> +     __u16                   old_queue_mapping;
>> +#endif
>>
>
> Are you sure we need this field here ? Why not using cb[] for example ?
>

skb->cb can only be used in the same layer, so we can't use it to save
a value crossing layers. A quick grep finds sch_netem just uses
skb->cb. Maybe we can put all the skb->cb in the qdisc layer and
old_queue_mapping in a structure as a new skb->cb. Is it OK? Thanks.
Changli Gao Dec. 15, 2010, 2:59 a.m. UTC | #4
On Tue, Dec 14, 2010 at 7:58 AM, Changli Gao <xiaosuo@gmail.com> wrote:
>
> skb->cb can only be used in the same layer, so we can't use it to save
> a value crossing layers. A quick grep finds sch_netem just uses
> skb->cb. Maybe we can put all the skb->cb in the qdisc layer and
> old_queue_mapping in a structure as a new skb->cb. Is it OK? Thanks.
>

I can't do that. Below the qdisc layer, there is another user of
skb->cb GSO. Though we can set the GSO features of ifb to skip
gso_segments(), it is too tricky and weak.
jamal Dec. 15, 2010, 12:40 p.m. UTC | #5
On Mon, 2010-12-13 at 22:43 +0800, Changli Gao wrote:
> For the skbs returned from ifb, we should use the queue_mapping
> saved before ifb.
> 
> We save old queue_mapping in old_queue_mapping just before calling 
> dev_queue_xmit, and restore the old_queue_mapping to queue_mapping
> just before reinjecting the skb.
> 
> dev_pick_tx() use the current queue_mapping for the skbs reinjected
> by ifb.

You are hard-coding policy here, no? ifb can do a lot of funky things
which change the nature of the flow. I can shape, i can edit packets
etc. Why is the queue mapping any different?

cheers,
jamal

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Changli Gao Dec. 15, 2010, 12:52 p.m. UTC | #6
On Wed, Dec 15, 2010 at 8:40 PM, jamal <hadi@cyberus.ca> wrote:
> On Mon, 2010-12-13 at 22:43 +0800, Changli Gao wrote:
>>
>> dev_pick_tx() use the current queue_mapping for the skbs reinjected
>> by ifb.
>
> You are hard-coding policy here, no? ifb can do a lot of funky things
> which change the nature of the flow. I can shape, i can edit packets
> etc. Why is the queue mapping any different?
>

It seems reasonable. Thanks. I'll remove this hard-coding policy.
diff mbox

Patch

diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 16c767b..481e4b1 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -95,6 +95,7 @@  static void ri_tasklet(unsigned long _dev)
 		u64_stats_update_end(&qp->syncp);
 		skb->skb_iif = dev->ifindex;
 
+		skb->queue_mapping = skb->old_queue_mapping;
 		if (from & AT_EGRESS) {
 			dev_queue_xmit(skb);
 		} else if (from & AT_INGRESS) {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 19f37a6..2ce2a96 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -403,6 +403,9 @@  struct sk_buff {
 	};
 
 	__u16			vlan_tci;
+#ifdef CONFIG_NET_CLS_ACT
+	__u16			old_queue_mapping;
+#endif
 
 	sk_buff_data_t		transport_header;
 	sk_buff_data_t		network_header;
diff --git a/net/core/dev.c b/net/core/dev.c
index d28b3a0..8e97cfd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2190,6 +2190,11 @@  static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	int queue_index;
 	const struct net_device_ops *ops = dev->netdev_ops;
 
+#ifdef CONFIG_NET_CLS_ACT
+	if (skb->tc_verd & TC_NCLS)
+		queue_index = skb_get_queue_mapping(skb);
+	else
+#endif
 	if (dev->real_num_tx_queues == 1)
 		queue_index = 0;
 	else if (ops->ndo_select_queue) {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0c311be..853eb30 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -197,6 +197,7 @@  static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 
 	skb2->skb_iif = skb->dev->ifindex;
 	skb2->dev = dev;
+	skb2->old_queue_mapping = skb->queue_mapping;
 	dev_queue_xmit(skb2);
 	err = 0;