
[v6] rps: Receive Packet Steering

Message ID 1268318738.2986.518.camel@edumazet-laptop
State Superseded, archived
Delegated to: David Miller

Commit Message

Eric Dumazet March 11, 2010, 2:45 p.m. UTC
On Wednesday, March 10, 2010 at 23:00 -0800, Tom Herbert wrote:
> Added explicit entries in sysfs to set RPS CPUs for each device queue.  This includes:
> 
> - Create RX queue instances which are attached to each netdevice (netdev_rx_queue).  These are analogous to the netdev_queue structures which are used for transmit.  RX queue instances hold an rps_map.
> - RX queues are allocated in alloc_netdev_mq for the given queue_count (basically we're assuming #RX queues == #TX queues).  If a driver does not call alloc_netdev_mq, a single instance is created in register_netdevice.
> - Added subdirectories in sysfs for per-queue RPS settings.  These are in /sys/class/net/eth<n>/queues/rx-<n>.  The "queues" directory is not necessary, but keeps the eth<n> directory clean (I don't have a strong opinion on this).  Presumably, if TX queues needed entries, those could go under tx-<n>.  (A usage sketch follows below.)
> 
> Also addressed some previously raised issues:
> - Copy rxhash in skb_clone
> - Took __netif_receive_skb out of kernel-doc
> - Removed VLA containing VLA ugliness (rx queues above fixed that)
> - Copy and clear rps_mask before calling net_rps_action
> 
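As a usage illustration only (the rps_cpus attribute name and the hex
cpumask format are assumptions here, not confirmed by this posting),
steering RX queue 0 of eth0 to CPUs 0-3 from userspace could look like:

/* Hypothetical userspace sketch: write a hex cpumask to the per-queue
 * sysfs attribute this patch adds (attribute name assumed). */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/class/net/eth0/queues/rx-0/rps_cpus";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return 1;
	}
	if (write(fd, "f", 1) != 1)	/* mask 0xf = CPUs 0-3 */
		perror("write");
	close(fd);
	return 0;
}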

Quite an exciting day has begun, thanks Tom!

I reviewed/tested your V6 patch, and have a few comments:

- When a netdevice is freed, I believe you leak rps_maps if they were
previously allocated. I found the following patch necessary to fix it.




Comments

Tom Herbert March 11, 2010, 5:11 p.m. UTC | #1
Eric, thanks for the great comments!

>
> - When a netdevice is freed, I believe you leak rps_maps if they were
> previously allocated. I found the following patch necessary to fix it.
>
Yes.

> - Maybe use max(RPS_MAP_SIZE(x), L1_CACHE_BYTES) for allocations to
> avoid false sharing for small maps. (Otherwise, the rest of the cache
> line might be given to another user that dirties it at a high rate.)
>
Okay.
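
For reference, the map being sized here ends in a flexible array of CPU
ids, so small CPU sets produce allocations far smaller than a cache
line; rounding up to L1_CACHE_BYTES keeps unrelated allocations off that
line. A sketch, with the rps_map layout paraphrased from the v6 patch:

/* rps_map (paraphrased from the v6 patch): a flexible array of
 * CPU ids plus an rcu_head for deferred freeing. */
struct rps_map {
	unsigned int len;
	struct rcu_head rcu;
	u16 cpus[0];
};
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))

/* Padding small maps to a full cache line avoids false sharing: */
map = kzalloc(max_t(unsigned, RPS_MAP_SIZE(weight), L1_CACHE_BYTES),
	      GFP_KERNEL);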

> - "struct netdev_rx_queue" being only read in fast path, I am not sure
> an alignement is necessary, apart the false sharing that can be
> separately addressed.
>
If we get rid of the double indirection as you describe below, then
this can be allocated the same way dev->_tx is (kcalloc).  Also, this
might be a good structure for writing stats, etc.

>
> - A double indirection to get a struct netdev_rx_queue pointer is
> expensive, not for heavy-load benchmarks, but for latencies on
> seldom-used devices. I understand you added this for the kobject
> deferred release and kfreeing, but this seems a high price to pay in
> our fast path. I used another solution.
>
Yes, this is much better.  The kobject documentation has dire warnings
about placing more than one kobject in a data structure; I think I
took that too literally!

>
> - WARN(1, "Recieved packet on %s for queue %u, "
>   -> Received
>
Yes, i before e except after c...

> - Use of cpumask_t for variable on stack is discouraged, we shall use
> cpumask_var_t if possible. Easy for show/store commands. (see my patch)
>
Yes.

> Yet, adding a memory allocation in net_rx_action() seems overkill, so we
> might use two static masks per cpu, and perform a flip? This would help
> machines with 4096 cpus: To Be Done?

That is an interesting idea.  I'll try it.
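
A possible shape for that flip, purely illustrative (every name here is
invented): keep two statically allocated masks per cpu and toggle
between them, so net_rx_action() never has to allocate:

/* Hypothetical per-cpu double buffer of cpumasks; must be called with
 * preemption disabled (e.g. from softirq context). */
static DEFINE_PER_CPU(cpumask_t, rps_masks[2]);
static DEFINE_PER_CPU(int, rps_mask_cur);

static cpumask_t *rps_flip_mask(void)
{
	int cur = __get_cpu_var(rps_mask_cur) ^ 1;

	__get_cpu_var(rps_mask_cur) = cur;
	cpumask_clear(&__get_cpu_var(rps_masks)[cur]);
	return &__get_cpu_var(rps_masks)[cur];
}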

>
>
> - RTNL to guard the rps map exchange???
> This sounds like BKL (Big Kernel Lock) syndrome to me:
> avoiding it where possible is better, because it won't exist anymore
> in ten years, with 0.99 probability.
>
Yes.

>
> For ease of discussion, I cooked the following patch on top of yours:
>

I will apply it.


Patch

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a07d6ec..ef82b9d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -596,7 +596,10 @@  static struct attribute *rx_queue_default_attrs[] = {
 static void rx_queue_release(struct kobject *kobj)
 {
        struct netdev_rx_queue *queue = to_rx_queue(kobj);
+       struct rps_map *map = queue->rps_map;
 
+       if (map)
+               call_rcu(&map->rcu, rps_map_release);
        kfree(queue);
 }
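
rps_map_release() itself is not shown in this thread; presumably it is
just the RCU-deferred kfree of the map, along these lines:

/* Assumed implementation of the RCU callback used above. */
static void rps_map_release(struct rcu_head *rcu)
{
	struct rps_map *map = container_of(rcu, struct rps_map, rcu);

	kfree(map);
}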
 
- memory leak in alloc_netdev_mq() if dev_addr_init() fails
	-> must use goto free_rx;


- Maybe use max(RPS_MAP_SIZE(x), L1_CACHE_BYTES) for allocations to
avoid false sharing for small maps. (Otherwise, the rest of the cache
line might be given to another user that dirties it at a high rate.)

- "struct netdev_rx_queue" being only read in fast path, I am not sure
an alignement is necessary, apart the false sharing that can be
separately addressed.


- A double indirection to get a struct netdev_rx_queue pointer is
expensive, not for heavy-load benchmarks, but for latencies on
seldom-used devices. I understand you added this for the kobject
deferred release and kfreeing, but this seems a high price to pay in
our fast path. I used another solution.


- WARN(1, "Recieved packet on %s for queue %u, "
   -> Received

- Use of cpumask_t for variable on stack is discouraged, we shall use
cpumask_var_t if possible. Easy for show/store commands. (see my patch)

Yet, adding a memory allocation in net_rx_action() seems overkill, so we
might use two static masks per cpu, and perform a flip? This would help
machines with 4096 cpus: To Be Done?


- RTNL to guard the rps map exchange???
This sounds like BKL (Big Kernel Lock) syndrome to me:
avoiding it where possible is better, because it won't exist anymore
in ten years, with 0.99 probability.


For ease of discussion, I cooked the following patch on top of yours:

Thanks!

 include/linux/netdevice.h |    8 ++-
 net/core/dev.c            |   64 ++++++++++-------------------
 net/core/net-sysfs.c      |   78 ++++++++++++++++++++++--------------
 3 files changed, 77 insertions(+), 73 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 468da0a..3f4a986 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -544,9 +544,11 @@  struct rps_map {
 
 /* This structure contains an instance of an RX queue. */
 struct netdev_rx_queue {
-	struct kobject kobj;
 	struct rps_map *rps_map;
-} ____cacheline_aligned_in_smp;
+	struct kobject kobj;
+	struct netdev_rx_queue *first;
+	atomic_t count;
+};
 
 /*
  * This structure defines the management hooks for network devices.
@@ -897,7 +899,7 @@  struct net_device {
 
 	struct kset		*queues_kset;
 
-	struct netdev_rx_queue	**_rx;
+	struct netdev_rx_queue	*_rx;
 
 	/* Number of RX queues allocated at alloc_netdev_mq() time  */
 	unsigned int		num_rx_queues;
diff --git a/net/core/dev.c b/net/core/dev.c
index 939b1a2..402f23a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2195,15 +2195,15 @@  static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 		u16 index = skb_get_rx_queue(skb);
 		if (unlikely(index >= dev->num_rx_queues)) {
 			if (net_ratelimit()) {
-				WARN(1, "Recieved packet on %s for queue %u, "
+				WARN(1, "Received packet on %s for queue %u, "
 				    "but number of RX queues is %u\n",
 				     dev->name, index, dev->num_rx_queues);
 			}
 			goto done;
 		}
-		rxqueue = dev->_rx[index];
+		rxqueue = dev->_rx + index;
 	} else
-		rxqueue = dev->_rx[0];
+		rxqueue = dev->_rx;
 
 	if (!rxqueue->rps_map)
 		goto done;
@@ -5236,20 +5236,17 @@  int register_netdevice(struct net_device *dev)
 		 * alloc_netdev_mq
 		 */
 
-		dev->_rx = kzalloc(sizeof(struct netdev_rx_queue *),
-		    GFP_KERNEL);
+		dev->_rx = kzalloc(max_t(unsigned,
+					 sizeof(struct netdev_rx_queue),
+					 L1_CACHE_BYTES),
+				   GFP_KERNEL);
 		if (!dev->_rx) {
 			ret = -ENOMEM;
 			goto out;
 		}
 
-		dev->_rx[0] = kzalloc(sizeof(struct netdev_rx_queue),
-		    GFP_KERNEL);
-		if (!dev->_rx[0]) {
-			kfree(dev->_rx);
-			ret = -ENOMEM;
-			goto out;
-		}
+		dev->_rx[0].first = dev->_rx;
+		atomic_set(&dev->_rx[0].count, 1);
 		dev->num_rx_queues = 1;
 	}
 
@@ -5610,7 +5607,7 @@  struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		void (*setup)(struct net_device *), unsigned int queue_count)
 {
 	struct netdev_queue *tx;
-	struct netdev_rx_queue **rx;
+	struct netdev_rx_queue *rx;
 	struct net_device *dev;
 	size_t alloc_size;
 	struct net_device *p;
@@ -5635,37 +5632,30 @@  struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
 	if (!tx) {
-		printk(KERN_ERR "alloc_netdev: Unable to allocate "
-		       "tx qdiscs.\n");
+		pr_err("alloc_netdev: Unable to allocate tx qdiscs.\n");
 		goto free_p;
 	}
 
-	/*
-	 * Allocate RX queue structures, this includes an array of pointers
-	 * and netdev_queue_rx stuctures (individually allocated since
-	 * each has a kobject).
-	 */
-	rx = kzalloc(queue_count *
-	    sizeof(struct netdev_rx_queue *), GFP_KERNEL);
+	rx = kzalloc(max_t(unsigned,
+			   queue_count * sizeof(struct netdev_rx_queue),
+			   L1_CACHE_BYTES),
+		     GFP_KERNEL);
 	if (!rx) {
-		printk(KERN_ERR "alloc_netdev: Unable to allocate "
-		       "rx queues array.\n");
+		pr_err("alloc_netdev: Unable to allocate rx queues.\n");
 		goto free_tx;
 	}
-	for (i = 0; i < queue_count; i++) {
-		rx[i] = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
-		if (!rx[i]) {
-			printk(KERN_ERR "alloc_netdev: Unable to allocate "
-			    "rx queues.\n");
-			goto free_rx;
-		}
-	}
+	atomic_set(&rx->count, queue_count);
+	/*
+	 * Use counter is located in first element of this array
+	 */
+	for (i = 0; i < queue_count; i++)
+		rx[i].first = rx;
 
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
 	if (dev_addr_init(dev))
-		goto free_tx;
+		goto free_rx;
 
 	dev_unicast_init(dev);
 
@@ -5693,8 +5683,6 @@  struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	return dev;
 
 free_rx:
-	for (i = 0; i < queue_count; i++)
-		kfree(rx[i]);
 	kfree(rx);
 free_tx:
 	kfree(tx);
@@ -5720,12 +5708,6 @@  void free_netdev(struct net_device *dev)
 
 	kfree(dev->_tx);
 
-	/*
-	 * Free RX queue pointer array.  Actual netdev_rx_queue objects are
-	 * freed by kobject release.
-	 */
-	kfree(dev->_rx);
-
 	/* Flush device addresses */
 	dev_addr_flush(dev);
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a07d6ec..de75258 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -516,24 +516,26 @@  static ssize_t show_rps_map(struct netdev_rx_queue *queue,
 	size_t len = 0;
 	struct rps_map *map;
 	int i;
-	cpumask_t mask;
+	cpumask_var_t mask;
 
-	cpus_clear(mask);
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
 
 	rcu_read_lock();
 	map = rcu_dereference(queue->rps_map);
 	if (map) {
 		for (i = 0; i < map->len; i++)
-			cpu_set(map->cpus[i], mask);
+			cpumask_set_cpu(map->cpus[i], mask);
 
-		len += cpumask_scnprintf(buf + len, PAGE_SIZE, &mask);
+		len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
 		if (PAGE_SIZE - len < 3) {
 			rcu_read_unlock();
+			free_cpumask_var(mask);
 			return -EINVAL;
 		}
 	}
 	rcu_read_unlock();
-
+	free_cpumask_var(mask);
 	len += sprintf(buf + len, "\n");
 	return len;
 }
@@ -550,38 +552,48 @@  ssize_t store_rps_map(struct netdev_rx_queue *queue,
 		      const char *buf, size_t len)
 {
 	struct rps_map *old_map, *map;
-	cpumask_t mask;
-	int err, cpu, i, weight;
+	cpumask_var_t mask;
+	int err, cpu, i;
+	static DEFINE_SPINLOCK(rps_map_lock);
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	err = bitmap_parse(buf, len, cpumask_bits(&mask), nr_cpumask_bits);
-	if (err)
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+	if (err) {
+		free_cpumask_var(mask);
 		return err;
+	}
+	map = kzalloc(max_t(unsigned,
+			    RPS_MAP_SIZE(cpumask_weight(mask)),
+			    L1_CACHE_BYTES),
+		      GFP_KERNEL);
+	if (!map) {
+		free_cpumask_var(mask);
+		return -ENOMEM;
+	}
+	i = 0;
+	for_each_cpu_and(cpu, mask, cpu_online_mask)
+		map->cpus[i++] = cpu;
+	if (i)
+		map->len = i;
+	else {
+		kfree(map);
+		map = NULL;
+	}
 
-	weight = cpumask_weight(&mask);
-	if (weight) {
-		map = kzalloc(RPS_MAP_SIZE(weight), GFP_KERNEL);
-		if (!map)
-			return -ENOMEM;
-
-		cpus_and(mask, mask, cpu_online_map);
-		i = 0;
-		for_each_cpu_mask(cpu, mask)
-			map->cpus[i++] = cpu;
-		map->len = i;
-	} else
-		map = NULL;
-
-	rtnl_lock();
+	spin_lock(&rps_map_lock);
 	old_map = queue->rps_map;
 	rcu_assign_pointer(queue->rps_map, map);
-	rtnl_unlock();
+	spin_unlock(&rps_map_lock);
 
 	if (old_map)
 		call_rcu(&old_map->rcu, rps_map_release);
 
+	free_cpumask_var(mask);
 	return len;
 }
 
@@ -596,8 +608,16 @@  static struct attribute *rx_queue_default_attrs[] = {
 static void rx_queue_release(struct kobject *kobj)
 {
 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+	struct rps_map *map = queue->rps_map;
+	struct netdev_rx_queue *first = queue->first;
 
-	kfree(queue);
+	if (map)
+		call_rcu(&map->rcu, rps_map_release);
+	/*
+	 * Free the array containing us only if all elems were released
+	 */
+	if (atomic_dec_and_test(&first->count))
+		kfree(first);
 }
 
 static struct kobj_type rx_queue_ktype = {
@@ -609,7 +629,7 @@  static struct kobj_type rx_queue_ktype = {
 static int rx_queue_add_kobject(struct net_device *net, int index)
 {
 	int error = 0;
-	struct netdev_rx_queue *queue = net->_rx[index];
+	struct netdev_rx_queue *queue = net->_rx + index;
 	struct kobject *kobj = &queue->kobj;
 
 	kobj->kset = net->queues_kset;
@@ -642,7 +662,7 @@  static int rx_queue_register_kobjects(struct net_device *net)
 
 	if (error)
 		while (--i >= 0)
-			kobject_put(&net->_rx[i]->kobj);
+			kobject_put(&net->_rx[i].kobj);
 
 	return error;
 }
@@ -652,7 +672,7 @@  static void rx_queue_remove_kobjects(struct net_device *net)
 	int i;
 
 	for (i = 0; i < net->num_rx_queues; i++)
-		kobject_put(&net->_rx[i]->kobj);
+		kobject_put(&net->_rx[i].kobj);
 	kset_unregister(net->queues_kset);
 }
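
For completeness, the consumer of queue->rps_map in get_rps_cpu()
(reconstructed here from the RPS patches, so treat it as a sketch)
scales the 32-bit packet hash into the map to pick a target CPU:

/* Sketch: map skb->rxhash onto one of the configured CPUs.
 * Caller holds rcu_read_lock(). */
static int pick_rps_cpu(struct netdev_rx_queue *rxqueue, struct sk_buff *skb)
{
	struct rps_map *map = rcu_dereference(rxqueue->rps_map);

	if (map && skb->rxhash) {
		u16 tcpu = map->cpus[((u64)skb->rxhash * map->len) >> 32];

		if (cpu_online(tcpu))
			return tcpu;
	}
	return -1;
}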