diff mbox

[1/3] net: introduce a list of device addresses dev_addr_list

Message ID 20090415083223.GF21342@psychotron.englab.brq.redhat.com
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Jiri Pirko April 15, 2009, 8:32 a.m. UTC
This patch introduces a new list in struct net_device and brings a set of
functions to handle the work with device address list. The list is a replacement
for the original dev_addr field and because in some situations there is need to
carry several device addresses with the net device. To be backward compatible,
dev_addr is made to point to the first member of the list so original drivers
sees no difference.

Note: patch adding list_first_entry_rcu (currently in Ingo's tip tree) needed.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
---
 include/linux/etherdevice.h |   24 ++++
 include/linux/netdevice.h   |   31 +++++-
 net/core/dev.c              |  263 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 316 insertions(+), 2 deletions(-)

Comments

David Miller April 15, 2009, 9:21 a.m. UTC | #1
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 15 Apr 2009 10:32:24 +0200

> This patch introduces a new list in struct net_device and brings a set of
> functions to handle the work with device address list. The list is a replacement
> for the original dev_addr field and because in some situations there is need to
> carry several device addresses with the net device. To be backward compatible,
> dev_addr is made to point to the first member of the list so original drivers
> sees no difference.
> 
> Note: patch adding list_first_entry_rcu (currently in Ingo's tip tree) needed.
> 
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>

Jiri, please add some distinguishing text to your subject lines when
you post fixed up version of patches.  Like "v2" or something like
that, and make a note under the commit message of the changes you've
made from the previous version.

Otherwise I think it's a dup (because I get a thousand copies anyways)
and will just delete it both in my inbox and on patchwork.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet April 15, 2009, 9:27 a.m. UTC | #2
Jiri Pirko a écrit :
> This patch introduces a new list in struct net_device and brings a set of
> functions to handle the work with device address list. The list is a replacement
> for the original dev_addr field and because in some situations there is need to
> carry several device addresses with the net device. To be backward compatible,
> dev_addr is made to point to the first member of the list so original drivers
> sees no difference.
> 

You see no difference ? Please look more closely...

I see one additional dereference in hot path, to small objects possibly
with false sharing effects.

So I would advise not changing dev_addr[] to a pointer.
And instead copy first netdev_hw_addr into it.

Also, doing a kzalloc(sizeof(struct netdev_hw_addr)) for allocating these structs
might give a block of memory < L1_CACHE_SIZE so kernel is free to give other
part of this cache line to some other layer that could be a hot spot, so
false sharing could happen.

kzalloc(max(sizeof(*ha), L1_CACHE_SIZE)) is thus higly recommended here.

> Note: patch adding list_first_entry_rcu (currently in Ingo's tip tree) needed.
> 
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
> ---
>  include/linux/etherdevice.h |   24 ++++
>  include/linux/netdevice.h   |   31 +++++-
>  net/core/dev.c              |  263 +++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 316 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
> index a1f17ab..348a75e 100644
> --- a/include/linux/etherdevice.h
> +++ b/include/linux/etherdevice.h
> @@ -205,4 +205,28 @@ static inline int compare_ether_header(const void *a, const void *b)
>  	       (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]);
>  }
>  
> +/**
> + * is_etherdev_addr - Tell if given Ethernet address belongs to the device.
> + * @dev: Pointer to a device structure
> + * @addr: Pointer to a six-byte array containing the Ethernet address
> + *
> + * Compare passed address with all addresses of the device. Return true if the
> + * address if one of the device addresses.
> + */
> +static inline bool is_etherdev_addr(const struct net_device *dev,
> +				    const u8 *addr)
> +{
> +	struct netdev_hw_addr *ha;
> +	int res = 1;
> +
> +	rcu_read_lock();
> +	for_each_dev_addr(dev, ha) {
> +		res = compare_ether_addr(addr, ha->addr);

compare_ether_addr_64bits() please ?

> +		if (!res)
> +			break;
> +	}
> +	rcu_read_unlock();
> +	return !res;
> +}
> +
>  #endif	/* _LINUX_ETHERDEVICE_H */
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 2e7783f..77abfdf 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -210,6 +210,12 @@ struct dev_addr_list
>  #define dmi_users	da_users
>  #define dmi_gusers	da_gusers
>  
> +struct netdev_hw_addr {
> +	struct list_head	list;
> +	unsigned char		addr[MAX_ADDR_LEN];
> +	int			refcount;
> +};
> +
>  struct hh_cache
>  {
>  	struct hh_cache *hh_next;	/* Next entry			     */
> @@ -776,8 +782,11 @@ struct net_device
>   */
>  	unsigned long		last_rx;	/* Time of last Rx	*/
>  	/* Interface address info used in eth_type_trans() */
> -	unsigned char		dev_addr[MAX_ADDR_LEN];	/* hw address, (before bcast
> -							   because most packets are unicast) */
> +	unsigned char		*dev_addr;	/* hw address, (before bcast
> +						   because most packets are
> +						   unicast) */
> +
> +	struct list_head	dev_addr_list; /* list of device hw addresses */
>  
>  	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
>  
> @@ -1778,6 +1787,13 @@ static inline void netif_addr_unlock_bh(struct net_device *dev)
>  	spin_unlock_bh(&dev->addr_list_lock);
>  }
>  
> +/*
> + * dev_addr_list walker. Should be used only for read access. Call with
> + * rcu_read_lock held.
> + */
> +#define for_each_dev_addr(dev, ha) \
> +		list_for_each_entry_rcu(ha, &dev->dev_addr_list, list)
> +
>  /* These functions live elsewhere (drivers/net/net_init.c, but related) */
>  
>  extern void		ether_setup(struct net_device *dev);
> @@ -1790,6 +1806,17 @@ extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
>  	alloc_netdev_mq(sizeof_priv, name, setup, 1)
>  extern int		register_netdev(struct net_device *dev);
>  extern void		unregister_netdev(struct net_device *dev);
> +
> +/* Functions used for device addresses handling */
> +extern int		dev_addr_add(struct net_device *dev,
> +				     unsigned char *addr);
> +extern int		dev_addr_del(struct net_device *dev,
> +				     unsigned char *addr);
> +extern int		dev_addr_add_multiple(struct net_device *to_dev,
> +					      struct net_device *from_dev);
> +extern int		dev_addr_del_multiple(struct net_device *to_dev,
> +					      struct net_device *from_dev);
> +
>  /* Functions used for secondary unicast and multicast support */
>  extern void		dev_set_rx_mode(struct net_device *dev);
>  extern void		__dev_set_rx_mode(struct net_device *dev);
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 91d792d..04cddbb 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -3437,6 +3437,262 @@ void dev_set_rx_mode(struct net_device *dev)
>  	netif_addr_unlock_bh(dev);
>  }
>  
> +/* hw addresses list handling functions */
> +
> +static int __hw_addr_add_ii(struct list_head *list, unsigned char *addr,
> +			    int addr_len, int ignore_index)
> +{
> +	struct netdev_hw_addr *ha;
> +	int i = 0;
> +
> +	if (addr_len > MAX_ADDR_LEN)
> +		return -EINVAL;
> +
> +	rcu_read_lock();

This locking is highly suspect.

> +	list_for_each_entry_rcu(ha, list, list) {
> +		if (i++ != ignore_index &&
> +		    !memcmp(ha->addr, addr, addr_len)) {
> +			ha->refcount++;
> +			rcu_read_unlock();
> +			return 0;
> +		}
> +	}
> +	rcu_read_unlock();

Since you obviously need a write lock here to be sure following
can be done by one cpu only.

You have same problem all over this patch.

> +
> +	ha = kzalloc(sizeof(*ha), GFP_ATOMIC);

kzalloc(max(sizeof(*ha), L1_CACHE_SIZE), GFP_...) is thus higly recommended here.

Also, why GFP_ATOMIC is needed here ?

> +	if (!ha)
> +		return -ENOMEM;
> +	memcpy(ha->addr, addr, addr_len);
> +	ha->refcount = 1;
> +	list_add_tail_rcu(&ha->list, list);
> +	return 0;
> +}
> +
> +static int __hw_addr_add(struct list_head *list, unsigned char *addr,
> +				int addr_len)
> +{
> +	return __hw_addr_add_ii(list, addr, addr_len, -1);
> +}
> +
> +static int __hw_addr_del_ii(struct list_head *list, unsigned char *addr,
> +			    int addr_len, int ignore_index)
> +{
> +	struct netdev_hw_addr *ha;
> +	int i = 0;
> +
> +	list_for_each_entry(ha, list, list) {
> +		if (i++ != ignore_index &&
> +		    !memcmp(ha->addr, addr, addr_len)) {
> +			if (--ha->refcount)
> +				return 0;
> +			list_del_rcu(&ha->list);
> +			synchronize_rcu();

Oh well... I'm pretty sure this synchronize_rcu() call can be avoided,
dont you think ? Check kfree_rcu() or equivalent, as it seems not yet
included in current kernels...

> +			kfree(ha);
> +			return 0;
> +		}
> +	}
> +	return -ENOENT;
> +}
> +
> +static int __hw_addr_del(struct list_head *list, unsigned char *addr,
> +				int addr_len)
> +{
> +	return __hw_addr_del_ii(list, addr, addr_len, -1);
> +}
> +
> +static int __hw_addr_add_multiple_ii(struct list_head *to_list,
> +				     struct list_head *from_list,
> +				     int addr_len, int ignore_index)
> +{
> +	int err = 0;
> +	struct netdev_hw_addr *ha, *ha2;
> +
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(ha, from_list, list) {
> +		err = __hw_addr_add_ii(to_list, ha->addr, addr_len, 0);
> +		if (err)
> +			goto unroll;
> +	}
> +	goto unlock;
> +unroll:
> +	list_for_each_entry_rcu(ha2, from_list, list) {
> +		if (ha2 == ha)
> +			break;
> +		__hw_addr_del_ii(to_list, ha2->addr, addr_len, 0);
> +	}
> +unlock:
> +	rcu_read_unlock();
> +	return err;
> +}
> +
> +static int __hw_addr_add_multiple(struct list_head *to_list,
> +					 struct list_head *from_list,
> +					 int addr_len)
> +{
> +	return __hw_addr_add_multiple_ii(to_list, from_list, addr_len, -1);
> +}
> +
> +static void __hw_addr_del_multiple_ii(struct list_head *to_list,
> +				      struct list_head *from_list,
> +				      int addr_len, int ignore_index)
> +{
> +	struct netdev_hw_addr *ha;
> +
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(ha, from_list, list) {
> +		__hw_addr_del_ii(to_list, ha->addr, addr_len, 0);
> +	}
> +	rcu_read_unlock();
> +}
> +
> +static void __hw_addr_del_multiple(struct list_head *to_list,
> +					 struct list_head *from_list,
> +					 int addr_len)
> +{
> +	__hw_addr_del_multiple_ii(to_list, from_list, addr_len, -1);
> +}
> +
> +static void __hw_addr_flush(struct list_head *list)
> +{
> +	struct netdev_hw_addr *ha, *tmp;
> +
> +	list_for_each_entry_safe(ha, tmp, list, list) {
> +		list_del_rcu(&ha->list);
> +		synchronize_rcu();

	Oh no... :(

> +		kfree(ha);
> +	}
> +}
> +
> +/* Device addresses handling functions */
> +
> +static void dev_addr_flush(struct net_device *dev)
> +{
> +	__hw_addr_flush(&dev->dev_addr_list);
> +	dev->dev_addr = NULL;
> +}
> +
> +static int dev_addr_init(struct net_device *dev)
> +{
> +	unsigned char addr[MAX_ADDR_LEN];
> +	struct netdev_hw_addr *ha;
> +	int err;
> +
> +	INIT_LIST_HEAD(&dev->dev_addr_list);
> +	memset(addr, 0, sizeof(*addr));
> +	err = __hw_addr_add(&dev->dev_addr_list, addr, sizeof(*addr));
> +	if (!err) {
> +		/*
> +		 * Get the first (previously created) address from the list
> +		 * and set dev_addr pointer to this location.
> +		 */
> +		rcu_read_lock();

locking is not correct or unnecessary

> +		ha = list_first_entry_rcu(&dev->dev_addr_list,
> +					  struct netdev_hw_addr, list);
> +		dev->dev_addr = ha->addr;
> +		rcu_read_unlock();
> +	}
> +	return err;
> +}
> +
> +/**
> + *	dev_addr_add	- Add a device address
> + *	@dev: device
> + *	@addr: address to add
> + *
> + *	Add a device address to the device or increase the reference count if
> + *	it already exists.
> + *
> + *	The caller must hold the rtnl_mutex.
> + */
> +int dev_addr_add(struct net_device *dev, unsigned char *addr)
> +{
> +	int err;
> +
> +	ASSERT_RTNL();
> +
> +	err = __hw_addr_add_ii(&dev->dev_addr_list, addr, dev->addr_len, 0);
> +	if (!err)
> +		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
> +	return err;
> +}
> +EXPORT_SYMBOL(dev_addr_add);
> +
> +/**
> + *	dev_addr_del	- Release a device address.
> + *	@dev: device
> + *	@addr: address to delete
> + *
> + *	Release reference to a device address and remove it from the device
> + *	if the reference count drops to zero.
> + *
> + *	The caller must hold the rtnl_mutex.
> + */
> +int dev_addr_del(struct net_device *dev, unsigned char *addr)
> +{
> +	int err;
> +
> +	ASSERT_RTNL();
> +
> +	err = __hw_addr_del_ii(&dev->dev_addr_list, addr, dev->addr_len, 0);
> +	if (!err)
> +		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
> +	return err;
> +}
> +EXPORT_SYMBOL(dev_addr_del);
> +
> +/**
> + *	dev_addr_add_multiple	- Add device addresses from another device
> + *	@to_dev: device to which addresses will be added
> + *	@from_dev: device from which addresses will be added
> + *
> + *	Add device addresses of the one device to another.
> + *
> + *	The caller must hold the rtnl_mutex.
> + */
> +int dev_addr_add_multiple(struct net_device *to_dev,
> +			  struct net_device *from_dev)
> +{
> +	int err;
> +
> +	ASSERT_RTNL();
> +
> +	if (from_dev->addr_len != to_dev->addr_len)
> +		return -EINVAL;
> +	err = __hw_addr_add_multiple_ii(&to_dev->dev_addr_list,
> +					&from_dev->dev_addr_list,
> +					to_dev->addr_len, 0);
> +	if (!err)
> +		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
> +	return err;
> +}
> +EXPORT_SYMBOL(dev_addr_add_multiple);
> +
> +/**
> + *	dev_addr_del_multiple	- Delete device addresses by another device
> + *	@to_dev: device where the addresses will be deleted
> + *	@from_dev: device by which addresses the addresses will be deleted
> + *
> + *	Deletes addresses in to device by the list of addresses in from device.
> + *
> + *	The caller must hold the rtnl_mutex.
> + */
> +int dev_addr_del_multiple(struct net_device *to_dev,
> +			  struct net_device *from_dev)
> +{
> +	ASSERT_RTNL();
> +
> +	if (from_dev->addr_len != to_dev->addr_len)
> +		return -EINVAL;
> +	__hw_addr_add_multiple_ii(&to_dev->dev_addr_list,
> +				  &from_dev->dev_addr_list,
> +				  to_dev->addr_len, 0);
> +	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
> +	return 0;
> +}
> +EXPORT_SYMBOL(dev_addr_del_multiple);
> +
> +/* unicast and multicast addresses handling functions */
> +
>  int __dev_addr_delete(struct dev_addr_list **list, int *count,
>  		      void *addr, int alen, int glbl)
>  {
> @@ -4257,6 +4513,9 @@ static void rollback_registered(struct net_device *dev)
>  	 */
>  	dev_addr_discard(dev);
>  
> +	/* Flush device addresses */
> +	dev_addr_flush(dev);
> +
>  	if (dev->netdev_ops->ndo_uninit)
>  		dev->netdev_ops->ndo_uninit(dev);
>  
> @@ -4779,6 +5038,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
>  
>  	dev->gso_max_size = GSO_MAX_SIZE;
>  
> +	dev_addr_init(dev);
>  	netdev_init_queues(dev);
>  
>  	INIT_LIST_HEAD(&dev->napi_list);
> @@ -4965,6 +5225,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
>  	 */
>  	dev_addr_discard(dev);
>  
> +	/* Flush device addresses */
> +	dev_addr_flush(dev);
> +
>  	netdev_unregister_kobject(dev);
>  
>  	/* Actually switch the network namespace */


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller April 15, 2009, 9:31 a.m. UTC | #3
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 15 Apr 2009 11:27:50 +0200

> Since you obviously need a write lock here to be sure following
> can be done by one cpu only.
> 
> You have same problem all over this patch.

RTNL semaphore is held across all modification operations.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Patrick McHardy April 15, 2009, 10:13 a.m. UTC | #4
David Miller wrote:
> From: Eric Dumazet <dada1@cosmosbay.com>
> Date: Wed, 15 Apr 2009 11:27:50 +0200
> 
>> Since you obviously need a write lock here to be sure following
>> can be done by one cpu only.
>>
>> You have same problem all over this patch.
> 
> RTNL semaphore is held across all modification operations.

If this will also be used for multicast lists, changes can happen
(IPv6) without the RTNL.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller April 15, 2009, 10:15 a.m. UTC | #5
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 15 Apr 2009 12:13:57 +0200

> David Miller wrote:
>> From: Eric Dumazet <dada1@cosmosbay.com>
>> Date: Wed, 15 Apr 2009 11:27:50 +0200
>> 
>>> Since you obviously need a write lock here to be sure following
>>> can be done by one cpu only.
>>>
>>> You have same problem all over this patch.
>> RTNL semaphore is held across all modification operations.
> 
> If this will also be used for multicast lists, changes can happen
> (IPv6) without the RTNL.

Indeed, that is true :-/
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Patrick McHardy April 15, 2009, 10:41 a.m. UTC | #6
David Miller wrote:
> From: Patrick McHardy <kaber@trash.net>
> Date: Wed, 15 Apr 2009 12:13:57 +0200
> 
>> David Miller wrote:
>>> From: Eric Dumazet <dada1@cosmosbay.com>
>>> Date: Wed, 15 Apr 2009 11:27:50 +0200
>>>
>>>> Since you obviously need a write lock here to be sure following
>>>> can be done by one cpu only.
>>>>
>>>> You have same problem all over this patch.
>>> RTNL semaphore is held across all modification operations.
>> If this will also be used for multicast lists, changes can happen
>> (IPv6) without the RTNL.
> 
> Indeed, that is true :-/

Herbert (I think) suggested to make address list updates in softirq
context a two-step process, where addresses would first be added to
a temporary list and the final change would be done in process context
while holding the RTNL.

Given the complicated mess we currently have, this would be a very
worthwhile change IMO.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller April 15, 2009, 10:45 a.m. UTC | #7
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 15 Apr 2009 12:41:01 +0200

> Herbert (I think) suggested to make address list updates in softirq
> context a two-step process, where addresses would first be added to
> a temporary list and the final change would be done in process context
> while holding the RTNL.
> 
> Given the complicated mess we currently have, this would be a very
> worthwhile change IMO.

This would break the IPV6 TAHI tests if you think we could use
such an idea for that.

When IPV6 packets arrive that influence multicast and unicast
address lists, the effect must be essentially immediate.  Such
that a subsequent packet will cause the kernel the behave
with the necessary side effects, no matter how quickly that
next packet arrives.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Patrick McHardy April 15, 2009, 10:47 a.m. UTC | #8
David Miller wrote:
> From: Patrick McHardy <kaber@trash.net>
> Date: Wed, 15 Apr 2009 12:41:01 +0200
> 
>> Herbert (I think) suggested to make address list updates in softirq
>> context a two-step process, where addresses would first be added to
>> a temporary list and the final change would be done in process context
>> while holding the RTNL.
>>
>> Given the complicated mess we currently have, this would be a very
>> worthwhile change IMO.
> 
> This would break the IPV6 TAHI tests if you think we could use
> such an idea for that.
> 
> When IPV6 packets arrive that influence multicast and unicast
> address lists, the effect must be essentially immediate.  Such
> that a subsequent packet will cause the kernel the behave
> with the necessary side effects, no matter how quickly that
> next packet arrives.

I see, thanks for the explanation.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko April 15, 2009, 11:17 a.m. UTC | #9
Wed, Apr 15, 2009 at 11:27:50AM CEST, dada1@cosmosbay.com wrote:
>Jiri Pirko a écrit :
>> This patch introduces a new list in struct net_device and brings a set of
>> functions to handle the work with device address list. The list is a replacement
>> for the original dev_addr field and because in some situations there is need to
>> carry several device addresses with the net device. To be backward compatible,
>> dev_addr is made to point to the first member of the list so original drivers
>> sees no difference.
>> 
>
>You see no difference ? Please look more closely...
>
>I see one additional dereference in hot path, to small objects possibly
>with false sharing effects.
>
>So I would advise not changing dev_addr[] to a pointer.
>And instead copy first netdev_hw_addr into it.

Hmm :( That is what I was trying to avoid. If the first netdev_hw_addr in the
list is a copy of dev_addr, then there must be synchronizing of those two. This
would be a pain.. Plus I thought that eventually dev_addr would not be
accessible directly but only by set of macros/inlines to accesse the list, and
then dev_addr would be removed from struct net_device.
>
>Also, doing a kzalloc(sizeof(struct netdev_hw_addr)) for allocating these structs
>might give a block of memory < L1_CACHE_SIZE so kernel is free to give other
>part of this cache line to some other layer that could be a hot spot, so
>false sharing could happen.
>
>kzalloc(max(sizeof(*ha), L1_CACHE_SIZE)) is thus higly recommended here.
You mean PAGE_CACHE_SIZE? I think that would be little wasting... But I see your
point...
>
>> Note: patch adding list_first_entry_rcu (currently in Ingo's tip tree) needed.
>> 
>> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>> ---
>>  include/linux/etherdevice.h |   24 ++++
>>  include/linux/netdevice.h   |   31 +++++-
>>  net/core/dev.c              |  263 +++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 316 insertions(+), 2 deletions(-)
>> 
>> diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
>> index a1f17ab..348a75e 100644
>> --- a/include/linux/etherdevice.h
>> +++ b/include/linux/etherdevice.h
>> @@ -205,4 +205,28 @@ static inline int compare_ether_header(const void *a, const void *b)
>>  	       (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]);
>>  }
>>  
>> +/**
>> + * is_etherdev_addr - Tell if given Ethernet address belongs to the device.
>> + * @dev: Pointer to a device structure
>> + * @addr: Pointer to a six-byte array containing the Ethernet address
>> + *
>> + * Compare passed address with all addresses of the device. Return true if the
>> + * address if one of the device addresses.
>> + */
>> +static inline bool is_etherdev_addr(const struct net_device *dev,
>> +				    const u8 *addr)
>> +{
>> +	struct netdev_hw_addr *ha;
>> +	int res = 1;
>> +
>> +	rcu_read_lock();
>> +	for_each_dev_addr(dev, ha) {
>> +		res = compare_ether_addr(addr, ha->addr);
>
>compare_ether_addr_64bits() please ?
>
I used the original as the bridge code used it. Ok, noted.

<snip>

>> +static int __hw_addr_add_ii(struct list_head *list, unsigned char *addr,
>> +			    int addr_len, int ignore_index)
>> +{
>> +	struct netdev_hw_addr *ha;
>> +	int i = 0;
>> +
>> +	if (addr_len > MAX_ADDR_LEN)
>> +		return -EINVAL;
>> +
>> +	rcu_read_lock();
>
>This locking is highly suspect.
>
>> +	list_for_each_entry_rcu(ha, list, list) {
>> +		if (i++ != ignore_index &&
>> +		    !memcmp(ha->addr, addr, addr_len)) {
>> +			ha->refcount++;
>> +			rcu_read_unlock();
>> +			return 0;
>> +		}
>> +	}
>> +	rcu_read_unlock();
>
>Since you obviously need a write lock here to be sure following
>can be done by one cpu only.
>
>You have same problem all over this patch.

Yes, as Dave wrote, this is guarded by RTNL mutex.
>
>> +
>> +	ha = kzalloc(sizeof(*ha), GFP_ATOMIC);
>
>kzalloc(max(sizeof(*ha), L1_CACHE_SIZE), GFP_...) is thus higly recommended here.
>
>Also, why GFP_ATOMIC is needed here ?

Yes, it is not needed here. I've copied it here from the original unicast and
multicast add funtion to stay close but as I can see, there is no need for it
there either.
Noted.
>

<snip>

>> +	list_for_each_entry(ha, list, list) {
>> +		if (i++ != ignore_index &&
>> +		    !memcmp(ha->addr, addr, addr_len)) {
>> +			if (--ha->refcount)
>> +				return 0;
>> +			list_del_rcu(&ha->list);
>> +			synchronize_rcu();
>
>Oh well... I'm pretty sure this synchronize_rcu() call can be avoided,
>dont you think ? Check kfree_rcu() or equivalent, as it seems not yet
>included in current kernels...
>
Well once kfree_rcu() will be in the tree I will be happy to replace this.

>> +			kfree(ha);
>> +			return 0;
>> +		}
>> +	}
>> +	return -ENOENT;

<snip>

>> +	err = __hw_addr_add(&dev->dev_addr_list, addr, sizeof(*addr));
>> +	if (!err) {
>> +		/*
>> +		 * Get the first (previously created) address from the list
>> +		 * and set dev_addr pointer to this location.
>> +		 */
>> +		rcu_read_lock();
>
>locking is not correct or unnecessary

Agree that here locking is not necessary, but I wanted to stay consistent to the
rest of the code. Do you think I should remove locking here entirely?

>
>> +		ha = list_first_entry_rcu(&dev->dev_addr_list,
>> +					  struct netdev_hw_addr, list);
>> +		dev->dev_addr = ha->addr;
>> +		rcu_read_unlock();
>> +	}
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Patrick McHardy April 15, 2009, 11:22 a.m. UTC | #10
Jiri Pirko wrote:

>> Since you obviously need a write lock here to be sure following
>> can be done by one cpu only.
>>
>> You have same problem all over this patch.
> 
> Yes, as Dave wrote, this is guarded by RTNL mutex.

This was incorrect. IPv6 adds multicast addresses in softirq context.

>>> +
>>> +	ha = kzalloc(sizeof(*ha), GFP_ATOMIC);
>> kzalloc(max(sizeof(*ha), L1_CACHE_SIZE), GFP_...) is thus higly recommended here.
>>
>> Also, why GFP_ATOMIC is needed here ?
> 
> Yes, it is not needed here. I've copied it here from the original unicast and
> multicast add funtion to stay close but as I can see, there is no need for it
> there either.
> Noted.

Also needed for IPv6 in softirq context.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko April 15, 2009, 11:28 a.m. UTC | #11
Wed, Apr 15, 2009 at 01:22:32PM CEST, kaber@trash.net wrote:
> Jiri Pirko wrote:
>
>>> Since you obviously need a write lock here to be sure following
>>> can be done by one cpu only.
>>>
>>> You have same problem all over this patch.
>>
>> Yes, as Dave wrote, this is guarded by RTNL mutex.
>
> This was incorrect. IPv6 adds multicast addresses in softirq context.

Yes, I see that.
>
>>>> +
>>>> +	ha = kzalloc(sizeof(*ha), GFP_ATOMIC);
>>> kzalloc(max(sizeof(*ha), L1_CACHE_SIZE), GFP_...) is thus higly recommended here.
>>>
>>> Also, why GFP_ATOMIC is needed here ?
>>
>> Yes, it is not needed here. I've copied it here from the original unicast and
>> multicast add funtion to stay close but as I can see, there is no need for it
>> there either.
>> Noted.
>
> Also needed for IPv6 in softirq context.
>

Noted...
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet April 15, 2009, 12:28 p.m. UTC | #12
Jiri Pirko a écrit :
> Wed, Apr 15, 2009 at 11:27:50AM CEST, dada1@cosmosbay.com wrote:

>> kzalloc(max(sizeof(*ha), L1_CACHE_SIZE)) is thus higly recommended here.
> You mean PAGE_CACHE_SIZE? I think that would be little wasting... But I see your
> point...

No, I meant L1_CACHE_BYTES    (usually 64 bytes on x86), I always confuse BYTES and SIZE on this one...


>>> +	list_for_each_entry(ha, list, list) {
>>> +		if (i++ != ignore_index &&
>>> +		    !memcmp(ha->addr, addr, addr_len)) {
>>> +			if (--ha->refcount)
>>> +				return 0;
>>> +			list_del_rcu(&ha->list);
>>> +			synchronize_rcu();
>> Oh well... I'm pretty sure this synchronize_rcu() call can be avoided,
>> dont you think ? Check kfree_rcu() or equivalent, as it seems not yet
>> included in current kernels...
>>
> Well once kfree_rcu() will be in the tree I will be happy to replace this.

If kfree_rcu() not yet available, please use a regular call_rcu() construct
(thus adding a struct rcu_head rcu; in struct netdev_hw_addr)

If you delete say 10 addresses on a device, while RTNL (or other lock) locked,
that means a lot of calls to synchronize_rcu() and a long lock hold time.

> 
>>> +			kfree(ha);
>>> +			return 0;
>>> +		}
>>> +	}
>>> +	return -ENOENT;
> 
> <snip>
> 
>>> +	err = __hw_addr_add(&dev->dev_addr_list, addr, sizeof(*addr));
>>> +	if (!err) {
>>> +		/*
>>> +		 * Get the first (previously created) address from the list
>>> +		 * and set dev_addr pointer to this location.
>>> +		 */
>>> +		rcu_read_lock();
>> locking is not correct or unnecessary
> 
> Agree that here locking is not necessary, but I wanted to stay consistent to the
> rest of the code. Do you think I should remove locking here entirely?

Yes, it is very confusing for reviewers because we feel patch submiter
is not comfortable with locking rules.

Check for example dev_add_pack() in net/core/dev.c : It uses list_add_rcu()
but as it also uses a regular spinlock, there is no point using rcu_read_lock().

void dev_add_pack(struct packet_type *pt)
{
        int hash;

        spin_lock_bh(&ptype_lock);
        if (pt->type == htons(ETH_P_ALL))
                list_add_rcu(&pt->list, &ptype_all);
        else {
                hash = ntohs(pt->type) & PTYPE_HASH_MASK;
                list_add_rcu(&pt->list, &ptype_base[hash]);
        }
        spin_unlock_bh(&ptype_lock);
}



Please note list_add_rcu() (and/or rcu_assign_pointer()) are still needed to protect
readers that dont use the spinlock at all.

If you use fact that RTNL is locked when calling your code, you could add
ASSERT_RTNL();
at strategic points so that this assertion can be checked at runtime.

(but Patrick & David wrote that you should not assume RTNL, so you probably need another lock...)

Thank you

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko April 15, 2009, 2:42 p.m. UTC | #13
Wed, Apr 15, 2009 at 12:13:57PM CEST, kaber@trash.net wrote:
> David Miller wrote:
>> From: Eric Dumazet <dada1@cosmosbay.com>
>> Date: Wed, 15 Apr 2009 11:27:50 +0200
>>
>>> Since you obviously need a write lock here to be sure following
>>> can be done by one cpu only.
>>>
>>> You have same problem all over this patch.
>>
>> RTNL semaphore is held across all modification operations.
>
> If this will also be used for multicast lists, changes can happen
> (IPv6) without the RTNL.

Ok, but for dev_addr_X() functions the RTNL mutex is sufficient so I see no
point of adding another lock here. When the multicast handling functions will be
implemented to use netdev_hw_addr and it's layer, then we need to use update
lock in dev_multicast_X.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index a1f17ab..348a75e 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -205,4 +205,28 @@  static inline int compare_ether_header(const void *a, const void *b)
 	       (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]);
 }
 
+/**
+ * is_etherdev_addr - Tell if given Ethernet address belongs to the device.
+ * @dev: Pointer to a device structure
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Compare passed address with all addresses of the device. Return true if the
+ * address if one of the device addresses.
+ */
+static inline bool is_etherdev_addr(const struct net_device *dev,
+				    const u8 *addr)
+{
+	struct netdev_hw_addr *ha;
+	int res = 1;
+
+	rcu_read_lock();
+	for_each_dev_addr(dev, ha) {
+		res = compare_ether_addr(addr, ha->addr);
+		if (!res)
+			break;
+	}
+	rcu_read_unlock();
+	return !res;
+}
+
 #endif	/* _LINUX_ETHERDEVICE_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2e7783f..77abfdf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -210,6 +210,12 @@  struct dev_addr_list
 #define dmi_users	da_users
 #define dmi_gusers	da_gusers
 
+struct netdev_hw_addr {
+	struct list_head	list;
+	unsigned char		addr[MAX_ADDR_LEN];
+	int			refcount;
+};
+
 struct hh_cache
 {
 	struct hh_cache *hh_next;	/* Next entry			     */
@@ -776,8 +782,11 @@  struct net_device
  */
 	unsigned long		last_rx;	/* Time of last Rx	*/
 	/* Interface address info used in eth_type_trans() */
-	unsigned char		dev_addr[MAX_ADDR_LEN];	/* hw address, (before bcast
-							   because most packets are unicast) */
+	unsigned char		*dev_addr;	/* hw address, (before bcast
+						   because most packets are
+						   unicast) */
+
+	struct list_head	dev_addr_list; /* list of device hw addresses */
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
@@ -1778,6 +1787,13 @@  static inline void netif_addr_unlock_bh(struct net_device *dev)
 	spin_unlock_bh(&dev->addr_list_lock);
 }
 
+/*
+ * dev_addr_list walker. Should be used only for read access. Call with
+ * rcu_read_lock held.
+ */
+#define for_each_dev_addr(dev, ha) \
+		list_for_each_entry_rcu(ha, &dev->dev_addr_list, list)
+
 /* These functions live elsewhere (drivers/net/net_init.c, but related) */
 
 extern void		ether_setup(struct net_device *dev);
@@ -1790,6 +1806,17 @@  extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	alloc_netdev_mq(sizeof_priv, name, setup, 1)
 extern int		register_netdev(struct net_device *dev);
 extern void		unregister_netdev(struct net_device *dev);
+
+/* Functions used for device addresses handling */
+extern int		dev_addr_add(struct net_device *dev,
+				     unsigned char *addr);
+extern int		dev_addr_del(struct net_device *dev,
+				     unsigned char *addr);
+extern int		dev_addr_add_multiple(struct net_device *to_dev,
+					      struct net_device *from_dev);
+extern int		dev_addr_del_multiple(struct net_device *to_dev,
+					      struct net_device *from_dev);
+
 /* Functions used for secondary unicast and multicast support */
 extern void		dev_set_rx_mode(struct net_device *dev);
 extern void		__dev_set_rx_mode(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index 91d792d..04cddbb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3437,6 +3437,262 @@  void dev_set_rx_mode(struct net_device *dev)
 	netif_addr_unlock_bh(dev);
 }
 
+/* hw addresses list handling functions */
+
+static int __hw_addr_add_ii(struct list_head *list, unsigned char *addr,
+			    int addr_len, int ignore_index)
+{
+	struct netdev_hw_addr *ha;
+	int i = 0;
+
+	if (addr_len > MAX_ADDR_LEN)
+		return -EINVAL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ha, list, list) {
+		if (i++ != ignore_index &&
+		    !memcmp(ha->addr, addr, addr_len)) {
+			ha->refcount++;
+			rcu_read_unlock();
+			return 0;
+		}
+	}
+	rcu_read_unlock();
+
+	ha = kzalloc(sizeof(*ha), GFP_ATOMIC);
+	if (!ha)
+		return -ENOMEM;
+	memcpy(ha->addr, addr, addr_len);
+	ha->refcount = 1;
+	list_add_tail_rcu(&ha->list, list);
+	return 0;
+}
+
+static int __hw_addr_add(struct list_head *list, unsigned char *addr,
+				int addr_len)
+{
+	return __hw_addr_add_ii(list, addr, addr_len, -1);
+}
+
+static int __hw_addr_del_ii(struct list_head *list, unsigned char *addr,
+			    int addr_len, int ignore_index)
+{
+	struct netdev_hw_addr *ha;
+	int i = 0;
+
+	list_for_each_entry(ha, list, list) {
+		if (i++ != ignore_index &&
+		    !memcmp(ha->addr, addr, addr_len)) {
+			if (--ha->refcount)
+				return 0;
+			list_del_rcu(&ha->list);
+			synchronize_rcu();
+			kfree(ha);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static int __hw_addr_del(struct list_head *list, unsigned char *addr,
+				int addr_len)
+{
+	return __hw_addr_del_ii(list, addr, addr_len, -1);
+}
+
+static int __hw_addr_add_multiple_ii(struct list_head *to_list,
+				     struct list_head *from_list,
+				     int addr_len, int ignore_index)
+{
+	int err = 0;
+	struct netdev_hw_addr *ha, *ha2;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ha, from_list, list) {
+		err = __hw_addr_add_ii(to_list, ha->addr, addr_len, 0);
+		if (err)
+			goto unroll;
+	}
+	goto unlock;
+unroll:
+	list_for_each_entry_rcu(ha2, from_list, list) {
+		if (ha2 == ha)
+			break;
+		__hw_addr_del_ii(to_list, ha2->addr, addr_len, 0);
+	}
+unlock:
+	rcu_read_unlock();
+	return err;
+}
+
+static int __hw_addr_add_multiple(struct list_head *to_list,
+					 struct list_head *from_list,
+					 int addr_len)
+{
+	return __hw_addr_add_multiple_ii(to_list, from_list, addr_len, -1);
+}
+
+static void __hw_addr_del_multiple_ii(struct list_head *to_list,
+				      struct list_head *from_list,
+				      int addr_len, int ignore_index)
+{
+	struct netdev_hw_addr *ha;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ha, from_list, list) {
+		__hw_addr_del_ii(to_list, ha->addr, addr_len, 0);
+	}
+	rcu_read_unlock();
+}
+
+static void __hw_addr_del_multiple(struct list_head *to_list,
+					 struct list_head *from_list,
+					 int addr_len)
+{
+	__hw_addr_del_multiple_ii(to_list, from_list, addr_len, -1);
+}
+
+static void __hw_addr_flush(struct list_head *list)
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, list, list) {
+		list_del_rcu(&ha->list);
+		synchronize_rcu();
+		kfree(ha);
+	}
+}
+
+/* Device addresses handling functions */
+
+static void dev_addr_flush(struct net_device *dev)
+{
+	__hw_addr_flush(&dev->dev_addr_list);
+	dev->dev_addr = NULL;
+}
+
+static int dev_addr_init(struct net_device *dev)
+{
+	unsigned char addr[MAX_ADDR_LEN];
+	struct netdev_hw_addr *ha;
+	int err;
+
+	INIT_LIST_HEAD(&dev->dev_addr_list);
+	memset(addr, 0, sizeof(*addr));
+	err = __hw_addr_add(&dev->dev_addr_list, addr, sizeof(*addr));
+	if (!err) {
+		/*
+		 * Get the first (previously created) address from the list
+		 * and set dev_addr pointer to this location.
+		 */
+		rcu_read_lock();
+		ha = list_first_entry_rcu(&dev->dev_addr_list,
+					  struct netdev_hw_addr, list);
+		dev->dev_addr = ha->addr;
+		rcu_read_unlock();
+	}
+	return err;
+}
+
+/**
+ *	dev_addr_add	- Add a device address
+ *	@dev: device
+ *	@addr: address to add
+ *
+ *	Add a device address to the device or increase the reference count if
+ *	it already exists.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add(struct net_device *dev, unsigned char *addr)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	err = __hw_addr_add_ii(&dev->dev_addr_list, addr, dev->addr_len, 0);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add);
+
+/**
+ *	dev_addr_del	- Release a device address.
+ *	@dev: device
+ *	@addr: address to delete
+ *
+ *	Release reference to a device address and remove it from the device
+ *	if the reference count drops to zero.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del(struct net_device *dev, unsigned char *addr)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	err = __hw_addr_del_ii(&dev->dev_addr_list, addr, dev->addr_len, 0);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_del);
+
+/**
+ *	dev_addr_add_multiple	- Add device addresses from another device
+ *	@to_dev: device to which addresses will be added
+ *	@from_dev: device from which addresses will be added
+ *
+ *	Add device addresses of the one device to another.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	err = __hw_addr_add_multiple_ii(&to_dev->dev_addr_list,
+					&from_dev->dev_addr_list,
+					to_dev->addr_len, 0);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add_multiple);
+
+/**
+ *	dev_addr_del_multiple	- Delete device addresses by another device
+ *	@to_dev: device where the addresses will be deleted
+ *	@from_dev: device by which addresses the addresses will be deleted
+ *
+ *	Deletes addresses in to device by the list of addresses in from device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev)
+{
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	__hw_addr_add_multiple_ii(&to_dev->dev_addr_list,
+				  &from_dev->dev_addr_list,
+				  to_dev->addr_len, 0);
+	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return 0;
+}
+EXPORT_SYMBOL(dev_addr_del_multiple);
+
+/* unicast and multicast addresses handling functions */
+
 int __dev_addr_delete(struct dev_addr_list **list, int *count,
 		      void *addr, int alen, int glbl)
 {
@@ -4257,6 +4513,9 @@  static void rollback_registered(struct net_device *dev)
 	 */
 	dev_addr_discard(dev);
 
+	/* Flush device addresses */
+	dev_addr_flush(dev);
+
 	if (dev->netdev_ops->ndo_uninit)
 		dev->netdev_ops->ndo_uninit(dev);
 
@@ -4779,6 +5038,7 @@  struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	dev->gso_max_size = GSO_MAX_SIZE;
 
+	dev_addr_init(dev);
 	netdev_init_queues(dev);
 
 	INIT_LIST_HEAD(&dev->napi_list);
@@ -4965,6 +5225,9 @@  int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	 */
 	dev_addr_discard(dev);
 
+	/* Flush device addresses */
+	dev_addr_flush(dev);
+
 	netdev_unregister_kobject(dev);
 
 	/* Actually switch the network namespace */