diff mbox

[net-next,1/2] macvtap: Let TUNSETOFFLOAD actually controll offload features.

Message ID 1371653272-11703-2-git-send-email-vyasevic@redhat.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Vlad Yasevich June 19, 2013, 2:47 p.m. UTC
When the user issues TUNSETOFFLOAD ioctl, macvtap does not do
anything other then to verify arguments.  This patch adds
functionality to allow users to actually control offload features.
NETIF_F_GSO and NETIF_F_GRO are always on, but the rest of the
features can be controlled.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
---
 drivers/net/macvlan.c      |  9 +++++++++
 drivers/net/macvtap.c      | 41 ++++++++++++++++++++++++++++++++++++++++-
 include/linux/if_macvlan.h |  1 +
 3 files changed, 50 insertions(+), 1 deletion(-)

Comments

Michael S. Tsirkin June 19, 2013, 3:16 p.m. UTC | #1
On Wed, Jun 19, 2013 at 10:47:51AM -0400, Vlad Yasevich wrote:
> When the user issues TUNSETOFFLOAD ioctl, macvtap does not do
> anything other then to verify arguments.  This patch adds
> functionality to allow users to actually control offload features.
> NETIF_F_GSO and NETIF_F_GRO are always on, but the rest of the
> features can be controlled.
> 
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> ---
>  drivers/net/macvlan.c      |  9 +++++++++
>  drivers/net/macvtap.c      | 41 ++++++++++++++++++++++++++++++++++++++++-
>  include/linux/if_macvlan.h |  1 +
>  3 files changed, 50 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
> index edfddc5..fa47415 100644
> --- a/drivers/net/macvlan.c
> +++ b/drivers/net/macvlan.c
> @@ -638,6 +638,14 @@ static int macvlan_ethtool_get_settings(struct net_device *dev,
>  	return __ethtool_get_settings(vlan->lowerdev, cmd);
>  }
>  
> +static netdev_features_t macvlan_fix_features(struct net_device *dev,
> +					      netdev_features_t features)
> +{
> +	struct macvlan_dev *vlan = netdev_priv(dev);
> +
> +	return (features & vlan->set_features) | (features & ~MACVLAN_FEATURES);

A bit clearer as

> +	return features & (vlan->set_features | ~MACVLAN_FEATURES);

> +}
> +
>  static const struct ethtool_ops macvlan_ethtool_ops = {
>  	.get_link		= ethtool_op_get_link,
>  	.get_settings		= macvlan_ethtool_get_settings,
> @@ -651,6 +659,7 @@ static const struct net_device_ops macvlan_netdev_ops = {
>  	.ndo_stop		= macvlan_stop,
>  	.ndo_start_xmit		= macvlan_start_xmit,
>  	.ndo_change_mtu		= macvlan_change_mtu,
> +	.ndo_fix_features	= macvlan_fix_features,
>  	.ndo_change_rx_flags	= macvlan_change_rx_flags,
>  	.ndo_set_mac_address	= macvlan_set_mac_address,
>  	.ndo_set_rx_mode	= macvlan_set_mac_lists,
> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
> index 5a76f20..09f0b1f 100644
> --- a/drivers/net/macvtap.c
> +++ b/drivers/net/macvtap.c
> @@ -976,6 +976,45 @@ static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags)
>  	return ret;
>  }
>  
> +static int set_offload(struct macvtap_queue *q, unsigned long arg)
> +{
> +	struct macvlan_dev *vlan;
> +	netdev_features_t features = NETIF_F_GSO|NETIF_F_GRO;
> +	int err = 0;
> +
> +	if (arg & TUN_F_CSUM) {
> +		features = NETIF_F_HW_CSUM;
> +
> +		if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) {
> +			if (arg & TUN_F_TSO_ECN)
> +				features |= NETIF_F_TSO_ECN;
> +			if (arg & TUN_F_TSO4)
> +				features |= NETIF_F_TSO;
> +			if (arg & TUN_F_TSO6)
> +				features |= NETIF_F_TSO6;
> +		}
> +
> +		if (arg & TUN_F_UFO)
> +			features |= NETIF_F_UFO;

Hmm this looks strange. The meaning of offloads
with tun is exactly the reverse from vlan/macvtap.

For example, assume that you disable TSO.
For tun this means: "don't send TSO packets to userspace".
What this patch makes it mean for macvtap is
"don't send TSO packets from userspace on the network".

So, userspace using this ioctl
to control tun would get a surprising result.

> +	}
> +
> +	rtnl_lock();
> +	rcu_read_lock_bh();
> +	vlan = rcu_dereference_bh(q->vlan);
> +	if (!vlan) {
> +		err = -ENOLINK;
> +		goto unlock;
> +	}
> +
> +	vlan->set_features = features;
> +	netdev_update_features(vlan->dev);
> +
> +unlock:
> +	rcu_read_unlock_bh();
> +	rtnl_unlock();
> +	return err;
> +}
> +
>  /*
>   * provide compatibility with generic tun/tap interface
>   */
> @@ -1062,7 +1101,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
>  			 got enabled for forwarded frames */
>  		if (!(q->flags & IFF_VNET_HDR))
>  			return  -EINVAL;
> -		return 0;
> +		return set_offload(q, arg);
>  
>  	default:
>  		return -EINVAL;
> diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
> index f49a9f6..e446e82 100644
> --- a/include/linux/if_macvlan.h
> +++ b/include/linux/if_macvlan.h
> @@ -65,6 +65,7 @@ struct macvlan_dev {
>  
>  	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
>  
> +	netdev_features_t	set_features;
>  	enum macvlan_mode	mode;
>  	u16			flags;
>  	int (*receive)(struct sk_buff *skb);
> -- 
> 1.8.1.4
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet June 19, 2013, 3:17 p.m. UTC | #2
On Wed, 2013-06-19 at 10:47 -0400, Vlad Yasevich wrote:
> When the user issues TUNSETOFFLOAD ioctl, macvtap does not do
> anything other then to verify arguments.  This patch adds
> functionality to allow users to actually control offload features.
> NETIF_F_GSO and NETIF_F_GRO are always on, but the rest of the
> features can be controlled.
> 
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> ---
>  drivers/net/macvlan.c      |  9 +++++++++
>  drivers/net/macvtap.c      | 41 ++++++++++++++++++++++++++++++++++++++++-
>  include/linux/if_macvlan.h |  1 +
>  3 files changed, 50 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
> index edfddc5..fa47415 100644
> --- a/drivers/net/macvlan.c
> +++ b/drivers/net/macvlan.c
> @@ -638,6 +638,14 @@ static int macvlan_ethtool_get_settings(struct net_device *dev,
>  	return __ethtool_get_settings(vlan->lowerdev, cmd);
>  }
>  
> +static netdev_features_t macvlan_fix_features(struct net_device *dev,
> +					      netdev_features_t features)
> +{
> +	struct macvlan_dev *vlan = netdev_priv(dev);
> +
> +	return (features & vlan->set_features) | (features & ~MACVLAN_FEATURES);
> +}
> +
>  static const struct ethtool_ops macvlan_ethtool_ops = {
>  	.get_link		= ethtool_op_get_link,
>  	.get_settings		= macvlan_ethtool_get_settings,
> @@ -651,6 +659,7 @@ static const struct net_device_ops macvlan_netdev_ops = {
>  	.ndo_stop		= macvlan_stop,
>  	.ndo_start_xmit		= macvlan_start_xmit,
>  	.ndo_change_mtu		= macvlan_change_mtu,
> +	.ndo_fix_features	= macvlan_fix_features,
>  	.ndo_change_rx_flags	= macvlan_change_rx_flags,
>  	.ndo_set_mac_address	= macvlan_set_mac_address,
>  	.ndo_set_rx_mode	= macvlan_set_mac_lists,
> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
> index 5a76f20..09f0b1f 100644
> --- a/drivers/net/macvtap.c
> +++ b/drivers/net/macvtap.c
> @@ -976,6 +976,45 @@ static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags)
>  	return ret;
>  }
>  
> +static int set_offload(struct macvtap_queue *q, unsigned long arg)
> +{
> +	struct macvlan_dev *vlan;
> +	netdev_features_t features = NETIF_F_GSO|NETIF_F_GRO;
> +	int err = 0;
> +
> +	if (arg & TUN_F_CSUM) {
> +		features = NETIF_F_HW_CSUM;
> +
> +		if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) {
> +			if (arg & TUN_F_TSO_ECN)
> +				features |= NETIF_F_TSO_ECN;
> +			if (arg & TUN_F_TSO4)
> +				features |= NETIF_F_TSO;
> +			if (arg & TUN_F_TSO6)
> +				features |= NETIF_F_TSO6;
> +		}
> +
> +		if (arg & TUN_F_UFO)
> +			features |= NETIF_F_UFO;
> +	}
> +
> +	rtnl_lock();
> +	rcu_read_lock_bh();

This looks wrong/suspect to me.

Once RTNL is owned, you should not need rcu_read_lock_bh()

(A BH handler will not change q->vlan )

BTW, it looks like ->vlan is protected by macvtap_lock

> +	vlan = rcu_dereference_bh(q->vlan);

vlan = rtnl_dereference(q->vlan);

> +	if (!vlan) {
> +		err = -ENOLINK;
> +		goto unlock;
> +	}
> +
> +	vlan->set_features = features;
> +	netdev_update_features(vlan->dev);

Can this really be called with BH disabled ?

> +
> +unlock:
> +	rcu_read_unlock_bh();
> +	rtnl_unlock();
> +	return err;
> +}
> +



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich June 19, 2013, 3:26 p.m. UTC | #3
On 06/19/2013 11:17 AM, Eric Dumazet wrote:
> On Wed, 2013-06-19 at 10:47 -0400, Vlad Yasevich wrote:
>> When the user issues TUNSETOFFLOAD ioctl, macvtap does not do
>> anything other then to verify arguments.  This patch adds
>> functionality to allow users to actually control offload features.
>> NETIF_F_GSO and NETIF_F_GRO are always on, but the rest of the
>> features can be controlled.
>>
>> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
>> ---
>>   drivers/net/macvlan.c      |  9 +++++++++
>>   drivers/net/macvtap.c      | 41 ++++++++++++++++++++++++++++++++++++++++-
>>   include/linux/if_macvlan.h |  1 +
>>   3 files changed, 50 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
>> index edfddc5..fa47415 100644
>> --- a/drivers/net/macvlan.c
>> +++ b/drivers/net/macvlan.c
>> @@ -638,6 +638,14 @@ static int macvlan_ethtool_get_settings(struct net_device *dev,
>>   	return __ethtool_get_settings(vlan->lowerdev, cmd);
>>   }
>>
>> +static netdev_features_t macvlan_fix_features(struct net_device *dev,
>> +					      netdev_features_t features)
>> +{
>> +	struct macvlan_dev *vlan = netdev_priv(dev);
>> +
>> +	return (features & vlan->set_features) | (features & ~MACVLAN_FEATURES);
>> +}
>> +
>>   static const struct ethtool_ops macvlan_ethtool_ops = {
>>   	.get_link		= ethtool_op_get_link,
>>   	.get_settings		= macvlan_ethtool_get_settings,
>> @@ -651,6 +659,7 @@ static const struct net_device_ops macvlan_netdev_ops = {
>>   	.ndo_stop		= macvlan_stop,
>>   	.ndo_start_xmit		= macvlan_start_xmit,
>>   	.ndo_change_mtu		= macvlan_change_mtu,
>> +	.ndo_fix_features	= macvlan_fix_features,
>>   	.ndo_change_rx_flags	= macvlan_change_rx_flags,
>>   	.ndo_set_mac_address	= macvlan_set_mac_address,
>>   	.ndo_set_rx_mode	= macvlan_set_mac_lists,
>> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
>> index 5a76f20..09f0b1f 100644
>> --- a/drivers/net/macvtap.c
>> +++ b/drivers/net/macvtap.c
>> @@ -976,6 +976,45 @@ static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags)
>>   	return ret;
>>   }
>>
>> +static int set_offload(struct macvtap_queue *q, unsigned long arg)
>> +{
>> +	struct macvlan_dev *vlan;
>> +	netdev_features_t features = NETIF_F_GSO|NETIF_F_GRO;
>> +	int err = 0;
>> +
>> +	if (arg & TUN_F_CSUM) {
>> +		features = NETIF_F_HW_CSUM;
>> +
>> +		if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) {
>> +			if (arg & TUN_F_TSO_ECN)
>> +				features |= NETIF_F_TSO_ECN;
>> +			if (arg & TUN_F_TSO4)
>> +				features |= NETIF_F_TSO;
>> +			if (arg & TUN_F_TSO6)
>> +				features |= NETIF_F_TSO6;
>> +		}
>> +
>> +		if (arg & TUN_F_UFO)
>> +			features |= NETIF_F_UFO;
>> +	}
>> +
>> +	rtnl_lock();
>> +	rcu_read_lock_bh();
>
> This looks wrong/suspect to me.
>
> Once RTNL is owned, you should not need rcu_read_lock_bh()
>

I think I do since vlan pointer may change even when I am holding
rtnl.  rtnl is needed to change features.  rcu is needed to get
the vlan pointer.

> (A BH handler will not change q->vlan )

No, but the _bh rcu calls seem to be used when dereferencing q->vlan.
I am not sure the reason for that...

>
> BTW, it looks like ->vlan is protected by macvtap_lock

Right.  This is why I use rcu to get vlan.  rtnl is needed to avoid
asserts in the feature change code.

-vlad

>
>> +	vlan = rcu_dereference_bh(q->vlan);
>
> vlan = rtnl_dereference(q->vlan);
>
>> +	if (!vlan) {
>> +		err = -ENOLINK;
>> +		goto unlock;
>> +	}
>> +
>> +	vlan->set_features = features;
>> +	netdev_update_features(vlan->dev);
>
> Can this really be called with BH disabled ?
>
>> +
>> +unlock:
>> +	rcu_read_unlock_bh();
>> +	rtnl_unlock();
>> +	return err;
>> +}
>> +
>
>
>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet June 19, 2013, 3:46 p.m. UTC | #4
On Wed, 2013-06-19 at 11:26 -0400, Vlad Yasevich wrote:

> I think I do since vlan pointer may change even when I am holding
> rtnl.  rtnl is needed to change features.  rcu is needed to get
> the vlan pointer.
> 
> > (A BH handler will not change q->vlan )
> 
> No, but the _bh rcu calls seem to be used when dereferencing q->vlan.
> I am not sure the reason for that...

You mix the reader/fast path, properly using RCU,
and the writer path, using macvtap_lock ( a spinlock ).

That's clear sign you missed something.

> 
> >
> > BTW, it looks like ->vlan is protected by macvtap_lock
> 
> Right.  This is why I use rcu to get vlan.  rtnl is needed to avoid
> asserts in the feature change code.

The management should be allowed to sleep, and rcu_read_lock_bh()
disallows that.

Maybe some driver callback will really sleep and crash after your patch.

vi +69 drivers/net/macvtap.c

/*
 * RCU usage:
 * The macvtap_queue and the macvlan_dev are loosely coupled, the
 * pointers from one to the other can only be read while rcu_read_lock
 * or macvtap_lock is held.

Your patch does not respect the rules of this driver.

macvtap_lock is always acquired from process context, without any need
for _bh variant.

Quite frankly, I would switch this driver to use a mutex for
macvtap_lock. 

And simply remove it, as RTNL is most probably already owned.





--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich June 19, 2013, 3:47 p.m. UTC | #5
On 06/19/2013 11:16 AM, Michael S. Tsirkin wrote:
> On Wed, Jun 19, 2013 at 10:47:51AM -0400, Vlad Yasevich wrote:
>> When the user issues TUNSETOFFLOAD ioctl, macvtap does not do
>> anything other then to verify arguments.  This patch adds
>> functionality to allow users to actually control offload features.
>> NETIF_F_GSO and NETIF_F_GRO are always on, but the rest of the
>> features can be controlled.
>>
>> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
>> ---
>>   drivers/net/macvlan.c      |  9 +++++++++
>>   drivers/net/macvtap.c      | 41 ++++++++++++++++++++++++++++++++++++++++-
>>   include/linux/if_macvlan.h |  1 +
>>   3 files changed, 50 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
>> index edfddc5..fa47415 100644
>> --- a/drivers/net/macvlan.c
>> +++ b/drivers/net/macvlan.c
>> @@ -638,6 +638,14 @@ static int macvlan_ethtool_get_settings(struct net_device *dev,
>>   	return __ethtool_get_settings(vlan->lowerdev, cmd);
>>   }
>>
>> +static netdev_features_t macvlan_fix_features(struct net_device *dev,
>> +					      netdev_features_t features)
>> +{
>> +	struct macvlan_dev *vlan = netdev_priv(dev);
>> +
>> +	return (features & vlan->set_features) | (features & ~MACVLAN_FEATURES);
>
> A bit clearer as
>
>> +	return features & (vlan->set_features | ~MACVLAN_FEATURES);

OK

>
>> +}
>> +
>>   static const struct ethtool_ops macvlan_ethtool_ops = {
>>   	.get_link		= ethtool_op_get_link,
>>   	.get_settings		= macvlan_ethtool_get_settings,
>> @@ -651,6 +659,7 @@ static const struct net_device_ops macvlan_netdev_ops = {
>>   	.ndo_stop		= macvlan_stop,
>>   	.ndo_start_xmit		= macvlan_start_xmit,
>>   	.ndo_change_mtu		= macvlan_change_mtu,
>> +	.ndo_fix_features	= macvlan_fix_features,
>>   	.ndo_change_rx_flags	= macvlan_change_rx_flags,
>>   	.ndo_set_mac_address	= macvlan_set_mac_address,
>>   	.ndo_set_rx_mode	= macvlan_set_mac_lists,
>> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
>> index 5a76f20..09f0b1f 100644
>> --- a/drivers/net/macvtap.c
>> +++ b/drivers/net/macvtap.c
>> @@ -976,6 +976,45 @@ static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags)
>>   	return ret;
>>   }
>>
>> +static int set_offload(struct macvtap_queue *q, unsigned long arg)
>> +{
>> +	struct macvlan_dev *vlan;
>> +	netdev_features_t features = NETIF_F_GSO|NETIF_F_GRO;
>> +	int err = 0;
>> +
>> +	if (arg & TUN_F_CSUM) {
>> +		features = NETIF_F_HW_CSUM;
>> +
>> +		if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) {
>> +			if (arg & TUN_F_TSO_ECN)
>> +				features |= NETIF_F_TSO_ECN;
>> +			if (arg & TUN_F_TSO4)
>> +				features |= NETIF_F_TSO;
>> +			if (arg & TUN_F_TSO6)
>> +				features |= NETIF_F_TSO6;
>> +		}
>> +
>> +		if (arg & TUN_F_UFO)
>> +			features |= NETIF_F_UFO;
>
> Hmm this looks strange. The meaning of offloads
> with tun is exactly the reverse from vlan/macvtap.
>
> For example, assume that you disable TSO.
> For tun this means: "don't send TSO packets to userspace".
> What this patch makes it mean for macvtap is
> "don't send TSO packets from userspace on the network".
>

Isn't a user space write to TUN exactly the same as
a user space write to macvtap?  It looks to me like the
are and so features for them would work the same way.

macvlan and macvtap would be different, but I think that's
to be expected.

> So, userspace using this ioctl
> to control tun would get a surprising result.

By surprising do you mean that if user space writes
a TSO packet to a macvtap where TSO is disabled, the TSO
packet is still sent to the network?


>
>> +	}
>> +
>> +	rtnl_lock();
>> +	rcu_read_lock_bh();
>> +	vlan = rcu_dereference_bh(q->vlan);
>> +	if (!vlan) {
>> +		err = -ENOLINK;
>> +		goto unlock;
>> +	}
>> +
>> +	vlan->set_features = features;
>> +	netdev_update_features(vlan->dev);
>> +
>> +unlock:
>> +	rcu_read_unlock_bh();
>> +	rtnl_unlock();
>> +	return err;
>> +}
>> +
>>   /*
>>    * provide compatibility with generic tun/tap interface
>>    */
>> @@ -1062,7 +1101,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
>>   			 got enabled for forwarded frames */
>>   		if (!(q->flags & IFF_VNET_HDR))
>>   			return  -EINVAL;
>> -		return 0;
>> +		return set_offload(q, arg);
>>
>>   	default:
>>   		return -EINVAL;
>> diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
>> index f49a9f6..e446e82 100644
>> --- a/include/linux/if_macvlan.h
>> +++ b/include/linux/if_macvlan.h
>> @@ -65,6 +65,7 @@ struct macvlan_dev {
>>
>>   	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
>>
>> +	netdev_features_t	set_features;
>>   	enum macvlan_mode	mode;
>>   	u16			flags;
>>   	int (*receive)(struct sk_buff *skb);
>> --
>> 1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin June 19, 2013, 3:55 p.m. UTC | #6
On Wed, Jun 19, 2013 at 11:47:42AM -0400, Vlad Yasevich wrote:
> On 06/19/2013 11:16 AM, Michael S. Tsirkin wrote:
> >On Wed, Jun 19, 2013 at 10:47:51AM -0400, Vlad Yasevich wrote:
> >>When the user issues TUNSETOFFLOAD ioctl, macvtap does not do
> >>anything other then to verify arguments.  This patch adds
> >>functionality to allow users to actually control offload features.
> >>NETIF_F_GSO and NETIF_F_GRO are always on, but the rest of the
> >>features can be controlled.
> >>
> >>Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> >>---
> >>  drivers/net/macvlan.c      |  9 +++++++++
> >>  drivers/net/macvtap.c      | 41 ++++++++++++++++++++++++++++++++++++++++-
> >>  include/linux/if_macvlan.h |  1 +
> >>  3 files changed, 50 insertions(+), 1 deletion(-)
> >>
> >>diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
> >>index edfddc5..fa47415 100644
> >>--- a/drivers/net/macvlan.c
> >>+++ b/drivers/net/macvlan.c
> >>@@ -638,6 +638,14 @@ static int macvlan_ethtool_get_settings(struct net_device *dev,
> >>  	return __ethtool_get_settings(vlan->lowerdev, cmd);
> >>  }
> >>
> >>+static netdev_features_t macvlan_fix_features(struct net_device *dev,
> >>+					      netdev_features_t features)
> >>+{
> >>+	struct macvlan_dev *vlan = netdev_priv(dev);
> >>+
> >>+	return (features & vlan->set_features) | (features & ~MACVLAN_FEATURES);
> >
> >A bit clearer as
> >
> >>+	return features & (vlan->set_features | ~MACVLAN_FEATURES);
> 
> OK
> 
> >
> >>+}
> >>+
> >>  static const struct ethtool_ops macvlan_ethtool_ops = {
> >>  	.get_link		= ethtool_op_get_link,
> >>  	.get_settings		= macvlan_ethtool_get_settings,
> >>@@ -651,6 +659,7 @@ static const struct net_device_ops macvlan_netdev_ops = {
> >>  	.ndo_stop		= macvlan_stop,
> >>  	.ndo_start_xmit		= macvlan_start_xmit,
> >>  	.ndo_change_mtu		= macvlan_change_mtu,
> >>+	.ndo_fix_features	= macvlan_fix_features,
> >>  	.ndo_change_rx_flags	= macvlan_change_rx_flags,
> >>  	.ndo_set_mac_address	= macvlan_set_mac_address,
> >>  	.ndo_set_rx_mode	= macvlan_set_mac_lists,
> >>diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
> >>index 5a76f20..09f0b1f 100644
> >>--- a/drivers/net/macvtap.c
> >>+++ b/drivers/net/macvtap.c
> >>@@ -976,6 +976,45 @@ static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags)
> >>  	return ret;
> >>  }
> >>
> >>+static int set_offload(struct macvtap_queue *q, unsigned long arg)
> >>+{
> >>+	struct macvlan_dev *vlan;
> >>+	netdev_features_t features = NETIF_F_GSO|NETIF_F_GRO;
> >>+	int err = 0;
> >>+
> >>+	if (arg & TUN_F_CSUM) {
> >>+		features = NETIF_F_HW_CSUM;
> >>+
> >>+		if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) {
> >>+			if (arg & TUN_F_TSO_ECN)
> >>+				features |= NETIF_F_TSO_ECN;
> >>+			if (arg & TUN_F_TSO4)
> >>+				features |= NETIF_F_TSO;
> >>+			if (arg & TUN_F_TSO6)
> >>+				features |= NETIF_F_TSO6;
> >>+		}
> >>+
> >>+		if (arg & TUN_F_UFO)
> >>+			features |= NETIF_F_UFO;
> >
> >Hmm this looks strange. The meaning of offloads
> >with tun is exactly the reverse from vlan/macvtap.
> >
> >For example, assume that you disable TSO.
> >For tun this means: "don't send TSO packets to userspace".
> >What this patch makes it mean for macvtap is
> >"don't send TSO packets from userspace on the network".
> >
> 
> Isn't a user space write to TUN exactly the same as
> a user space write to macvtap?  It looks to me like the
> are and so features for them would work the same way.
> 
> macvlan and macvtap would be different, but I think that's
> to be expected.

They aren't the same.

Userspace write on tun causes a packet to be *received* from tun.
Userspace write on macvtap causes a packet to be *transmitted*
on macvlan.




> >So, userspace using this ioctl
> >to control tun would get a surprising result.
> 
> By surprising do you mean that if user space writes
> a TSO packet to a macvtap where TSO is disabled, the TSO
> packet is still sent to the network?

No.
tun offloads only control packets send to userspace.
When I disable TSO on tun this means
don't send *me* TSO packets. Instead, you try to
mess with packets *received* from me and being
sent outside.

> 
> >
> >>+	}
> >>+
> >>+	rtnl_lock();
> >>+	rcu_read_lock_bh();
> >>+	vlan = rcu_dereference_bh(q->vlan);
> >>+	if (!vlan) {
> >>+		err = -ENOLINK;
> >>+		goto unlock;
> >>+	}
> >>+
> >>+	vlan->set_features = features;
> >>+	netdev_update_features(vlan->dev);
> >>+
> >>+unlock:
> >>+	rcu_read_unlock_bh();
> >>+	rtnl_unlock();
> >>+	return err;
> >>+}
> >>+
> >>  /*
> >>   * provide compatibility with generic tun/tap interface
> >>   */
> >>@@ -1062,7 +1101,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
> >>  			 got enabled for forwarded frames */
> >>  		if (!(q->flags & IFF_VNET_HDR))
> >>  			return  -EINVAL;
> >>-		return 0;
> >>+		return set_offload(q, arg);
> >>
> >>  	default:
> >>  		return -EINVAL;
> >>diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
> >>index f49a9f6..e446e82 100644
> >>--- a/include/linux/if_macvlan.h
> >>+++ b/include/linux/if_macvlan.h
> >>@@ -65,6 +65,7 @@ struct macvlan_dev {
> >>
> >>  	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
> >>
> >>+	netdev_features_t	set_features;
> >>  	enum macvlan_mode	mode;
> >>  	u16			flags;
> >>  	int (*receive)(struct sk_buff *skb);
> >>--
> >>1.8.1.4
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich June 19, 2013, 4:20 p.m. UTC | #7
On 06/19/2013 11:46 AM, Eric Dumazet wrote:
> On Wed, 2013-06-19 at 11:26 -0400, Vlad Yasevich wrote:
>
>> I think I do since vlan pointer may change even when I am holding
>> rtnl.  rtnl is needed to change features.  rcu is needed to get
>> the vlan pointer.
>>
>>> (A BH handler will not change q->vlan )
>>
>> No, but the _bh rcu calls seem to be used when dereferencing q->vlan.
>> I am not sure the reason for that...
>
> You mix the reader/fast path, properly using RCU,
> and the writer path, using macvtap_lock ( a spinlock ).
>
> That's clear sign you missed something.
>

I don't think I need macvtap_lock as I am not modifying the relationship
between q and vlan.  I am attempting to modify the macvlan device 
features.  So macvtap_lock does not apply, and _rcu is used.

Looking at the entire macvtap driver, only the _bh variants of rcu
are used throughout the driver,  including in the ioctl() function.  I
am not sure why the driver requires BH to be disabled, but that
seems to be the case.

>>
>>>
>>> BTW, it looks like ->vlan is protected by macvtap_lock
>>
>> Right.  This is why I use rcu to get vlan.  rtnl is needed to avoid
>> asserts in the feature change code.
>
> The management should be allowed to sleep, and rcu_read_lock_bh()
> disallows that.
>
> Maybe some driver callback will really sleep and crash after your patch.
>
> vi +69 drivers/net/macvtap.c
>
> /*
>   * RCU usage:
>   * The macvtap_queue and the macvlan_dev are loosely coupled, the
>   * pointers from one to the other can only be read while rcu_read_lock
>   * or macvtap_lock is held.
>
> Your patch does not respect the rules of this driver.

Why not?  It uses rcu to acquire the pointer thus following the rules. 
The use of  the pointer is within the critical section so we are
guaranteed to have a valid pointer.

>
> macvtap_lock is always acquired from process context, without any need
> for _bh variant.
>

No, the lock is acquired only when modifying the relationship between 
the macvtap_queue and macvtap_dev.


> Quite frankly, I would switch this driver to use a mutex for
> macvtap_lock.
>
> And simply remove it, as RTNL is most probably already owned.
>

That's the issue.  RTNL is not owned in the ioctl case.  In fact
rtnl_lock was added to the patch because RTNL asserts were triggered
when changing device features.

-vlad


>
>
>
>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet June 19, 2013, 4:34 p.m. UTC | #8
On Wed, 2013-06-19 at 12:20 -0400, Vlad Yasevich wrote:

> That's the issue.  RTNL is not owned in the ioctl case.

So it was clearly wrong. The fix was simply to get RTNL in ioctl.

Unfortunately this was not spotted earlier, this is not a reason to add
more kludge.

RTNL is the only mutex needed for the write path.

A management path using RCU_BH and RTNL and a spinlock is wrong.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich June 19, 2013, 5:05 p.m. UTC | #9
On 06/19/2013 11:55 AM, Michael S. Tsirkin wrote:
> On Wed, Jun 19, 2013 at 11:47:42AM -0400, Vlad Yasevich wrote:
>> On 06/19/2013 11:16 AM, Michael S. Tsirkin wrote:
>>> On Wed, Jun 19, 2013 at 10:47:51AM -0400, Vlad Yasevich wrote:
>>>> When the user issues TUNSETOFFLOAD ioctl, macvtap does not do
>>>> anything other then to verify arguments.  This patch adds
>>>> functionality to allow users to actually control offload features.
>>>> NETIF_F_GSO and NETIF_F_GRO are always on, but the rest of the
>>>> features can be controlled.
>>>>
>>>> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
>>>> ---
>>>>   drivers/net/macvlan.c      |  9 +++++++++
>>>>   drivers/net/macvtap.c      | 41 ++++++++++++++++++++++++++++++++++++++++-
>>>>   include/linux/if_macvlan.h |  1 +
>>>>   3 files changed, 50 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
>>>> index edfddc5..fa47415 100644
>>>> --- a/drivers/net/macvlan.c
>>>> +++ b/drivers/net/macvlan.c
>>>> @@ -638,6 +638,14 @@ static int macvlan_ethtool_get_settings(struct net_device *dev,
>>>>   	return __ethtool_get_settings(vlan->lowerdev, cmd);
>>>>   }
>>>>
>>>> +static netdev_features_t macvlan_fix_features(struct net_device *dev,
>>>> +					      netdev_features_t features)
>>>> +{
>>>> +	struct macvlan_dev *vlan = netdev_priv(dev);
>>>> +
>>>> +	return (features & vlan->set_features) | (features & ~MACVLAN_FEATURES);
>>>
>>> A bit clearer as
>>>
>>>> +	return features & (vlan->set_features | ~MACVLAN_FEATURES);
>>
>> OK
>>
>>>
>>>> +}
>>>> +
>>>>   static const struct ethtool_ops macvlan_ethtool_ops = {
>>>>   	.get_link		= ethtool_op_get_link,
>>>>   	.get_settings		= macvlan_ethtool_get_settings,
>>>> @@ -651,6 +659,7 @@ static const struct net_device_ops macvlan_netdev_ops = {
>>>>   	.ndo_stop		= macvlan_stop,
>>>>   	.ndo_start_xmit		= macvlan_start_xmit,
>>>>   	.ndo_change_mtu		= macvlan_change_mtu,
>>>> +	.ndo_fix_features	= macvlan_fix_features,
>>>>   	.ndo_change_rx_flags	= macvlan_change_rx_flags,
>>>>   	.ndo_set_mac_address	= macvlan_set_mac_address,
>>>>   	.ndo_set_rx_mode	= macvlan_set_mac_lists,
>>>> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
>>>> index 5a76f20..09f0b1f 100644
>>>> --- a/drivers/net/macvtap.c
>>>> +++ b/drivers/net/macvtap.c
>>>> @@ -976,6 +976,45 @@ static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags)
>>>>   	return ret;
>>>>   }
>>>>
>>>> +static int set_offload(struct macvtap_queue *q, unsigned long arg)
>>>> +{
>>>> +	struct macvlan_dev *vlan;
>>>> +	netdev_features_t features = NETIF_F_GSO|NETIF_F_GRO;
>>>> +	int err = 0;
>>>> +
>>>> +	if (arg & TUN_F_CSUM) {
>>>> +		features = NETIF_F_HW_CSUM;
>>>> +
>>>> +		if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) {
>>>> +			if (arg & TUN_F_TSO_ECN)
>>>> +				features |= NETIF_F_TSO_ECN;
>>>> +			if (arg & TUN_F_TSO4)
>>>> +				features |= NETIF_F_TSO;
>>>> +			if (arg & TUN_F_TSO6)
>>>> +				features |= NETIF_F_TSO6;
>>>> +		}
>>>> +
>>>> +		if (arg & TUN_F_UFO)
>>>> +			features |= NETIF_F_UFO;
>>>
>>> Hmm this looks strange. The meaning of offloads
>>> with tun is exactly the reverse from vlan/macvtap.
>>>
>>> For example, assume that you disable TSO.
>>> For tun this means: "don't send TSO packets to userspace".
>>> What this patch makes it mean for macvtap is
>>> "don't send TSO packets from userspace on the network".
>>>
>>
>> Isn't a user space write to TUN exactly the same as
>> a user space write to macvtap?  It looks to me like the
>> are and so features for them would work the same way.
>>
>> macvlan and macvtap would be different, but I think that's
>> to be expected.
>
> They aren't the same.
>
> Userspace write on tun causes a packet to be *received* from tun.
> Userspace write on macvtap causes a packet to be *transmitted*
> on macvlan.
>
>
>
>
>>> So, userspace using this ioctl
>>> to control tun would get a surprising result.
>>
>> By surprising do you mean that if user space writes
>> a TSO packet to a macvtap where TSO is disabled, the TSO
>> packet is still sent to the network?
>
> No.
> tun offloads only control packets send to userspace.
> When I disable TSO on tun this means
> don't send *me* TSO packets. Instead, you try to
> mess with packets *received* from me and being
> sent outside.

Ok, I see how that might be the perception, but that
is not what is actually happening.

In actuality, transmitted packets are not messed with because
macvtap does not perform any offload checks on _transmit_.
It passed the user specified packet to lower level device
to deal with is that sees fit.  As a result any user specified
offloads are kept and it is possible to set a TSO packet on
a TSO-disabled macvtap just like it is possible to do so on tun.

If you really don't like me mucking around with device features
then how about the following:
  1) macvlan_dev gets a new tap feature set.
  2) TUNSETOFFLOAD adjusts the above feature set.
  3) The feature set is retrievable with a new ioclt TUNGETOFFLOAD
  4) These tap features are consulted in macvtap_forward.

This does not invert the notion of device features for macvtap, but
it does make it harder to see which features the user of macvtap has
disabled.

-vlad
>
>>
>>>
>>>> +	}
>>>> +
>>>> +	rtnl_lock();
>>>> +	rcu_read_lock_bh();
>>>> +	vlan = rcu_dereference_bh(q->vlan);
>>>> +	if (!vlan) {
>>>> +		err = -ENOLINK;
>>>> +		goto unlock;
>>>> +	}
>>>> +
>>>> +	vlan->set_features = features;
>>>> +	netdev_update_features(vlan->dev);
>>>> +
>>>> +unlock:
>>>> +	rcu_read_unlock_bh();
>>>> +	rtnl_unlock();
>>>> +	return err;
>>>> +}
>>>> +
>>>>   /*
>>>>    * provide compatibility with generic tun/tap interface
>>>>    */
>>>> @@ -1062,7 +1101,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
>>>>   			 got enabled for forwarded frames */
>>>>   		if (!(q->flags & IFF_VNET_HDR))
>>>>   			return  -EINVAL;
>>>> -		return 0;
>>>> +		return set_offload(q, arg);
>>>>
>>>>   	default:
>>>>   		return -EINVAL;
>>>> diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
>>>> index f49a9f6..e446e82 100644
>>>> --- a/include/linux/if_macvlan.h
>>>> +++ b/include/linux/if_macvlan.h
>>>> @@ -65,6 +65,7 @@ struct macvlan_dev {
>>>>
>>>>   	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
>>>>
>>>> +	netdev_features_t	set_features;
>>>>   	enum macvlan_mode	mode;
>>>>   	u16			flags;
>>>>   	int (*receive)(struct sk_buff *skb);
>>>> --
>>>> 1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin June 19, 2013, 6:17 p.m. UTC | #10
On Wed, Jun 19, 2013 at 01:05:20PM -0400, Vlad Yasevich wrote:
> On 06/19/2013 11:55 AM, Michael S. Tsirkin wrote:
> >On Wed, Jun 19, 2013 at 11:47:42AM -0400, Vlad Yasevich wrote:
> >>On 06/19/2013 11:16 AM, Michael S. Tsirkin wrote:
> >>>On Wed, Jun 19, 2013 at 10:47:51AM -0400, Vlad Yasevich wrote:
> >>>>When the user issues TUNSETOFFLOAD ioctl, macvtap does not do
> >>>>anything other then to verify arguments.  This patch adds
> >>>>functionality to allow users to actually control offload features.
> >>>>NETIF_F_GSO and NETIF_F_GRO are always on, but the rest of the
> >>>>features can be controlled.
> >>>>
> >>>>Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> >>>>---
> >>>>  drivers/net/macvlan.c      |  9 +++++++++
> >>>>  drivers/net/macvtap.c      | 41 ++++++++++++++++++++++++++++++++++++++++-
> >>>>  include/linux/if_macvlan.h |  1 +
> >>>>  3 files changed, 50 insertions(+), 1 deletion(-)
> >>>>
> >>>>diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
> >>>>index edfddc5..fa47415 100644
> >>>>--- a/drivers/net/macvlan.c
> >>>>+++ b/drivers/net/macvlan.c
> >>>>@@ -638,6 +638,14 @@ static int macvlan_ethtool_get_settings(struct net_device *dev,
> >>>>  	return __ethtool_get_settings(vlan->lowerdev, cmd);
> >>>>  }
> >>>>
> >>>>+static netdev_features_t macvlan_fix_features(struct net_device *dev,
> >>>>+					      netdev_features_t features)
> >>>>+{
> >>>>+	struct macvlan_dev *vlan = netdev_priv(dev);
> >>>>+
> >>>>+	return (features & vlan->set_features) | (features & ~MACVLAN_FEATURES);
> >>>
> >>>A bit clearer as
> >>>
> >>>>+	return features & (vlan->set_features | ~MACVLAN_FEATURES);
> >>
> >>OK
> >>
> >>>
> >>>>+}
> >>>>+
> >>>>  static const struct ethtool_ops macvlan_ethtool_ops = {
> >>>>  	.get_link		= ethtool_op_get_link,
> >>>>  	.get_settings		= macvlan_ethtool_get_settings,
> >>>>@@ -651,6 +659,7 @@ static const struct net_device_ops macvlan_netdev_ops = {
> >>>>  	.ndo_stop		= macvlan_stop,
> >>>>  	.ndo_start_xmit		= macvlan_start_xmit,
> >>>>  	.ndo_change_mtu		= macvlan_change_mtu,
> >>>>+	.ndo_fix_features	= macvlan_fix_features,
> >>>>  	.ndo_change_rx_flags	= macvlan_change_rx_flags,
> >>>>  	.ndo_set_mac_address	= macvlan_set_mac_address,
> >>>>  	.ndo_set_rx_mode	= macvlan_set_mac_lists,
> >>>>diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
> >>>>index 5a76f20..09f0b1f 100644
> >>>>--- a/drivers/net/macvtap.c
> >>>>+++ b/drivers/net/macvtap.c
> >>>>@@ -976,6 +976,45 @@ static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags)
> >>>>  	return ret;
> >>>>  }
> >>>>
> >>>>+static int set_offload(struct macvtap_queue *q, unsigned long arg)
> >>>>+{
> >>>>+	struct macvlan_dev *vlan;
> >>>>+	netdev_features_t features = NETIF_F_GSO|NETIF_F_GRO;
> >>>>+	int err = 0;
> >>>>+
> >>>>+	if (arg & TUN_F_CSUM) {
> >>>>+		features = NETIF_F_HW_CSUM;
> >>>>+
> >>>>+		if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) {
> >>>>+			if (arg & TUN_F_TSO_ECN)
> >>>>+				features |= NETIF_F_TSO_ECN;
> >>>>+			if (arg & TUN_F_TSO4)
> >>>>+				features |= NETIF_F_TSO;
> >>>>+			if (arg & TUN_F_TSO6)
> >>>>+				features |= NETIF_F_TSO6;
> >>>>+		}
> >>>>+
> >>>>+		if (arg & TUN_F_UFO)
> >>>>+			features |= NETIF_F_UFO;
> >>>
> >>>Hmm this looks strange. The meaning of offloads
> >>>with tun is exactly the reverse from vlan/macvtap.
> >>>
> >>>For example, assume that you disable TSO.
> >>>For tun this means: "don't send TSO packets to userspace".
> >>>What this patch makes it mean for macvtap is
> >>>"don't send TSO packets from userspace on the network".
> >>>
> >>
> >>Isn't a user space write to TUN exactly the same as
> >>a user space write to macvtap?  It looks to me like the
> >>are and so features for them would work the same way.
> >>
> >>macvlan and macvtap would be different, but I think that's
> >>to be expected.
> >
> >They aren't the same.
> >
> >Userspace write on tun causes a packet to be *received* from tun.
> >Userspace write on macvtap causes a packet to be *transmitted*
> >on macvlan.
> >
> >
> >
> >
> >>>So, userspace using this ioctl
> >>>to control tun would get a surprising result.
> >>
> >>By surprising do you mean that if user space writes
> >>a TSO packet to a macvtap where TSO is disabled, the TSO
> >>packet is still sent to the network?
> >
> >No.
> >tun offloads only control packets send to userspace.
> >When I disable TSO on tun this means
> >don't send *me* TSO packets. Instead, you try to
> >mess with packets *received* from me and being
> >sent outside.
> 
> Ok, I see how that might be the perception, but that
> is not what is actually happening.
> 
> In actuality, transmitted packets are not messed with
> because
> macvtap does not perform any offload checks on _transmit_.
> It passed the user specified packet to lower level device
> to deal with is that sees fit.

For example, if there's no checksum, and TX checksum offload
is off, won't that calculate the checksum?
That's messing with packets.

>  As a result any user specified
> offloads are kept and it is possible to set a TSO packet on
> a TSO-disabled macvtap just like it is possible to do so on tun.

If you disable TSO in tun, userspace won't
get any TSO packets. It's broken in macvtap and your patch
does not fix it.

> 
> If you really don't like me mucking around with device features
> then how about the following:
>  1) macvlan_dev gets a new tap feature set.
>  2) TUNSETOFFLOAD adjusts the above feature set.
>  3) The feature set is retrievable with a new ioclt TUNGETOFFLOAD
>  4) These tap features are consulted in macvtap_forward.
> 
> This does not invert the notion of device features for macvtap, but
> it does make it harder to see which features the user of macvtap has
> disabled.
> 
> -vlad

The issue I mentioned is that you got the direction wrong.

> >
> >>
> >>>
> >>>>+	}
> >>>>+
> >>>>+	rtnl_lock();
> >>>>+	rcu_read_lock_bh();
> >>>>+	vlan = rcu_dereference_bh(q->vlan);
> >>>>+	if (!vlan) {
> >>>>+		err = -ENOLINK;
> >>>>+		goto unlock;
> >>>>+	}
> >>>>+
> >>>>+	vlan->set_features = features;
> >>>>+	netdev_update_features(vlan->dev);
> >>>>+
> >>>>+unlock:
> >>>>+	rcu_read_unlock_bh();
> >>>>+	rtnl_unlock();
> >>>>+	return err;
> >>>>+}
> >>>>+
> >>>>  /*
> >>>>   * provide compatibility with generic tun/tap interface
> >>>>   */
> >>>>@@ -1062,7 +1101,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
> >>>>  			 got enabled for forwarded frames */
> >>>>  		if (!(q->flags & IFF_VNET_HDR))
> >>>>  			return  -EINVAL;
> >>>>-		return 0;
> >>>>+		return set_offload(q, arg);
> >>>>
> >>>>  	default:
> >>>>  		return -EINVAL;
> >>>>diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
> >>>>index f49a9f6..e446e82 100644
> >>>>--- a/include/linux/if_macvlan.h
> >>>>+++ b/include/linux/if_macvlan.h
> >>>>@@ -65,6 +65,7 @@ struct macvlan_dev {
> >>>>
> >>>>  	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
> >>>>
> >>>>+	netdev_features_t	set_features;
> >>>>  	enum macvlan_mode	mode;
> >>>>  	u16			flags;
> >>>>  	int (*receive)(struct sk_buff *skb);
> >>>>--
> >>>>1.8.1.4
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich June 19, 2013, 6:59 p.m. UTC | #11
Arnd

MST suggested I add you.  Do you remember the reason
why macvtap uses rcu_read_lock_bh() instead of plain
rcu_read_lock()?  Additionally it seems to use
synchronize_rcu(), not the _bh() version.

Thanks
-vlad

On 06/19/2013 12:20 PM, Vlad Yasevich wrote:
> On 06/19/2013 11:46 AM, Eric Dumazet wrote:
>> On Wed, 2013-06-19 at 11:26 -0400, Vlad Yasevich wrote:
>>
>>> I think I do since vlan pointer may change even when I am holding
>>> rtnl.  rtnl is needed to change features.  rcu is needed to get
>>> the vlan pointer.
>>>
>>>> (A BH handler will not change q->vlan )
>>>
>>> No, but the _bh rcu calls seem to be used when dereferencing q->vlan.
>>> I am not sure the reason for that...
>>
>> You mix the reader/fast path, properly using RCU,
>> and the writer path, using macvtap_lock ( a spinlock ).
>>
>> That's clear sign you missed something.
>>
>
> I don't think I need macvtap_lock as I am not modifying the relationship
> between q and vlan.  I am attempting to modify the macvlan device
> features.  So macvtap_lock does not apply, and _rcu is used.
>
> Looking at the entire macvtap driver, only the _bh variants of rcu
> are used throughout the driver,  including in the ioctl() function.  I
> am not sure why the driver requires BH to be disabled, but that
> seems to be the case.
>
>>>
>>>>
>>>> BTW, it looks like ->vlan is protected by macvtap_lock
>>>
>>> Right.  This is why I use rcu to get vlan.  rtnl is needed to avoid
>>> asserts in the feature change code.
>>
>> The management should be allowed to sleep, and rcu_read_lock_bh()
>> disallows that.
>>
>> Maybe some driver callback will really sleep and crash after your patch.
>>
>> vi +69 drivers/net/macvtap.c
>>
>> /*
>>   * RCU usage:
>>   * The macvtap_queue and the macvlan_dev are loosely coupled, the
>>   * pointers from one to the other can only be read while rcu_read_lock
>>   * or macvtap_lock is held.
>>
>> Your patch does not respect the rules of this driver.
>
> Why not?  It uses rcu to acquire the pointer thus following the rules.
> The use of  the pointer is within the critical section so we are
> guaranteed to have a valid pointer.
>
>>
>> macvtap_lock is always acquired from process context, without any need
>> for _bh variant.
>>
>
> No, the lock is acquired only when modifying the relationship between
> the macvtap_queue and macvtap_dev.
>
>
>> Quite frankly, I would switch this driver to use a mutex for
>> macvtap_lock.
>>
>> And simply remove it, as RTNL is most probably already owned.
>>
>
> That's the issue.  RTNL is not owned in the ioctl case.  In fact
> rtnl_lock was added to the patch because RTNL asserts were triggered
> when changing device features.
>
> -vlad
>
>
>>
>>
>>
>>
>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann June 19, 2013, 10:38 p.m. UTC | #12
On Wednesday 19 June 2013, Vlad Yasevich wrote:
> Arnd
> 
> MST suggested I add you.  Do you remember the reason
> why macvtap uses rcu_read_lock_bh() instead of plain
> rcu_read_lock()?  Additionally it seems to use
> synchronize_rcu(), not the _bh() version.

I don't actually remember, but looking back at the git history, it
seemst to come from one of the earliest versions of the code, and
the locking was changed soon after that. Originally I needed
rcu_read_lock for any function called from the network stack,
which is equivalent to rcu_read_lock_bh as it is run from the
network softirq. Using rcu_read_lock_bh for functions called from
the chardev file operations might not be necessary but was
consistent at the time.

Looking at the state now, I think calling synchronize_rcu()
instead of synchronize_rcu_bh() is not a bug but implies
a longer grace period than necessary (I'm not sure about that)
and extra overhead from disabling softirqs in rcu_read_lock.
It's probably a good idea to revisit this and do it right.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index edfddc5..fa47415 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -638,6 +638,14 @@  static int macvlan_ethtool_get_settings(struct net_device *dev,
 	return __ethtool_get_settings(vlan->lowerdev, cmd);
 }
 
+static netdev_features_t macvlan_fix_features(struct net_device *dev,
+					      netdev_features_t features)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+
+	return (features & vlan->set_features) | (features & ~MACVLAN_FEATURES);
+}
+
 static const struct ethtool_ops macvlan_ethtool_ops = {
 	.get_link		= ethtool_op_get_link,
 	.get_settings		= macvlan_ethtool_get_settings,
@@ -651,6 +659,7 @@  static const struct net_device_ops macvlan_netdev_ops = {
 	.ndo_stop		= macvlan_stop,
 	.ndo_start_xmit		= macvlan_start_xmit,
 	.ndo_change_mtu		= macvlan_change_mtu,
+	.ndo_fix_features	= macvlan_fix_features,
 	.ndo_change_rx_flags	= macvlan_change_rx_flags,
 	.ndo_set_mac_address	= macvlan_set_mac_address,
 	.ndo_set_rx_mode	= macvlan_set_mac_lists,
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 5a76f20..09f0b1f 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -976,6 +976,45 @@  static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags)
 	return ret;
 }
 
+static int set_offload(struct macvtap_queue *q, unsigned long arg)
+{
+	struct macvlan_dev *vlan;
+	netdev_features_t features = NETIF_F_GSO|NETIF_F_GRO;
+	int err = 0;
+
+	if (arg & TUN_F_CSUM) {
+		features = NETIF_F_HW_CSUM;
+
+		if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) {
+			if (arg & TUN_F_TSO_ECN)
+				features |= NETIF_F_TSO_ECN;
+			if (arg & TUN_F_TSO4)
+				features |= NETIF_F_TSO;
+			if (arg & TUN_F_TSO6)
+				features |= NETIF_F_TSO6;
+		}
+
+		if (arg & TUN_F_UFO)
+			features |= NETIF_F_UFO;
+	}
+
+	rtnl_lock();
+	rcu_read_lock_bh();
+	vlan = rcu_dereference_bh(q->vlan);
+	if (!vlan) {
+		err = -ENOLINK;
+		goto unlock;
+	}
+
+	vlan->set_features = features;
+	netdev_update_features(vlan->dev);
+
+unlock:
+	rcu_read_unlock_bh();
+	rtnl_unlock();
+	return err;
+}
+
 /*
  * provide compatibility with generic tun/tap interface
  */
@@ -1062,7 +1101,7 @@  static long macvtap_ioctl(struct file *file, unsigned int cmd,
 			 got enabled for forwarded frames */
 		if (!(q->flags & IFF_VNET_HDR))
 			return  -EINVAL;
-		return 0;
+		return set_offload(q, arg);
 
 	default:
 		return -EINVAL;
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index f49a9f6..e446e82 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -65,6 +65,7 @@  struct macvlan_dev {
 
 	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
 
+	netdev_features_t	set_features;
 	enum macvlan_mode	mode;
 	u16			flags;
 	int (*receive)(struct sk_buff *skb);