diff mbox series

[net-next,3/4] bridge: support binding vlan dev link state to vlan member bridge ports

Message ID 20190402153543.6277-4-mmanning@vyatta.att-mail.com
State Changes Requested
Delegated to: David Miller
Headers show
Series net: support binding vlan dev link state to vlan member bridge ports | expand

Commit Message

Mike Manning April 2, 2019, 3:35 p.m. UTC
In the case of vlan filtering on bridges, the bridge may also have the
corresponding vlan devices as upper devices. A vlan bridge binding mode
is added to allow the link state of the vlan device to track only the
state of the subset of bridge ports that are also members of the vlan,
rather than that of all bridge ports. This mode is set with a vlan flag
rather than a bridge sysfs so that the 8021q module is aware that it
should not set the link state for the vlan device.

If bridge vlan is configured, the bridge device event handling results
in the link state for an upper device being set, if it is a vlan device
with the vlan bridge binding mode enabled. This also sets a
vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for
the ports in that bridge result in a link state update of the vlan
device if required.

The link state of the vlan device is up if there is at least one bridge
port that is a vlan member that is admin & oper up, otherwise its oper
state is IF_OPER_LOWERLAYERDOWN.

Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com>
---
 net/bridge/br.c         |  23 ++++++--
 net/bridge/br_private.h |  17 ++++++
 net/bridge/br_vlan.c    | 143 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 179 insertions(+), 4 deletions(-)

Comments

Nikolay Aleksandrov April 2, 2019, 7:22 p.m. UTC | #1
On 02/04/2019 18:35, Mike Manning wrote:
> In the case of vlan filtering on bridges, the bridge may also have the
> corresponding vlan devices as upper devices. A vlan bridge binding mode
> is added to allow the link state of the vlan device to track only the
> state of the subset of bridge ports that are also members of the vlan,
> rather than that of all bridge ports. This mode is set with a vlan flag
> rather than a bridge sysfs so that the 8021q module is aware that it
> should not set the link state for the vlan device.
> 
> If bridge vlan is configured, the bridge device event handling results
> in the link state for an upper device being set, if it is a vlan device
> with the vlan bridge binding mode enabled. This also sets a
> vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for
> the ports in that bridge result in a link state update of the vlan
> device if required.
> 
> The link state of the vlan device is up if there is at least one bridge
> port that is a vlan member that is admin & oper up, otherwise its oper
> state is IF_OPER_LOWERLAYERDOWN.
> 
> Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com>
> ---
>  net/bridge/br.c         |  23 ++++++--
>  net/bridge/br_private.h |  17 ++++++
>  net/bridge/br_vlan.c    | 143 ++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 179 insertions(+), 4 deletions(-)
> 

Hi,
Please CC bridge maintainers when sending bridge patches.
One question/thought - can't we add a ports_up counter in the vlan's master
struct and keep track of how many ports are up for that vlan?
The important part would be to keep it correct, i.e. vlan_add/del should inc/dec
as well as port up/down. Then we can directly update its carrier on port event
without doing a possible O(n^2) walk, we just need to walk over the port vlans
and adjust counters which is always O(n) based on num of that port's vlans.

Some more comments below.


> diff --git a/net/bridge/br.c b/net/bridge/br.c
> index a5174e5001d8..b80cd5ccd590 100644
> --- a/net/bridge/br.c
> +++ b/net/bridge/br.c
> @@ -40,10 +40,21 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
>  	bool changed_addr;
>  	int err;
>  
> -	/* register of bridge completed, add sysfs entries */
> -	if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) {
> -		br_sysfs_addbr(dev);
> -		return NOTIFY_DONE;
> +	if (dev->priv_flags & IFF_EBRIDGE) {
> +		if (event == NETDEV_REGISTER) {
> +			/* register of bridge completed, add sysfs entries */
> +			br_sysfs_addbr(dev);
> +			return NOTIFY_DONE;
> +		}
> +#ifdef CONFIG_BRIDGE_VLAN_FILTERING
> +		if (event == NETDEV_CHANGEUPPER) {
> +			struct netdev_notifier_changeupper_info *info = ptr;
> +
> +			br_vlan_upper_change(dev, info->upper_dev,
> +					     info->linking);
> +			return NOTIFY_DONE;
> +		}
> +#endif
>  	}
>  
>  	/* not a port of a bridge */
> @@ -126,6 +137,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
>  		break;
>  	}
>  
> +#ifdef CONFIG_BRIDGE_VLAN_FILTERING
> +	br_vlan_port_event(p, br, event);
> +#endif
> +
>  	/* Events that may cause spanning tree to refresh */
>  	if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
>  			  event == NETDEV_CHANGE || event == NETDEV_DOWN))
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index 00deef7fc1f3..604de174abe0 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -336,6 +336,7 @@ struct net_bridge {
>  #ifdef CONFIG_BRIDGE_VLAN_FILTERING
>  	__be16				vlan_proto;
>  	u16				default_pvid;
> +	u8				vlan_bridge_binding;

Use the bridge private bit options for this, don't add new fields. Take a look
at the br_opt_get/br_opt_toggle and the BROPT_ options.

>  	struct net_bridge_vlan_group	__rcu *vlgrp;
>  #endif
>  
> @@ -896,6 +897,10 @@ int nbp_vlan_init(struct net_bridge_port *port, struct netlink_ext_ack *extack);
>  int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask);
>  void br_vlan_get_stats(const struct net_bridge_vlan *v,
>  		       struct br_vlan_stats *stats);
> +void br_vlan_port_event(struct net_bridge_port *p, struct net_bridge *br,
> +			unsigned long event);
> +void br_vlan_upper_change(struct net_device *dev, struct net_device *upper_dev,
> +			  bool linking);
>  
>  static inline struct net_bridge_vlan_group *br_vlan_group(
>  					const struct net_bridge *br)
> @@ -1079,6 +1084,18 @@ static inline void br_vlan_get_stats(const struct net_bridge_vlan *v,
>  				     struct br_vlan_stats *stats)
>  {
>  }
> +
> +static inline void br_vlan_port_event(struct net_bridge_port *p,
> +				      struct net_bridge *br,
> +				      unsigned long event)
> +{
> +}
> +
> +static inline void br_vlan_upper_change(struct net_device *dev,
> +					struct net_device *upper_dev,
> +					bool linking)
> +{
> +}
>  #endif
>  
>  struct nf_br_ops {
> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> index 96abf8feb9dc..642373231386 100644
> --- a/net/bridge/br_vlan.c
> +++ b/net/bridge/br_vlan.c
> @@ -1265,3 +1265,146 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid,
>  	return 0;
>  }
>  EXPORT_SYMBOL_GPL(br_vlan_get_info);
> +
> +static int br_vlan_is_bind_vlan_dev(struct net_device *dev)

const dev

> +{
> +	return is_vlan_dev(dev) &&
> +		!!(vlan_dev_priv(dev)->flags & VLAN_FLAG_BRIDGE_BINDING);
> +}
> +
> +static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev,
> +				       __always_unused void *data)
> +{
> +	return br_vlan_is_bind_vlan_dev(dev);
> +}
> +
> +static int br_vlan_has_upper_bind_vlan_dev(struct net_device *dev)
> +{
> +	int found;
> +
> +	rcu_read_lock();
> +	found = netdev_walk_all_upper_dev_rcu(dev, br_vlan_is_bind_vlan_dev_fn,
> +					      NULL);
> +	rcu_read_unlock();
> +
> +	return found;
> +}
> +
> +struct br_vlan_bind_walk_data {
> +	u16 vid;
> +	struct net_device *result;
> +};
> +
> +static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev, void *data_in)
> +{
> +	struct br_vlan_bind_walk_data *data = data_in;
> +	int found = 0;
> +
> +	if (br_vlan_is_bind_vlan_dev(dev) &&
> +	    vlan_dev_priv(dev)->vlan_id == data->vid) {
> +		dev_hold(dev);

Why do you need dev_hold() ?
This seems to be running under rtnl.

> +		data->result = dev;
> +		found = 1;
> +	}
> +
> +	return found;
> +}
> +
> +/* If found, returns the vlan device with a reference held, else returns NULL.
> + */
> +static struct net_device *
> +br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid)
> +{
> +	struct br_vlan_bind_walk_data data = {
> +		.vid = vid,
> +	};
> +
> +	rcu_read_lock();
> +	netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn,
> +				      &data);
> +	rcu_read_unlock();
> +
> +	return data.result;
> +}
> +
> +static bool br_vlan_is_dev_up(struct net_device *dev)

const dev

> +{
> +	return  !!(dev->flags & IFF_UP) && netif_oper_up(dev);
> +}
> +
> +static void br_vlan_set_vlan_dev_state(struct net_bridge *br,
> +				       struct net_device *vlan_dev)
> +{
> +	u16 vid = vlan_dev_priv(vlan_dev)->vlan_id;
> +	struct net_bridge_vlan_group *vg;
> +	struct net_bridge_port *p;
> +	bool has_carrier = false;
> +
> +	list_for_each_entry(p, &br->port_list, list) {
> +		vg = nbp_vlan_group(p);
> +		if (br_vlan_find(vg, vid) && br_vlan_is_dev_up(p->dev)) {
> +			has_carrier = true;
> +			break;
> +		}
> +	}
> +
> +	if (netif_carrier_ok(vlan_dev)) {
> +		if (!has_carrier)
> +			netif_carrier_off(vlan_dev);
> +	} else {
> +		if (has_carrier)
> +			netif_carrier_on(vlan_dev);
> +	}
> +}
> +
> +static void br_vlan_set_all_vlan_dev_state(struct net_bridge_port *p,
> +					   struct net_bridge *br)

br is redundant, you can access it via p->br, you can define it locally
if needed

> +{
> +	struct net_bridge_vlan_group *vg = nbp_vlan_group(p);
> +	struct net_bridge_vlan *vlan;
> +	struct net_device *vlan_dev;
> +
> +	list_for_each_entry(vlan, &vg->vlan_list, vlist) {
> +		vlan_dev = br_vlan_get_upper_bind_vlan_dev(br->dev, vlan->vid);
> +		if (vlan_dev) {
> +			if (br_vlan_is_dev_up(p->dev)) {
> +				if (!netif_carrier_ok(vlan_dev))
> +					netif_carrier_on(vlan_dev);
> +			} else {
> +				br_vlan_set_vlan_dev_state(br, vlan_dev);
> +			}
> +			dev_put(vlan_dev);
> +		}
> +	}
> +}
> +
> +void br_vlan_upper_change(struct net_device *dev, struct net_device *upper_dev,
> +			  bool linking)
> +{
> +	struct net_bridge *br = netdev_priv(dev);
> +
> +	if (!br_vlan_is_bind_vlan_dev(upper_dev))
> +		return;
> +
> +	if (linking) {
> +		br_vlan_set_vlan_dev_state(br, upper_dev);
> +		br->vlan_bridge_binding = 1;
> +	} else {
> +		br->vlan_bridge_binding = br_vlan_has_upper_bind_vlan_dev(dev);
> +	}
> +}
> +
> +void br_vlan_port_event(struct net_bridge_port *p, struct net_bridge *br,
> +			unsigned long event)

br is redundant, p->br is available

> +{
> +	if (!br->vlan_bridge_binding)
> +		return;
> +
> +	switch (event) {
> +	case NETDEV_CHANGE:
> +	case NETDEV_DOWN:
> +	case NETDEV_UP:
> +		br_vlan_set_all_vlan_dev_state(p, br);
> +		break;
> +	}
> +}
>
Mike Manning April 3, 2019, 5:43 p.m. UTC | #2
On 02/04/2019 20:22, Nikolay Aleksandrov wrote:
> On 02/04/2019 18:35, Mike Manning wrote:
>> In the case of vlan filtering on bridges, the bridge may also have the
>> corresponding vlan devices as upper devices. A vlan bridge binding mode
>> is added to allow the link state of the vlan device to track only the
>> state of the subset of bridge ports that are also members of the vlan,
>> rather than that of all bridge ports. This mode is set with a vlan flag
>> rather than a bridge sysfs so that the 8021q module is aware that it
>> should not set the link state for the vlan device.
>>
>> If bridge vlan is configured, the bridge device event handling results
>> in the link state for an upper device being set, if it is a vlan device
>> with the vlan bridge binding mode enabled. This also sets a
>> vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for
>> the ports in that bridge result in a link state update of the vlan
>> device if required.
>>
>> The link state of the vlan device is up if there is at least one bridge
>> port that is a vlan member that is admin & oper up, otherwise its oper
>> state is IF_OPER_LOWERLAYERDOWN.
>>
>> Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com>
>> ---
>>  net/bridge/br.c         |  23 ++++++--
>>  net/bridge/br_private.h |  17 ++++++
>>  net/bridge/br_vlan.c    | 143 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 179 insertions(+), 4 deletions(-)
>>
> Hi,
> Please CC bridge maintainers when sending bridge patches.
Thank you very much for the review, I will CC you and Roopa when I have
the v1 series ready.
> One question/thought - can't we add a ports_up counter in the vlan's master
> struct and keep how many ports are up for that vlan ?

This would have been my preferred choice, but for this, one would need to
know the old link state for a port so as to determine if/what link state
transition has occurred for a NETDEV_CHANGE notification. This is if
only a single counter is kept for the vlan for all ports (also it might
be difficult to recover from an error in the counter). I could see it
working if one kept track of the operational state for each port in the
vlan in a data structure specific to this purpose i.e. that is more
efficient than the existing walk. However, speed in processing these
state changes is not that important, also the link state is quickly
determined when it might matter more, i.e. on link up of a port.

> The important part would be to keep it correct, i.e. vlan_add/del should inc/dec
> as well as port up/down. Then we can directly update its carrier on port event
> without doing a possible O(n^2) walk, we just need to walk over the port vlans
> and adjust counters which is always O(n) based on num of that port's vlans.
>
> Some more comments below.
I will make all the other changes you have requested.
Nikolay Aleksandrov April 3, 2019, 5:53 p.m. UTC | #3
On 03/04/2019 20:43, Mike Manning wrote:
> On 02/04/2019 20:22, Nikolay Aleksandrov wrote:
>> On 02/04/2019 18:35, Mike Manning wrote:
>>> In the case of vlan filtering on bridges, the bridge may also have the
>>> corresponding vlan devices as upper devices. A vlan bridge binding mode
>>> is added to allow the link state of the vlan device to track only the
>>> state of the subset of bridge ports that are also members of the vlan,
>>> rather than that of all bridge ports. This mode is set with a vlan flag
>>> rather than a bridge sysfs so that the 8021q module is aware that it
>>> should not set the link state for the vlan device.
>>>
>>> If bridge vlan is configured, the bridge device event handling results
>>> in the link state for an upper device being set, if it is a vlan device
>>> with the vlan bridge binding mode enabled. This also sets a
>>> vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for
>>> the ports in that bridge result in a link state update of the vlan
>>> device if required.
>>>
>>> The link state of the vlan device is up if there is at least one bridge
>>> port that is a vlan member that is admin & oper up, otherwise its oper
>>> state is IF_OPER_LOWERLAYERDOWN.
>>>
>>> Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com>
>>> ---
>>>  net/bridge/br.c         |  23 ++++++--
>>>  net/bridge/br_private.h |  17 ++++++
>>>  net/bridge/br_vlan.c    | 143 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>  3 files changed, 179 insertions(+), 4 deletions(-)
>>>
>> Hi,
>> Please CC bridge maintainers when sending bridge patches.
> Thank you very much for the review, I will CC you and Roopa when I have
> the v1 series ready.
>> One question/thought - can't we add a ports_up counter in the vlan's master
>> struct and keep how many ports are up for that vlan ?
> 
> This would have been my preferred choice, but for this one would need to
> know the old link state for a port so as to determine if/what link state
> transition has occurred for a NETDEV_CHANGE notification. This is if
> only a single counter is kept for the vlan for all ports (also it might
> be difficult to recover from an error in the counter). I could see it
> working if one kept track of the operational state for each port in the
> vlan in a data structure specific to this purpose i.e. that is more
> efficient than the existing walk. However, speed in processing these
> state changes is not that important, also the link state is quickly
> determined when it might matter more, i.e. on link up of a port.
> 

Indeed, the NETDEV_CHANGE is harder, but we can keep the last known carrier state
in the per-port structure and make a decision based on that and the new state.
That wouldn't require any additional structures. Speed is important to us when
we deploy the bridge at scale, we have tests with thousands of vlans and devices
where this walk would become expensive on link flaps.

>> The important part would be to keep it correct, i.e. vlan_add/del should inc/dec
>> as well as port up/down. Then we can directly update its carrier on port event
>> without doing a possible O(n^2) walk, we just need to walk over the port vlans
>> and adjust counters which is always O(n) based on num of that port's vlans.
>>
>> Some more comments below.
> I will make all the other changes you have requested.
> 

Thanks!
Nikolay Aleksandrov April 3, 2019, 6:17 p.m. UTC | #4
On 03/04/2019 20:53, Nikolay Aleksandrov wrote:
> On 03/04/2019 20:43, Mike Manning wrote:
>> On 02/04/2019 20:22, Nikolay Aleksandrov wrote:
>>> On 02/04/2019 18:35, Mike Manning wrote:
>>>> In the case of vlan filtering on bridges, the bridge may also have the
>>>> corresponding vlan devices as upper devices. A vlan bridge binding mode
>>>> is added to allow the link state of the vlan device to track only the
>>>> state of the subset of bridge ports that are also members of the vlan,
>>>> rather than that of all bridge ports. This mode is set with a vlan flag
>>>> rather than a bridge sysfs so that the 8021q module is aware that it
>>>> should not set the link state for the vlan device.
>>>>
>>>> If bridge vlan is configured, the bridge device event handling results
>>>> in the link state for an upper device being set, if it is a vlan device
>>>> with the vlan bridge binding mode enabled. This also sets a
>>>> vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for
>>>> the ports in that bridge result in a link state update of the vlan
>>>> device if required.
>>>>
>>>> The link state of the vlan device is up if there is at least one bridge
>>>> port that is a vlan member that is admin & oper up, otherwise its oper
>>>> state is IF_OPER_LOWERLAYERDOWN.
>>>>
>>>> Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com>
>>>> ---
>>>>  net/bridge/br.c         |  23 ++++++--
>>>>  net/bridge/br_private.h |  17 ++++++
>>>>  net/bridge/br_vlan.c    | 143 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  3 files changed, 179 insertions(+), 4 deletions(-)
>>>>
>>> Hi,
>>> Please CC bridge maintainers when sending bridge patches.
>> Thank you very much for the review, I will CC you and Roopa when I have
>> the v1 series ready.
>>> One question/thought - can't we add a ports_up counter in the vlan's master
>>> struct and keep how many ports are up for that vlan ?
>>
>> This would have been my preferred choice, but for this one would need to
>> know the old link state for a port so as to determine if/what link state
>> transition has occurred for a NETDEV_CHANGE notification. This is if
>> only a single counter is kept for the vlan for all ports (also it might
>> be difficult to recover from an error in the counter). I could see it
>> working if one kept track of the operational state for each port in the
>> vlan in a data structure specific to this purpose i.e. that is more
>> efficient than the existing walk. However, speed in processing these
>> state changes is not that important, also the link state is quickly
>> determined when it might matter more, i.e. on link up of a port.
>>
> 
> Indeed, the NETDEV_CHANGE is harder, but we can keep the last known carrier state
> in the per-port structure and make a decision based on that and the new state.
> That wouldn't require any additional structures. Speed is important to us when
> we deploy the bridge at scale, we have tests with thousands of vlans and devices
> where this walk would become expensive on link flaps.
> 

In fact we already have a similar tracking field used for the port state, maybe
it can be used as an indicator. That state needs to be taken into account anyway
or the carrier state would be wrong.

>>> The important part would be to keep it correct, i.e. vlan_add/del should inc/dec
>>> as well as port up/down. Then we can directly update its carrier on port event
>>> without doing a possible O(n^2) walk, we just need to walk over the port vlans
>>> and adjust counters which is always O(n) based on num of that port's vlans.
>>>
>>> Some more comments below.
>> I will make all the other changes you have requested.
>>
> 
> Thanks!
>
Nikolay Aleksandrov April 3, 2019, 6:23 p.m. UTC | #5
On 03/04/2019 21:17, Nikolay Aleksandrov wrote:
> On 03/04/2019 20:53, Nikolay Aleksandrov wrote:
>> On 03/04/2019 20:43, Mike Manning wrote:
>>> On 02/04/2019 20:22, Nikolay Aleksandrov wrote:
>>>> On 02/04/2019 18:35, Mike Manning wrote:
>>>>> In the case of vlan filtering on bridges, the bridge may also have the
>>>>> corresponding vlan devices as upper devices. A vlan bridge binding mode
>>>>> is added to allow the link state of the vlan device to track only the
>>>>> state of the subset of bridge ports that are also members of the vlan,
>>>>> rather than that of all bridge ports. This mode is set with a vlan flag
>>>>> rather than a bridge sysfs so that the 8021q module is aware that it
>>>>> should not set the link state for the vlan device.
>>>>>
>>>>> If bridge vlan is configured, the bridge device event handling results
>>>>> in the link state for an upper device being set, if it is a vlan device
>>>>> with the vlan bridge binding mode enabled. This also sets a
>>>>> vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for
>>>>> the ports in that bridge result in a link state update of the vlan
>>>>> device if required.
>>>>>
>>>>> The link state of the vlan device is up if there is at least one bridge
>>>>> port that is a vlan member that is admin & oper up, otherwise its oper
>>>>> state is IF_OPER_LOWERLAYERDOWN.
>>>>>
>>>>> Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com>
>>>>> ---
>>>>>  net/bridge/br.c         |  23 ++++++--
>>>>>  net/bridge/br_private.h |  17 ++++++
>>>>>  net/bridge/br_vlan.c    | 143 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>  3 files changed, 179 insertions(+), 4 deletions(-)
>>>>>
>>>> Hi,
>>>> Please CC bridge maintainers when sending bridge patches.
>>> Thank you very much for the review, I will CC you and Roopa when I have
>>> the v1 series ready.
>>>> One question/thought - can't we add a ports_up counter in the vlan's master
>>>> struct and keep how many ports are up for that vlan ?
>>>
>>> This would have been my preferred choice, but for this one would need to
>>> know the old link state for a port so as to determine if/what link state
>>> transition has occurred for a NETDEV_CHANGE notification. This is if
>>> only a single counter is kept for the vlan for all ports (also it might
>>> be difficult to recover from an error in the counter). I could see it
>>> working if one kept track of the operational state for each port in the
>>> vlan in a data structure specific to this purpose i.e. that is more
>>> efficient than the existing walk. However, speed in processing these
>>> state changes is not that important, also the link state is quickly
>>> determined when it might matter more, i.e. on link up of a port.
>>>
>>
>> Indeed, the NETDEV_CHANGE is harder, but we can keep the last known carrier state
>> in the per-port structure and make a decision based on that and the new state.
>> That wouldn't require any additional structures. Speed is important to us when
>> we deploy the bridge at scale, we have tests with thousands of vlans and devices
>> where this walk would become expensive on link flaps.
>>
> 
> In fact we already have a similar tracking field used for the port state, maybe
> it can be used as an indicator. That state needs to be taken into account anyway
> or the carrier state would be wrong.
> 

Never mind the last sentence, I spoke too quickly. An additional structure may be needed
after all, this will need some investigating.

>>>> The important part would be to keep it correct, i.e. vlan_add/del should inc/dec
>>>> as well as port up/down. Then we can directly update its carrier on port event
>>>> without doing a possible O(n^2) walk, we just need to walk over the port vlans
>>>> and adjust counters which is always O(n) based on num of that port's vlans.
>>>>
>>>> Some more comments below.
>>> I will make all the other changes you have requested.
>>>
>>
>> Thanks!
>>
>
Mike Manning April 17, 2019, 6:16 p.m. UTC | #6
On 03/04/2019 19:23, Nikolay Aleksandrov wrote:
> On 03/04/2019 21:17, Nikolay Aleksandrov wrote:
>> On 03/04/2019 20:53, Nikolay Aleksandrov wrote:
>>> On 03/04/2019 20:43, Mike Manning wrote:
>>>> On 02/04/2019 20:22, Nikolay Aleksandrov wrote:
>>>>> On 02/04/2019 18:35, Mike Manning wrote:
>>>>>> In the case of vlan filtering on bridges, the bridge may also have the
>>>>>> corresponding vlan devices as upper devices. A vlan bridge binding mode
>>>>>> is added to allow the link state of the vlan device to track only the
>>>>>> state of the subset of bridge ports that are also members of the vlan,
>>>>>> rather than that of all bridge ports. This mode is set with a vlan flag
>>>>>> rather than a bridge sysfs so that the 8021q module is aware that it
>>>>>> should not set the link state for the vlan device.
>>>>>>
>>>>>> If bridge vlan is configured, the bridge device event handling results
>>>>>> in the link state for an upper device being set, if it is a vlan device
>>>>>> with the vlan bridge binding mode enabled. This also sets a
>>>>>> vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for
>>>>>> the ports in that bridge result in a link state update of the vlan
>>>>>> device if required.
>>>>>>
>>>>>> The link state of the vlan device is up if there is at least one bridge
>>>>>> port that is a vlan member that is admin & oper up, otherwise its oper
>>>>>> state is IF_OPER_LOWERLAYERDOWN.
>>>>>>
>>>>>> Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com>
>>>>>> ---
>>>>>>  net/bridge/br.c         |  23 ++++++--
>>>>>>  net/bridge/br_private.h |  17 ++++++
>>>>>>  net/bridge/br_vlan.c    | 143 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>  3 files changed, 179 insertions(+), 4 deletions(-)
>>>>>>
>>>>> Hi,
>>>>> Please CC bridge maintainers when sending bridge patches.
>>>> Thank you very much for the review, I will CC you and Roopa when I have
>>>> the v1 series ready.
>>>>> One question/thought - can't we add a ports_up counter in the vlan's master
>>>>> struct and keep how many ports are up for that vlan ?
>>>> This would have been my preferred choice, but for this one would need to
>>>> know the old link state for a port so as to determine if/what link state
>>>> transition has occurred for a NETDEV_CHANGE notification. This is if
>>>> only a single counter is kept for the vlan for all ports (also it might
>>>> be difficult to recover from an error in the counter). I could see it
>>>> working if one kept track of the operational state for each port in the
>>>> vlan in a data structure specific to this purpose i.e. that is more
>>>> efficient than the existing walk. However, speed in processing these
>>>> state changes is not that important, also the link state is quickly
>>>> determined when it might matter more, i.e. on link up of a port.
>>>>
>>> Indeed, the NETDEV_CHANGE is harder, but we can keep the last known carrier state
>>> in the per-port structure and make a decision based on that and the new state.
>>> That wouldn't require any additional structures. Speed is important to us when
>>> we deploy the bridge at scale, we have tests with thousands of vlans and devices
>>> where this walk would become expensive on link flaps.
>>>
>> In fact we already have a similar tracking field used for the port state, maybe
>> it can be used as an indicator. That state needs to be taken into account anyway
>> or the carrier state would be wrong.
>>
> Nevermind the last sentence, spoke too quickly. An additional structure may be needed
> after all, this will need some investigating.
>
>>>>> The important part would be to keep it correct, i.e. vlan_add/del should inc/dec
>>>>> as well as port up/down. Then we can directly update its carrier on port event
>>>>> without doing a possible O(n^2) walk, we just need to walk over the port vlans
>>>>> and adjust counters which is always O(n) based on num of that port's vlans.
>>>>>
>>>>> Some more comments below.
>>>> I will make all the other changes you have requested.
>>>>
>>> Thanks!
>>>
I have made the other changes in the v2 series about to be sent,
including adding a patch for handling link down on the bridge device to
cover the STP case.

However, I don't see a straightforward mechanism that is not error-prone
in terms of a new structure for storing the required link state of all
ports for all vlan devices (as one needs to track addition/deletion of
vlan devices for vlans, and addition/deletion of ports as members of
these vlans) so that one does not have to use the existing structures
for checking vlan devices and ports in those vlans.

Given that the proposed functionality is optional and so does not impact
any existing use-cases, is performance such a consideration? And
typically one would expect most ports to be up, in which case the link
state of the vlan device is set to up without having to check the state
of other ports, or at least not having to check many ports.
diff mbox series

Patch

diff --git a/net/bridge/br.c b/net/bridge/br.c
index a5174e5001d8..b80cd5ccd590 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -40,10 +40,21 @@  static int br_device_event(struct notifier_block *unused, unsigned long event, v
 	bool changed_addr;
 	int err;
 
-	/* register of bridge completed, add sysfs entries */
-	if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) {
-		br_sysfs_addbr(dev);
-		return NOTIFY_DONE;
+	if (dev->priv_flags & IFF_EBRIDGE) {
+		if (event == NETDEV_REGISTER) {
+			/* register of bridge completed, add sysfs entries */
+			br_sysfs_addbr(dev);
+			return NOTIFY_DONE;
+		}
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+		if (event == NETDEV_CHANGEUPPER) {
+			struct netdev_notifier_changeupper_info *info = ptr;
+
+			br_vlan_upper_change(dev, info->upper_dev,
+					     info->linking);
+			return NOTIFY_DONE;
+		}
+#endif
 	}
 
 	/* not a port of a bridge */
@@ -126,6 +137,10 @@  static int br_device_event(struct notifier_block *unused, unsigned long event, v
 		break;
 	}
 
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+	br_vlan_port_event(p, br, event);
+#endif
+
 	/* Events that may cause spanning tree to refresh */
 	if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
 			  event == NETDEV_CHANGE || event == NETDEV_DOWN))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 00deef7fc1f3..604de174abe0 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -336,6 +336,7 @@  struct net_bridge {
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
 	__be16				vlan_proto;
 	u16				default_pvid;
+	u8				vlan_bridge_binding;
 	struct net_bridge_vlan_group	__rcu *vlgrp;
 #endif
 
@@ -896,6 +897,10 @@  int nbp_vlan_init(struct net_bridge_port *port, struct netlink_ext_ack *extack);
 int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask);
 void br_vlan_get_stats(const struct net_bridge_vlan *v,
 		       struct br_vlan_stats *stats);
+void br_vlan_port_event(struct net_bridge_port *p, struct net_bridge *br,
+			unsigned long event);
+void br_vlan_upper_change(struct net_device *dev, struct net_device *upper_dev,
+			  bool linking);
 
 static inline struct net_bridge_vlan_group *br_vlan_group(
 					const struct net_bridge *br)
@@ -1079,6 +1084,18 @@  static inline void br_vlan_get_stats(const struct net_bridge_vlan *v,
 				     struct br_vlan_stats *stats)
 {
 }
+
+/* No-op stub for br_vlan_port_event(): takes the same arguments and does
+ * nothing.
+ * NOTE(review): presumably the variant compiled when
+ * CONFIG_BRIDGE_VLAN_FILTERING is disabled - confirm against the enclosing
+ * #ifdef.
+ */
+static inline void br_vlan_port_event(struct net_bridge_port *p,
+				      struct net_bridge *br,
+				      unsigned long event)
+{
+}
+
+/* No-op stub for br_vlan_upper_change(): takes the same arguments and does
+ * nothing.
+ * NOTE(review): presumably the variant compiled when
+ * CONFIG_BRIDGE_VLAN_FILTERING is disabled - confirm against the enclosing
+ * #ifdef.
+ */
+static inline void br_vlan_upper_change(struct net_device *dev,
+					struct net_device *upper_dev,
+					bool linking)
+{
+}
 #endif
 
 struct nf_br_ops {
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 96abf8feb9dc..642373231386 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1265,3 +1265,146 @@  int br_vlan_get_info(const struct net_device *dev, u16 vid,
 	return 0;
 }
 EXPORT_SYMBOL_GPL(br_vlan_get_info);
+
+/* Returns 1 if @dev is a vlan device with VLAN_FLAG_BRIDGE_BINDING set,
+ * 0 otherwise.
+ */
+static int br_vlan_is_bind_vlan_dev(struct net_device *dev)
+{
+	/* vlan_dev_priv() is only consulted for vlan devices */
+	if (!is_vlan_dev(dev))
+		return 0;
+
+	return (vlan_dev_priv(dev)->flags & VLAN_FLAG_BRIDGE_BINDING) ? 1 : 0;
+}
+
+/* Adapter that gives br_vlan_is_bind_vlan_dev() the callback signature used
+ * with netdev_walk_all_upper_dev_rcu(); @data is not used.
+ */
+static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev,
+				       __always_unused void *data)
+{
+	int is_bind_dev = br_vlan_is_bind_vlan_dev(dev);
+
+	return is_bind_dev;
+}
+
+/* Walk the upper devices of @dev under the RCU read lock, testing each with
+ * br_vlan_is_bind_vlan_dev_fn(). Returns the value handed back by the walk,
+ * i.e. non-zero once the callback has reported a bridge-binding vlan device.
+ */
+static int br_vlan_has_upper_bind_vlan_dev(struct net_device *dev)
+{
+	int ret;
+
+	rcu_read_lock();
+	ret = netdev_walk_all_upper_dev_rcu(dev,
+					    br_vlan_is_bind_vlan_dev_fn, NULL);
+	rcu_read_unlock();
+
+	return ret;
+}
+
+struct br_vlan_bind_walk_data {	/* context for the upper-device walk */
+	u16 vid;			/* in: vlan id to match */
+	struct net_device *result;	/* out: matching vlan dev, if any */
+};
+
+/* Upper-device walk callback: matches a bridge-binding vlan device whose
+ * vlan id equals the one requested in @data_in (a struct
+ * br_vlan_bind_walk_data). On a match the device is stored in ->result with
+ * a reference taken and 1 is returned; otherwise 0 is returned.
+ */
+static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev, void *data_in)
+{
+	struct br_vlan_bind_walk_data *walk = data_in;
+
+	if (!br_vlan_is_bind_vlan_dev(dev))
+		return 0;
+
+	if (vlan_dev_priv(dev)->vlan_id != walk->vid)
+		return 0;
+
+	/* Hold the device: it is handed out through ->result. */
+	dev_hold(dev);
+	walk->result = dev;
+
+	return 1;
+}
+
+/* Search the upper devices of @dev for a bridge-binding vlan device with
+ * vlan id @vid. Returns that device with a reference held (the caller must
+ * dev_put() it), or NULL if there is none.
+ */
+static struct net_device *
+br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid)
+{
+	struct br_vlan_bind_walk_data walk = {
+		.vid = vid,
+		.result = NULL,
+	};
+
+	/* The walk's return value is not needed; the match, if any, is
+	 * recorded in walk.result by the callback.
+	 */
+	rcu_read_lock();
+	netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn,
+				      &walk);
+	rcu_read_unlock();
+
+	return walk.result;
+}
+
+/* True when @dev has IFF_UP set and netif_oper_up() also reports it up. */
+static bool br_vlan_is_dev_up(struct net_device *dev)
+{
+	if (!(dev->flags & IFF_UP))
+		return false;
+
+	return netif_oper_up(dev);
+}
+
+/* Bring the carrier of @vlan_dev in line with the ports of @br: the carrier
+ * should be on if at least one port has the device's vlan id in its vlan
+ * group and is up (see br_vlan_is_dev_up()), and off otherwise. The carrier
+ * is only touched when its current state differs from the wanted one.
+ */
+static void br_vlan_set_vlan_dev_state(struct net_bridge *br,
+				       struct net_device *vlan_dev)
+{
+	u16 vid = vlan_dev_priv(vlan_dev)->vlan_id;
+	struct net_bridge_port *port;
+	bool want_carrier = false;
+
+	list_for_each_entry(port, &br->port_list, list) {
+		if (!br_vlan_find(nbp_vlan_group(port), vid))
+			continue;
+		if (!br_vlan_is_dev_up(port->dev))
+			continue;
+
+		/* one qualifying port is enough */
+		want_carrier = true;
+		break;
+	}
+
+	/* Nothing to do if the carrier already matches. */
+	if (netif_carrier_ok(vlan_dev) == want_carrier)
+		return;
+
+	if (want_carrier)
+		netif_carrier_on(vlan_dev);
+	else
+		netif_carrier_off(vlan_dev);
+}
+
+/* For each vlan in the vlan group of port @p, look up the bridge-binding
+ * vlan device with that vlan id among the upper devices of @br and update
+ * its carrier:
+ *  - if the port is up, turn the carrier on unless it is already on;
+ *  - otherwise recompute the state from all ports of @br.
+ * The reference taken by the lookup is dropped before moving on.
+ */
+static void br_vlan_set_all_vlan_dev_state(struct net_bridge_port *p,
+					   struct net_bridge *br)
+{
+	struct net_bridge_vlan_group *vg = nbp_vlan_group(p);
+	struct net_device *bound_dev;
+	struct net_bridge_vlan *v;
+
+	list_for_each_entry(v, &vg->vlan_list, vlist) {
+		bound_dev = br_vlan_get_upper_bind_vlan_dev(br->dev, v->vid);
+		if (!bound_dev)
+			continue;
+
+		if (!br_vlan_is_dev_up(p->dev))
+			br_vlan_set_vlan_dev_state(br, bound_dev);
+		else if (!netif_carrier_ok(bound_dev))
+			netif_carrier_on(bound_dev);
+
+		dev_put(bound_dev);
+	}
+}
+
+/* Handle an upper device being linked to (@linking true) or unlinked from
+ * (@linking false) the bridge device @dev. Does nothing unless @upper_dev is
+ * a bridge-binding vlan device.
+ *
+ * On link, the carrier of @upper_dev is synchronised with the bridge ports
+ * and the bridge's vlan_bridge_binding flag is set. On unlink, the flag is
+ * recomputed by walking the upper devices of @dev.
+ */
+void br_vlan_upper_change(struct net_device *dev, struct net_device *upper_dev,
+			  bool linking)
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	if (!br_vlan_is_bind_vlan_dev(upper_dev))
+		return;
+
+	if (!linking) {
+		br->vlan_bridge_binding = br_vlan_has_upper_bind_vlan_dev(dev);
+		return;
+	}
+
+	br_vlan_set_vlan_dev_state(br, upper_dev);
+	br->vlan_bridge_binding = 1;
+}
+
+/* Port event hook. When @br has vlan_bridge_binding set, a NETDEV_CHANGE,
+ * NETDEV_DOWN or NETDEV_UP event on port @p triggers an update of the
+ * bridge-binding vlan devices for the vlans of that port. All other events
+ * are ignored.
+ */
+void br_vlan_port_event(struct net_bridge_port *p, struct net_bridge *br,
+			unsigned long event)
+{
+	if (!br->vlan_bridge_binding)
+		return;
+
+	if (event == NETDEV_CHANGE || event == NETDEV_DOWN ||
+	    event == NETDEV_UP)
+		br_vlan_set_all_vlan_dev_state(p, br);
+}