Message ID | 20190402153543.6277-4-mmanning@vyatta.att-mail.com |
---|---|
State | Changes Requested |
Delegated to: | David Miller |
Headers | show |
Series | net: support binding vlan dev link state to vlan member bridge ports | expand |
On 02/04/2019 18:35, Mike Manning wrote: > In the case of vlan filtering on bridges, the bridge may also have the > corresponding vlan devices as upper devices. A vlan bridge binding mode > is added to allow the link state of the vlan device to track only the > state of the subset of bridge ports that are also members of the vlan, > rather than that of all bridge ports. This mode is set with a vlan flag > rather than a bridge sysfs so that the 8021q module is aware that it > should not set the link state for the vlan device. > > If bridge vlan is configured, the bridge device event handling results > in the link state for an upper device being set, if it is a vlan device > with the vlan bridge binding mode enabled. This also sets a > vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for > the ports in that bridge result in a link state update of the vlan > device if required. > > The link state of the vlan device is up if there is at least one bridge > port that is a vlan member that is admin & oper up, otherwise its oper > state is IF_OPER_LOWERLAYERDOWN. > > Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com> > --- > net/bridge/br.c | 23 ++++++-- > net/bridge/br_private.h | 17 ++++++ > net/bridge/br_vlan.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 179 insertions(+), 4 deletions(-) > Hi, Please CC bridge maintainers when sending bridge patches. One question/thought - can't we add a ports_up counter in the vlan's master struct and keep how many ports are up for that vlan ? The important part would be to keep it correct, i.e. vlan_add/del should inc/dec as well as port up/down. Then we can directly update its carrier on port event without doing a possible O(n^2) walk, we just need to walk over the port vlans and adjust counters which is always O(n) based on num of that port's vlans. Some more comments below. > diff --git a/net/bridge/br.c b/net/bridge/br.c > index a5174e5001d8..b80cd5ccd590 100644 > --- a/net/bridge/br.c > +++ b/net/bridge/br.c > @@ -40,10 +40,21 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v > bool changed_addr; > int err; > > - /* register of bridge completed, add sysfs entries */ > - if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) { > - br_sysfs_addbr(dev); > - return NOTIFY_DONE; > + if (dev->priv_flags & IFF_EBRIDGE) { > + if (event == NETDEV_REGISTER) { > + /* register of bridge completed, add sysfs entries */ > + br_sysfs_addbr(dev); > + return NOTIFY_DONE; > + } > +#ifdef CONFIG_BRIDGE_VLAN_FILTERING > + if (event == NETDEV_CHANGEUPPER) { > + struct netdev_notifier_changeupper_info *info = ptr; > + > + br_vlan_upper_change(dev, info->upper_dev, > + info->linking); > + return NOTIFY_DONE; > + } > +#endif > } > > /* not a port of a bridge */ > @@ -126,6 +137,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v > break; > } > > +#ifdef CONFIG_BRIDGE_VLAN_FILTERING > + br_vlan_port_event(p, br, event); > +#endif > + > /* Events that may cause spanning tree to refresh */ > if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP || > event == NETDEV_CHANGE || event == NETDEV_DOWN)) > diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h > index 00deef7fc1f3..604de174abe0 100644 > --- a/net/bridge/br_private.h > +++ b/net/bridge/br_private.h > @@ -336,6 +336,7 @@ struct net_bridge { > #ifdef CONFIG_BRIDGE_VLAN_FILTERING > __be16 vlan_proto; > u16 default_pvid; > + u8 vlan_bridge_binding; Use the bridge private bit options for this, don't add new fields. Take a look at the br_opt_get/br_opt_toggle and the BROPT_ options. > struct net_bridge_vlan_group __rcu *vlgrp; > #endif > > @@ -896,6 +897,10 @@ int nbp_vlan_init(struct net_bridge_port *port, struct netlink_ext_ack *extack); > int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask); > void br_vlan_get_stats(const struct net_bridge_vlan *v, > struct br_vlan_stats *stats); > +void br_vlan_port_event(struct net_bridge_port *p, struct net_bridge *br, > + unsigned long event); > +void br_vlan_upper_change(struct net_device *dev, struct net_device *upper_dev, > + bool linking); > > static inline struct net_bridge_vlan_group *br_vlan_group( > const struct net_bridge *br) > @@ -1079,6 +1084,18 @@ static inline void br_vlan_get_stats(const struct net_bridge_vlan *v, > struct br_vlan_stats *stats) > { > } > + > +static inline void br_vlan_port_event(struct net_bridge_port *p, > + struct net_bridge *br, > + unsigned long event) > +{ > +} > + > +static inline void br_vlan_upper_change(struct net_device *dev, > + struct net_device *upper_dev, > + bool linking) > +{ > +} > #endif > > struct nf_br_ops { > diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c > index 96abf8feb9dc..642373231386 100644 > --- a/net/bridge/br_vlan.c > +++ b/net/bridge/br_vlan.c > @@ -1265,3 +1265,146 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid, > return 0; > } > EXPORT_SYMBOL_GPL(br_vlan_get_info); > + > +static int br_vlan_is_bind_vlan_dev(struct net_device *dev) const dev > +{ > + return is_vlan_dev(dev) && > + !!(vlan_dev_priv(dev)->flags & VLAN_FLAG_BRIDGE_BINDING); > +} > + > +static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev, > + __always_unused void *data) > +{ > + return br_vlan_is_bind_vlan_dev(dev); > +} > + > +static int br_vlan_has_upper_bind_vlan_dev(struct net_device *dev) > +{ > + int found; > + > + rcu_read_lock(); > + found = netdev_walk_all_upper_dev_rcu(dev, br_vlan_is_bind_vlan_dev_fn, > + NULL); > + rcu_read_unlock(); > + > + return found; > +} > + > +struct br_vlan_bind_walk_data { > + u16 vid; > + struct net_device *result; > +}; > + > +static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev, void *data_in) > +{ > + struct br_vlan_bind_walk_data *data = data_in; > + int found = 0; > + > + if (br_vlan_is_bind_vlan_dev(dev) && > + vlan_dev_priv(dev)->vlan_id == data->vid) { > + dev_hold(dev); Why do you need dev_hold() ? This seems to be running under rtnl. > + data->result = dev; > + found = 1; > + } > + > + return found; > +} > + > +/* If found, returns the vlan device with a reference held, else returns NULL. > + */ > +static struct net_device * > +br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid) > +{ > + struct br_vlan_bind_walk_data data = { > + .vid = vid, > + }; > + > + rcu_read_lock(); > + netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn, > + &data); > + rcu_read_unlock(); > + > + return data.result; > +} > + > +static bool br_vlan_is_dev_up(struct net_device *dev) const dev > +{ > + return !!(dev->flags & IFF_UP) && netif_oper_up(dev); > +} > + > +static void br_vlan_set_vlan_dev_state(struct net_bridge *br, > + struct net_device *vlan_dev) > +{ > + u16 vid = vlan_dev_priv(vlan_dev)->vlan_id; > + struct net_bridge_vlan_group *vg; > + struct net_bridge_port *p; > + bool has_carrier = false; > + > + list_for_each_entry(p, &br->port_list, list) { > + vg = nbp_vlan_group(p); > + if (br_vlan_find(vg, vid) && br_vlan_is_dev_up(p->dev)) { > + has_carrier = true; > + break; > + } > + } > + > + if (netif_carrier_ok(vlan_dev)) { > + if (!has_carrier) > + netif_carrier_off(vlan_dev); > + } else { > + if (has_carrier) > + netif_carrier_on(vlan_dev); > + } > +} > + > +static void br_vlan_set_all_vlan_dev_state(struct net_bridge_port *p, > + struct net_bridge *br) br is redundant, you can access it via p->br, you can define it locally if needed > +{ > + struct net_bridge_vlan_group *vg = nbp_vlan_group(p); > + struct net_bridge_vlan *vlan; > + struct net_device *vlan_dev; > + > + list_for_each_entry(vlan, &vg->vlan_list, vlist) { > + vlan_dev = br_vlan_get_upper_bind_vlan_dev(br->dev, vlan->vid); > + if (vlan_dev) { > + if (br_vlan_is_dev_up(p->dev)) { > + if (!netif_carrier_ok(vlan_dev)) > + netif_carrier_on(vlan_dev); > + } else { > + br_vlan_set_vlan_dev_state(br, vlan_dev); > + } > + dev_put(vlan_dev); > + } > + } > +} > + > +void br_vlan_upper_change(struct net_device *dev, struct net_device *upper_dev, > + bool linking) > +{ > + struct net_bridge *br = netdev_priv(dev); > + > + if (!br_vlan_is_bind_vlan_dev(upper_dev)) > + return; > + > + if (linking) { > + br_vlan_set_vlan_dev_state(br, upper_dev); > + br->vlan_bridge_binding = 1; > + } else { > + br->vlan_bridge_binding = br_vlan_has_upper_bind_vlan_dev(dev); > + } > +} > + > +void br_vlan_port_event(struct net_bridge_port *p, struct net_bridge *br, > + unsigned long event) br is redundant, p->br is available > +{ > + if (!br->vlan_bridge_binding) > + return; > + > + switch (event) { > + case NETDEV_CHANGE: > + case NETDEV_DOWN: > + case NETDEV_UP: > + br_vlan_set_all_vlan_dev_state(p, br); > + break; > + } > +} >
On 02/04/2019 20:22, Nikolay Aleksandrov wrote: > On 02/04/2019 18:35, Mike Manning wrote: >> In the case of vlan filtering on bridges, the bridge may also have the >> corresponding vlan devices as upper devices. A vlan bridge binding mode >> is added to allow the link state of the vlan device to track only the >> state of the subset of bridge ports that are also members of the vlan, >> rather than that of all bridge ports. This mode is set with a vlan flag >> rather than a bridge sysfs so that the 8021q module is aware that it >> should not set the link state for the vlan device. >> >> If bridge vlan is configured, the bridge device event handling results >> in the link state for an upper device being set, if it is a vlan device >> with the vlan bridge binding mode enabled. This also sets a >> vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for >> the ports in that bridge result in a link state update of the vlan >> device if required. >> >> The link state of the vlan device is up if there is at least one bridge >> port that is a vlan member that is admin & oper up, otherwise its oper >> state is IF_OPER_LOWERLAYERDOWN. >> >> Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com> >> --- >> net/bridge/br.c | 23 ++++++-- >> net/bridge/br_private.h | 17 ++++++ >> net/bridge/br_vlan.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 179 insertions(+), 4 deletions(-) >> > Hi, > Please CC bridge maintainers when sending bridge patches. Thank you very much for the review, I will CC you and Roopa when I have the v1 series ready. > One question/thought - can't we add a ports_up counter in the vlan's master > struct and keep how many ports are up for that vlan ? This would have been my preferred choice, but for this one would need to know the old link state for a port so as to determine if/what link state transition has occurred for a NETDEV_CHANGE notification. This is if only a single counter is kept for the vlan for all ports (also it might be difficult to recover from an error in the counter). I could see it working if one kept track of the operational state for each port in the vlan in a data structure specific to this purpose i.e. that is more efficient than the existing walk. However, speed in processing these state changes is not that important, also the link state is quickly determined when it might matter more, i.e. on link up of a port. > The important part would be to keep it correct, i.e. vlan_add/del should inc/dec > as well as port up/down. Then we can directly update its carrier on port event > without doing a possible O(n^2) walk, we just need to walk over the port vlans > and adjust counters which is always O(n) based on num of that port's vlans. > > Some more comments below. I will make all the other changes you have requested.
On 03/04/2019 20:43, Mike Manning wrote: > On 02/04/2019 20:22, Nikolay Aleksandrov wrote: >> On 02/04/2019 18:35, Mike Manning wrote: >>> In the case of vlan filtering on bridges, the bridge may also have the >>> corresponding vlan devices as upper devices. A vlan bridge binding mode >>> is added to allow the link state of the vlan device to track only the >>> state of the subset of bridge ports that are also members of the vlan, >>> rather than that of all bridge ports. This mode is set with a vlan flag >>> rather than a bridge sysfs so that the 8021q module is aware that it >>> should not set the link state for the vlan device. >>> >>> If bridge vlan is configured, the bridge device event handling results >>> in the link state for an upper device being set, if it is a vlan device >>> with the vlan bridge binding mode enabled. This also sets a >>> vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for >>> the ports in that bridge result in a link state update of the vlan >>> device if required. >>> >>> The link state of the vlan device is up if there is at least one bridge >>> port that is a vlan member that is admin & oper up, otherwise its oper >>> state is IF_OPER_LOWERLAYERDOWN. >>> >>> Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com> >>> --- >>> net/bridge/br.c | 23 ++++++-- >>> net/bridge/br_private.h | 17 ++++++ >>> net/bridge/br_vlan.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++ >>> 3 files changed, 179 insertions(+), 4 deletions(-) >>> >> Hi, >> Please CC bridge maintainers when sending bridge patches. > Thank you very much for the review, I will CC you and Roopa when I have > the v1 series ready. >> One question/thought - can't we add a ports_up counter in the vlan's master >> struct and keep how many ports are up for that vlan ? > > This would have been my preferred choice, but for this one would need to > know the old link state for a port so as to determine if/what link state > transition has occurred for a NETDEV_CHANGE notification. This is if > only a single counter is kept for the vlan for all ports (also it might > be difficult to recover from an error in the counter). I could see it > working if one kept track of the operational state for each port in the > vlan in a data structure specific to this purpose i.e. that is more > efficient than the existing walk. However, speed in processing these > state changes is not that important, also the link state is quickly > determined when it might matter more, i.e. on link up of a port. > Indeed, the NETDEV_CHANGE is harder, but we can keep the last known carrier state in the per-port structure and make a decision based on that and the new state. That wouldn't require any additional structures. Speed is important to us when we deploy the bridge at scale, we have tests with thousands of vlans and devices where this walk would become expensive on link flaps. >> The important part would be to keep it correct, i.e. vlan_add/del should inc/dec >> as well as port up/down. Then we can directly update its carrier on port event >> without doing a possible O(n^2) walk, we just need to walk over the port vlans >> and adjust counters which is always O(n) based on num of that port's vlans. >> >> Some more comments below. > I will make all the other changes you have requested. > Thanks!
On 03/04/2019 20:53, Nikolay Aleksandrov wrote: > On 03/04/2019 20:43, Mike Manning wrote: >> On 02/04/2019 20:22, Nikolay Aleksandrov wrote: >>> On 02/04/2019 18:35, Mike Manning wrote: >>>> In the case of vlan filtering on bridges, the bridge may also have the >>>> corresponding vlan devices as upper devices. A vlan bridge binding mode >>>> is added to allow the link state of the vlan device to track only the >>>> state of the subset of bridge ports that are also members of the vlan, >>>> rather than that of all bridge ports. This mode is set with a vlan flag >>>> rather than a bridge sysfs so that the 8021q module is aware that it >>>> should not set the link state for the vlan device. >>>> >>>> If bridge vlan is configured, the bridge device event handling results >>>> in the link state for an upper device being set, if it is a vlan device >>>> with the vlan bridge binding mode enabled. This also sets a >>>> vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for >>>> the ports in that bridge result in a link state update of the vlan >>>> device if required. >>>> >>>> The link state of the vlan device is up if there is at least one bridge >>>> port that is a vlan member that is admin & oper up, otherwise its oper >>>> state is IF_OPER_LOWERLAYERDOWN. >>>> >>>> Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com> >>>> --- >>>> net/bridge/br.c | 23 ++++++-- >>>> net/bridge/br_private.h | 17 ++++++ >>>> net/bridge/br_vlan.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++ >>>> 3 files changed, 179 insertions(+), 4 deletions(-) >>>> >>> Hi, >>> Please CC bridge maintainers when sending bridge patches. >> Thank you very much for the review, I will CC you and Roopa when I have >> the v1 series ready. >>> One question/thought - can't we add a ports_up counter in the vlan's master >>> struct and keep how many ports are up for that vlan ? >> >> This would have been my preferred choice, but for this one would need to >> know the old link state for a port so as to determine if/what link state >> transition has occurred for a NETDEV_CHANGE notification. This is if >> only a single counter is kept for the vlan for all ports (also it might >> be difficult to recover from an error in the counter). I could see it >> working if one kept track of the operational state for each port in the >> vlan in a data structure specific to this purpose i.e. that is more >> efficient than the existing walk. However, speed in processing these >> state changes is not that important, also the link state is quickly >> determined when it might matter more, i.e. on link up of a port. >> > > Indeed, the NETDEV_CHANGE is harder, but we can keep the last known carrier state > in the per-port structure and make a decision based on that and the new state. > That wouldn't require any additional structures. Speed is important to us when > we deploy the bridge at scale, we have tests with thousands of vlans and devices > where this walk would become expensive on link flaps. > In fact we already have a similar tracking field used for the port state, maybe it can be used as an indicator. That state needs to be taken into account anyway or the carrier state would be wrong. >>> The important part would be to keep it correct, i.e. vlan_add/del should inc/dec >>> as well as port up/down. Then we can directly update its carrier on port event >>> without doing a possible O(n^2) walk, we just need to walk over the port vlans >>> and adjust counters which is always O(n) based on num of that port's vlans. >>> >>> Some more comments below. >> I will make all the other changes you have requested. >> > > Thanks! >
On 03/04/2019 21:17, Nikolay Aleksandrov wrote: > On 03/04/2019 20:53, Nikolay Aleksandrov wrote: >> On 03/04/2019 20:43, Mike Manning wrote: >>> On 02/04/2019 20:22, Nikolay Aleksandrov wrote: >>>> On 02/04/2019 18:35, Mike Manning wrote: >>>>> In the case of vlan filtering on bridges, the bridge may also have the >>>>> corresponding vlan devices as upper devices. A vlan bridge binding mode >>>>> is added to allow the link state of the vlan device to track only the >>>>> state of the subset of bridge ports that are also members of the vlan, >>>>> rather than that of all bridge ports. This mode is set with a vlan flag >>>>> rather than a bridge sysfs so that the 8021q module is aware that it >>>>> should not set the link state for the vlan device. >>>>> >>>>> If bridge vlan is configured, the bridge device event handling results >>>>> in the link state for an upper device being set, if it is a vlan device >>>>> with the vlan bridge binding mode enabled. This also sets a >>>>> vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for >>>>> the ports in that bridge result in a link state update of the vlan >>>>> device if required. >>>>> >>>>> The link state of the vlan device is up if there is at least one bridge >>>>> port that is a vlan member that is admin & oper up, otherwise its oper >>>>> state is IF_OPER_LOWERLAYERDOWN. >>>>> >>>>> Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com> >>>>> --- >>>>> net/bridge/br.c | 23 ++++++-- >>>>> net/bridge/br_private.h | 17 ++++++ >>>>> net/bridge/br_vlan.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++ >>>>> 3 files changed, 179 insertions(+), 4 deletions(-) >>>>> >>>> Hi, >>>> Please CC bridge maintainers when sending bridge patches. >>> Thank you very much for the review, I will CC you and Roopa when I have >>> the v1 series ready. >>>> One question/thought - can't we add a ports_up counter in the vlan's master >>>> struct and keep how many ports are up for that vlan ? >>> >>> This would have been my preferred choice, but for this one would need to >>> know the old link state for a port so as to determine if/what link state >>> transition has occurred for a NETDEV_CHANGE notification. This is if >>> only a single counter is kept for the vlan for all ports (also it might >>> be difficult to recover from an error in the counter). I could see it >>> working if one kept track of the operational state for each port in the >>> vlan in a data structure specific to this purpose i.e. that is more >>> efficient than the existing walk. However, speed in processing these >>> state changes is not that important, also the link state is quickly >>> determined when it might matter more, i.e. on link up of a port. >>> >> >> Indeed, the NETDEV_CHANGE is harder, but we can keep the last known carrier state >> in the per-port structure and make a decision based on that and the new state. >> That wouldn't require any additional structures. Speed is important to us when >> we deploy the bridge at scale, we have tests with thousands of vlans and devices >> where this walk would become expensive on link flaps. >> > > In fact we already have a similar tracking field used for the port state, maybe > it can be used as an indicator. That state needs to be taken into account anyway > or the carrier state would be wrong. > Nevermind the last sentence, spoke too quickly. An additional structure may be needed after all, this will need some investigating. >>>> The important part would be to keep it correct, i.e. vlan_add/del should inc/dec >>>> as well as port up/down. Then we can directly update its carrier on port event >>>> without doing a possible O(n^2) walk, we just need to walk over the port vlans >>>> and adjust counters which is always O(n) based on num of that port's vlans. >>>> >>>> Some more comments below. >>> I will make all the other changes you have requested. >>> >> >> Thanks! >> >
On 03/04/2019 19:23, Nikolay Aleksandrov wrote: > On 03/04/2019 21:17, Nikolay Aleksandrov wrote: >> On 03/04/2019 20:53, Nikolay Aleksandrov wrote: >>> On 03/04/2019 20:43, Mike Manning wrote: >>>> On 02/04/2019 20:22, Nikolay Aleksandrov wrote: >>>>> On 02/04/2019 18:35, Mike Manning wrote: >>>>>> In the case of vlan filtering on bridges, the bridge may also have the >>>>>> corresponding vlan devices as upper devices. A vlan bridge binding mode >>>>>> is added to allow the link state of the vlan device to track only the >>>>>> state of the subset of bridge ports that are also members of the vlan, >>>>>> rather than that of all bridge ports. This mode is set with a vlan flag >>>>>> rather than a bridge sysfs so that the 8021q module is aware that it >>>>>> should not set the link state for the vlan device. >>>>>> >>>>>> If bridge vlan is configured, the bridge device event handling results >>>>>> in the link state for an upper device being set, if it is a vlan device >>>>>> with the vlan bridge binding mode enabled. This also sets a >>>>>> vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for >>>>>> the ports in that bridge result in a link state update of the vlan >>>>>> device if required. >>>>>> >>>>>> The link state of the vlan device is up if there is at least one bridge >>>>>> port that is a vlan member that is admin & oper up, otherwise its oper >>>>>> state is IF_OPER_LOWERLAYERDOWN. >>>>>> >>>>>> Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com> >>>>>> --- >>>>>> net/bridge/br.c | 23 ++++++-- >>>>>> net/bridge/br_private.h | 17 ++++++ >>>>>> net/bridge/br_vlan.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++ >>>>>> 3 files changed, 179 insertions(+), 4 deletions(-) >>>>>> >>>>> Hi, >>>>> Please CC bridge maintainers when sending bridge patches. >>>> Thank you very much for the review, I will CC you and Roopa when I have >>>> the v1 series ready. >>>>> One question/thought - can't we add a ports_up counter in the vlan's master >>>>> struct and keep how many ports are up for that vlan ? >>>> This would have been my preferred choice, but for this one would need to >>>> know the old link state for a port so as to determine if/what link state >>>> transition has occurred for a NETDEV_CHANGE notification. This is if >>>> only a single counter is kept for the vlan for all ports (also it might >>>> be difficult to recover from an error in the counter). I could see it >>>> working if one kept track of the operational state for each port in the >>>> vlan in a data structure specific to this purpose i.e. that is more >>>> efficient than the existing walk. However, speed in processing these >>>> state changes is not that important, also the link state is quickly >>>> determined when it might matter more, i.e. on link up of a port. >>>> >>> Indeed, the NETDEV_CHANGE is harder, but we can keep the last known carrier state >>> in the per-port structure and make a decision based on that and the new state. >>> That wouldn't require any additional structures. Speed is important to us when >>> we deploy the bridge at scale, we have tests with thousands of vlans and devices >>> where this walk would become expensive on link flaps. >>> >> In fact we already have a similar tracking field used for the port state, maybe >> it can be used as an indicator. That state needs to be taken into account anyway >> or the carrier state would be wrong. >> > Nevermind the last sentence, spoke too quickly. An additional structure may be needed > after all, this will need some investigating. > >>>>> The important part would be to keep it correct, i.e. vlan_add/del should inc/dec >>>>> as well as port up/down. Then we can directly update its carrier on port event >>>>> without doing a possible O(n^2) walk, we just need to walk over the port vlans >>>>> and adjust counters which is always O(n) based on num of that port's vlans. >>>>> >>>>> Some more comments below. >>>> I will make all the other changes you have requested. >>>> >>> Thanks! >>> I have made the other changes in the v2 series about to be sent, including adding a patch for handling link down on the bridge device to cover the STP case. However, I don't see a straightforward mechanism that is not error-prone in terms of a new structure for storing the required link state of all ports for all vlan devices (as one needs to track addition/deletion of vlan devices for vlans, and addition/deletion of ports as members of these vlans) so that one does not have to use the existing structures for checking vlan devices and ports in those vlans. Given that the proposed functionality is optional and so does not impact any existing use-cases, is performance such a consideration? And typically one would expect most ports to be up, in which case the link state of the vlan device is set to up without having to check the state of other ports, or at least not having to check many ports.
diff --git a/net/bridge/br.c b/net/bridge/br.c index a5174e5001d8..b80cd5ccd590 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -40,10 +40,21 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v bool changed_addr; int err; - /* register of bridge completed, add sysfs entries */ - if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) { - br_sysfs_addbr(dev); - return NOTIFY_DONE; + if (dev->priv_flags & IFF_EBRIDGE) { + if (event == NETDEV_REGISTER) { + /* register of bridge completed, add sysfs entries */ + br_sysfs_addbr(dev); + return NOTIFY_DONE; + } +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (event == NETDEV_CHANGEUPPER) { + struct netdev_notifier_changeupper_info *info = ptr; + + br_vlan_upper_change(dev, info->upper_dev, + info->linking); + return NOTIFY_DONE; + } +#endif } /* not a port of a bridge */ @@ -126,6 +137,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v break; } +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + br_vlan_port_event(p, br, event); +#endif + /* Events that may cause spanning tree to refresh */ if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP || event == NETDEV_CHANGE || event == NETDEV_DOWN)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 00deef7fc1f3..604de174abe0 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -336,6 +336,7 @@ struct net_bridge { #ifdef CONFIG_BRIDGE_VLAN_FILTERING __be16 vlan_proto; u16 default_pvid; + u8 vlan_bridge_binding; struct net_bridge_vlan_group __rcu *vlgrp; #endif @@ -896,6 +897,10 @@ int nbp_vlan_init(struct net_bridge_port *port, struct netlink_ext_ack *extack); int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask); void br_vlan_get_stats(const struct net_bridge_vlan *v, struct br_vlan_stats *stats); +void br_vlan_port_event(struct net_bridge_port *p, struct net_bridge *br, + unsigned long event); +void br_vlan_upper_change(struct net_device *dev, struct net_device *upper_dev, + bool linking); static inline struct net_bridge_vlan_group *br_vlan_group( const struct net_bridge *br) @@ -1079,6 +1084,18 @@ static inline void br_vlan_get_stats(const struct net_bridge_vlan *v, struct br_vlan_stats *stats) { } + +static inline void br_vlan_port_event(struct net_bridge_port *p, + struct net_bridge *br, + unsigned long event) +{ +} + +static inline void br_vlan_upper_change(struct net_device *dev, + struct net_device *upper_dev, + bool linking) +{ +} #endif struct nf_br_ops { diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 96abf8feb9dc..642373231386 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1265,3 +1265,146 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid, return 0; } EXPORT_SYMBOL_GPL(br_vlan_get_info); + +static int br_vlan_is_bind_vlan_dev(struct net_device *dev) +{ + return is_vlan_dev(dev) && + !!(vlan_dev_priv(dev)->flags & VLAN_FLAG_BRIDGE_BINDING); +} + +static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev, + __always_unused void *data) +{ + return br_vlan_is_bind_vlan_dev(dev); +} + +static int br_vlan_has_upper_bind_vlan_dev(struct net_device *dev) +{ + int found; + + rcu_read_lock(); + found = netdev_walk_all_upper_dev_rcu(dev, br_vlan_is_bind_vlan_dev_fn, + NULL); + rcu_read_unlock(); + + return found; +} + +struct br_vlan_bind_walk_data { + u16 vid; + struct net_device *result; +}; + +static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev, void *data_in) +{ + struct br_vlan_bind_walk_data *data = data_in; + int found = 0; + + if (br_vlan_is_bind_vlan_dev(dev) && + vlan_dev_priv(dev)->vlan_id == data->vid) { + dev_hold(dev); + data->result = dev; + found = 1; + } + + return found; +} + +/* If found, returns the vlan device with a reference held, else returns NULL. + */ +static struct net_device * +br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid) +{ + struct br_vlan_bind_walk_data data = { + .vid = vid, + }; + + rcu_read_lock(); + netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn, + &data); + rcu_read_unlock(); + + return data.result; +} + +static bool br_vlan_is_dev_up(struct net_device *dev) +{ + return !!(dev->flags & IFF_UP) && netif_oper_up(dev); +} + +static void br_vlan_set_vlan_dev_state(struct net_bridge *br, + struct net_device *vlan_dev) +{ + u16 vid = vlan_dev_priv(vlan_dev)->vlan_id; + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + bool has_carrier = false; + + list_for_each_entry(p, &br->port_list, list) { + vg = nbp_vlan_group(p); + if (br_vlan_find(vg, vid) && br_vlan_is_dev_up(p->dev)) { + has_carrier = true; + break; + } + } + + if (netif_carrier_ok(vlan_dev)) { + if (!has_carrier) + netif_carrier_off(vlan_dev); + } else { + if (has_carrier) + netif_carrier_on(vlan_dev); + } +} + +static void br_vlan_set_all_vlan_dev_state(struct net_bridge_port *p, + struct net_bridge *br) +{ + struct net_bridge_vlan_group *vg = nbp_vlan_group(p); + struct net_bridge_vlan *vlan; + struct net_device *vlan_dev; + + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + vlan_dev = br_vlan_get_upper_bind_vlan_dev(br->dev, vlan->vid); + if (vlan_dev) { + if (br_vlan_is_dev_up(p->dev)) { + if (!netif_carrier_ok(vlan_dev)) + netif_carrier_on(vlan_dev); + } else { + br_vlan_set_vlan_dev_state(br, vlan_dev); + } + dev_put(vlan_dev); + } + } +} + +void br_vlan_upper_change(struct net_device *dev, struct net_device *upper_dev, + bool linking) +{ + struct net_bridge *br = netdev_priv(dev); + + if (!br_vlan_is_bind_vlan_dev(upper_dev)) + return; + + if (linking) { + br_vlan_set_vlan_dev_state(br, upper_dev); + br->vlan_bridge_binding = 1; + } else { + br->vlan_bridge_binding = br_vlan_has_upper_bind_vlan_dev(dev); + } +} + +void br_vlan_port_event(struct net_bridge_port *p, struct net_bridge *br, + unsigned long event) +{ + if (!br->vlan_bridge_binding) + return; + + switch (event) { + case NETDEV_CHANGE: + case NETDEV_DOWN: + case NETDEV_UP: + br_vlan_set_all_vlan_dev_state(p, br); + break; + } +}
In the case of vlan filtering on bridges, the bridge may also have the corresponding vlan devices as upper devices. A vlan bridge binding mode is added to allow the link state of the vlan device to track only the state of the subset of bridge ports that are also members of the vlan, rather than that of all bridge ports. This mode is set with a vlan flag rather than a bridge sysfs so that the 8021q module is aware that it should not set the link state for the vlan device. If bridge vlan is configured, the bridge device event handling results in the link state for an upper device being set, if it is a vlan device with the vlan bridge binding mode enabled. This also sets a vlan_bridge_binding flag so that subsequent UP/DOWN/CHANGE events for the ports in that bridge result in a link state update of the vlan device if required. The link state of the vlan device is up if there is at least one bridge port that is a vlan member that is admin & oper up, otherwise its oper state is IF_OPER_LOWERLAYERDOWN. Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com> --- net/bridge/br.c | 23 ++++++-- net/bridge/br_private.h | 17 ++++++ net/bridge/br_vlan.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 179 insertions(+), 4 deletions(-)