diff mbox series

[RFC,net-next,10/13] net: bridge: add port flags for host flooding

Message ID 20200521211036.668624-11-olteanv@gmail.com
State RFC
Delegated to: David Miller
Headers show
Series RX filtering for DSA switches | expand

Commit Message

Vladimir Oltean May 21, 2020, 9:10 p.m. UTC
From: Vladimir Oltean <vladimir.oltean@nxp.com>

In cases where the bridge is offloaded by a switchdev, there are
situations where we can optimize RX filtering towards the host. To be
precise, the host only needs to do termination, which it can do by
responding at the MAC addresses of the slave ports and of the bridge
interface itself. But most notably, it doesn't need to do forwarding,
so there is no need to see packets with unknown destination address.

But there are, however, cases when a switchdev does need to flood to the
CPU. Such an example is when the switchdev is bridged with a foreign
interface, and since there is no offloaded datapath, packets need to
pass through the CPU. Currently this is the only identified case, but it
can be extended at any time.

So far, switchdev implementers made driver-level assumptions, such as:
this chip is never integrated in SoCs where it can be bridged with a
foreign interface, so I'll just disable host flooding and save some CPU
cycles. Or: I can never know what else can be bridged with this
switchdev port, so I must leave host flooding enabled in any case.

Let the bridge drive the host flooding decision, and pass it to
switchdev via the same mechanism as the external flooding flags.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
---
 include/linux/if_bridge.h |  3 +++
 net/bridge/br_if.c        | 40 +++++++++++++++++++++++++++++++++++++++
 net/bridge/br_switchdev.c |  4 +++-
 3 files changed, 46 insertions(+), 1 deletion(-)

Comments

Nikolay Aleksandrov May 22, 2020, 12:38 p.m. UTC | #1
On 22/05/2020 00:10, Vladimir Oltean wrote:
> From: Vladimir Oltean <vladimir.oltean@nxp.com>
> 
> In cases where the bridge is offloaded by a switchdev, there are
> situations where we can optimize RX filtering towards the host. To be
> precise, the host only needs to do termination, which it can do by
> responding at the MAC addresses of the slave ports and of the bridge
> interface itself. But most notably, it doesn't need to do forwarding,
> so there is no need to see packets with unknown destination address.
> 
> But there are, however, cases when a switchdev does need to flood to the
> CPU. Such an example is when the switchdev is bridged with a foreign
> interface, and since there is no offloaded datapath, packets need to
> pass through the CPU. Currently this is the only identified case, but it
> can be extended at any time.
> 
> So far, switchdev implementers made driver-level assumptions, such as:
> this chip is never integrated in SoCs where it can be bridged with a
> foreign interface, so I'll just disable host flooding and save some CPU
> cycles. Or: I can never know what else can be bridged with this
> switchdev port, so I must leave host flooding enabled in any case.
> 
> Let the bridge drive the host flooding decision, and pass it to
> switchdev via the same mechanism as the external flooding flags.
> 
> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> ---
>  include/linux/if_bridge.h |  3 +++
>  net/bridge/br_if.c        | 40 +++++++++++++++++++++++++++++++++++++++
>  net/bridge/br_switchdev.c |  4 +++-
>  3 files changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> index b3a8d3054af0..6891a432862d 100644
> --- a/include/linux/if_bridge.h
> +++ b/include/linux/if_bridge.h
> @@ -49,6 +49,9 @@ struct br_ip_list {
>  #define BR_ISOLATED		BIT(16)
>  #define BR_MRP_AWARE		BIT(17)
>  #define BR_MRP_LOST_CONT	BIT(18)
> +#define BR_HOST_FLOOD		BIT(19)
> +#define BR_HOST_MCAST_FLOOD	BIT(20)
> +#define BR_HOST_BCAST_FLOOD	BIT(21)
>  
>  #define BR_DEFAULT_AGEING_TIME	(300 * HZ)
>  
> diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> index a0e9a7937412..aae59d1e619b 100644
> --- a/net/bridge/br_if.c
> +++ b/net/bridge/br_if.c
> @@ -166,6 +166,45 @@ void br_manage_promisc(struct net_bridge *br)
>  	}
>  }
>  
> +static int br_manage_host_flood(struct net_bridge *br)
> +{
> +	const unsigned long mask = BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD |
> +				   BR_HOST_BCAST_FLOOD;
> +	struct net_bridge_port *p, *q;
> +
> +	list_for_each_entry(p, &br->port_list, list) {
> +		unsigned long flags = p->flags;
> +		bool sw_bridging = false;
> +		int err;
> +
> +		list_for_each_entry(q, &br->port_list, list) {
> +			if (p == q)
> +				continue;
> +
> +			if (!netdev_port_same_parent_id(p->dev, q->dev)) {
> +				sw_bridging = true;
> +				break;
> +			}
> +		}
> +
> +		if (sw_bridging)
> +			flags |= mask;
> +		else
> +			flags &= ~mask;
> +
> +		if (flags == p->flags)
> +			continue;
> +
> +		err = br_switchdev_set_port_flag(p, flags, mask);
> +		if (err)
> +			return err;
> +
> +		p->flags = flags;
> +	}
> +
> +	return 0;
> +}
> +
>  int nbp_backup_change(struct net_bridge_port *p,
>  		      struct net_device *backup_dev)
>  {
> @@ -231,6 +270,7 @@ static void nbp_update_port_count(struct net_bridge *br)
>  		br->auto_cnt = cnt;
>  		br_manage_promisc(br);
>  	}
> +	br_manage_host_flood(br);
>  }
>  

Can we do this only at port add/del ?
Right now it will be invoked also by br_port_flags_change() upon BR_AUTO_MASK flag change.

>  static void nbp_delete_promisc(struct net_bridge_port *p)
> diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> index 015209bf44aa..360806ac7463 100644
> --- a/net/bridge/br_switchdev.c
> +++ b/net/bridge/br_switchdev.c
> @@ -56,7 +56,9 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
>  
>  /* Flags that can be offloaded to hardware */
>  #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
> -				  BR_MCAST_FLOOD | BR_BCAST_FLOOD)
> +				  BR_MCAST_FLOOD | BR_BCAST_FLOOD | \
> +				  BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD | \
> +				  BR_HOST_BCAST_FLOOD)
>  
>  int br_switchdev_set_port_flag(struct net_bridge_port *p,
>  			       unsigned long flags,
>
Vladimir Oltean May 22, 2020, 1:13 p.m. UTC | #2
On Fri, 22 May 2020 at 15:38, Nikolay Aleksandrov
<nikolay@cumulusnetworks.com> wrote:
>
> On 22/05/2020 00:10, Vladimir Oltean wrote:
> > From: Vladimir Oltean <vladimir.oltean@nxp.com>
> >
> > In cases where the bridge is offloaded by a switchdev, there are
> > situations where we can optimize RX filtering towards the host. To be
> > precise, the host only needs to do termination, which it can do by
> > responding at the MAC addresses of the slave ports and of the bridge
> > interface itself. But most notably, it doesn't need to do forwarding,
> > so there is no need to see packets with unknown destination address.
> >
> > But there are, however, cases when a switchdev does need to flood to the
> > CPU. Such an example is when the switchdev is bridged with a foreign
> > interface, and since there is no offloaded datapath, packets need to
> > pass through the CPU. Currently this is the only identified case, but it
> > can be extended at any time.
> >
> > So far, switchdev implementers made driver-level assumptions, such as:
> > this chip is never integrated in SoCs where it can be bridged with a
> > foreign interface, so I'll just disable host flooding and save some CPU
> > cycles. Or: I can never know what else can be bridged with this
> > switchdev port, so I must leave host flooding enabled in any case.
> >
> > Let the bridge drive the host flooding decision, and pass it to
> > switchdev via the same mechanism as the external flooding flags.
> >
> > Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> > ---
> >  include/linux/if_bridge.h |  3 +++
> >  net/bridge/br_if.c        | 40 +++++++++++++++++++++++++++++++++++++++
> >  net/bridge/br_switchdev.c |  4 +++-
> >  3 files changed, 46 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> > index b3a8d3054af0..6891a432862d 100644
> > --- a/include/linux/if_bridge.h
> > +++ b/include/linux/if_bridge.h
> > @@ -49,6 +49,9 @@ struct br_ip_list {
> >  #define BR_ISOLATED          BIT(16)
> >  #define BR_MRP_AWARE         BIT(17)
> >  #define BR_MRP_LOST_CONT     BIT(18)
> > +#define BR_HOST_FLOOD                BIT(19)
> > +#define BR_HOST_MCAST_FLOOD  BIT(20)
> > +#define BR_HOST_BCAST_FLOOD  BIT(21)
> >
> >  #define BR_DEFAULT_AGEING_TIME       (300 * HZ)
> >
> > diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> > index a0e9a7937412..aae59d1e619b 100644
> > --- a/net/bridge/br_if.c
> > +++ b/net/bridge/br_if.c
> > @@ -166,6 +166,45 @@ void br_manage_promisc(struct net_bridge *br)
> >       }
> >  }
> >
> > +static int br_manage_host_flood(struct net_bridge *br)
> > +{
> > +     const unsigned long mask = BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD |
> > +                                BR_HOST_BCAST_FLOOD;
> > +     struct net_bridge_port *p, *q;
> > +
> > +     list_for_each_entry(p, &br->port_list, list) {
> > +             unsigned long flags = p->flags;
> > +             bool sw_bridging = false;
> > +             int err;
> > +
> > +             list_for_each_entry(q, &br->port_list, list) {
> > +                     if (p == q)
> > +                             continue;
> > +
> > +                     if (!netdev_port_same_parent_id(p->dev, q->dev)) {
> > +                             sw_bridging = true;
> > +                             break;
> > +                     }
> > +             }
> > +
> > +             if (sw_bridging)
> > +                     flags |= mask;
> > +             else
> > +                     flags &= ~mask;
> > +
> > +             if (flags == p->flags)
> > +                     continue;
> > +
> > +             err = br_switchdev_set_port_flag(p, flags, mask);
> > +             if (err)
> > +                     return err;
> > +
> > +             p->flags = flags;
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> >  int nbp_backup_change(struct net_bridge_port *p,
> >                     struct net_device *backup_dev)
> >  {
> > @@ -231,6 +270,7 @@ static void nbp_update_port_count(struct net_bridge *br)
> >               br->auto_cnt = cnt;
> >               br_manage_promisc(br);
> >       }
> > +     br_manage_host_flood(br);
> >  }
> >
>
> Can we do this only at port add/del ?
> Right now it will be invoked also by br_port_flags_change() upon BR_AUTO_MASK flag change.
>

Yes, we can do that.
Actually I have some doubts about BR_HOST_BCAST_FLOOD. We can't
disable that in the no-foreign-interface case, can we? For IPv6, it
looks like the stack does take care of installing dev_mc addresses for
the neighbor discovery protocol, but for IPv4 I guess the assumption
is that broadcast ARP should always be processed?

> >  static void nbp_delete_promisc(struct net_bridge_port *p)
> > diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> > index 015209bf44aa..360806ac7463 100644
> > --- a/net/bridge/br_switchdev.c
> > +++ b/net/bridge/br_switchdev.c
> > @@ -56,7 +56,9 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
> >
> >  /* Flags that can be offloaded to hardware */
> >  #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
> > -                               BR_MCAST_FLOOD | BR_BCAST_FLOOD)
> > +                               BR_MCAST_FLOOD | BR_BCAST_FLOOD | \
> > +                               BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD | \
> > +                               BR_HOST_BCAST_FLOOD)
> >
> >  int br_switchdev_set_port_flag(struct net_bridge_port *p,
> >                              unsigned long flags,
> >
>

Thanks,
-Vladimir
Allan W. Nielsen May 22, 2020, 6:45 p.m. UTC | #3
On 22.05.2020 16:13, Vladimir Oltean wrote:
>EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>
>On Fri, 22 May 2020 at 15:38, Nikolay Aleksandrov
><nikolay@cumulusnetworks.com> wrote:
>>
>> On 22/05/2020 00:10, Vladimir Oltean wrote:
>> > From: Vladimir Oltean <vladimir.oltean@nxp.com>
>> >
>> > In cases where the bridge is offloaded by a switchdev, there are
>> > situations where we can optimize RX filtering towards the host. To be
>> > precise, the host only needs to do termination, which it can do by
>> > responding at the MAC addresses of the slave ports and of the bridge
>> > interface itself. But most notably, it doesn't need to do forwarding,
>> > so there is no need to see packets with unknown destination address.
>> >
>> > But there are, however, cases when a switchdev does need to flood to the
>> > CPU. Such an example is when the switchdev is bridged with a foreign
>> > interface, and since there is no offloaded datapath, packets need to
>> > pass through the CPU. Currently this is the only identified case, but it
>> > can be extended at any time.
>> >
>> > So far, switchdev implementers made driver-level assumptions, such as:
>> > this chip is never integrated in SoCs where it can be bridged with a
>> > foreign interface, so I'll just disable host flooding and save some CPU
>> > cycles. Or: I can never know what else can be bridged with this
>> > switchdev port, so I must leave host flooding enabled in any case.
>> >
>> > Let the bridge drive the host flooding decision, and pass it to
>> > switchdev via the same mechanism as the external flooding flags.
>> >
>> > Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
>> > ---
>> >  include/linux/if_bridge.h |  3 +++
>> >  net/bridge/br_if.c        | 40 +++++++++++++++++++++++++++++++++++++++
>> >  net/bridge/br_switchdev.c |  4 +++-
>> >  3 files changed, 46 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
>> > index b3a8d3054af0..6891a432862d 100644
>> > --- a/include/linux/if_bridge.h
>> > +++ b/include/linux/if_bridge.h
>> > @@ -49,6 +49,9 @@ struct br_ip_list {
>> >  #define BR_ISOLATED          BIT(16)
>> >  #define BR_MRP_AWARE         BIT(17)
>> >  #define BR_MRP_LOST_CONT     BIT(18)
>> > +#define BR_HOST_FLOOD                BIT(19)
>> > +#define BR_HOST_MCAST_FLOOD  BIT(20)
>> > +#define BR_HOST_BCAST_FLOOD  BIT(21)
>> >
>> >  #define BR_DEFAULT_AGEING_TIME       (300 * HZ)
>> >
>> > diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
>> > index a0e9a7937412..aae59d1e619b 100644
>> > --- a/net/bridge/br_if.c
>> > +++ b/net/bridge/br_if.c
>> > @@ -166,6 +166,45 @@ void br_manage_promisc(struct net_bridge *br)
>> >       }
>> >  }
>> >
>> > +static int br_manage_host_flood(struct net_bridge *br)
>> > +{
>> > +     const unsigned long mask = BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD |
>> > +                                BR_HOST_BCAST_FLOOD;
>> > +     struct net_bridge_port *p, *q;
>> > +
>> > +     list_for_each_entry(p, &br->port_list, list) {
>> > +             unsigned long flags = p->flags;
>> > +             bool sw_bridging = false;
>> > +             int err;
>> > +
>> > +             list_for_each_entry(q, &br->port_list, list) {
>> > +                     if (p == q)
>> > +                             continue;
>> > +
>> > +                     if (!netdev_port_same_parent_id(p->dev, q->dev)) {
>> > +                             sw_bridging = true;
>> > +                             break;
>> > +                     }
>> > +             }
>> > +
>> > +             if (sw_bridging)
>> > +                     flags |= mask;
>> > +             else
>> > +                     flags &= ~mask;
>> > +
>> > +             if (flags == p->flags)
>> > +                     continue;
>> > +
>> > +             err = br_switchdev_set_port_flag(p, flags, mask);
>> > +             if (err)
>> > +                     return err;
>> > +
>> > +             p->flags = flags;
>> > +     }
>> > +
>> > +     return 0;
>> > +}
>> > +
>> >  int nbp_backup_change(struct net_bridge_port *p,
>> >                     struct net_device *backup_dev)
>> >  {
>> > @@ -231,6 +270,7 @@ static void nbp_update_port_count(struct net_bridge *br)
>> >               br->auto_cnt = cnt;
>> >               br_manage_promisc(br);
>> >       }
>> > +     br_manage_host_flood(br);
>> >  }
>> >
>>
>> Can we do this only at port add/del ?
>> Right now it will be invoked also by br_port_flags_change() upon BR_AUTO_MASK flag change.
>>
>
>Yes, we can do that.
>Actually I have some doubts about BR_HOST_BCAST_FLOOD. We can't
>disable that in the no-foreign-interface case, can we? For IPv6, it
>looks like the stack does take care of installing dev_mc addresses for
>the neighbor discovery protocol, but for IPv4 I guess the assumption
>is that broadcast ARP should always be processed?

Ideally this should be per VLAN. In case of IPv4, you only need to be
part of the broadcast domain on VLANs with an associated vlan-interface.

>> >  static void nbp_delete_promisc(struct net_bridge_port *p)
>> > diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
>> > index 015209bf44aa..360806ac7463 100644
>> > --- a/net/bridge/br_switchdev.c
>> > +++ b/net/bridge/br_switchdev.c
>> > @@ -56,7 +56,9 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
>> >
>> >  /* Flags that can be offloaded to hardware */
>> >  #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
>> > -                               BR_MCAST_FLOOD | BR_BCAST_FLOOD)
>> > +                               BR_MCAST_FLOOD | BR_BCAST_FLOOD | \
>> > +                               BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD | \
>> > +                               BR_HOST_BCAST_FLOOD)
>> >
>> >  int br_switchdev_set_port_flag(struct net_bridge_port *p,
>> >                              unsigned long flags,
>> >
>>
>
>Thanks,
>-Vladimir
/Allan
Ido Schimmel May 24, 2020, 2:26 p.m. UTC | #4
On Fri, May 22, 2020 at 12:10:33AM +0300, Vladimir Oltean wrote:
> From: Vladimir Oltean <vladimir.oltean@nxp.com>
> 
> In cases where the bridge is offloaded by a switchdev, there are
> situations where we can optimize RX filtering towards the host. To be
> precise, the host only needs to do termination, which it can do by
> responding at the MAC addresses of the slave ports and of the bridge
> interface itself. But most notably, it doesn't need to do forwarding,
> so there is no need to see packets with unknown destination address.
> 
> But there are, however, cases when a switchdev does need to flood to the
> CPU. Such an example is when the switchdev is bridged with a foreign
> interface, and since there is no offloaded datapath, packets need to
> pass through the CPU. Currently this is the only identified case, but it
> can be extended at any time.
> 
> So far, switchdev implementers made driver-level assumptions, such as:
> this chip is never integrated in SoCs where it can be bridged with a
> foreign interface, so I'll just disable host flooding and save some CPU
> cycles. Or: I can never know what else can be bridged with this
> switchdev port, so I must leave host flooding enabled in any case.
> 
> Let the bridge drive the host flooding decision, and pass it to
> switchdev via the same mechanism as the external flooding flags.
> 
> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> ---
>  include/linux/if_bridge.h |  3 +++
>  net/bridge/br_if.c        | 40 +++++++++++++++++++++++++++++++++++++++
>  net/bridge/br_switchdev.c |  4 +++-
>  3 files changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> index b3a8d3054af0..6891a432862d 100644
> --- a/include/linux/if_bridge.h
> +++ b/include/linux/if_bridge.h
> @@ -49,6 +49,9 @@ struct br_ip_list {
>  #define BR_ISOLATED		BIT(16)
>  #define BR_MRP_AWARE		BIT(17)
>  #define BR_MRP_LOST_CONT	BIT(18)
> +#define BR_HOST_FLOOD		BIT(19)
> +#define BR_HOST_MCAST_FLOOD	BIT(20)
> +#define BR_HOST_BCAST_FLOOD	BIT(21)
>  
>  #define BR_DEFAULT_AGEING_TIME	(300 * HZ)
>  
> diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> index a0e9a7937412..aae59d1e619b 100644
> --- a/net/bridge/br_if.c
> +++ b/net/bridge/br_if.c
> @@ -166,6 +166,45 @@ void br_manage_promisc(struct net_bridge *br)
>  	}
>  }
>  
> +static int br_manage_host_flood(struct net_bridge *br)
> +{
> +	const unsigned long mask = BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD |
> +				   BR_HOST_BCAST_FLOOD;
> +	struct net_bridge_port *p, *q;
> +
> +	list_for_each_entry(p, &br->port_list, list) {
> +		unsigned long flags = p->flags;
> +		bool sw_bridging = false;
> +		int err;
> +
> +		list_for_each_entry(q, &br->port_list, list) {
> +			if (p == q)
> +				continue;
> +
> +			if (!netdev_port_same_parent_id(p->dev, q->dev)) {
> +				sw_bridging = true;

It's not that simple. There are cases where not all bridge slaves have
the same parent ID and still there is no reason to flood traffic to the
CPU. VXLAN, for example.

You could argue that the VXLAN device needs to have the same parent ID
as the physical netdevs member in the bridge, but it will break your
data path. For example, lets assume your hardware decided to flood a
packet in L2. The packet will egress all the local ports, but will also
perform VXLAN encapsulation. The packet continues with the IP of the
remote VTEP(s) to the underlay router and then encounters a neighbour
miss exception, which sends it to the CPU for resolution.

Since this exception was encountered in the router the driver would mark
the packet with 'offload_fwd_mark', as it already performed L2
forwarding. If the VXLAN device has the same parent ID as the physical
netdevs, then the Linux bridge will never let it egress, nothing will
trigger neighbour resolution and the packet will be discarded.

> +				break;
> +			}
> +		}
> +
> +		if (sw_bridging)
> +			flags |= mask;
> +		else
> +			flags &= ~mask;
> +
> +		if (flags == p->flags)
> +			continue;
> +
> +		err = br_switchdev_set_port_flag(p, flags, mask);
> +		if (err)
> +			return err;
> +
> +		p->flags = flags;
> +	}
> +
> +	return 0;
> +}
> +
>  int nbp_backup_change(struct net_bridge_port *p,
>  		      struct net_device *backup_dev)
>  {
> @@ -231,6 +270,7 @@ static void nbp_update_port_count(struct net_bridge *br)
>  		br->auto_cnt = cnt;
>  		br_manage_promisc(br);
>  	}
> +	br_manage_host_flood(br);
>  }
>  
>  static void nbp_delete_promisc(struct net_bridge_port *p)
> diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> index 015209bf44aa..360806ac7463 100644
> --- a/net/bridge/br_switchdev.c
> +++ b/net/bridge/br_switchdev.c
> @@ -56,7 +56,9 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
>  
>  /* Flags that can be offloaded to hardware */
>  #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
> -				  BR_MCAST_FLOOD | BR_BCAST_FLOOD)
> +				  BR_MCAST_FLOOD | BR_BCAST_FLOOD | \
> +				  BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD | \
> +				  BR_HOST_BCAST_FLOOD)
>  
>  int br_switchdev_set_port_flag(struct net_bridge_port *p,
>  			       unsigned long flags,
> -- 
> 2.25.1
>
Vladimir Oltean May 24, 2020, 4:13 p.m. UTC | #5
Hi Ido,

On Sun, 24 May 2020 at 17:26, Ido Schimmel <idosch@idosch.org> wrote:
>
> On Fri, May 22, 2020 at 12:10:33AM +0300, Vladimir Oltean wrote:
> > From: Vladimir Oltean <vladimir.oltean@nxp.com>
> >
> > In cases where the bridge is offloaded by a switchdev, there are
> > situations where we can optimize RX filtering towards the host. To be
> > precise, the host only needs to do termination, which it can do by
> > responding at the MAC addresses of the slave ports and of the bridge
> > interface itself. But most notably, it doesn't need to do forwarding,
> > so there is no need to see packets with unknown destination address.
> >
> > But there are, however, cases when a switchdev does need to flood to the
> > CPU. Such an example is when the switchdev is bridged with a foreign
> > interface, and since there is no offloaded datapath, packets need to
> > pass through the CPU. Currently this is the only identified case, but it
> > can be extended at any time.
> >
> > So far, switchdev implementers made driver-level assumptions, such as:
> > this chip is never integrated in SoCs where it can be bridged with a
> > foreign interface, so I'll just disable host flooding and save some CPU
> > cycles. Or: I can never know what else can be bridged with this
> > switchdev port, so I must leave host flooding enabled in any case.
> >
> > Let the bridge drive the host flooding decision, and pass it to
> > switchdev via the same mechanism as the external flooding flags.
> >
> > Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> > ---
> >  include/linux/if_bridge.h |  3 +++
> >  net/bridge/br_if.c        | 40 +++++++++++++++++++++++++++++++++++++++
> >  net/bridge/br_switchdev.c |  4 +++-
> >  3 files changed, 46 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> > index b3a8d3054af0..6891a432862d 100644
> > --- a/include/linux/if_bridge.h
> > +++ b/include/linux/if_bridge.h
> > @@ -49,6 +49,9 @@ struct br_ip_list {
> >  #define BR_ISOLATED          BIT(16)
> >  #define BR_MRP_AWARE         BIT(17)
> >  #define BR_MRP_LOST_CONT     BIT(18)
> > +#define BR_HOST_FLOOD                BIT(19)
> > +#define BR_HOST_MCAST_FLOOD  BIT(20)
> > +#define BR_HOST_BCAST_FLOOD  BIT(21)
> >
> >  #define BR_DEFAULT_AGEING_TIME       (300 * HZ)
> >
> > diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> > index a0e9a7937412..aae59d1e619b 100644
> > --- a/net/bridge/br_if.c
> > +++ b/net/bridge/br_if.c
> > @@ -166,6 +166,45 @@ void br_manage_promisc(struct net_bridge *br)
> >       }
> >  }
> >
> > +static int br_manage_host_flood(struct net_bridge *br)
> > +{
> > +     const unsigned long mask = BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD |
> > +                                BR_HOST_BCAST_FLOOD;
> > +     struct net_bridge_port *p, *q;
> > +
> > +     list_for_each_entry(p, &br->port_list, list) {
> > +             unsigned long flags = p->flags;
> > +             bool sw_bridging = false;
> > +             int err;
> > +
> > +             list_for_each_entry(q, &br->port_list, list) {
> > +                     if (p == q)
> > +                             continue;
> > +
> > +                     if (!netdev_port_same_parent_id(p->dev, q->dev)) {
> > +                             sw_bridging = true;
>
> It's not that simple. There are cases where not all bridge slaves have
> the same parent ID and still there is no reason to flood traffic to the
> CPU. VXLAN, for example.
>
> You could argue that the VXLAN device needs to have the same parent ID
> as the physical netdevs member in the bridge, but it will break your
> data path. For example, lets assume your hardware decided to flood a
> packet in L2. The packet will egress all the local ports, but will also
> perform VXLAN encapsulation. The packet continues with the IP of the
> remote VTEP(s) to the underlay router and then encounters a neighbour
> miss exception, which sends it to the CPU for resolution.
>
> Since this exception was encountered in the router the driver would mark
> the packet with 'offload_fwd_mark', as it already performed L2
> forwarding. If the VXLAN device has the same parent ID as the physical
> netdevs, then the Linux bridge will never let it egress, nothing will
> trigger neighbour resolution and the packet will be discarded.
>

I wasn't going to argue that.
Ok, so with a bridged VXLAN only certain multicast DMACs corresponding
to multicast IPs should be flooded to the CPU.
Actually Allan's example was a bit simpler, he said that host flooding
can be made a per-VLAN flag. I'm glad that you raised this. So maybe
we should try to define some mechanism by which virtual interfaces can
specify to the bridge that they don't need to see all traffic? Do you
have any ideas?

> > +                             break;
> > +                     }
> > +             }
> > +
> > +             if (sw_bridging)
> > +                     flags |= mask;
> > +             else
> > +                     flags &= ~mask;
> > +
> > +             if (flags == p->flags)
> > +                     continue;
> > +
> > +             err = br_switchdev_set_port_flag(p, flags, mask);
> > +             if (err)
> > +                     return err;
> > +
> > +             p->flags = flags;
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> >  int nbp_backup_change(struct net_bridge_port *p,
> >                     struct net_device *backup_dev)
> >  {
> > @@ -231,6 +270,7 @@ static void nbp_update_port_count(struct net_bridge *br)
> >               br->auto_cnt = cnt;
> >               br_manage_promisc(br);
> >       }
> > +     br_manage_host_flood(br);
> >  }
> >
> >  static void nbp_delete_promisc(struct net_bridge_port *p)
> > diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> > index 015209bf44aa..360806ac7463 100644
> > --- a/net/bridge/br_switchdev.c
> > +++ b/net/bridge/br_switchdev.c
> > @@ -56,7 +56,9 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
> >
> >  /* Flags that can be offloaded to hardware */
> >  #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
> > -                               BR_MCAST_FLOOD | BR_BCAST_FLOOD)
> > +                               BR_MCAST_FLOOD | BR_BCAST_FLOOD | \
> > +                               BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD | \
> > +                               BR_HOST_BCAST_FLOOD)
> >
> >  int br_switchdev_set_port_flag(struct net_bridge_port *p,
> >                              unsigned long flags,
> > --
> > 2.25.1
> >

Thanks,
-Vladimir
Ido Schimmel May 25, 2020, 8:11 p.m. UTC | #6
On Sun, May 24, 2020 at 07:13:46PM +0300, Vladimir Oltean wrote:
> Hi Ido,
> 
> On Sun, 24 May 2020 at 17:26, Ido Schimmel <idosch@idosch.org> wrote:
> >
> > On Fri, May 22, 2020 at 12:10:33AM +0300, Vladimir Oltean wrote:
> > > From: Vladimir Oltean <vladimir.oltean@nxp.com>
> > >
> > > In cases where the bridge is offloaded by a switchdev, there are
> > > situations where we can optimize RX filtering towards the host. To be
> > > precise, the host only needs to do termination, which it can do by
> > > responding at the MAC addresses of the slave ports and of the bridge
> > > interface itself. But most notably, it doesn't need to do forwarding,
> > > so there is no need to see packets with unknown destination address.
> > >
> > > But there are, however, cases when a switchdev does need to flood to the
> > > CPU. Such an example is when the switchdev is bridged with a foreign
> > > interface, and since there is no offloaded datapath, packets need to
> > > pass through the CPU. Currently this is the only identified case, but it
> > > can be extended at any time.
> > >
> > > So far, switchdev implementers made driver-level assumptions, such as:
> > > this chip is never integrated in SoCs where it can be bridged with a
> > > foreign interface, so I'll just disable host flooding and save some CPU
> > > cycles. Or: I can never know what else can be bridged with this
> > > switchdev port, so I must leave host flooding enabled in any case.
> > >
> > > Let the bridge drive the host flooding decision, and pass it to
> > > switchdev via the same mechanism as the external flooding flags.
> > >
> > > Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> > > ---
> > >  include/linux/if_bridge.h |  3 +++
> > >  net/bridge/br_if.c        | 40 +++++++++++++++++++++++++++++++++++++++
> > >  net/bridge/br_switchdev.c |  4 +++-
> > >  3 files changed, 46 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> > > index b3a8d3054af0..6891a432862d 100644
> > > --- a/include/linux/if_bridge.h
> > > +++ b/include/linux/if_bridge.h
> > > @@ -49,6 +49,9 @@ struct br_ip_list {
> > >  #define BR_ISOLATED          BIT(16)
> > >  #define BR_MRP_AWARE         BIT(17)
> > >  #define BR_MRP_LOST_CONT     BIT(18)
> > > +#define BR_HOST_FLOOD                BIT(19)
> > > +#define BR_HOST_MCAST_FLOOD  BIT(20)
> > > +#define BR_HOST_BCAST_FLOOD  BIT(21)
> > >
> > >  #define BR_DEFAULT_AGEING_TIME       (300 * HZ)
> > >
> > > diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> > > index a0e9a7937412..aae59d1e619b 100644
> > > --- a/net/bridge/br_if.c
> > > +++ b/net/bridge/br_if.c
> > > @@ -166,6 +166,45 @@ void br_manage_promisc(struct net_bridge *br)
> > >       }
> > >  }
> > >
> > > +static int br_manage_host_flood(struct net_bridge *br)
> > > +{
> > > +     const unsigned long mask = BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD |
> > > +                                BR_HOST_BCAST_FLOOD;
> > > +     struct net_bridge_port *p, *q;
> > > +
> > > +     list_for_each_entry(p, &br->port_list, list) {
> > > +             unsigned long flags = p->flags;
> > > +             bool sw_bridging = false;
> > > +             int err;
> > > +
> > > +             list_for_each_entry(q, &br->port_list, list) {
> > > +                     if (p == q)
> > > +                             continue;
> > > +
> > > +                     if (!netdev_port_same_parent_id(p->dev, q->dev)) {
> > > +                             sw_bridging = true;
> >
> > It's not that simple. There are cases where not all bridge slaves have
> > the same parent ID and still there is no reason to flood traffic to the
> > CPU. VXLAN, for example.
> >
> > You could argue that the VXLAN device needs to have the same parent ID
> > as the physical netdevs member in the bridge, but it will break your
> > data path. For example, lets assume your hardware decided to flood a
> > packet in L2. The packet will egress all the local ports, but will also
> > perform VXLAN encapsulation. The packet continues with the IP of the
> > remote VTEP(s) to the underlay router and then encounters a neighbour
> > miss exception, which sends it to the CPU for resolution.
> >
> > Since this exception was encountered in the router the driver would mark
> > the packet with 'offload_fwd_mark', as it already performed L2
> > forwarding. If the VXLAN device has the same parent ID as the physical
> > netdevs, then the Linux bridge will never let it egress, nothing will
> > trigger neighbour resolution and the packet will be discarded.
> >
> 
> I wasn't going to argue that.
> Ok, so with a bridged VXLAN only certain multicast DMACs corresponding
> to multicast IPs should be flooded to the CPU.
> Actually Allan's example was a bit simpler, he said that host flooding
> can be made a per-VLAN flag. I'm glad that you raised this. So maybe
> we should try to define some mechanism by which virtual interfaces can
> specify to the bridge that they don't need to see all traffic? Do you
> have any ideas?

Maybe, when a port joins a bridge, query member ports if they can
forward traffic to it in hardware and based on the answer determine the
flooding towards the CPU?

> 
> > > +                             break;
> > > +                     }
> > > +             }
> > > +
> > > +             if (sw_bridging)
> > > +                     flags |= mask;
> > > +             else
> > > +                     flags &= ~mask;
> > > +
> > > +             if (flags == p->flags)
> > > +                     continue;
> > > +
> > > +             err = br_switchdev_set_port_flag(p, flags, mask);
> > > +             if (err)
> > > +                     return err;
> > > +
> > > +             p->flags = flags;
> > > +     }
> > > +
> > > +     return 0;
> > > +}
> > > +
> > >  int nbp_backup_change(struct net_bridge_port *p,
> > >                     struct net_device *backup_dev)
> > >  {
> > > @@ -231,6 +270,7 @@ static void nbp_update_port_count(struct net_bridge *br)
> > >               br->auto_cnt = cnt;
> > >               br_manage_promisc(br);
> > >       }
> > > +     br_manage_host_flood(br);
> > >  }
> > >
> > >  static void nbp_delete_promisc(struct net_bridge_port *p)
> > > diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> > > index 015209bf44aa..360806ac7463 100644
> > > --- a/net/bridge/br_switchdev.c
> > > +++ b/net/bridge/br_switchdev.c
> > > @@ -56,7 +56,9 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
> > >
> > >  /* Flags that can be offloaded to hardware */
> > >  #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
> > > -                               BR_MCAST_FLOOD | BR_BCAST_FLOOD)
> > > +                               BR_MCAST_FLOOD | BR_BCAST_FLOOD | \
> > > +                               BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD | \
> > > +                               BR_HOST_BCAST_FLOOD)
> > >
> > >  int br_switchdev_set_port_flag(struct net_bridge_port *p,
> > >                              unsigned long flags,
> > > --
> > > 2.25.1
> > >
> 
> Thanks,
> -Vladimir
Vladimir Oltean May 25, 2020, 8:32 p.m. UTC | #7
On Mon, 25 May 2020 at 23:11, Ido Schimmel <idosch@idosch.org> wrote:
>
> On Sun, May 24, 2020 at 07:13:46PM +0300, Vladimir Oltean wrote:
> > Hi Ido,
> >
> > On Sun, 24 May 2020 at 17:26, Ido Schimmel <idosch@idosch.org> wrote:
> > >
> > > On Fri, May 22, 2020 at 12:10:33AM +0300, Vladimir Oltean wrote:
> > > > From: Vladimir Oltean <vladimir.oltean@nxp.com>
> > > >
> > > > In cases where the bridge is offloaded by a switchdev, there are
> > > > situations where we can optimize RX filtering towards the host. To be
> > > > precise, the host only needs to do termination, which it can do by
> > > > responding at the MAC addresses of the slave ports and of the bridge
> > > > interface itself. But most notably, it doesn't need to do forwarding,
> > > > so there is no need to see packets with unknown destination address.
> > > >
> > > > But there are, however, cases when a switchdev does need to flood to the
> > > > CPU. Such an example is when the switchdev is bridged with a foreign
> > > > interface, and since there is no offloaded datapath, packets need to
> > > > pass through the CPU. Currently this is the only identified case, but it
> > > > can be extended at any time.
> > > >
> > > > So far, switchdev implementers made driver-level assumptions, such as:
> > > > this chip is never integrated in SoCs where it can be bridged with a
> > > > foreign interface, so I'll just disable host flooding and save some CPU
> > > > cycles. Or: I can never know what else can be bridged with this
> > > > switchdev port, so I must leave host flooding enabled in any case.
> > > >
> > > > Let the bridge drive the host flooding decision, and pass it to
> > > > switchdev via the same mechanism as the external flooding flags.
> > > >
> > > > Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> > > > ---
> > > >  include/linux/if_bridge.h |  3 +++
> > > >  net/bridge/br_if.c        | 40 +++++++++++++++++++++++++++++++++++++++
> > > >  net/bridge/br_switchdev.c |  4 +++-
> > > >  3 files changed, 46 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> > > > index b3a8d3054af0..6891a432862d 100644
> > > > --- a/include/linux/if_bridge.h
> > > > +++ b/include/linux/if_bridge.h
> > > > @@ -49,6 +49,9 @@ struct br_ip_list {
> > > >  #define BR_ISOLATED          BIT(16)
> > > >  #define BR_MRP_AWARE         BIT(17)
> > > >  #define BR_MRP_LOST_CONT     BIT(18)
> > > > +#define BR_HOST_FLOOD                BIT(19)
> > > > +#define BR_HOST_MCAST_FLOOD  BIT(20)
> > > > +#define BR_HOST_BCAST_FLOOD  BIT(21)
> > > >
> > > >  #define BR_DEFAULT_AGEING_TIME       (300 * HZ)
> > > >
> > > > diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> > > > index a0e9a7937412..aae59d1e619b 100644
> > > > --- a/net/bridge/br_if.c
> > > > +++ b/net/bridge/br_if.c
> > > > @@ -166,6 +166,45 @@ void br_manage_promisc(struct net_bridge *br)
> > > >       }
> > > >  }
> > > >
> > > > +static int br_manage_host_flood(struct net_bridge *br)
> > > > +{
> > > > +     const unsigned long mask = BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD |
> > > > +                                BR_HOST_BCAST_FLOOD;
> > > > +     struct net_bridge_port *p, *q;
> > > > +
> > > > +     list_for_each_entry(p, &br->port_list, list) {
> > > > +             unsigned long flags = p->flags;
> > > > +             bool sw_bridging = false;
> > > > +             int err;
> > > > +
> > > > +             list_for_each_entry(q, &br->port_list, list) {
> > > > +                     if (p == q)
> > > > +                             continue;
> > > > +
> > > > +                     if (!netdev_port_same_parent_id(p->dev, q->dev)) {
> > > > +                             sw_bridging = true;
> > >
> > > It's not that simple. There are cases where not all bridge slaves have
> > > the same parent ID and still there is no reason to flood traffic to the
> > > CPU. VXLAN, for example.
> > >
> > > You could argue that the VXLAN device needs to have the same parent ID
> > > as the physical netdevs member in the bridge, but it will break your
> > > data path. For example, lets assume your hardware decided to flood a
> > > packet in L2. The packet will egress all the local ports, but will also
> > > perform VXLAN encapsulation. The packet continues with the IP of the
> > > remote VTEP(s) to the underlay router and then encounters a neighbour
> > > miss exception, which sends it to the CPU for resolution.
> > >
> > > Since this exception was encountered in the router the driver would mark
> > > the packet with 'offload_fwd_mark', as it already performed L2
> > > forwarding. If the VXLAN device has the same parent ID as the physical
> > > netdevs, then the Linux bridge will never let it egress, nothing will
> > > trigger neighbour resolution and the packet will be discarded.
> > >
> >
> > I wasn't going to argue that.
> > Ok, so with a bridged VXLAN only certain multicast DMACs corresponding
> > to multicast IPs should be flooded to the CPU.
> > Actually Allan's example was a bit simpler, he said that host flooding
> > can be made a per-VLAN flag. I'm glad that you raised this. So maybe
> > we should try to define some mechanism by which virtual interfaces can
> > specify to the bridge that they don't need to see all traffic? Do you
> > have any ideas?
>
> Maybe, when a port joins a bridge, query member ports if they can
> forward traffic to it in hardware and based on the answer determine the
> flooding towards the CPU?
>

Ok, should this be a new ndo or some already existing mechanism? In
what level of detail does the bridge need to know what filters is the
virtual interface going to apply? Just binary yes/no? In that case,
could it only check for the netdev ops?

> >
> > > > +                             break;
> > > > +                     }
> > > > +             }
> > > > +
> > > > +             if (sw_bridging)
> > > > +                     flags |= mask;
> > > > +             else
> > > > +                     flags &= ~mask;
> > > > +
> > > > +             if (flags == p->flags)
> > > > +                     continue;
> > > > +
> > > > +             err = br_switchdev_set_port_flag(p, flags, mask);
> > > > +             if (err)
> > > > +                     return err;
> > > > +
> > > > +             p->flags = flags;
> > > > +     }
> > > > +
> > > > +     return 0;
> > > > +}
> > > > +
> > > >  int nbp_backup_change(struct net_bridge_port *p,
> > > >                     struct net_device *backup_dev)
> > > >  {
> > > > @@ -231,6 +270,7 @@ static void nbp_update_port_count(struct net_bridge *br)
> > > >               br->auto_cnt = cnt;
> > > >               br_manage_promisc(br);
> > > >       }
> > > > +     br_manage_host_flood(br);
> > > >  }
> > > >
> > > >  static void nbp_delete_promisc(struct net_bridge_port *p)
> > > > diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> > > > index 015209bf44aa..360806ac7463 100644
> > > > --- a/net/bridge/br_switchdev.c
> > > > +++ b/net/bridge/br_switchdev.c
> > > > @@ -56,7 +56,9 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
> > > >
> > > >  /* Flags that can be offloaded to hardware */
> > > >  #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
> > > > -                               BR_MCAST_FLOOD | BR_BCAST_FLOOD)
> > > > +                               BR_MCAST_FLOOD | BR_BCAST_FLOOD | \
> > > > +                               BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD | \
> > > > +                               BR_HOST_BCAST_FLOOD)
> > > >
> > > >  int br_switchdev_set_port_flag(struct net_bridge_port *p,
> > > >                              unsigned long flags,
> > > > --
> > > > 2.25.1
> > > >
> >
> > Thanks,
> > -Vladimir

Thanks,
-Vladimir
Vladimir Oltean July 20, 2020, 11:08 a.m. UTC | #8
On Fri, May 22, 2020 at 08:45:34PM +0200, Allan W. Nielsen wrote:
> On 22.05.2020 16:13, Vladimir Oltean wrote:
> > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> > 
> > On Fri, 22 May 2020 at 15:38, Nikolay Aleksandrov
> > <nikolay@cumulusnetworks.com> wrote:
> > > 
> > > On 22/05/2020 00:10, Vladimir Oltean wrote:
> > > > From: Vladimir Oltean <vladimir.oltean@nxp.com>
> > > >
> > > > In cases where the bridge is offloaded by a switchdev, there are
> > > > situations where we can optimize RX filtering towards the host. To be
> > > > precise, the host only needs to do termination, which it can do by
> > > > responding at the MAC addresses of the slave ports and of the bridge
> > > > interface itself. But most notably, it doesn't need to do forwarding,
> > > > so there is no need to see packets with unknown destination address.
> > > >
> > > > But there are, however, cases when a switchdev does need to flood to the
> > > > CPU. Such an example is when the switchdev is bridged with a foreign
> > > > interface, and since there is no offloaded datapath, packets need to
> > > > pass through the CPU. Currently this is the only identified case, but it
> > > > can be extended at any time.
> > > >
> > > > So far, switchdev implementers made driver-level assumptions, such as:
> > > > this chip is never integrated in SoCs where it can be bridged with a
> > > > foreign interface, so I'll just disable host flooding and save some CPU
> > > > cycles. Or: I can never know what else can be bridged with this
> > > > switchdev port, so I must leave host flooding enabled in any case.
> > > >
> > > > Let the bridge drive the host flooding decision, and pass it to
> > > > switchdev via the same mechanism as the external flooding flags.
> > > >
> > > > Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> > > > ---
> > > >  include/linux/if_bridge.h |  3 +++
> > > >  net/bridge/br_if.c        | 40 +++++++++++++++++++++++++++++++++++++++
> > > >  net/bridge/br_switchdev.c |  4 +++-
> > > >  3 files changed, 46 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> > > > index b3a8d3054af0..6891a432862d 100644
> > > > --- a/include/linux/if_bridge.h
> > > > +++ b/include/linux/if_bridge.h
> > > > @@ -49,6 +49,9 @@ struct br_ip_list {
> > > >  #define BR_ISOLATED          BIT(16)
> > > >  #define BR_MRP_AWARE         BIT(17)
> > > >  #define BR_MRP_LOST_CONT     BIT(18)
> > > > +#define BR_HOST_FLOOD                BIT(19)
> > > > +#define BR_HOST_MCAST_FLOOD  BIT(20)
> > > > +#define BR_HOST_BCAST_FLOOD  BIT(21)
> > > >
> > > >  #define BR_DEFAULT_AGEING_TIME       (300 * HZ)
> > > >
> > > > diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> > > > index a0e9a7937412..aae59d1e619b 100644
> > > > --- a/net/bridge/br_if.c
> > > > +++ b/net/bridge/br_if.c
> > > > @@ -166,6 +166,45 @@ void br_manage_promisc(struct net_bridge *br)
> > > >       }
> > > >  }
> > > >
> > > > +static int br_manage_host_flood(struct net_bridge *br)
> > > > +{
> > > > +     const unsigned long mask = BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD |
> > > > +                                BR_HOST_BCAST_FLOOD;
> > > > +     struct net_bridge_port *p, *q;
> > > > +
> > > > +     list_for_each_entry(p, &br->port_list, list) {
> > > > +             unsigned long flags = p->flags;
> > > > +             bool sw_bridging = false;
> > > > +             int err;
> > > > +
> > > > +             list_for_each_entry(q, &br->port_list, list) {
> > > > +                     if (p == q)
> > > > +                             continue;
> > > > +
> > > > +                     if (!netdev_port_same_parent_id(p->dev, q->dev)) {
> > > > +                             sw_bridging = true;
> > > > +                             break;
> > > > +                     }
> > > > +             }
> > > > +
> > > > +             if (sw_bridging)
> > > > +                     flags |= mask;
> > > > +             else
> > > > +                     flags &= ~mask;
> > > > +
> > > > +             if (flags == p->flags)
> > > > +                     continue;
> > > > +
> > > > +             err = br_switchdev_set_port_flag(p, flags, mask);
> > > > +             if (err)
> > > > +                     return err;
> > > > +
> > > > +             p->flags = flags;
> > > > +     }
> > > > +
> > > > +     return 0;
> > > > +}
> > > > +
> > > >  int nbp_backup_change(struct net_bridge_port *p,
> > > >                     struct net_device *backup_dev)
> > > >  {
> > > > @@ -231,6 +270,7 @@ static void nbp_update_port_count(struct net_bridge *br)
> > > >               br->auto_cnt = cnt;
> > > >               br_manage_promisc(br);
> > > >       }
> > > > +     br_manage_host_flood(br);
> > > >  }
> > > >
> > > 
> > > Can we do this only at port add/del ?
> > > Right now it will be invoked also by br_port_flags_change() upon BR_AUTO_MASK flag change.
> > > 
> > 
> > Yes, we can do that.
> > Actually I have some doubts about BR_HOST_BCAST_FLOOD. We can't
> > disable that in the no-foreign-interface case, can we? For IPv6, it
> > looks like the stack does take care of installing dev_mc addresses for
> > the neighbor discovery protocol, but for IPv4 I guess the assumption
> > is that broadcast ARP should always be processed?
> 
> Ideally this should be per VLAN. In case of IPv4, you only need to be
> part of the broadcast domain on VLANs with an associated vlan-interface.
> 

In Ocelot, what is the mechanism to remove the CPU from the flood domain
of a particular VLAN?

I thought of 2 approaches:

- VLAN_FLOOD_DIS in ANA:ANA_TABLES:VLANTIDX. But this disables flooding
  at the level of the entire VLAN, regardless of source and destination
  ports. So it cannot be used, as it interferes with the VLANs from the
  bridge.

- Removing the CPU from VLAN_PORT_MASK. But the documentation for this
  field says:

      Frames classified to this VLAN can only be 0x3F
      sent to ports in this mask. Note that the CPU
      port module is always member of all VLANs
      and its VLAN membership can therefore not
      be configured through this mask.

So I don't think any of them works.

> > > >  static void nbp_delete_promisc(struct net_bridge_port *p)
> > > > diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> > > > index 015209bf44aa..360806ac7463 100644
> > > > --- a/net/bridge/br_switchdev.c
> > > > +++ b/net/bridge/br_switchdev.c
> > > > @@ -56,7 +56,9 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
> > > >
> > > >  /* Flags that can be offloaded to hardware */
> > > >  #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
> > > > -                               BR_MCAST_FLOOD | BR_BCAST_FLOOD)
> > > > +                               BR_MCAST_FLOOD | BR_BCAST_FLOOD | \
> > > > +                               BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD | \
> > > > +                               BR_HOST_BCAST_FLOOD)
> > > >
> > > >  int br_switchdev_set_port_flag(struct net_bridge_port *p,
> > > >                              unsigned long flags,
> > > >
> > > 
> > 
> > Thanks,
> > -Vladimir
> /Allan

Thanks,
-Vladimir
Vladimir Oltean July 23, 2020, 10:35 p.m. UTC | #9
On Mon, May 25, 2020 at 11:11:11PM +0300, Ido Schimmel wrote:
> On Sun, May 24, 2020 at 07:13:46PM +0300, Vladimir Oltean wrote:
> > Hi Ido,
> > 
> > On Sun, 24 May 2020 at 17:26, Ido Schimmel <idosch@idosch.org> wrote:
> > >
> > > On Fri, May 22, 2020 at 12:10:33AM +0300, Vladimir Oltean wrote:
> > > > From: Vladimir Oltean <vladimir.oltean@nxp.com>
> > > >
> > > > In cases where the bridge is offloaded by a switchdev, there are
> > > > situations where we can optimize RX filtering towards the host. To be
> > > > precise, the host only needs to do termination, which it can do by
> > > > responding at the MAC addresses of the slave ports and of the bridge
> > > > interface itself. But most notably, it doesn't need to do forwarding,
> > > > so there is no need to see packets with unknown destination address.
> > > >
> > > > But there are, however, cases when a switchdev does need to flood to the
> > > > CPU. Such an example is when the switchdev is bridged with a foreign
> > > > interface, and since there is no offloaded datapath, packets need to
> > > > pass through the CPU. Currently this is the only identified case, but it
> > > > can be extended at any time.
> > > >
> > > > So far, switchdev implementers made driver-level assumptions, such as:
> > > > this chip is never integrated in SoCs where it can be bridged with a
> > > > foreign interface, so I'll just disable host flooding and save some CPU
> > > > cycles. Or: I can never know what else can be bridged with this
> > > > switchdev port, so I must leave host flooding enabled in any case.
> > > >
> > > > Let the bridge drive the host flooding decision, and pass it to
> > > > switchdev via the same mechanism as the external flooding flags.
> > > >
> > > > Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> > > > ---
> > > >  include/linux/if_bridge.h |  3 +++
> > > >  net/bridge/br_if.c        | 40 +++++++++++++++++++++++++++++++++++++++
> > > >  net/bridge/br_switchdev.c |  4 +++-
> > > >  3 files changed, 46 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> > > > index b3a8d3054af0..6891a432862d 100644
> > > > --- a/include/linux/if_bridge.h
> > > > +++ b/include/linux/if_bridge.h
> > > > @@ -49,6 +49,9 @@ struct br_ip_list {
> > > >  #define BR_ISOLATED          BIT(16)
> > > >  #define BR_MRP_AWARE         BIT(17)
> > > >  #define BR_MRP_LOST_CONT     BIT(18)
> > > > +#define BR_HOST_FLOOD                BIT(19)
> > > > +#define BR_HOST_MCAST_FLOOD  BIT(20)
> > > > +#define BR_HOST_BCAST_FLOOD  BIT(21)
> > > >
> > > >  #define BR_DEFAULT_AGEING_TIME       (300 * HZ)
> > > >
> > > > diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> > > > index a0e9a7937412..aae59d1e619b 100644
> > > > --- a/net/bridge/br_if.c
> > > > +++ b/net/bridge/br_if.c
> > > > @@ -166,6 +166,45 @@ void br_manage_promisc(struct net_bridge *br)
> > > >       }
> > > >  }
> > > >
> > > > +static int br_manage_host_flood(struct net_bridge *br)
> > > > +{
> > > > +     const unsigned long mask = BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD |
> > > > +                                BR_HOST_BCAST_FLOOD;
> > > > +     struct net_bridge_port *p, *q;
> > > > +
> > > > +     list_for_each_entry(p, &br->port_list, list) {
> > > > +             unsigned long flags = p->flags;
> > > > +             bool sw_bridging = false;
> > > > +             int err;
> > > > +
> > > > +             list_for_each_entry(q, &br->port_list, list) {
> > > > +                     if (p == q)
> > > > +                             continue;
> > > > +
> > > > +                     if (!netdev_port_same_parent_id(p->dev, q->dev)) {
> > > > +                             sw_bridging = true;
> > >
> > > It's not that simple. There are cases where not all bridge slaves have
> > > the same parent ID and still there is no reason to flood traffic to the
> > > CPU. VXLAN, for example.
> > >
> > > You could argue that the VXLAN device needs to have the same parent ID
> > > as the physical netdevs member in the bridge, but it will break your
> > > data path. For example, lets assume your hardware decided to flood a
> > > packet in L2. The packet will egress all the local ports, but will also
> > > perform VXLAN encapsulation. The packet continues with the IP of the
> > > remote VTEP(s) to the underlay router and then encounters a neighbour
> > > miss exception, which sends it to the CPU for resolution.
> > >
> > > Since this exception was encountered in the router the driver would mark
> > > the packet with 'offload_fwd_mark', as it already performed L2
> > > forwarding. If the VXLAN device has the same parent ID as the physical
> > > netdevs, then the Linux bridge will never let it egress, nothing will
> > > trigger neighbour resolution and the packet will be discarded.
> > >
> > 
> > I wasn't going to argue that.
> > Ok, so with a bridged VXLAN only certain multicast DMACs corresponding
> > to multicast IPs should be flooded to the CPU.
> > Actually Allan's example was a bit simpler, he said that host flooding
> > can be made a per-VLAN flag. I'm glad that you raised this. So maybe
> > we should try to define some mechanism by which virtual interfaces can
> > specify to the bridge that they don't need to see all traffic? Do you
> > have any ideas?
> 
> Maybe, when a port joins a bridge, query member ports if they can
> forward traffic to it in hardware and based on the answer determine the
> flooding towards the CPU?
> 

Hi Ido, Allan,

I understand less and less of this. What I don't really understand is,
if you have a switchdev bridged with a vtep like this:

 +-------------------------+
 |           br0           |
 +-------------------------+
     |                |
     |           +--------+
     |           | vxlan0 |
     |           +--------+
     |                |
 +--------+      +--------+
 |  swp0  |      |  eth0  |
 +--------+      +--------+

why would the swp0 interface care about the remote_ip at all. To the
traffic seen by swp0, the VXLAN segment doesn't exist. Encapsulation and
decapsulation all happen outside of the switchdev interface. All that
switchdev sees is that, from the CPU side, it's talking to a bunch of
MAC addresses.

The same comment also applies for 8021q, in fact. I did try this
experiment, to bridge a switchdev with a VLAN sub-interface of another
port. I don't know why, I used to have the misconception that the desire
in doing that would be to somehow only extract one VLAN ID from the
switchdev, and the rest could be kept outside of the CPU's flooding
domain. But that isn't the case at all. When bridging, I'm bridging the
_entire_ traffic of swp0 with, say, eth0.100. And, as in the case of
vxlan, encap/decap all happens outside of switchdev. So, contrary to my
initial expectation, if I'm receiving on swp0 a packet tagged with VLAN
100, it would end up exiting the bridge, on eth0, with 2 VLAN tags with
ID 100.

Simply put, I think my change is fine the way it is. Either that, or I
just don't understand your comment about querying bridge members whether
they can forward in hardware. How are you dealing with this today?

Thanks,
-Vladimir
Ido Schimmel July 27, 2020, 5:15 p.m. UTC | #10
On Fri, Jul 24, 2020 at 01:35:51AM +0300, Vladimir Oltean wrote:
> On Mon, May 25, 2020 at 11:11:11PM +0300, Ido Schimmel wrote:
> > On Sun, May 24, 2020 at 07:13:46PM +0300, Vladimir Oltean wrote:
> > > Hi Ido,
> > > 
> > > On Sun, 24 May 2020 at 17:26, Ido Schimmel <idosch@idosch.org> wrote:
> > > >
> > > > On Fri, May 22, 2020 at 12:10:33AM +0300, Vladimir Oltean wrote:
> > > > > From: Vladimir Oltean <vladimir.oltean@nxp.com>
> > > > >
> > > > > In cases where the bridge is offloaded by a switchdev, there are
> > > > > situations where we can optimize RX filtering towards the host. To be
> > > > > precise, the host only needs to do termination, which it can do by
> > > > > responding at the MAC addresses of the slave ports and of the bridge
> > > > > interface itself. But most notably, it doesn't need to do forwarding,
> > > > > so there is no need to see packets with unknown destination address.
> > > > >
> > > > > But there are, however, cases when a switchdev does need to flood to the
> > > > > CPU. Such an example is when the switchdev is bridged with a foreign
> > > > > interface, and since there is no offloaded datapath, packets need to
> > > > > pass through the CPU. Currently this is the only identified case, but it
> > > > > can be extended at any time.
> > > > >
> > > > > So far, switchdev implementers made driver-level assumptions, such as:
> > > > > this chip is never integrated in SoCs where it can be bridged with a
> > > > > foreign interface, so I'll just disable host flooding and save some CPU
> > > > > cycles. Or: I can never know what else can be bridged with this
> > > > > switchdev port, so I must leave host flooding enabled in any case.
> > > > >
> > > > > Let the bridge drive the host flooding decision, and pass it to
> > > > > switchdev via the same mechanism as the external flooding flags.
> > > > >
> > > > > Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> > > > > ---
> > > > >  include/linux/if_bridge.h |  3 +++
> > > > >  net/bridge/br_if.c        | 40 +++++++++++++++++++++++++++++++++++++++
> > > > >  net/bridge/br_switchdev.c |  4 +++-
> > > > >  3 files changed, 46 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> > > > > index b3a8d3054af0..6891a432862d 100644
> > > > > --- a/include/linux/if_bridge.h
> > > > > +++ b/include/linux/if_bridge.h
> > > > > @@ -49,6 +49,9 @@ struct br_ip_list {
> > > > >  #define BR_ISOLATED          BIT(16)
> > > > >  #define BR_MRP_AWARE         BIT(17)
> > > > >  #define BR_MRP_LOST_CONT     BIT(18)
> > > > > +#define BR_HOST_FLOOD                BIT(19)
> > > > > +#define BR_HOST_MCAST_FLOOD  BIT(20)
> > > > > +#define BR_HOST_BCAST_FLOOD  BIT(21)
> > > > >
> > > > >  #define BR_DEFAULT_AGEING_TIME       (300 * HZ)
> > > > >
> > > > > diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> > > > > index a0e9a7937412..aae59d1e619b 100644
> > > > > --- a/net/bridge/br_if.c
> > > > > +++ b/net/bridge/br_if.c
> > > > > @@ -166,6 +166,45 @@ void br_manage_promisc(struct net_bridge *br)
> > > > >       }
> > > > >  }
> > > > >
> > > > > +static int br_manage_host_flood(struct net_bridge *br)
> > > > > +{
> > > > > +     const unsigned long mask = BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD |
> > > > > +                                BR_HOST_BCAST_FLOOD;
> > > > > +     struct net_bridge_port *p, *q;
> > > > > +
> > > > > +     list_for_each_entry(p, &br->port_list, list) {
> > > > > +             unsigned long flags = p->flags;
> > > > > +             bool sw_bridging = false;
> > > > > +             int err;
> > > > > +
> > > > > +             list_for_each_entry(q, &br->port_list, list) {
> > > > > +                     if (p == q)
> > > > > +                             continue;
> > > > > +
> > > > > +                     if (!netdev_port_same_parent_id(p->dev, q->dev)) {
> > > > > +                             sw_bridging = true;
> > > >
> > > > It's not that simple. There are cases where not all bridge slaves have
> > > > the same parent ID and still there is no reason to flood traffic to the
> > > > CPU. VXLAN, for example.
> > > >
> > > > You could argue that the VXLAN device needs to have the same parent ID
> > > > as the physical netdevs member in the bridge, but it will break your
> > > > data path. For example, lets assume your hardware decided to flood a
> > > > packet in L2. The packet will egress all the local ports, but will also
> > > > perform VXLAN encapsulation. The packet continues with the IP of the
> > > > remote VTEP(s) to the underlay router and then encounters a neighbour
> > > > miss exception, which sends it to the CPU for resolution.
> > > >
> > > > Since this exception was encountered in the router the driver would mark
> > > > the packet with 'offload_fwd_mark', as it already performed L2
> > > > forwarding. If the VXLAN device has the same parent ID as the physical
> > > > netdevs, then the Linux bridge will never let it egress, nothing will
> > > > trigger neighbour resolution and the packet will be discarded.
> > > >
> > > 
> > > I wasn't going to argue that.
> > > Ok, so with a bridged VXLAN only certain multicast DMACs corresponding
> > > to multicast IPs should be flooded to the CPU.
> > > Actually Allan's example was a bit simpler, he said that host flooding
> > > can be made a per-VLAN flag. I'm glad that you raised this. So maybe
> > > we should try to define some mechanism by which virtual interfaces can
> > > specify to the bridge that they don't need to see all traffic? Do you
> > > have any ideas?
> > 
> > Maybe, when a port joins a bridge, query member ports if they can
> > forward traffic to it in hardware and based on the answer determine the
> > flooding towards the CPU?
> > 
> 
> Hi Ido, Allan,
> 
> I understand less and less of this. What I don't really understand is,
> if you have a switchdev bridged with a vtep like this:
> 
>  +-------------------------+
>  |           br0           |
>  +-------------------------+
>      |                |
>      |           +--------+
>      |           | vxlan0 |
>      |           +--------+
>      |                |
>  +--------+      +--------+
>  |  swp0  |      |  eth0  |
>  +--------+      +--------+
> 
> why would the swp0 interface care about the remote_ip at all. To the
> traffic seen by swp0, the VXLAN segment doesn't exist. Encapsulation and
> decapsulation all happen outside of the switchdev interface. All that
> switchdev sees is that, from the CPU side, it's talking to a bunch of
> MAC addresses.

I don't understand "Encapsulation and decapsulation all happen outside
of the switchdev interface". What does it mean? Encapsulation and
decapsulation happen in hardware... Frame is received by swp0, forwarded
to hardware VTEP, encapsulated and routed towards its destination. The
CPU does not see the packet. Same with decapsulation.

You patch instructs drivers to flood traffic to the CPU if netdevs with
different parent ID are member in it. I explained that this breaks with
VXLAN.

swp0 and vxlan0 do not have the same parent ID yet this does not mean
packets should be flooded to the CPU. I also explained why they should
not have the same parent ID:

1. Packet is received by swp0
2. Forwarded in hardware to hardware VTEP
3. VTEP performs encapsulation
4. VXLAN packet is routed in hardware
5. VXLAN packet encounters a neighbour miss in router
6. Original packet is trapped to CPU from swp0 because of an unresolved
neighbour
7. Driver marks packet with 'offload_fwd_mark' because it was already L2
forwarded in hardware
8. Packet reaches software bridge
9. bridge does not forward it to swpX netdevs because they share the
same parent ID as swp0 and packet is marked
10. Packet is forwarded to vxlan0
11. Packet is routed in and neighbour is resolved

Since the neighbour is now resolved the next packet will be completely
forwarded in hardware.

If vxlan0 and swp0 had the same parent ID, then step 10 would never
happen and the neighbour would never be resolved.

> 
> The same comment also applies for 8021q, in fact. I did try this
> experiment, to bridge a switchdev with a VLAN sub-interface of another
> port. I don't know why, I used to have the misconception that the desire
> in doing that would be to somehow only extract one VLAN ID from the
> switchdev, and the rest could be kept outside of the CPU's flooding
> domain. But that isn't the case at all. When bridging, I'm bridging the
> _entire_ traffic of swp0 with, say, eth0.100. And, as in the case of
> vxlan, encap/decap all happens outside of switchdev. So, contrary to my
> initial expectation, if I'm receiving on swp0 a packet tagged with VLAN
> 100, it would end up exiting the bridge, on eth0, with 2 VLAN tags with
> ID 100.
> 
> Simply put, I think my change is fine the way it is. Either that, or I
> just don't understand your comment about querying bridge members whether
> they can forward in hardware. How are you dealing with this today?
> 
> Thanks,
> -Vladimir
diff mbox series

Patch

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index b3a8d3054af0..6891a432862d 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -49,6 +49,9 @@  struct br_ip_list {
 #define BR_ISOLATED		BIT(16)
 #define BR_MRP_AWARE		BIT(17)
 #define BR_MRP_LOST_CONT	BIT(18)
+#define BR_HOST_FLOOD		BIT(19)
+#define BR_HOST_MCAST_FLOOD	BIT(20)
+#define BR_HOST_BCAST_FLOOD	BIT(21)
 
 #define BR_DEFAULT_AGEING_TIME	(300 * HZ)
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a0e9a7937412..aae59d1e619b 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -166,6 +166,45 @@  void br_manage_promisc(struct net_bridge *br)
 	}
 }
 
+static int br_manage_host_flood(struct net_bridge *br)
+{
+	const unsigned long mask = BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD |
+				   BR_HOST_BCAST_FLOOD;
+	struct net_bridge_port *p, *q;
+
+	list_for_each_entry(p, &br->port_list, list) {
+		unsigned long flags = p->flags;
+		bool sw_bridging = false;
+		int err;
+
+		list_for_each_entry(q, &br->port_list, list) {
+			if (p == q)
+				continue;
+
+			if (!netdev_port_same_parent_id(p->dev, q->dev)) {
+				sw_bridging = true;
+				break;
+			}
+		}
+
+		if (sw_bridging)
+			flags |= mask;
+		else
+			flags &= ~mask;
+
+		if (flags == p->flags)
+			continue;
+
+		err = br_switchdev_set_port_flag(p, flags, mask);
+		if (err)
+			return err;
+
+		p->flags = flags;
+	}
+
+	return 0;
+}
+
 int nbp_backup_change(struct net_bridge_port *p,
 		      struct net_device *backup_dev)
 {
@@ -231,6 +270,7 @@  static void nbp_update_port_count(struct net_bridge *br)
 		br->auto_cnt = cnt;
 		br_manage_promisc(br);
 	}
+	br_manage_host_flood(br);
 }
 
 static void nbp_delete_promisc(struct net_bridge_port *p)
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 015209bf44aa..360806ac7463 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -56,7 +56,9 @@  bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
 
 /* Flags that can be offloaded to hardware */
 #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
-				  BR_MCAST_FLOOD | BR_BCAST_FLOOD)
+				  BR_MCAST_FLOOD | BR_BCAST_FLOOD | \
+				  BR_HOST_FLOOD | BR_HOST_MCAST_FLOOD | \
+				  BR_HOST_BCAST_FLOOD)
 
 int br_switchdev_set_port_flag(struct net_bridge_port *p,
 			       unsigned long flags,