diff mbox

[RFC,1/2] net: dsa: integrate with SWITCHDEV for HW bridging

Message ID 1424201196-4901-2-git-send-email-f.fainelli@gmail.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Florian Fainelli Feb. 17, 2015, 7:26 p.m. UTC
In order to support bridging offloads in DSA switch drivers, select
NET_SWITCHDEV to get access to the port_stp_update and parent_get_id
NDOs that we are required to implement.

To facilitate the integratation at the DSA driver level, we implement 3
types of operations:

- port_join_bridge
- port_leave_bridge
- port_stp_update

DSA will resolve which switch ports that are currently bridge port
members as some Switch hardware/drivers need to know about that to limit
the register programming to just the relevant registers (especially for
slow MDIO buses).

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 include/net/dsa.h  |  10 +++++
 net/dsa/Kconfig    |   1 +
 net/dsa/dsa.c      |   7 ++++
 net/dsa/dsa_priv.h |   2 +
 net/dsa/slave.c    | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 137 insertions(+)

Comments

Guenter Roeck Feb. 17, 2015, 8:13 p.m. UTC | #1
On 02/17/2015 11:26 AM, Florian Fainelli wrote:
> In order to support bridging offloads in DSA switch drivers, select
> NET_SWITCHDEV to get access to the port_stp_update and parent_get_id
> NDOs that we are required to implement.
>
> To facilitate the integratation at the DSA driver level, we implement 3
> types of operations:
>
> - port_join_bridge
> - port_leave_bridge
> - port_stp_update
>
> DSA will resolve which switch ports that are currently bridge port
> members as some Switch hardware/drivers need to know about that to limit
> the register programming to just the relevant registers (especially for
> slow MDIO buses).
>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> ---
>   include/net/dsa.h  |  10 +++++
>   net/dsa/Kconfig    |   1 +
>   net/dsa/dsa.c      |   7 ++++
>   net/dsa/dsa_priv.h |   2 +
>   net/dsa/slave.c    | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>   5 files changed, 137 insertions(+)
>
> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index ed3c34bbb67a..92be34791963 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -275,6 +275,16 @@ struct dsa_switch_driver {
>   	int	(*get_regs_len)(struct dsa_switch *ds, int port);
>   	void	(*get_regs)(struct dsa_switch *ds, int port,
>   			    struct ethtool_regs *regs, void *p);
> +
> +	/*
> +	 * Bridge integration
> +	 */
> +	int	(*port_join_bridge)(struct dsa_switch *ds, int port,
> +				    u32 br_port_mask);
> +	int	(*port_leave_bridge)(struct dsa_switch *ds, int port,
> +				     u32 br_port_mask);
> +	int	(*port_stp_update)(struct dsa_switch *ds, int port,
> +				   u8 state);
>   };
>
>   void register_switch_driver(struct dsa_switch_driver *type);
> diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
> index 5f8ac404535b..b45206e8dd3e 100644
> --- a/net/dsa/Kconfig
> +++ b/net/dsa/Kconfig
> @@ -8,6 +8,7 @@ config NET_DSA
>   	tristate
>   	depends on HAVE_NET_DSA
>   	select PHYLIB
> +	select NET_SWITCHDEV

Should this be "select" or "depends on" ?

Downside of depends is that we'll need some ifdefs in the code,
but on the other side it would let people disable it if it is
not needed.

Thanks,
Guenter

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florian Fainelli Feb. 17, 2015, 8:24 p.m. UTC | #2
On 17/02/15 12:13, Guenter Roeck wrote:
> On 02/17/2015 11:26 AM, Florian Fainelli wrote:
>> In order to support bridging offloads in DSA switch drivers, select
>> NET_SWITCHDEV to get access to the port_stp_update and parent_get_id
>> NDOs that we are required to implement.
>>
>> To facilitate the integratation at the DSA driver level, we implement 3
>> types of operations:
>>
>> - port_join_bridge
>> - port_leave_bridge
>> - port_stp_update
>>
>> DSA will resolve which switch ports that are currently bridge port
>> members as some Switch hardware/drivers need to know about that to limit
>> the register programming to just the relevant registers (especially for
>> slow MDIO buses).
>>
>> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
>> ---
>>   include/net/dsa.h  |  10 +++++
>>   net/dsa/Kconfig    |   1 +
>>   net/dsa/dsa.c      |   7 ++++
>>   net/dsa/dsa_priv.h |   2 +
>>   net/dsa/slave.c    | 117
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   5 files changed, 137 insertions(+)
>>
>> diff --git a/include/net/dsa.h b/include/net/dsa.h
>> index ed3c34bbb67a..92be34791963 100644
>> --- a/include/net/dsa.h
>> +++ b/include/net/dsa.h
>> @@ -275,6 +275,16 @@ struct dsa_switch_driver {
>>       int    (*get_regs_len)(struct dsa_switch *ds, int port);
>>       void    (*get_regs)(struct dsa_switch *ds, int port,
>>                   struct ethtool_regs *regs, void *p);
>> +
>> +    /*
>> +     * Bridge integration
>> +     */
>> +    int    (*port_join_bridge)(struct dsa_switch *ds, int port,
>> +                    u32 br_port_mask);
>> +    int    (*port_leave_bridge)(struct dsa_switch *ds, int port,
>> +                     u32 br_port_mask);
>> +    int    (*port_stp_update)(struct dsa_switch *ds, int port,
>> +                   u8 state);
>>   };
>>
>>   void register_switch_driver(struct dsa_switch_driver *type);
>> diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
>> index 5f8ac404535b..b45206e8dd3e 100644
>> --- a/net/dsa/Kconfig
>> +++ b/net/dsa/Kconfig
>> @@ -8,6 +8,7 @@ config NET_DSA
>>       tristate
>>       depends on HAVE_NET_DSA
>>       select PHYLIB
>> +    select NET_SWITCHDEV
> 
> Should this be "select" or "depends on" ?
> 
> Downside of depends is that we'll need some ifdefs in the code,
> but on the other side it would let people disable it if it is
> not needed.

The code overhead is not huge, and I would think that by enforcing
NET_SWITCHDEV we encourage better DSA driver practices and promote HW
bridging, if you think this should be made conditional, I guess we can
do that.
Guenter Roeck Feb. 17, 2015, 8:28 p.m. UTC | #3
On 02/17/2015 12:24 PM, Florian Fainelli wrote:
> On 17/02/15 12:13, Guenter Roeck wrote:
>> On 02/17/2015 11:26 AM, Florian Fainelli wrote:
>>> In order to support bridging offloads in DSA switch drivers, select
>>> NET_SWITCHDEV to get access to the port_stp_update and parent_get_id
>>> NDOs that we are required to implement.
>>>
>>> To facilitate the integratation at the DSA driver level, we implement 3
>>> types of operations:
>>>
>>> - port_join_bridge
>>> - port_leave_bridge
>>> - port_stp_update
>>>
>>> DSA will resolve which switch ports that are currently bridge port
>>> members as some Switch hardware/drivers need to know about that to limit
>>> the register programming to just the relevant registers (especially for
>>> slow MDIO buses).
>>>
>>> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
>>> ---
>>>    include/net/dsa.h  |  10 +++++
>>>    net/dsa/Kconfig    |   1 +
>>>    net/dsa/dsa.c      |   7 ++++
>>>    net/dsa/dsa_priv.h |   2 +
>>>    net/dsa/slave.c    | 117
>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>    5 files changed, 137 insertions(+)
>>>
>>> diff --git a/include/net/dsa.h b/include/net/dsa.h
>>> index ed3c34bbb67a..92be34791963 100644
>>> --- a/include/net/dsa.h
>>> +++ b/include/net/dsa.h
>>> @@ -275,6 +275,16 @@ struct dsa_switch_driver {
>>>        int    (*get_regs_len)(struct dsa_switch *ds, int port);
>>>        void    (*get_regs)(struct dsa_switch *ds, int port,
>>>                    struct ethtool_regs *regs, void *p);
>>> +
>>> +    /*
>>> +     * Bridge integration
>>> +     */
>>> +    int    (*port_join_bridge)(struct dsa_switch *ds, int port,
>>> +                    u32 br_port_mask);
>>> +    int    (*port_leave_bridge)(struct dsa_switch *ds, int port,
>>> +                     u32 br_port_mask);
>>> +    int    (*port_stp_update)(struct dsa_switch *ds, int port,
>>> +                   u8 state);
>>>    };
>>>
>>>    void register_switch_driver(struct dsa_switch_driver *type);
>>> diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
>>> index 5f8ac404535b..b45206e8dd3e 100644
>>> --- a/net/dsa/Kconfig
>>> +++ b/net/dsa/Kconfig
>>> @@ -8,6 +8,7 @@ config NET_DSA
>>>        tristate
>>>        depends on HAVE_NET_DSA
>>>        select PHYLIB
>>> +    select NET_SWITCHDEV
>>
>> Should this be "select" or "depends on" ?
>>
>> Downside of depends is that we'll need some ifdefs in the code,
>> but on the other side it would let people disable it if it is
>> not needed.
>
> The code overhead is not huge, and I would think that by enforcing
> NET_SWITCHDEV we encourage better DSA driver practices and promote HW
> bridging, if you think this should be made conditional, I guess we can
> do that.
>

For sure not me ... I am happy forcing it. In my use case it would
always be enabled. I just don't want to go too far along that route
just to have to add a bunch of ifdefs later on.

Guenter

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrew Lunn Feb. 18, 2015, 1:19 a.m. UTC | #4
> +/* Return a bitmask of all ports being currently bridged. Note that on
> + * leave, the mask will still return the bitmask of ports currently bridged,
> + * prior to port removal, and this is exactly what we want.
> + */
> +static u32 dsa_slave_br_port_mask(struct dsa_switch *ds)
> +{
> +	unsigned int port;
> +	u32 mask = 0;
> +
> +	for (port = 0; port < DSA_MAX_PORTS; port++) {
> +		if (!((1 << port) & ds->phys_port_mask))
> +			continue;
> +
> +		if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT)
> +			mask |= 1 << port;
> +	}
> +
> +       return mask;
> +}
> +
> +static int dsa_slave_bridge_port_join(struct net_device *dev,
> +				      struct net_device *bridge)
> +{
> +	struct dsa_slave_priv *p = netdev_priv(dev);
> +	struct dsa_switch *ds = p->parent;
> +	int ret = -EOPNOTSUPP;
> +
> +	if (ds->drv->port_join_bridge)
> +		ret = ds->drv->port_join_bridge(ds, p->port,
> +						dsa_slave_br_port_mask(ds));

Hi Florian

Shouldn't this bridge port mask also be dependent on bridge?

I'm thinking of cases like

brctl addbr br0
brctl addif br0 lan0
brctl addif br0 lan1

brctl addbr br1
brctl addif br1 lan2
brctl addif br1 lan3

We have two software bridges, so need two masks.  It does look like
your hardware and the Marvell hardware supports this, disjoint sets of
bridged ports.  But with the current implementation, your going to end
up with one hardware bridge with four ports, and broken STP.

       Andrew
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florian Fainelli Feb. 18, 2015, 1:43 a.m. UTC | #5
On 17/02/15 17:19, Andrew Lunn wrote:
>> +/* Return a bitmask of all ports being currently bridged. Note that on
>> + * leave, the mask will still return the bitmask of ports currently bridged,
>> + * prior to port removal, and this is exactly what we want.
>> + */
>> +static u32 dsa_slave_br_port_mask(struct dsa_switch *ds)
>> +{
>> +	unsigned int port;
>> +	u32 mask = 0;
>> +
>> +	for (port = 0; port < DSA_MAX_PORTS; port++) {
>> +		if (!((1 << port) & ds->phys_port_mask))
>> +			continue;
>> +
>> +		if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT)
>> +			mask |= 1 << port;
>> +	}
>> +
>> +       return mask;
>> +}
>> +
>> +static int dsa_slave_bridge_port_join(struct net_device *dev,
>> +				      struct net_device *bridge)
>> +{
>> +	struct dsa_slave_priv *p = netdev_priv(dev);
>> +	struct dsa_switch *ds = p->parent;
>> +	int ret = -EOPNOTSUPP;
>> +
>> +	if (ds->drv->port_join_bridge)
>> +		ret = ds->drv->port_join_bridge(ds, p->port,
>> +						dsa_slave_br_port_mask(ds));
> 
> Hi Florian
> 
> Shouldn't this bridge port mask also be dependent on bridge?

Yes, you are very right, thankfully this is a RFC patch because of that ;)

> 
> I'm thinking of cases like
> 
> brctl addbr br0
> brctl addif br0 lan0
> brctl addif br0 lan1
> 
> brctl addbr br1
> brctl addif br1 lan2

mask = 0x4 | 0x2 | 0x1 # FAIL

> brctl addif br1 lan3

mask = 0x8 | 0x4 | 0x2 | 0x1 # FAIL

> 
> We have two software bridges, so need two masks.  It does look like
> your hardware and the Marvell hardware supports this, disjoint sets of
> bridged ports.  But with the current implementation, your going to end
> up with one hardware bridge with four ports, and broken STP.

Yep, I will rework that patch set to address that, and I can actually
test that, which is even better, thanks!
Guenter Roeck Feb. 18, 2015, 3:53 a.m. UTC | #6
On 02/17/2015 05:19 PM, Andrew Lunn wrote:
>> +/* Return a bitmask of all ports being currently bridged. Note that on
>> + * leave, the mask will still return the bitmask of ports currently bridged,
>> + * prior to port removal, and this is exactly what we want.
>> + */
>> +static u32 dsa_slave_br_port_mask(struct dsa_switch *ds)
>> +{
>> +	unsigned int port;
>> +	u32 mask = 0;
>> +
>> +	for (port = 0; port < DSA_MAX_PORTS; port++) {
>> +		if (!((1 << port) & ds->phys_port_mask))
>> +			continue;
>> +
>> +		if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT)
>> +			mask |= 1 << port;
>> +	}
>> +
>> +       return mask;
>> +}
>> +
>> +static int dsa_slave_bridge_port_join(struct net_device *dev,
>> +				      struct net_device *bridge)
>> +{
>> +	struct dsa_slave_priv *p = netdev_priv(dev);
>> +	struct dsa_switch *ds = p->parent;
>> +	int ret = -EOPNOTSUPP;
>> +
>> +	if (ds->drv->port_join_bridge)
>> +		ret = ds->drv->port_join_bridge(ds, p->port,
>> +						dsa_slave_br_port_mask(ds));
>
> Hi Florian
>
> Shouldn't this bridge port mask also be dependent on bridge?
>
> I'm thinking of cases like
>
> brctl addbr br0
> brctl addif br0 lan0
> brctl addif br0 lan1
>
> brctl addbr br1
> brctl addif br1 lan2
> brctl addif br1 lan3
>
> We have two software bridges, so need two masks.  It does look like
> your hardware and the Marvell hardware supports this, disjoint sets of
> bridged ports.  But with the current implementation, your going to end
> up with one hardware bridge with four ports, and broken STP.
>

Same problem here.

With the code above I see the ports which are part of "a" bridge group,
but I can not determine which group the port is supposed to join.

I don't really see the value in providing the port mask to
port_join_bridge, but maybe I am missing something. In my
implementation I just passed on bridge * and used it to
determine which port belongs to which bridge group.
It doesn't have to be that, but maybe we can use the bridge
ifindex or anything else that lets me determine which bridge
group the port belongs to.

Guenter

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florian Fainelli Feb. 18, 2015, 4:53 a.m. UTC | #7
2015-02-17 19:53 GMT-08:00 Guenter Roeck <linux@roeck-us.net>:
> On 02/17/2015 05:19 PM, Andrew Lunn wrote:
>>>
>>> +/* Return a bitmask of all ports being currently bridged. Note that on
>>> + * leave, the mask will still return the bitmask of ports currently
>>> bridged,
>>> + * prior to port removal, and this is exactly what we want.
>>> + */
>>> +static u32 dsa_slave_br_port_mask(struct dsa_switch *ds)
>>> +{
>>> +       unsigned int port;
>>> +       u32 mask = 0;
>>> +
>>> +       for (port = 0; port < DSA_MAX_PORTS; port++) {
>>> +               if (!((1 << port) & ds->phys_port_mask))
>>> +                       continue;
>>> +
>>> +               if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT)
>>> +                       mask |= 1 << port;
>>> +       }
>>> +
>>> +       return mask;
>>> +}
>>> +
>>> +static int dsa_slave_bridge_port_join(struct net_device *dev,
>>> +                                     struct net_device *bridge)
>>> +{
>>> +       struct dsa_slave_priv *p = netdev_priv(dev);
>>> +       struct dsa_switch *ds = p->parent;
>>> +       int ret = -EOPNOTSUPP;
>>> +
>>> +       if (ds->drv->port_join_bridge)
>>> +               ret = ds->drv->port_join_bridge(ds, p->port,
>>> +
>>> dsa_slave_br_port_mask(ds));
>>
>>
>> Hi Florian
>>
>> Shouldn't this bridge port mask also be dependent on bridge?
>>
>> I'm thinking of cases like
>>
>> brctl addbr br0
>> brctl addif br0 lan0
>> brctl addif br0 lan1
>>
>> brctl addbr br1
>> brctl addif br1 lan2
>> brctl addif br1 lan3
>>
>> We have two software bridges, so need two masks.  It does look like
>> your hardware and the Marvell hardware supports this, disjoint sets of
>> bridged ports.  But with the current implementation, your going to end
>> up with one hardware bridge with four ports, and broken STP.
>>
>
> Same problem here.
>
> With the code above I see the ports which are part of "a" bridge group,
> but I can not determine which group the port is supposed to join.

I have put a v2 here which addresses that by retaining which bridge
the port was added to and comparing that against the bridge net_device
we want to join:

https://github.com/ffainelli/linux/tree/dsa-hw-bridge-v2

>
> I don't really see the value in providing the port mask to
> port_join_bridge, but maybe I am missing something. In my
> implementation I just passed on bridge * and used it to
> determine which port belongs to which bridge group.
> It doesn't have to be that, but maybe we can use the bridge
> ifindex or anything else that lets me determine which bridge
> group the port belongs to.

As I described in the cover letter, I tend to think that resolving
this bitmask is part of the abstraction DSA should provide to its
driver, maybe it is more future proof and better to give access to the
bridge net_device pointer such that drivers can figure out the bridge
details themselves. With that in mind, maybe we can do something like
this:

- add the bridge net_device pointer to the join/leave callbacks
- provide a helper like dsa_slave_br_port_mask that drivers use or not
Guenter Roeck Feb. 18, 2015, 6:14 a.m. UTC | #8
On 02/17/2015 08:53 PM, Florian Fainelli wrote:
> 2015-02-17 19:53 GMT-08:00 Guenter Roeck <linux@roeck-us.net>:
[...]
>
> I have put a v2 here which addresses that by retaining which bridge
> the port was added to and comparing that against the bridge net_device
> we want to join:
>
> https://github.com/ffainelli/linux/tree/dsa-hw-bridge-v2
>
>>
>> I don't really see the value in providing the port mask to
>> port_join_bridge, but maybe I am missing something. In my
>> implementation I just passed on bridge * and used it to
>> determine which port belongs to which bridge group.
>> It doesn't have to be that, but maybe we can use the bridge
>> ifindex or anything else that lets me determine which bridge
>> group the port belongs to.
>
> As I described in the cover letter, I tend to think that resolving
> this bitmask is part of the abstraction DSA should provide to its
> driver, maybe it is more future proof and better to give access to the
> bridge net_device pointer such that drivers can figure out the bridge
> details themselves. With that in mind, maybe we can do something like
> this:
>
> - add the bridge net_device pointer to the join/leave callbacks
> - provide a helper like dsa_slave_br_port_mask that drivers use or not
>
Not sure if that is needed.

At first glance v2 should do it. I only use the bridge pointer
to create a set of bit masks, one for each bridge group. This is
pretty much what you do now. So I should have the information I need,
and it should actually simplify my code to some degree.

Unfortunately I won't be able to work on this for the next few days,
since I'll be at the Linux Collaboration Summit in Santa Rosa.

Guenter

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrew Lunn Feb. 18, 2015, 1:49 p.m. UTC | #9
> I have put a v2 here which addresses that by retaining which bridge
> the port was added to and comparing that against the bridge net_device
> we want to join:
> 
> https://github.com/ffainelli/linux/tree/dsa-hw-bridge-v2

Hi Florian

This looks O.K.

If we ever get a switch which does not allow disjoint sets of ports,
it will need some changes, but lets not over engineer it now.

   Andrew
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guenter Roeck Feb. 18, 2015, 2:05 p.m. UTC | #10
On 02/18/2015 05:49 AM, Andrew Lunn wrote:
>> I have put a v2 here which addresses that by retaining which bridge
>> the port was added to and comparing that against the bridge net_device
>> we want to join:
>>
>> https://github.com/ffainelli/linux/tree/dsa-hw-bridge-v2
>
> Hi Florian
>
> This looks O.K.
>
> If we ever get a switch which does not allow disjoint sets of ports,
> it will need some changes, but lets not over engineer it now.
>

Good point. The driver for that chip would have to detect the situation,
maybe by storing the bridge mask, and reject to configure the port
if there is a bridge mask mismatch. So I think this could work with the
current API.

Guenter

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guenter Roeck Feb. 23, 2015, 2:20 a.m. UTC | #11
On 02/17/2015 11:26 AM, Florian Fainelli wrote:
> In order to support bridging offloads in DSA switch drivers, select
> NET_SWITCHDEV to get access to the port_stp_update and parent_get_id
> NDOs that we are required to implement.
>
> To facilitate the integratation at the DSA driver level, we implement 3
> types of operations:
>

Hi Florian,

>
> +/* Return a bitmask of all ports being currently bridged. Note that on
> + * leave, the mask will still return the bitmask of ports currently bridged,
> + * prior to port removal, and this is exactly what we want.
> + */
> +static u32 dsa_slave_br_port_mask(struct dsa_switch *ds)
> +{
> +	unsigned int port;
> +	u32 mask = 0;
> +
> +	for (port = 0; port < DSA_MAX_PORTS; port++) {
> +		if (!((1 << port) & ds->phys_port_mask))
> +			continue;
> +
> +		if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT)
> +			mask |= 1 << port;

Problem is that the function can be called through dsa_slave_netdevice_event
before the slave devices are fully initialized.

After adding

+               if (!ds->ports[port]) {
+                       netdev_err(bridge,
+                                  "No ports data for port %d, mask=0x%x\n",
+                                  port, ds->phys_port_mask);
+                       continue;
+               }

and with some more debug messages added to dsa_switch_setup(), I see the following.

[   14.187290] e1000e 0000:00:19.0 em1: [0]: Creating slave device for port 1(port1)
[   14.272605] e1000e 0000:00:19.0 em1: [0]: Creating slave device for port 2(port2)
[   14.353118] e1000e 0000:00:19.0 em1: [0]: Creating slave device for port 3(port3)
[   14.472002] br0: No ports data for port 3, mask=0x1e
[   14.472053] br0: No ports data for port 4, mask=0x1e
[   14.472753] e1000e 0000:00:19.0 em1: [0]: Creating slave device for port 4(host2esb)

This happens if I add the bridge configuration to /etc/network/interfaces instead
of creating the bridge manually. Apparently dsa_switch_setup() is not yet complete
when dsa_slave_netdevice_event is executed to handle a state change on one of its
newly created slave interfaces.

The relevant information from /etc/network/interfaces is:

auto br0

iface port1 inet manual
iface port2 inet manual

iface br0 inet dhcp
	bridge_ports port1 port2
	up ifconfig br0 mtu 1492

Any idea how to best address this ?

Thanks,
Guenter

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrew Lunn Feb. 23, 2015, 3:14 a.m. UTC | #12
On Sun, Feb 22, 2015 at 06:20:44PM -0800, Guenter Roeck wrote:
> On 02/17/2015 11:26 AM, Florian Fainelli wrote:
> >In order to support bridging offloads in DSA switch drivers, select
> >NET_SWITCHDEV to get access to the port_stp_update and parent_get_id
> >NDOs that we are required to implement.
> >
> >To facilitate the integratation at the DSA driver level, we implement 3
> >types of operations:
> >
> 
> Hi Florian,
> 
> >
> >+/* Return a bitmask of all ports being currently bridged. Note that on
> >+ * leave, the mask will still return the bitmask of ports currently bridged,
> >+ * prior to port removal, and this is exactly what we want.
> >+ */
> >+static u32 dsa_slave_br_port_mask(struct dsa_switch *ds)
> >+{
> >+	unsigned int port;
> >+	u32 mask = 0;
> >+
> >+	for (port = 0; port < DSA_MAX_PORTS; port++) {
> >+		if (!((1 << port) & ds->phys_port_mask))
> >+			continue;
> >+
> >+		if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT)
> >+			mask |= 1 << port;
> 
> Problem is that the function can be called through dsa_slave_netdevice_event
> before the slave devices are fully initialized.
> 
> After adding
> 
> +               if (!ds->ports[port]) {
> +                       netdev_err(bridge,
> +                                  "No ports data for port %d, mask=0x%x\n",
> +                                  port, ds->phys_port_mask);
> +                       continue;
> +               }
> 
> and with some more debug messages added to dsa_switch_setup(), I see the following.
> 
> [   14.187290] e1000e 0000:00:19.0 em1: [0]: Creating slave device for port 1(port1)
> [   14.272605] e1000e 0000:00:19.0 em1: [0]: Creating slave device for port 2(port2)
> [   14.353118] e1000e 0000:00:19.0 em1: [0]: Creating slave device for port 3(port3)
> [   14.472002] br0: No ports data for port 3, mask=0x1e
> [   14.472053] br0: No ports data for port 4, mask=0x1e
> [   14.472753] e1000e 0000:00:19.0 em1: [0]: Creating slave device for port 4(host2esb)
> 
> This happens if I add the bridge configuration to /etc/network/interfaces instead
> of creating the bridge manually. Apparently dsa_switch_setup() is not yet complete
> when dsa_slave_netdevice_event is executed to handle a state change on one of its
> newly created slave interfaces.
> 
> The relevant information from /etc/network/interfaces is:
> 
> auto br0
> 
> iface port1 inet manual
> iface port2 inet manual
> 
> iface br0 inet dhcp
> 	bridge_ports port1 port2

Hi Guenter

Does this actually matter? The ports which don't exists yet are not
being added to the bridge. The mask will come out correct. What
happens when port4 is made a member of the bridge? I expect it
works. It is the creation of the interface which triggers hotplug to
read interfaces and add the interface to the port.

It might however be a good idea to document somewhere that each
interface should be configured without assuming anything about the
state of other interfaces.

     Andrew
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guenter Roeck Feb. 23, 2015, 4:07 a.m. UTC | #13
On 02/22/2015 07:14 PM, Andrew Lunn wrote:
> On Sun, Feb 22, 2015 at 06:20:44PM -0800, Guenter Roeck wrote:
>> On 02/17/2015 11:26 AM, Florian Fainelli wrote:
>>> In order to support bridging offloads in DSA switch drivers, select
>>> NET_SWITCHDEV to get access to the port_stp_update and parent_get_id
>>> NDOs that we are required to implement.
>>>
>>> To facilitate the integratation at the DSA driver level, we implement 3
>>> types of operations:
>>>
>>
>> Hi Florian,
>>
>>>
>>> +/* Return a bitmask of all ports being currently bridged. Note that on
>>> + * leave, the mask will still return the bitmask of ports currently bridged,
>>> + * prior to port removal, and this is exactly what we want.
>>> + */
>>> +static u32 dsa_slave_br_port_mask(struct dsa_switch *ds)
>>> +{
>>> +	unsigned int port;
>>> +	u32 mask = 0;
>>> +
>>> +	for (port = 0; port < DSA_MAX_PORTS; port++) {
>>> +		if (!((1 << port) & ds->phys_port_mask))
>>> +			continue;
>>> +
>>> +		if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT)
>>> +			mask |= 1 << port;
>>
>> Problem is that the function can be called through dsa_slave_netdevice_event
>> before the slave devices are fully initialized.
>>
>> After adding
>>
>> +               if (!ds->ports[port]) {
>> +                       netdev_err(bridge,
>> +                                  "No ports data for port %d, mask=0x%x\n",
>> +                                  port, ds->phys_port_mask);
>> +                       continue;
>> +               }
>>
>> and with some more debug messages added to dsa_switch_setup(), I see the following.
>>
>> [   14.187290] e1000e 0000:00:19.0 em1: [0]: Creating slave device for port 1(port1)
>> [   14.272605] e1000e 0000:00:19.0 em1: [0]: Creating slave device for port 2(port2)
>> [   14.353118] e1000e 0000:00:19.0 em1: [0]: Creating slave device for port 3(port3)
>> [   14.472002] br0: No ports data for port 3, mask=0x1e
>> [   14.472053] br0: No ports data for port 4, mask=0x1e
>> [   14.472753] e1000e 0000:00:19.0 em1: [0]: Creating slave device for port 4(host2esb)
>>
>> This happens if I add the bridge configuration to /etc/network/interfaces instead
>> of creating the bridge manually. Apparently dsa_switch_setup() is not yet complete
>> when dsa_slave_netdevice_event is executed to handle a state change on one of its
>> newly created slave interfaces.
>>
>> The relevant information from /etc/network/interfaces is:
>>
>> auto br0
>>
>> iface port1 inet manual
>> iface port2 inet manual
>>
>> iface br0 inet dhcp
>> 	bridge_ports port1 port2
>
> Hi Guenter
>
> Does this actually matter? The ports which don't exists yet are not
> being added to the bridge. The mask will come out correct. What
> happens when port4 is made a member of the bridge? I expect it
> works. It is the creation of the interface which triggers hotplug to
> read interfaces and add the interface to the port.
>

	if (!ds->ports[port])
		continue;

might be an option. However, I am not sure that what you say is correct,
at least not strictly speaking. dsa_slave_create() returns the created
slave device, which is added to ds->ports[port] in dsa_switch_setup().
Since there is no protection in dsa_switch_setup(), there is no guarantee
that the callback doesn't happen prior to the initialization of
ds->ports[port]. So the above would leave a race condition, where the
port being added to the bridge _is_ one for which ds->ports[port] is
not yet initialized.

Protecting the entire slave creation loop in dsa_switch_setup()
and using register_netdevice() in dsa_slave_create() solves the problem
as far as I can see, I just don't know if it is an acceptable solution.

Guenter

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/dsa.h b/include/net/dsa.h
index ed3c34bbb67a..92be34791963 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -275,6 +275,16 @@  struct dsa_switch_driver {
 	int	(*get_regs_len)(struct dsa_switch *ds, int port);
 	void	(*get_regs)(struct dsa_switch *ds, int port,
 			    struct ethtool_regs *regs, void *p);
+
+	/*
+	 * Bridge integration
+	 */
+	int	(*port_join_bridge)(struct dsa_switch *ds, int port,
+				    u32 br_port_mask);
+	int	(*port_leave_bridge)(struct dsa_switch *ds, int port,
+				     u32 br_port_mask);
+	int	(*port_stp_update)(struct dsa_switch *ds, int port,
+				   u8 state);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 5f8ac404535b..b45206e8dd3e 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -8,6 +8,7 @@  config NET_DSA
 	tristate
 	depends on HAVE_NET_DSA
 	select PHYLIB
+	select NET_SWITCHDEV
 
 if NET_DSA
 
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 2173402d87e0..293e0c3cc445 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -830,6 +830,10 @@  static struct packet_type dsa_pack_type __read_mostly = {
 	.func	= dsa_switch_rcv,
 };
 
+static struct notifier_block dsa_netdevice_nb __read_mostly = {
+	.notifier_call	= dsa_slave_netdevice_event,
+};
+
 #ifdef CONFIG_PM_SLEEP
 static int dsa_suspend(struct device *d)
 {
@@ -888,6 +892,8 @@  static int __init dsa_init_module(void)
 {
 	int rc;
 
+	register_netdevice_notifier(&dsa_netdevice_nb);
+
 	rc = platform_driver_register(&dsa_driver);
 	if (rc)
 		return rc;
@@ -900,6 +906,7 @@  module_init(dsa_init_module);
 
 static void __exit dsa_cleanup_module(void)
 {
+	unregister_netdevice_notifier(&dsa_netdevice_nb);
 	dev_remove_pack(&dsa_pack_type);
 	platform_driver_unregister(&dsa_driver);
 }
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index dc9756d3154c..e6f4301e719e 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -58,6 +58,8 @@  struct net_device *dsa_slave_create(struct dsa_switch *ds,
 				    int port, char *name);
 int dsa_slave_suspend(struct net_device *slave_dev);
 int dsa_slave_resume(struct net_device *slave_dev);
+int dsa_slave_netdevice_event(struct notifier_block *unused,
+			      unsigned long event, void *ptr);
 
 /* tag_dsa.c */
 extern const struct dsa_device_ops dsa_netdev_ops;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d104ae15836f..ea31b9be320f 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -10,10 +10,12 @@ 
 
 #include <linux/list.h>
 #include <linux/etherdevice.h>
+#include <linux/netdevice.h>
 #include <linux/phy.h>
 #include <linux/phy_fixed.h>
 #include <linux/of_net.h>
 #include <linux/of_mdio.h>
+#include <net/rtnetlink.h>
 #include "dsa_priv.h"
 
 /* slave mii_bus handling ***************************************************/
@@ -194,6 +196,77 @@  static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return -EOPNOTSUPP;
 }
 
+/* Return a bitmask of all ports being currently bridged. Note that on
+ * leave, the mask will still return the bitmask of ports currently bridged,
+ * prior to port removal, and this is exactly what we want.
+ */
+static u32 dsa_slave_br_port_mask(struct dsa_switch *ds)
+{
+	unsigned int port;
+	u32 mask = 0;
+
+	for (port = 0; port < DSA_MAX_PORTS; port++) {
+		if (!((1 << port) & ds->phys_port_mask))
+			continue;
+
+		if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT)
+			mask |= 1 << port;
+	}
+
+       return mask;
+}
+
+static int dsa_slave_bridge_port_join(struct net_device *dev,
+				      struct net_device *bridge)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+	int ret = -EOPNOTSUPP;
+
+	if (ds->drv->port_join_bridge)
+		ret = ds->drv->port_join_bridge(ds, p->port,
+						dsa_slave_br_port_mask(ds));
+
+	return ret;
+}
+
+static int dsa_slave_bridge_port_leave(struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+	int ret = -EOPNOTSUPP;
+
+	if (ds->drv->port_leave_bridge)
+		ret = ds->drv->port_leave_bridge(ds, p->port,
+						 dsa_slave_br_port_mask(ds));
+
+	return ret;
+}
+
+static int dsa_slave_parent_id_get(struct net_device *dev,
+				   struct netdev_phys_item_id *psid)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+
+	psid->id_len = sizeof(ds->index);
+	memcpy(&psid->id, &ds->index, psid->id_len);
+
+	return 0;
+}
+
+static int dsa_slave_stp_update(struct net_device *dev, u8 state)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+	int ret = -EOPNOTSUPP;
+
+	if (ds->drv->port_stp_update)
+		ret = ds->drv->port_stp_update(ds, p->port, state);
+
+	return ret;
+}
+
 static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -470,6 +543,8 @@  static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_set_rx_mode	= dsa_slave_set_rx_mode,
 	.ndo_set_mac_address	= dsa_slave_set_mac_address,
 	.ndo_do_ioctl		= dsa_slave_ioctl,
+	.ndo_switch_parent_id_get = dsa_slave_parent_id_get,
+	.ndo_switch_port_stp_update = dsa_slave_stp_update,
 };
 
 static void dsa_slave_adjust_link(struct net_device *dev)
@@ -678,3 +753,45 @@  dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 
 	return slave_dev;
 }
+
+static bool dsa_slave_dev_check(struct net_device *dev)
+{
+	return dev->netdev_ops == &dsa_slave_netdev_ops;
+}
+
+static int dsa_slave_master_changed(struct net_device *dev)
+{
+	struct net_device *master = netdev_master_upper_dev_get(dev);
+	int err = 0;
+
+	if (master && master->rtnl_link_ops &&
+	    !strcmp(master->rtnl_link_ops->kind, "bridge"))
+		err = dsa_slave_bridge_port_join(dev, master);
+	else
+		err = dsa_slave_bridge_port_leave(dev);
+
+	return err;
+}
+
+int dsa_slave_netdevice_event(struct notifier_block *unused,
+			      unsigned long event, void *ptr)
+{
+	struct netdevice_dev *dev;
+	int err = 0;
+
+	switch (event) {
+	case NETDEV_CHANGEUPPER:
+		dev = netdev_notifier_info_to_dev(ptr);
+		if (!dsa_slave_dev_check(dev))
+			goto out;
+
+		err = dsa_slave_master_changed(dev);
+		if (err)
+			netdev_warn(dev, "failed to reflect master change\n");
+
+		break;
+	}
+
+out:
+	return NOTIFY_DONE;
+}