diff mbox

[RFC] vlan: Try to adjust lower device mtu when configuring 802.1AD vlans

Message ID 1396387054-4510-1-git-send-email-vyasevic@redhat.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Vlad Yasevich April 1, 2014, 9:17 p.m. UTC
802.1AD vlans are supposed to encapsulate 802.1Q vlans.  To
do this, we need an extra 4 bytes of header which are typically
not accounted for by lower devices.  Some devices can not
support frames longer than 1522 bytes at all.  Such devices
can not really support 802.1AD, even in software, without
the vlan reducing its mtu value.

This patch proposes to increase the lower device's MTU to 1504
in case of 802.1AD configuration, and if the device doesn't
support it, fail the creation of the vlan.  The user has an
option to configure older-style Q-in-Q vlans and manually
lower the mtu to support such encapsulation.

CC: Patrik McHardy <kaber@trash.net>
Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
---
 net/8021q/vlan.c         | 17 +++++++++++++++++
 net/8021q/vlan_netlink.c | 11 ++++++++---
 2 files changed, 25 insertions(+), 3 deletions(-)

Comments

Florian Fainelli April 1, 2014, 9:34 p.m. UTC | #1
Hi Vlad,

2014-04-01 14:17 GMT-07:00 Vlad Yasevich <vyasevic@redhat.com>:
> 802.1AD vlans supposed to encapsulate 802.1Q vlans.  To
> do this, we need an extra 4 bytes of header which are typically
> not accounted for by lower devices.  Some devices can not
> support frames longer then 1522 bytes at all.  Such devices
> can not really support 802.1AD, even in software, without
> the vlan reducing its mtu value.
>
> This patch propses to increate the lower devices MTU to 1504
> in case of 802.1AD configuration, and if device doesn't
> support it, fail the creation of the vlan.  The user has an
> option to configure older-style Q-in-Q vlans and manually
> lower the mtu to support such encapsulation.
>
> CC: Patrik McHardy <kaber@trash.net>
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> ---
>  net/8021q/vlan.c         | 17 +++++++++++++++++
>  net/8021q/vlan_netlink.c | 11 ++++++++---
>  2 files changed, 25 insertions(+), 3 deletions(-)
>
> diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
> index 175273f..0328d73 100644
> --- a/net/8021q/vlan.c
> +++ b/net/8021q/vlan.c
> @@ -126,6 +126,7 @@ int vlan_check_real_dev(struct net_device *real_dev,
>                         __be16 protocol, u16 vlan_id)
>  {
>         const char *name = real_dev->name;
> +       int err;
>
>         if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
>                 pr_info("VLANs not supported on %s\n", name);
> @@ -135,6 +136,21 @@ int vlan_check_real_dev(struct net_device *real_dev,
>         if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL)
>                 return -EEXIST;
>
> +       if (protocol == htons(ETH_P_8021AD)) {
> +               /* 8021AD vlan is meant to encapsulate 8021Q and thus we
> +                * need to make sure that lower device can handle a
> +                * larger mtu.
> +                * If the lower device still has a default ethernet mtu,
> +                * bump it up 4 bytes.  If not, it was set by user and
> +                * we'll trust the user knows what he is doing.
> +                */
> +               if (real_dev->mtu == VLAN_ETH_DATA_LEN &&
> +                   dev_set_mtu(real_dev, real_dev->mtu + VLAN_HLEN))
> +                       pr_warn("802.1AD mode is not supported on %s due to mtu limitations.\n", name);
> +                       return -EOPNOTSUPP

Missing semicolon here.

> +               }
> +       }
> +
>         return 0;
>  }
>
> @@ -259,6 +275,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
>          * hope the underlying device can handle it.
>          */
>         new_dev->mtu = real_dev->mtu;
> +
>         new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
>
>         vlan = vlan_dev_priv(new_dev);
> diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
> index c7e634a..a925a8d 100644
> --- a/net/8021q/vlan_netlink.c
> +++ b/net/8021q/vlan_netlink.c
> @@ -144,10 +144,15 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
>         if (err < 0)
>                 return err;
>
> -       if (!tb[IFLA_MTU])
> -               dev->mtu = real_dev->mtu;
> -       else if (dev->mtu > real_dev->mtu)
> +       if (!tb[IFLA_MTU]) {
> +               if (vlan->vlan_proto == htons(ETH_P_8021AD) &&
> +                   real_dev->mtu == VLAN_ETH_DATA_LEN + VLAN_HLEN)

Parentheses around the add operation would help clarify things. Should
that not be a >= comparison? We want to make sure that real_dev has
enough MTU room to allow the default MTU size on the 802.1AD VLAN
device, or does the initial comment in vlan_check_real_dev() also apply
here?

> +                       dev->mtu = VLAN_ETH_DATA_LEN;
> +               else
> +                       dev->mtu = real_dev->mtu;
> +       } else if (dev->mtu > real_dev->mtu) {
>                 return -EINVAL;
> +       }
>
>         err = vlan_changelink(dev, tb, data);
>         if (err < 0)
> --
> 1.8.5.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Patrick McHardy April 2, 2014, 12:21 p.m. UTC | #2
On Tue, Apr 01, 2014 at 05:17:34PM -0400, Vlad Yasevich wrote:
> 802.1AD vlans supposed to encapsulate 802.1Q vlans.  To
> do this, we need an extra 4 bytes of header which are typically
> not accounted for by lower devices.  Some devices can not
> support frames longer then 1522 bytes at all.  Such devices
> can not really support 802.1AD, even in software, without
> the vlan reducing its mtu value.
> 
> This patch propses to increate the lower devices MTU to 1504
> in case of 802.1AD configuration, and if device doesn't
> support it, fail the creation of the vlan.  The user has an
> option to configure older-style Q-in-Q vlans and manually
> lower the mtu to support such encapsulation.

I think you should do the opposite. The lower layer device may be used
for other things than the VLAN, so it doesn't seem right to change its
MTU. Instead I'd propose to set the MTU of the 802.1ad VLAN device to
the lower device's MTU - 4 unless an MTU has been specified by the user.

BTW, I couldn't find anything related to MTU handling in the 802.1ad
standard, however I only have an old copy and might have looked in the
wrong place. Do you have any information how this is supposed to be
handled?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich April 2, 2014, 1:31 p.m. UTC | #3
On 04/02/2014 08:21 AM, Patrick McHardy wrote:
> On Tue, Apr 01, 2014 at 05:17:34PM -0400, Vlad Yasevich wrote:
>> 802.1AD vlans supposed to encapsulate 802.1Q vlans.  To
>> do this, we need an extra 4 bytes of header which are typically
>> not accounted for by lower devices.  Some devices can not
>> support frames longer then 1522 bytes at all.  Such devices
>> can not really support 802.1AD, even in software, without
>> the vlan reducing its mtu value.
>>
>> This patch propses to increate the lower devices MTU to 1504
>> in case of 802.1AD configuration, and if device doesn't
>> support it, fail the creation of the vlan.  The user has an
>> option to configure older-style Q-in-Q vlans and manually
>> lower the mtu to support such encapsulation.
> 
> I think you should do the opposite. The lower layer device may be used
> for other things than the VLAN, so it doesn't seem right to change it's
> MTU. Instead I'd propose to set the MTU of the 802.1ad VLAN device to
> the lower device'e MTU - 4 unless a MTU has been specified by the user.
> 

The decrease of vlan mtu was my initial take on this as well.  The
problematic case with this is forwarding by an encapsulating
bridge (a bridge that has 802.1AD as one port and ethX as others). The
frame from ethX will not fit into the mtu of the vlan device in
this case and the packet is dropped.  Ideally, we'd generate an ICMP
Too Big, but with the bridge we can't/don't do that.

Another problem is that linux assumes that MTU == MRU in case of
device receive buffer programming.  Thus, full sized 802.1AD
frames transmitted by the switch supporting it will probably get dropped
by the driver/firmware as too long.  I've tested this and saw it
happen on my systems.

An alternative I've thought of is to adjust the rx size in the drivers
when 802.1AD is configured, but that touches all the drivers, and
doesn't work well for non-vlan-filtering drivers.  It needs a new
ndo api to adjust the rx length to make it consistent across all
devices.

> BTW, I couldn't find anything related to MTU handling in the 802.1ad
> standard, however I only have an old copy and might have looked in the
> wrong place. Do you have any information how this is supposed to be
> handled?
> 

The standard doesn't seem to mention anything about it, but looking
at switch implementations, most of them require a bump in the mtu to
1504 to support 802.1AD.  Some allow for the decrease in vlan mtu, but
that also requires mss translations as well.

-vlad



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Toshiaki Makita April 2, 2014, 4:37 p.m. UTC | #4
On Wed, 2014-04-02 at 09:31 -0400, Vlad Yasevich wrote:
> On 04/02/2014 08:21 AM, Patrick McHardy wrote:
> > On Tue, Apr 01, 2014 at 05:17:34PM -0400, Vlad Yasevich wrote:
> >> 802.1AD vlans supposed to encapsulate 802.1Q vlans.  To
> >> do this, we need an extra 4 bytes of header which are typically
> >> not accounted for by lower devices.  Some devices can not
> >> support frames longer then 1522 bytes at all.  Such devices
> >> can not really support 802.1AD, even in software, without
> >> the vlan reducing its mtu value.
> >>
> >> This patch propses to increate the lower devices MTU to 1504
> >> in case of 802.1AD configuration, and if device doesn't
> >> support it, fail the creation of the vlan.  The user has an
> >> option to configure older-style Q-in-Q vlans and manually
> >> lower the mtu to support such encapsulation.
> > 
> > I think you should do the opposite. The lower layer device may be used
> > for other things than the VLAN, so it doesn't seem right to change it's
> > MTU. Instead I'd propose to set the MTU of the 802.1ad VLAN device to
> > the lower device'e MTU - 4 unless a MTU has been specified by the user.
> > 
> 
> The decrease of vlan mtu was my initial take on this as well.  The
> problematic case with this is forwarding by an encapsulating
> bridge (bridge that has 802.1AD as one port and ethX as others). The
> frame from ethX will not fit into the mtu of the vlan device in
> this case and the packet is dropped.  Ideally, we'd generate and ICMP
> Too Big, but with the bridge we can't/don't do that.
> 
> Another problem is that linux assumes that MTU == MRU in case of
> device receive buffer programming.  Thus, full sized 802.1AD
> frames transmitted by the switch supporting it will probably get dropped
> by the driver/firmware as too long.  I've tested this and saw it
> happen on my systems.
> 
> An alternative I've thought off is to adjust the rx size in the drivers
> when 802.1AD is configured, but that touches all the drivers, and
> doesn't work well for not vlan-filtering drivers.  It needs a new
> ndo api to adjust the rx length to make it consistent across all
> devices.
> 
> > BTW, I couldn't find anything related to MTU handling in the 802.1ad
> > standard, however I only have an old copy and might have looked in the
> > wrong place. Do you have any information how this is supposed to be
> > handled?
> > 
> 
> The standard doesn't seem to mention anything about it, but looking
> at switch implementations, most of them require a bump in the mtu to
> 1504 to support 802.1AD.  Some allow for the decrease in vlan mtu, but
> that also requires mss translations as well.

802.1ad was merged into 802.1Q-2011, and G.2.2 in it refers to maximum
pdu size. However, this doesn't seem to mention the case where frames
are double tagged.

MEF 6.1 requires UNI MTU size >= 1522 and MEF 31 requires E-NNI MTU size
>= 1526 (In these documents, MTU seems to mean frame size).
This implies that we should allow 1508 bytes of MTU size when we use
802.1AD.

Is 1504 enough?

Thanks,
Toshiaki Makita

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich April 2, 2014, 4:44 p.m. UTC | #5
On 04/02/2014 12:37 PM, Toshiaki Makita wrote:
> On Wed, 2014-04-02 at 09:31 -0400, Vlad Yasevich wrote:
>> On 04/02/2014 08:21 AM, Patrick McHardy wrote:
>>> On Tue, Apr 01, 2014 at 05:17:34PM -0400, Vlad Yasevich wrote:
>>>> 802.1AD vlans supposed to encapsulate 802.1Q vlans.  To
>>>> do this, we need an extra 4 bytes of header which are typically
>>>> not accounted for by lower devices.  Some devices can not
>>>> support frames longer then 1522 bytes at all.  Such devices
>>>> can not really support 802.1AD, even in software, without
>>>> the vlan reducing its mtu value.
>>>>
>>>> This patch propses to increate the lower devices MTU to 1504
>>>> in case of 802.1AD configuration, and if device doesn't
>>>> support it, fail the creation of the vlan.  The user has an
>>>> option to configure older-style Q-in-Q vlans and manually
>>>> lower the mtu to support such encapsulation.
>>>
>>> I think you should do the opposite. The lower layer device may be used
>>> for other things than the VLAN, so it doesn't seem right to change it's
>>> MTU. Instead I'd propose to set the MTU of the 802.1ad VLAN device to
>>> the lower device'e MTU - 4 unless a MTU has been specified by the user.
>>>
>>
>> The decrease of vlan mtu was my initial take on this as well.  The
>> problematic case with this is forwarding by an encapsulating
>> bridge (bridge that has 802.1AD as one port and ethX as others). The
>> frame from ethX will not fit into the mtu of the vlan device in
>> this case and the packet is dropped.  Ideally, we'd generate and ICMP
>> Too Big, but with the bridge we can't/don't do that.
>>
>> Another problem is that linux assumes that MTU == MRU in case of
>> device receive buffer programming.  Thus, full sized 802.1AD
>> frames transmitted by the switch supporting it will probably get dropped
>> by the driver/firmware as too long.  I've tested this and saw it
>> happen on my systems.
>>
>> An alternative I've thought off is to adjust the rx size in the drivers
>> when 802.1AD is configured, but that touches all the drivers, and
>> doesn't work well for not vlan-filtering drivers.  It needs a new
>> ndo api to adjust the rx length to make it consistent across all
>> devices.
>>
>>> BTW, I couldn't find anything related to MTU handling in the 802.1ad
>>> standard, however I only have an old copy and might have looked in the
>>> wrong place. Do you have any information how this is supposed to be
>>> handled?
>>>
>>
>> The standard doesn't seem to mention anything about it, but looking
>> at switch implementations, most of them require a bump in the mtu to
>> 1504 to support 802.1AD.  Some allow for the decrease in vlan mtu, but
>> that also requires mss translations as well.
> 
> 802.1ad was merged into 802.1Q-2011, and G.2.2 in it refers to maximum
> pdu size. However, this doesn't seem to mention the case where frames
> are double tagged.
> 
> MEF 6.1 requires UNI MTU size >= 1522 and MEF 31 requires E-NNI MTU size
> >= 1526 (In these documents, MTU seems to mean frame size).
> This implies that we should allow 1508 bytes of MTU size when we use
> 802.1AD.
> 

1522 = 1500 + 14 + 4 (.1Q) + 4 (FCS)

> Is 1504 enough?

1526 = 1500 + 14 +4 (.1Q) + 4 (.1AD) + 4(FCS)

This is why Cisco docs recommend mtu of 1504.

Of course this doesn't in any way account for stacked .1AD tags.

-vlad

> 
> Thanks,
> Toshiaki Makita
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Toshiaki Makita April 3, 2014, 8:32 a.m. UTC | #6
(2014/04/03 1:44), Vlad Yasevich wrote:
> On 04/02/2014 12:37 PM, Toshiaki Makita wrote:
>> On Wed, 2014-04-02 at 09:31 -0400, Vlad Yasevich wrote:
>>> On 04/02/2014 08:21 AM, Patrick McHardy wrote:
>>>> On Tue, Apr 01, 2014 at 05:17:34PM -0400, Vlad Yasevich wrote:
>>>>> 802.1AD vlans supposed to encapsulate 802.1Q vlans.  To
>>>>> do this, we need an extra 4 bytes of header which are typically
>>>>> not accounted for by lower devices.  Some devices can not
>>>>> support frames longer then 1522 bytes at all.  Such devices
>>>>> can not really support 802.1AD, even in software, without
>>>>> the vlan reducing its mtu value.
>>>>>
>>>>> This patch propses to increate the lower devices MTU to 1504
>>>>> in case of 802.1AD configuration, and if device doesn't
>>>>> support it, fail the creation of the vlan.  The user has an
>>>>> option to configure older-style Q-in-Q vlans and manually
>>>>> lower the mtu to support such encapsulation.
>>>>
>>>> I think you should do the opposite. The lower layer device may be used
>>>> for other things than the VLAN, so it doesn't seem right to change it's
>>>> MTU. Instead I'd propose to set the MTU of the 802.1ad VLAN device to
>>>> the lower device'e MTU - 4 unless a MTU has been specified by the user.
>>>>
>>>
>>> The decrease of vlan mtu was my initial take on this as well.  The
>>> problematic case with this is forwarding by an encapsulating
>>> bridge (bridge that has 802.1AD as one port and ethX as others). The
>>> frame from ethX will not fit into the mtu of the vlan device in
>>> this case and the packet is dropped.  Ideally, we'd generate and ICMP
>>> Too Big, but with the bridge we can't/don't do that.
>>>
>>> Another problem is that linux assumes that MTU == MRU in case of
>>> device receive buffer programming.  Thus, full sized 802.1AD
>>> frames transmitted by the switch supporting it will probably get dropped
>>> by the driver/firmware as too long.  I've tested this and saw it
>>> happen on my systems.
>>>
>>> An alternative I've thought off is to adjust the rx size in the drivers
>>> when 802.1AD is configured, but that touches all the drivers, and
>>> doesn't work well for not vlan-filtering drivers.  It needs a new
>>> ndo api to adjust the rx length to make it consistent across all
>>> devices.
>>>
>>>> BTW, I couldn't find anything related to MTU handling in the 802.1ad
>>>> standard, however I only have an old copy and might have looked in the
>>>> wrong place. Do you have any information how this is supposed to be
>>>> handled?
>>>>
>>>
>>> The standard doesn't seem to mention anything about it, but looking
>>> at switch implementations, most of them require a bump in the mtu to
>>> 1504 to support 802.1AD.  Some allow for the decrease in vlan mtu, but
>>> that also requires mss translations as well.
>>
>> 802.1ad was merged into 802.1Q-2011, and G.2.2 in it refers to maximum
>> pdu size. However, this doesn't seem to mention the case where frames
>> are double tagged.
>>
>> MEF 6.1 requires UNI MTU size >= 1522 and MEF 31 requires E-NNI MTU size
>> >= 1526 (In these documents, MTU seems to mean frame size).
>> This implies that we should allow 1508 bytes of MTU size when we use
>> 802.1AD.
>>
> 
> 1522 = 1500 + 14 + 4 (.1Q) + 4 (FCS)
> 
>> Is 1504 enough?
> 
> 1526 = 1500 + 14 +4 (.1Q) + 4 (.1AD) + 4(FCS)

Thank you for the supplementation.

> 
> This is why Cisco docs recommend mtu of 1504.
> 
> Of course this doesn't in any way account for stacked .1AD tags.

So we are likely to receive 1508 (1526) sized frames in 802.1ad network.
Is it correct that you confirmed most NICs can receive 1508 sized frames
with 1504 mtu size setting?

Thanks,
Toshiaki Makita
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich April 3, 2014, 1:07 p.m. UTC | #7
On 04/03/2014 04:32 AM, Toshiaki Makita wrote:
> (2014/04/03 1:44), Vlad Yasevich wrote:
>> On 04/02/2014 12:37 PM, Toshiaki Makita wrote:
>>> On Wed, 2014-04-02 at 09:31 -0400, Vlad Yasevich wrote:
>>>> On 04/02/2014 08:21 AM, Patrick McHardy wrote:
>>>>> On Tue, Apr 01, 2014 at 05:17:34PM -0400, Vlad Yasevich wrote:
>>>>>> 802.1AD vlans supposed to encapsulate 802.1Q vlans.  To
>>>>>> do this, we need an extra 4 bytes of header which are typically
>>>>>> not accounted for by lower devices.  Some devices can not
>>>>>> support frames longer then 1522 bytes at all.  Such devices
>>>>>> can not really support 802.1AD, even in software, without
>>>>>> the vlan reducing its mtu value.
>>>>>>
>>>>>> This patch propses to increate the lower devices MTU to 1504
>>>>>> in case of 802.1AD configuration, and if device doesn't
>>>>>> support it, fail the creation of the vlan.  The user has an
>>>>>> option to configure older-style Q-in-Q vlans and manually
>>>>>> lower the mtu to support such encapsulation.
>>>>>
>>>>> I think you should do the opposite. The lower layer device may be used
>>>>> for other things than the VLAN, so it doesn't seem right to change it's
>>>>> MTU. Instead I'd propose to set the MTU of the 802.1ad VLAN device to
>>>>> the lower device'e MTU - 4 unless a MTU has been specified by the user.
>>>>>
>>>>
>>>> The decrease of vlan mtu was my initial take on this as well.  The
>>>> problematic case with this is forwarding by an encapsulating
>>>> bridge (bridge that has 802.1AD as one port and ethX as others). The
>>>> frame from ethX will not fit into the mtu of the vlan device in
>>>> this case and the packet is dropped.  Ideally, we'd generate and ICMP
>>>> Too Big, but with the bridge we can't/don't do that.
>>>>
>>>> Another problem is that linux assumes that MTU == MRU in case of
>>>> device receive buffer programming.  Thus, full sized 802.1AD
>>>> frames transmitted by the switch supporting it will probably get dropped
>>>> by the driver/firmware as too long.  I've tested this and saw it
>>>> happen on my systems.
>>>>
>>>> An alternative I've thought off is to adjust the rx size in the drivers
>>>> when 802.1AD is configured, but that touches all the drivers, and
>>>> doesn't work well for not vlan-filtering drivers.  It needs a new
>>>> ndo api to adjust the rx length to make it consistent across all
>>>> devices.
>>>>
>>>>> BTW, I couldn't find anything related to MTU handling in the 802.1ad
>>>>> standard, however I only have an old copy and might have looked in the
>>>>> wrong place. Do you have any information how this is supposed to be
>>>>> handled?
>>>>>
>>>>
>>>> The standard doesn't seem to mention anything about it, but looking
>>>> at switch implementations, most of them require a bump in the mtu to
>>>> 1504 to support 802.1AD.  Some allow for the decrease in vlan mtu, but
>>>> that also requires mss translations as well.
>>>
>>> 802.1ad was merged into 802.1Q-2011, and G.2.2 in it refers to maximum
>>> pdu size. However, this doesn't seem to mention the case where frames
>>> are double tagged.
>>>
>>> MEF 6.1 requires UNI MTU size >= 1522 and MEF 31 requires E-NNI MTU size
>>> >= 1526 (In these documents, MTU seems to mean frame size).
>>> This implies that we should allow 1508 bytes of MTU size when we use
>>> 802.1AD.
>>>
>>
>> 1522 = 1500 + 14 + 4 (.1Q) + 4 (FCS)
>>
>>> Is 1504 enough?
>>
>> 1526 = 1500 + 14 +4 (.1Q) + 4 (.1AD) + 4(FCS)
> 
> Thank you for the supplementation.
> 
>>
>> This is why Cisco docs recommend mtu of 1504.
>>
>> Of course this doesn't in any way account for stacked .1AD tags.
> 
> So we are likely to receive 1508 (1526) sized frames in 802.1ad network.

1526 byte frame is 1504 mtu, as demonstrated above.

> Is it correct that you confirmed most NICs can receive 1508 sized frames
> with 1504 mtu size setting?

Some might, but I haven't confirmed that.  Most NICs already account for
802.1Q header in their receive buffer calculations.  Some nics jump
to the 2K rx size and enable jumbo mode once rx size goes above 1522
bytes.  I think those will be able to receive larger frames.  Others
don't support jumbo mode at all.  These nics can't support 802.1AD
without reducing mtu on the vlan interface itself.  That, however, leads
to other necessary configuration changes which is why this proposal
leaves it up to the user to configure.

-vlad
> 
> Thanks,
> Toshiaki Makita
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Toshiaki Makita April 4, 2014, 3:08 p.m. UTC | #8
On Thu, 2014-04-03 at 09:07 -0400, Vlad Yasevich wrote:
> On 04/03/2014 04:32 AM, Toshiaki Makita wrote:
> > (2014/04/03 1:44), Vlad Yasevich wrote:
> >> On 04/02/2014 12:37 PM, Toshiaki Makita wrote:
> >>> On Wed, 2014-04-02 at 09:31 -0400, Vlad Yasevich wrote:
> >>>> On 04/02/2014 08:21 AM, Patrick McHardy wrote:
> >>>>> On Tue, Apr 01, 2014 at 05:17:34PM -0400, Vlad Yasevich wrote:
> >>>>>> 802.1AD vlans supposed to encapsulate 802.1Q vlans.  To
> >>>>>> do this, we need an extra 4 bytes of header which are typically
> >>>>>> not accounted for by lower devices.  Some devices can not
> >>>>>> support frames longer then 1522 bytes at all.  Such devices
> >>>>>> can not really support 802.1AD, even in software, without
> >>>>>> the vlan reducing its mtu value.
> >>>>>>
> >>>>>> This patch propses to increate the lower devices MTU to 1504
> >>>>>> in case of 802.1AD configuration, and if device doesn't
> >>>>>> support it, fail the creation of the vlan.  The user has an
> >>>>>> option to configure older-style Q-in-Q vlans and manually
> >>>>>> lower the mtu to support such encapsulation.
> >>>>>
> >>>>> I think you should do the opposite. The lower layer device may be used
> >>>>> for other things than the VLAN, so it doesn't seem right to change it's
> >>>>> MTU. Instead I'd propose to set the MTU of the 802.1ad VLAN device to
> >>>>> the lower device'e MTU - 4 unless a MTU has been specified by the user.
> >>>>>
> >>>>
> >>>> The decrease of vlan mtu was my initial take on this as well.  The
> >>>> problematic case with this is forwarding by an encapsulating
> >>>> bridge (bridge that has 802.1AD as one port and ethX as others). The
> >>>> frame from ethX will not fit into the mtu of the vlan device in
> >>>> this case and the packet is dropped.  Ideally, we'd generate and ICMP
> >>>> Too Big, but with the bridge we can't/don't do that.
> >>>>
> >>>> Another problem is that linux assumes that MTU == MRU in case of
> >>>> device receive buffer programming.  Thus, full sized 802.1AD
> >>>> frames transmitted by the switch supporting it will probably get dropped
> >>>> by the driver/firmware as too long.  I've tested this and saw it
> >>>> happen on my systems.
> >>>>
> >>>> An alternative I've thought off is to adjust the rx size in the drivers
> >>>> when 802.1AD is configured, but that touches all the drivers, and
> >>>> doesn't work well for not vlan-filtering drivers.  It needs a new
> >>>> ndo api to adjust the rx length to make it consistent across all
> >>>> devices.
> >>>>
> >>>>> BTW, I couldn't find anything related to MTU handling in the 802.1ad
> >>>>> standard, however I only have an old copy and might have looked in the
> >>>>> wrong place. Do you have any information how this is supposed to be
> >>>>> handled?
> >>>>>
> >>>>
> >>>> The standard doesn't seem to mention anything about it, but looking
> >>>> at switch implementations, most of them require a bump in the mtu to
> >>>> 1504 to support 802.1AD.  Some allow for the decrease in vlan mtu, but
> >>>> that also requires mss translations as well.
> >>>
> >>> 802.1ad was merged into 802.1Q-2011, and G.2.2 in it refers to maximum
> >>> pdu size. However, this doesn't seem to mention the case where frames
> >>> are double tagged.
> >>>
> >>> MEF 6.1 requires UNI MTU size >= 1522 and MEF 31 requires E-NNI MTU size
> >>> >= 1526 (In these documents, MTU seems to mean frame size).
> >>> This implies that we should allow 1508 bytes of MTU size when we use
> >>> 802.1AD.
> >>>
> >>
> >> 1522 = 1500 + 14 + 4 (.1Q) + 4 (FCS)
> >>
> >>> Is 1504 enough?
> >>
> >> 1526 = 1500 + 14 +4 (.1Q) + 4 (.1AD) + 4(FCS)
> > 
> > Thank you for the supplementation.
> > 
> >>
> >> This is why Cisco docs recommend mtu of 1504.
> >>
> >> Of course this doesn't in any way account for stacked .1AD tags.
> > 
> > So we are likely to receive 1508 (1526) sized frames in 802.1ad network.
> 
> 1526 byte frame is 1504 mtu, as demonstrated above.

Not so sure.
It's true only if the NIC reserves an extra 4 bytes for mtu.
If the outer 802.1ad tag is not recognized as a vlan tag by the NIC, both
the outer tag and the inner tag are not ethernet header but payload to
the NIC.

> 
> > Is it correct that you confirmed most NICs can receive 1508 sized frames
> > with 1504 mtu size setting?
> 
> Some might, but I haven't confirmed that.  Most NICs already account for
> 802.1Q header in their receive buffer calculations.  Some nics jump
> to the 2K rx size and enable jumbo mode once rx size goes above 1522
> bytes.  I think those will be able to receive larger frames. 

Thank you very much, got it.

> Others
> don't support jumbo mode at all.  These nics can't support 802.1AD
> without reducing mtu on the vlan interface itself.  

Doesn't setting mtu to 1508 help us in some cases?

Thanks,
Toshiaki Makita

> That, however, leads
> to other necessary configuration changes which is why this proposal
> leaves it up to the user to configure.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich April 4, 2014, 3:22 p.m. UTC | #9
On 04/04/2014 11:08 AM, Toshiaki Makita wrote:
> On Thu, 2014-04-03 at 09:07 -0400, Vlad Yasevich wrote:
>> On 04/03/2014 04:32 AM, Toshiaki Makita wrote:
>>> (2014/04/03 1:44), Vlad Yasevich wrote:
>>>> On 04/02/2014 12:37 PM, Toshiaki Makita wrote:
>>>>> On Wed, 2014-04-02 at 09:31 -0400, Vlad Yasevich wrote:
>>>>>> On 04/02/2014 08:21 AM, Patrick McHardy wrote:
>>>>>>> On Tue, Apr 01, 2014 at 05:17:34PM -0400, Vlad Yasevich wrote:
>>>>>>>> 802.1AD vlans supposed to encapsulate 802.1Q vlans.  To
>>>>>>>> do this, we need an extra 4 bytes of header which are typically
>>>>>>>> not accounted for by lower devices.  Some devices can not
>>>>>>>> support frames longer then 1522 bytes at all.  Such devices
>>>>>>>> can not really support 802.1AD, even in software, without
>>>>>>>> the vlan reducing its mtu value.
>>>>>>>>
>>>>>>>> This patch propses to increate the lower devices MTU to 1504
>>>>>>>> in case of 802.1AD configuration, and if device doesn't
>>>>>>>> support it, fail the creation of the vlan.  The user has an
>>>>>>>> option to configure older-style Q-in-Q vlans and manually
>>>>>>>> lower the mtu to support such encapsulation.
>>>>>>>
>>>>>>> I think you should do the opposite. The lower layer device may be used
>>>>>>> for other things than the VLAN, so it doesn't seem right to change it's
>>>>>>> MTU. Instead I'd propose to set the MTU of the 802.1ad VLAN device to
>>>>>>> the lower device'e MTU - 4 unless a MTU has been specified by the user.
>>>>>>>
>>>>>>
>>>>>> The decrease of vlan mtu was my initial take on this as well.  The
>>>>>> problematic case with this is forwarding by an encapsulating
>>>>>> bridge (bridge that has 802.1AD as one port and ethX as others). The
>>>>>> frame from ethX will not fit into the mtu of the vlan device in
>>>>>> this case and the packet is dropped.  Ideally, we'd generate an ICMP
>>>>>> Too Big, but with the bridge we can't/don't do that.
>>>>>>
>>>>>> Another problem is that linux assumes that MTU == MRU in case of
>>>>>> device receive buffer programming.  Thus, full sized 802.1AD
>>>>>> frames transmitted by the switch supporting it will probably get dropped
>>>>>> by the driver/firmware as too long.  I've tested this and saw it
>>>>>> happen on my systems.
>>>>>>
>>>>>> An alternative I've thought of is to adjust the rx size in the drivers
>>>>>> when 802.1AD is configured, but that touches all the drivers, and
>>>>>> doesn't work well for non-vlan-filtering drivers.  It needs a new
>>>>>> ndo api to adjust the rx length to make it consistent across all
>>>>>> devices.
>>>>>>
>>>>>>> BTW, I couldn't find anything related to MTU handling in the 802.1ad
>>>>>>> standard, however I only have an old copy and might have looked in the
>>>>>>> wrong place. Do you have any information how this is supposed to be
>>>>>>> handled?
>>>>>>>
>>>>>>
>>>>>> The standard doesn't seem to mention anything about it, but looking
>>>>>> at switch implementations, most of them require a bump in the mtu to
>>>>>> 1504 to support 802.1AD.  Some allow for the decrease in vlan mtu, but
>>>>>> that also requires mss translations as well.
>>>>>
>>>>> 802.1ad was merged into 802.1Q-2011, and G.2.2 in it refers to maximum
>>>>> pdu size. However, this doesn't seem to mention the case where frames
>>>>> are double tagged.
>>>>>
>>>>> MEF 6.1 requires UNI MTU size >= 1522 and MEF 31 requires E-NNI MTU
>>>>> size >= 1526 (In these documents, MTU seems to mean frame size).
>>>>> This implies that we should allow 1508 bytes of MTU size when we use
>>>>> 802.1AD.
>>>>>
>>>>
>>>> 1522 = 1500 + 14 + 4 (.1Q) + 4 (FCS)
>>>>
>>>>> Is 1504 enough?
>>>>
>>>> 1526 = 1500 + 14 +4 (.1Q) + 4 (.1AD) + 4(FCS)
>>>
>>> Thank you for the supplementation.
>>>
>>>>
>>>> This is why Cisco docs recommend mtu of 1504.
>>>>
>>>> Of course this doesn't in any way account for stacked .1AD tags.
>>>
>>> So we are likely to receive 1508 (1526) sized frames in 802.1ad network.
>>
>> 1526 byte frame is 1504 mtu, as demonstrated above.
> 
> Not so sure.
> It's true only if NIC reserves extra 4 bytes for mtu.

Pretty much all drivers reserve extra 4 bytes for the .1Q header.

> If the outer 802.1ad tag is not recognized as a vlan tag by NIC, both
> the outer tag and the inner tag are not ethernet header but payload to
> the NIC.

But the nic doesn't really care about MTU values itself.  It uses it
to compute the frame length that it will support for rx and tx.  That
computation is what the above math shows.

So, the nics that do not support .1AD acceleration (the ones you
mentioned above), will already account for the .1Q header, but the MTU
(payload) needs to be increased by 4 bytes to account for .1AD header.
We don't have to account for .1Q header again.

This actually reminds me that there is a bug in the e1000e code where
setting mtu to 1504 doesn't make it work.  One has to go up to 1508 to
get the right sizing.

> 
>>
>>> Is it correct that you confirmed most NICs can receive 1508 sized frames
>>> with 1504 mtu size setting?
>>
>> Some might, but I haven't confirmed that.  Most NICs already account for
>> 802.1Q header in their receive buffer calculations.  Some nics jump
>> to the 2K rx size and enable jumbo mode once rx size goes above 1522
>> bytes.  I think those will be able to receive larger frames. 
> 
> Thank you very much, got it.
> 
>> Others
>> don't support jumbo mode at all.  These nics can't support 802.1AD
>> without reducing mtu on the vlan interface itself.  
> 
> Doesn't setting mtu to 1508 help us in some cases?

Not in the cases where jumbo is _not_ supported.  The only thing
that makes these nics work is reduction of the mtu on the vlan device.
On nics that support jumbo, going up to 1504 makes things work.

-vlad

> 
> Thanks,
> Toshiaki Makita
> 
>> That, however, leads
>> to other necessary configuration changes which is why this proposal
>> leaves it up to the user to configure.
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Toshiaki Makita April 6, 2014, 3:21 p.m. UTC | #10
On Fri, 2014-04-04 at 11:22 -0400, Vlad Yasevich wrote:
> On 04/04/2014 11:08 AM, Toshiaki Makita wrote:
> > On Thu, 2014-04-03 at 09:07 -0400, Vlad Yasevich wrote:
> >> On 04/03/2014 04:32 AM, Toshiaki Makita wrote:
> >>> (2014/04/03 1:44), Vlad Yasevich wrote:
> >>>> On 04/02/2014 12:37 PM, Toshiaki Makita wrote:
> >>>>> On Wed, 2014-04-02 at 09:31 -0400, Vlad Yasevich wrote:
> >>>>>> On 04/02/2014 08:21 AM, Patrick McHardy wrote:
> >>>>>>> On Tue, Apr 01, 2014 at 05:17:34PM -0400, Vlad Yasevich wrote:
> >>>>>>>> 802.1AD vlans are supposed to encapsulate 802.1Q vlans.  To
> >>>>>>>> do this, we need an extra 4 bytes of header which are typically
> >>>>>>>> not accounted for by lower devices.  Some devices can not
> >>>>>>>> support frames longer than 1522 bytes at all.  Such devices
> >>>>>>>> can not really support 802.1AD, even in software, without
> >>>>>>>> the vlan reducing its mtu value.
> >>>>>>>>
> >>>>>>>> This patch proposes to increase the lower device's MTU to 1504
> >>>>>>>> in case of 802.1AD configuration, and if the device doesn't
> >>>>>>>> support it, fail the creation of the vlan.  The user has an
> >>>>>>>> option to configure older-style Q-in-Q vlans and manually
> >>>>>>>> lower the mtu to support such encapsulation.
> >>>>>>>
> >>>>>>> I think you should do the opposite. The lower layer device may be used
> >>>>>>> for other things than the VLAN, so it doesn't seem right to change its
> >>>>>>> MTU. Instead I'd propose to set the MTU of the 802.1ad VLAN device to
> >>>>>>> the lower device's MTU - 4 unless a MTU has been specified by the user.
> >>>>>>>
> >>>>>>
> >>>>>> The decrease of vlan mtu was my initial take on this as well.  The
> >>>>>> problematic case with this is forwarding by an encapsulating
> >>>>>> bridge (bridge that has 802.1AD as one port and ethX as others). The
> >>>>>> frame from ethX will not fit into the mtu of the vlan device in
> >>>>>> this case and the packet is dropped.  Ideally, we'd generate an ICMP
> >>>>>> Too Big, but with the bridge we can't/don't do that.
> >>>>>>
> >>>>>> Another problem is that linux assumes that MTU == MRU in case of
> >>>>>> device receive buffer programming.  Thus, full sized 802.1AD
> >>>>>> frames transmitted by the switch supporting it will probably get dropped
> >>>>>> by the driver/firmware as too long.  I've tested this and saw it
> >>>>>> happen on my systems.
> >>>>>>
> >>>>>> An alternative I've thought of is to adjust the rx size in the drivers
> >>>>>> when 802.1AD is configured, but that touches all the drivers, and
> >>>>>> doesn't work well for non-vlan-filtering drivers.  It needs a new
> >>>>>> ndo api to adjust the rx length to make it consistent across all
> >>>>>> devices.
> >>>>>>
> >>>>>>> BTW, I couldn't find anything related to MTU handling in the 802.1ad
> >>>>>>> standard, however I only have an old copy and might have looked in the
> >>>>>>> wrong place. Do you have any information how this is supposed to be
> >>>>>>> handled?
> >>>>>>>
> >>>>>>
> >>>>>> The standard doesn't seem to mention anything about it, but looking
> >>>>>> at switch implementations, most of them require a bump in the mtu to
> >>>>>> 1504 to support 802.1AD.  Some allow for the decrease in vlan mtu, but
> >>>>>> that also requires mss translations as well.
> >>>>>
> >>>>> 802.1ad was merged into 802.1Q-2011, and G.2.2 in it refers to maximum
> >>>>> pdu size. However, this doesn't seem to mention the case where frames
> >>>>> are double tagged.
> >>>>>
> >>>>> MEF 6.1 requires UNI MTU size >= 1522 and MEF 31 requires E-NNI MTU
> >>>>> size >= 1526 (In these documents, MTU seems to mean frame size).
> >>>>> This implies that we should allow 1508 bytes of MTU size when we use
> >>>>> 802.1AD.
> >>>>>
> >>>>
> >>>> 1522 = 1500 + 14 + 4 (.1Q) + 4 (FCS)
> >>>>
> >>>>> Is 1504 enough?
> >>>>
> >>>> 1526 = 1500 + 14 +4 (.1Q) + 4 (.1AD) + 4(FCS)
> >>>
> >>> Thank you for the supplementation.
> >>>
> >>>>
> >>>> This is why Cisco docs recommend mtu of 1504.
> >>>>
> >>>> Of course this doesn't in any way account for stacked .1AD tags.
> >>>
> >>> So we are likely to receive 1508 (1526) sized frames in 802.1ad network.
> >>
> >> 1526 byte frame is 1504 mtu, as demonstrated above.
> > 
> > Not so sure.
> > It's true only if NIC reserves extra 4 bytes for mtu.
> 
> Pretty much all drivers reserve extra 4 bytes for the .1Q header.

Looking over some drivers, as you say, most drivers do it.
But I couldn't find extra room for vlan header in cxgb.

Also, some drivers don't seem to like this approach...
bnx2x already reserves 8 bytes for vlans.
qlge accepts only 1500 or 9000 mtu (and maybe 1500 setting allows up to
2048 frame size?)

> 
> > If the outer 802.1ad tag is not recognized as a vlan tag by NIC, both
> > the outer tag and the inner tag are not ethernet header but payload to
> > the NIC.
> 
> But the nic doesn't really care about MTU values itself.  It uses it
> to compute the frame length that it will support for rx and tx.  That
> computation is what the above math shows.
> 
> So, the nics that do not support .1AD acceleration (the ones you
> mentioned above), will already account for the .1Q header, but the MTU
> (payload) needs to be increased by 4 bytes to account for .1AD header.
> We don't have to account for .1Q header again.

Fair enough.

Thanks,
Toshiaki Makita


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 175273f..0328d73 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -126,6 +126,7 @@  int vlan_check_real_dev(struct net_device *real_dev,
 			__be16 protocol, u16 vlan_id)
 {
 	const char *name = real_dev->name;
+	int err;
 
 	if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
 		pr_info("VLANs not supported on %s\n", name);
@@ -135,6 +136,21 @@  int vlan_check_real_dev(struct net_device *real_dev,
 	if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL)
 		return -EEXIST;
 
+	if (protocol == htons(ETH_P_8021AD)) {
+		/* 8021AD vlan is meant to encapsulate 8021Q and thus we
+		 * need to make sure that lower device can handle a
+		 * larger mtu.
+		 * If the lower device still has a default ethernet mtu,
+		 * bump it up 4 bytes.  If not, it was set by user and
+		 * we'll trust the user knows what he is doing.
+		 */
+		if (real_dev->mtu == VLAN_ETH_DATA_LEN &&
+		    dev_set_mtu(real_dev, real_dev->mtu + VLAN_HLEN)) {
+			pr_warn("802.1AD mode is not supported on %s due to mtu limitations.\n", name);
+			return -EOPNOTSUPP;
+		}
+	}
+
 	return 0;
 }
 
@@ -259,6 +275,7 @@  static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	 * hope the underlying device can handle it.
 	 */
 	new_dev->mtu = real_dev->mtu;
+
 	new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
 
 	vlan = vlan_dev_priv(new_dev);
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index c7e634a..a925a8d 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -144,10 +144,15 @@  static int vlan_newlink(struct net *src_net, struct net_device *dev,
 	if (err < 0)
 		return err;
 
-	if (!tb[IFLA_MTU])
-		dev->mtu = real_dev->mtu;
-	else if (dev->mtu > real_dev->mtu)
+	if (!tb[IFLA_MTU]) {
+		if (vlan->vlan_proto == htons(ETH_P_8021AD) &&
+	    	    real_dev->mtu == VLAN_ETH_DATA_LEN + VLAN_HLEN)
+			dev->mtu = VLAN_ETH_DATA_LEN;
+		else
+			dev->mtu = real_dev->mtu;
+	} else if (dev->mtu > real_dev->mtu) {
 		return -EINVAL;
+	}
 
 	err = vlan_changelink(dev, tb, data);
 	if (err < 0)