diff mbox series

[net-next,v2] ip6_vti: adjust vti mtu according to mtu of output device

Message ID 1513086812-24896-1-git-send-email-alexey.kodanev@oracle.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series [net-next,v2] ip6_vti: adjust vti mtu according to mtu of output device | expand

Commit Message

Alexey Kodanev Dec. 12, 2017, 1:53 p.m. UTC
LTP/udp6_ipsec_vti tests fail when sending large UDP datagrams that
require fragmentation and the underlying device has MTU <= 1500. This
happens because ip6_vti sets the MTU to ETH_DATA_LEN and does not update
it according to the destination address or the link parameter.

Further attempts to send UDP packets may succeed because pmtu gets
updated on ICMPV6_PKT_TOOBIG in vti6_err().

Here is an example where the output device MTU is set to 9000:

  # ip a sh ltp_ns_veth2
      ltp_ns_veth2@if7: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 9000 ...
        inet 10.0.0.2/24 scope global ltp_ns_veth2
        inet6 fd00::2/64 scope global

  # ip li add vti6 type vti6 local fd00::2 remote fd00::1
  # ip li show vti6
      vti6@NONE: <POINTOPOINT,NOARP> mtu 1500 ...
        link/tunnel6 fd00::2 peer fd00::1

After the patch:
  # ip li add vti6 type vti6 local fd00::2 remote fd00::1
  # ip li show vti6
      vti6@NONE: <POINTOPOINT,NOARP> mtu 8832 ...
        link/tunnel6 fd00::2 peer fd00::1

Regarding ip_vti, it already tunes MTU with ip_tunnel_bind_dev().

Reported-by: Petr Vorel <pvorel@suse.cz>
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
---
v2: * clean up commit message issues (thanks to Shannon)

    * handle the case when we don't have route but have device parameter

    * cast the new MTU to int before taking the maximum with IPV6_MIN_MTU
      (tdev->mtu can be less than dev->hard_header_len)

Changing the tunnel parameters can now update the MTU as well. Should we
also check in vti6_tnl_change() that the 'link', 'laddr' or 'raddr'
parameters were actually changed, and/or that IFLA_MTU wasn't set?

 net/ipv6/ip6_vti.c |   22 ++++++++++++++++++++++
 1 files changed, 22 insertions(+), 0 deletions(-)

Comments

Shannon Nelson Dec. 12, 2017, 5:53 p.m. UTC | #1
On 12/12/2017 5:53 AM, Alexey Kodanev wrote:
> LTP/udp6_ipsec_vti tests fail when sending large UDP datagrams that
> require fragmentation and the underlying device has MTU <= 1500. This
> happens because ip6_vti sets mtu to ETH_DATA_LEN and not updating it
> depending on a destination address or link parameter.
> 
> Further attempts to send UDP packets may succeed because pmtu gets
> updated on ICMPV6_PKT_TOOBIG in vti6_err().
> 
> Here is the example when the output device MTU is set to 9000:
> 
>    # ip a sh ltp_ns_veth2
>        ltp_ns_veth2@if7: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 9000 ...
>          inet 10.0.0.2/24 scope global ltp_ns_veth2
>          inet6 fd00::2/64 scope global
> 
>    # ip li add vti6 type vti6 local fd00::2 remote fd00::1
>    # ip li show vti6
>        vti6@NONE: <POINTOPOINT,NOARP> mtu 1500 ...
>          link/tunnel6 fd00::2 peer fd00::1
> 
> After the patch:
>    # ip li add vti6 type vti6 local fd00::2 remote fd00::1
>    # ip li show vti6
>        vti6@NONE: <POINTOPOINT,NOARP> mtu 8832 ...
>          link/tunnel6 fd00::2 peer fd00::1
> 
> Regarding ip_vti, it already tunes MTU with ip_tunnel_bind_dev().
> 
> Reported-by: Petr Vorel <pvorel@suse.cz>
> Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
> ---
> v2: * cleanup commit message issues (thanks to Shannon)

Acked-by: Shannon Nelson <shannon.nelson@oracle.com>

> 
>      * handle the case when we don't have route but have device parameter
> 
>      * cast new MTU to int and then check the maximum (tdev->mtu can be
>        less than dev->hard_header_len)
> 
> When changing the tunnel parameters, MTU can be updated as well... should
> we also check that parms 'link', 'laddr' or 'raddr' were actually changed
> in vti6_tnl_change() and/or IFLA_MTU wasn't set?
> 
>   net/ipv6/ip6_vti.c |   22 ++++++++++++++++++++++
>   1 files changed, 22 insertions(+), 0 deletions(-)
> 
> diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
> index dbb74f3..d4624c2 100644
> --- a/net/ipv6/ip6_vti.c
> +++ b/net/ipv6/ip6_vti.c
> @@ -626,6 +626,7 @@ static void vti6_link_config(struct ip6_tnl *t)
>   {
>   	struct net_device *dev = t->dev;
>   	struct __ip6_tnl_parm *p = &t->parms;
> +	struct net_device *tdev = NULL;
>   
>   	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
>   	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
> @@ -638,6 +639,27 @@ static void vti6_link_config(struct ip6_tnl *t)
>   		dev->flags |= IFF_POINTOPOINT;
>   	else
>   		dev->flags &= ~IFF_POINTOPOINT;
> +
> +	if (p->flags & IP6_TNL_F_CAP_XMIT) {
> +		int strict = (ipv6_addr_type(&p->raddr) &
> +			      (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
> +
> +		struct rt6_info *rt = rt6_lookup(t->net,
> +						 &p->raddr, &p->laddr,
> +						 p->link, strict);
> +
> +		if (rt)
> +			tdev = rt->dst.dev;
> +		ip6_rt_put(rt);
> +	}
> +
> +	if (!tdev && p->link)
> +		tdev = __dev_get_by_index(t->net, p->link);
> +
> +	if (tdev) {
> +		dev->mtu = max_t(int, tdev->mtu - dev->hard_header_len,
> +				 IPV6_MIN_MTU);
> +	}
>   }
>   
>   /**
>
David Miller Dec. 13, 2017, 8:09 p.m. UTC | #2
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Tue, 12 Dec 2017 16:53:32 +0300

> +	if (tdev) {
> +		dev->mtu = max_t(int, tdev->mtu - dev->hard_header_len,
> +				 IPV6_MIN_MTU);
> +	}

Please don't use curly braces for a single-statement basic block.

Thank you.
diff mbox series

Patch

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index dbb74f3..d4624c2 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -626,6 +626,7 @@  static void vti6_link_config(struct ip6_tnl *t)
 {
 	struct net_device *dev = t->dev;
 	struct __ip6_tnl_parm *p = &t->parms;
+	struct net_device *tdev = NULL;
 
 	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
 	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
@@ -638,6 +639,27 @@  static void vti6_link_config(struct ip6_tnl *t)
 		dev->flags |= IFF_POINTOPOINT;
 	else
 		dev->flags &= ~IFF_POINTOPOINT;
+
+	if (p->flags & IP6_TNL_F_CAP_XMIT) {
+		int strict = (ipv6_addr_type(&p->raddr) &
+			      (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
+
+		struct rt6_info *rt = rt6_lookup(t->net,
+						 &p->raddr, &p->laddr,
+						 p->link, strict);
+
+		if (rt)
+			tdev = rt->dst.dev;
+		ip6_rt_put(rt);
+	}
+
+	if (!tdev && p->link)
+		tdev = __dev_get_by_index(t->net, p->link);
+
+	if (tdev) {
+		dev->mtu = max_t(int, tdev->mtu - dev->hard_header_len,
+				 IPV6_MIN_MTU);
+	}
 }
 
 /**