diff mbox series

[net-next,3/6] vxlan: Support for PMTU discovery on directly bridged links

Message ID 9c5e81621d9fc94cc1d1f77e177986434ca9564f.1596487323.git.sbrivio@redhat.com
State Changes Requested
Delegated to: David Miller
Headers show
Series Support PMTU discovery with bridged UDP tunnels | expand

Commit Message

Stefano Brivio Aug. 3, 2020, 8:52 p.m. UTC
If the interface is a bridge or Open vSwitch port, and we can't
forward a packet because it exceeds the local PMTU estimate,
trigger an ICMP or ICMPv6 reply to the sender, using the same
interface to forward it back.

If metadata collection is enabled, reverse destination and source
addresses, so that Open vSwitch is able to match this packet against
the existing, reverse flow.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
 drivers/net/vxlan.c | 49 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 6 deletions(-)

Comments

David Ahern Aug. 3, 2020, 11:48 p.m. UTC | #1
On 8/3/20 2:52 PM, Stefano Brivio wrote:
> +		err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
> +					    netif_is_bridge_port(dev) ||
> +					    netif_is_ovs_port(dev));

you have this check in a few places. Maybe a new helper like
netif_is_any_bridge_port that can check both IFF flags in 1 go.
Stefano Brivio Aug. 4, 2020, 5:53 a.m. UTC | #2
On Mon, 3 Aug 2020 17:48:48 -0600
David Ahern <dsahern@gmail.com> wrote:

> On 8/3/20 2:52 PM, Stefano Brivio wrote:
> > +		err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
> > +					    netif_is_bridge_port(dev) ||
> > +					    netif_is_ovs_port(dev));  
> 
> you have this check in a few places. Maybe a new helper like
> netif_is_any_bridge_port that can check both IFF flags in 1 go.

Ah, yes, good idea, added to 1/6 v2 as it's where it's needed now.
diff mbox series

Patch

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 21ea79f65410..88941f26f851 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2494,7 +2494,8 @@  static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
 
 /* Bypass encapsulation if the destination is local */
 static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
-			       struct vxlan_dev *dst_vxlan, __be32 vni)
+			       struct vxlan_dev *dst_vxlan, __be32 vni,
+			       bool snoop)
 {
 	struct pcpu_sw_netstats *tx_stats, *rx_stats;
 	union vxlan_addr loopback;
@@ -2526,7 +2527,7 @@  static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
 		goto drop;
 	}
 
-	if (dst_vxlan->cfg.flags & VXLAN_F_LEARN)
+	if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
 		vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
 
 	u64_stats_update_begin(&tx_stats->syncp);
@@ -2575,7 +2576,7 @@  static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
 
 			return -ENOENT;
 		}
-		vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni);
+		vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
 		return 1;
 	}
 
@@ -2611,7 +2612,8 @@  static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		if (vxlan_addr_any(dst)) {
 			if (did_rsc) {
 				/* short-circuited back to local bridge */
-				vxlan_encap_bypass(skb, vxlan, vxlan, default_vni);
+				vxlan_encap_bypass(skb, vxlan, vxlan,
+						   default_vni, true);
 				return;
 			}
 			goto drop;
@@ -2714,7 +2716,24 @@  static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		}
 
 		ndst = &rt->dst;
-		skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM, false);
+		err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
+					    netif_is_bridge_port(dev) ||
+					    netif_is_ovs_port(dev));
+		if (err < 0) {
+			goto tx_error;
+		} else if (err) {
+			if (info) {
+				struct in_addr src, dst;
+
+				src = remote_ip.sin.sin_addr;
+				dst = local_ip.sin.sin_addr;
+				info->key.u.ipv4.src = src.s_addr;
+				info->key.u.ipv4.dst = dst.s_addr;
+			}
+			vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
+			dst_release(ndst);
+			goto out_unlock;
+		}
 
 		tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
@@ -2754,7 +2773,25 @@  static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 				goto out_unlock;
 		}
 
-		skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM, false);
+		err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM,
+					    netif_is_bridge_port(dev) ||
+					    netif_is_ovs_port(dev));
+		if (err < 0) {
+			goto tx_error;
+		} else if (err) {
+			if (info) {
+				struct in6_addr src, dst;
+
+				src = remote_ip.sin6.sin6_addr;
+				dst = local_ip.sin6.sin6_addr;
+				info->key.u.ipv6.src = src;
+				info->key.u.ipv6.dst = dst;
+			}
+
+			vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
+			dst_release(ndst);
+			goto out_unlock;
+		}
 
 		tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);