diff mbox

[SRU,Utopic,v2,3/4] netfilter: bridge: forward IPv6 fragmented packets

Message ID 1455669793-20093-4-git-send-email-jay.vosburgh@canonical.com
State New
Headers show

Commit Message

Jay Vosburgh Feb. 17, 2016, 12:43 a.m. UTC
From: Bernhard Thaler <bernhard.thaler@wvnet.at>

BugLink: https://bugs.launchpad.net/nova/+bug/1463911

IPv6 fragmented packets are not forwarded on an ethernet bridge
with netfilter ip6_tables loaded. Steps to reproduce:

1) create a simple bridge like this

        modprobe br_netfilter
        brctl addbr br0
        brctl addif br0 eth0
        brctl addif br0 eth2
        ifconfig eth0 up
        ifconfig eth2 up
        ifconfig br0 up

2) place a host with an IPv6 address on each side of the bridge

        set IPv6 address on host A:
        ip -6 addr add fd01:2345:6789:1::1/64 dev eth0

        set IPv6 address on host B:
        ip -6 addr add fd01:2345:6789:1::2/64 dev eth0

3) run a simple ping command on host A with packets > MTU

        ping6 -s 4000 fd01:2345:6789:1::2

4) wait some time and run e.g. "ip6tables -t nat -nvL" on the bridge

IPv6 fragmented packets traverse the bridge cleanly until somebody runs
"ip6tables -t nat -nvL". As soon as it is run (and netfilter modules are
loaded) IPv6 fragmented packets do not traverse the bridge any more (you
see no more responses in ping's output).

After applying this patch IPv6 fragmented packets traverse the bridge
cleanly in the above scenario.

Signed-off-by: Bernhard Thaler <bernhard.thaler@wvnet.at>
[pablo@netfilter.org: small changes to br_nf_dev_queue_xmit]
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
(backported from commit efb6de9b4ba0092b2c55f6a52d16294a8a698edd)
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
---
 include/linux/netfilter_ipv6.h |   1 +
 include/linux/skbuff.h         |   1 +
 net/bridge/br_netfilter.c      | 126 +++++++++++++++++++++++++++++++----------
 net/bridge/br_private.h        |   6 +-
 net/ipv6/netfilter.c           |   1 +
 5 files changed, 103 insertions(+), 32 deletions(-)
diff mbox

Patch

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 64dad1cc1a4b..7c832ba364cc 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -25,6 +25,7 @@  void ipv6_netfilter_fini(void);
 struct nf_ipv6_ops {
 	int (*chk_addr)(struct net *net, const struct in6_addr *addr,
 			const struct net_device *dev, int strict);
+	int (*fragment)(struct sk_buff *skb, int (*output)(struct sk_buff *));
 };
 
 extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 010bc80be91c..ce5b38d66e42 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -140,6 +140,7 @@  struct nf_conntrack {
 struct nf_bridge_info {
 	atomic_t		use;
 	unsigned int		mask;
+	__u16			frag_max_size;
 	struct net_device	*physindev;
 	struct net_device	*physoutdev;
 	unsigned long		data[32 / sizeof(unsigned long)];
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 9e6c1ac7e0ea..276d18ef4a5b 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -34,6 +34,7 @@ 
 
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/addrconf.h>
 #include <net/route.h>
 
 #include <asm/uaccess.h>
@@ -174,7 +175,7 @@  void br_netfilter_rtable_init(struct net_bridge *br)
 /* largest possible L2 header, see br_nf_dev_queue_xmit() */
 #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
 
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 struct brnf_frag_data {
 	char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
 	u8 encap_size;
@@ -363,6 +364,8 @@  static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 	struct rtable *rt;
 
+	nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
+
 	if (nf_bridge->mask & BRNF_PKT_TYPE) {
 		skb->pkt_type = PACKET_OTHERHOST;
 		nf_bridge->mask ^= BRNF_PKT_TYPE;
@@ -634,37 +637,65 @@  bad:
 
 }
 
-/* Replicate the checks that IPv6 does on packet reception and pass the packet
- * to ip6tables, which doesn't support NAT, so things are fairly simple. */
-static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
-					   struct sk_buff *skb,
-					   const struct net_device *in,
-					   const struct net_device *out,
-					   int (*okfn)(struct sk_buff *))
+/* Validate skb's IPv6 header: 0 if ok, -1 to drop; takes no idev ref */
+static int br_validate_ipv6(struct sk_buff *skb)
 {
 	const struct ipv6hdr *hdr;
+	struct net_device *dev = skb->dev;
+	struct inet6_dev *idev = __in6_dev_get(skb->dev);
 	u32 pkt_len;
+	u8 ip6h_len = sizeof(struct ipv6hdr);
 
-	if (skb->len < sizeof(struct ipv6hdr))
-		return NF_DROP;
+	if (!pskb_may_pull(skb, ip6h_len))
+		goto inhdr_error;
 
-	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
-		return NF_DROP;
+	if (skb->len < ip6h_len)
+		goto drop;
 
 	hdr = ipv6_hdr(skb);
 
 	if (hdr->version != 6)
-		return NF_DROP;
+		goto inhdr_error;
 
 	pkt_len = ntohs(hdr->payload_len);
 
 	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
-		if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
-			return NF_DROP;
-		if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
-			return NF_DROP;
+		if (pkt_len + ip6h_len > skb->len) {
+			IP6_INC_STATS_BH(dev_net(dev), idev,
+					 IPSTATS_MIB_INTRUNCATEDPKTS);
+			goto drop;
+		}
+		if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
+			IP6_INC_STATS_BH(dev_net(dev), idev,
+					 IPSTATS_MIB_INDISCARDS);
+			goto drop;
+		}
 	}
 	if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
+		goto drop;
+
+	memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
+	/* No IP options in IPv6 header; however it should be
+	 * checked if some next headers need special treatment
+	 */
+	return 0;
+
+inhdr_error:
+	IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS);
+drop:
+	return -1;
+}
+
+
+/* Replicate the checks that IPv6 does on packet reception and pass the packet
+ * to ip6tables, which doesn't support NAT, so things are fairly simple. */
+static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
+					   struct sk_buff *skb,
+					   const struct net_device *in,
+					   const struct net_device *out,
+					   int (*okfn)(struct sk_buff *))
+{
+	if (br_validate_ipv6(skb))
 		return NF_DROP;
 
 	nf_bridge_put(skb->nf_bridge);
@@ -762,6 +793,9 @@  static int br_nf_forward_finish(struct sk_buff *skb)
 	struct net_device *in;
 
 	if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
+		if (skb->protocol == htons(ETH_P_IPV6))
+			nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
+
 		in = nf_bridge->physindev;
 		if (nf_bridge->mask & BRNF_PKT_TYPE) {
 			skb->pkt_type = PACKET_OTHERHOST;
@@ -826,6 +860,13 @@  static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
 
 	/* The physdev module checks on this */
 	nf_bridge->mask |= BRNF_BRIDGED;
+
+	if (pf == NFPROTO_IPV6) {
+		if (br_validate_ipv6(skb))
+			return NF_DROP;
+		IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
+	}
+
 	nf_bridge->physoutdev = skb->dev;
 	if (pf == NFPROTO_IPV4)
 		skb->protocol = htons(ETH_P_IP);
@@ -874,7 +915,7 @@  static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
 	return NF_STOLEN;
 }
 
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 static int br_nf_push_frag_xmit(struct sk_buff *skb)
 {
 	struct brnf_frag_data *data;
@@ -898,14 +939,20 @@  static int br_nf_push_frag_xmit(struct sk_buff *skb)
 
 	return br_dev_queue_push_xmit(skb);
 }
+#endif
 
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
-	int ret;
+	unsigned int mtu_reserved;
 
-	if (skb->protocol == htons(ETH_P_IP) &&
-	    skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
-	    !skb_is_gso(skb)) {
+	mtu_reserved = nf_bridge_mtu_reduction(skb);
+
+	if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
+		return br_dev_queue_push_xmit(skb);
+	}
+
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+	if (skb->protocol == htons(ETH_P_IP)) {
 		struct brnf_frag_data *data;
 
 		if (br_parse_ip_options(skb))
@@ -924,19 +971,36 @@  static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 		skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
 						 data->size);
 
-		ret = ip_fragment(skb, br_nf_push_frag_xmit);
-	} else {
-		ret = br_dev_queue_push_xmit(skb);
+		return ip_fragment(skb, br_nf_push_frag_xmit);
 	}
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
+		struct brnf_frag_data *data;
 
-	return ret;
-}
-#else
-static int br_nf_dev_queue_xmit(struct sk_buff *skb)
-{
+		if (br_validate_ipv6(skb))
+			return NF_DROP;
+
+		IP6CB(skb)->frag_max_size = skb->nf_bridge->frag_max_size;
+
+		nf_bridge_update_protocol(skb);
+
+		data = this_cpu_ptr(&brnf_frag_data_storage);
+		data->encap_size = nf_bridge_encap_header_len(skb);
+		data->size = ETH_HLEN + data->encap_size;
+
+		skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
+						 data->size);
+
+		if (v6ops)
+			return v6ops->fragment(skb, br_nf_push_frag_xmit);
+		else
+			return -EMSGSIZE;
+	}
+#endif
         return br_dev_queue_push_xmit(skb);
 }
-#endif
 
 /* PF_BRIDGE/POST_ROUTING ********************************************/
 static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 4fd47a1a0e9a..7575600fca07 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -18,6 +18,7 @@ 
 #include <linux/netpoll.h>
 #include <linux/u64_stats_sync.h>
 #include <net/route.h>
+#include <net/ip6_fib.h>
 #include <linux/if_vlan.h>
 
 #define BR_HASH_BITS 8
@@ -222,7 +223,10 @@  struct net_bridge
 	spinlock_t			hash_lock;
 	struct hlist_head		hash[BR_HASH_SIZE];
 #ifdef CONFIG_BRIDGE_NETFILTER
-	struct rtable 			fake_rtable;
+	union {
+		struct rtable		fake_rtable;
+		struct rt6_info		fake_rt6_info;
+	};
 	bool				nf_call_iptables;
 	bool				nf_call_ip6tables;
 	bool				nf_call_arptables;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d38e6a8d8b9f..717cfc056230 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -191,6 +191,7 @@  static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
 
 static const struct nf_ipv6_ops ipv6ops = {
 	.chk_addr	= ipv6_chk_addr,
+	.fragment	= ip6_fragment
 };
 
 static const struct nf_afinfo nf_ip6_afinfo = {