diff mbox series

[net,v2] ipv4: reset rt_iif for recirculated mcast/bcast out pkts

Message ID 20190626062116.4319-1-ssuryaextr@gmail.com
State Accepted
Delegated to: David Miller
Headers show
Series [net,v2] ipv4: reset rt_iif for recirculated mcast/bcast out pkts | expand

Commit Message

Stephen Suryaputra June 26, 2019, 6:21 a.m. UTC
Multicast or broadcast egress packets have rt_iif set to the oif. These
packets might be recirculated back as input and lookup to the raw
sockets may fail because they are bound to the incoming interface
(skb_iif). If rt_iif is not zero, during the lookup, inet_iif() function
returns rt_iif instead of skb_iif. Hence, the lookup fails.

v2: Make it non vrf specific (David Ahern). Reword the changelog to
    reflect it.
Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
---
 include/net/route.h  |  1 +
 net/ipv4/ip_output.c | 12 ++++++++++++
 net/ipv4/route.c     | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+)

Comments

David Ahern June 26, 2019, 5:28 p.m. UTC | #1
On 6/26/19 12:21 AM, Stephen Suryaputra wrote:
> Multicast or broadcast egress packets have rt_iif set to the oif. These
> packets might be recirculated back as input and lookup to the raw
> sockets may fail because they are bound to the incoming interface
> (skb_iif). If rt_iif is not zero, during the lookup, inet_iif() function
> returns rt_iif instead of skb_iif. Hence, the lookup fails.
> 
> v2: Make it non vrf specific (David Ahern). Reword the changelog to
>     reflect it.
> Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
> ---
>  include/net/route.h  |  1 +
>  net/ipv4/ip_output.c | 12 ++++++++++++
>  net/ipv4/route.c     | 33 +++++++++++++++++++++++++++++++++
>  3 files changed, 46 insertions(+)

Reviewed-by: David Ahern <dsahern@gmail.com>
David Miller June 26, 2019, 7:40 p.m. UTC | #2
From: Stephen Suryaputra <ssuryaextr@gmail.com>
Date: Wed, 26 Jun 2019 02:21:16 -0400

> Multicast or broadcast egress packets have rt_iif set to the oif. These
> packets might be recirculated back as input and lookup to the raw
> sockets may fail because they are bound to the incoming interface
> (skb_iif). If rt_iif is not zero, during the lookup, inet_iif() function
> returns rt_iif instead of skb_iif. Hence, the lookup fails.
> 
> v2: Make it non vrf specific (David Ahern). Reword the changelog to
>     reflect it.
> Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>

Applied and queued up for -stable, thanks.
diff mbox series

Patch

diff --git a/include/net/route.h b/include/net/route.h
index 065b47754f05..55ff71ffb796 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -221,6 +221,7 @@  void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
 struct rtable *rt_dst_alloc(struct net_device *dev,
 			     unsigned int flags, u16 type,
 			     bool nopolicy, bool noxfrm, bool will_cache);
+struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt);
 
 struct in_ifaddr;
 void fib_add_ifaddr(struct in_ifaddr *);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 16f9159234a2..8c2ec35b6512 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -318,6 +318,7 @@  static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
 static int ip_mc_finish_output(struct net *net, struct sock *sk,
 			       struct sk_buff *skb)
 {
+	struct rtable *new_rt;
 	int ret;
 
 	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
@@ -326,6 +327,17 @@  static int ip_mc_finish_output(struct net *net, struct sock *sk,
 		return ret;
 	}
 
+	/* Reset rt_iif so that inet_iif() will return skb->skb_iif. Setting
+	 * this to non-zero causes ipi_ifindex in in_pktinfo to be overwritten,
+	 * see ipv4_pktinfo_prepare().
+	 */
+	new_rt = rt_dst_clone(net->loopback_dev, skb_rtable(skb));
+	if (new_rt) {
+		new_rt->rt_iif = 0;
+		skb_dst_drop(skb);
+		skb_dst_set(skb, &new_rt->dst);
+	}
+
 	return dev_loopback_xmit(net, sk, skb);
 }
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6cb7cff22db9..8ea0735a6754 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1647,6 +1647,39 @@  struct rtable *rt_dst_alloc(struct net_device *dev,
 }
 EXPORT_SYMBOL(rt_dst_alloc);
 
+struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
+{
+	struct rtable *new_rt;
+
+	new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+			   rt->dst.flags);
+
+	if (new_rt) {
+		new_rt->rt_genid = rt_genid_ipv4(dev_net(dev));
+		new_rt->rt_flags = rt->rt_flags;
+		new_rt->rt_type = rt->rt_type;
+		new_rt->rt_is_input = rt->rt_is_input;
+		new_rt->rt_iif = rt->rt_iif;
+		new_rt->rt_pmtu = rt->rt_pmtu;
+		new_rt->rt_mtu_locked = rt->rt_mtu_locked;
+		new_rt->rt_gw_family = rt->rt_gw_family;
+		if (rt->rt_gw_family == AF_INET)
+			new_rt->rt_gw4 = rt->rt_gw4;
+		else if (rt->rt_gw_family == AF_INET6)
+			new_rt->rt_gw6 = rt->rt_gw6;
+		INIT_LIST_HEAD(&new_rt->rt_uncached);
+
+		new_rt->dst.flags |= DST_HOST;
+		new_rt->dst.input = rt->dst.input;
+		new_rt->dst.output = rt->dst.output;
+		new_rt->dst.error = rt->dst.error;
+		new_rt->dst.lastuse = jiffies;
+		new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
+	}
+	return new_rt;
+}
+EXPORT_SYMBOL(rt_dst_clone);
+
 /* called in rcu_read_lock() section */
 int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			  u8 tos, struct net_device *dev,