diff mbox

[ovs-dev,v2] datapath: backport: vxlan: avoid using stale vxlan socket.

Message ID 1477949792-68529-1-git-send-email-pshelar@ovn.org
State Accepted
Headers show

Commit Message

Pravin Shelar Oct. 31, 2016, 9:36 p.m. UTC
Upstream commit:
    commit c6fcc4fc5f8b592600c7409e769ab68da0fb1eca
    Author: pravin shelar <pshelar@ovn.org>
    Date:   Fri Oct 28 09:59:15 2016 -0700

    vxlan: avoid using stale vxlan socket.

    When vxlan device is closed vxlan socket is freed. This
    operation can race with vxlan-xmit function which
    dereferences vxlan socket. Following patch uses RCU
    mechanism to avoid this situation.

    Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
---
 datapath/linux/compat/include/net/vxlan.h | 14 +------
 datapath/linux/compat/vxlan.c             | 66 +++++++++++++++++++------------
 2 files changed, 42 insertions(+), 38 deletions(-)

Comments

Joe Stringer Nov. 1, 2016, 12:24 a.m. UTC | #1
On 31 October 2016 at 14:36, Pravin B Shelar <pshelar@ovn.org> wrote:
> Upstream commit:
>     commit c6fcc4fc5f8b592600c7409e769ab68da0fb1eca
>     Author: pravin shelar <pshelar@ovn.org>
>     Date:   Fri Oct 28 09:59:15 2016 -0700
>
>     vxlan: avoid using stale vxlan socket.
>
>     When vxlan device is closed vxlan socket is freed. This
>     operation can race with vxlan-xmit function which
>     dereferences vxlan socket. Following patch uses RCU
>     mechanism to avoid this situation.
>
>     Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
>     Signed-off-by: David S. Miller <davem@davemloft.net>
>
> Signed-off-by: Pravin B Shelar <pshelar@ovn.org>

Acked-by: Joe Stringer <joe@ovn.org>

I assume you'll also follow up upstream with the unused function. Thanks!
Pravin Shelar Nov. 1, 2016, 5:06 a.m. UTC | #2
On Mon, Oct 31, 2016 at 5:24 PM, Joe Stringer <joe@ovn.org> wrote:
> On 31 October 2016 at 14:36, Pravin B Shelar <pshelar@ovn.org> wrote:
>> Upstream commit:
>>     commit c6fcc4fc5f8b592600c7409e769ab68da0fb1eca
>>     Author: pravin shelar <pshelar@ovn.org>
>>     Date:   Fri Oct 28 09:59:15 2016 -0700
>>
>>     vxlan: avoid using stale vxlan socket.
>>
>>     When vxlan device is closed vxlan socket is freed. This
>>     operation can race with vxlan-xmit function which
>>     dereferences vxlan socket. Following patch uses RCU
>>     mechanism to avoid this situation.
>>
>>     Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
>>     Signed-off-by: David S. Miller <davem@davemloft.net>
>>
>> Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
>
> Acked-by: Joe Stringer <joe@ovn.org>
>
Thanks for review. I pushed patch to master and branch-2.6.
I am planing on backporting it to 2.5 also. but that patch is going to
be significantly different than this patch due to tunnel code
divergence.

> I assume you'll also follow up upstream with the unused function. Thanks!

yes, I have few patches for upstream tunnel modules.
diff mbox

Patch

diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h
index 8212d3a..5bc8969 100644
--- a/datapath/linux/compat/include/net/vxlan.h
+++ b/datapath/linux/compat/include/net/vxlan.h
@@ -253,9 +253,9 @@  struct vxlan_config {
 struct vxlan_dev {
 	struct hlist_node hlist;	/* vni hash table */
 	struct list_head  next;		/* vxlan's per namespace list */
-	struct vxlan_sock *vn4_sock;	/* listening socket for IPv4 */
+	struct vxlan_sock __rcu *vn4_sock;	/* listening socket for IPv4 */
 #if IS_ENABLED(CONFIG_IPV6)
-	struct vxlan_sock *vn6_sock;	/* listening socket for IPv6 */
+	struct vxlan_sock __rcu *vn6_sock;	/* listening socket for IPv6 */
 #endif
 	struct net_device *dev;
 	struct net	  *net;		/* netns for packet i/o */
@@ -309,16 +309,6 @@  struct vxlan_dev {
 struct net_device *rpl_vxlan_dev_create(struct net *net, const char *name,
 				    u8 name_assign_type, struct vxlan_config *conf);
 
-static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan,
-					unsigned short family)
-{
-#if IS_ENABLED(CONFIG_IPV6)
-	if (family == AF_INET6)
-		return inet_sk(vxlan->vn6_sock->sock->sk)->inet_sport;
-#endif
-	return inet_sk(vxlan->vn4_sock->sock->sk)->inet_sport;
-}
-
 static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
 						     netdev_features_t features)
 {
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index 47a5a68..d5dbe8d 100644
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -429,17 +429,20 @@  static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
 static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
 {
 	struct vxlan_dev *vxlan;
+	struct vxlan_sock *sock4;
+	struct vxlan_sock *sock6 = NULL;
 	unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
 
+	sock4 = rtnl_dereference(dev->vn4_sock);
+
 	/* The vxlan_sock is only used by dev, leaving group has
 	 * no effect on other vxlan devices.
 	 */
-	if (family == AF_INET && dev->vn4_sock &&
-	    atomic_read(&dev->vn4_sock->refcnt) == 1)
+	if (family == AF_INET && sock4 && atomic_read(&sock4->refcnt) == 1)
 		return false;
 #if IS_ENABLED(CONFIG_IPV6)
-	if (family == AF_INET6 && dev->vn6_sock &&
-	    atomic_read(&dev->vn6_sock->refcnt) == 1)
+	sock6 = rtnl_dereference(dev->vn6_sock);
+	if (family == AF_INET6 && sock6 && atomic_read(&sock6->refcnt) == 1)
 		return false;
 #endif
 
@@ -447,10 +450,12 @@  static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
 		if (!netif_running(vxlan->dev) || vxlan == dev)
 			continue;
 
-		if (family == AF_INET && vxlan->vn4_sock != dev->vn4_sock)
+		if (family == AF_INET &&
+		    rtnl_dereference(vxlan->vn4_sock) != sock4)
 			continue;
 #if IS_ENABLED(CONFIG_IPV6)
-		if (family == AF_INET6 && vxlan->vn6_sock != dev->vn6_sock)
+		if (family == AF_INET6 &&
+		    rtnl_dereference(vxlan->vn6_sock) != sock6)
 			continue;
 #endif
 
@@ -488,22 +493,25 @@  static bool __vxlan_sock_release_prep(struct vxlan_sock *vs)
 
 static void vxlan_sock_release(struct vxlan_dev *vxlan)
 {
-	bool ipv4 = __vxlan_sock_release_prep(vxlan->vn4_sock);
+	struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
 #if IS_ENABLED(CONFIG_IPV6)
-	bool ipv6 = __vxlan_sock_release_prep(vxlan->vn6_sock);
+	struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
+
+	rcu_assign_pointer(vxlan->vn6_sock, NULL);
 #endif
 
+	rcu_assign_pointer(vxlan->vn4_sock, NULL);
 	synchronize_net();
 
-	if (ipv4) {
-		udp_tunnel_sock_release(vxlan->vn4_sock->sock);
-		kfree(vxlan->vn4_sock);
+	if (__vxlan_sock_release_prep(sock4)) {
+		udp_tunnel_sock_release(sock4->sock);
+		kfree(sock4);
 	}
 
 #if IS_ENABLED(CONFIG_IPV6)
-	if (ipv6) {
-		udp_tunnel_sock_release(vxlan->vn6_sock->sock);
-		kfree(vxlan->vn6_sock);
+	if (__vxlan_sock_release_prep(sock6)) {
+		udp_tunnel_sock_release(sock6->sock);
+		kfree(sock6);
 	}
 #endif
 }
@@ -946,11 +954,15 @@  static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
 					  struct dst_cache *dst_cache,
 					  const struct ip_tunnel_info *info)
 {
+	struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
 	bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
 	struct dst_entry *ndst;
 	struct flowi6 fl6;
 	int err;
 
+	if (!sock6)
+		return ERR_PTR(-EIO);
+
 	if (tos && !info)
 		use_cache = false;
 	if (use_cache) {
@@ -969,7 +981,7 @@  static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
 
 #ifdef HAVE_IPV6_DST_LOOKUP_NET
 	err = ipv6_stub->ipv6_dst_lookup(vxlan->net,
-					 vxlan->vn6_sock->sock->sk,
+					 sock6->sock->sk,
 					 &ndst, &fl6);
 #else
 #ifdef HAVE_IPV6_STUB
@@ -1085,9 +1097,11 @@  static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	if (dst->sa.sa_family == AF_INET) {
-		if (!vxlan->vn4_sock)
+		struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
+
+		if (!sock4)
 			goto drop;
-		sk = vxlan->vn4_sock->sock->sk;
+		sk = sock4->sock->sk;
 
 		rt = vxlan_get_route(vxlan, skb,
 				     rdst ? rdst->remote_ifindex : 0, tos,
@@ -1140,12 +1154,13 @@  static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 				    src_port, dst_port, xnet, !udp_sum);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
+		struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
 		struct dst_entry *ndst;
 		u32 rt6i_flags;
 
-		if (!vxlan->vn6_sock)
+		if (!sock6)
 			goto drop;
-		sk = vxlan->vn6_sock->sock->sk;
+		sk = sock6->sock->sk;
 
 		ndst = vxlan6_get_route(vxlan, skb,
 					rdst ? rdst->remote_ifindex : 0, tos,
@@ -1434,9 +1449,10 @@  int ovs_vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 	dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
 
 	if (ip_tunnel_info_af(info) == AF_INET) {
+		struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
 		struct rtable *rt;
 
-		if (!vxlan->vn4_sock)
+		if (!sock4)
 			return -EINVAL;
 		rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
 				     info->key.u.ipv4.dst,
@@ -1448,8 +1464,6 @@  int ovs_vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 #if IS_ENABLED(CONFIG_IPV6)
 		struct dst_entry *ndst;
 
-		if (!vxlan->vn6_sock)
-			return -EINVAL;
 		ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
 					info->key.label, &info->key.u.ipv6.dst,
 					&info->key.u.ipv6.src, NULL, info);
@@ -1784,10 +1798,10 @@  static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
 		return PTR_ERR(vs);
 #if IS_ENABLED(CONFIG_IPV6)
 	if (ipv6)
-		vxlan->vn6_sock = vs;
+		rcu_assign_pointer(vxlan->vn6_sock, vs);
 	else
 #endif
-		vxlan->vn4_sock = vs;
+		rcu_assign_pointer(vxlan->vn4_sock, vs);
 	vxlan_vs_add_dev(vs, vxlan);
 	return 0;
 }
@@ -1798,9 +1812,9 @@  static int vxlan_sock_add(struct vxlan_dev *vxlan)
 	bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA;
 	int ret = 0;
 
-	vxlan->vn4_sock = NULL;
+	RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
 #if IS_ENABLED(CONFIG_IPV6)
-	vxlan->vn6_sock = NULL;
+	RCU_INIT_POINTER(vxlan->vn6_sock, NULL);
 	if (ipv6 || metadata)
 		ret = __vxlan_sock_add(vxlan, true);
 #endif