
[bpf-next,v3,07/15] bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP

Message ID 20180502110136.3738-8-bjorn.topel@gmail.com
State Accepted, archived
Delegated to: BPF Maintainers
Series: Introducing AF_XDP support

Commit Message

Björn Töpel May 2, 2018, 11:01 a.m. UTC
From: Björn Töpel <bjorn.topel@intel.com>

The xskmap is yet another BPF map, very much inspired by
dev/cpu/sockmap, and is a holder of AF_XDP sockets. A user application
adds AF_XDP sockets into the map, and by using the bpf_redirect_map
helper, an XDP program can redirect XDP frames to an AF_XDP socket.

Note that a socket that is bound to a certain ifindex/queue index will
*only* accept XDP frames from that netdev/queue index. If an XDP
program tries to redirect from a netdev/queue index other than what
the socket is bound to, the frame will not be received on the socket.

A socket can reside in multiple maps.

v3: Fixed race and simplified code.
v2: Removed one indirection in map lookup.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 include/linux/bpf.h       |  25 +++++
 include/linux/bpf_types.h |   3 +
 include/net/xdp_sock.h    |   7 ++
 include/uapi/linux/bpf.h  |   1 +
 kernel/bpf/Makefile       |   3 +
 kernel/bpf/verifier.c     |   8 +-
 kernel/bpf/xskmap.c       | 239 ++++++++++++++++++++++++++++++++++++++++++++++
 net/xdp/xsk.c             |   5 +
 8 files changed, 289 insertions(+), 2 deletions(-)
 create mode 100644 kernel/bpf/xskmap.c
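
For illustration (not part of this patch): a minimal sketch of an XDP program
that redirects received frames into an XSKMAP, keyed by the receiving queue
index. The map name, its size, the section names and the header paths are
assumptions for the example; user space would then insert a bound AF_XDP
socket's fd at the matching key, e.g. bpf_map_update_elem(map_fd, &key,
&xsk_fd, 0).

#include <linux/bpf.h>
#include "bpf_helpers.h"

/* Sketch: one XSKMAP slot per hardware queue (size is an assumption). */
struct bpf_map_def SEC("maps") xsks_map = {
	.type = BPF_MAP_TYPE_XSKMAP,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 4,
};

SEC("xdp_sock")
int xdp_sock_prog(struct xdp_md *ctx)
{
	/* Redirect to the socket bound to this queue index; if the slot
	 * is empty, the redirect fails and the frame is dropped.
	 */
	return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, 0);
}

char _license[] SEC("license") = "GPL";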

Comments

Eric Dumazet Oct. 8, 2018, 3:31 p.m. UTC | #1
On 05/02/2018 04:01 AM, Björn Töpel wrote:
> From: Björn Töpel <bjorn.topel@intel.com>
> 
> The xskmap is yet another BPF map, very much inspired by
> dev/cpu/sockmap, and is a holder of AF_XDP sockets. A user application
> adds AF_XDP sockets into the map, and by using the bpf_redirect_map
> helper, an XDP program can redirect XDP frames to an AF_XDP socket.
> 
> Note that a socket that is bound to certain ifindex/queue index will
> *only* accept XDP frames from that netdev/queue index. If an XDP
> program tries to redirect from a netdev/queue index other than what
> the socket is bound to, the frame will not be received on the socket.
> 
> A socket can reside in multiple maps.
> 
> v3: Fixed race and simplified code.
> v2: Removed one indirection in map lookup.
> 
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> ---
>  include/linux/bpf.h       |  25 +++++
>  include/linux/bpf_types.h |   3 +
>  include/net/xdp_sock.h    |   7 ++
>  include/uapi/linux/bpf.h  |   1 +
>  kernel/bpf/Makefile       |   3 +
>  kernel/bpf/verifier.c     |   8 +-
>  kernel/bpf/xskmap.c       | 239 ++++++++++++++++++++++++++++++++++++++++++++++
>  net/xdp/xsk.c             |   5 +
>  8 files changed, 289 insertions(+), 2 deletions(-)
>  create mode 100644 kernel/bpf/xskmap.c
> 

This function is called under rcu_read_lock(), from map_update_elem()

> +
> +static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
> +			       u64 map_flags)
> +{
> +	struct xsk_map *m = container_of(map, struct xsk_map, map);
> +	u32 i = *(u32 *)key, fd = *(u32 *)value;
> +	struct xdp_sock *xs, *old_xs;
> +	struct socket *sock;
> +	int err;
> +
> +	if (unlikely(map_flags > BPF_EXIST))
> +		return -EINVAL;
> +	if (unlikely(i >= m->map.max_entries))
> +		return -E2BIG;
> +	if (unlikely(map_flags == BPF_NOEXIST))
> +		return -EEXIST;
> +
> +	sock = sockfd_lookup(fd, &err);
> +	if (!sock)
> +		return err;
> +
> +	if (sock->sk->sk_family != PF_XDP) {
> +		sockfd_put(sock);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	xs = (struct xdp_sock *)sock->sk;
> +
> +	if (!xsk_is_setup_for_bpf_map(xs)) {
> +		sockfd_put(sock);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	sock_hold(sock->sk);
> +
> +	old_xs = xchg(&m->xsk_map[i], xs);
> +	if (old_xs) {
> +		/* Make sure we've flushed everything. */

So it is illegal to call synchronize_net(), since it is a reschedule point.

> +		synchronize_net();
> +		sock_put((struct sock *)old_xs);
> +	}
> +
> +	sockfd_put(sock);
> +	return 0;
> +}
>
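
(For context, a simplified sketch of the caller Eric refers to: the generic
map_update_elem() path in kernel/bpf/syscall.c runs the map-specific callback
inside an RCU read-side critical section, where sleeping, and hence
synchronize_net(), is not allowed.)

	rcu_read_lock();
	err = map->ops->map_update_elem(map, key, value, attr->flags);
	rcu_read_unlock();
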
Björn Töpel Oct. 8, 2018, 4:05 p.m. UTC | #2
On Mon, 8 Oct 2018 at 17:31, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> On 05/02/2018 04:01 AM, Björn Töpel wrote:
> > From: Björn Töpel <bjorn.topel@intel.com>
> >
> > The xskmap is yet another BPF map, very much inspired by
> > dev/cpu/sockmap, and is a holder of AF_XDP sockets. A user application
> > adds AF_XDP sockets into the map, and by using the bpf_redirect_map
> > helper, an XDP program can redirect XDP frames to an AF_XDP socket.
> >
> > Note that a socket that is bound to certain ifindex/queue index will
> > *only* accept XDP frames from that netdev/queue index. If an XDP
> > program tries to redirect from a netdev/queue index other than what
> > the socket is bound to, the frame will not be received on the socket.
> >
> > A socket can reside in multiple maps.
> >
> > v3: Fixed race and simplified code.
> > v2: Removed one indirection in map lookup.
> >
> > Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> > ---
> >  include/linux/bpf.h       |  25 +++++
> >  include/linux/bpf_types.h |   3 +
> >  include/net/xdp_sock.h    |   7 ++
> >  include/uapi/linux/bpf.h  |   1 +
> >  kernel/bpf/Makefile       |   3 +
> >  kernel/bpf/verifier.c     |   8 +-
> >  kernel/bpf/xskmap.c       | 239 ++++++++++++++++++++++++++++++++++++++++++++++
> >  net/xdp/xsk.c             |   5 +
> >  8 files changed, 289 insertions(+), 2 deletions(-)
> >  create mode 100644 kernel/bpf/xskmap.c
> >
>
> This function is called under rcu_read_lock() , from map_update_elem()
>
> > +
> > +static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
> > +                            u64 map_flags)
> > +{
> > +     struct xsk_map *m = container_of(map, struct xsk_map, map);
> > +     u32 i = *(u32 *)key, fd = *(u32 *)value;
> > +     struct xdp_sock *xs, *old_xs;
> > +     struct socket *sock;
> > +     int err;
> > +
> > +     if (unlikely(map_flags > BPF_EXIST))
> > +             return -EINVAL;
> > +     if (unlikely(i >= m->map.max_entries))
> > +             return -E2BIG;
> > +     if (unlikely(map_flags == BPF_NOEXIST))
> > +             return -EEXIST;
> > +
> > +     sock = sockfd_lookup(fd, &err);
> > +     if (!sock)
> > +             return err;
> > +
> > +     if (sock->sk->sk_family != PF_XDP) {
> > +             sockfd_put(sock);
> > +             return -EOPNOTSUPP;
> > +     }
> > +
> > +     xs = (struct xdp_sock *)sock->sk;
> > +
> > +     if (!xsk_is_setup_for_bpf_map(xs)) {
> > +             sockfd_put(sock);
> > +             return -EOPNOTSUPP;
> > +     }
> > +
> > +     sock_hold(sock->sk);
> > +
> > +     old_xs = xchg(&m->xsk_map[i], xs);
> > +     if (old_xs) {
> > +             /* Make sure we've flushed everything. */
>
> So it is illegal to call synchronize_net(), since it is a reschedule point.
>

Thanks for finding and pointing this out, Eric!

I'll have a look and get back with a patch.


Björn


> > +             synchronize_net();
> > +             sock_put((struct sock *)old_xs);
> > +     }
> > +
> > +     sockfd_put(sock);
> > +     return 0;
> > +}
> >
>
>
>
Björn Töpel Oct. 8, 2018, 4:52 p.m. UTC | #3
On Mon, 8 Oct 2018 at 18:05, Björn Töpel <bjorn.topel@gmail.com> wrote:
>
> On Mon, 8 Oct 2018 at 17:31, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >
[...]
> > So it is illegal to call synchronize_net(), since it is a reschedule point.
> >
>
> Thanks for finding and pointing this out, Eric!
>
> I'll have look and get back with a patch.
>

Eric, something along the lines of the patch below? Or is it considered
bad practice to use call_rcu() in this context (prone to DoSing the
kernel)?

Thanks for spending time on the xskmap code. Very much appreciated!

From 491f7bd87705f72c45e59242fc6c3b1db9d3b56d Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Mon, 8 Oct 2018 18:34:11 +0200
Subject: [PATCH] xsk: do not call synchronize_net() under RCU read lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The XSKMAP update and delete functions call synchronize_net(), which
can sleep. It is not allowed to sleep inside an RCU read-side section.

Fixes: fbfc504a24f5 ("bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP")
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 include/net/xdp_sock.h |  1 +
 kernel/bpf/xskmap.c    | 21 +++++++++++----------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 13acb9803a6d..5b430141a3f6 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -68,6 +68,7 @@ struct xdp_sock {
      */
     spinlock_t tx_completion_lock;
     u64 rx_dropped;
+    struct rcu_head rcu;
 };

 struct xdp_buff;
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 9f8463afda9c..51e8e2785612 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -157,6 +157,13 @@ static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
     return NULL;
 }

+static void __xsk_map_remove_async(struct rcu_head *rcu)
+{
+    struct xdp_sock *xs = container_of(rcu, struct xdp_sock, rcu);
+
+    sock_put((struct sock *)xs);
+}
+
 static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
                    u64 map_flags)
 {
@@ -192,11 +199,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
     sock_hold(sock->sk);

     old_xs = xchg(&m->xsk_map[i], xs);
-    if (old_xs) {
-        /* Make sure we've flushed everything. */
-        synchronize_net();
-        sock_put((struct sock *)old_xs);
-    }
+    if (old_xs)
+        call_rcu(&old_xs->rcu, __xsk_map_remove_async);

     sockfd_put(sock);
     return 0;
@@ -212,11 +216,8 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
         return -EINVAL;

     old_xs = xchg(&m->xsk_map[k], NULL);
-    if (old_xs) {
-        /* Make sure we've flushed everything. */
-        synchronize_net();
-        sock_put((struct sock *)old_xs);
-    }
+    if (old_xs)
+        call_rcu(&old_xs->rcu, __xsk_map_remove_async);

     return 0;
 }
Eric Dumazet Oct. 8, 2018, 4:55 p.m. UTC | #4
On 10/08/2018 09:05 AM, Björn Töpel wrote:

> 
> Thanks for finding and pointing this out, Eric!
> 
> I'll have look and get back with a patch.
> 
>

You might take a look at SOCK_RCU_FREE flag for sockets.
Björn Töpel Oct. 8, 2018, 5:04 p.m. UTC | #5
On Mon, 8 Oct 2018 at 18:55, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
[...]
>
> You might take a look at SOCK_RCU_FREE flag for sockets.
>

Ah, thanks! I'll use this instead.
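
A rough sketch of what Eric's suggestion could look like (a sketch only, not
the final patch): set SOCK_RCU_FREE on the AF_XDP socket when it is created,
so that the final socket destruction is deferred by an RCU grace period, and
let the XSKMAP update/delete paths drop the old reference without
synchronize_net():

	/* In xsk_create() (net/xdp/xsk.c): */
	sock_set_flag(sk, SOCK_RCU_FREE);

	/* xsk_map_update_elem()/xsk_map_delete_elem() can then simply do: */
	old_xs = xchg(&m->xsk_map[i], xs);
	if (old_xs)
		sock_put((struct sock *)old_xs);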

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c553f6f9c6b0..68ecdb4eea09 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -676,6 +676,31 @@  static inline int sock_map_prog(struct bpf_map *map,
 }
 #endif
 
+#if defined(CONFIG_XDP_SOCKETS)
+struct xdp_sock;
+struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+		       struct xdp_sock *xs);
+void __xsk_map_flush(struct bpf_map *map);
+#else
+struct xdp_sock;
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+						     u32 key)
+{
+	return NULL;
+}
+
+static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+				     struct xdp_sock *xs)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void __xsk_map_flush(struct bpf_map *map)
+{
+}
+#endif
+
 /* verifier prototypes for helper functions called from eBPF programs */
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 2b28fcf6f6ae..d7df1b323082 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -49,4 +49,7 @@  BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
+#if defined(CONFIG_XDP_SOCKETS)
+BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
+#endif
 #endif
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index a0342dff6a4d..ce3a2ab16b8f 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -28,6 +28,7 @@  struct xdp_sock {
 	struct xsk_queue *rx;
 	struct net_device *dev;
 	struct xdp_umem *umem;
+	struct list_head flush_node;
 	u16 queue_id;
 	/* Protects multiple processes in the control path */
 	struct mutex mutex;
@@ -39,6 +40,7 @@  struct xdp_buff;
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 void xsk_flush(struct xdp_sock *xs);
+bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
 #else
 static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
@@ -53,6 +55,11 @@  static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 static inline void xsk_flush(struct xdp_sock *xs)
 {
 }
+
+static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
+{
+	return false;
+}
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8daef7326bb7..a3a495052511 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -116,6 +116,7 @@  enum bpf_map_type {
 	BPF_MAP_TYPE_DEVMAP,
 	BPF_MAP_TYPE_SOCKMAP,
 	BPF_MAP_TYPE_CPUMAP,
+	BPF_MAP_TYPE_XSKMAP,
 };
 
 enum bpf_prog_type {
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 35c485fa9ea3..f27f5496d6fe 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -8,6 +8,9 @@  obj-$(CONFIG_BPF_SYSCALL) += btf.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
+ifeq ($(CONFIG_XDP_SOCKETS),y)
+obj-$(CONFIG_BPF_SYSCALL) += xskmap.o
+endif
 obj-$(CONFIG_BPF_SYSCALL) += offload.o
 ifeq ($(CONFIG_STREAM_PARSER),y)
 ifeq ($(CONFIG_INET),y)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 712d8655e916..0d91f18b2eb5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2070,8 +2070,11 @@  static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (func_id != BPF_FUNC_redirect_map)
 			goto error;
 		break;
-	/* Restrict bpf side of cpumap, open when use-cases appear */
+	/* Restrict bpf side of cpumap and xskmap, open when use-cases
+	 * appear.
+	 */
 	case BPF_MAP_TYPE_CPUMAP:
+	case BPF_MAP_TYPE_XSKMAP:
 		if (func_id != BPF_FUNC_redirect_map)
 			goto error;
 		break;
@@ -2118,7 +2121,8 @@  static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		break;
 	case BPF_FUNC_redirect_map:
 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
-		    map->map_type != BPF_MAP_TYPE_CPUMAP)
+		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
+		    map->map_type != BPF_MAP_TYPE_XSKMAP)
 			goto error;
 		break;
 	case BPF_FUNC_sk_redirect_map:
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
new file mode 100644
index 000000000000..869dbb11b612
--- /dev/null
+++ b/kernel/bpf/xskmap.c
@@ -0,0 +1,239 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* XSKMAP used for AF_XDP sockets
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/bpf.h>
+#include <linux/capability.h>
+#include <net/xdp_sock.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+struct xsk_map {
+	struct bpf_map map;
+	struct xdp_sock **xsk_map;
+	struct list_head __percpu *flush_list;
+};
+
+static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
+{
+	int cpu, err = -EINVAL;
+	struct xsk_map *m;
+	u64 cost;
+
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	if (attr->max_entries == 0 || attr->key_size != 4 ||
+	    attr->value_size != 4 ||
+	    attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
+		return ERR_PTR(-EINVAL);
+
+	m = kzalloc(sizeof(*m), GFP_USER);
+	if (!m)
+		return ERR_PTR(-ENOMEM);
+
+	bpf_map_init_from_attr(&m->map, attr);
+
+	cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
+	cost += sizeof(struct list_head) * num_possible_cpus();
+	if (cost >= U32_MAX - PAGE_SIZE)
+		goto free_m;
+
+	m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+	/* Notice returns -EPERM on if map size is larger than memlock limit */
+	err = bpf_map_precharge_memlock(m->map.pages);
+	if (err)
+		goto free_m;
+
+	m->flush_list = alloc_percpu(struct list_head);
+	if (!m->flush_list)
+		goto free_m;
+
+	for_each_possible_cpu(cpu)
+		INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
+
+	m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
+					sizeof(struct xdp_sock *),
+					m->map.numa_node);
+	if (!m->xsk_map)
+		goto free_percpu;
+	return &m->map;
+
+free_percpu:
+	free_percpu(m->flush_list);
+free_m:
+	kfree(m);
+	return ERR_PTR(err);
+}
+
+static void xsk_map_free(struct bpf_map *map)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	int i;
+
+	synchronize_net();
+
+	for (i = 0; i < map->max_entries; i++) {
+		struct xdp_sock *xs;
+
+		xs = m->xsk_map[i];
+		if (!xs)
+			continue;
+
+		sock_put((struct sock *)xs);
+	}
+
+	free_percpu(m->flush_list);
+	bpf_map_area_free(m->xsk_map);
+	kfree(m);
+}
+
+static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	u32 index = key ? *(u32 *)key : U32_MAX;
+	u32 *next = next_key;
+
+	if (index >= m->map.max_entries) {
+		*next = 0;
+		return 0;
+	}
+
+	if (index == m->map.max_entries - 1)
+		return -ENOENT;
+	*next = index + 1;
+	return 0;
+}
+
+struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct xdp_sock *xs;
+
+	if (key >= map->max_entries)
+		return NULL;
+
+	xs = READ_ONCE(m->xsk_map[key]);
+	return xs;
+}
+
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+		       struct xdp_sock *xs)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+	int err;
+
+	err = xsk_rcv(xs, xdp);
+	if (err)
+		return err;
+
+	if (!xs->flush_node.prev)
+		list_add(&xs->flush_node, flush_list);
+
+	return 0;
+}
+
+void __xsk_map_flush(struct bpf_map *map)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+	struct xdp_sock *xs, *tmp;
+
+	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
+		xsk_flush(xs);
+		__list_del(xs->flush_node.prev, xs->flush_node.next);
+		xs->flush_node.prev = NULL;
+	}
+}
+
+static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	return NULL;
+}
+
+static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
+			       u64 map_flags)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	u32 i = *(u32 *)key, fd = *(u32 *)value;
+	struct xdp_sock *xs, *old_xs;
+	struct socket *sock;
+	int err;
+
+	if (unlikely(map_flags > BPF_EXIST))
+		return -EINVAL;
+	if (unlikely(i >= m->map.max_entries))
+		return -E2BIG;
+	if (unlikely(map_flags == BPF_NOEXIST))
+		return -EEXIST;
+
+	sock = sockfd_lookup(fd, &err);
+	if (!sock)
+		return err;
+
+	if (sock->sk->sk_family != PF_XDP) {
+		sockfd_put(sock);
+		return -EOPNOTSUPP;
+	}
+
+	xs = (struct xdp_sock *)sock->sk;
+
+	if (!xsk_is_setup_for_bpf_map(xs)) {
+		sockfd_put(sock);
+		return -EOPNOTSUPP;
+	}
+
+	sock_hold(sock->sk);
+
+	old_xs = xchg(&m->xsk_map[i], xs);
+	if (old_xs) {
+		/* Make sure we've flushed everything. */
+		synchronize_net();
+		sock_put((struct sock *)old_xs);
+	}
+
+	sockfd_put(sock);
+	return 0;
+}
+
+static int xsk_map_delete_elem(struct bpf_map *map, void *key)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct xdp_sock *old_xs;
+	int k = *(u32 *)key;
+
+	if (k >= map->max_entries)
+		return -EINVAL;
+
+	old_xs = xchg(&m->xsk_map[k], NULL);
+	if (old_xs) {
+		/* Make sure we've flushed everything. */
+		synchronize_net();
+		sock_put((struct sock *)old_xs);
+	}
+
+	return 0;
+}
+
+const struct bpf_map_ops xsk_map_ops = {
+	.map_alloc = xsk_map_alloc,
+	.map_free = xsk_map_free,
+	.map_get_next_key = xsk_map_get_next_key,
+	.map_lookup_elem = xsk_map_lookup_elem,
+	.map_update_elem = xsk_map_update_elem,
+	.map_delete_elem = xsk_map_delete_elem,
+};
+
+
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 4e1e6c581e1d..b931a0db5588 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -41,6 +41,11 @@  static struct xdp_sock *xdp_sk(struct sock *sk)
 	return (struct xdp_sock *)sk;
 }
 
+bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
+{
+	return !!xs->rx;
+}
+
 static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	u32 *id, len = xdp->data_end - xdp->data;