[bpf-next,2/9] net: Add ID (if needed) to sock_reuseport and expose reuseport_lock

Message ID 20180808080122.3013814-1-kafai@fb.com
State Accepted
Delegated to: BPF Maintainers
Headers show
Series
  • Introduce BPF_MAP_TYPE_REUSEPORT_SOCKARRAY and BPF_PROG_TYPE_SK_REUSEPORT
Related show

Commit Message

Martin Lau Aug. 8, 2018, 8:01 a.m.
A later patch will introduce a BPF_MAP_TYPE_REUSEPORT_ARRAY which
allows a SO_REUSEPORT sk to be added to a bpf map.  When a sk
is removed from reuse->socks[], it also needs to be removed from
the bpf map.  Also, when adding a sk to a bpf map, the bpf
map needs to ensure it is indeed in a reuse->socks[].
Hence, reuseport_lock is needed by the bpf map to ensure its
map_update_elem() and map_delete_elem() operations are in-sync with
the reuse->socks[].  The BPF_MAP_TYPE_REUSEPORT_ARRAY map will only
acquire the reuseport_lock after ensuring the adding sk is already
in a reuseport group (i.e. reuse->socks[]).  The map_lookup_elem()
will be lockless.

This patch also adds an ID to sock_reuseport.  A later patch
will introduce BPF_PROG_TYPE_SK_REUSEPORT which allows
a bpf prog to select a sk from a bpf map.  It is inflexible to
statically enforce a bpf map can only contain the sk belonging to
a particular reuse->socks[] (i.e. same IP:PORT) during the bpf
verification time. For example, think about the the map-in-map situation
where the inner map can be dynamically changed in runtime and the outer
map may have inner maps belonging to different reuseport groups.
Hence, when the bpf prog (in the new BPF_PROG_TYPE_SK_REUSEPORT
type) selects a sk,  this selected sk has to be checked to ensure it
belongs to the requesting reuseport group (i.e. the group serving
that IP:PORT).

The "sk->sk_reuseport_cb" pointer cannot be used for this checking
purpose because the pointer value will change after reuseport_grow().
Instead of saving all checking conditions like the ones
preced calling "reuseport_add_sock()" and compare them everytime a
bpf_prog is run, a 32bits ID is introduced to survive the
reuseport_grow().  The ID is only acquired if any of the
reuse->socks[] is added to the newly introduced
"BPF_MAP_TYPE_REUSEPORT_ARRAY" map.

If "BPF_MAP_TYPE_REUSEPORT_ARRAY" is not used,  the changes in this
patch is a no-op.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/sock_reuseport.h |  6 ++++++
 net/core/sock_reuseport.c    | 27 ++++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

Patch

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 6bef7a0052f2..e1a7681856f7 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -5,8 +5,11 @@ 
 #include <linux/filter.h>
 #include <linux/skbuff.h>
 #include <linux/types.h>
+#include <linux/spinlock.h>
 #include <net/sock.h>
 
+extern spinlock_t reuseport_lock;
+
 struct sock_reuseport {
 	struct rcu_head		rcu;
 
@@ -16,6 +19,8 @@  struct sock_reuseport {
 	 * reuse->socks[] group.
 	 */
 	unsigned int		synq_overflow_ts;
+	/* ID stays the same even after the size of socks[] grows. */
+	unsigned int		reuseport_id;
 	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
 	struct sock		*socks[0];	/* array of sock pointers */
 };
@@ -29,5 +34,6 @@  extern struct sock *reuseport_select_sock(struct sock *sk,
 					  int hdr_len);
 extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
 					      struct bpf_prog *prog);
+int reuseport_get_id(struct sock_reuseport *reuse);
 
 #endif  /* _SOCK_REUSEPORT_H */
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 3f188fad0162..cf2e4d305af9 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -8,11 +8,33 @@ 
 
 #include <net/sock_reuseport.h>
 #include <linux/bpf.h>
+#include <linux/idr.h>
 #include <linux/rcupdate.h>
 
 #define INIT_SOCKS 128
 
-static DEFINE_SPINLOCK(reuseport_lock);
+DEFINE_SPINLOCK(reuseport_lock);
+
+#define REUSEPORT_MIN_ID 1
+static DEFINE_IDA(reuseport_ida);
+
+int reuseport_get_id(struct sock_reuseport *reuse)
+{
+	int id;
+
+	if (reuse->reuseport_id)
+		return reuse->reuseport_id;
+
+	id = ida_simple_get(&reuseport_ida, REUSEPORT_MIN_ID, 0,
+			    /* Called under reuseport_lock */
+			    GFP_ATOMIC);
+	if (id < 0)
+		return id;
+
+	reuse->reuseport_id = id;
+
+	return reuse->reuseport_id;
+}
 
 static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
 {
@@ -78,6 +100,7 @@  static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
 	more_reuse->max_socks = more_socks_size;
 	more_reuse->num_socks = reuse->num_socks;
 	more_reuse->prog = reuse->prog;
+	more_reuse->reuseport_id = reuse->reuseport_id;
 
 	memcpy(more_reuse->socks, reuse->socks,
 	       reuse->num_socks * sizeof(struct sock *));
@@ -102,6 +125,8 @@  static void reuseport_free_rcu(struct rcu_head *head)
 	reuse = container_of(head, struct sock_reuseport, rcu);
 	if (reuse->prog)
 		bpf_prog_destroy(reuse->prog);
+	if (reuse->reuseport_id)
+		ida_simple_remove(&reuseport_ida, reuse->reuseport_id);
 	kfree(reuse);
 }