diff mbox series

[v1,bpf-next,09/11] bpf: Support bpf_get_socket_cookie_sock() for BPF_PROG_TYPE_SK_REUSEPORT.

Message ID 20201201144418.35045-10-kuniyu@amazon.co.jp
State Superseded
Headers show
Series Socket migration for SO_REUSEPORT. | expand

Commit Message

Kuniyuki Iwashima Dec. 1, 2020, 2:44 p.m. UTC
We will call sock_reuseport.prog for socket migration in the next commit,
so the eBPF program has to know which listener is closing in order to
select the new listener.

Currently, we can get a unique ID for each listener in the userspace by
calling bpf_map_lookup_elem() for BPF_MAP_TYPE_REUSEPORT_SOCKARRAY map.

This patch makes the sk pointer available in sk_reuseport_md so that we can
get the ID by BPF_FUNC_get_socket_cookie() in the eBPF program.

Link: https://lore.kernel.org/netdev/20201119001154.kapwihc2plp4f7zc@kafai-mbp.dhcp.thefacebook.com/
Suggested-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
---
 include/uapi/linux/bpf.h       |  8 ++++++++
 net/core/filter.c              | 12 +++++++++++-
 tools/include/uapi/linux/bpf.h |  8 ++++++++
 3 files changed, 27 insertions(+), 1 deletion(-)

Comments

Martin KaFai Lau Dec. 4, 2020, 7:58 p.m. UTC | #1
On Tue, Dec 01, 2020 at 11:44:16PM +0900, Kuniyuki Iwashima wrote:
> We will call sock_reuseport.prog for socket migration in the next commit,
> so the eBPF program has to know which listener is closing in order to
> select the new listener.
> 
> Currently, we can get a unique ID for each listener in the userspace by
> calling bpf_map_lookup_elem() for BPF_MAP_TYPE_REUSEPORT_SOCKARRAY map.
> 
> This patch makes the sk pointer available in sk_reuseport_md so that we can
> get the ID by BPF_FUNC_get_socket_cookie() in the eBPF program.
> 
> Link: https://lore.kernel.org/netdev/20201119001154.kapwihc2plp4f7zc@kafai-mbp.dhcp.thefacebook.com/
> Suggested-by: Martin KaFai Lau <kafai@fb.com>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> ---
>  include/uapi/linux/bpf.h       |  8 ++++++++
>  net/core/filter.c              | 12 +++++++++++-
>  tools/include/uapi/linux/bpf.h |  8 ++++++++
>  3 files changed, 27 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index efe342bf3dbc..3e9b8bd42b4e 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1650,6 +1650,13 @@ union bpf_attr {
>   * 		A 8-byte long non-decreasing number on success, or 0 if the
>   * 		socket field is missing inside *skb*.
>   *
> + * u64 bpf_get_socket_cookie(struct bpf_sock *sk)
> + * 	Description
> + * 		Equivalent to bpf_get_socket_cookie() helper that accepts
> + * 		*skb*, but gets socket from **struct bpf_sock** context.
> + * 	Return
> + * 		A 8-byte long non-decreasing number.
> + *
>   * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
>   * 	Description
>   * 		Equivalent to bpf_get_socket_cookie() helper that accepts
> @@ -4420,6 +4427,7 @@ struct sk_reuseport_md {
>  	__u32 bind_inany;	/* Is sock bound to an INANY address? */
>  	__u32 hash;		/* A hash of the packet 4 tuples */
>  	__u8 migration;		/* Migration type */
> +	__bpf_md_ptr(struct bpf_sock *, sk); /* current listening socket */
>  };
>  
>  #define BPF_TAG_SIZE	8
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 0a0634787bb4..1059d31847ef 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -4628,7 +4628,7 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
>  	.func		= bpf_get_socket_cookie_sock,
>  	.gpl_only	= false,
>  	.ret_type	= RET_INTEGER,
> -	.arg1_type	= ARG_PTR_TO_CTX,
> +	.arg1_type	= ARG_PTR_TO_SOCKET,
This will break existing bpf prog (BPF_PROG_TYPE_CGROUP_SOCK)
using this proto.  A new proto is needed and there is
an on-going patch doing this [0].

[0]: https://lore.kernel.org/bpf/20201203213330.1657666-1-revest@google.com/
Kuniyuki Iwashima Dec. 6, 2020, 4:36 a.m. UTC | #2
I'm resending this mail just for the record, because the copies I sent
yesterday failed to reach the LKML, netdev, and bpf mailing lists.


From:   Martin KaFai Lau <kafai@fb.com>
Date:   Fri, 4 Dec 2020 11:58:07 -0800
> On Tue, Dec 01, 2020 at 11:44:16PM +0900, Kuniyuki Iwashima wrote:
> > We will call sock_reuseport.prog for socket migration in the next commit,
> > so the eBPF program has to know which listener is closing in order to
> > select the new listener.
> > 
> > Currently, we can get a unique ID for each listener in the userspace by
> > calling bpf_map_lookup_elem() for BPF_MAP_TYPE_REUSEPORT_SOCKARRAY map.
> > 
> > This patch makes the sk pointer available in sk_reuseport_md so that we can
> > get the ID by BPF_FUNC_get_socket_cookie() in the eBPF program.
> > 
> > Link: https://lore.kernel.org/netdev/20201119001154.kapwihc2plp4f7zc@kafai-mbp.dhcp.thefacebook.com/
> > Suggested-by: Martin KaFai Lau <kafai@fb.com>
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> > ---
> >  include/uapi/linux/bpf.h       |  8 ++++++++
> >  net/core/filter.c              | 12 +++++++++++-
> >  tools/include/uapi/linux/bpf.h |  8 ++++++++
> >  3 files changed, 27 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index efe342bf3dbc..3e9b8bd42b4e 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -1650,6 +1650,13 @@ union bpf_attr {
> >   * 		A 8-byte long non-decreasing number on success, or 0 if the
> >   * 		socket field is missing inside *skb*.
> >   *
> > + * u64 bpf_get_socket_cookie(struct bpf_sock *sk)
> > + * 	Description
> > + * 		Equivalent to bpf_get_socket_cookie() helper that accepts
> > + * 		*skb*, but gets socket from **struct bpf_sock** context.
> > + * 	Return
> > + * 		A 8-byte long non-decreasing number.
> > + *
> >   * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
> >   * 	Description
> >   * 		Equivalent to bpf_get_socket_cookie() helper that accepts
> > @@ -4420,6 +4427,7 @@ struct sk_reuseport_md {
> >  	__u32 bind_inany;	/* Is sock bound to an INANY address? */
> >  	__u32 hash;		/* A hash of the packet 4 tuples */
> >  	__u8 migration;		/* Migration type */
> > +	__bpf_md_ptr(struct bpf_sock *, sk); /* current listening socket */
> >  };
> >  
> >  #define BPF_TAG_SIZE	8
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 0a0634787bb4..1059d31847ef 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -4628,7 +4628,7 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
> >  	.func		= bpf_get_socket_cookie_sock,
> >  	.gpl_only	= false,
> >  	.ret_type	= RET_INTEGER,
> > -	.arg1_type	= ARG_PTR_TO_CTX,
> > +	.arg1_type	= ARG_PTR_TO_SOCKET,
> This will break existing bpf prog (BPF_PROG_TYPE_CGROUP_SOCK)
> using this proto.  A new proto is needed and there is
> an on-going patch doing this [0].
> 
> [0]: https://lore.kernel.org/bpf/20201203213330.1657666-1-revest@google.com/

Thank you for notifying me of this patch!
I will define another proto, but may drop that part if the above patch has
already been merged by then.
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index efe342bf3dbc..3e9b8bd42b4e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1650,6 +1650,13 @@  union bpf_attr {
  * 		A 8-byte long non-decreasing number on success, or 0 if the
  * 		socket field is missing inside *skb*.
  *
+ * u64 bpf_get_socket_cookie(struct bpf_sock *sk)
+ * 	Description
+ * 		Equivalent to bpf_get_socket_cookie() helper that accepts
+ * 		*skb*, but gets socket from **struct bpf_sock** context.
+ * 	Return
+ * 		A 8-byte long non-decreasing number.
+ *
  * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
  * 	Description
  * 		Equivalent to bpf_get_socket_cookie() helper that accepts
@@ -4420,6 +4427,7 @@  struct sk_reuseport_md {
 	__u32 bind_inany;	/* Is sock bound to an INANY address? */
 	__u32 hash;		/* A hash of the packet 4 tuples */
 	__u8 migration;		/* Migration type */
+	__bpf_md_ptr(struct bpf_sock *, sk); /* current listening socket */
 };
 
 #define BPF_TAG_SIZE	8
diff --git a/net/core/filter.c b/net/core/filter.c
index 0a0634787bb4..1059d31847ef 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4628,7 +4628,7 @@  static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
 	.func		= bpf_get_socket_cookie_sock,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg1_type	= ARG_PTR_TO_SOCKET,
 };
 
 BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
@@ -9982,6 +9982,8 @@  sk_reuseport_func_proto(enum bpf_func_id func_id,
 		return &sk_reuseport_load_bytes_proto;
 	case BPF_FUNC_skb_load_bytes_relative:
 		return &sk_reuseport_load_bytes_relative_proto;
+	case BPF_FUNC_get_socket_cookie:
+		return &bpf_get_socket_cookie_sock_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -10015,6 +10017,10 @@  sk_reuseport_is_valid_access(int off, int size,
 		return prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE &&
 			size == sizeof(__u8);
 
+	case offsetof(struct sk_reuseport_md, sk):
+		info->reg_type = PTR_TO_SOCKET;
+		return size == sizeof(__u64);
+
 	/* Fields that allow narrowing */
 	case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
 		if (size < sizeof_field(struct sk_buff, protocol))
@@ -10091,6 +10097,10 @@  static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
 	case offsetof(struct sk_reuseport_md, migration):
 		SK_REUSEPORT_LOAD_FIELD(migration);
 		break;
+
+	case offsetof(struct sk_reuseport_md, sk):
+		SK_REUSEPORT_LOAD_FIELD(sk);
+		break;
 	}
 
 	return insn - insn_buf;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index efe342bf3dbc..3e9b8bd42b4e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1650,6 +1650,13 @@  union bpf_attr {
  * 		A 8-byte long non-decreasing number on success, or 0 if the
  * 		socket field is missing inside *skb*.
  *
+ * u64 bpf_get_socket_cookie(struct bpf_sock *sk)
+ * 	Description
+ * 		Equivalent to bpf_get_socket_cookie() helper that accepts
+ * 		*skb*, but gets socket from **struct bpf_sock** context.
+ * 	Return
+ * 		A 8-byte long non-decreasing number.
+ *
  * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
  * 	Description
  * 		Equivalent to bpf_get_socket_cookie() helper that accepts
@@ -4420,6 +4427,7 @@  struct sk_reuseport_md {
 	__u32 bind_inany;	/* Is sock bound to an INANY address? */
 	__u32 hash;		/* A hash of the packet 4 tuples */
 	__u8 migration;		/* Migration type */
+	__bpf_md_ptr(struct bpf_sock *, sk); /* current listening socket */
 };
 
 #define BPF_TAG_SIZE	8