diff mbox

[1/2] bpf: remove struct bpf_prog_type_list

Message ID 20170404172711.4088-1-johannes@sipsolutions.net
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Johannes Berg April 4, 2017, 5:27 p.m. UTC
From: Johannes Berg <johannes.berg@intel.com>

There's no need to have struct bpf_prog_type_list since
it just contains a list_head, the type, and the ops
pointer. Since the types are densely packed and not
actually dynamically registered, it's much easier and
smaller to have an array of type->ops pointer.

This doesn't really change the image size much, but in
the running image it saves a few hundred bytes because
the structs are removed and traded against __init code.

While at it, also mark bpf_register_prog_type() __init
since it's only called from code already marked so.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
So I'm not sure about this - I looked at this code since
I wanted to see if we could even register prog_types from
modules, but that seems to be impossible right now ...
---
 include/linux/bpf.h      | 12 +++------
 include/uapi/linux/bpf.h |  2 ++
 kernel/bpf/syscall.c     | 26 ++++++++++----------
 kernel/trace/bpf_trace.c | 21 +++-------------
 net/core/filter.c        | 63 +++++++-----------------------------------------
 5 files changed, 31 insertions(+), 93 deletions(-)

Comments

Johannes Berg April 4, 2017, 5:32 p.m. UTC | #1
Oops, I really meant to send these as RFC more than anything, because I
don't really understand why it's done that way :)

FWIW, the bloat-o-meter looks similar in both cases, like this:

add/remove: 0/11 grow/shrink: 9/1 up/down: 145/-365 (-220)
function                                     old     new   delta
bpf_map_types                                 16      96     +80
register_htab_map                             56      76     +20
register_array_map                            32      42     +10
bpf_register_map_type                         35      45     +10
register_trie_map                             20      25      +5
register_stack_map                            20      25      +5
register_prog_array_map                       20      25      +5
register_perf_event_array_map                 20      25      +5
register_cgroup_array_map                     20      25      +5
sys_bpf                                     1140    1127     -13
trie_type                                     32       -     -32
stack_map_type                                32       -     -32
prog_array_type                               32       -     -32
perf_event_array_type                         32       -     -32
percpu_array_type                             32       -     -32
htab_type                                     32       -     -32
htab_percpu_type                              32       -     -32
htab_lru_type                                 32       -     -32
htab_lru_percpu_type                          32       -     -32
cgroup_array_type                             32       -     -32
array_type                                    32       -     -32

johannes
Alexei Starovoitov April 5, 2017, 5:57 p.m. UTC | #2
On Tue, Apr 04, 2017 at 07:27:10PM +0200, Johannes Berg wrote:
> From: Johannes Berg <johannes.berg@intel.com>
> 
> There's no need to have struct bpf_prog_type_list since
> it just contains a list_head, the type, and the ops
> pointer. Since the types are densely packed and not
> actually dynamically registered, it's much easier and
> smaller to have an array of type->ops pointer.
> 
> This doesn't really change the image size much, but in
> the running image it saves a few hundred bytes because
> the structs are removed and traded against __init code.
> 
> While at it, also mark bpf_register_prog_type() __init
> since it's only called from code already marked so.
> 
> Signed-off-by: Johannes Berg <johannes.berg@intel.com>
> ---
> So I'm not sure about this - I looked at this code since
> I wanted to see if we could even register prog_types from
> modules, but that seems to be impossible right now ...
> ---
>  include/linux/bpf.h      | 12 +++------
>  include/uapi/linux/bpf.h |  2 ++
>  kernel/bpf/syscall.c     | 26 ++++++++++----------
>  kernel/trace/bpf_trace.c | 21 +++-------------
>  net/core/filter.c        | 63 +++++++-----------------------------------------
>  5 files changed, 31 insertions(+), 93 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 909fc033173a..891a76aaccaa 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -169,12 +169,6 @@ struct bpf_verifier_ops {
>  				  struct bpf_prog *prog);
>  };
>  
> -struct bpf_prog_type_list {
> -	struct list_head list_node;
> -	const struct bpf_verifier_ops *ops;
> -	enum bpf_prog_type type;
> -};
> -
>  struct bpf_prog_aux {
>  	atomic_t refcnt;
>  	u32 used_map_cnt;
> @@ -234,7 +228,8 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
>  #ifdef CONFIG_BPF_SYSCALL
>  DECLARE_PER_CPU(int, bpf_prog_active);
>  
> -void bpf_register_prog_type(struct bpf_prog_type_list *tl);
> +void bpf_register_prog_type(enum bpf_prog_type type,
> +			    const struct bpf_verifier_ops *ops);
>  void bpf_register_map_type(struct bpf_map_type_list *tl);
>  
>  struct bpf_prog *bpf_prog_get(u32 ufd);
> @@ -295,7 +290,8 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
>  /* verify correctness of eBPF program */
>  int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
>  #else
> -static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl)
> +static inline void bpf_register_prog_type(enum bpf_prog_type type,
> +					  const struct bpf_verifier_ops *ops)
>  {
>  }
>  
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 0539a0ceef38..cc68f5bbf458 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -112,6 +112,8 @@ enum bpf_prog_type {
>  	BPF_PROG_TYPE_LWT_IN,
>  	BPF_PROG_TYPE_LWT_OUT,
>  	BPF_PROG_TYPE_LWT_XMIT,
> +
> +	NUM_BPF_PROG_TYPES,

I don't think it's right to add something to uapi because
kernel implemetation changed. If we decide to go back to
link list this will be unused. One can argue that this can
be used for other purpose, but I'd like to separate such
discussions.
I think defining it internally as an array
and adding runtime check to bpf_register_prog_type() that
prog type id is less than the size of the array will be
clean enough. Even better if we can statically init
the array and drop bpf_register_prog_type() completely
then compiler will figure out the size of the array
at compile time and runtime check in find() will be sizeof(array).

>  static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
>  {
> -	struct bpf_prog_type_list *tl;
> -
> -	list_for_each_entry(tl, &bpf_prog_types, list_node) {
> -		if (tl->type == type) {
> -			prog->aux->ops = tl->ops;
> -			prog->type = type;
> -			return 0;
> -		}
> -	}
> +	if (type >= NUM_BPF_PROG_TYPES || !bpf_prog_types[type])
> +		return -EINVAL;
>  
> -	return -EINVAL;
> +	prog->aux->ops = bpf_prog_types[type];
> +	prog->type = type;

this is nice improvement.
please add a check for null too, since when folks
backport stuff there will be holes in this array.
Backports typically do
enum bpf_prog_type {
  PROG_A,
  PROG_B
  PROG_X = 10;
};

>  static int __init register_sk_filter_ops(void)
>  {
> -	bpf_register_prog_type(&sk_filter_type);
> -	bpf_register_prog_type(&sched_cls_type);
> -	bpf_register_prog_type(&sched_act_type);
> -	bpf_register_prog_type(&xdp_type);
> -	bpf_register_prog_type(&cg_skb_type);
> -	bpf_register_prog_type(&cg_sock_type);
> -	bpf_register_prog_type(&lwt_in_type);
> -	bpf_register_prog_type(&lwt_out_type);
> -	bpf_register_prog_type(&lwt_xmit_type);
> +	bpf_register_prog_type(BPF_PROG_TYPE_SOCKET_FILTER, &sk_filter_ops);
> +	bpf_register_prog_type(BPF_PROG_TYPE_SCHED_CLS, &tc_cls_act_ops);
> +	bpf_register_prog_type(BPF_PROG_TYPE_SCHED_ACT, &tc_cls_act_ops);
> +	bpf_register_prog_type(BPF_PROG_TYPE_XDP, &xdp_ops);
> +	bpf_register_prog_type(BPF_PROG_TYPE_CGROUP_SKB, &cg_skb_ops);
> +	bpf_register_prog_type(BPF_PROG_TYPE_CGROUP_SOCK, &cg_sock_ops);
> +	bpf_register_prog_type(BPF_PROG_TYPE_LWT_IN, &lwt_inout_ops);
> +	bpf_register_prog_type(BPF_PROG_TYPE_LWT_OUT, &lwt_inout_ops);
> +	bpf_register_prog_type(BPF_PROG_TYPE_LWT_XMIT, &lwt_xmit_ops);

I think we should be able to statically init the array as well and
avoid these calls too and we won't need to add anything to uapi.
diff mbox

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 909fc033173a..891a76aaccaa 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -169,12 +169,6 @@  struct bpf_verifier_ops {
 				  struct bpf_prog *prog);
 };
 
-struct bpf_prog_type_list {
-	struct list_head list_node;
-	const struct bpf_verifier_ops *ops;
-	enum bpf_prog_type type;
-};
-
 struct bpf_prog_aux {
 	atomic_t refcnt;
 	u32 used_map_cnt;
@@ -234,7 +228,8 @@  u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 
-void bpf_register_prog_type(struct bpf_prog_type_list *tl);
+void bpf_register_prog_type(enum bpf_prog_type type,
+			    const struct bpf_verifier_ops *ops);
 void bpf_register_map_type(struct bpf_map_type_list *tl);
 
 struct bpf_prog *bpf_prog_get(u32 ufd);
@@ -295,7 +290,8 @@  static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
 /* verify correctness of eBPF program */
 int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
 #else
-static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl)
+static inline void bpf_register_prog_type(enum bpf_prog_type type,
+					  const struct bpf_verifier_ops *ops)
 {
 }
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0539a0ceef38..cc68f5bbf458 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -112,6 +112,8 @@  enum bpf_prog_type {
 	BPF_PROG_TYPE_LWT_IN,
 	BPF_PROG_TYPE_LWT_OUT,
 	BPF_PROG_TYPE_LWT_XMIT,
+
+	NUM_BPF_PROG_TYPES,
 };
 
 enum bpf_attach_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7af0dcc5d755..1156eccf36a5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -564,26 +564,26 @@  static int map_get_next_key(union bpf_attr *attr)
 	return err;
 }
 
-static LIST_HEAD(bpf_prog_types);
+static const struct bpf_verifier_ops *
+bpf_prog_types[NUM_BPF_PROG_TYPES] __ro_after_init;
 
 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
 {
-	struct bpf_prog_type_list *tl;
-
-	list_for_each_entry(tl, &bpf_prog_types, list_node) {
-		if (tl->type == type) {
-			prog->aux->ops = tl->ops;
-			prog->type = type;
-			return 0;
-		}
-	}
+	if (type >= NUM_BPF_PROG_TYPES || !bpf_prog_types[type])
+		return -EINVAL;
 
-	return -EINVAL;
+	prog->aux->ops = bpf_prog_types[type];
+	prog->type = type;
+	return 0;
 }
 
-void bpf_register_prog_type(struct bpf_prog_type_list *tl)
+void __init bpf_register_prog_type(enum bpf_prog_type type,
+				   const struct bpf_verifier_ops *ops)
 {
-	list_add(&tl->list_node, &bpf_prog_types);
+	if (WARN_ON(bpf_prog_types[type]))
+		return;
+
+	bpf_prog_types[type] = ops;
 }
 
 /* fixup insn->imm field of bpf_call instructions:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index cee9802cf3e0..1e45e1cd0174 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -506,11 +506,6 @@  static const struct bpf_verifier_ops kprobe_prog_ops = {
 	.is_valid_access = kprobe_prog_is_valid_access,
 };
 
-static struct bpf_prog_type_list kprobe_tl __ro_after_init = {
-	.ops	= &kprobe_prog_ops,
-	.type	= BPF_PROG_TYPE_KPROBE,
-};
-
 BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
 	   u64, flags, void *, data, u64, size)
 {
@@ -589,11 +584,6 @@  static const struct bpf_verifier_ops tracepoint_prog_ops = {
 	.is_valid_access = tp_prog_is_valid_access,
 };
 
-static struct bpf_prog_type_list tracepoint_tl __ro_after_init = {
-	.ops	= &tracepoint_prog_ops,
-	.type	= BPF_PROG_TYPE_TRACEPOINT,
-};
-
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
 				    enum bpf_reg_type *reg_type)
 {
@@ -648,16 +638,11 @@  static const struct bpf_verifier_ops perf_event_prog_ops = {
 	.convert_ctx_access	= pe_prog_convert_ctx_access,
 };
 
-static struct bpf_prog_type_list perf_event_tl __ro_after_init = {
-	.ops	= &perf_event_prog_ops,
-	.type	= BPF_PROG_TYPE_PERF_EVENT,
-};
-
 static int __init register_kprobe_prog_ops(void)
 {
-	bpf_register_prog_type(&kprobe_tl);
-	bpf_register_prog_type(&tracepoint_tl);
-	bpf_register_prog_type(&perf_event_tl);
+	bpf_register_prog_type(BPF_PROG_TYPE_KPROBE, &kprobe_prog_ops);
+	bpf_register_prog_type(BPF_PROG_TYPE_TRACEPOINT, &tracepoint_prog_ops);
+	bpf_register_prog_type(BPF_PROG_TYPE_PERF_EVENT, &perf_event_prog_ops);
 	return 0;
 }
 late_initcall(register_kprobe_prog_ops);
diff --git a/net/core/filter.c b/net/core/filter.c
index ebaeaf2e46e8..d5218c891d43 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3296,62 +3296,17 @@  static const struct bpf_verifier_ops cg_sock_ops = {
 	.convert_ctx_access	= sock_filter_convert_ctx_access,
 };
 
-static struct bpf_prog_type_list sk_filter_type __ro_after_init = {
-	.ops	= &sk_filter_ops,
-	.type	= BPF_PROG_TYPE_SOCKET_FILTER,
-};
-
-static struct bpf_prog_type_list sched_cls_type __ro_after_init = {
-	.ops	= &tc_cls_act_ops,
-	.type	= BPF_PROG_TYPE_SCHED_CLS,
-};
-
-static struct bpf_prog_type_list sched_act_type __ro_after_init = {
-	.ops	= &tc_cls_act_ops,
-	.type	= BPF_PROG_TYPE_SCHED_ACT,
-};
-
-static struct bpf_prog_type_list xdp_type __ro_after_init = {
-	.ops	= &xdp_ops,
-	.type	= BPF_PROG_TYPE_XDP,
-};
-
-static struct bpf_prog_type_list cg_skb_type __ro_after_init = {
-	.ops	= &cg_skb_ops,
-	.type	= BPF_PROG_TYPE_CGROUP_SKB,
-};
-
-static struct bpf_prog_type_list lwt_in_type __ro_after_init = {
-	.ops	= &lwt_inout_ops,
-	.type	= BPF_PROG_TYPE_LWT_IN,
-};
-
-static struct bpf_prog_type_list lwt_out_type __ro_after_init = {
-	.ops	= &lwt_inout_ops,
-	.type	= BPF_PROG_TYPE_LWT_OUT,
-};
-
-static struct bpf_prog_type_list lwt_xmit_type __ro_after_init = {
-	.ops	= &lwt_xmit_ops,
-	.type	= BPF_PROG_TYPE_LWT_XMIT,
-};
-
-static struct bpf_prog_type_list cg_sock_type __ro_after_init = {
-	.ops	= &cg_sock_ops,
-	.type	= BPF_PROG_TYPE_CGROUP_SOCK
-};
-
 static int __init register_sk_filter_ops(void)
 {
-	bpf_register_prog_type(&sk_filter_type);
-	bpf_register_prog_type(&sched_cls_type);
-	bpf_register_prog_type(&sched_act_type);
-	bpf_register_prog_type(&xdp_type);
-	bpf_register_prog_type(&cg_skb_type);
-	bpf_register_prog_type(&cg_sock_type);
-	bpf_register_prog_type(&lwt_in_type);
-	bpf_register_prog_type(&lwt_out_type);
-	bpf_register_prog_type(&lwt_xmit_type);
+	bpf_register_prog_type(BPF_PROG_TYPE_SOCKET_FILTER, &sk_filter_ops);
+	bpf_register_prog_type(BPF_PROG_TYPE_SCHED_CLS, &tc_cls_act_ops);
+	bpf_register_prog_type(BPF_PROG_TYPE_SCHED_ACT, &tc_cls_act_ops);
+	bpf_register_prog_type(BPF_PROG_TYPE_XDP, &xdp_ops);
+	bpf_register_prog_type(BPF_PROG_TYPE_CGROUP_SKB, &cg_skb_ops);
+	bpf_register_prog_type(BPF_PROG_TYPE_CGROUP_SOCK, &cg_sock_ops);
+	bpf_register_prog_type(BPF_PROG_TYPE_LWT_IN, &lwt_inout_ops);
+	bpf_register_prog_type(BPF_PROG_TYPE_LWT_OUT, &lwt_inout_ops);
+	bpf_register_prog_type(BPF_PROG_TYPE_LWT_XMIT, &lwt_xmit_ops);
 
 	return 0;
 }