diff mbox series

[v10,1/5] add infrastructure for tagging functions as error injectable

Message ID 1513365176-6744-2-git-send-email-josef@toxicpanda.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series Add the ability to do BPF directed error injection | expand

Commit Message

Josef Bacik Dec. 15, 2017, 7:12 p.m. UTC
From: Josef Bacik <jbacik@fb.com>

Using BPF we can override kprob'ed functions and return arbitrary
values.  Obviously this can be a bit unsafe, so make this feature opt-in
for functions.  Simply tag a function with KPROBE_ERROR_INJECT_SYMBOL in
order to give BPF access to that function for error injection purposes.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/vmlinux.lds.h |  10 +++
 include/linux/bpf.h               |  11 +++
 include/linux/kprobes.h           |   1 +
 include/linux/module.h            |   5 ++
 kernel/kprobes.c                  | 163 ++++++++++++++++++++++++++++++++++++++
 kernel/module.c                   |   6 +-
 6 files changed, 195 insertions(+), 1 deletion(-)

Comments

Masami Hiramatsu (Google) Dec. 18, 2017, 9:11 a.m. UTC | #1
On Fri, 15 Dec 2017 14:12:52 -0500
Josef Bacik <josef@toxicpanda.com> wrote:

> From: Josef Bacik <jbacik@fb.com>
> 
> Using BPF we can override kprob'ed functions and return arbitrary
> values.  Obviously this can be a bit unsafe, so make this feature opt-in
> for functions.  Simply tag a function with KPROBE_ERROR_INJECT_SYMBOL in
> order to give BPF access to that function for error injection purposes.
> 

NAK. I'm very confused. What the reason to add this feature is implemented
in kernel/kprobes.c? It is seemed within an usual "usage" of kprobes.
I recommend you to implement this somewhere else... like
kernel/error_injection.c, or kernel/module.c.

More precisely list up the reasons why,

 - This is just for providing an API to check the address within an 
  address-range list inside kmodule (not related to kprobes).
 - There is no check in kprobes to modified address by using the API.
  (yes, that will cause a big overhead...)
 - This can mislead user NOT to change the instruction pointer from
  the kprobes except for that list.
 - If user intends to insert a piece of code (like livepatch) in a
  function, they do NOT think it is an "error injection".
 - Or if they find this API, and "what?? I can not change instruction
  pointer by kprobes? but I can." and report it a bug on lkml...

So, I don't like to see this in kprobes.c. It is better to make another
layer to do this.

Thank you,

> Signed-off-by: Josef Bacik <jbacik@fb.com>
> Acked-by: Ingo Molnar <mingo@kernel.org>
> ---
>  include/asm-generic/vmlinux.lds.h |  10 +++
>  include/linux/bpf.h               |  11 +++
>  include/linux/kprobes.h           |   1 +
>  include/linux/module.h            |   5 ++
>  kernel/kprobes.c                  | 163 ++++++++++++++++++++++++++++++++++++++
>  kernel/module.c                   |   6 +-
>  6 files changed, 195 insertions(+), 1 deletion(-)
> 
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index ee8b707d9fa9..a2e8582d094a 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -136,6 +136,15 @@
>  #define KPROBE_BLACKLIST()
>  #endif
>  
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +#define ERROR_INJECT_LIST()	. = ALIGN(8);						\
> +				VMLINUX_SYMBOL(__start_kprobe_error_inject_list) = .;	\
> +				KEEP(*(_kprobe_error_inject_list))			\
> +				VMLINUX_SYMBOL(__stop_kprobe_error_inject_list) = .;
> +#else
> +#define ERROR_INJECT_LIST()
> +#endif
> +
>  #ifdef CONFIG_EVENT_TRACING
>  #define FTRACE_EVENTS()	. = ALIGN(8);					\
>  			VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
> @@ -564,6 +573,7 @@
>  	FTRACE_EVENTS()							\
>  	TRACE_SYSCALLS()						\
>  	KPROBE_BLACKLIST()						\
> +	ERROR_INJECT_LIST()						\
>  	MEM_DISCARD(init.rodata)					\
>  	CLK_OF_TABLES()							\
>  	RESERVEDMEM_OF_TABLES()						\
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index e55e4255a210..7f4d2a953173 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -576,4 +576,15 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
>  void bpf_user_rnd_init_once(void);
>  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>  
> +#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +#define BPF_ALLOW_ERROR_INJECTION(fname)				\
> +static unsigned long __used						\
> +	__attribute__((__section__("_kprobe_error_inject_list")))	\
> +	_eil_addr_##fname = (unsigned long)fname;
> +#else
> +#define BPF_ALLOW_ERROR_INJECTION(fname)
> +#endif
> +#endif
> +
>  #endif /* _LINUX_BPF_H */
> diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
> index 9440a2fc8893..963fd364f3d6 100644
> --- a/include/linux/kprobes.h
> +++ b/include/linux/kprobes.h
> @@ -271,6 +271,7 @@ extern bool arch_kprobe_on_func_entry(unsigned long offset);
>  extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
>  
>  extern bool within_kprobe_blacklist(unsigned long addr);
> +extern bool within_kprobe_error_injection_list(unsigned long addr);
>  
>  struct kprobe_insn_cache {
>  	struct mutex mutex;
> diff --git a/include/linux/module.h b/include/linux/module.h
> index c69b49abe877..548fa09fa806 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -475,6 +475,11 @@ struct module {
>  	ctor_fn_t *ctors;
>  	unsigned int num_ctors;
>  #endif
> +
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +	unsigned int num_kprobe_ei_funcs;
> +	unsigned long *kprobe_ei_funcs;
> +#endif
>  } ____cacheline_aligned __randomize_layout;
>  #ifndef MODULE_ARCH_INIT
>  #define MODULE_ARCH_INIT {}
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index da2ccf142358..b4aab48ad258 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -83,6 +83,16 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
>  	return &(kretprobe_table_locks[hash].lock);
>  }
>  
> +/* List of symbols that can be overriden for error injection. */
> +static LIST_HEAD(kprobe_error_injection_list);
> +static DEFINE_MUTEX(kprobe_ei_mutex);
> +struct kprobe_ei_entry {
> +	struct list_head list;
> +	unsigned long start_addr;
> +	unsigned long end_addr;
> +	void *priv;
> +};
> +
>  /* Blacklist -- list of struct kprobe_blacklist_entry */
>  static LIST_HEAD(kprobe_blacklist);
>  
> @@ -1394,6 +1404,17 @@ bool within_kprobe_blacklist(unsigned long addr)
>  	return false;
>  }
>  
> +bool within_kprobe_error_injection_list(unsigned long addr)
> +{
> +	struct kprobe_ei_entry *ent;
> +
> +	list_for_each_entry(ent, &kprobe_error_injection_list, list) {
> +		if (addr >= ent->start_addr && addr < ent->end_addr)
> +			return true;
> +	}
> +	return false;
> +}
> +
>  /*
>   * If we have a symbol_name argument, look it up and add the offset field
>   * to it. This way, we can specify a relative address to a symbol.
> @@ -2168,6 +2189,86 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
>  	return 0;
>  }
>  
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +/* Markers of the _kprobe_error_inject_list section */
> +extern unsigned long __start_kprobe_error_inject_list[];
> +extern unsigned long __stop_kprobe_error_inject_list[];
> +
> +/*
> + * Lookup and populate the kprobe_error_injection_list.
> + *
> + * For safety reasons we only allow certain functions to be overriden with
> + * bpf_error_injection, so we need to populate the list of the symbols that have
> + * been marked as safe for overriding.
> + */
> +static void populate_kprobe_error_injection_list(unsigned long *start,
> +						 unsigned long *end,
> +						 void *priv)
> +{
> +	unsigned long *iter;
> +	struct kprobe_ei_entry *ent;
> +	unsigned long entry, offset = 0, size = 0;
> +
> +	mutex_lock(&kprobe_ei_mutex);
> +	for (iter = start; iter < end; iter++) {
> +		entry = arch_deref_entry_point((void *)*iter);
> +
> +		if (!kernel_text_address(entry) ||
> +		    !kallsyms_lookup_size_offset(entry, &size, &offset)) {
> +			pr_err("Failed to find error inject entry at %p\n",
> +				(void *)entry);
> +			continue;
> +		}
> +
> +		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
> +		if (!ent)
> +			break;
> +		ent->start_addr = entry;
> +		ent->end_addr = entry + size;
> +		ent->priv = priv;
> +		INIT_LIST_HEAD(&ent->list);
> +		list_add_tail(&ent->list, &kprobe_error_injection_list);
> +	}
> +	mutex_unlock(&kprobe_ei_mutex);
> +}
> +
> +static void __init populate_kernel_kprobe_ei_list(void)
> +{
> +	populate_kprobe_error_injection_list(__start_kprobe_error_inject_list,
> +					     __stop_kprobe_error_inject_list,
> +					     NULL);
> +}
> +
> +static void module_load_kprobe_ei_list(struct module *mod)
> +{
> +	if (!mod->num_kprobe_ei_funcs)
> +		return;
> +	populate_kprobe_error_injection_list(mod->kprobe_ei_funcs,
> +					     mod->kprobe_ei_funcs +
> +					     mod->num_kprobe_ei_funcs, mod);
> +}
> +
> +static void module_unload_kprobe_ei_list(struct module *mod)
> +{
> +	struct kprobe_ei_entry *ent, *n;
> +	if (!mod->num_kprobe_ei_funcs)
> +		return;
> +
> +	mutex_lock(&kprobe_ei_mutex);
> +	list_for_each_entry_safe(ent, n, &kprobe_error_injection_list, list) {
> +		if (ent->priv == mod) {
> +			list_del_init(&ent->list);
> +			kfree(ent);
> +		}
> +	}
> +	mutex_unlock(&kprobe_ei_mutex);
> +}
> +#else
> +static inline void __init populate_kernel_kprobe_ei_list(void) {}
> +static inline void module_load_kprobe_ei_list(struct module *m) {}
> +static inline void module_unload_kprobe_ei_list(struct module *m) {}
> +#endif
> +
>  /* Module notifier call back, checking kprobes on the module */
>  static int kprobes_module_callback(struct notifier_block *nb,
>  				   unsigned long val, void *data)
> @@ -2178,6 +2279,11 @@ static int kprobes_module_callback(struct notifier_block *nb,
>  	unsigned int i;
>  	int checkcore = (val == MODULE_STATE_GOING);
>  
> +	if (val == MODULE_STATE_COMING)
> +		module_load_kprobe_ei_list(mod);
> +	else if (val == MODULE_STATE_GOING)
> +		module_unload_kprobe_ei_list(mod);
> +
>  	if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
>  		return NOTIFY_DONE;
>  
> @@ -2240,6 +2346,8 @@ static int __init init_kprobes(void)
>  		pr_err("Please take care of using kprobes.\n");
>  	}
>  
> +	populate_kernel_kprobe_ei_list();
> +
>  	if (kretprobe_blacklist_size) {
>  		/* lookup the function address from its name */
>  		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
> @@ -2407,6 +2515,56 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
>  	.release        = seq_release,
>  };
>  
> +/*
> + * kprobes/error_injection_list -- shows which functions can be overriden for
> + * error injection.
> + * */
> +static void *kprobe_ei_seq_start(struct seq_file *m, loff_t *pos)
> +{
> +	mutex_lock(&kprobe_ei_mutex);
> +	return seq_list_start(&kprobe_error_injection_list, *pos);
> +}
> +
> +static void kprobe_ei_seq_stop(struct seq_file *m, void *v)
> +{
> +	mutex_unlock(&kprobe_ei_mutex);
> +}
> +
> +static void *kprobe_ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
> +{
> +	return seq_list_next(v, &kprobe_error_injection_list, pos);
> +}
> +
> +static int kprobe_ei_seq_show(struct seq_file *m, void *v)
> +{
> +	char buffer[KSYM_SYMBOL_LEN];
> +	struct kprobe_ei_entry *ent =
> +		list_entry(v, struct kprobe_ei_entry, list);
> +
> +	sprint_symbol(buffer, ent->start_addr);
> +	seq_printf(m, "%s\n", buffer);
> +	return 0;
> +}
> +
> +static const struct seq_operations kprobe_ei_seq_ops = {
> +	.start = kprobe_ei_seq_start,
> +	.next  = kprobe_ei_seq_next,
> +	.stop  = kprobe_ei_seq_stop,
> +	.show  = kprobe_ei_seq_show,
> +};
> +
> +static int kprobe_ei_open(struct inode *inode, struct file *filp)
> +{
> +	return seq_open(filp, &kprobe_ei_seq_ops);
> +}
> +
> +static const struct file_operations debugfs_kprobe_ei_ops = {
> +	.open           = kprobe_ei_open,
> +	.read           = seq_read,
> +	.llseek         = seq_lseek,
> +	.release        = seq_release,
> +};
> +
>  static void arm_all_kprobes(void)
>  {
>  	struct hlist_head *head;
> @@ -2548,6 +2706,11 @@ static int __init debugfs_kprobe_init(void)
>  	if (!file)
>  		goto error;
>  
> +	file = debugfs_create_file("error_injection_list", 0444, dir, NULL,
> +				  &debugfs_kprobe_ei_ops);
> +	if (!file)
> +		goto error;
> +
>  	return 0;
>  
>  error:
> diff --git a/kernel/module.c b/kernel/module.c
> index dea01ac9cb74..bd695bfdc5c4 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -3118,7 +3118,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
>  					     sizeof(*mod->ftrace_callsites),
>  					     &mod->num_ftrace_callsites);
>  #endif
> -
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +	mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list",
> +					    sizeof(*mod->kprobe_ei_funcs),
> +					    &mod->num_kprobe_ei_funcs);
> +#endif
>  	mod->extable = section_objs(info, "__ex_table",
>  				    sizeof(*mod->extable), &mod->num_exentries);
>  
> -- 
> 2.7.5
>
Masami Hiramatsu (Google) Dec. 19, 2017, 6:29 a.m. UTC | #2
On Fri, 15 Dec 2017 14:12:52 -0500
Josef Bacik <josef@toxicpanda.com> wrote:

> From: Josef Bacik <jbacik@fb.com>
> 
> Using BPF we can override kprob'ed functions and return arbitrary
> values.  Obviously this can be a bit unsafe, so make this feature opt-in
> for functions.  Simply tag a function with KPROBE_ERROR_INJECT_SYMBOL in
> order to give BPF access to that function for error injection purposes.
> 
> Signed-off-by: Josef Bacik <jbacik@fb.com>
> Acked-by: Ingo Molnar <mingo@kernel.org>
> ---
>  include/asm-generic/vmlinux.lds.h |  10 +++
>  include/linux/bpf.h               |  11 +++
>  include/linux/kprobes.h           |   1 +
>  include/linux/module.h            |   5 ++
>  kernel/kprobes.c                  | 163 ++++++++++++++++++++++++++++++++++++++
>  kernel/module.c                   |   6 +-
>  6 files changed, 195 insertions(+), 1 deletion(-)
> 
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index ee8b707d9fa9..a2e8582d094a 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -136,6 +136,15 @@
>  #define KPROBE_BLACKLIST()
>  #endif
>  
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +#define ERROR_INJECT_LIST()	. = ALIGN(8);						\
> +				VMLINUX_SYMBOL(__start_kprobe_error_inject_list) = .;	\
> +				KEEP(*(_kprobe_error_inject_list))			\
> +				VMLINUX_SYMBOL(__stop_kprobe_error_inject_list) = .;
> +#else
> +#define ERROR_INJECT_LIST()
> +#endif
> +
>  #ifdef CONFIG_EVENT_TRACING
>  #define FTRACE_EVENTS()	. = ALIGN(8);					\
>  			VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
> @@ -564,6 +573,7 @@
>  	FTRACE_EVENTS()							\
>  	TRACE_SYSCALLS()						\
>  	KPROBE_BLACKLIST()						\
> +	ERROR_INJECT_LIST()						\
>  	MEM_DISCARD(init.rodata)					\
>  	CLK_OF_TABLES()							\
>  	RESERVEDMEM_OF_TABLES()						\
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index e55e4255a210..7f4d2a953173 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -576,4 +576,15 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
>  void bpf_user_rnd_init_once(void);
>  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>  
> +#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE

BTW, CONFIG_BPF_KPROBE_OVERRIDE is also confusable name.
Since this feature override a function to just return with
some return value (as far as I understand, or would you
also plan to modify execution path inside a function?),
I think it should be better CONFIG_BPF_FUNCTION_OVERRIDE or
CONFIG_BPF_EXECUTION_OVERRIDE.

Indeed, BPF is based on kprobes, but it seems you are limiting it
with ftrace (function-call trace) (I'm not sure the reason why),
so using "kprobes" for this feature seems strange for me.

The idea in this patch itself (marking injectable function on
a list) is OK to me. 

Thank you,

> +#define BPF_ALLOW_ERROR_INJECTION(fname)				\
> +static unsigned long __used						\
> +	__attribute__((__section__("_kprobe_error_inject_list")))	\
> +	_eil_addr_##fname = (unsigned long)fname;
> +#else
> +#define BPF_ALLOW_ERROR_INJECTION(fname)
> +#endif
> +#endif
> +
>  #endif /* _LINUX_BPF_H */
> diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
> index 9440a2fc8893..963fd364f3d6 100644
> --- a/include/linux/kprobes.h
> +++ b/include/linux/kprobes.h
> @@ -271,6 +271,7 @@ extern bool arch_kprobe_on_func_entry(unsigned long offset);
>  extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
>  
>  extern bool within_kprobe_blacklist(unsigned long addr);
> +extern bool within_kprobe_error_injection_list(unsigned long addr);
>  
>  struct kprobe_insn_cache {
>  	struct mutex mutex;
> diff --git a/include/linux/module.h b/include/linux/module.h
> index c69b49abe877..548fa09fa806 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -475,6 +475,11 @@ struct module {
>  	ctor_fn_t *ctors;
>  	unsigned int num_ctors;
>  #endif
> +
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +	unsigned int num_kprobe_ei_funcs;
> +	unsigned long *kprobe_ei_funcs;
> +#endif
>  } ____cacheline_aligned __randomize_layout;
>  #ifndef MODULE_ARCH_INIT
>  #define MODULE_ARCH_INIT {}
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index da2ccf142358..b4aab48ad258 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -83,6 +83,16 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
>  	return &(kretprobe_table_locks[hash].lock);
>  }
>  
> +/* List of symbols that can be overriden for error injection. */
> +static LIST_HEAD(kprobe_error_injection_list);
> +static DEFINE_MUTEX(kprobe_ei_mutex);
> +struct kprobe_ei_entry {
> +	struct list_head list;
> +	unsigned long start_addr;
> +	unsigned long end_addr;
> +	void *priv;
> +};
> +
>  /* Blacklist -- list of struct kprobe_blacklist_entry */
>  static LIST_HEAD(kprobe_blacklist);
>  
> @@ -1394,6 +1404,17 @@ bool within_kprobe_blacklist(unsigned long addr)
>  	return false;
>  }
>  
> +bool within_kprobe_error_injection_list(unsigned long addr)
> +{
> +	struct kprobe_ei_entry *ent;
> +
> +	list_for_each_entry(ent, &kprobe_error_injection_list, list) {
> +		if (addr >= ent->start_addr && addr < ent->end_addr)
> +			return true;
> +	}
> +	return false;
> +}
> +
>  /*
>   * If we have a symbol_name argument, look it up and add the offset field
>   * to it. This way, we can specify a relative address to a symbol.
> @@ -2168,6 +2189,86 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
>  	return 0;
>  }
>  
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +/* Markers of the _kprobe_error_inject_list section */
> +extern unsigned long __start_kprobe_error_inject_list[];
> +extern unsigned long __stop_kprobe_error_inject_list[];
> +
> +/*
> + * Lookup and populate the kprobe_error_injection_list.
> + *
> + * For safety reasons we only allow certain functions to be overriden with
> + * bpf_error_injection, so we need to populate the list of the symbols that have
> + * been marked as safe for overriding.
> + */
> +static void populate_kprobe_error_injection_list(unsigned long *start,
> +						 unsigned long *end,
> +						 void *priv)
> +{
> +	unsigned long *iter;
> +	struct kprobe_ei_entry *ent;
> +	unsigned long entry, offset = 0, size = 0;
> +
> +	mutex_lock(&kprobe_ei_mutex);
> +	for (iter = start; iter < end; iter++) {
> +		entry = arch_deref_entry_point((void *)*iter);
> +
> +		if (!kernel_text_address(entry) ||
> +		    !kallsyms_lookup_size_offset(entry, &size, &offset)) {
> +			pr_err("Failed to find error inject entry at %p\n",
> +				(void *)entry);
> +			continue;
> +		}
> +
> +		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
> +		if (!ent)
> +			break;
> +		ent->start_addr = entry;
> +		ent->end_addr = entry + size;
> +		ent->priv = priv;
> +		INIT_LIST_HEAD(&ent->list);
> +		list_add_tail(&ent->list, &kprobe_error_injection_list);
> +	}
> +	mutex_unlock(&kprobe_ei_mutex);
> +}
> +
> +static void __init populate_kernel_kprobe_ei_list(void)
> +{
> +	populate_kprobe_error_injection_list(__start_kprobe_error_inject_list,
> +					     __stop_kprobe_error_inject_list,
> +					     NULL);
> +}
> +
> +static void module_load_kprobe_ei_list(struct module *mod)
> +{
> +	if (!mod->num_kprobe_ei_funcs)
> +		return;
> +	populate_kprobe_error_injection_list(mod->kprobe_ei_funcs,
> +					     mod->kprobe_ei_funcs +
> +					     mod->num_kprobe_ei_funcs, mod);
> +}
> +
> +static void module_unload_kprobe_ei_list(struct module *mod)
> +{
> +	struct kprobe_ei_entry *ent, *n;
> +	if (!mod->num_kprobe_ei_funcs)
> +		return;
> +
> +	mutex_lock(&kprobe_ei_mutex);
> +	list_for_each_entry_safe(ent, n, &kprobe_error_injection_list, list) {
> +		if (ent->priv == mod) {
> +			list_del_init(&ent->list);
> +			kfree(ent);
> +		}
> +	}
> +	mutex_unlock(&kprobe_ei_mutex);
> +}
> +#else
> +static inline void __init populate_kernel_kprobe_ei_list(void) {}
> +static inline void module_load_kprobe_ei_list(struct module *m) {}
> +static inline void module_unload_kprobe_ei_list(struct module *m) {}
> +#endif
> +
>  /* Module notifier call back, checking kprobes on the module */
>  static int kprobes_module_callback(struct notifier_block *nb,
>  				   unsigned long val, void *data)
> @@ -2178,6 +2279,11 @@ static int kprobes_module_callback(struct notifier_block *nb,
>  	unsigned int i;
>  	int checkcore = (val == MODULE_STATE_GOING);
>  
> +	if (val == MODULE_STATE_COMING)
> +		module_load_kprobe_ei_list(mod);
> +	else if (val == MODULE_STATE_GOING)
> +		module_unload_kprobe_ei_list(mod);
> +
>  	if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
>  		return NOTIFY_DONE;
>  
> @@ -2240,6 +2346,8 @@ static int __init init_kprobes(void)
>  		pr_err("Please take care of using kprobes.\n");
>  	}
>  
> +	populate_kernel_kprobe_ei_list();
> +
>  	if (kretprobe_blacklist_size) {
>  		/* lookup the function address from its name */
>  		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
> @@ -2407,6 +2515,56 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
>  	.release        = seq_release,
>  };
>  
> +/*
> + * kprobes/error_injection_list -- shows which functions can be overriden for
> + * error injection.
> + * */
> +static void *kprobe_ei_seq_start(struct seq_file *m, loff_t *pos)
> +{
> +	mutex_lock(&kprobe_ei_mutex);
> +	return seq_list_start(&kprobe_error_injection_list, *pos);
> +}
> +
> +static void kprobe_ei_seq_stop(struct seq_file *m, void *v)
> +{
> +	mutex_unlock(&kprobe_ei_mutex);
> +}
> +
> +static void *kprobe_ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
> +{
> +	return seq_list_next(v, &kprobe_error_injection_list, pos);
> +}
> +
> +static int kprobe_ei_seq_show(struct seq_file *m, void *v)
> +{
> +	char buffer[KSYM_SYMBOL_LEN];
> +	struct kprobe_ei_entry *ent =
> +		list_entry(v, struct kprobe_ei_entry, list);
> +
> +	sprint_symbol(buffer, ent->start_addr);
> +	seq_printf(m, "%s\n", buffer);
> +	return 0;
> +}
> +
> +static const struct seq_operations kprobe_ei_seq_ops = {
> +	.start = kprobe_ei_seq_start,
> +	.next  = kprobe_ei_seq_next,
> +	.stop  = kprobe_ei_seq_stop,
> +	.show  = kprobe_ei_seq_show,
> +};
> +
> +static int kprobe_ei_open(struct inode *inode, struct file *filp)
> +{
> +	return seq_open(filp, &kprobe_ei_seq_ops);
> +}
> +
> +static const struct file_operations debugfs_kprobe_ei_ops = {
> +	.open           = kprobe_ei_open,
> +	.read           = seq_read,
> +	.llseek         = seq_lseek,
> +	.release        = seq_release,
> +};
> +
>  static void arm_all_kprobes(void)
>  {
>  	struct hlist_head *head;
> @@ -2548,6 +2706,11 @@ static int __init debugfs_kprobe_init(void)
>  	if (!file)
>  		goto error;
>  
> +	file = debugfs_create_file("error_injection_list", 0444, dir, NULL,
> +				  &debugfs_kprobe_ei_ops);
> +	if (!file)
> +		goto error;
> +
>  	return 0;
>  
>  error:
> diff --git a/kernel/module.c b/kernel/module.c
> index dea01ac9cb74..bd695bfdc5c4 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -3118,7 +3118,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
>  					     sizeof(*mod->ftrace_callsites),
>  					     &mod->num_ftrace_callsites);
>  #endif
> -
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +	mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list",
> +					    sizeof(*mod->kprobe_ei_funcs),
> +					    &mod->num_kprobe_ei_funcs);
> +#endif
>  	mod->extable = section_objs(info, "__ex_table",
>  				    sizeof(*mod->extable), &mod->num_exentries);
>  
> -- 
> 2.7.5
>
Alexei Starovoitov Dec. 20, 2017, 2:14 a.m. UTC | #3
On 12/18/17 10:29 PM, Masami Hiramatsu wrote:
>>
>> +#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
>> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
>
> BTW, CONFIG_BPF_KPROBE_OVERRIDE is also confusable name.
> Since this feature override a function to just return with
> some return value (as far as I understand, or would you
> also plan to modify execution path inside a function?),
> I think it should be better CONFIG_BPF_FUNCTION_OVERRIDE or
> CONFIG_BPF_EXECUTION_OVERRIDE.

I don't think such renaming makes sense.
The feature is overriding kprobe by changing how kprobe returns.
It doesn't override BPF_FUNCTION or BPF_EXECUTION.
The kernel enters and exists bpf program as normal.

> Indeed, BPF is based on kprobes, but it seems you are limiting it
> with ftrace (function-call trace) (I'm not sure the reason why),
> so using "kprobes" for this feature seems strange for me.

do you have an idea how kprobe override can happen when kprobe
placed in the middle of the function?

Please make your suggestion as patches based on top of bpf-next.

Thanks
Masami Hiramatsu (Google) Dec. 20, 2017, 7:13 a.m. UTC | #4
On Tue, 19 Dec 2017 18:14:17 -0800
Alexei Starovoitov <ast@fb.com> wrote:

> On 12/18/17 10:29 PM, Masami Hiramatsu wrote:
> >>
> >> +#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
> >> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> >
> > BTW, CONFIG_BPF_KPROBE_OVERRIDE is also confusable name.
> > Since this feature override a function to just return with
> > some return value (as far as I understand, or would you
> > also plan to modify execution path inside a function?),
> > I think it should be better CONFIG_BPF_FUNCTION_OVERRIDE or
> > CONFIG_BPF_EXECUTION_OVERRIDE.
> 
> I don't think such renaming makes sense.
> The feature is overriding kprobe by changing how kprobe returns.
> It doesn't override BPF_FUNCTION or BPF_EXECUTION.

No, I meant this is BPF's feature which override FUNCTION, so
BPF is a kind of namespace. (Is that only for a function entry
because it can not tweak stackframe at this morment?)

> The kernel enters and exists bpf program as normal.

Yeah, but that bpf program modifies instruction pointer, am I correct?

> 
> > Indeed, BPF is based on kprobes, but it seems you are limiting it
> > with ftrace (function-call trace) (I'm not sure the reason why),
> > so using "kprobes" for this feature seems strange for me.
> 
> do you have an idea how kprobe override can happen when kprobe
> placed in the middle of the function?

For example, if you know a basic block in the function, maybe
you can skip a block or something like that. But nowadays
it is somewhat hard because optimizer mixed it up.

> 
> Please make your suggestion as patches based on top of bpf-next.

bpf-next seems already pick this series. Would you mean I revert it and
write new patch?

Thank you,

> 
> Thanks
>
Daniel Borkmann Dec. 20, 2017, 10:09 a.m. UTC | #5
On 12/20/2017 08:13 AM, Masami Hiramatsu wrote:
> On Tue, 19 Dec 2017 18:14:17 -0800
> Alexei Starovoitov <ast@fb.com> wrote:
[...]
>> Please make your suggestion as patches based on top of bpf-next.
> 
> bpf-next seems already pick this series. Would you mean I revert it and
> write new patch?

No, please submit as follow-ups instead, thanks Masami!
Masami Hiramatsu (Google) Dec. 20, 2017, 11 a.m. UTC | #6
On Fri, 15 Dec 2017 14:12:52 -0500
Josef Bacik <josef@toxicpanda.com> wrote:

> From: Josef Bacik <jbacik@fb.com>
> 
> Using BPF we can override kprob'ed functions and return arbitrary
> values.  Obviously this can be a bit unsafe, so make this feature opt-in
> for functions.  Simply tag a function with KPROBE_ERROR_INJECT_SYMBOL in
> order to give BPF access to that function for error injection purposes.
> 
> Signed-off-by: Josef Bacik <jbacik@fb.com>
> Acked-by: Ingo Molnar <mingo@kernel.org>
> ---
>  include/asm-generic/vmlinux.lds.h |  10 +++
>  include/linux/bpf.h               |  11 +++
>  include/linux/kprobes.h           |   1 +
>  include/linux/module.h            |   5 ++
>  kernel/kprobes.c                  | 163 ++++++++++++++++++++++++++++++++++++++
>  kernel/module.c                   |   6 +-
>  6 files changed, 195 insertions(+), 1 deletion(-)
> 
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index ee8b707d9fa9..a2e8582d094a 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -136,6 +136,15 @@
>  #define KPROBE_BLACKLIST()
>  #endif
>  
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +#define ERROR_INJECT_LIST()	. = ALIGN(8);						\
> +				VMLINUX_SYMBOL(__start_kprobe_error_inject_list) = .;	\
> +				KEEP(*(_kprobe_error_inject_list))			\
> +				VMLINUX_SYMBOL(__stop_kprobe_error_inject_list) = .;
> +#else
> +#define ERROR_INJECT_LIST()
> +#endif
> +
>  #ifdef CONFIG_EVENT_TRACING
>  #define FTRACE_EVENTS()	. = ALIGN(8);					\
>  			VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
> @@ -564,6 +573,7 @@
>  	FTRACE_EVENTS()							\
>  	TRACE_SYSCALLS()						\
>  	KPROBE_BLACKLIST()						\
> +	ERROR_INJECT_LIST()						\
>  	MEM_DISCARD(init.rodata)					\
>  	CLK_OF_TABLES()							\
>  	RESERVEDMEM_OF_TABLES()						\
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index e55e4255a210..7f4d2a953173 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -576,4 +576,15 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
>  void bpf_user_rnd_init_once(void);
>  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>  
> +#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +#define BPF_ALLOW_ERROR_INJECTION(fname)				\
> +static unsigned long __used						\
> +	__attribute__((__section__("_kprobe_error_inject_list")))	\
> +	_eil_addr_##fname = (unsigned long)fname;
> +#else
> +#define BPF_ALLOW_ERROR_INJECTION(fname)
> +#endif
> +#endif

This part shows this feature belongs to bpf, if it is a part of kprobes,
it should be defined in include/asm-generic/kprobes.h as NOKPROBE_SYMBOL
does.

Why this is defined in BPF, but list is under kprobes?



> +
>  #endif /* _LINUX_BPF_H */
> diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
> index 9440a2fc8893..963fd364f3d6 100644
> --- a/include/linux/kprobes.h
> +++ b/include/linux/kprobes.h
> @@ -271,6 +271,7 @@ extern bool arch_kprobe_on_func_entry(unsigned long offset);
>  extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
>  
>  extern bool within_kprobe_blacklist(unsigned long addr);
> +extern bool within_kprobe_error_injection_list(unsigned long addr);
>  
>  struct kprobe_insn_cache {
>  	struct mutex mutex;
> diff --git a/include/linux/module.h b/include/linux/module.h
> index c69b49abe877..548fa09fa806 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -475,6 +475,11 @@ struct module {
>  	ctor_fn_t *ctors;
>  	unsigned int num_ctors;
>  #endif
> +
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +	unsigned int num_kprobe_ei_funcs;
> +	unsigned long *kprobe_ei_funcs;
> +#endif
>  } ____cacheline_aligned __randomize_layout;
>  #ifndef MODULE_ARCH_INIT
>  #define MODULE_ARCH_INIT {}
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index da2ccf142358..b4aab48ad258 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -83,6 +83,16 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
>  	return &(kretprobe_table_locks[hash].lock);
>  }
>  
> +/* List of symbols that can be overriden for error injection. */
> +static LIST_HEAD(kprobe_error_injection_list);
> +static DEFINE_MUTEX(kprobe_ei_mutex);
> +struct kprobe_ei_entry {
> +	struct list_head list;
> +	unsigned long start_addr;
> +	unsigned long end_addr;
> +	void *priv;
> +};

Again, no kprobe user except for bpf, which is actually trace_kprobe user,
only refer this.

I mean 

"bpf uses trace_kprobe, trace_kprobe uses kprobe."

So there is no direct relationship with kprobe.
For example, kprobe user modules can OVERRIDE any functions.
And there is no generic error injection code in the kernel
except for the bpf currently.

Of course, I can accept this code if you accept that I make a
generic error injection code on ftrace without BPF.

Ingo, that is what you intended?

Thank you,

> +
>  /* Blacklist -- list of struct kprobe_blacklist_entry */
>  static LIST_HEAD(kprobe_blacklist);
>  
> @@ -1394,6 +1404,17 @@ bool within_kprobe_blacklist(unsigned long addr)
>  	return false;
>  }
>  
> +bool within_kprobe_error_injection_list(unsigned long addr)
> +{
> +	struct kprobe_ei_entry *ent;
> +
> +	list_for_each_entry(ent, &kprobe_error_injection_list, list) {
> +		if (addr >= ent->start_addr && addr < ent->end_addr)
> +			return true;
> +	}
> +	return false;
> +}
> +
>  /*
>   * If we have a symbol_name argument, look it up and add the offset field
>   * to it. This way, we can specify a relative address to a symbol.
> @@ -2168,6 +2189,86 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
>  	return 0;
>  }
>  
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +/* Markers of the _kprobe_error_inject_list section */
> +extern unsigned long __start_kprobe_error_inject_list[];
> +extern unsigned long __stop_kprobe_error_inject_list[];
> +
> +/*
> + * Lookup and populate the kprobe_error_injection_list.
> + *
> + * For safety reasons we only allow certain functions to be overriden with
> + * bpf_error_injection, so we need to populate the list of the symbols that have
> + * been marked as safe for overriding.
> + */
> +static void populate_kprobe_error_injection_list(unsigned long *start,
> +						 unsigned long *end,
> +						 void *priv)
> +{
> +	unsigned long *iter;
> +	struct kprobe_ei_entry *ent;
> +	unsigned long entry, offset = 0, size = 0;
> +
> +	mutex_lock(&kprobe_ei_mutex);
> +	for (iter = start; iter < end; iter++) {
> +		entry = arch_deref_entry_point((void *)*iter);
> +
> +		if (!kernel_text_address(entry) ||
> +		    !kallsyms_lookup_size_offset(entry, &size, &offset)) {
> +			pr_err("Failed to find error inject entry at %p\n",
> +				(void *)entry);
> +			continue;
> +		}
> +
> +		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
> +		if (!ent)
> +			break;
> +		ent->start_addr = entry;
> +		ent->end_addr = entry + size;
> +		ent->priv = priv;
> +		INIT_LIST_HEAD(&ent->list);
> +		list_add_tail(&ent->list, &kprobe_error_injection_list);
> +	}
> +	mutex_unlock(&kprobe_ei_mutex);
> +}
> +
> +static void __init populate_kernel_kprobe_ei_list(void)
> +{
> +	populate_kprobe_error_injection_list(__start_kprobe_error_inject_list,
> +					     __stop_kprobe_error_inject_list,
> +					     NULL);
> +}
> +
> +static void module_load_kprobe_ei_list(struct module *mod)
> +{
> +	if (!mod->num_kprobe_ei_funcs)
> +		return;
> +	populate_kprobe_error_injection_list(mod->kprobe_ei_funcs,
> +					     mod->kprobe_ei_funcs +
> +					     mod->num_kprobe_ei_funcs, mod);
> +}
> +
> +static void module_unload_kprobe_ei_list(struct module *mod)
> +{
> +	struct kprobe_ei_entry *ent, *n;
> +	if (!mod->num_kprobe_ei_funcs)
> +		return;
> +
> +	mutex_lock(&kprobe_ei_mutex);
> +	list_for_each_entry_safe(ent, n, &kprobe_error_injection_list, list) {
> +		if (ent->priv == mod) {
> +			list_del_init(&ent->list);
> +			kfree(ent);
> +		}
> +	}
> +	mutex_unlock(&kprobe_ei_mutex);
> +}
> +#else
> +static inline void __init populate_kernel_kprobe_ei_list(void) {}
> +static inline void module_load_kprobe_ei_list(struct module *m) {}
> +static inline void module_unload_kprobe_ei_list(struct module *m) {}
> +#endif
> +
>  /* Module notifier call back, checking kprobes on the module */
>  static int kprobes_module_callback(struct notifier_block *nb,
>  				   unsigned long val, void *data)
> @@ -2178,6 +2279,11 @@ static int kprobes_module_callback(struct notifier_block *nb,
>  	unsigned int i;
>  	int checkcore = (val == MODULE_STATE_GOING);
>  
> +	if (val == MODULE_STATE_COMING)
> +		module_load_kprobe_ei_list(mod);
> +	else if (val == MODULE_STATE_GOING)
> +		module_unload_kprobe_ei_list(mod);
> +
>  	if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
>  		return NOTIFY_DONE;
>  
> @@ -2240,6 +2346,8 @@ static int __init init_kprobes(void)
>  		pr_err("Please take care of using kprobes.\n");
>  	}
>  
> +	populate_kernel_kprobe_ei_list();
> +
>  	if (kretprobe_blacklist_size) {
>  		/* lookup the function address from its name */
>  		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
> @@ -2407,6 +2515,56 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
>  	.release        = seq_release,
>  };
>  
> +/*
> + * kprobes/error_injection_list -- shows which functions can be overriden for
> + * error injection.
> + * */
> +static void *kprobe_ei_seq_start(struct seq_file *m, loff_t *pos)
> +{
> +	mutex_lock(&kprobe_ei_mutex);
> +	return seq_list_start(&kprobe_error_injection_list, *pos);
> +}
> +
> +static void kprobe_ei_seq_stop(struct seq_file *m, void *v)
> +{
> +	mutex_unlock(&kprobe_ei_mutex);
> +}
> +
> +static void *kprobe_ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
> +{
> +	return seq_list_next(v, &kprobe_error_injection_list, pos);
> +}
> +
> +static int kprobe_ei_seq_show(struct seq_file *m, void *v)
> +{
> +	char buffer[KSYM_SYMBOL_LEN];
> +	struct kprobe_ei_entry *ent =
> +		list_entry(v, struct kprobe_ei_entry, list);
> +
> +	sprint_symbol(buffer, ent->start_addr);
> +	seq_printf(m, "%s\n", buffer);
> +	return 0;
> +}
> +
> +static const struct seq_operations kprobe_ei_seq_ops = {
> +	.start = kprobe_ei_seq_start,
> +	.next  = kprobe_ei_seq_next,
> +	.stop  = kprobe_ei_seq_stop,
> +	.show  = kprobe_ei_seq_show,
> +};
> +
> +static int kprobe_ei_open(struct inode *inode, struct file *filp)
> +{
> +	return seq_open(filp, &kprobe_ei_seq_ops);
> +}
> +
> +static const struct file_operations debugfs_kprobe_ei_ops = {
> +	.open           = kprobe_ei_open,
> +	.read           = seq_read,
> +	.llseek         = seq_lseek,
> +	.release        = seq_release,
> +};
> +
>  static void arm_all_kprobes(void)
>  {
>  	struct hlist_head *head;
> @@ -2548,6 +2706,11 @@ static int __init debugfs_kprobe_init(void)
>  	if (!file)
>  		goto error;
>  
> +	file = debugfs_create_file("error_injection_list", 0444, dir, NULL,
> +				  &debugfs_kprobe_ei_ops);
> +	if (!file)
> +		goto error;
> +
>  	return 0;
>  
>  error:
> diff --git a/kernel/module.c b/kernel/module.c
> index dea01ac9cb74..bd695bfdc5c4 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -3118,7 +3118,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
>  					     sizeof(*mod->ftrace_callsites),
>  					     &mod->num_ftrace_callsites);
>  #endif
> -
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +	mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list",
> +					    sizeof(*mod->kprobe_ei_funcs),
> +					    &mod->num_kprobe_ei_funcs);
> +#endif
>  	mod->extable = section_objs(info, "__ex_table",
>  				    sizeof(*mod->extable), &mod->num_exentries);
>  
> -- 
> 2.7.5
>
Alexei Starovoitov Dec. 20, 2017, 5:22 p.m. UTC | #7
On 12/19/17 11:13 PM, Masami Hiramatsu wrote:
> On Tue, 19 Dec 2017 18:14:17 -0800
> Alexei Starovoitov <ast@fb.com> wrote:
>
>> On 12/18/17 10:29 PM, Masami Hiramatsu wrote:
>>>>
>>>> +#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
>>>> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
>>>
>>> BTW, CONFIG_BPF_KPROBE_OVERRIDE is also confusable name.
>>> Since this feature override a function to just return with
>>> some return value (as far as I understand, or would you
>>> also plan to modify execution path inside a function?),
>>> I think it should be better CONFIG_BPF_FUNCTION_OVERRIDE or
>>> CONFIG_BPF_EXECUTION_OVERRIDE.
>>
>> I don't think such renaming makes sense.
>> The feature is overriding kprobe by changing how kprobe returns.
>> It doesn't override BPF_FUNCTION or BPF_EXECUTION.
>
> No, I meant this is BPF's feature which override FUNCTION, so
> BPF is a kind of namespace. (Is that only for a function entry
> because it can not tweak stackframe at this morment?)
>
>> The kernel enters and exists bpf program as normal.
>
> Yeah, but that bpf program modifies instruction pointer, am I correct?

no. bpf side is asking kprobe side to modify it.
bpf cannot do such things as modifying IP or any other register
directly.

>>
>>> Indeed, BPF is based on kprobes, but it seems you are limiting it
>>> with ftrace (function-call trace) (I'm not sure the reason why),
>>> so using "kprobes" for this feature seems strange for me.
>>
>> do you have an idea how kprobe override can happen when kprobe
>> placed in the middle of the function?
>
> For example, if you know a basic block in the function, maybe
> you can skip a block or something like that. But nowadays
> it is somewhat hard because optimizer mixed it up.

still missing how that can work...
Alexei Starovoitov Dec. 20, 2017, 5:33 p.m. UTC | #8
On 12/20/17 3:00 AM, Masami Hiramatsu wrote:
> On Fri, 15 Dec 2017 14:12:52 -0500
> Josef Bacik <josef@toxicpanda.com> wrote:
>
>> From: Josef Bacik <jbacik@fb.com>
>>
>> Using BPF we can override kprob'ed functions and return arbitrary
>> values.  Obviously this can be a bit unsafe, so make this feature opt-in
>> for functions.  Simply tag a function with KPROBE_ERROR_INJECT_SYMBOL in
>> order to give BPF access to that function for error injection purposes.
>>
>> Signed-off-by: Josef Bacik <jbacik@fb.com>
>> Acked-by: Ingo Molnar <mingo@kernel.org>
>> ---
>>  include/asm-generic/vmlinux.lds.h |  10 +++
>>  include/linux/bpf.h               |  11 +++
>>  include/linux/kprobes.h           |   1 +
>>  include/linux/module.h            |   5 ++
>>  kernel/kprobes.c                  | 163 ++++++++++++++++++++++++++++++++++++++
>>  kernel/module.c                   |   6 +-
>>  6 files changed, 195 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
>> index ee8b707d9fa9..a2e8582d094a 100644
>> --- a/include/asm-generic/vmlinux.lds.h
>> +++ b/include/asm-generic/vmlinux.lds.h
>> @@ -136,6 +136,15 @@
>>  #define KPROBE_BLACKLIST()
>>  #endif
>>
>> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
>> +#define ERROR_INJECT_LIST()	. = ALIGN(8);						\
>> +				VMLINUX_SYMBOL(__start_kprobe_error_inject_list) = .;	\
>> +				KEEP(*(_kprobe_error_inject_list))			\
>> +				VMLINUX_SYMBOL(__stop_kprobe_error_inject_list) = .;
>> +#else
>> +#define ERROR_INJECT_LIST()
>> +#endif
>> +
>>  #ifdef CONFIG_EVENT_TRACING
>>  #define FTRACE_EVENTS()	. = ALIGN(8);					\
>>  			VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
>> @@ -564,6 +573,7 @@
>>  	FTRACE_EVENTS()							\
>>  	TRACE_SYSCALLS()						\
>>  	KPROBE_BLACKLIST()						\
>> +	ERROR_INJECT_LIST()						\
>>  	MEM_DISCARD(init.rodata)					\
>>  	CLK_OF_TABLES()							\
>>  	RESERVEDMEM_OF_TABLES()						\
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index e55e4255a210..7f4d2a953173 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -576,4 +576,15 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
>>  void bpf_user_rnd_init_once(void);
>>  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>>
>> +#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
>> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
>> +#define BPF_ALLOW_ERROR_INJECTION(fname)				\
>> +static unsigned long __used						\
>> +	__attribute__((__section__("_kprobe_error_inject_list")))	\
>> +	_eil_addr_##fname = (unsigned long)fname;
>> +#else
>> +#define BPF_ALLOW_ERROR_INJECTION(fname)
>> +#endif
>> +#endif
>
> This part shows this feature belongs to bpf, if it is a part of kprobes,
> it should be defined in include/asm-generic/kprobes.h as NOKPROBE_SYMBOL
> does.
>
> Why this is defined in BPF, but list is under kprobes?

because Ingo specifically requested that macro that marks the function
will be in bpf.h, so any .c file that starts adding such marks will
include that file instead of pulling stuff from kprobe.

>
> So there is no direct relationship with kprobe.
> For example, kprobe user modules can OVERRIDE any functions.
> And there is no generic error injection code in the kernel
> except for the bpf currently.

_currently_ is key word.

> Of course, I can accept this code if you accept that I make a
> generic error injection code on ftrace without BPF.

what stops other pieces of kernel to use the same technique?
The bpf verifier coupled together with opt-in
per-function marks via BPF_ALLOW_ERROR_INJECTION
give _safe_ way to do error injection.

I can imagine how you can hack kprobe text based interface to
use the same technique, but I suggest to wait and see how we
build on it in bpf land before replicating things in
pure kprobe land.
diff mbox series

Patch

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index ee8b707d9fa9..a2e8582d094a 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -136,6 +136,15 @@ 
 #define KPROBE_BLACKLIST()
 #endif
 
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+#define ERROR_INJECT_LIST()	. = ALIGN(8);						\
+				VMLINUX_SYMBOL(__start_kprobe_error_inject_list) = .;	\
+				KEEP(*(_kprobe_error_inject_list))			\
+				VMLINUX_SYMBOL(__stop_kprobe_error_inject_list) = .;
+#else
+#define ERROR_INJECT_LIST()
+#endif
+
 #ifdef CONFIG_EVENT_TRACING
 #define FTRACE_EVENTS()	. = ALIGN(8);					\
 			VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
@@ -564,6 +573,7 @@ 
 	FTRACE_EVENTS()							\
 	TRACE_SYSCALLS()						\
 	KPROBE_BLACKLIST()						\
+	ERROR_INJECT_LIST()						\
 	MEM_DISCARD(init.rodata)					\
 	CLK_OF_TABLES()							\
 	RESERVEDMEM_OF_TABLES()						\
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e55e4255a210..7f4d2a953173 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -576,4 +576,15 @@  extern const struct bpf_func_proto bpf_sock_map_update_proto;
 void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+#define BPF_ALLOW_ERROR_INJECTION(fname)				\
+static unsigned long __used						\
+	__attribute__((__section__("_kprobe_error_inject_list")))	\
+	_eil_addr_##fname = (unsigned long)fname;
+#else
+#define BPF_ALLOW_ERROR_INJECTION(fname)
+#endif
+#endif
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 9440a2fc8893..963fd364f3d6 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -271,6 +271,7 @@  extern bool arch_kprobe_on_func_entry(unsigned long offset);
 extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
 
 extern bool within_kprobe_blacklist(unsigned long addr);
+extern bool within_kprobe_error_injection_list(unsigned long addr);
 
 struct kprobe_insn_cache {
 	struct mutex mutex;
diff --git a/include/linux/module.h b/include/linux/module.h
index c69b49abe877..548fa09fa806 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -475,6 +475,11 @@  struct module {
 	ctor_fn_t *ctors;
 	unsigned int num_ctors;
 #endif
+
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+	unsigned int num_kprobe_ei_funcs;
+	unsigned long *kprobe_ei_funcs;
+#endif
 } ____cacheline_aligned __randomize_layout;
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index da2ccf142358..b4aab48ad258 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -83,6 +83,16 @@  static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
 	return &(kretprobe_table_locks[hash].lock);
 }
 
+/* List of symbols that can be overriden for error injection. */
+static LIST_HEAD(kprobe_error_injection_list);
+static DEFINE_MUTEX(kprobe_ei_mutex);
+struct kprobe_ei_entry {
+	struct list_head list;
+	unsigned long start_addr;
+	unsigned long end_addr;
+	void *priv;
+};
+
 /* Blacklist -- list of struct kprobe_blacklist_entry */
 static LIST_HEAD(kprobe_blacklist);
 
@@ -1394,6 +1404,17 @@  bool within_kprobe_blacklist(unsigned long addr)
 	return false;
 }
 
+bool within_kprobe_error_injection_list(unsigned long addr)
+{
+	struct kprobe_ei_entry *ent;
+
+	list_for_each_entry(ent, &kprobe_error_injection_list, list) {
+		if (addr >= ent->start_addr && addr < ent->end_addr)
+			return true;
+	}
+	return false;
+}
+
 /*
  * If we have a symbol_name argument, look it up and add the offset field
  * to it. This way, we can specify a relative address to a symbol.
@@ -2168,6 +2189,86 @@  static int __init populate_kprobe_blacklist(unsigned long *start,
 	return 0;
 }
 
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+/* Markers of the _kprobe_error_inject_list section */
+extern unsigned long __start_kprobe_error_inject_list[];
+extern unsigned long __stop_kprobe_error_inject_list[];
+
+/*
+ * Lookup and populate the kprobe_error_injection_list.
+ *
+ * For safety reasons we only allow certain functions to be overriden with
+ * bpf_error_injection, so we need to populate the list of the symbols that have
+ * been marked as safe for overriding.
+ */
+static void populate_kprobe_error_injection_list(unsigned long *start,
+						 unsigned long *end,
+						 void *priv)
+{
+	unsigned long *iter;
+	struct kprobe_ei_entry *ent;
+	unsigned long entry, offset = 0, size = 0;
+
+	mutex_lock(&kprobe_ei_mutex);
+	for (iter = start; iter < end; iter++) {
+		entry = arch_deref_entry_point((void *)*iter);
+
+		if (!kernel_text_address(entry) ||
+		    !kallsyms_lookup_size_offset(entry, &size, &offset)) {
+			pr_err("Failed to find error inject entry at %p\n",
+				(void *)entry);
+			continue;
+		}
+
+		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+		if (!ent)
+			break;
+		ent->start_addr = entry;
+		ent->end_addr = entry + size;
+		ent->priv = priv;
+		INIT_LIST_HEAD(&ent->list);
+		list_add_tail(&ent->list, &kprobe_error_injection_list);
+	}
+	mutex_unlock(&kprobe_ei_mutex);
+}
+
+static void __init populate_kernel_kprobe_ei_list(void)
+{
+	populate_kprobe_error_injection_list(__start_kprobe_error_inject_list,
+					     __stop_kprobe_error_inject_list,
+					     NULL);
+}
+
+static void module_load_kprobe_ei_list(struct module *mod)
+{
+	if (!mod->num_kprobe_ei_funcs)
+		return;
+	populate_kprobe_error_injection_list(mod->kprobe_ei_funcs,
+					     mod->kprobe_ei_funcs +
+					     mod->num_kprobe_ei_funcs, mod);
+}
+
+static void module_unload_kprobe_ei_list(struct module *mod)
+{
+	struct kprobe_ei_entry *ent, *n;
+	if (!mod->num_kprobe_ei_funcs)
+		return;
+
+	mutex_lock(&kprobe_ei_mutex);
+	list_for_each_entry_safe(ent, n, &kprobe_error_injection_list, list) {
+		if (ent->priv == mod) {
+			list_del_init(&ent->list);
+			kfree(ent);
+		}
+	}
+	mutex_unlock(&kprobe_ei_mutex);
+}
+#else
+static inline void __init populate_kernel_kprobe_ei_list(void) {}
+static inline void module_load_kprobe_ei_list(struct module *m) {}
+static inline void module_unload_kprobe_ei_list(struct module *m) {}
+#endif
+
 /* Module notifier call back, checking kprobes on the module */
 static int kprobes_module_callback(struct notifier_block *nb,
 				   unsigned long val, void *data)
@@ -2178,6 +2279,11 @@  static int kprobes_module_callback(struct notifier_block *nb,
 	unsigned int i;
 	int checkcore = (val == MODULE_STATE_GOING);
 
+	if (val == MODULE_STATE_COMING)
+		module_load_kprobe_ei_list(mod);
+	else if (val == MODULE_STATE_GOING)
+		module_unload_kprobe_ei_list(mod);
+
 	if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
 		return NOTIFY_DONE;
 
@@ -2240,6 +2346,8 @@  static int __init init_kprobes(void)
 		pr_err("Please take care of using kprobes.\n");
 	}
 
+	populate_kernel_kprobe_ei_list();
+
 	if (kretprobe_blacklist_size) {
 		/* lookup the function address from its name */
 		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
@@ -2407,6 +2515,56 @@  static const struct file_operations debugfs_kprobe_blacklist_ops = {
 	.release        = seq_release,
 };
 
+/*
+ * kprobes/error_injection_list -- shows which functions can be overriden for
+ * error injection.
+ * */
+static void *kprobe_ei_seq_start(struct seq_file *m, loff_t *pos)
+{
+	mutex_lock(&kprobe_ei_mutex);
+	return seq_list_start(&kprobe_error_injection_list, *pos);
+}
+
+static void kprobe_ei_seq_stop(struct seq_file *m, void *v)
+{
+	mutex_unlock(&kprobe_ei_mutex);
+}
+
+static void *kprobe_ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return seq_list_next(v, &kprobe_error_injection_list, pos);
+}
+
+static int kprobe_ei_seq_show(struct seq_file *m, void *v)
+{
+	char buffer[KSYM_SYMBOL_LEN];
+	struct kprobe_ei_entry *ent =
+		list_entry(v, struct kprobe_ei_entry, list);
+
+	sprint_symbol(buffer, ent->start_addr);
+	seq_printf(m, "%s\n", buffer);
+	return 0;
+}
+
+static const struct seq_operations kprobe_ei_seq_ops = {
+	.start = kprobe_ei_seq_start,
+	.next  = kprobe_ei_seq_next,
+	.stop  = kprobe_ei_seq_stop,
+	.show  = kprobe_ei_seq_show,
+};
+
+static int kprobe_ei_open(struct inode *inode, struct file *filp)
+{
+	return seq_open(filp, &kprobe_ei_seq_ops);
+}
+
+static const struct file_operations debugfs_kprobe_ei_ops = {
+	.open           = kprobe_ei_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = seq_release,
+};
+
 static void arm_all_kprobes(void)
 {
 	struct hlist_head *head;
@@ -2548,6 +2706,11 @@  static int __init debugfs_kprobe_init(void)
 	if (!file)
 		goto error;
 
+	file = debugfs_create_file("error_injection_list", 0444, dir, NULL,
+				  &debugfs_kprobe_ei_ops);
+	if (!file)
+		goto error;
+
 	return 0;
 
 error:
diff --git a/kernel/module.c b/kernel/module.c
index dea01ac9cb74..bd695bfdc5c4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3118,7 +3118,11 @@  static int find_module_sections(struct module *mod, struct load_info *info)
 					     sizeof(*mod->ftrace_callsites),
 					     &mod->num_ftrace_callsites);
 #endif
-
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+	mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list",
+					    sizeof(*mod->kprobe_ei_funcs),
+					    &mod->num_kprobe_ei_funcs);
+#endif
 	mod->extable = section_objs(info, "__ex_table",
 				    sizeof(*mod->extable), &mod->num_exentries);