diff mbox series

[RFC,bpf-next,v3,03/12] bpf: verifier: remove dead code

Message ID 20181229030923.4804-4-jakub.kicinski@netronome.com
State RFC, archived
Delegated to: BPF Maintainers
Headers show
Series bpf: dead code elimination | expand

Commit Message

Jakub Kicinski Dec. 29, 2018, 3:09 a.m. UTC
Instead of overwriting dead code with jmp -1 instructions
remove it completely for root.  Adjust verifier state and
line info appropriately.

v2:
 - adjust func_info (Alexei);
 - make sure first instruction retains line info (Alexei).

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 include/linux/filter.h |   1 +
 kernel/bpf/core.c      |  12 +++
 kernel/bpf/verifier.c  | 197 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 207 insertions(+), 3 deletions(-)

Comments

Yonghong Song Dec. 30, 2018, 10:02 p.m. UTC | #1
On 12/28/18 7:09 PM, Jakub Kicinski wrote:
> Instead of overwriting dead code with jmp -1 instructions
> remove it completely for root.  Adjust verifier state and
> line info appropriately.
> 
> v2:
>   - adjust func_info (Alexei);
>   - make sure first instruction retains line info (Alexei).
> 
> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> ---
>   include/linux/filter.h |   1 +
>   kernel/bpf/core.c      |  12 +++
>   kernel/bpf/verifier.c  | 197 ++++++++++++++++++++++++++++++++++++++++-
>   3 files changed, 207 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 8c8544b375eb..2cdb50bbf867 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -782,6 +782,7 @@ static inline bool bpf_dump_raw_ok(void)
>   
>   struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
>   				       const struct bpf_insn *patch, u32 len);
> +int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt);
>   
>   void bpf_clear_redirect_map(struct bpf_map *map);
>   
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index a40057b2c556..cc6fa754627c 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -461,6 +461,18 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
>   	return prog_adj;
>   }
>   
> +int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
> +{
> +	/* Branch offsets can't overflow when program is shrinking, no need
> +	 * to call bpf_adj_branches(..., true) here
> +	 */
> +	memmove(prog->insnsi + off, prog->insnsi + off + cnt,
> +		sizeof(struct bpf_insn) * (prog->len - off - cnt));
> +	prog->len -= cnt;
> +
> +	return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
> +}
> +
>   void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
>   {
>   	int i;
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 30e2cd399b4a..a3880c6dac97 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -6233,6 +6233,171 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
>   	return new_prog;
>   }
>   
> +static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
> +					      u32 off, u32 cnt)
> +{
> +	int i, j;
> +
> +	/* find first prog starting at or after off (first to remove) */
> +	for (i = 0; i < env->subprog_cnt; i++)
> +		if (env->subprog_info[i].start >= off)
> +			break;
> +	/* find first prog starting at or after off + cnt (first to stay) */
> +	for (j = i; j < env->subprog_cnt; j++)
> +		if (env->subprog_info[j].start >= off + cnt)
> +			break;
> +	/* if j doesn't start exactly at off + cnt, we are just removing
> +	 * the front of previous prog
> +	 */
> +	if (env->subprog_info[j].start != off + cnt)
> +		j--;
> +
> +	if (j > i) {
> +		struct bpf_prog_aux *aux = env->prog->aux;
> +		int move;
> +
> +		/* move fake 'exit' subprog as well */
> +		move = env->subprog_cnt + 1 - j;
> +
> +		memmove(env->subprog_info + i,
> +			env->subprog_info + j,
> +			sizeof(*env->subprog_info) * move);
> +		env->subprog_cnt -= j - i;
> +
> +		/* remove func_info */
> +		if (aux->func_info) {
> +			move = aux->func_info_cnt - j;
> +
> +			memmove(aux->func_info + i,
> +				aux->func_info + j,
> +				sizeof(*aux->func_info) * move);
> +			aux->func_info_cnt -= j - i;
> +		}
> +	} else {
> +		/* convert i from "first prog to remove" to "first to adjust" */
> +		if (env->subprog_info[i].start == off)
> +			i++;
> +	}
> +
> +	/* update fake 'exit' subprog as well */
> +	for (; i <= env->subprog_cnt; i++)
> +		env->subprog_info[i].start -= cnt;
> +
> +	return 0;
> +}

adjust_subprog_starts_after_remove() and update func_info looks correct 
to me.

> +
> +static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
> +				      u32 cnt)
> +{
> +	struct bpf_subprog_info *need_first_linfo;
> +	struct bpf_prog *prog = env->prog;
> +	u32 i, l_off, l_cnt, nr_linfo;
> +	struct bpf_line_info *linfo;
> +
> +	nr_linfo = prog->aux->nr_linfo;
> +	if (!nr_linfo || !cnt)
> +		return 0;
> +
> +	linfo = prog->aux->linfo;
> +
> +	/* progs are already adjusted/removed, if program starts on off, it may
> +	 * had its start cut off and its line info may need to be preserved
> +	 */
> +	for (i = 0; i < env->subprog_cnt; i++)
> +		if (env->subprog_info[i].start >= off)
> +			break;
> +	if (i < env->subprog_cnt && env->subprog_info[i].start == off)
> +		need_first_linfo = &env->subprog_info[i];
> +	else
> +		need_first_linfo = NULL;
> +
> +	/* find first line info to remove */
> +	for (i = 0; i < nr_linfo; i++)
> +		if (linfo[i].insn_off >= off)
> +			break;
> +
> +	/* count lines to be removed */
> +	l_off = i;
> +	l_cnt = 0;
> +	for (; i < nr_linfo; i++)
> +		if (linfo[i].insn_off < off + cnt)
> +			l_cnt++;
> +		else
> +			break;
> +
> +	/* either we didn't actually cut the start or we can just use line info
> +	 * of first instruction if it exists
> +	 */
> +	if (i < nr_linfo && linfo[i].insn_off == off + cnt)
> +		need_first_linfo = NULL;
> +	if (need_first_linfo) {
> +		if (WARN_ONCE(!l_cnt,
> +			      "verifier bug - no linfo removed, yet its missing"))
> +			return -EINVAL;
> +		if (WARN_ONCE(need_first_linfo->linfo_idx < l_off ||
> +			      need_first_linfo->linfo_idx >= l_off + l_cnt,
> +			      "verifier bug - removed prog linfo not in removed range"))
> +			return -EINVAL;
> +		/* subprog linfo_idx is not adjusted yet, so just find out
> +		 * which line it used to be and swap it
> +		 */
> +		memmove(linfo + l_off, linfo + need_first_linfo->linfo_idx,
> +			sizeof(*linfo));
> +		need_first_linfo->linfo_idx = l_off;
> +		linfo[l_off].insn_off = off;
> +
> +		l_off++;
> +		l_cnt--;
> +	}

In this routine, we handle need_first_linfo if the first insn of a 
subprogram is deleted.
I wonder whether we need to handle the last deleted linfo as well. For 
example:
    func1:
      line_info1:
         insn1
         insn2
    func2:
      line_info2:
         insn3
         insn4:

if insn2 and insn3 are deleted, the above algorithm will produce:
     func1:
       line_info1:
        insn1
     func2:
        insn4

func2 is missing the first line info, which should be line_info2 with
adjusted insn offset to the start of func2.

Another example:
    func1:
       line_info1:
          insn1
          insn2
       line_info2:
          insn3
          insn4
If insn2 and insn3 are deleted, we will get
     func1:
       line_info1:
         insn1
         insn4
This is not ideal either. We should attribute insn4 to line_info2.

> +
> +	/* remove the line info which refers to the removed instructions */
> +	if (l_cnt) {
> +		memmove(linfo + l_off, linfo + i,
> +			sizeof(*linfo) * (nr_linfo - i));
> +
> +		prog->aux->nr_linfo -= l_cnt;
> +		nr_linfo = prog->aux->nr_linfo;
> +	}
> +
> +	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
> +	for (i = l_off; i < nr_linfo; i++)
> +		linfo[i].insn_off -= cnt;
> +
> +	/* fix up all subprogs (incl. 'exit') which start >= off */
> +	for (i = 0; i <= env->subprog_cnt; i++)
> +		if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
> +			env->subprog_info[i].linfo_idx -= l_cnt;
> +
> +	return 0;
> +}
> +
> +static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
> +{
> +	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
> +	unsigned int orig_prog_len = env->prog->len;
> +	int err;
> +
> +	if (!cnt)
> +		return 0;

This check is probably not needed as all call sites (including
patch #4) guarantees non-zero cnt.

> +
> +	err = bpf_remove_insns(env->prog, off, cnt);
> +	if (err)
> +		return err;
> +
> +	err = adjust_subprog_starts_after_remove(env, off, cnt);
> +	if (err)
> +		return err;
> +
> +	err = bpf_adj_linfo_after_remove(env, off, cnt);
> +	if (err)
> +		return err;
> +
> +	memmove(aux_data + off,	aux_data + off + cnt,
> +		sizeof(*aux_data) * (orig_prog_len - off - cnt));
> +
> +	return 0;
> +}
> +
>   /* The verifier does more data flow analysis than llvm and will not
>    * explore branches that are dead at run time. Malicious programs can
>    * have dead code too. Therefore replace all dead at-run-time code
> @@ -6293,6 +6458,30 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
>   	}
>   }
>   
> +static int opt_remove_dead_code(struct bpf_verifier_env *env)
> +{
> +	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
> +	int insn_cnt = env->prog->len;
> +	int i, err;
> +
> +	for (i = 0; i < insn_cnt; i++) {
> +		int j;
> +
> +		j = 0;
> +		while (i + j < insn_cnt && !aux_data[i + j].seen)
> +			j++;
> +		if (!j)
> +			continue;
> +
> +		err = verifier_remove_insns(env, i, j);
> +		if (err)
> +			return err;
> +		insn_cnt = env->prog->len;
> +	}
> +
> +	return 0;
> +}
> +
>   /* convert load instructions that access fields of a context type into a
>    * sequence of instructions that access fields of the underlying structure:
>    *     struct __sk_buff    -> struct sk_buff
> @@ -7032,11 +7221,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
>   	if (is_priv) {
>   		if (ret == 0)
>   			opt_hard_wire_dead_code_branches(env);
> +		if (ret == 0)
> +			ret = opt_remove_dead_code(env);
> +	} else {
> +		if (ret == 0)
> +			sanitize_dead_code(env);
>   	}
>   
> -	if (ret == 0)
> -		sanitize_dead_code(env);
> -
>   	if (ret == 0)
>   		/* program is valid, convert *(u32*)(ctx + off) accesses */
>   		ret = convert_ctx_accesses(env);
>
Jakub Kicinski Dec. 31, 2018, 8:31 p.m. UTC | #2
On Sun, 30 Dec 2018 22:02:10 +0000, Yonghong Song wrote:
> On 12/28/18 7:09 PM, Jakub Kicinski wrote:
> > +static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
> > +				      u32 cnt)
> > +{
> > +	struct bpf_subprog_info *need_first_linfo;
> > +	struct bpf_prog *prog = env->prog;
> > +	u32 i, l_off, l_cnt, nr_linfo;
> > +	struct bpf_line_info *linfo;
> > +
> > +	nr_linfo = prog->aux->nr_linfo;
> > +	if (!nr_linfo || !cnt)
> > +		return 0;
> > +
> > +	linfo = prog->aux->linfo;
> > +
> > +	/* progs are already adjusted/removed, if program starts on off, it may
> > +	 * had its start cut off and its line info may need to be preserved
> > +	 */
> > +	for (i = 0; i < env->subprog_cnt; i++)
> > +		if (env->subprog_info[i].start >= off)
> > +			break;
> > +	if (i < env->subprog_cnt && env->subprog_info[i].start == off)
> > +		need_first_linfo = &env->subprog_info[i];
> > +	else
> > +		need_first_linfo = NULL;
> > +
> > +	/* find first line info to remove */
> > +	for (i = 0; i < nr_linfo; i++)
> > +		if (linfo[i].insn_off >= off)
> > +			break;
> > +
> > +	/* count lines to be removed */
> > +	l_off = i;
> > +	l_cnt = 0;
> > +	for (; i < nr_linfo; i++)
> > +		if (linfo[i].insn_off < off + cnt)
> > +			l_cnt++;
> > +		else
> > +			break;
> > +
> > +	/* either we didn't actually cut the start or we can just use line info
> > +	 * of first instruction if it exists
> > +	 */
> > +	if (i < nr_linfo && linfo[i].insn_off == off + cnt)
> > +		need_first_linfo = NULL;
> > +	if (need_first_linfo) {
> > +		if (WARN_ONCE(!l_cnt,
> > +			      "verifier bug - no linfo removed, yet its missing"))
> > +			return -EINVAL;
> > +		if (WARN_ONCE(need_first_linfo->linfo_idx < l_off ||
> > +			      need_first_linfo->linfo_idx >= l_off + l_cnt,
> > +			      "verifier bug - removed prog linfo not in removed range"))
> > +			return -EINVAL;
> > +		/* subprog linfo_idx is not adjusted yet, so just find out
> > +		 * which line it used to be and swap it
> > +		 */
> > +		memmove(linfo + l_off, linfo + need_first_linfo->linfo_idx,
> > +			sizeof(*linfo));
> > +		need_first_linfo->linfo_idx = l_off;
> > +		linfo[l_off].insn_off = off;
> > +
> > +		l_off++;
> > +		l_cnt--;
> > +	}  
> 
> In this routine, we handle need_first_linfo if the first insn of a 
> subprogram is deleted.
> I wonder whether we need to handle the last deleted linfo as well. For 
> example:
>     func1:
>       line_info1:
>          insn1
>          insn2
>     func2:
>       line_info2:
>          insn3
>          insn4:
> 
> if insn2 and insn3 are deleted, the above algorithm will produce:
>      func1:
>        line_info1:
>         insn1
>      func2:
>         insn4
> 
> func2 is missing the first line info, which should be line_info2 with
> adjusted insn offset to the start of func2.

Mm.. should not happen, we should end up with:

     func1:
       line_info1:
         insn1
     func2:
       line_info2:
         insn4

func2 after func adjust will start at off and there is no line info for
off + cnt (insn4), so we will preserve line_info2.

I've added the following test_btf test, just to be sure:

{
	.descr = "line_info (dead end + subprog start w/ no linfo)",
	.raw_types = {
		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
		BTF_FUNC_PROTO_ENC(1, 1),			/* [2] */
			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
		BTF_FUNC_ENC(NAME_TBD, 2),			/* [4] */
		BTF_END_RAW,
	},
	BTF_STR_SEC("\0int\0x\0main\0func\0/* main linfo */\0/* func linfo */"),
	.insns = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 1, 2), /* dead */
		BPF_CALL_REL(2),
		BPF_EXIT_INSN(),
		BPF_EXIT_INSN(), /* dead */
		/* func */
		BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* dead */
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	},
	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
	.func_info_cnt = 2,
	.func_info_rec_size = 8,
	.func_info = { {0, 3}, {5, 4}, },
	.line_info = {
		BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10),
		BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10),
		BTF_END_RAW,
	},
	.line_info_rec_size = sizeof(struct bpf_line_info),
	.nr_jited_ksyms = 2,
},

~# bpftool prog dump xlated id 48
int main(int x):
; /* main linfo */
   0: (b7) r0 = 0
   1: (85) call pc+1#0xffffffffc041e782
   2: (95) exit
int func(int x):
; /* func linfo */
   3: (b7) r0 = 0
   4: (95) exit

~# bpftool prog dump jited id 48
int main(int x):
0xffffffffc041cb2f:
; /* main linfo */
   0:	push   %rbp
   1:	mov    %rsp,%rbp
   4:	sub    $0x28,%rsp
   b:	sub    $0x28,%rbp
   f:	mov    %rbx,0x0(%rbp)
  13:	mov    %r13,0x8(%rbp)
  17:	mov    %r14,0x10(%rbp)
  1b:	mov    %r15,0x18(%rbp)
  1f:	xor    %eax,%eax
  21:	mov    %rax,0x20(%rbp)
  25:	xor    %eax,%eax
  27:	callq  0x0000000000001c53
  2c:	mov    0x0(%rbp),%rbx
  30:	mov    0x8(%rbp),%r13
  34:	mov    0x10(%rbp),%r14
  38:	mov    0x18(%rbp),%r15
  3c:	add    $0x28,%rbp
  40:	leaveq 
  41:	retq   

int func(int x):
0xffffffffc041e782:
; /* func linfo */
   0:	push   %rbp
   1:	mov    %rsp,%rbp
   4:	sub    $0x28,%rsp
   b:	sub    $0x28,%rbp
   f:	mov    %rbx,0x0(%rbp)
  13:	mov    %r13,0x8(%rbp)
  17:	mov    %r14,0x10(%rbp)
  1b:	mov    %r15,0x18(%rbp)
  1f:	xor    %eax,%eax
  21:	mov    %rax,0x20(%rbp)
  25:	xor    %eax,%eax
  27:	mov    0x0(%rbp),%rbx
  2b:	mov    0x8(%rbp),%r13
  2f:	mov    0x10(%rbp),%r14
  33:	mov    0x18(%rbp),%r15
  37:	add    $0x28,%rbp
  3b:	leaveq 
  3c:	retq   

> Another example:
>     func1:
>        line_info1:
>           insn1
>           insn2
>        line_info2:
>           insn3
>           insn4
> If insn2 and insn3 are deleted, we will get
>      func1:
>        line_info1:
>          insn1
>          insn4
> This is not ideal either. We should attribute insn4 to line_info2.

Cool, I'm a little in the dark on what the definition of line info is,
so your suggestions are very much appreciated!  If you're saying that
all instructions below line info are considered part of that "line",
that'd simplify the code.

So should I always preserve "last" line info if first live instruction
doesn't have one?  

linfo1:               
	insn1         
	insn2 /* dead */
linfo2:                 
	insn3 /* dead */
	insn4           
linfo3:                        
	insn5

  ||
  \/

linfo1:               
	insn1         
linfo2:                 
	insn4           
linfo3:                        
	insn5


That'd simplify things quite a bit!

> > +
> > +	/* remove the line info which refers to the removed instructions */
> > +	if (l_cnt) {
> > +		memmove(linfo + l_off, linfo + i,
> > +			sizeof(*linfo) * (nr_linfo - i));
> > +
> > +		prog->aux->nr_linfo -= l_cnt;
> > +		nr_linfo = prog->aux->nr_linfo;
> > +	}
> > +
> > +	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
> > +	for (i = l_off; i < nr_linfo; i++)
> > +		linfo[i].insn_off -= cnt;
> > +
> > +	/* fix up all subprogs (incl. 'exit') which start >= off */
> > +	for (i = 0; i <= env->subprog_cnt; i++)
> > +		if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
> > +			env->subprog_info[i].linfo_idx -= l_cnt;
> > +
> > +	return 0;
> > +}
> > +
> > +static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
> > +{
> > +	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
> > +	unsigned int orig_prog_len = env->prog->len;
> > +	int err;
> > +
> > +	if (!cnt)
> > +		return 0;  
> 
> This check is probably not needed as all call sites (including
> patch #4) guarantees non-zero cnt.

Ack, thanks for the review, I was unsure if this is not convention.
Yonghong Song Dec. 31, 2018, 9:41 p.m. UTC | #3
On 12/31/18 12:31 PM, Jakub Kicinski wrote:
> On Sun, 30 Dec 2018 22:02:10 +0000, Yonghong Song wrote:
>> On 12/28/18 7:09 PM, Jakub Kicinski wrote:
>>> +static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
>>> +				      u32 cnt)
>>> +{
>>> +	struct bpf_subprog_info *need_first_linfo;
>>> +	struct bpf_prog *prog = env->prog;
>>> +	u32 i, l_off, l_cnt, nr_linfo;
>>> +	struct bpf_line_info *linfo;
>>> +
>>> +	nr_linfo = prog->aux->nr_linfo;
>>> +	if (!nr_linfo || !cnt)
>>> +		return 0;
>>> +
>>> +	linfo = prog->aux->linfo;
>>> +
>>> +	/* progs are already adjusted/removed, if program starts on off, it may
>>> +	 * had its start cut off and its line info may need to be preserved
>>> +	 */
>>> +	for (i = 0; i < env->subprog_cnt; i++)
>>> +		if (env->subprog_info[i].start >= off)
>>> +			break;
>>> +	if (i < env->subprog_cnt && env->subprog_info[i].start == off)
>>> +		need_first_linfo = &env->subprog_info[i];
>>> +	else
>>> +		need_first_linfo = NULL;
>>> +
>>> +	/* find first line info to remove */
>>> +	for (i = 0; i < nr_linfo; i++)
>>> +		if (linfo[i].insn_off >= off)
>>> +			break;
>>> +
>>> +	/* count lines to be removed */
>>> +	l_off = i;
>>> +	l_cnt = 0;
>>> +	for (; i < nr_linfo; i++)
>>> +		if (linfo[i].insn_off < off + cnt)
>>> +			l_cnt++;
>>> +		else
>>> +			break;
>>> +
>>> +	/* either we didn't actually cut the start or we can just use line info
>>> +	 * of first instruction if it exists
>>> +	 */
>>> +	if (i < nr_linfo && linfo[i].insn_off == off + cnt)
>>> +		need_first_linfo = NULL;
>>> +	if (need_first_linfo) {
>>> +		if (WARN_ONCE(!l_cnt,
>>> +			      "verifier bug - no linfo removed, yet its missing"))
>>> +			return -EINVAL;
>>> +		if (WARN_ONCE(need_first_linfo->linfo_idx < l_off ||
>>> +			      need_first_linfo->linfo_idx >= l_off + l_cnt,
>>> +			      "verifier bug - removed prog linfo not in removed range"))
>>> +			return -EINVAL;
>>> +		/* subprog linfo_idx is not adjusted yet, so just find out
>>> +		 * which line it used to be and swap it
>>> +		 */
>>> +		memmove(linfo + l_off, linfo + need_first_linfo->linfo_idx,
>>> +			sizeof(*linfo));
>>> +		need_first_linfo->linfo_idx = l_off;
>>> +		linfo[l_off].insn_off = off;
>>> +
>>> +		l_off++;
>>> +		l_cnt--;
>>> +	}
>>
>> In this routine, we handle need_first_linfo if the first insn of a
>> subprogram is deleted.
>> I wonder whether we need to handle the last deleted linfo as well. For
>> example:
>>      func1:
>>        line_info1:
>>           insn1
>>           insn2
>>      func2:
>>        line_info2:
>>           insn3
>>           insn4:
>>
>> if insn2 and insn3 are deleted, the above algorithm will produce:
>>       func1:
>>         line_info1:
>>          insn1
>>       func2:
>>          insn4
>>
>> func2 is missing the first line info, which should be line_info2 with
>> adjusted insn offset to the start of func2.
> 
> Mm.. should not happen, we should end up with:
> 
>       func1:
>         line_info1:
>           insn1
>       func2:
>         line_info2:
>           insn4
> 
> func2 after func adjust will start at off and there is no line info for
> off + cnt (insn4), so we will preserve line_info2.

Thanks for verification, I missed that
 >>> +	/* count lines to be removed */
 >>> +	l_off = i;
 >>> +	l_cnt = 0;
 >>> +	for (; i < nr_linfo; i++)
 >>> +		if (linfo[i].insn_off < off + cnt)
 >>> +			l_cnt++;
 >>> +		else
 >>> +			break;
once you reached linfo[i].insn_off >= off + cnt, no more counting.
since l_cnt starts from 0, so the last one is actually preserved.

> 
> I've added the following test_btf test, just to be sure:
> 
> {
> 	.descr = "line_info (dead end + subprog start w/ no linfo)",
> 	.raw_types = {
> 		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
> 		BTF_FUNC_PROTO_ENC(1, 1),			/* [2] */
> 			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
> 		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
> 		BTF_FUNC_ENC(NAME_TBD, 2),			/* [4] */
> 		BTF_END_RAW,
> 	},
> 	BTF_STR_SEC("\0int\0x\0main\0func\0/* main linfo */\0/* func linfo */"),
> 	.insns = {
> 		BPF_MOV64_IMM(BPF_REG_0, 0),
> 		BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 1, 2), /* dead */
> 		BPF_CALL_REL(2),
> 		BPF_EXIT_INSN(),
> 		BPF_EXIT_INSN(), /* dead */
> 		/* func */
> 		BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* dead */
> 		BPF_MOV64_IMM(BPF_REG_0, 0),
> 		BPF_EXIT_INSN(),
> 	},
> 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
> 	.func_info_cnt = 2,
> 	.func_info_rec_size = 8,
> 	.func_info = { {0, 3}, {5, 4}, },
> 	.line_info = {
> 		BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10),
> 		BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10),
> 		BTF_END_RAW,
> 	},
> 	.line_info_rec_size = sizeof(struct bpf_line_info),
> 	.nr_jited_ksyms = 2,
> },
> 
> ~# bpftool prog dump xlated id 48
> int main(int x):
> ; /* main linfo */
>     0: (b7) r0 = 0
>     1: (85) call pc+1#0xffffffffc041e782
>     2: (95) exit
> int func(int x):
> ; /* func linfo */
>     3: (b7) r0 = 0
>     4: (95) exit
> 
> ~# bpftool prog dump jited id 48
> int main(int x):
> 0xffffffffc041cb2f:
> ; /* main linfo */
>     0:	push   %rbp
>     1:	mov    %rsp,%rbp
>     4:	sub    $0x28,%rsp
>     b:	sub    $0x28,%rbp
>     f:	mov    %rbx,0x0(%rbp)
>    13:	mov    %r13,0x8(%rbp)
>    17:	mov    %r14,0x10(%rbp)
>    1b:	mov    %r15,0x18(%rbp)
>    1f:	xor    %eax,%eax
>    21:	mov    %rax,0x20(%rbp)
>    25:	xor    %eax,%eax
>    27:	callq  0x0000000000001c53
>    2c:	mov    0x0(%rbp),%rbx
>    30:	mov    0x8(%rbp),%r13
>    34:	mov    0x10(%rbp),%r14
>    38:	mov    0x18(%rbp),%r15
>    3c:	add    $0x28,%rbp
>    40:	leaveq
>    41:	retq
> 
> int func(int x):
> 0xffffffffc041e782:
> ; /* func linfo */
>     0:	push   %rbp
>     1:	mov    %rsp,%rbp
>     4:	sub    $0x28,%rsp
>     b:	sub    $0x28,%rbp
>     f:	mov    %rbx,0x0(%rbp)
>    13:	mov    %r13,0x8(%rbp)
>    17:	mov    %r14,0x10(%rbp)
>    1b:	mov    %r15,0x18(%rbp)
>    1f:	xor    %eax,%eax
>    21:	mov    %rax,0x20(%rbp)
>    25:	xor    %eax,%eax
>    27:	mov    0x0(%rbp),%rbx
>    2b:	mov    0x8(%rbp),%r13
>    2f:	mov    0x10(%rbp),%r14
>    33:	mov    0x18(%rbp),%r15
>    37:	add    $0x28,%rbp
>    3b:	leaveq
>    3c:	retq
> 
>> Another example:
>>      func1:
>>         line_info1:
>>            insn1
>>            insn2
>>         line_info2:
>>            insn3
>>            insn4
>> If insn2 and insn3 are deleted, we will get
>>       func1:
>>         line_info1:
>>           insn1
>>           insn4
>> This is not ideal either. We should attribute insn4 to line_info2.
> 
> Cool, I'm a little in the dark on what the definition of line info is,
> so your suggestions are very much appreciated!  If you're saying that
> all instructions below line info are considered part of that "line",
> that'd simplify the code.
> 
> So should I always preserve "last" line info if first live instruction
> doesn't have one?
> 
> linfo1:
> 	insn1
> 	insn2 /* dead */
> linfo2:
> 	insn3 /* dead */
> 	insn4
> linfo3:
> 	insn5
> 
>    ||
>    \/
> 
> linfo1:
> 	insn1
> linfo2:
> 	insn4
> linfo3:
> 	insn5

Yes, the above is the desirable output.

> 
> 
> That'd simplify things quite a bit!
> 
>>> +
>>> +	/* remove the line info which refers to the removed instructions */
>>> +	if (l_cnt) {
>>> +		memmove(linfo + l_off, linfo + i,
>>> +			sizeof(*linfo) * (nr_linfo - i));
>>> +
>>> +		prog->aux->nr_linfo -= l_cnt;
>>> +		nr_linfo = prog->aux->nr_linfo;
>>> +	}
>>> +
>>> +	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
>>> +	for (i = l_off; i < nr_linfo; i++)
>>> +		linfo[i].insn_off -= cnt;
>>> +
>>> +	/* fix up all subprogs (incl. 'exit') which start >= off */
>>> +	for (i = 0; i <= env->subprog_cnt; i++)
>>> +		if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
>>> +			env->subprog_info[i].linfo_idx -= l_cnt;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
>>> +{
>>> +	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
>>> +	unsigned int orig_prog_len = env->prog->len;
>>> +	int err;
>>> +
>>> +	if (!cnt)
>>> +		return 0;
>>
>> This check is probably not needed as all call sites (including
>> patch #4) guarantees non-zero cnt.
> 
> Ack, thanks for the review, I was unsure if this is not convention.
>
Jakub Kicinski Dec. 31, 2018, 10:27 p.m. UTC | #4
On Mon, 31 Dec 2018 21:41:22 +0000, Yonghong Song wrote:
> > func2 after func adjust will start at off and there is no line info for
> > off + cnt (insn4), so we will preserve line_info2.  
> 
> Thanks for verification, I missed that
>  >>> +	/* count lines to be removed */
>  >>> +	l_off = i;
>  >>> +	l_cnt = 0;
>  >>> +	for (; i < nr_linfo; i++)
>  >>> +		if (linfo[i].insn_off < off + cnt)
>  >>> +			l_cnt++;
>  >>> +		else
>  >>> +			break;  
> once you reached linfo[i].insn_off >= off + cnt, no more counting.
> since l_cnt starts from 0, so the last one is actually preserved.

Ah, I see!

> > linfo1:
> > 	insn1
> > 	insn2 /* dead */
> > linfo2:
> > 	insn3 /* dead */
> > 	insn4
> > linfo3:
> > 	insn5
> > 
> >    ||
> >    \/
> > 
> > linfo1:
> > 	insn1
> > linfo2:
> > 	insn4
> > linfo3:
> > 	insn5  
> 
> Yes, the above is the desirable output.

Perfect, let me respin.
diff mbox series

Patch

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 8c8544b375eb..2cdb50bbf867 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -782,6 +782,7 @@  static inline bool bpf_dump_raw_ok(void)
 
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
+int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt);
 
 void bpf_clear_redirect_map(struct bpf_map *map);
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index a40057b2c556..cc6fa754627c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -461,6 +461,18 @@  struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 	return prog_adj;
 }
 
+int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
+{
+	/* Branch offsets can't overflow when program is shrinking, no need
+	 * to call bpf_adj_branches(..., true) here
+	 */
+	memmove(prog->insnsi + off, prog->insnsi + off + cnt,
+		sizeof(struct bpf_insn) * (prog->len - off - cnt));
+	prog->len -= cnt;
+
+	return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
+}
+
 void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
 {
 	int i;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 30e2cd399b4a..a3880c6dac97 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6233,6 +6233,171 @@  static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
 	return new_prog;
 }
 
+static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
+					      u32 off, u32 cnt)
+{
+	int i, j;
+
+	/* find first prog starting at or after off (first to remove) */
+	for (i = 0; i < env->subprog_cnt; i++)
+		if (env->subprog_info[i].start >= off)
+			break;
+	/* find first prog starting at or after off + cnt (first to stay) */
+	for (j = i; j < env->subprog_cnt; j++)
+		if (env->subprog_info[j].start >= off + cnt)
+			break;
+	/* if j doesn't start exactly at off + cnt, we are just removing
+	 * the front of previous prog
+	 */
+	if (env->subprog_info[j].start != off + cnt)
+		j--;
+
+	if (j > i) {
+		struct bpf_prog_aux *aux = env->prog->aux;
+		int move;
+
+		/* move fake 'exit' subprog as well */
+		move = env->subprog_cnt + 1 - j;
+
+		memmove(env->subprog_info + i,
+			env->subprog_info + j,
+			sizeof(*env->subprog_info) * move);
+		env->subprog_cnt -= j - i;
+
+		/* remove func_info */
+		if (aux->func_info) {
+			move = aux->func_info_cnt - j;
+
+			memmove(aux->func_info + i,
+				aux->func_info + j,
+				sizeof(*aux->func_info) * move);
+			aux->func_info_cnt -= j - i;
+		}
+	} else {
+		/* convert i from "first prog to remove" to "first to adjust" */
+		if (env->subprog_info[i].start == off)
+			i++;
+	}
+
+	/* update fake 'exit' subprog as well */
+	for (; i <= env->subprog_cnt; i++)
+		env->subprog_info[i].start -= cnt;
+
+	return 0;
+}
+
+static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
+				      u32 cnt)
+{
+	struct bpf_subprog_info *need_first_linfo;
+	struct bpf_prog *prog = env->prog;
+	u32 i, l_off, l_cnt, nr_linfo;
+	struct bpf_line_info *linfo;
+
+	nr_linfo = prog->aux->nr_linfo;
+	if (!nr_linfo || !cnt)
+		return 0;
+
+	linfo = prog->aux->linfo;
+
+	/* progs are already adjusted/removed, if program starts on off, it may
+	 * had its start cut off and its line info may need to be preserved
+	 */
+	for (i = 0; i < env->subprog_cnt; i++)
+		if (env->subprog_info[i].start >= off)
+			break;
+	if (i < env->subprog_cnt && env->subprog_info[i].start == off)
+		need_first_linfo = &env->subprog_info[i];
+	else
+		need_first_linfo = NULL;
+
+	/* find first line info to remove */
+	for (i = 0; i < nr_linfo; i++)
+		if (linfo[i].insn_off >= off)
+			break;
+
+	/* count lines to be removed */
+	l_off = i;
+	l_cnt = 0;
+	for (; i < nr_linfo; i++)
+		if (linfo[i].insn_off < off + cnt)
+			l_cnt++;
+		else
+			break;
+
+	/* either we didn't actually cut the start or we can just use line info
+	 * of first instruction if it exists
+	 */
+	if (i < nr_linfo && linfo[i].insn_off == off + cnt)
+		need_first_linfo = NULL;
+	if (need_first_linfo) {
+		if (WARN_ONCE(!l_cnt,
+			      "verifier bug - no linfo removed, yet its missing"))
+			return -EINVAL;
+		if (WARN_ONCE(need_first_linfo->linfo_idx < l_off ||
+			      need_first_linfo->linfo_idx >= l_off + l_cnt,
+			      "verifier bug - removed prog linfo not in removed range"))
+			return -EINVAL;
+		/* subprog linfo_idx is not adjusted yet, so just find out
+		 * which line it used to be and swap it
+		 */
+		memmove(linfo + l_off, linfo + need_first_linfo->linfo_idx,
+			sizeof(*linfo));
+		need_first_linfo->linfo_idx = l_off;
+		linfo[l_off].insn_off = off;
+
+		l_off++;
+		l_cnt--;
+	}
+
+	/* remove the line info which refers to the removed instructions */
+	if (l_cnt) {
+		memmove(linfo + l_off, linfo + i,
+			sizeof(*linfo) * (nr_linfo - i));
+
+		prog->aux->nr_linfo -= l_cnt;
+		nr_linfo = prog->aux->nr_linfo;
+	}
+
+	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
+	for (i = l_off; i < nr_linfo; i++)
+		linfo[i].insn_off -= cnt;
+
+	/* fix up all subprogs (incl. 'exit') which start >= off */
+	for (i = 0; i <= env->subprog_cnt; i++)
+		if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
+			env->subprog_info[i].linfo_idx -= l_cnt;
+
+	return 0;
+}
+
+static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	unsigned int orig_prog_len = env->prog->len;
+	int err;
+
+	if (!cnt)
+		return 0;
+
+	err = bpf_remove_insns(env->prog, off, cnt);
+	if (err)
+		return err;
+
+	err = adjust_subprog_starts_after_remove(env, off, cnt);
+	if (err)
+		return err;
+
+	err = bpf_adj_linfo_after_remove(env, off, cnt);
+	if (err)
+		return err;
+
+	memmove(aux_data + off,	aux_data + off + cnt,
+		sizeof(*aux_data) * (orig_prog_len - off - cnt));
+
+	return 0;
+}
+
 /* The verifier does more data flow analysis than llvm and will not
  * explore branches that are dead at run time. Malicious programs can
  * have dead code too. Therefore replace all dead at-run-time code
@@ -6293,6 +6458,30 @@  static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
 	}
 }
 
+static int opt_remove_dead_code(struct bpf_verifier_env *env)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	int insn_cnt = env->prog->len;
+	int i, err;
+
+	for (i = 0; i < insn_cnt; i++) {
+		int j;
+
+		j = 0;
+		while (i + j < insn_cnt && !aux_data[i + j].seen)
+			j++;
+		if (!j)
+			continue;
+
+		err = verifier_remove_insns(env, i, j);
+		if (err)
+			return err;
+		insn_cnt = env->prog->len;
+	}
+
+	return 0;
+}
+
 /* convert load instructions that access fields of a context type into a
  * sequence of instructions that access fields of the underlying structure:
  *     struct __sk_buff    -> struct sk_buff
@@ -7032,11 +7221,13 @@  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (is_priv) {
 		if (ret == 0)
 			opt_hard_wire_dead_code_branches(env);
+		if (ret == 0)
+			ret = opt_remove_dead_code(env);
+	} else {
+		if (ret == 0)
+			sanitize_dead_code(env);
 	}
 
-	if (ret == 0)
-		sanitize_dead_code(env);
-
 	if (ret == 0)
 		/* program is valid, convert *(u32*)(ctx + off) accesses */
 		ret = convert_ctx_accesses(env);