From patchwork Thu Mar 29 22:45:57 2018
From: Edward Cree
Subject: [PATCH v2 bpf-next 1/3] bpf/verifier: validate func_calls by marking at do_check() time
To: Alexei Starovoitov, Daniel Borkmann
Date: Thu, 29 Mar 2018 23:45:57 +0100

Removes a couple of passes from the verifier, one to check subprogs don't overlap etc., and one to compute max stack depth (which now is done by topologically sorting the call graph). This improves the asymptotic complexity of a number of operations, for instance the max stack depth check is now O(n) in the number of subprogs, rather than having to walk every insn of every possible call chain.

Signed-off-by: Edward Cree
---
include/linux/bpf_verifier.h | 24 ++- kernel/bpf/verifier.c | 451 ++++++++++++++++++++++++------------------- 2 files changed, 267 insertions(+), 208 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7e61c395fddf..3af3f9cceede 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -146,6 +146,7 @@ struct bpf_insn_aux_data { s32 call_imm; /* saved imm field of call insn */ }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ + u16 subprogno; /* subprog in which this insn resides, valid iff @seen */ bool seen; /* this insn was processed by the verifier */ }; @@ -173,6 +174,15 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) #define BPF_MAX_SUBPROGS 256 +struct bpf_subprog_info { + /* which other subprogs does this one directly call?
*/ + DECLARE_BITMAP(callees, BPF_MAX_SUBPROGS); + u32 start; /* insn idx of function entry point */ + u16 stack_depth; /* max. stack depth used by this function */ + u16 total_stack_depth; /* max. stack depth used by entire call chain */ + u16 len; /* #insns in this subprog */ +}; + /* single container for all structs * one verifier_env per bpf_check() call */ @@ -189,11 +199,10 @@ struct bpf_verifier_env { u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; bool seen_direct_write; + bool seen_pseudo_call; /* populated at check_cfg() time */ struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ struct bpf_verifier_log log; - u32 subprog_starts[BPF_MAX_SUBPROGS]; - /* computes the stack depth of each bpf function */ - u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1]; + struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS]; u32 subprog_cnt; }; @@ -202,11 +211,16 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...); -static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) +static inline struct bpf_func_state *cur_frame(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; - return cur->frame[cur->curframe]->regs; + return cur->frame[cur->curframe]; +} + +static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) +{ + return cur_frame(env)->regs; } int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8acd2207e412..33963357a7ef 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -736,111 +736,49 @@ enum reg_arg_type { DST_OP_NO_MARK /* same as above, check only, don't mark */ }; -static int cmp_subprogs(const void *a, const void *b) +static int find_subprog(struct bpf_verifier_env *env, int insn_idx) { - return *(int *)a - *(int *)b; -} - -static int find_subprog(struct bpf_verifier_env *env, int off) -{ - u32 *p; + struct bpf_insn_aux_data *aux; + int insn_cnt = env->prog->len; + u32 subprogno; - p = bsearch(&off, env->subprog_starts, env->subprog_cnt, - sizeof(env->subprog_starts[0]), cmp_subprogs); - if (!p) + if (insn_idx >= insn_cnt || insn_idx < 0) { + verbose(env, "find_subprog of invalid insn_idx %d\n", insn_idx); + return -EINVAL; + } + aux = &env->insn_aux_data[insn_idx]; + if (!aux->seen) /* haven't visited this line yet */ return -ENOENT; - return p - env->subprog_starts; - + subprogno = aux->subprogno; + /* validate that we are at start of subprog */ + if (env->subprog_info[subprogno].start != insn_idx) { + verbose(env, "insn_idx %d is in subprog %u but that starts at %d\n", + insn_idx, subprogno, env->subprog_info[subprogno].start); + return -EINVAL; + } + return subprogno; } static int add_subprog(struct bpf_verifier_env *env, int off) { int insn_cnt = env->prog->len; + struct bpf_subprog_info *info; int ret; if (off >= insn_cnt || off < 0) { verbose(env, "call to invalid destination\n"); return -EINVAL; } - ret = find_subprog(env, off); - if (ret >= 0) - return 0; if (env->subprog_cnt >= BPF_MAX_SUBPROGS) { verbose(env, "too many subprograms\n"); return -E2BIG; } - env->subprog_starts[env->subprog_cnt++] = off; - sort(env->subprog_starts, env->subprog_cnt, - sizeof(env->subprog_starts[0]), cmp_subprogs, NULL); - return 0; -} - -static int check_subprogs(struct bpf_verifier_env *env) -{ - int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; - struct bpf_insn *insn = 
env->prog->insnsi; - int insn_cnt = env->prog->len; - - /* determine subprog starts. The end is one before the next starts */ - for (i = 0; i < insn_cnt; i++) { - if (insn[i].code != (BPF_JMP | BPF_CALL)) - continue; - if (insn[i].src_reg != BPF_PSEUDO_CALL) - continue; - if (!env->allow_ptr_leaks) { - verbose(env, "function calls to other bpf functions are allowed for root only\n"); - return -EPERM; - } - if (bpf_prog_is_dev_bound(env->prog->aux)) { - verbose(env, "function calls in offloaded programs are not supported yet\n"); - return -EINVAL; - } - ret = add_subprog(env, i + insn[i].imm + 1); - if (ret < 0) - return ret; - } - - if (env->log.level > 1) - for (i = 0; i < env->subprog_cnt; i++) - verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]); - - /* now check that all jumps are within the same subprog */ - subprog_start = 0; - if (env->subprog_cnt == cur_subprog) - subprog_end = insn_cnt; - else - subprog_end = env->subprog_starts[cur_subprog++]; - for (i = 0; i < insn_cnt; i++) { - u8 code = insn[i].code; - - if (BPF_CLASS(code) != BPF_JMP) - goto next; - if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) - goto next; - off = i + insn[i].off + 1; - if (off < subprog_start || off >= subprog_end) { - verbose(env, "jump out of range from insn %d to %d\n", i, off); - return -EINVAL; - } -next: - if (i == subprog_end - 1) { - /* to avoid fall-through from one subprog into another - * the last insn of the subprog should be either exit - * or unconditional jump back - */ - if (code != (BPF_JMP | BPF_EXIT) && - code != (BPF_JMP | BPF_JA)) { - verbose(env, "last insn is not an exit or jmp\n"); - return -EINVAL; - } - subprog_start = subprog_end; - if (env->subprog_cnt == cur_subprog) - subprog_end = insn_cnt; - else - subprog_end = env->subprog_starts[cur_subprog++]; - } - } - return 0; + ret = env->subprog_cnt++; + info = &env->subprog_info[ret]; + info->start = off; + info->stack_depth = 0; + memset(info->callees, 0, sizeof(info->callees)); + return ret; } static @@ -1470,80 +1408,84 @@ static int update_stack_depth(struct bpf_verifier_env *env, const struct bpf_func_state *func, int off) { - u16 stack = env->subprog_stack_depth[func->subprogno]; + struct bpf_subprog_info *info; - if (stack >= -off) - return 0; + if (!func) return -EFAULT; + if (func->subprogno >= BPF_MAX_SUBPROGS) return -E2BIG; + info = &env->subprog_info[func->subprogno]; /* update known max for given subprogram */ - env->subprog_stack_depth[func->subprogno] = -off; + info->stack_depth = max_t(u16, info->stack_depth, -off); return 0; } -/* starting from main bpf function walk all instructions of the function - * and recursively walk all callees that given function can call. - * Ignore jump and exit insns. - * Since recursion is prevented by check_cfg() this algorithm - * only needs a local stack of MAX_CALL_FRAMES to remember callsites +/* Topologically sort the call graph, and thereby determine the maximum stack + * depth of each subprog's worst-case call chain. Store in total_stack_depth. + * The tsort is performed using Kahn's algorithm. 
*/ static int check_max_stack_depth(struct bpf_verifier_env *env) { - int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end; - struct bpf_insn *insn = env->prog->insnsi; - int insn_cnt = env->prog->len; - int ret_insn[MAX_CALL_FRAMES]; - int ret_prog[MAX_CALL_FRAMES]; - -process_func: - /* round up to 32-bytes, since this is granularity - * of interpreter stack size - */ - depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32); - if (depth > MAX_BPF_STACK) { - verbose(env, "combined stack size of %d calls is %d. Too large\n", - frame + 1, depth); - return -EACCES; - } -continue_func: - if (env->subprog_cnt == subprog) - subprog_end = insn_cnt; - else - subprog_end = env->subprog_starts[subprog]; - for (; i < subprog_end; i++) { - if (insn[i].code != (BPF_JMP | BPF_CALL)) - continue; - if (insn[i].src_reg != BPF_PSEUDO_CALL) - continue; - /* remember insn and function to return to */ - ret_insn[frame] = i + 1; - ret_prog[frame] = subprog; - - /* find the callee */ - i = i + insn[i].imm + 1; - subprog = find_subprog(env, i); - if (subprog < 0) { - WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", - i); - return -EFAULT; + DECLARE_BITMAP(frontier, BPF_MAX_SUBPROGS) = {0}; + DECLARE_BITMAP(visited, BPF_MAX_SUBPROGS) = {0}; + int subprog, i, j; + + /* subprog 0 has no incoming edges, and should be the only such */ + __set_bit(0, frontier); + env->subprog_info[0].total_stack_depth = env->subprog_info[0].stack_depth; + + while (true) { + /* select a frontier node */ + subprog = find_first_bit(frontier, BPF_MAX_SUBPROGS); + if (subprog >= BPF_MAX_SUBPROGS) /* frontier is empty, done */ + break; + /* remove it from the frontier */ + __clear_bit(subprog, frontier); + /* validate its total stack depth */ + if (env->subprog_info[subprog].total_stack_depth > MAX_BPF_STACK) { + verbose(env, "combined stack size of calls to %d (at insn %d) is %d. Too large\n", + subprog, env->subprog_info[subprog].start, + env->subprog_info[subprog].total_stack_depth); + return -EACCES; } - subprog++; - frame++; - if (frame >= MAX_CALL_FRAMES) { - WARN_ONCE(1, "verifier bug. Call stack is too deep\n"); - return -EFAULT; + if (env->log.level > 1) { + verbose(env, "combined stack size of calls to %d (at insn %d) is %d\n", + subprog, env->subprog_info[subprog].start, + env->subprog_info[subprog].total_stack_depth); + } + __set_bit(subprog, visited); + /* for each callee */ + for_each_set_bit(i, env->subprog_info[subprog].callees, + BPF_MAX_SUBPROGS) { + /* round up to 32-bytes, since this is granularity of + * interpreter stack size + */ + u16 stack_depth = round_up(max_t(u16, env->subprog_info[i].stack_depth, 1), 32); + + /* Update callee total stack depth */ + env->subprog_info[i].total_stack_depth = max_t(u16, + env->subprog_info[i].total_stack_depth, + env->subprog_info[subprog].total_stack_depth + + stack_depth); + /* does it have unvisited callers? */ + for_each_clear_bit(j, visited, env->subprog_cnt) { + if (test_bit(i, env->subprog_info[j].callees)) + break; + } + /* if not, add it to the frontier */ + if (j >= env->subprog_cnt) + __set_bit(i, frontier); } - goto process_func; } - /* end of for() loop means the last insn of the 'subprog' - * was reached. Doesn't matter whether it was JA or EXIT - */ - if (frame == 0) - return 0; - depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32); - frame--; - i = ret_insn[frame]; - subprog = ret_prog[frame]; - goto continue_func; + + /* are any nodes left unvisited? 
*/ + subprog = find_first_zero_bit(visited, env->subprog_cnt); + if (subprog < env->subprog_cnt) { + /* then call graph is not acyclic, which shouldn't happen */ + verbose(env, "verifier bug. Call graph has a cycle including subprog %d (at insn %d)\n", + subprog, env->subprog_info[subprog].start); + return -EFAULT; + } + return 0; } #ifndef CONFIG_BPF_JIT_ALWAYS_ON @@ -1558,8 +1500,7 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env, start); return -EFAULT; } - subprog++; - return env->subprog_stack_depth[subprog]; + return env->subprog_info[subprog].stack_depth; } #endif @@ -2097,7 +2038,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_FUNC_tail_call: if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) goto error; - if (env->subprog_cnt) { + if (env->subprog_cnt > 1) { verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n"); return -EINVAL; } @@ -2233,22 +2174,33 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, { struct bpf_verifier_state *state = env->cur_state; struct bpf_func_state *caller, *callee; - int i, subprog, target_insn; + int i, subprog, target; if (state->curframe + 1 >= MAX_CALL_FRAMES) { verbose(env, "the call stack of %d frames is too deep\n", state->curframe + 2); return -E2BIG; } - - target_insn = *insn_idx + insn->imm; - subprog = find_subprog(env, target_insn + 1); - if (subprog < 0) { - verbose(env, "verifier bug. No program starts at insn %d\n", - target_insn + 1); - return -EFAULT; + if (!env->allow_ptr_leaks) { + verbose(env, "function calls to other bpf functions are allowed for root only\n"); + return -EPERM; + } + if (bpf_prog_is_dev_bound(env->prog->aux)) { + verbose(env, "function calls in offloaded programs are not supported yet\n"); + return -EINVAL; } + + target = *insn_idx + insn->imm; + /* We will increment insn_idx (PC) in do_check() after handling this + * call insn, so the actual start of the function is target + 1. + */ + subprog = find_subprog(env, target + 1); + if (subprog == -ENOENT) + subprog = add_subprog(env, target + 1); + if (subprog < 0) + return subprog; + caller = state->frame[state->curframe]; if (state->frame[state->curframe + 1]) { verbose(env, "verifier bug. Frame %d already allocated\n", @@ -2261,6 +2213,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -ENOMEM; state->frame[state->curframe + 1] = callee; + /* record edge in call graph */ + __set_bit(subprog, env->subprog_info[caller->subprogno].callees); + /* callee cannot access r0, r6 - r9 for reading and has to write * into its own stack before reading from it. 
* callee can read/write into caller's stack @@ -2269,7 +2224,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, /* remember the callsite, it will be used by bpf_exit */ *insn_idx /* callsite */, state->curframe + 1 /* frameno within this callchain */, - subprog + 1 /* subprog number within this prog */); + subprog /* subprog number within this prog */); /* copy r1 - r5 args that callee can access */ for (i = BPF_REG_1; i <= BPF_REG_5; i++) @@ -2285,7 +2240,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, state->curframe++; /* and go analyze first insn of the callee */ - *insn_idx = target_insn; + *insn_idx = target; if (env->log.level) { verbose(env, "caller:\n"); @@ -3828,13 +3783,13 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EINVAL; } - if (env->subprog_cnt) { + if (env->seen_pseudo_call) { /* when program has LD_ABS insn JITs and interpreter assume * that r1 == ctx == skb which is not the case for callees * that can have arbitrary arguments. It's problematic * for main prog as well since JITs would need to analyze * all functions in order to make proper register save/restore - * decisions in the main prog. Hence disallow LD_ABS with calls + * decisions in the main prog. Hence disallow LD_ABS with calls. */ verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n"); return -EINVAL; @@ -4016,10 +3971,6 @@ static int check_cfg(struct bpf_verifier_env *env) int ret = 0; int i, t; - ret = check_subprogs(env); - if (ret < 0) - return ret; - insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); if (!insn_state) return -ENOMEM; @@ -4053,6 +4004,7 @@ static int check_cfg(struct bpf_verifier_env *env) if (t + 1 < insn_cnt) env->explored_states[t + 1] = STATE_LIST_MARK; if (insns[t].src_reg == BPF_PSEUDO_CALL) { + env->seen_pseudo_call = true; env->explored_states[t] = STATE_LIST_MARK; ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); if (ret == 1) @@ -4534,6 +4486,52 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 0; } +/* Checks that all subprogs are contiguous */ +static int validate_subprogs(struct bpf_verifier_env *env) +{ + int cur_subprog = -1, i; + + for (i = 0; i < env->prog->len; i++) { + struct bpf_insn_aux_data *aux = &env->insn_aux_data[i]; + + if (!aux->seen) { + if (cur_subprog < 0) { /* can't happen */ + verbose(env, "unseen insn %d before subprog\n", + i); + return -EINVAL; + } + /* unreachable code belongs to the subprog in which it + * is embedded. Otherwise a forward jump could create + * an apparently non-contiguous subprog. + */ + aux->subprogno = cur_subprog; + continue; + } + if (aux->subprogno != cur_subprog) { + /* change of subprog should only happen at start of + * new subprog, since subprog must start with entry + * point & be contiguous thereafter. 
+ */ + if (unlikely(aux->subprogno >= env->subprog_cnt)) { + /* can't happen */ + verbose(env, "verifier ran off subprog array, " + "%u >= %u\n", + aux->subprogno, env->subprog_cnt); + return -EINVAL; + } + if (env->subprog_info[aux->subprogno].start != i) { + verbose(env, "subprog %u is non-contiguous at %d\n", + aux->subprogno, i); + return -EINVAL; + } + /* entered new subprog */ + cur_subprog = aux->subprogno; + } + } + + return 0; +} + static int do_check(struct bpf_verifier_env *env) { struct bpf_verifier_state *state; @@ -4542,6 +4540,7 @@ static int do_check(struct bpf_verifier_env *env) int insn_cnt = env->prog->len, i; int insn_idx, prev_insn_idx = 0; int insn_processed = 0; + int mainprogno; bool do_print_state = false; state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); @@ -4555,12 +4554,17 @@ static int do_check(struct bpf_verifier_env *env) return -ENOMEM; } env->cur_state = state; + mainprogno = add_subprog(env, 0); + if (mainprogno < 0) + return mainprogno; init_func_state(env, state->frame[0], BPF_MAIN_FUNC /* callsite */, 0 /* frameno */, - 0 /* subprogno, zero == main subprog */); + mainprogno /* subprogno */); insn_idx = 0; for (;;) { + struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; + struct bpf_func_state *frame = cur_frame(env); struct bpf_insn *insn; u8 class; int err; @@ -4581,6 +4585,17 @@ static int do_check(struct bpf_verifier_env *env) return -E2BIG; } + /* check the insn doesn't appear in multiple subprogs, which + * would imply bad function calls/returns/jumps + */ + if (aux->seen && aux->subprogno != frame->subprogno) { + verbose(env, "insn %d was in subprog %u, now %u\n", + insn_idx, aux->subprogno, frame->subprogno); + return -EINVAL; + } + aux->subprogno = frame->subprogno; + aux->seen = true; + err = is_state_visited(env, insn_idx); if (err < 0) return err; @@ -4605,7 +4620,7 @@ static int do_check(struct bpf_verifier_env *env) else verbose(env, "\nfrom %d to %d:", prev_insn_idx, insn_idx); - print_verifier_state(env, state->frame[state->curframe]); + print_verifier_state(env, frame); do_print_state = false; } @@ -4627,7 +4642,6 @@ static int do_check(struct bpf_verifier_env *env) } regs = cur_regs(env); - env->insn_aux_data[insn_idx].seen = true; if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); if (err) @@ -4843,7 +4857,15 @@ static int do_check(struct bpf_verifier_env *env) return err; insn_idx++; - env->insn_aux_data[insn_idx].seen = true; + /* Mark second half of LD_IMM64 insn */ + aux++; + if (aux->seen && aux->subprogno != frame->subprogno) { + verbose(env, "insn %d was in subprog %u, now %u\n", + insn_idx, aux->subprogno, frame->subprogno); + return -EINVAL; + } + aux->subprogno = frame->subprogno; + aux->seen = true; } else { verbose(env, "invalid BPF_LD mode\n"); return -EINVAL; @@ -4858,16 +4880,16 @@ static int do_check(struct bpf_verifier_env *env) verbose(env, "processed %d insns (limit %d), stack depth ", insn_processed, BPF_COMPLEXITY_LIMIT_INSNS); - for (i = 0; i < env->subprog_cnt + 1; i++) { - u32 depth = env->subprog_stack_depth[i]; + for (i = 0; i < env->subprog_cnt; i++) { + u32 depth = env->subprog_info[i].stack_depth; - verbose(env, "%d", depth); - if (i + 1 < env->subprog_cnt + 1) + if (i) verbose(env, "+"); + verbose(env, "%d", depth); } verbose(env, "\n"); - env->prog->aux->stack_depth = env->subprog_stack_depth[0]; - return 0; + env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; + return validate_subprogs(env); } static int check_map_prealloc(struct bpf_map 
*map) @@ -5059,8 +5081,10 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); memcpy(new_data + off + cnt - 1, old_data + off, sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); - for (i = off; i < off + cnt - 1; i++) + for (i = off; i < off + cnt - 1; i++) { new_data[i].seen = true; + new_data[i].subprogno = old_data[off].subprogno; + } env->insn_aux_data = new_data; vfree(old_data); return 0; @@ -5073,9 +5097,9 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len if (len == 1) return; for (i = 0; i < env->subprog_cnt; i++) { - if (env->subprog_starts[i] < off) + if (env->subprog_info[i].start < off) continue; - env->subprog_starts[i] += len - 1; + env->subprog_info[i].start += len - 1; } } @@ -5234,15 +5258,19 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) static int jit_subprogs(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog, **func, *tmp; - int i, j, subprog_start, subprog_end = 0, len, subprog; + int i, j, subprog; struct bpf_insn *insn; void *old_bpf_func; int err = -ENOMEM; - if (env->subprog_cnt == 0) + if (env->subprog_cnt <= 1) return 0; + for (i = 0; i <= env->subprog_cnt; i++) + env->subprog_info[i].len = 0; + for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + env->subprog_info[env->insn_aux_data[i].subprogno].len++; if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) continue; @@ -5255,7 +5283,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) /* temporarily remember subprog id inside insn instead of * aux_data, since next loop will split up all insns into funcs */ - insn->off = subprog + 1; + insn->off = subprog; /* remember original imm in case JIT fails and fallback * to interpreter will be needed */ @@ -5264,25 +5292,37 @@ static int jit_subprogs(struct bpf_verifier_env *env) insn->imm = 1; } - func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL); + func = kzalloc(sizeof(prog) * env->subprog_cnt, GFP_KERNEL); if (!func) return -ENOMEM; - for (i = 0; i <= env->subprog_cnt; i++) { - subprog_start = subprog_end; - if (env->subprog_cnt == i) - subprog_end = prog->len; - else - subprog_end = env->subprog_starts[i]; + for (i = 0; i < env->subprog_cnt; i++) { + struct bpf_subprog_info *info = &env->subprog_info[i]; + int k = 0; - len = subprog_end - subprog_start; - func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER); + func[i] = bpf_prog_alloc(bpf_prog_size(info->len), GFP_USER); if (!func[i]) goto out_free; - memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], - len * sizeof(struct bpf_insn)); + + for (j = 0; j < prog->len; j++) { + if (env->insn_aux_data[j].subprogno != i) + continue; + if (WARN_ON_ONCE(k >= info->len)) { + verbose(env, "Tried to add insn %d to subprog %d but previously counted %d\n", + k, i, info->len); + err = -EFAULT; + goto out_free; + } + memcpy(func[i]->insnsi + k++, prog->insnsi + j, + sizeof(struct bpf_insn)); + } + if (WARN_ON_ONCE(k != info->len)) { + verbose(env, "Only %d insns in subprog %d but previously counted %d\n", + k, i, info->len); + } + func[i]->type = prog->type; - func[i]->len = len; + func[i]->len = info->len; if (bpf_prog_calc_tag(func[i])) goto out_free; func[i]->is_func = 1; @@ -5290,7 +5330,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) * Long term would need debug info to populate names */ func[i]->aux->name[0] = 'F'; - func[i]->aux->stack_depth = env->subprog_stack_depth[i]; + 
func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; func[i] = bpf_int_jit_compile(func[i]); if (!func[i]->jited) { @@ -5303,7 +5343,7 @@ * now populate all bpf_calls with correct addresses and * run last pass of JIT */ - for (i = 0; i <= env->subprog_cnt; i++) { insn = func[i]->insnsi; for (j = 0; j < func[i]->len; j++, insn++) { if (insn->code != (BPF_JMP | BPF_CALL) || @@ -5311,12 +5351,16 @@ continue; subprog = insn->off; insn->off = 0; + if (subprog < 0 || subprog >= env->subprog_cnt) { + err = -EFAULT; + goto out_free; + } insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) func[subprog]->bpf_func - __bpf_call_base; } } - for (i = 0; i <= env->subprog_cnt; i++) { + for (i = 0; i < env->subprog_cnt; i++) { old_bpf_func = func[i]->bpf_func; tmp = bpf_int_jit_compile(func[i]); if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) { @@ -5330,7 +5374,7 @@ /* finally lock prog and jit images for all functions and * populate kallsysm */ - for (i = 0; i <= env->subprog_cnt; i++) { + for (i = 0; i < env->subprog_cnt; i++) { bpf_prog_lock_ro(func[i]); bpf_prog_kallsyms_add(func[i]); } @@ -5347,7 +5391,7 @@ continue; insn->off = env->insn_aux_data[i].call_imm; subprog = find_subprog(env, i + insn->off + 1); - addr = (unsigned long)func[subprog + 1]->bpf_func; + addr = (unsigned long)func[subprog]->bpf_func; addr &= PAGE_MASK; insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) addr - __bpf_call_base; @@ -5356,10 +5400,10 @@ prog->jited = 1; prog->bpf_func = func[0]->bpf_func; prog->aux->func = func; - prog->aux->func_cnt = env->subprog_cnt + 1; + prog->aux->func_cnt = env->subprog_cnt; return 0; out_free: - for (i = 0; i <= env->subprog_cnt; i++) + for (i = 0; i < env->subprog_cnt; i++) if (func[i]) bpf_jit_free(func[i]); kfree(func); @@ -5389,6 +5433,7 @@ static int fixup_call_args(struct bpf_verifier_env *env) err = jit_subprogs(env); if (err == 0) return 0; + verbose(env, "failed to jit_subprogs %d\n", err); } #ifndef CONFIG_BPF_JIT_ALWAYS_ON for (i = 0; i < prog->len; i++, insn++) {
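
For readers following the new pass, the shape of the algorithm that check_max_stack_depth() now implements can be easier to see outside the diff. The following is a rough userspace sketch only, not the kernel code: types are simplified, the 32-byte rounding of interpreter frame sizes is omitted, the callees bitmaps become plain bool arrays, and the frontier search is a quadratic scan rather than the bitmap walk used in the patch.

	#include <stdbool.h>
	#include <stdint.h>

	#define MAX_SUBPROGS 256

	struct subprog {
		bool callees[MAX_SUBPROGS];  /* edge: does this subprog call subprog i? */
		uint16_t stack_depth;        /* this function's own frame size */
		uint16_t total_stack_depth;  /* worst-case depth of any chain reaching it */
	};

	/* Kahn-style topological walk: repeatedly pick a subprog none of whose
	 * callers remain unprocessed, and push its accumulated depth down to its
	 * callees. Returns false if some chain exceeds 'limit' or the call graph
	 * has a cycle. Assumes cnt <= MAX_SUBPROGS and subprog 0 is the entry
	 * point (the only node with no incoming edges).
	 */
	static bool max_stack_depth_ok(struct subprog *sp, int cnt, int limit)
	{
		bool visited[MAX_SUBPROGS] = { false };

		sp[0].total_stack_depth = sp[0].stack_depth;

		for (int done = 0; done < cnt; done++) {
			int n = -1;

			/* frontier: an unvisited subprog with no unvisited caller */
			for (int i = 0; i < cnt && n < 0; i++) {
				if (visited[i])
					continue;
				n = i;
				for (int j = 0; j < cnt; j++)
					if (!visited[j] && sp[j].callees[i])
						n = -1; /* caller j not processed yet */
			}
			if (n < 0)
				return false; /* nodes remain but no frontier: cycle */
			if (sp[n].total_stack_depth > limit)
				return false; /* some call chain is too deep */
			visited[n] = true;

			/* relax every edge out of n */
			for (int i = 0; i < cnt; i++) {
				if (!sp[n].callees[i])
					continue;
				uint16_t via_n = sp[n].total_stack_depth + sp[i].stack_depth;

				if (via_n > sp[i].total_stack_depth)
					sp[i].total_stack_depth = via_n;
			}
		}
		return true;
	}

Since recursion is already rejected by check_cfg()'s back-edge detection, the cycle branch should be unreachable in practice, which is why the kernel version reports it as a verifier bug rather than a program error.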
From patchwork Thu Mar 29 22:46:17 2018
From: Edward Cree
Subject: [PATCH v2 bpf-next 2/3] bpf/verifier: update selftests
To: Alexei Starovoitov, Daniel Borkmann
Date: Thu, 29 Mar 2018 23:46:17 +0100

Error messages for some bad programs have changed, partly because we now check for loops / out-of-bounds jumps before checking subprogs. Also added a test ("calls: interleaved functions") to ensure that subprogs are required to be contiguous. It wasn't entirely clear to me what "calls: wrong recursive calls" was meant to test for, since all of the JMP|CALL insns are unreachable. I've changed it so that they are now reachable, which causes static back-edges to be detected (since that, like insn reachability, is now tested before subprog boundaries are determined).

Signed-off-by: Edward Cree
---
tools/testing/selftests/bpf/test_verifier.c | 51 ++++++++++++++++++----------- 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3e7718b1a9ae..cc45a0b52439 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -646,7 +646,7 @@ static struct bpf_test tests[] = { .insns = { BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2), }, - .errstr = "not an exit", + .errstr = "jump out of range", .result = REJECT, }, { @@ -9442,13 +9442,13 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "last insn is not an exit or jmp", + .errstr = "insn 1 was in subprog 1, now 0", .result = REJECT, }, { "calls: wrong recursive calls", .insns = { - BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), BPF_JMP_IMM(BPF_JA, 0, 0, 4), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), @@ -9457,7 +9457,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "jump out of range", + .errstr = "back-edge from insn", .result = REJECT, }, { @@ -9508,7 +9508,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "jump out of range", + .errstr = "insn 5 was in subprog 1, now 0", .result = REJECT, }, { @@ -9787,7 +9787,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "jump out of range from insn 1 to 4", + .errstr = "insn 5 was in subprog 1, now 0", .result = REJECT, }, { @@ -9803,13 +9803,12 @@ static struct bpf_test tests[] = { BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, len)),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "jump out of range from insn 11 to 9", + .errstr = "insn 9 was in subprog 1, now 2", .result = REJECT, }, { @@ -9861,7 +9860,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "invalid destination", + .errstr = "jump out of range from insn 2 to -1", .result = REJECT, }, { @@ -9873,7 +9872,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "invalid destination", + .errstr = "jump out of range from insn 2 to -2147483646", .result = REJECT, }, { @@ -9886,7 +9885,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "jump out of range", + .errstr = "insn 1 was in subprog 0, now 1", .result = REJECT, }, { @@ -9899,7 +9898,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "jump out of range", + .errstr = "insn 4 was in subprog 1, now 0", .result = REJECT, }, { @@ -9913,7 +9912,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "not an exit", + .errstr = "jump out of range from insn 5 to 6", .result = REJECT, }, { @@ -9927,7 +9926,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "last insn", + .errstr = "insn_idx 5 is in subprog 1 but that starts at 4", .result = REJECT, }, { @@ -9942,7 +9941,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "last insn", + .errstr = "insn_idx 5 is in subprog 1 but that starts at 4", .result = REJECT, }, { @@ -9982,12 +9981,11 @@ static struct bpf_test tests[] = { BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), BPF_MOV64_REG(BPF_REG_0, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, len)), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "not an exit", + .errstr = "insn 10 was in subprog 2, now 1", .result = REJECT, }, { @@ -11423,6 +11421,21 @@ static struct bpf_test tests[] = { .errstr = "BPF_XADD stores into R2 packet", .prog_type = BPF_PROG_TYPE_XDP, }, + { + "calls: interleaved functions", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "subprog 0 is non-contiguous at 5", + .result = REJECT, + }, }; static int probe_filter_length(const struct bpf_insn *fp)
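
To make the layout that the new "calls: interleaved functions" test rejects easier to see, here is the same instruction sequence with per-insn ownership annotated (the comments are mine, derived from the call and jump offsets in the test above):

	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), /* 0: main; calls subprog at insn 3 */
	BPF_MOV64_IMM(BPF_REG_0, 1),                  /* 1: main */
	BPF_JMP_IMM(BPF_JA, 0, 0, 2),                 /* 2: main; jumps to insn 5 */
	BPF_MOV64_IMM(BPF_REG_0, 2),                  /* 3: subprog 1 entry */
	BPF_JMP_IMM(BPF_JA, 0, 0, 1),                 /* 4: subprog 1; jumps to insn 6 */
	BPF_EXIT_INSN(),                              /* 5: main again, not contiguous */
	BPF_EXIT_INSN(),                              /* 6: subprog 1 */

do_check() tags insns 0, 1, 2 and 5 as subprog 0 and insns 3, 4 and 6 as subprog 1, so the linear scan in validate_subprogs() finds subprog 0 resuming at insn 5 after subprog 1 began at insn 3, and rejects the program with the expected "subprog 0 is non-contiguous at 5".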
From patchwork Thu Mar 29 22:46:36 2018
From: Edward Cree
Subject: [PATCH v2 bpf-next 3/3] bpf/verifier: per-register parent pointers
To: Alexei Starovoitov, Daniel Borkmann
Date: Thu, 29 Mar 2018 23:46:36 +0100

By giving each register its own liveness chain, we elide the skip_callee() logic. Instead, each register's parent is the state it inherits from; both check_func_call() and prepare_func_exit() automatically connect reg states to the correct chain since when they copy the reg state across (r1-r5 into the callee as args, and r0 out as the return value) they also copy the parent pointer.

Signed-off-by: Edward Cree
---
include/linux/bpf_verifier.h | 8 +- kernel/bpf/verifier.c | 180 ++++++++++--------------------------------- 2 files changed, 45 insertions(+), 143 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3af3f9cceede..2ec31b388dd6 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -41,6 +41,7 @@ enum bpf_reg_liveness { }; struct bpf_reg_state { + /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; union { /* valid when type == PTR_TO_PACKET */ @@ -59,7 +60,6 @@ struct bpf_reg_state { * came from, when one is tested for != NULL. */ u32 id; - /* Ordering of fields matters. See states_equal() */ /* For scalar types (SCALAR_VALUE), this represents our knowledge of * the actual value. * For pointer types, this represents the variable part of the offset @@ -76,15 +76,15 @@ struct bpf_reg_state { s64 smax_value; /* maximum possible (s64)value */ u64 umin_value; /* minimum possible (u64)value */ u64 umax_value; /* maximum possible (u64)value */ + /* parentage chain for liveness checking */ + struct bpf_reg_state *parent; /* Inside the callee two registers can be both PTR_TO_STACK like * R1=fp-8 and R2=fp-8, but one of them points to this function stack * while another to the caller's stack. To differentiate them 'frameno' * is used which is an index in bpf_verifier_state->frame[] array * pointing to bpf_func_state. - * This field must be second to last, for states_equal() reasons. */ u32 frameno; - /* This field must be last, for states_equal() reasons.
*/ enum bpf_reg_liveness live; }; @@ -107,7 +107,6 @@ struct bpf_stack_state { */ struct bpf_func_state { struct bpf_reg_state regs[MAX_BPF_REG]; - struct bpf_verifier_state *parent; /* index of call instruction that called into this func */ int callsite; /* stack frame number of this function state from pov of @@ -129,7 +128,6 @@ struct bpf_func_state { struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; - struct bpf_verifier_state *parent; u32 curframe; }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 33963357a7ef..edb2ec0da95c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -355,9 +355,9 @@ static int copy_stack_state(struct bpf_func_state *dst, /* do_check() starts with zero-sized stack in struct bpf_verifier_state to * make it consume minimal amount of memory. check_stack_write() access from * the program calls into realloc_func_state() to grow the stack size. - * Note there is a non-zero 'parent' pointer inside bpf_verifier_state - * which this function copies over. It points to previous bpf_verifier_state - * which is never reallocated + * Note there is a non-zero parent pointer inside each reg of bpf_verifier_state + * which this function copies over. It points to corresponding reg in previous + * bpf_verifier_state which is never reallocated */ static int realloc_func_state(struct bpf_func_state *state, int size, bool copy_old) @@ -441,7 +441,6 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->frame[i] = NULL; } dst_state->curframe = src->curframe; - dst_state->parent = src->parent; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { @@ -707,6 +706,7 @@ static void init_reg_state(struct bpf_verifier_env *env, for (i = 0; i < MAX_BPF_REG; i++) { mark_reg_not_init(env, regs, i); regs[i].live = REG_LIVE_NONE; + regs[i].parent = NULL; } /* frame pointer */ @@ -781,74 +781,21 @@ static int add_subprog(struct bpf_verifier_env *env, int off) return ret; } -static -struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env, - const struct bpf_verifier_state *state, - struct bpf_verifier_state *parent, - u32 regno) -{ - struct bpf_verifier_state *tmp = NULL; - - /* 'parent' could be a state of caller and - * 'state' could be a state of callee. In such case - * parent->curframe < state->curframe - * and it's ok for r1 - r5 registers - * - * 'parent' could be a callee's state after it bpf_exit-ed. - * In such case parent->curframe > state->curframe - * and it's ok for r0 only - */ - if (parent->curframe == state->curframe || - (parent->curframe < state->curframe && - regno >= BPF_REG_1 && regno <= BPF_REG_5) || - (parent->curframe > state->curframe && - regno == BPF_REG_0)) - return parent; - - if (parent->curframe > state->curframe && - regno >= BPF_REG_6) { - /* for callee saved regs we have to skip the whole chain - * of states that belong to callee and mark as LIVE_READ - * the registers before the call - */ - tmp = parent; - while (tmp && tmp->curframe != state->curframe) { - tmp = tmp->parent; - } - if (!tmp) - goto bug; - parent = tmp; - } else { - goto bug; - } - return parent; -bug: - verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp); - verbose(env, "regno %d parent frame %d current frame %d\n", - regno, parent->curframe, state->curframe); - return NULL; -} - +/* Parentage chain of this register (or stack slot) should take care of all + * issues like callee-saved registers, stack slot allocation time, etc. 
+ */ static int mark_reg_read(struct bpf_verifier_env *env, - const struct bpf_verifier_state *state, - struct bpf_verifier_state *parent, - u32 regno) + const struct bpf_reg_state *state, + struct bpf_reg_state *parent) { bool writes = parent == state->parent; /* Observe write marks */ - if (regno == BPF_REG_FP) - /* We don't need to worry about FP liveness because it's read-only */ - return 0; - while (parent) { /* if read wasn't screened by an earlier write ... */ - if (writes && state->frame[state->curframe]->regs[regno].live & REG_LIVE_WRITTEN) + if (writes && state->live & REG_LIVE_WRITTEN) break; - parent = skip_callee(env, state, parent, regno); - if (!parent) - return -EFAULT; /* ... then we depend on parent's value */ - parent->frame[parent->curframe]->regs[regno].live |= REG_LIVE_READ; + parent->live |= REG_LIVE_READ; state = parent; parent = state->parent; writes = true; @@ -874,7 +821,10 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, verbose(env, "R%d !read_ok\n", regno); return -EACCES; } - return mark_reg_read(env, vstate, vstate->parent, regno); + /* We don't need to worry about FP liveness because it's read-only */ + if (regno != BPF_REG_FP) + return mark_reg_read(env, ®s[regno], + regs[regno].parent); } else { /* check whether register used as dest operand can be written to */ if (regno == BPF_REG_FP) { @@ -986,61 +936,6 @@ static int check_stack_write(struct bpf_verifier_env *env, return 0; } -/* registers of every function are unique and mark_reg_read() propagates - * the liveness in the following cases: - * - from callee into caller for R1 - R5 that were used as arguments - * - from caller into callee for R0 that used as result of the call - * - from caller to the same caller skipping states of the callee for R6 - R9, - * since R6 - R9 are callee saved by implicit function prologue and - * caller's R6 != callee's R6, so when we propagate liveness up to - * parent states we need to skip callee states for R6 - R9. - * - * stack slot marking is different, since stacks of caller and callee are - * accessible in both (since caller can pass a pointer to caller's stack to - * callee which can pass it to another function), hence mark_stack_slot_read() - * has to propagate the stack liveness to all parent states at given frame number. - * Consider code: - * f1() { - * ptr = fp - 8; - * *ptr = ctx; - * call f2 { - * .. = *ptr; - * } - * .. = *ptr; - * } - * First *ptr is reading from f1's stack and mark_stack_slot_read() has - * to mark liveness at the f1's frame and not f2's frame. - * Second *ptr is also reading from f1's stack and mark_stack_slot_read() has - * to propagate liveness to f2 states at f1's frame level and further into - * f1 states at f1's frame level until write into that stack slot - */ -static void mark_stack_slot_read(struct bpf_verifier_env *env, - const struct bpf_verifier_state *state, - struct bpf_verifier_state *parent, - int slot, int frameno) -{ - bool writes = parent == state->parent; /* Observe write marks */ - - while (parent) { - if (parent->frame[frameno]->allocated_stack <= slot * BPF_REG_SIZE) - /* since LIVE_WRITTEN mark is only done for full 8-byte - * write the read marks are conservative and parent - * state may not even have the stack allocated. In such case - * end the propagation, since the loop reached beginning - * of the function - */ - break; - /* if read wasn't screened by an earlier write ... */ - if (writes && state->frame[frameno]->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN) - break; - /* ... 
then we depend on parent's value */ - parent->frame[frameno]->stack[slot].spilled_ptr.live |= REG_LIVE_READ; - state = parent; - parent = state->parent; - writes = true; - } -} - static int check_stack_read(struct bpf_verifier_env *env, struct bpf_func_state *reg_state /* func where register points to */, int off, int size, int value_regno) @@ -1078,8 +973,8 @@ static int check_stack_read(struct bpf_verifier_env *env, */ state->regs[value_regno].live |= REG_LIVE_WRITTEN; } - mark_stack_slot_read(env, vstate, vstate->parent, spi, - reg_state->frameno); + mark_reg_read(env, ®_state->stack[spi].spilled_ptr, + reg_state->stack[spi].spilled_ptr.parent); return 0; } else { int zeros = 0; @@ -1095,8 +990,8 @@ static int check_stack_read(struct bpf_verifier_env *env, off, i, size); return -EACCES; } - mark_stack_slot_read(env, vstate, vstate->parent, spi, - reg_state->frameno); + mark_reg_read(env, ®_state->stack[spi].spilled_ptr, + reg_state->stack[spi].spilled_ptr.parent); if (value_regno >= 0) { if (zeros == size) { /* any size read into register is zero extended, @@ -1783,8 +1678,8 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, /* reading any byte out of 8-byte 'spill_slot' will cause * the whole slot to be marked as 'read' */ - mark_stack_slot_read(env, env->cur_state, env->cur_state->parent, - spi, state->frameno); + mark_reg_read(env, &state->stack[spi].spilled_ptr, + state->stack[spi].spilled_ptr.parent); } return update_stack_depth(env, state, off); } @@ -2226,11 +2121,13 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, state->curframe + 1 /* frameno within this callchain */, subprog /* subprog number within this prog */); - /* copy r1 - r5 args that callee can access */ + /* copy r1 - r5 args that callee can access. The copy includes parent + * pointers, which connects us up to the liveness chain + */ for (i = BPF_REG_1; i <= BPF_REG_5; i++) callee->regs[i] = caller->regs[i]; - /* after the call regsiters r0 - r5 were scratched */ + /* after the call registers r0 - r5 were scratched */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, caller->regs, caller_saved[i]); check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); @@ -4136,7 +4033,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, /* explored state didn't use this */ return true; - equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, frameno)) == 0; + equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0; if (rold->type == PTR_TO_STACK) /* two stack pointers are equal only if they're pointing to @@ -4369,7 +4266,7 @@ static bool states_equal(struct bpf_verifier_env *env, * equivalent state (jump target or such) we didn't arrive by the straight-line * code, so read marks in the state must propagate to the parent regardless * of the state's write marks. That's what 'parent == state->parent' comparison - * in mark_reg_read() and mark_stack_slot_read() is for. + * in mark_reg_read() is for. 
*/ static int propagate_liveness(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, @@ -4390,7 +4287,8 @@ static int propagate_liveness(struct bpf_verifier_env *env, if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ) continue; if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) { - err = mark_reg_read(env, vstate, vparent, i); + err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i], + &vparent->frame[vstate->curframe]->regs[i]); if (err) return err; } @@ -4405,7 +4303,8 @@ static int propagate_liveness(struct bpf_verifier_env *env, if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ) continue; if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) - mark_stack_slot_read(env, vstate, vparent, i, frame); + mark_reg_read(env, &state->stack[i].spilled_ptr, + &parent->stack[i].spilled_ptr); } } return err; @@ -4415,7 +4314,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl; - struct bpf_verifier_state *cur = env->cur_state; + struct bpf_verifier_state *cur = env->cur_state, *new; int i, j, err; sl = env->explored_states[insn_idx]; @@ -4457,16 +4356,18 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return -ENOMEM; /* add new state to the head of linked list */ - err = copy_verifier_state(&new_sl->state, cur); + new = &new_sl->state; + err = copy_verifier_state(new, cur); if (err) { - free_verifier_state(&new_sl->state, false); + free_verifier_state(new, false); kfree(new_sl); return err; } new_sl->next = env->explored_states[insn_idx]; env->explored_states[insn_idx] = new_sl; /* connect new state to parentage chain */ - cur->parent = &new_sl->state; + for (i = 0; i < BPF_REG_FP; i++) + cur_regs(env)[i].parent = &new->frame[new->curframe]->regs[i]; /* clear write marks in current state: the writes we did are not writes * our child did, so they don't screen off its reads from us. * (There are no read marks in current state, because reads always mark @@ -4479,9 +4380,13 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) /* all stack frames are accessible from callee, clear them all */ for (j = 0; j <= cur->curframe; j++) { struct bpf_func_state *frame = cur->frame[j]; + struct bpf_func_state *newframe = new->frame[j]; - for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) + for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) { frame->stack[i].spilled_ptr.live = REG_LIVE_NONE; + frame->stack[i].spilled_ptr.parent = + &newframe->stack[i].spilled_ptr; + } } return 0; } @@ -4547,7 +4452,6 @@ static int do_check(struct bpf_verifier_env *env) if (!state) return -ENOMEM; state->curframe = 0; - state->parent = NULL; state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); if (!state->frame[0]) { kfree(state);
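
Taken together, the point of this last patch is that a read mark now only ever walks a single register's (or stack slot's) parentage chain. As a restatement for clarity, stripped of the verifier-env argument and the error return, the loop at the heart of the new mark_reg_read() above is just:

	static void mark_read(const struct bpf_reg_state *state,
			      struct bpf_reg_state *parent)
	{
		bool writes = parent == state->parent; /* Observe write marks */

		while (parent) {
			/* if read wasn't screened by an earlier write ... */
			if (writes && (state->live & REG_LIVE_WRITTEN))
				break;
			/* ... then we depend on parent's value */
			parent->live |= REG_LIVE_READ;
			state = parent;
			parent = state->parent;
			writes = true;
		}
	}

Because check_func_call() and prepare_func_exit() copy the parent pointer along with the rest of the register state, this same loop serves argument registers (r1-r5), the return value (r0), callee-saved registers and spilled stack slots alike, which is what lets both skip_callee() and mark_stack_slot_read() go away.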