diff mbox

[v3,net-next,02/12] bpf/verifier: rework value tracking

Message ID 2244b48b-f415-3239-6912-cb09f0abc546@solarflare.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Edward Cree June 27, 2017, 12:56 p.m. UTC
Tracks value alignment by means of tracking known & unknown bits.
Tightens some min/max value checks and fixes a couple of bugs therein.
If pointer leaks are allowed, and adjust_ptr_min_max_vals returns -EACCES,
 treat the pointer as an unknown scalar and try again, because we might be
 able to conclude something about the result (e.g. pointer & 0x40 is either
 0 or 0x40).

Signed-off-by: Edward Cree <ecree@solarflare.com>
---
 include/linux/bpf.h          |   34 +-
 include/linux/bpf_verifier.h |   40 +-
 include/linux/tnum.h         |   79 ++
 kernel/bpf/Makefile          |    2 +-
 kernel/bpf/tnum.c            |  163 ++++
 kernel/bpf/verifier.c        | 1692 ++++++++++++++++++++++++------------------
 6 files changed, 1235 insertions(+), 775 deletions(-)
 create mode 100644 include/linux/tnum.h
 create mode 100644 kernel/bpf/tnum.c

Comments

Daniel Borkmann June 28, 2017, 3:15 p.m. UTC | #1
On 06/27/2017 02:56 PM, Edward Cree wrote:
> Tracks value alignment by means of tracking known & unknown bits.
> Tightens some min/max value checks and fixes a couple of bugs therein.

You mean the one in relation to patch 1/12? Would be good to elaborate
here since otherwise this gets forgotten few weeks later.

Could you also document all the changes that verifier will then start
allowing for after the patch?

> If pointer leaks are allowed, and adjust_ptr_min_max_vals returns -EACCES,
>   treat the pointer as an unknown scalar and try again, because we might be
>   able to conclude something about the result (e.g. pointer & 0x40 is either
>   0 or 0x40).
>
> Signed-off-by: Edward Cree <ecree@solarflare.com>
[...]
>   /* check whether memory at (regno + off) is accessible for t = (read | write)
> @@ -899,52 +965,79 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
>   	struct bpf_reg_state *reg = &state->regs[regno];
>   	int size, err = 0;
>
> -	if (reg->type == PTR_TO_STACK)
> -		off += reg->imm;
> -
>   	size = bpf_size_to_bytes(bpf_size);
>   	if (size < 0)
>   		return size;
>
[...]
> -	if (reg->type == PTR_TO_MAP_VALUE ||
> -	    reg->type == PTR_TO_MAP_VALUE_ADJ) {
> +	/* for access checks, reg->off is just part of off */
> +	off += reg->off;

Could you elaborate on why removing the reg->type == PTR_TO_STACK?
Also in context of below PTR_TO_CTX.

[...]
>   	} else if (reg->type == PTR_TO_CTX) {
> -		enum bpf_reg_type reg_type = UNKNOWN_VALUE;
> +		enum bpf_reg_type reg_type = SCALAR_VALUE;
>
>   		if (t == BPF_WRITE && value_regno >= 0 &&
>   		    is_pointer_value(env, value_regno)) {
>   			verbose("R%d leaks addr into ctx\n", value_regno);
>   			return -EACCES;
>   		}
> +		/* ctx accesses must be at a fixed offset, so that we can
> +		 * determine what type of data were returned.
> +		 */
> +		if (!tnum_is_const(reg->var_off)) {
> +			char tn_buf[48];
> +
> +			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
> +			verbose("variable ctx access var_off=%s off=%d size=%d",
> +				tn_buf, off, size);
> +			return -EACCES;
> +		}
> +		off += reg->var_off.value;

... f.e. in PTR_TO_CTX case the only access that is currently
allowed is LDX/STX with fixed offset from insn->off, which is
passed as off param to check_mem_access(). Can you elaborate on
off += reg->var_off.value? Meaning we make this more dynamic
as long as access is known const?

>   		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
>   		if (!err && t == BPF_READ && value_regno >= 0) {
> -			mark_reg_unknown_value_and_range(state->regs,
> -							 value_regno);
> -			/* note that reg.[id|off|range] == 0 */
> +			/* ctx access returns either a scalar, or a
> +			 * PTR_TO_PACKET[_END].  In the latter case, we know
> +			 * the offset is zero.
> +			 */
> +			if (reg_type == SCALAR_VALUE)
> +				mark_reg_unknown(state->regs, value_regno);
> +			else
> +				mark_reg_known_zero(state->regs, value_regno);
> +			state->regs[value_regno].id = 0;
> +			state->regs[value_regno].off = 0;
> +			state->regs[value_regno].range = 0;
>   			state->regs[value_regno].type = reg_type;
> -			state->regs[value_regno].aux_off = 0;
> -			state->regs[value_regno].aux_off_align = 0;
>   		}
>
> -	} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
> +	} else if (reg->type == PTR_TO_STACK) {
[...]
Edward Cree June 28, 2017, 4:07 p.m. UTC | #2
On 28/06/17 16:15, Daniel Borkmann wrote:
> On 06/27/2017 02:56 PM, Edward Cree wrote:
>> Tracks value alignment by means of tracking known & unknown bits.
>> Tightens some min/max value checks and fixes a couple of bugs therein.
>
> You mean the one in relation to patch 1/12? Would be good to elaborate
> here since otherwise this gets forgotten few weeks later.
That wasn't the only one; there were also some in the new min/max value
 calculation for ALU ops.  For instance, in subtraction we were taking
 the new bounds as [min-min, max-max] instead of [min-max, max-min].
I can't remember what else there was and there might also have been some
 that I missed but that got incidentally fixed by the rewrite.  But I
 guess I should change "checks" to "checks and updates" in the above?
> Could you also document all the changes that verifier will then start
> allowing for after the patch?
Maybe not the changes, because the old verifier had a lot of special
 cases, but I could, and probably should, document the new behaviour
 (maybe in Documentation/networking/filter.txt, that already has a bit
 of description of the verifier).
> [...]
>>   /* check whether memory at (regno + off) is accessible for t = (read | write)
>> @@ -899,52 +965,79 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
>>       struct bpf_reg_state *reg = &state->regs[regno];
>>       int size, err = 0;
>>
>> -    if (reg->type == PTR_TO_STACK)
>> -        off += reg->imm;
>> -
>>       size = bpf_size_to_bytes(bpf_size);
>>       if (size < 0)
>>           return size;
>>
> [...]
>> -    if (reg->type == PTR_TO_MAP_VALUE ||
>> -        reg->type == PTR_TO_MAP_VALUE_ADJ) {
>> +    /* for access checks, reg->off is just part of off */
>> +    off += reg->off;
>
> Could you elaborate on why removing the reg->type == PTR_TO_STACK?
Previously bpf_reg_state had a member 'imm' which, for PTR_TO_STACK, was
 a fixed offset, so we had to add it in to the offset.  Now we instead
 have reg->off and it's generic to all pointerish types, so we don't need
 special handling of PTR_TO_STACK here.
> Also in context of below PTR_TO_CTX.
>
> [...]
>>       } else if (reg->type == PTR_TO_CTX) {
>> -        enum bpf_reg_type reg_type = UNKNOWN_VALUE;
>> +        enum bpf_reg_type reg_type = SCALAR_VALUE;
>>
>>           if (t == BPF_WRITE && value_regno >= 0 &&
>>               is_pointer_value(env, value_regno)) {
>>               verbose("R%d leaks addr into ctx\n", value_regno);
>>               return -EACCES;
>>           }
>> +        /* ctx accesses must be at a fixed offset, so that we can
>> +         * determine what type of data were returned.
>> +         */
>> +        if (!tnum_is_const(reg->var_off)) {
>> +            char tn_buf[48];
>> +
>> +            tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
>> +            verbose("variable ctx access var_off=%s off=%d size=%d",
>> +                tn_buf, off, size);
>> +            return -EACCES;
>> +        }
>> +        off += reg->var_off.value;
>
> ... f.e. in PTR_TO_CTX case the only access that is currently
> allowed is LDX/STX with fixed offset from insn->off, which is
> passed as off param to check_mem_access(). Can you elaborate on
> off += reg->var_off.value? Meaning we make this more dynamic
> as long as access is known const?
So, I can't actually figure out how to construct a pointer with a known
 variable offset, but future changes to the verifier (like learning from
 comparing two pointers with the same base) could make it possible.  The
 situation we're handling here is where our register holds ctx + x,
 where x is also known to be some constant value k, and currently I don't
 know if that's possible except for the trivial case of k==0, and the edge
 case where k is too big to fit in the s32 reg->off (in which case the
 check_ctx_access will presumably reject it).
Stepping back a bit, each register holding a pointer type has two offsets,
 reg->off and reg->var_off, and the latter is a tnum representing
 knowledge about a value that's not necessarily exactly known.  But
 tnum_is_const checks that it _is_ exactly known.
There is another case that we allow now through the reg->off handling:
 adding a constant to a pointer and then dereferencing it.
So, with r1=ctx, instead of r2 = *(r1 + off), you can write
    r3 = r1 + off
    r2 = *(r1 + 0)
 if for some reason that suits you better.  But in that case, because off
 is a known value (either an immediate, or a register whose value is
 exactly known), that value gets added to r3->off rather than r3->var_off;
 see adjust_ptr_min_max_vals().

Hope that's clear,
-Ed
Daniel Borkmann June 28, 2017, 5:09 p.m. UTC | #3
On 06/27/2017 02:56 PM, Edward Cree wrote:
> Tracks value alignment by means of tracking known & unknown bits.
> Tightens some min/max value checks and fixes a couple of bugs therein.
> If pointer leaks are allowed, and adjust_ptr_min_max_vals returns -EACCES,
>   treat the pointer as an unknown scalar and try again, because we might be
>   able to conclude something about the result (e.g. pointer & 0x40 is either
>   0 or 0x40).
>
> Signed-off-by: Edward Cree <ecree@solarflare.com>
[...]
> +static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
> +				   struct bpf_insn *insn)
> +{
> +	struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg, *src_reg;
> +	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
> +	u8 opcode = BPF_OP(insn->code);
> +	int rc;
> +
> +	dst_reg = &regs[insn->dst_reg];
> +	check_reg_overflow(dst_reg);
> +	src_reg = NULL;
> +	if (dst_reg->type != SCALAR_VALUE)
> +		ptr_reg = dst_reg;
> +	if (BPF_SRC(insn->code) == BPF_X) {
> +		src_reg = &regs[insn->src_reg];
> +		check_reg_overflow(src_reg);
> +
> +		if (src_reg->type != SCALAR_VALUE) {
> +			if (dst_reg->type != SCALAR_VALUE) {
> +				/* Combining two pointers by any ALU op yields
> +				 * an arbitrary scalar.
> +				 */
> +				if (!env->allow_ptr_leaks) {
> +					verbose("R%d pointer %s pointer prohibited\n",
> +						insn->dst_reg,
> +						bpf_alu_string[opcode >> 4]);
> +					return -EACCES;
> +				}
> +				mark_reg_unknown(regs, insn->dst_reg);
> +				return 0;
> +			} else {
> +				/* scalar += pointer
> +				 * This is legal, but we have to reverse our
> +				 * src/dest handling in computing the range
> +				 */
> +				rc = adjust_ptr_min_max_vals(env, insn,
> +							     src_reg, dst_reg);
> +				if (rc == -EACCES && env->allow_ptr_leaks) {
> +					/* scalar += unknown scalar */
> +					__mark_reg_unknown(&off_reg);
> +					return adjust_scalar_min_max_vals(
> +							env, insn,
> +							dst_reg, &off_reg);

Could you elaborate on this one? If I understand it correctly, then
the scalar += pointer case would mean the following: given I have one
of the allowed pointer types in adjust_ptr_min_max_vals() then the
prior scalar type inherits the ptr type/id. I would then 'destroy' the
pointer value so we get a -EACCES on it. We mark the tmp off_reg as
scalar type, but shouldn't also actual dst_reg be marked as such
like in below pointer += scalar case, such that we undo the prior
ptr_type inheritance?

> +				}
> +				return rc;
> +			}
> +		} else if (ptr_reg) {
> +			/* pointer += scalar */
> +			rc = adjust_ptr_min_max_vals(env, insn,
> +						     dst_reg, src_reg);
> +			if (rc == -EACCES && env->allow_ptr_leaks) {
> +				/* unknown scalar += scalar */
> +				__mark_reg_unknown(dst_reg);
> +				return adjust_scalar_min_max_vals(
> +						env, insn, dst_reg, src_reg);
> +			}
> +			return rc;
> +		}
> +	} else {
[...]
Edward Cree June 28, 2017, 6:28 p.m. UTC | #4
On 28/06/17 18:09, Daniel Borkmann wrote:
> Could you elaborate on this one? If I understand it correctly, then
> the scalar += pointer case would mean the following: given I have one
> of the allowed pointer types in adjust_ptr_min_max_vals() then the
> prior scalar type inherits the ptr type/id. I would then 'destroy' the
> pointer value so we get a -EACCES on it. We mark the tmp off_reg as
> scalar type, but shouldn't also actual dst_reg be marked as such
> like in below pointer += scalar case, such that we undo the prior
> ptr_type inheritance?
Good catch.  The intent was that adjust_ptr_min_max_vals() wouldn't mark
 dst_reg's type/id in the case when it returned -EACCES, but indeed there
 are some such paths, and rather than changing those it may be easier to
 change the type/id back to scalar/0.  I don't think we need to go as far
 as calling __mark_reg_unknown() on it though, its bounds and align
 shouldn't have been screwed up by adjust_ptr_min_max_vals().

-Ed
Daniel Borkmann June 28, 2017, 7:44 p.m. UTC | #5
On 06/28/2017 06:07 PM, Edward Cree wrote:
> On 28/06/17 16:15, Daniel Borkmann wrote:
>> On 06/27/2017 02:56 PM, Edward Cree wrote:
>>> Tracks value alignment by means of tracking known & unknown bits.
>>> Tightens some min/max value checks and fixes a couple of bugs therein.
>>
>> You mean the one in relation to patch 1/12? Would be good to elaborate
>> here since otherwise this gets forgotten few weeks later.
> That wasn't the only one; there were also some in the new min/max value
>   calculation for ALU ops.  For instance, in subtraction we were taking
>   the new bounds as [min-min, max-max] instead of [min-max, max-min].
> I can't remember what else there was and there might also have been some
>   that I missed but that got incidentally fixed by the rewrite.  But I
>   guess I should change "checks" to "checks and updates" in the above?

Ok. Would be good though to have them all covered in the selftests
part of your series if possible, so we can make sure to keep track
of these cases.

>> Could you also document all the changes that verifier will then start
>> allowing for after the patch?
> Maybe not the changes, because the old verifier had a lot of special
>   cases, but I could, and probably should, document the new behaviour
>   (maybe in Documentation/networking/filter.txt, that already has a bit
>   of description of the verifier).

Yeah, that would definitely help; filter.txt should be fine.

>> [...]
>>>    /* check whether memory at (regno + off) is accessible for t = (read | write)
>>> @@ -899,52 +965,79 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
>>>        struct bpf_reg_state *reg = &state->regs[regno];
>>>        int size, err = 0;
>>>
>>> -    if (reg->type == PTR_TO_STACK)
>>> -        off += reg->imm;
>>> -
>>>        size = bpf_size_to_bytes(bpf_size);
>>>        if (size < 0)
>>>            return size;
>>>
>> [...]
>>> -    if (reg->type == PTR_TO_MAP_VALUE ||
>>> -        reg->type == PTR_TO_MAP_VALUE_ADJ) {
>>> +    /* for access checks, reg->off is just part of off */
>>> +    off += reg->off;
>>
>> Could you elaborate on why removing the reg->type == PTR_TO_STACK?
> Previously bpf_reg_state had a member 'imm' which, for PTR_TO_STACK, was
>   a fixed offset, so we had to add it in to the offset.  Now we instead
>   have reg->off and it's generic to all pointerish types, so we don't need
>   special handling of PTR_TO_STACK here.
>> Also in context of below PTR_TO_CTX.
>>
>> [...]
>>>        } else if (reg->type == PTR_TO_CTX) {
>>> -        enum bpf_reg_type reg_type = UNKNOWN_VALUE;
>>> +        enum bpf_reg_type reg_type = SCALAR_VALUE;
>>>
>>>            if (t == BPF_WRITE && value_regno >= 0 &&
>>>                is_pointer_value(env, value_regno)) {
>>>                verbose("R%d leaks addr into ctx\n", value_regno);
>>>                return -EACCES;
>>>            }
>>> +        /* ctx accesses must be at a fixed offset, so that we can
>>> +         * determine what type of data were returned.
>>> +         */
>>> +        if (!tnum_is_const(reg->var_off)) {
>>> +            char tn_buf[48];
>>> +
>>> +            tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
>>> +            verbose("variable ctx access var_off=%s off=%d size=%d",
>>> +                tn_buf, off, size);
>>> +            return -EACCES;
>>> +        }
>>> +        off += reg->var_off.value;
>>
>> ... f.e. in PTR_TO_CTX case the only access that is currently
>> allowed is LDX/STX with fixed offset from insn->off, which is
>> passed as off param to check_mem_access(). Can you elaborate on
>> off += reg->var_off.value? Meaning we make this more dynamic
>> as long as access is known const?
> So, I can't actually figure out how to construct a pointer with a known
>   variable offset, but future changes to the verifier (like learning from
>   comparing two pointers with the same base) could make it possible.  The
>   situation we're handling here is where our register holds ctx + x,
>   where x is also known to be some constant value k, and currently I don't
>   know if that's possible except for the trivial case of k==0, and the edge
>   case where k is too big to fit in the s32 reg->off (in which case the
>   check_ctx_access will presumably reject it).
> Stepping back a bit, each register holding a pointer type has two offsets,
>   reg->off and reg->var_off, and the latter is a tnum representing
>   knowledge about a value that's not necessarily exactly known.  But
>   tnum_is_const checks that it _is_ exactly known.

Right, I was reviewing this with the thought in mind where we could
run into a pruning situation where in the first path we either add
a scalar or offset to the ctx ptr that is then spilled to stack, later
filled to a reg again with eventual successful exit. And the second path
would prune on the spilled reg, but even if scalar, we require that it's
a _known_ const whereas reading back from stack marks it unknown, so that
is not possible. So all is fine; including your below example since it
all has to be a _known_ scalar.

> There is another case that we allow now through the reg->off handling:
>   adding a constant to a pointer and then dereferencing it.
> So, with r1=ctx, instead of r2 = *(r1 + off), you can write
>      r3 = r1 + off
>      r2 = *(r1 + 0)
>   if for some reason that suits you better.  But in that case, because off
>   is a known value (either an immediate, or a register whose value is
>   exactly known), that value gets added to r3->off rather than r3->var_off;
>   see adjust_ptr_min_max_vals().
>
> Hope that's clear,

Yep, thanks!
kernel test robot June 29, 2017, 7:48 a.m. UTC | #6
Hi Edward,

[auto build test ERROR on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Edward-Cree/bpf-rewrite-value-tracking-in-verifier/20170629-012559
config: ia64-allmodconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 6.2.0
reproduce:
        wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=ia64 

Note: the linux-review/Edward-Cree/bpf-rewrite-value-tracking-in-verifier/20170629-012559 HEAD 7a882286a655cc99ca765008aa3f830f6ad6b3eb builds fine.
      It only hurts bisectibility.

All errors (new ones prefixed by >>):

   drivers/net//ethernet/netronome/nfp/bpf/verifier.c: In function 'nfp_bpf_check_exit':
>> drivers/net//ethernet/netronome/nfp/bpf/verifier.c:86:20: error: 'CONST_IMM' undeclared (first use in this function)
     if (reg0->type != CONST_IMM) {
                       ^~~~~~~~~
   drivers/net//ethernet/netronome/nfp/bpf/verifier.c:86:20: note: each undeclared identifier is reported only once for each function it appears in
   In file included from include/linux/kernel.h:13:0,
                    from include/linux/list.h:8,
                    from include/linux/timer.h:4,
                    from include/linux/workqueue.h:8,
                    from include/linux/bpf.h:12,
                    from drivers/net//ethernet/netronome/nfp/bpf/verifier.c:36:
>> drivers/net//ethernet/netronome/nfp/bpf/verifier.c:88:20: error: 'const struct bpf_reg_state' has no member named 'imm'
       reg0->type, reg0->imm);
                       ^
   include/linux/printk.h:308:34: note: in definition of macro 'pr_info'
     printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
                                     ^~~~~~~~~~~
   drivers/net//ethernet/netronome/nfp/bpf/verifier.c:93:10: error: 'const struct bpf_reg_state' has no member named 'imm'
         reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) {
             ^~
   drivers/net//ethernet/netronome/nfp/bpf/verifier.c:93:29: error: 'const struct bpf_reg_state' has no member named 'imm'
         reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) {
                                ^~
   In file included from include/linux/kernel.h:13:0,
                    from include/linux/list.h:8,
                    from include/linux/timer.h:4,
                    from include/linux/workqueue.h:8,
                    from include/linux/bpf.h:12,
                    from drivers/net//ethernet/netronome/nfp/bpf/verifier.c:36:
   drivers/net//ethernet/netronome/nfp/bpf/verifier.c:95:20: error: 'const struct bpf_reg_state' has no member named 'imm'
       reg0->type, reg0->imm);
                       ^
   include/linux/printk.h:308:34: note: in definition of macro 'pr_info'
     printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
                                     ^~~~~~~~~~~
   drivers/net//ethernet/netronome/nfp/bpf/verifier.c:99:44: error: 'const struct bpf_reg_state' has no member named 'imm'
     if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT &&
                                               ^~
   drivers/net//ethernet/netronome/nfp/bpf/verifier.c:100:10: error: 'const struct bpf_reg_state' has no member named 'imm'
         reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN &&
             ^~
   drivers/net//ethernet/netronome/nfp/bpf/verifier.c:100:38: error: 'const struct bpf_reg_state' has no member named 'imm'
         reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN &&
                                         ^~
   drivers/net//ethernet/netronome/nfp/bpf/verifier.c:101:10: error: 'const struct bpf_reg_state' has no member named 'imm'
         reg0->imm != TC_ACT_QUEUED) {
             ^~
   In file included from include/linux/kernel.h:13:0,
                    from include/linux/list.h:8,
                    from include/linux/timer.h:4,
                    from include/linux/workqueue.h:8,
                    from include/linux/bpf.h:12,
                    from drivers/net//ethernet/netronome/nfp/bpf/verifier.c:36:
   drivers/net//ethernet/netronome/nfp/bpf/verifier.c:103:20: error: 'const struct bpf_reg_state' has no member named 'imm'
       reg0->type, reg0->imm);
                       ^
   include/linux/printk.h:308:34: note: in definition of macro 'pr_info'
     printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
                                     ^~~~~~~~~~~

vim +/CONST_IMM +86 drivers/net//ethernet/netronome/nfp/bpf/verifier.c

cd7df56e drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c Jakub Kicinski 2016-09-21  80  {
cd7df56e drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c Jakub Kicinski 2016-09-21  81  	const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
cd7df56e drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c Jakub Kicinski 2016-09-21  82  
6d677075 drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c Jakub Kicinski 2016-11-03  83  	if (nfp_prog->act == NN_ACT_XDP)
6d677075 drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c Jakub Kicinski 2016-11-03  84  		return 0;
6d677075 drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c Jakub Kicinski 2016-11-03  85  
cd7df56e drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c Jakub Kicinski 2016-09-21 @86  	if (reg0->type != CONST_IMM) {
cd7df56e drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c Jakub Kicinski 2016-09-21  87  		pr_info("unsupported exit state: %d, imm: %llx\n",
cd7df56e drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c Jakub Kicinski 2016-09-21 @88  			reg0->type, reg0->imm);
cd7df56e drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c Jakub Kicinski 2016-09-21  89  		return -EINVAL;
cd7df56e drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c Jakub Kicinski 2016-09-21  90  	}
cd7df56e drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c Jakub Kicinski 2016-09-21  91  

:::::: The code at line 86 was first introduced by commit
:::::: cd7df56ed3e60d046ddb3acd987778c00aa9ee33 nfp: add BPF to NFP code translator

:::::: TO: Jakub Kicinski <jakub.kicinski@netronome.com>
:::::: CC: David S. Miller <davem@davemloft.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Nadav Amit July 6, 2017, 9:21 p.m. UTC | #7
Edward Cree via iovisor-dev <iovisor-dev@lists.iovisor.org> wrote:

> Tracks value alignment by means of tracking known & unknown bits.
> Tightens some min/max value checks and fixes a couple of bugs therein.
> If pointer leaks are allowed, and adjust_ptr_min_max_vals returns -EACCES,
> treat the pointer as an unknown scalar and try again, because we might be
> able to conclude something about the result (e.g. pointer & 0x40 is either
> 0 or 0x40).
> 
> Signed-off-by: Edward Cree <ecree@solarflare.com>
> ---
> include/linux/bpf.h          |   34 +-
> include/linux/bpf_verifier.h |   40 +-
> include/linux/tnum.h         |   79 ++
> kernel/bpf/Makefile          |    2 +-
> kernel/bpf/tnum.c            |  163 ++++
> kernel/bpf/verifier.c        | 1692 ++++++++++++++++++++++++------------------
> 6 files changed, 1235 insertions(+), 775 deletions(-)

(RESEND)

I find it a bit surprising that such huge changes that can affect security
and robustness are performed in one patch. Personally, I cannot comprehend
all of these changes. In addition, I think that it is valuable to describe
in detail the bugs that the patch solves and when they were introduced.

I can bring up some concerns regarding inconsistencies in offset checks and
size, not spilling SCALAR and my wish not to limit packet size. However,
when the patch is that big, I think it is useless.

Nadav
Edward Cree July 7, 2017, 1:48 p.m. UTC | #8
On 06/07/17 22:21, Nadav Amit wrote:
> I find it a bit surprising that such huge changes that can affect security
> and robustness are performed in one patch.
In the first version of the series, this was two patches, with "feed
 pointer-to-unknown-scalar casts into scalar ALU path" split out from the rest;
 but that just caused a lot of code movement and confusing diffs, hence why I
 folded it into the same patch.
As for the rest of it, I'm not sure it can be split up: I'm changing the
 definition and semantics of a core data structure (struct bpf_reg_state)
 and I don't see any reasonable intermediate steps that would even compile.
For instance, replacing reg->imm (with its two meanings of 'known value' or
 'number of leading zero bits') with reg->var_off necessitates replacing all
 the arithmetic-handling code (e.g. evaluate_reg_imm_alu()).  But then
 var_off also replaces reg->aux_align[_off], so all the alignment-checking
 code has to change as well.  And changing what register state we track
 means that the pruning code (states_equal()) has to change too.
As it is, this patch leaves some selftests failing and it takes _another_
 big patch (4/12 "track signed and unsigned min/max values") to get them
 all to pass again.
> Personally, I cannot comprehend
> all of these changes. In addition, I think that it is valuable to describe
> in detail the bugs that the patch solves and when they were introduced.
Mostly this patch series isn't about fixing bugs (except those which were
 exposed when the changes caused some selftests to fail).  Instead, it's a
 combination of refactoring and unifying so that (for instance) the rules
 for pointer arithmetic and alignment checking are as similar as possible
 for all pointer types rather than having special-case rules for each type.
This allows many (correct) programs which the verifier will currently
 reject, and makes the overall description of the verifier's rules much
 shorter and simpler.  I have written a documentation patch explaining
 these rules, which the next version of the patch series will include.

The diff _is_ hard to read, I accept that; I think `diff` was too eager to
 interpolate and match single lines like '{' from completely unrelated
 functions.  It might be easier to just try applying the patch to a tree
 and then reading the result, rather than trying to interpret the diff.
> I can bring up some concerns regarding inconsistencies in offset checks and
> size, not spilling SCALAR and my wish not to limit packet size. However,
> when the patch is that big, I think it is useless.
Please, do describe these concerns; what inconsistencies do you see?
The not spilling scalars, and the limit to packet size, are retained
 from the existing code (is_spillable_regtype() and MAX_PACKET_OFF).
The latter is also needed for correctness in computing reg->range;
 if 'pkt_ptr + offset' could conceivably overflow, then the result
 could be < pkt_end without being a valid pointer into the packet.
We thus rely on the assumption that the packet pointer will never be
 within MAX_PACKET_OFF of the top of the address space.  (This
 assumption is, again, carried over from the existing verifier.)

-Ed
Nadav Amit July 7, 2017, 5:45 p.m. UTC | #9
Edward Cree <ecree@solarflare.com> wrote:

> On 06/07/17 22:21, Nadav Amit wrote:
>> I find it a bit surprising that such huge changes that can affect security
>> and robustness are performed in one patch.
> In the first version of the series, this was two patches, with "feed
> pointer-to-unknown-scalar casts into scalar ALU path" split out from the rest;
> but that just caused a lot of code movement and confusing diffs, hence why I
> folded it into the same patch.
> As for the rest of it, I'm not sure it can be split up: I'm changing the
> definition and semantics of a core data structure (struct bpf_reg_state)
> and I don't see any reasonable intermediate steps that would even compile.
> For instance, replacing reg->imm (with its two meanings of 'known value' or
> 'number of leading zero bits') with reg->var_off necessitates replacing all
> the arithmetic-handling code (e.g. evaluate_reg_imm_alu()).  But then
> var_off also replaces reg->aux_align[_off], so all the alignment-checking
> code has to change as well.  And changing what register state we track
> means that the pruning code (states_equal()) has to change too.
> As it is, this patch leaves some selftests failing and it takes _another_
> big patch (4/12 "track signed and unsigned min/max values") to get them
> all to pass again.

Thanks for your polite response to my rantings. I do understand the
complexity, and I did not like the two meanings of reg->imm either (it took
me some time to understand them). Yet, I really doubt anyone is capable of
really reviewing such a big patch. Most likely people will not even start or
pick to small details they know or care about.

>> Personally, I cannot comprehend
>> all of these changes. In addition, I think that it is valuable to describe
>> in detail the bugs that the patch solves and when they were introduced.
> Mostly this patch series isn't about fixing bugs (except those which were
> exposed when the changes caused some selftests to fail).  Instead, it's a
> combination of refactoring and unifying so that (for instance) the rules
> for pointer arithmetic and alignment checking are as similar as possible
> for all pointer types rather than having special-case rules for each type.
> This allows many (correct) programs which the verifier will currently
> reject, and makes the overall description of the verifier's rules much
> shorter and simpler.  I have written a documentation patch explaining
> these rules, which the next version of the patch series will include.

Ok. But I doubt it is as useful as commenting in the code or writing in the
commit message what exactly is addressed by the patch. And documentation
does not really help others to rebase their patches on top of yours.

> The diff _is_ hard to read, I accept that; I think `diff` was too eager to
> interpolate and match single lines like '{' from completely unrelated
> functions.  It might be easier to just try applying the patch to a tree
> and then reading the result, rather than trying to interpret the diff.

I don’t know to review patches this way. Hopefully others do, but I doubt it.

For me changes such as:

> 		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
> -			dst_reg->min_value -= min_val;
> +			dst_reg->min_value -= max_val;


are purely cryptic. What happened here? Was there a bug before and if so
what are its implications? Why can’t it be in a separate patch?

I also think that changes such as:
> -	s64 min_value;
> -	u64 max_value;
[snip]
> +	s64 min_value; /* minimum possible (s64)value */
> +	u64 max_value; /* maximum possible (u64)value */

Should have been avoided. Personally, I find this comment redundant (to say
the least). It does not help to reduce the diff size.

In this regard, I think that refactoring should have been done first and not
together with the logic changes. As another example, changing UNKNOWN_VALUE
to SCALAR_VALUE should have been a separate, easy to understand patch.

>> I can bring up some concerns regarding inconsistencies in offset checks and
>> size, not spilling SCALAR and my wish not to limit packet size. However,
>> when the patch is that big, I think it is useless.
> Please, do describe these concerns; what inconsistencies do you see?
> The not spilling scalars, and the limit to packet size, are retained
> from the existing code (is_spillable_regtype() and MAX_PACKET_OFF).
> The latter is also needed for correctness in computing reg->range;
> if 'pkt_ptr + offset' could conceivably overflow, then the result
> could be < pkt_end without being a valid pointer into the packet.
> We thus rely on the assumption that the packet pointer will never be
> within MAX_PACKET_OFF of the top of the address space.  (This
> assumption is, again, carried over from the existing verifier.)

I understand the limitations (I think). I agree that CONST being spillable
is not directly related. As for the possible packet offsets/range:
intentionally or not you do make some changes that push the 64k packet size
limit even deeper into the code. While the packet size should be limited to
avoid overflow, IIUC the requirement is that:

	64 > log(n_insn) + log(MAX_PACKET_OFF) + 1

Such an assertion may be staticly checked (using BUILD_BUG_ON), but I don’t
think should propagate into the entire code, especially in a non consistent
way. For example:

> struct bpf_reg_state {
> 	enum bpf_reg_type type;
> 	union {
> -		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
> -		s64 imm;
> -
> -		/* valid when type == PTR_TO_PACKET* */
> -		struct {
> -			u16 off;
> -			u16 range;
> -		};
> +		/* valid when type == PTR_TO_PACKET */
> +		u32 range;

I would (personally) prefer range (and offsets) to be u64. I could
understand if you left the range as u16 (since packet size is limited to
64k). But why would it be u32?

Or:
> -	if (off < 0 || size <= 0 || off + size > reg->range) {
> +	if (off < 0 || size <= 0 || off > MAX_PACKET_OFF ||
> +	    off + size > reg->range) {

Why don’t you check (off + size > max(MAX_PACKET_OFF, reg->range))? Is there
a reason you ignore size when comparing to MAX_PACKET_OFF ?

Unfortunately, I could only look at changes that directly concern me. As for
the style, I am not a huge fan of nested or multiple if’s. Switches are
easier to understand (e.g., in adjust_ptr_min_max_vals() ) and arrays with
flags (with information per instruction or register-type) may be even
better.

Nadav
Nadav Amit July 8, 2017, 12:54 a.m. UTC | #10
Nadav Amit <nadav.amit@gmail.com> wrote:

> Edward Cree <ecree@solarflare.com> wrote:
> 
>> On 06/07/17 22:21, Nadav Amit wrote:
>>> I find it a bit surprising that such huge changes that can affect security
>>> and robustness are performed in one patch.
>> In the first version of the series, this was two patches, with "feed
>> pointer-to-unknown-scalar casts into scalar ALU path" split out from the rest;
>> but that just caused a lot of code movement and confusing diffs, hence why I
>> folded it into the same patch.
>> As for the rest of it, I'm not sure it can be split up: I'm changing the
>> definition and semantics of a core data structure (struct bpf_reg_state)
>> and I don't see any reasonable intermediate steps that would even compile.
>> For instance, replacing reg->imm (with its two meanings of 'known value' or
>> 'number of leading zero bits') with reg->var_off necessitates replacing all
>> the arithmetic-handling code (e.g. evaluate_reg_imm_alu()).  But then
>> var_off also replaces reg->aux_align[_off], so all the alignment-checking
>> code has to change as well.  And changing what register state we track
>> means that the pruning code (states_equal()) has to change too.
>> As it is, this patch leaves some selftests failing and it takes _another_
>> big patch (4/12 "track signed and unsigned min/max values") to get them
>> all to pass again.
> 
> Thanks for your polite response to my rantings. I do understand the
> complexity, and I did not like the two meanings of reg->imm either (it took
> me some time to understand them). Yet, I really doubt anyone is capable of
> really reviewing such a big patch. Most likely people will not even start or
> pick to small details they know or care about.
> 
>>> Personally, I cannot comprehend
>>> all of these changes. In addition, I think that it is valuable to describe
>>> in detail the bugs that the patch solves and when they were introduced.
>> Mostly this patch series isn't about fixing bugs (except those which were
>> exposed when the changes caused some selftests to fail).  Instead, it's a
>> combination of refactoring and unifying so that (for instance) the rules
>> for pointer arithmetic and alignment checking are as similar as possible
>> for all pointer types rather than having special-case rules for each type.
>> This allows many (correct) programs which the verifier will currently
>> reject, and makes the overall description of the verifier's rules much
>> shorter and simpler.  I have written a documentation patch explaining
>> these rules, which the next version of the patch series will include.
> 
> Ok. But I doubt it is as useful as commenting in the code or writing in the
> commit message what exactly is addressed by the patch. And documentation
> does not really help others to rebase their patches on top of yours.
> 
>> The diff _is_ hard to read, I accept that; I think `diff` was too eager to
>> interpolate and match single lines like '{' from completely unrelated
>> functions.  It might be easier to just try applying the patch to a tree
>> and then reading the result, rather than trying to interpret the diff.
> 
> I don’t know to review patches this way. Hopefully others do, but I doubt it.
> 
> For me changes such as:
> 
>> if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
>> -			dst_reg->min_value -= min_val;
>> +			dst_reg->min_value -= max_val;
> 
> 
> are purely cryptic. What happened here? Was there a bug before and if so
> what are its implications? Why can’t it be in a separate patch?
> 
> I also think that changes such as:
>> -	s64 min_value;
>> -	u64 max_value;
> [snip]
>> +	s64 min_value; /* minimum possible (s64)value */
>> +	u64 max_value; /* maximum possible (u64)value */
> 
> Should have been avoided. Personally, I find this comment redundant (to say
> the least). It does not help to reduce the diff size.
> 
> In this regard, I think that refactoring should have been done first and not
> together with the logic changes. As another example, changing UNKNOWN_VALUE
> to SCALAR_VALUE should have been a separate, easy to understand patch.
> 
>>> I can bring up some concerns regarding inconsistencies in offset checks and
>>> size, not spilling SCALAR and my wish not to limit packet size. However,
>>> when the patch is that big, I think it is useless.
>> Please, do describe these concerns; what inconsistencies do you see?
>> The not spilling scalars, and the limit to packet size, are retained
>> from the existing code (is_spillable_regtype() and MAX_PACKET_OFF).
>> The latter is also needed for correctness in computing reg->range;
>> if 'pkt_ptr + offset' could conceivably overflow, then the result
>> could be < pkt_end without being a valid pointer into the packet.
>> We thus rely on the assumption that the packet pointer will never be
>> within MAX_PACKET_OFF of the top of the address space.  (This
>> assumption is, again, carried over from the existing verifier.)
> 
> I understand the limitations (I think). I agree that CONST being spillable
> is not directly related. As for the possible packet offsets/range:
> intentionally or not you do make some changes that push the 64k packet size
> limit even deeper into the code. While the packet size should be limited to
> avoid overflow, IIUC the requirement is that:
> 
> 	64 > log(n_insn) + log(MAX_PACKET_OFF) + 1
> 
> Such an assertion may be staticly checked (using BUILD_BUG_ON), but I don’t
> think should propagate into the entire code, especially in a non consistent
> way. For example:
> 
>> struct bpf_reg_state {
>> 	enum bpf_reg_type type;
>> 	union {
>> -		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
>> -		s64 imm;
>> -
>> -		/* valid when type == PTR_TO_PACKET* */
>> -		struct {
>> -			u16 off;
>> -			u16 range;
>> -		};
>> +		/* valid when type == PTR_TO_PACKET */
>> +		u32 range;
> 
> I would (personally) prefer range (and offsets) to be u64. I could
> understand if you left the range as u16 (since packet size is limited to
> 64k). But why would it be u32?
> 
> Or:
>> -	if (off < 0 || size <= 0 || off + size > reg->range) {
>> +	if (off < 0 || size <= 0 || off > MAX_PACKET_OFF ||
>> +	    off + size > reg->range) {
> 
> Why don’t you check (off + size > max(MAX_PACKET_OFF, reg->range))? Is there
> a reason you ignore size when comparing to MAX_PACKET_OFF ?

(should be min, not max)
Edward Cree July 12, 2017, 7:13 p.m. UTC | #11
On 07/07/17 18:45, Nadav Amit wrote:
> For me changes such as:
>
>> 		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
>> -			dst_reg->min_value -= min_val;
>> +			dst_reg->min_value -= max_val;
>
> are purely cryptic. What happened here? Was there a bug before and if so
> what are its implications? Why can’t it be in a separate patch?
In this specific case, there was a bug before: if (say) src and dst were
 both unknown bytes (so range 0 to 255), it would compute the new min and max
 to be 0, so it would think the result is known to be 0.  But that's wrong,
 because it could be anything from -255 to +255.  The bug's implications are
 that it could be used to construct an out-of-range offset to (say) a map
 pointer which the verifier would think was in-range and thus accept.
It might be possible to put it in a separate patch, but in general (not
 necessarily the case here) isolated fixes to range handling break some of
 the existing regression tests.  That is why I ended up doing patch #4,
 because I couldn't find a small fix for the patch #1 test without breaking
 others.  Essentially, this patch started out as just the tnum tracking to
 replace imm and align, and then rolled in everything I had to do to get the
 regression tests to pass again.  So some of those things are fixes that
 could go in earlier patches, but because of the order I wrote it in I'd have
 to disentangle them.  I can do that if it's necessary, but I'm not sure it'd
 really make the patch that much more readable so I'd rather avoid that work
 if I can get away with it...
> I also think that changes such as:
>> -	s64 min_value;
>> -	u64 max_value;
> [snip]
>> +	s64 min_value; /* minimum possible (s64)value */
>> +	u64 max_value; /* maximum possible (u64)value */
> Should have been avoided. Personally, I find this comment redundant (to say
> the least). It does not help to reduce the diff size.
The comment is meaningful, though perhaps badly phrased.  It's an attempt to
 define the semantics of these fields (which previously was unclear); e.g. the
 first one means "minimum value when interpreted as signed", i.e. the (s64) in
 the comment is a cast.
Apparently those weren't the semantics the original author intended, but I'm
 not sure the original semantics were well-defined and I certainly don't
 understand them well enough to define them, hence why I defined my own here
 (and then redefined them in patch #4).
> In this regard, I think that refactoring should have been done first and not
> together with the logic changes. As another example, changing UNKNOWN_VALUE
> to SCALAR_VALUE should have been a separate, easy to understand patch.
But SCALAR_VALUE is the union UNKNOWN_VALUE *or* CONST_IMM, and merging those
 together means all of the ripping-out of evaluate_reg_alu() and friends and
 thus depends on much of the rest of the patch.
>> The latter is also needed for correctness in computing reg->range;
>> if 'pkt_ptr + offset' could conceivably overflow, then the result
>> could be < pkt_end without being a valid pointer into the packet.
>> We thus rely on the assumption that the packet pointer will never be
>> within MAX_PACKET_OFF of the top of the address space.  (This
>> assumption is, again, carried over from the existing verifier.)
> I understand the limitations (I think). I agree that CONST being spillable
> is not directly related. As for the possible packet offsets/range:
> intentionally or not you do make some changes that push the 64k packet size
> limit even deeper into the code. While the packet size should be limited to
> avoid overflow, IIUC the requirement is that:
>
> 	64 > log(n_insn) + log(MAX_PACKET_OFF) + 1
I don't think that's right, unless you also make each addition to a packet-
 pointer require a max_value <= MAX_PACKET_OFF.  It's also a very loose bound
 because it assumes every instruction is such an add.
I think it makes far more sense to do it the way I have done, where the bounds
 are tracked all the way through the arithmetic and then only checked against
 MAX_PACKET_OFF when doing the access (and when doing a test against a
 PTR_TO_PACKET_END, i.e. find_good_pkt_pointers(), though for some reason I
 only added that check in patch #4).
That way we can allow things like (for the sake of example) adding $BIG_NUMBER
 to a packet pointer and then subtracting it again.
> Such an assertion may be staticly checked (using BUILD_BUG_ON), but I don’t
> think should propagate into the entire code, especially in a non consistent
> way. For example:
>
>> struct bpf_reg_state {
>> 	enum bpf_reg_type type;
>> 	union {
>> -		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
>> -		s64 imm;
>> -
>> -		/* valid when type == PTR_TO_PACKET* */
>> -		struct {
>> -			u16 off;
>> -			u16 range;
>> -		};
>> +		/* valid when type == PTR_TO_PACKET */
>> +		u32 range;
> I would (personally) prefer range (and offsets) to be u64. I could
> understand if you left the range as u16 (since packet size is limited to
> 64k). But why would it be u32?
I'm not sure; I think I did that so that it would be the same size as the
 struct it's replacing.  In other words, no reason really.  But I will have
 to think about the implications for overflow handling if it's changed.
> Or:
>> -	if (off < 0 || size <= 0 || off + size > reg->range) {
>> +	if (off < 0 || size <= 0 || off > MAX_PACKET_OFF ||
>> +	    off + size > reg->range) {
> Why don’t you check (off + size > max(MAX_PACKET_OFF, reg->range))? Is there
> a reason you ignore size when comparing to MAX_PACKET_OFF ?
Actually, having thought further about this, I think only the check in
 find_good_pkt_pointers() is necessary (though possibly taking account of the
 fixed offset as well as the var_off), since that naturally limits how much
 reg->range we can acquire to MAX_PACKET_OFF.
But you're right that checking off rather than off + size is weird, I don't
 recall why I did that and I can't see a reason for it.

-Ed
Nadav Amit July 12, 2017, 10:07 p.m. UTC | #12
Edward Cree <ecree@solarflare.com> wrote:

> On 07/07/17 18:45, Nadav Amit wrote:
>> For me changes such as:
>> 
>>> if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
>>> -			dst_reg->min_value -= min_val;
>>> +			dst_reg->min_value -= max_val;
>> 
>> are purely cryptic. What happened here? Was there a bug before and if so
>> what are its implications? Why can’t it be in a separate patch?
> In this specific case, there was a bug before: if (say) src and dst were
> both unknown bytes (so range 0 to 255), it would compute the new min and max
> to be 0, so it would think the result is known to be 0.  But that's wrong,
> because it could be anything from -255 to +255.  The bug's implications are
> that it could be used to construct an out-of-range offset to (say) a map
> pointer which the verifier would think was in-range and thus accept.

This sounds like a serious bug that may need to be backported to stable
versions, no? In this case I would assume it should be in a separate patch
so it could be applied separately.

> It might be possible to put it in a separate patch, but in general (not
> necessarily the case here) isolated fixes to range handling break some of
> the existing regression tests.  That is why I ended up doing patch #4,
> because I couldn't find a small fix for the patch #1 test without breaking
> others.  Essentially, this patch started out as just the tnum tracking to
> replace imm and align, and then rolled in everything I had to do to get the
> regression tests to pass again.  So some of those things are fixes that
> could go in earlier patches, but because of the order I wrote it in I'd have
> to disentangle them.  I can do that if it's necessary, but I'm not sure it'd
> really make the patch that much more readable so I'd rather avoid that work
> if I can get away with it…

I clearly understand and can relate to it. Still, your patch really stands
out:

git log --stat kernel/bpf/ | grep -G '^ kernel/bpf' | tr -s " " \
>  | sort -g -r  -k 3 | head -n 10

 kernel/bpf/verifier.c | 1075 ++++++++++++++++++++++++++++++++++++++++++++++++-
 kernel/bpf/bpf_lru_list.c | 567 ++++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/core.c | 536 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/lpm_trie.c | 503 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c | 441 +++++++++++++++++++++++++++++++++++++++++++++++++-
 kernel/bpf/inode.c | 387 +++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/hashtab.c | 362 +++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c | 329 +++++++++++++++++++++++++++++++++++++++++++++++---
 kernel/bpf/hashtab.c | 275 ++++++++++++++++++++++++++++++++++++++++++---------
 kernel/bpf/verifier.c | 266 +++++++++++++++++++-------------------------------

>> I also think that changes such as:
>>> -	s64 min_value;
>>> -	u64 max_value;
>> [snip]
>>> +	s64 min_value; /* minimum possible (s64)value */
>>> +	u64 max_value; /* maximum possible (u64)value */
>> Should have been avoided. Personally, I find this comment redundant (to say
>> the least). It does not help to reduce the diff size.
> The comment is meaningful, though perhaps badly phrased.  It's an attempt to
> define the semantics of these fields (which previously was unclear); e.g. the
> first one means "minimum value when interpreted as signed", i.e. the (s64) in
> the comment is a cast.
> Apparently those weren't the semantics the original author intended, but I'm
> not sure the original semantics were well-defined and I certainly don't
> understand them well enough to define them, hence why I defined my own here
> (and then redefined them in patch #4).

Makes more sense now.

>> In this regard, I think that refactoring should have been done first and not
>> together with the logic changes. As another example, changing UNKNOWN_VALUE
>> to SCALAR_VALUE should have been a separate, easy to understand patch.
> But SCALAR_VALUE is the union UNKNOWN_VALUE *or* CONST_IMM, and merging those
> together means all of the ripping-out of evaluate_reg_alu() and friends and
> thus depends on much of the rest of the patch.
>>> The latter is also needed for correctness in computing reg->range;
>>> if 'pkt_ptr + offset' could conceivably overflow, then the result
>>> could be < pkt_end without being a valid pointer into the packet.
>>> We thus rely on the assumption that the packet pointer will never be
>>> within MAX_PACKET_OFF of the top of the address space.  (This
>>> assumption is, again, carried over from the existing verifier.)
>> I understand the limitations (I think). I agree that CONST being spillable
>> is not directly related. As for the possible packet offsets/range:
>> intentionally or not you do make some changes that push the 64k packet size
>> limit even deeper into the code. While the packet size should be limited to
>> avoid overflow, IIUC the requirement is that:
>> 
>> 	64 > log(n_insn) + log(MAX_PACKET_OFF) + 1
> I don't think that's right, unless you also make each addition to a packet-
> pointer require a max_value <= MAX_PACKET_OFF.  It's also a very loose bound
> because it assumes every instruction is such an add.

I remembered it was like that before (you could not add to packet-pointer a
value unless the high bits were cleared). I need to revisit the code…

> I think it makes far more sense to do it the way I have done, where the bounds
> are tracked all the way through the arithmetic and then only checked against
> MAX_PACKET_OFF when doing the access (and when doing a test against a
> PTR_TO_PACKET_END, i.e. find_good_pkt_pointers(), though for some reason I
> only added that check in patch #4).
> That way we can allow things like (for the sake of example) adding $BIG_NUMBER
> to a packet pointer and then subtracting it again.
>> Such an assertion may be staticly checked (using BUILD_BUG_ON), but I don’t
>> think should propagate into the entire code, especially in a non consistent
>> way. For example:
>> 
>>> struct bpf_reg_state {
>>> 	enum bpf_reg_type type;
>>> 	union {
>>> -		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
>>> -		s64 imm;
>>> -
>>> -		/* valid when type == PTR_TO_PACKET* */
>>> -		struct {
>>> -			u16 off;
>>> -			u16 range;
>>> -		};
>>> +		/* valid when type == PTR_TO_PACKET */
>>> +		u32 range;
>> I would (personally) prefer range (and offsets) to be u64. I could
>> understand if you left the range as u16 (since packet size is limited to
>> 64k). But why would it be u32?
> I'm not sure; I think I did that so that it would be the same size as the
> struct it's replacing.  In other words, no reason really.  But I will have
> to think about the implications for overflow handling if it's changed.
>> Or:
>>> -	if (off < 0 || size <= 0 || off + size > reg->range) {
>>> +	if (off < 0 || size <= 0 || off > MAX_PACKET_OFF ||
>>> +	    off + size > reg->range) {
>> Why don’t you check (off + size > max(MAX_PACKET_OFF, reg->range))? Is there
>> a reason you ignore size when comparing to MAX_PACKET_OFF ?
> Actually, having thought further about this, I think only the check in
> find_good_pkt_pointers() is necessary (though possibly taking account of the
> fixed offset as well as the var_off), since that naturally limits how much
> reg->range we can acquire to MAX_PACKET_OFF.
> But you're right that checking off rather than off + size is weird, I don't
> recall why I did that and I can't see a reason for it.

These are the sort of comments that I think are important for good code
quality, but I think you would not get due to the intimidating size of the
patch.

Now, I am not the maintainer or even a serious contributor to the BPF code.
For me, such changes are just a headache when I rebase my private patches. I
will get over them. But breaking patches to small ones is a good practice.
Fixes may need to be backported to old versions. It allows reviewers to
provide useful feedback, make it easier for other developers to resolve
conflicts, allows to easily revert buggy changes, enables to find buggy
patches through bisection and so on. Take it into consideration.

Thanks,
Nadav
Edward Cree July 17, 2017, 5:02 p.m. UTC | #13
On 12/07/17 23:07, Nadav Amit wrote:
> Edward Cree <ecree@solarflare.com> wrote:
>> In this specific case, there was a bug before: if (say) src and dst were
>> both unknown bytes (so range 0 to 255), it would compute the new min and max
>> to be 0, so it would think the result is known to be 0.  But that's wrong,
>> because it could be anything from -255 to +255.  The bug's implications are
>> that it could be used to construct an out-of-range offset to (say) a map
>> pointer which the verifier would think was in-range and thus accept.
> This sounds like a serious bug that may need to be backported to stable
> versions, no? In this case I would assume it should be in a separate patch
> so it could be applied separately.
Having looked deeper into this in attempting to create a test that the existing
 verifier would fail, it turns out that in the existing verifier that BPF_SUB
 handling is dead code.  If (for instance) we subtract an UNKNOWN_VALUE from a
 PTR_TO_MAP_VALUE_ADJ, that code will be run, but afterwards we will
 mark_reg_unknown_value() the register (bottom of check_alu_op()) making our
 previous min/max determination irrelevant.
So there's nothing to backport, and if I did change this in its own patch,
 there'd be no way to test it.  (I have, however, added a test covering this
 codepath in the new verifier.)

-Ed
diff mbox

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index deca4e7..0fc3bbc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -116,35 +116,25 @@  enum bpf_access_type {
 };
 
 /* types of values stored in eBPF registers */
+/* Pointer types represent:
+ * pointer
+ * pointer + imm
+ * pointer + (u16) var
+ * pointer + (u16) var + imm
+ * if (range > 0) then [ptr, ptr + range - off) is safe to access
+ * if (id > 0) means that some 'var' was added
+ * if (off > 0) means that 'imm' was added
+ */
 enum bpf_reg_type {
 	NOT_INIT = 0,		 /* nothing was written into register */
-	UNKNOWN_VALUE,		 /* reg doesn't contain a valid pointer */
+	SCALAR_VALUE,		 /* reg doesn't contain a valid pointer */
 	PTR_TO_CTX,		 /* reg points to bpf_context */
 	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
 	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
 	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
-	FRAME_PTR,		 /* reg == frame_pointer */
-	PTR_TO_STACK,		 /* reg == frame_pointer + imm */
-	CONST_IMM,		 /* constant integer value */
-
-	/* PTR_TO_PACKET represents:
-	 * skb->data
-	 * skb->data + imm
-	 * skb->data + (u16) var
-	 * skb->data + (u16) var + imm
-	 * if (range > 0) then [ptr, ptr + range - off) is safe to access
-	 * if (id > 0) means that some 'var' was added
-	 * if (off > 0) menas that 'imm' was added
-	 */
-	PTR_TO_PACKET,
+	PTR_TO_STACK,		 /* reg == frame_pointer + offset */
+	PTR_TO_PACKET,		 /* reg points to skb->data */
 	PTR_TO_PACKET_END,	 /* skb->data + headlen */
-
-	/* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map
-	 * elem value.  We only allow this if we can statically verify that
-	 * access from this register are going to fall within the size of the
-	 * map element.
-	 */
-	PTR_TO_MAP_VALUE_ADJ,
 };
 
 struct bpf_prog;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 621076f..ca7e2ce 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -9,6 +9,7 @@ 
 
 #include <linux/bpf.h> /* for enum bpf_reg_type */
 #include <linux/filter.h> /* for MAX_BPF_STACK */
+#include <linux/tnum.h>
 
  /* Just some arbitrary values so we can safely do math without overflowing and
   * are obviously wrong for any sort of memory access.
@@ -19,30 +20,39 @@ 
 struct bpf_reg_state {
 	enum bpf_reg_type type;
 	union {
-		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
-		s64 imm;
-
-		/* valid when type == PTR_TO_PACKET* */
-		struct {
-			u16 off;
-			u16 range;
-		};
+		/* valid when type == PTR_TO_PACKET */
+		u32 range;
 
 		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
 		 *   PTR_TO_MAP_VALUE_OR_NULL
 		 */
 		struct bpf_map *map_ptr;
 	};
+	/* Fixed part of pointer offset, pointer types only */
+	s32 off;
+	/* Used to find other pointers with the same variable offset, so they
+	 * can share range knowledge.
+	 * Exception: for PTR_TO_MAP_VALUE_OR_NULL this is used to share which
+	 * map value we came from, when one is tested for != NULL.  Note that
+	 * this overloading means that we can't do pointer arithmetic on a
+	 * PTR_TO_MAP_VALUE_OR_NULL, we have to NULL-check it _first_.
+	 */
 	u32 id;
+	/* These three fields must be last.  See states_equal() */
+	/* For scalar types (SCALAR_VALUE), this represents our knowledge of
+	 * the actual value.
+	 * For pointer types, this represents the variable part of the offset
+	 * from the pointed-to object, and is shared with all bpf_reg_states
+	 * with the same id as us.
+	 */
+	struct tnum var_off;
 	/* Used to determine if any memory access using this register will
-	 * result in a bad access. These two fields must be last.
-	 * See states_equal()
+	 * result in a bad access.
+	 * These refer to the same value as var_off, not necessarily the actual
+	 * contents of the register.
 	 */
-	s64 min_value;
-	u64 max_value;
-	u32 min_align;
-	u32 aux_off;
-	u32 aux_off_align;
+	s64 min_value; /* minimum possible (s64)value */
+	u64 max_value; /* maximum possible (u64)value */
 };
 
 enum bpf_stack_slot_type {
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
new file mode 100644
index 0000000..a0b07bf
--- /dev/null
+++ b/include/linux/tnum.h
@@ -0,0 +1,79 @@ 
+/* tnum: tracked (or tristate) numbers
+ *
+ * A tnum tracks knowledge about the bits of a value.  Each bit can be either
+ * known (0 or 1), or unknown (x).  Arithmetic operations on tnums will
+ * propagate the unknown bits such that the tnum result represents all the
+ * possible results for possible values of the operands.
+ */
+#include <linux/types.h>
+
+struct tnum {
+	u64 value;
+	u64 mask;
+};
+
+/* Constructors */
+/* Represent a known constant as a tnum. */
+struct tnum tnum_const(u64 value);
+/* A completely unknown value */
+extern const struct tnum tnum_unknown;
+
+/* Arithmetic and logical ops */
+/* Shift a tnum left (by a fixed shift) */
+struct tnum tnum_lshift(struct tnum a, u8 shift);
+/* Shift a tnum right (by a fixed shift) */
+struct tnum tnum_rshift(struct tnum a, u8 shift);
+/* Add two tnums, return @a + @b */
+struct tnum tnum_add(struct tnum a, struct tnum b);
+/* Subtract two tnums, return @a - @b */
+struct tnum tnum_sub(struct tnum a, struct tnum b);
+/* Bitwise-AND, return @a & @b */
+struct tnum tnum_and(struct tnum a, struct tnum b);
+/* Bitwise-OR, return @a | @b */
+struct tnum tnum_or(struct tnum a, struct tnum b);
+/* Bitwise-XOR, return @a ^ @b */
+struct tnum tnum_xor(struct tnum a, struct tnum b);
+/* Multiply two tnums, return @a * @b */
+struct tnum tnum_mul(struct tnum a, struct tnum b);
+
+/* Return a tnum representing numbers satisfying both @a and @b */
+struct tnum tnum_intersect(struct tnum a, struct tnum b);
+
+/* Return @a with all but the lowest @size bytes cleared */
+struct tnum tnum_cast(struct tnum a, u8 size);
+
+/* Returns true if @a is a known constant */
+static inline bool tnum_is_const(struct tnum a)
+{
+	return !a.mask;
+}
+
+/* Returns true if @a == tnum_const(@b) */
+static inline bool tnum_equals_const(struct tnum a, u64 b)
+{
+	return tnum_is_const(a) && a.value == b;
+}
+
+/* Returns true if @a is completely unknown */
+static inline bool tnum_is_unknown(struct tnum a)
+{
+	return !~a.mask;
+}
+
+/* Returns true if @a is known to be a multiple of @size.
+ * @size must be a power of two.
+ */
+bool tnum_is_aligned(struct tnum a, u64 size);
+
+/* Returns true if @b represents a subset of @a. */
+bool tnum_in(struct tnum a, struct tnum b);
+
+/* Formatting functions.  These have snprintf-like semantics: they will write
+ * up to @size bytes (including the terminating NUL byte), and return the number
+ * of bytes (excluding the terminating NUL) which would have been written had
+ * sufficient space been available.  (Thus tnum_sbin always returns 64.)
+ */
+/* Format a tnum as a pair of hex numbers (value; mask) */
+int tnum_strn(char *str, size_t size, struct tnum a);
+/* Format a tnum as tristate binary expansion */
+int tnum_sbin(char *str, size_t size, struct tnum a);
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e1e5e65..df14def 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,6 +1,6 @@ 
 obj-y := core.o
 
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
new file mode 100644
index 0000000..803bd0d
--- /dev/null
+++ b/kernel/bpf/tnum.c
@@ -0,0 +1,163 @@ 
+/* tnum: tracked (or tristate) numbers
+ *
+ * A tnum tracks knowledge about the bits of a value.  Each bit can be either
+ * known (0 or 1), or unknown (x).  Arithmetic operations on tnums will
+ * propagate the unknown bits such that the tnum result represents all the
+ * possible results for possible values of the operands.
+ */
+#include <linux/kernel.h>
+#include <linux/tnum.h>
+
+#define TNUM(_v, _m)	(struct tnum){.value = _v, .mask = _m}
+/* A completely unknown value */
+const struct tnum tnum_unknown = { .value = 0, .mask = -1 };
+
+struct tnum tnum_const(u64 value)
+{
+	return TNUM(value, 0);
+}
+
+struct tnum tnum_lshift(struct tnum a, u8 shift)
+{
+	return TNUM(a.value << shift, a.mask << shift);
+}
+
+struct tnum tnum_rshift(struct tnum a, u8 shift)
+{
+	return TNUM(a.value >> shift, a.mask >> shift);
+}
+
+struct tnum tnum_add(struct tnum a, struct tnum b)
+{
+	u64 sm, sv, sigma, chi, mu;
+
+	sm = a.mask + b.mask;
+	sv = a.value + b.value;
+	sigma = sm + sv;
+	chi = sigma ^ sv;
+	mu = chi | a.mask | b.mask;
+	return TNUM(sv & ~mu, mu);
+}
+
+struct tnum tnum_sub(struct tnum a, struct tnum b)
+{
+	u64 dv, alpha, beta, chi, mu;
+
+	dv = a.value - b.value;
+	alpha = dv + a.mask;
+	beta = dv - b.mask;
+	chi = alpha ^ beta;
+	mu = chi | a.mask | b.mask;
+	return TNUM(dv & ~mu, mu);
+}
+
+struct tnum tnum_and(struct tnum a, struct tnum b)
+{
+	u64 alpha, beta, v;
+
+	alpha = a.value | a.mask;
+	beta = b.value | b.mask;
+	v = a.value & b.value;
+	return TNUM(v, alpha & beta & ~v);
+}
+
+struct tnum tnum_or(struct tnum a, struct tnum b)
+{
+	u64 v, mu;
+
+	v = a.value | b.value;
+	mu = a.mask | b.mask;
+	return TNUM(v, mu & ~v);
+}
+
+struct tnum tnum_xor(struct tnum a, struct tnum b)
+{
+	u64 v, mu;
+
+	v = a.value ^ b.value;
+	mu = a.mask | b.mask;
+	return TNUM(v & ~mu, mu);
+}
+
+/* half-multiply add: acc += (unknown * mask * value).
+ * An intermediate step in the multiply algorithm.
+ */
+static struct tnum hma(struct tnum acc, u64 value, u64 mask)
+{
+	while (mask) {
+		if (mask & 1)
+			acc = tnum_add(acc, TNUM(0, value));
+		mask >>= 1;
+		value <<= 1;
+	}
+	return acc;
+}
+
+struct tnum tnum_mul(struct tnum a, struct tnum b)
+{
+	struct tnum acc;
+	u64 pi;
+
+	pi = a.value * b.value;
+	acc = hma(TNUM(pi, 0), a.mask, b.mask | b.value);
+	return hma(acc, b.mask, a.value);
+}
+
+/* Note that if a and b disagree - i.e. one has a 'known 1' where the other has
+ * a 'known 0' - this will return a 'known 1' for that bit.
+ */
+struct tnum tnum_intersect(struct tnum a, struct tnum b)
+{
+	u64 v, mu;
+
+	v = a.value | b.value;
+	mu = a.mask & b.mask;
+	return TNUM(v & ~mu, mu);
+}
+
+struct tnum tnum_cast(struct tnum a, u8 size)
+{
+	a.value &= (1ULL << (size * 8)) - 1;
+	a.mask &= (1ULL << (size * 8)) - 1;
+	return a;
+}
+
+bool tnum_is_aligned(struct tnum a, u64 size)
+{
+	if (!size)
+		return true;
+	return !((a.value | a.mask) & (size - 1));
+}
+
+bool tnum_in(struct tnum a, struct tnum b)
+{
+	if (b.mask & ~a.mask)
+		return false;
+	b.value &= ~a.mask;
+	return a.value == b.value;
+}
+
+int tnum_strn(char *str, size_t size, struct tnum a)
+{
+	return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask);
+}
+
+int tnum_sbin(char *str, size_t size, struct tnum a)
+{
+	size_t n;
+
+	for (n = 64; n; n--) {
+		if (n < size) {
+			if (a.mask & 1)
+				str[n - 1] = 'x';
+			else if (a.value & 1)
+				str[n - 1] = '1';
+			else
+				str[n - 1] = '0';
+		}
+		a.mask >>= 1;
+		a.value >>= 1;
+	}
+	str[min(size - 1, (size_t)64)] = 0;
+	return 64;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 74ea96e..82823f1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -61,12 +61,12 @@ 
  * (and -20 constant is saved for further stack bounds checking).
  * Meaning that this reg is a pointer to stack plus known immediate constant.
  *
- * Most of the time the registers have UNKNOWN_VALUE type, which
+ * Most of the time the registers have SCALAR_VALUE type, which
  * means the register has some value, but it's not a valid pointer.
- * (like pointer plus pointer becomes UNKNOWN_VALUE type)
+ * (like pointer plus pointer becomes SCALAR_VALUE type)
  *
  * When verifier sees load or store instructions the type of base register
- * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer
+ * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
  * types recognized by check_mem_access() function.
  *
  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
@@ -180,15 +180,12 @@  static __printf(1, 2) void verbose(const char *fmt, ...)
 /* string representation of 'enum bpf_reg_type' */
 static const char * const reg_type_str[] = {
 	[NOT_INIT]		= "?",
-	[UNKNOWN_VALUE]		= "inv",
+	[SCALAR_VALUE]		= "inv",
 	[PTR_TO_CTX]		= "ctx",
 	[CONST_PTR_TO_MAP]	= "map_ptr",
 	[PTR_TO_MAP_VALUE]	= "map_value",
 	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
-	[PTR_TO_MAP_VALUE_ADJ]	= "map_value_adj",
-	[FRAME_PTR]		= "fp",
 	[PTR_TO_STACK]		= "fp",
-	[CONST_IMM]		= "imm",
 	[PTR_TO_PACKET]		= "pkt",
 	[PTR_TO_PACKET_END]	= "pkt_end",
 };
@@ -221,32 +218,36 @@  static void print_verifier_state(struct bpf_verifier_state *state)
 		if (t == NOT_INIT)
 			continue;
 		verbose(" R%d=%s", i, reg_type_str[t]);
-		if (t == CONST_IMM || t == PTR_TO_STACK)
-			verbose("%lld", reg->imm);
-		else if (t == PTR_TO_PACKET)
-			verbose("(id=%d,off=%d,r=%d)",
-				reg->id, reg->off, reg->range);
-		else if (t == UNKNOWN_VALUE && reg->imm)
-			verbose("%lld", reg->imm);
-		else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
-			 t == PTR_TO_MAP_VALUE_OR_NULL ||
-			 t == PTR_TO_MAP_VALUE_ADJ)
-			verbose("(ks=%d,vs=%d,id=%u)",
-				reg->map_ptr->key_size,
-				reg->map_ptr->value_size,
-				reg->id);
-		if (reg->min_value != BPF_REGISTER_MIN_RANGE)
-			verbose(",min_value=%lld",
-				(long long)reg->min_value);
-		if (reg->max_value != BPF_REGISTER_MAX_RANGE)
-			verbose(",max_value=%llu",
-				(unsigned long long)reg->max_value);
-		if (reg->min_align)
-			verbose(",min_align=%u", reg->min_align);
-		if (reg->aux_off)
-			verbose(",aux_off=%u", reg->aux_off);
-		if (reg->aux_off_align)
-			verbose(",aux_off_align=%u", reg->aux_off_align);
+		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
+		    tnum_is_const(reg->var_off)) {
+			/* reg->off should be 0 for SCALAR_VALUE */
+			verbose("%lld", reg->var_off.value + reg->off);
+		} else {
+			verbose("(id=%d", reg->id);
+			if (t != SCALAR_VALUE)
+				verbose(",off=%d", reg->off);
+			if (t == PTR_TO_PACKET)
+				verbose(",r=%d", reg->range);
+			else if (t == CONST_PTR_TO_MAP ||
+				 t == PTR_TO_MAP_VALUE ||
+				 t == PTR_TO_MAP_VALUE_OR_NULL)
+				verbose(",ks=%d,vs=%d",
+					reg->map_ptr->key_size,
+					reg->map_ptr->value_size);
+			if (reg->min_value != BPF_REGISTER_MIN_RANGE)
+				verbose(",min_value=%lld",
+					(long long)reg->min_value);
+			if (reg->max_value != BPF_REGISTER_MAX_RANGE)
+				verbose(",max_value=%llu",
+					(unsigned long long)reg->max_value);
+			if (!tnum_is_unknown(reg->var_off)) {
+				char tn_buf[48];
+
+				tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+				verbose(",var_off=%s", tn_buf);
+			}
+			verbose(")");
+		}
 	}
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] == STACK_SPILL)
@@ -463,55 +464,85 @@  static const int caller_saved[CALLER_SAVED_REGS] = {
 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
 
-static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
-{
-	BUG_ON(regno >= MAX_BPF_REG);
+static void __mark_reg_not_init(struct bpf_reg_state *reg);
 
-	memset(&regs[regno], 0, sizeof(regs[regno]));
-	regs[regno].type = NOT_INIT;
-	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
-	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+/* Mark the 'variable offset' part of a register as zero.  This should be
+ * used only on registers holding a pointer type.
+ */
+static void __mark_reg_known_zero(struct bpf_reg_state *reg)
+{
+	reg->var_off = tnum_const(0);
+	reg->min_value = 0;
+	reg->max_value = 0;
 }
 
-static void init_reg_state(struct bpf_reg_state *regs)
+static void mark_reg_known_zero(struct bpf_reg_state *regs, u32 regno)
 {
-	int i;
-
-	for (i = 0; i < MAX_BPF_REG; i++)
-		mark_reg_not_init(regs, i);
-
-	/* frame pointer */
-	regs[BPF_REG_FP].type = FRAME_PTR;
+	if (WARN_ON(regno >= MAX_BPF_REG)) {
+		verbose("mark_reg_known_zero(regs, %u)\n", regno);
+		/* Something bad happened, let's kill all regs */
+		for (regno = 0; regno < MAX_BPF_REG; regno++)
+			__mark_reg_not_init(regs + regno);
+		return;
+	}
+	__mark_reg_known_zero(regs + regno);
+}
 
-	/* 1st arg to a function */
-	regs[BPF_REG_1].type = PTR_TO_CTX;
+/* Mark a register as having a completely unknown (scalar) value. */
+static void __mark_reg_unknown(struct bpf_reg_state *reg)
+{
+	reg->type = SCALAR_VALUE;
+	reg->id = 0;
+	reg->off = 0;
+	reg->var_off = tnum_unknown;
+	reg->min_value = BPF_REGISTER_MIN_RANGE;
+	reg->max_value = BPF_REGISTER_MAX_RANGE;
 }
 
-static void __mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
+static void mark_reg_unknown(struct bpf_reg_state *regs, u32 regno)
 {
-	regs[regno].type = UNKNOWN_VALUE;
-	regs[regno].id = 0;
-	regs[regno].imm = 0;
+	if (WARN_ON(regno >= MAX_BPF_REG)) {
+		verbose("mark_reg_unknown(regs, %u)\n", regno);
+		/* Something bad happened, let's kill all regs */
+		for (regno = 0; regno < MAX_BPF_REG; regno++)
+			__mark_reg_not_init(regs + regno);
+		return;
+	}
+	__mark_reg_unknown(regs + regno);
 }
 
-static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
+static void __mark_reg_not_init(struct bpf_reg_state *reg)
 {
-	BUG_ON(regno >= MAX_BPF_REG);
-	__mark_reg_unknown_value(regs, regno);
+	__mark_reg_unknown(reg);
+	reg->type = NOT_INIT;
 }
 
-static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
+static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
 {
-	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
-	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
-	regs[regno].min_align = 0;
+	if (WARN_ON(regno >= MAX_BPF_REG)) {
+		verbose("mark_reg_not_init(regs, %u)\n", regno);
+		/* Something bad happened, let's kill all regs */
+		for (regno = 0; regno < MAX_BPF_REG; regno++)
+			__mark_reg_not_init(regs + regno);
+		return;
+	}
+	__mark_reg_not_init(regs + regno);
 }
 
-static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs,
-					     u32 regno)
+static void init_reg_state(struct bpf_reg_state *regs)
 {
-	mark_reg_unknown_value(regs, regno);
-	reset_reg_range_values(regs, regno);
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++)
+		mark_reg_not_init(regs, i);
+
+	/* frame pointer */
+	regs[BPF_REG_FP].type = PTR_TO_STACK;
+	mark_reg_known_zero(regs, BPF_REG_FP);
+
+	/* 1st arg to a function */
+	regs[BPF_REG_1].type = PTR_TO_CTX;
+	mark_reg_known_zero(regs, BPF_REG_1);
 }
 
 enum reg_arg_type {
@@ -541,7 +572,7 @@  static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
 			return -EACCES;
 		}
 		if (t == DST_OP)
-			mark_reg_unknown_value(regs, regno);
+			mark_reg_unknown(regs, regno);
 	}
 	return 0;
 }
@@ -565,12 +596,10 @@  static bool is_spillable_regtype(enum bpf_reg_type type)
 	switch (type) {
 	case PTR_TO_MAP_VALUE:
 	case PTR_TO_MAP_VALUE_OR_NULL:
-	case PTR_TO_MAP_VALUE_ADJ:
 	case PTR_TO_STACK:
 	case PTR_TO_CTX:
 	case PTR_TO_PACKET:
 	case PTR_TO_PACKET_END:
-	case FRAME_PTR:
 	case CONST_PTR_TO_MAP:
 		return true;
 	default:
@@ -650,14 +679,13 @@  static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
 		}
 		if (value_regno >= 0)
 			/* have read misc data from the stack */
-			mark_reg_unknown_value_and_range(state->regs,
-							 value_regno);
+			mark_reg_unknown(state->regs, value_regno);
 		return 0;
 	}
 }
 
 /* check read/write into map element returned by bpf_map_lookup_elem() */
-static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
+static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
 			    int size)
 {
 	struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
@@ -670,22 +698,25 @@  static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
 	return 0;
 }
 
-/* check read/write into an adjusted map element */
-static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno,
+/* check read/write into a map element with possible variable offset */
+static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 				int off, int size)
 {
 	struct bpf_verifier_state *state = &env->cur_state;
 	struct bpf_reg_state *reg = &state->regs[regno];
 	int err;
 
-	/* We adjusted the register to this map value, so we
-	 * need to change off and size to min_value and max_value
-	 * respectively to make sure our theoretical access will be
-	 * safe.
+	/* We may have adjusted the register to this map value, so we
+	 * need to try adding each of min_value and max_value to off
+	 * to make sure our theoretical access will be safe.
 	 */
 	if (log_level)
 		print_verifier_state(state);
-	env->varlen_map_value_access = true;
+	/* If the offset is variable, we will need to be stricter in state
+	 * pruning from now on.
+	 */
+	if (!tnum_is_const(reg->var_off))
+		env->varlen_map_value_access = true;
 	/* The minimum value is only important with signed
 	 * comparisons where we can't assume the floor of a
 	 * value is 0.  If we are using signed variables for our
@@ -697,10 +728,9 @@  static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno,
 			regno);
 		return -EACCES;
 	}
-	err = check_map_access(env, regno, reg->min_value + off, size);
+	err = __check_map_access(env, regno, reg->min_value + off, size);
 	if (err) {
-		verbose("R%d min value is outside of the array range\n",
-			regno);
+		verbose("R%d min value is outside of the array range\n", regno);
 		return err;
 	}
 
@@ -712,7 +742,10 @@  static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno,
 			regno);
 		return -EACCES;
 	}
-	return check_map_access(env, regno, reg->max_value + off, size);
+	err = __check_map_access(env, regno, reg->max_value + off, size);
+	if (err)
+		verbose("R%d max value is outside of the array range\n", regno);
+	return err;
 }
 
 #define MAX_PACKET_OFF 0xffff
@@ -742,14 +775,14 @@  static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 	}
 }
 
-static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
-			       int size)
+static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
+				 int off, int size)
 {
 	struct bpf_reg_state *regs = env->cur_state.regs;
 	struct bpf_reg_state *reg = &regs[regno];
 
-	off += reg->off;
-	if (off < 0 || size <= 0 || off + size > reg->range) {
+	if (off < 0 || size <= 0 || off > MAX_PACKET_OFF ||
+	    off + size > reg->range) {
 		verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
 			off, size, regno, reg->id, reg->off, reg->range);
 		return -EACCES;
@@ -757,7 +790,35 @@  static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 	return 0;
 }
 
-/* check access to 'struct bpf_context' fields */
+static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
+			       int size)
+{
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *reg = &regs[regno];
+	int err;
+
+	/* We may have added a variable offset to the packet pointer; but any
+	 * reg->range we have comes after that.  We are only checking the fixed
+	 * offset.
+	 */
+
+	/* We don't allow negative numbers, because we aren't tracking enough
+	 * detail to prove they're safe.
+	 */
+	if (reg->min_value < 0) {
+		verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+			regno);
+		return -EACCES;
+	}
+	err = __check_packet_access(env, regno, off, size);
+	if (err) {
+		verbose("R%d offset is outside of the packet\n", regno);
+		return err;
+	}
+	return err;
+}
+
+/* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
 {
@@ -805,35 +866,19 @@  static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 	if (env->allow_ptr_leaks)
 		return false;
 
-	switch (env->cur_state.regs[regno].type) {
-	case UNKNOWN_VALUE:
-	case CONST_IMM:
-		return false;
-	default:
-		return true;
-	}
+	return env->cur_state.regs[regno].type != SCALAR_VALUE;
 }
 
 static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 				   int off, int size, bool strict)
 {
+	struct tnum reg_off;
 	int ip_align;
-	int reg_off;
 
 	/* Byte size accesses are always allowed. */
 	if (!strict || size == 1)
 		return 0;
 
-	reg_off = reg->off;
-	if (reg->id) {
-		if (reg->aux_off_align % size) {
-			verbose("Packet access is only %u byte aligned, %d byte access not allowed\n",
-				reg->aux_off_align, size);
-			return -EACCES;
-		}
-		reg_off += reg->aux_off;
-	}
-
 	/* For platforms that do not have a Kconfig enabling
 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
@@ -843,20 +888,37 @@  static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 	 * unconditional IP align value of '2'.
 	 */
 	ip_align = 2;
-	if ((ip_align + reg_off + off) % size != 0) {
-		verbose("misaligned packet access off %d+%d+%d size %d\n",
-			ip_align, reg_off, off, size);
+
+	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
+	if (!tnum_is_aligned(reg_off, size)) {
+		char tn_buf[48];
+
+		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+		verbose("misaligned packet access off %d+%s+%d+%d size %d\n",
+			ip_align, tn_buf, reg->off, off, size);
 		return -EACCES;
 	}
 
 	return 0;
 }
 
-static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
-				   int size, bool strict)
+static int check_generic_ptr_alignment(const struct bpf_reg_state *reg,
+				       const char *pointer_desc,
+				       int off, int size, bool strict)
 {
-	if (strict && size != 1) {
-		verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
+	struct tnum reg_off;
+
+	/* Byte size accesses are always allowed. */
+	if (!strict || size == 1)
+		return 0;
+
+	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
+	if (!tnum_is_aligned(reg_off, size)) {
+		char tn_buf[48];
+
+		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+		verbose("misaligned %saccess off %s+%d+%d size %d\n",
+			pointer_desc, tn_buf, reg->off, off, size);
 		return -EACCES;
 	}
 
@@ -868,21 +930,25 @@  static int check_ptr_alignment(struct bpf_verifier_env *env,
 			       int off, int size)
 {
 	bool strict = env->strict_alignment;
+	const char *pointer_desc = "";
 
 	switch (reg->type) {
 	case PTR_TO_PACKET:
+		/* special case, because of NET_IP_ALIGN */
 		return check_pkt_ptr_alignment(reg, off, size, strict);
-	case PTR_TO_MAP_VALUE_ADJ:
-		return check_val_ptr_alignment(reg, size, strict);
+	case PTR_TO_MAP_VALUE:
+		pointer_desc = "value ";
+		break;
+	case PTR_TO_CTX:
+		pointer_desc = "context ";
+		break;
+	case PTR_TO_STACK:
+		pointer_desc = "stack ";
+		break;
 	default:
-		if (off % size != 0) {
-			verbose("misaligned access off %d size %d\n",
-				off, size);
-			return -EACCES;
-		}
-
-		return 0;
+		break;
 	}
+	return check_generic_ptr_alignment(reg, pointer_desc, off, size, strict);
 }
 
 /* check whether memory at (regno + off) is accessible for t = (read | write)
@@ -899,52 +965,79 @@  static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 	struct bpf_reg_state *reg = &state->regs[regno];
 	int size, err = 0;
 
-	if (reg->type == PTR_TO_STACK)
-		off += reg->imm;
-
 	size = bpf_size_to_bytes(bpf_size);
 	if (size < 0)
 		return size;
 
+	/* alignment checks will add in reg->off themselves */
 	err = check_ptr_alignment(env, reg, off, size);
 	if (err)
 		return err;
 
-	if (reg->type == PTR_TO_MAP_VALUE ||
-	    reg->type == PTR_TO_MAP_VALUE_ADJ) {
+	/* for access checks, reg->off is just part of off */
+	off += reg->off;
+
+	if (reg->type == PTR_TO_MAP_VALUE) {
 		if (t == BPF_WRITE && value_regno >= 0 &&
 		    is_pointer_value(env, value_regno)) {
 			verbose("R%d leaks addr into map\n", value_regno);
 			return -EACCES;
 		}
 
-		if (reg->type == PTR_TO_MAP_VALUE_ADJ)
-			err = check_map_access_adj(env, regno, off, size);
-		else
-			err = check_map_access(env, regno, off, size);
+		err = check_map_access(env, regno, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
-			mark_reg_unknown_value_and_range(state->regs,
-							 value_regno);
+			mark_reg_unknown(state->regs, value_regno);
 
 	} else if (reg->type == PTR_TO_CTX) {
-		enum bpf_reg_type reg_type = UNKNOWN_VALUE;
+		enum bpf_reg_type reg_type = SCALAR_VALUE;
 
 		if (t == BPF_WRITE && value_regno >= 0 &&
 		    is_pointer_value(env, value_regno)) {
 			verbose("R%d leaks addr into ctx\n", value_regno);
 			return -EACCES;
 		}
+		/* ctx accesses must be at a fixed offset, so that we can
+		 * determine what type of data were returned.
+		 */
+		if (!tnum_is_const(reg->var_off)) {
+			char tn_buf[48];
+
+			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+			verbose("variable ctx access var_off=%s off=%d size=%d",
+				tn_buf, off, size);
+			return -EACCES;
+		}
+		off += reg->var_off.value;
 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
 		if (!err && t == BPF_READ && value_regno >= 0) {
-			mark_reg_unknown_value_and_range(state->regs,
-							 value_regno);
-			/* note that reg.[id|off|range] == 0 */
+			/* ctx access returns either a scalar, or a
+			 * PTR_TO_PACKET[_END].  In the latter case, we know
+			 * the offset is zero.
+			 */
+			if (reg_type == SCALAR_VALUE)
+				mark_reg_unknown(state->regs, value_regno);
+			else
+				mark_reg_known_zero(state->regs, value_regno);
+			state->regs[value_regno].id = 0;
+			state->regs[value_regno].off = 0;
+			state->regs[value_regno].range = 0;
 			state->regs[value_regno].type = reg_type;
-			state->regs[value_regno].aux_off = 0;
-			state->regs[value_regno].aux_off_align = 0;
 		}
 
-	} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
+	} else if (reg->type == PTR_TO_STACK) {
+		/* stack accesses must be at a fixed offset, so that we can
+		 * determine what type of data were returned.
+		 * See check_stack_read().
+		 */
+		if (!tnum_is_const(reg->var_off)) {
+			char tn_buf[48];
+
+			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+			verbose("variable stack access var_off=%s off=%d size=%d",
+				tn_buf, off, size);
+			return -EACCES;
+		}
+		off += reg->var_off.value;
 		if (off >= 0 || off < -MAX_BPF_STACK) {
 			verbose("invalid stack off=%d size=%d\n", off, size);
 			return -EACCES;
@@ -964,7 +1057,7 @@  static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		} else {
 			err = check_stack_read(state, off, size, value_regno);
 		}
-	} else if (state->regs[regno].type == PTR_TO_PACKET) {
+	} else if (reg->type == PTR_TO_PACKET) {
 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
 			verbose("cannot write into packet\n");
 			return -EACCES;
@@ -976,21 +1069,24 @@  static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		}
 		err = check_packet_access(env, regno, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
-			mark_reg_unknown_value_and_range(state->regs,
-							 value_regno);
+			mark_reg_unknown(state->regs, value_regno);
 	} else {
 		verbose("R%d invalid mem access '%s'\n",
 			regno, reg_type_str[reg->type]);
 		return -EACCES;
 	}
 
-	if (!err && size <= 2 && value_regno >= 0 && env->allow_ptr_leaks &&
-	    state->regs[value_regno].type == UNKNOWN_VALUE) {
-		/* 1 or 2 byte load zero-extends, determine the number of
-		 * zero upper bits. Not doing it fo 4 byte load, since
-		 * such values cannot be added to ptr_to_packet anyway.
-		 */
-		state->regs[value_regno].imm = 64 - size * 8;
+	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
+	    state->regs[value_regno].type == SCALAR_VALUE) {
+		/* b/h/w load zero-extends, mark upper bits as known 0 */
+		state->regs[value_regno].var_off = tnum_cast(
+					state->regs[value_regno].var_off, size);
+		/* sign bit is known zero, so we can bound the value */
+		state->regs[value_regno].min_value = 0;
+		state->regs[value_regno].max_value = min_t(u64,
+					state->regs[value_regno].var_off.value |
+					state->regs[value_regno].var_off.mask,
+					BPF_REGISTER_MAX_RANGE);
 	}
 	return err;
 }
@@ -1027,9 +1123,17 @@  static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
 				BPF_SIZE(insn->code), BPF_WRITE, -1);
 }
 
+/* Does this register contain a constant zero? */
+static bool register_is_null(struct bpf_reg_state reg)
+{
+	return reg.type == SCALAR_VALUE && tnum_equals_const(reg.var_off, 0);
+}
+
 /* when register 'regno' is passed into function that will read 'access_size'
  * bytes from that pointer, make sure that it's within stack boundary
- * and all elements of stack are initialized
+ * and all elements of stack are initialized.
+ * Unlike most pointer bounds-checking functions, this one doesn't take an
+ * 'off' argument, so it has to add in reg->off itself.
  */
 static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 				int access_size, bool zero_size_allowed,
@@ -1040,9 +1144,9 @@  static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 	int off, i;
 
 	if (regs[regno].type != PTR_TO_STACK) {
+		/* Allow zero-byte read from NULL, regardless of pointer type */
 		if (zero_size_allowed && access_size == 0 &&
-		    regs[regno].type == CONST_IMM &&
-		    regs[regno].imm  == 0)
+		    register_is_null(regs[regno]))
 			return 0;
 
 		verbose("R%d type=%s expected=%s\n", regno,
@@ -1051,7 +1155,15 @@  static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 		return -EACCES;
 	}
 
-	off = regs[regno].imm;
+	/* Only allow fixed-offset stack reads */
+	if (!tnum_is_const(regs[regno].var_off)) {
+		char tn_buf[48];
+
+		tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
+		verbose("invalid variable stack read R%d var_off=%s\n",
+			regno, tn_buf);
+	}
+	off = regs[regno].off + regs[regno].var_off.value;
 	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
 	    access_size <= 0) {
 		verbose("invalid stack type R%d off=%d access_size=%d\n",
@@ -1082,16 +1194,14 @@  static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 				   int access_size, bool zero_size_allowed,
 				   struct bpf_call_arg_meta *meta)
 {
-	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
 
-	switch (regs[regno].type) {
+	switch (reg->type) {
 	case PTR_TO_PACKET:
-		return check_packet_access(env, regno, 0, access_size);
+		return check_packet_access(env, regno, reg->off, access_size);
 	case PTR_TO_MAP_VALUE:
-		return check_map_access(env, regno, 0, access_size);
-	case PTR_TO_MAP_VALUE_ADJ:
-		return check_map_access_adj(env, regno, 0, access_size);
-	default: /* const_imm|ptr_to_stack or invalid ptr */
+		return check_map_access(env, regno, reg->off, access_size);
+	default: /* scalar_value|ptr_to_stack or invalid ptr */
 		return check_stack_boundary(env, regno, access_size,
 					    zero_size_allowed, meta);
 	}
@@ -1134,11 +1244,8 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			goto err_type;
 	} else if (arg_type == ARG_CONST_SIZE ||
 		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
-		expected_type = CONST_IMM;
-		/* One exception. Allow UNKNOWN_VALUE registers when the
-		 * boundaries are known and don't cause unsafe memory accesses
-		 */
-		if (type != UNKNOWN_VALUE && type != expected_type)
+		expected_type = SCALAR_VALUE;
+		if (type != expected_type)
 			goto err_type;
 	} else if (arg_type == ARG_CONST_MAP_PTR) {
 		expected_type = CONST_PTR_TO_MAP;
@@ -1152,13 +1259,13 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		   arg_type == ARG_PTR_TO_UNINIT_MEM) {
 		expected_type = PTR_TO_STACK;
 		/* One exception here. In case function allows for NULL to be
-		 * passed in as argument, it's a CONST_IMM type. Final test
+		 * passed in as argument, it's a SCALAR_VALUE type. Final test
 		 * happens during stack boundary checking.
 		 */
-		if (type == CONST_IMM && reg->imm == 0)
+		if (register_is_null(*reg))
 			/* final test in check_stack_boundary() */;
 		else if (type != PTR_TO_PACKET && type != PTR_TO_MAP_VALUE &&
-			 type != PTR_TO_MAP_VALUE_ADJ && type != expected_type)
+			 type != expected_type)
 			goto err_type;
 		meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
 	} else {
@@ -1184,7 +1291,7 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			return -EACCES;
 		}
 		if (type == PTR_TO_PACKET)
-			err = check_packet_access(env, regno, 0,
+			err = check_packet_access(env, regno, reg->off,
 						  meta->map_ptr->key_size);
 		else
 			err = check_stack_boundary(env, regno,
@@ -1200,7 +1307,7 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			return -EACCES;
 		}
 		if (type == PTR_TO_PACKET)
-			err = check_packet_access(env, regno, 0,
+			err = check_packet_access(env, regno, reg->off,
 						  meta->map_ptr->value_size);
 		else
 			err = check_stack_boundary(env, regno,
@@ -1220,10 +1327,11 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			return -EACCES;
 		}
 
-		/* If the register is UNKNOWN_VALUE, the access check happens
-		 * using its boundaries. Otherwise, just use its imm
+		/* The register is SCALAR_VALUE; the access check
+		 * happens using its boundaries.
 		 */
-		if (type == UNKNOWN_VALUE) {
+
+		if (!tnum_is_const(reg->var_off))
 			/* For unprivileged variable accesses, disable raw
 			 * mode so that the program is required to
 			 * initialize all the memory that the helper could
@@ -1231,35 +1339,28 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			 */
 			meta = NULL;
 
-			if (reg->min_value < 0) {
-				verbose("R%d min value is negative, either use unsigned or 'var &= const'\n",
-					regno);
-				return -EACCES;
-			}
-
-			if (reg->min_value == 0) {
-				err = check_helper_mem_access(env, regno - 1, 0,
-							      zero_size_allowed,
-							      meta);
-				if (err)
-					return err;
-			}
+		if (reg->min_value < 0) {
+			verbose("R%d min value is negative, either use unsigned or 'var &= const'\n",
+				regno);
+			return -EACCES;
+		}
 
-			if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
-				verbose("R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
-					regno);
-				return -EACCES;
-			}
-			err = check_helper_mem_access(env, regno - 1,
-						      reg->max_value,
-						      zero_size_allowed, meta);
+		if (reg->min_value == 0) {
+			err = check_helper_mem_access(env, regno - 1, 0,
+						      zero_size_allowed,
+						      meta);
 			if (err)
 				return err;
-		} else {
-			/* register is CONST_IMM */
-			err = check_helper_mem_access(env, regno - 1, reg->imm,
-						      zero_size_allowed, meta);
 		}
+
+		if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
+			verbose("R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+				regno);
+			return -EACCES;
+		}
+		err = check_helper_mem_access(env, regno - 1,
+					      reg->max_value,
+					      zero_size_allowed, meta);
 	}
 
 	return err;
@@ -1351,6 +1452,9 @@  static int check_raw_mode(const struct bpf_func_proto *fn)
 	return count > 1 ? -EINVAL : 0;
 }
 
+/* Packet data might have moved, any old PTR_TO_PACKET[_END] are now invalid,
+ * so turn them into unknown SCALAR_VALUE.
+ */
 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state *state = &env->cur_state;
@@ -1360,7 +1464,7 @@  static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (regs[i].type == PTR_TO_PACKET ||
 		    regs[i].type == PTR_TO_PACKET_END)
-			mark_reg_unknown_value(regs, i);
+			mark_reg_unknown(regs, i);
 
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] != STACK_SPILL)
@@ -1369,8 +1473,7 @@  static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 		if (reg->type != PTR_TO_PACKET &&
 		    reg->type != PTR_TO_PACKET_END)
 			continue;
-		__mark_reg_unknown_value(state->spilled_regs,
-					 i / BPF_REG_SIZE);
+		__mark_reg_unknown(reg);
 	}
 }
 
@@ -1450,14 +1553,17 @@  static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 
 	/* update return register */
 	if (fn->ret_type == RET_INTEGER) {
-		regs[BPF_REG_0].type = UNKNOWN_VALUE;
+		/* sets type to SCALAR_VALUE */
+		mark_reg_unknown(regs, BPF_REG_0);
 	} else if (fn->ret_type == RET_VOID) {
 		regs[BPF_REG_0].type = NOT_INIT;
 	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
 		struct bpf_insn_aux_data *insn_aux;
 
 		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
-		regs[BPF_REG_0].max_value = regs[BPF_REG_0].min_value = 0;
+		/* There is no offset yet applied, variable or fixed */
+		mark_reg_known_zero(regs, BPF_REG_0);
+		regs[BPF_REG_0].off = 0;
 		/* remember map_ptr, so that check_map_access()
 		 * can check 'value_size' boundary of memory access
 		 * to map element returned from bpf_map_lookup_elem()
@@ -1488,371 +1594,336 @@  static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	return 0;
 }
 
-static int check_packet_ptr_add(struct bpf_verifier_env *env,
-				struct bpf_insn *insn)
+static void check_reg_overflow(struct bpf_reg_state *reg)
 {
-	struct bpf_reg_state *regs = env->cur_state.regs;
-	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
-	struct bpf_reg_state *src_reg = &regs[insn->src_reg];
-	struct bpf_reg_state tmp_reg;
-	s32 imm;
-
-	if (BPF_SRC(insn->code) == BPF_K) {
-		/* pkt_ptr += imm */
-		imm = insn->imm;
-
-add_imm:
-		if (imm < 0) {
-			verbose("addition of negative constant to packet pointer is not allowed\n");
-			return -EACCES;
-		}
-		if (imm >= MAX_PACKET_OFF ||
-		    imm + dst_reg->off >= MAX_PACKET_OFF) {
-			verbose("constant %d is too large to add to packet pointer\n",
-				imm);
-			return -EACCES;
-		}
-		/* a constant was added to pkt_ptr.
-		 * Remember it while keeping the same 'id'
-		 */
-		dst_reg->off += imm;
-	} else {
-		bool had_id;
-
-		if (src_reg->type == PTR_TO_PACKET) {
-			/* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */
-			tmp_reg = *dst_reg;  /* save r7 state */
-			*dst_reg = *src_reg; /* copy pkt_ptr state r6 into r7 */
-			src_reg = &tmp_reg;  /* pretend it's src_reg state */
-			/* if the checks below reject it, the copy won't matter,
-			 * since we're rejecting the whole program. If all ok,
-			 * then imm22 state will be added to r7
-			 * and r7 will be pkt(id=0,off=22,r=62) while
-			 * r6 will stay as pkt(id=0,off=0,r=62)
-			 */
-		}
-
-		if (src_reg->type == CONST_IMM) {
-			/* pkt_ptr += reg where reg is known constant */
-			imm = src_reg->imm;
-			goto add_imm;
-		}
-		/* disallow pkt_ptr += reg
-		 * if reg is not uknown_value with guaranteed zero upper bits
-		 * otherwise pkt_ptr may overflow and addition will become
-		 * subtraction which is not allowed
-		 */
-		if (src_reg->type != UNKNOWN_VALUE) {
-			verbose("cannot add '%s' to ptr_to_packet\n",
-				reg_type_str[src_reg->type]);
-			return -EACCES;
-		}
-		if (src_reg->imm < 48) {
-			verbose("cannot add integer value with %lld upper zero bits to ptr_to_packet\n",
-				src_reg->imm);
-			return -EACCES;
-		}
-
-		had_id = (dst_reg->id != 0);
-
-		/* dst_reg stays as pkt_ptr type and since some positive
-		 * integer value was added to the pointer, increment its 'id'
-		 */
-		dst_reg->id = ++env->id_gen;
+	if (reg->max_value > BPF_REGISTER_MAX_RANGE)
+		reg->max_value = BPF_REGISTER_MAX_RANGE;
+	if (reg->min_value < BPF_REGISTER_MIN_RANGE ||
+	    reg->min_value > BPF_REGISTER_MAX_RANGE)
+		reg->min_value = BPF_REGISTER_MIN_RANGE;
+}
 
-		/* something was added to pkt_ptr, set range to zero */
-		dst_reg->aux_off += dst_reg->off;
-		dst_reg->off = 0;
-		dst_reg->range = 0;
-		if (had_id)
-			dst_reg->aux_off_align = min(dst_reg->aux_off_align,
-						     src_reg->min_align);
-		else
-			dst_reg->aux_off_align = src_reg->min_align;
+static void coerce_reg_to_32(struct bpf_reg_state *reg)
+{
+	/* 32-bit values can't be negative as an s64 */
+	if (reg->min_value < 0)
+		reg->min_value = 0;
+	/* clear high 32 bits */
+	reg->var_off = tnum_cast(reg->var_off, 4);
+	/* Did value become known?  Then update bounds */
+	if (tnum_is_const(reg->var_off)) {
+		if ((s64)reg->var_off.value > BPF_REGISTER_MIN_RANGE)
+			reg->min_value = reg->var_off.value;
+		if (reg->var_off.value < BPF_REGISTER_MAX_RANGE)
+			reg->max_value = reg->var_off.value;
 	}
-	return 0;
 }
 
-static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
+/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
+ * Caller must check_reg_overflow all argument regs beforehand.
+ * Caller should also handle BPF_MOV case separately.
+ * If we return -EACCES, caller may want to try again treating pointer as a
+ * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
+ */
+static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+				   struct bpf_insn *insn,
+				   struct bpf_reg_state *ptr_reg,
+				   struct bpf_reg_state *off_reg)
 {
-	struct bpf_reg_state *regs = env->cur_state.regs;
-	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+	struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
+	bool known = tnum_is_const(off_reg->var_off);
+	s64 min_val = off_reg->min_value;
+	u64 max_val = off_reg->max_value;
 	u8 opcode = BPF_OP(insn->code);
-	s64 imm_log2;
+	u32 dst = insn->dst_reg;
 
-	/* for type == UNKNOWN_VALUE:
-	 * imm > 0 -> number of zero upper bits
-	 * imm == 0 -> don't track which is the same as all bits can be non-zero
-	 */
+	dst_reg = &regs[dst];
 
-	if (BPF_SRC(insn->code) == BPF_X) {
-		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
-
-		if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
-		    dst_reg->imm && opcode == BPF_ADD) {
-			/* dreg += sreg
-			 * where both have zero upper bits. Adding them
-			 * can only result making one more bit non-zero
-			 * in the larger value.
-			 * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47)
-			 *     0xffff (imm=48) + 0xffff = 0x1fffe (imm=47)
-			 */
-			dst_reg->imm = min(dst_reg->imm, src_reg->imm);
-			dst_reg->imm--;
-			return 0;
-		}
-		if (src_reg->type == CONST_IMM && src_reg->imm > 0 &&
-		    dst_reg->imm && opcode == BPF_ADD) {
-			/* dreg += sreg
-			 * where dreg has zero upper bits and sreg is const.
-			 * Adding them can only result making one more bit
-			 * non-zero in the larger value.
-			 */
-			imm_log2 = __ilog2_u64((long long)src_reg->imm);
-			dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
-			dst_reg->imm--;
-			return 0;
-		}
-		/* all other cases non supported yet, just mark dst_reg */
-		dst_reg->imm = 0;
-		return 0;
+	if (WARN_ON_ONCE(known && (min_val != max_val))) {
+		print_verifier_state(&env->cur_state);
+		verbose("verifier internal error\n");
+		return -EINVAL;
+	}
+
+	if (BPF_CLASS(insn->code) != BPF_ALU64) {
+		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
+		if (!env->allow_ptr_leaks)
+			verbose("R%d 32-bit pointer arithmetic prohibited\n",
+				dst);
+		return -EACCES;
+	}
+
+	if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
+		if (!env->allow_ptr_leaks)
+			verbose("R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
+				dst);
+		return -EACCES;
+	}
+	if (ptr_reg->type == CONST_PTR_TO_MAP) {
+		if (!env->allow_ptr_leaks)
+			verbose("R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
+				dst);
+		return -EACCES;
+	}
+	if (ptr_reg->type == PTR_TO_PACKET_END) {
+		if (!env->allow_ptr_leaks)
+			verbose("R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
+				dst);
+		return -EACCES;
 	}
 
-	/* sign extend 32-bit imm into 64-bit to make sure that
-	 * negative values occupy bit 63. Note ilog2() would have
-	 * been incorrect, since sizeof(insn->imm) == 4
+	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
+	 * The id may be overwritten later if we create a new variable offset.
 	 */
-	imm_log2 = __ilog2_u64((long long)insn->imm);
+	dst_reg->type = ptr_reg->type;
+	dst_reg->id = ptr_reg->id;
 
-	if (dst_reg->imm && opcode == BPF_LSH) {
-		/* reg <<= imm
-		 * if reg was a result of 2 byte load, then its imm == 48
-		 * which means that upper 48 bits are zero and shifting this reg
-		 * left by 4 would mean that upper 44 bits are still zero
+	switch (opcode) {
+	case BPF_ADD:
+		/* We can take a fixed offset as long as it doesn't overflow
+		 * the s32 'off' field
 		 */
-		dst_reg->imm -= insn->imm;
-	} else if (dst_reg->imm && opcode == BPF_MUL) {
-		/* reg *= imm
-		 * if multiplying by 14 subtract 4
-		 * This is conservative calculation of upper zero bits.
-		 * It's not trying to special case insn->imm == 1 or 0 cases
+		if (known && (ptr_reg->off + min_val ==
+			      (s64)(s32)(ptr_reg->off + min_val))) {
+			/* pointer += K.  Accumulate it into fixed offset */
+			dst_reg->min_value = ptr_reg->min_value;
+			dst_reg->max_value = ptr_reg->max_value;
+			dst_reg->var_off = ptr_reg->var_off;
+			dst_reg->off = ptr_reg->off + min_val;
+			break;
+		}
+		if (max_val == BPF_REGISTER_MAX_RANGE) {
+			if (!env->allow_ptr_leaks)
+				verbose("R%d tried to add unbounded value to pointer\n",
+					dst);
+			return -EACCES;
+		}
+		/* A new variable offset is created.  Note that off_reg->off
+		 * == 0, since it's a scalar.
+		 * dst_reg gets the pointer type and since some positive
+		 * integer value was added to the pointer, increment its 'id'.
+		 * this creates a new 'base' pointer, off_reg (variable) gets
+		 * added into the variable offset, and we copy the fixed offset
+		 * from ptr_reg.
 		 */
-		dst_reg->imm -= imm_log2 + 1;
-	} else if (opcode == BPF_AND) {
-		/* reg &= imm */
-		dst_reg->imm = 63 - imm_log2;
-	} else if (dst_reg->imm && opcode == BPF_ADD) {
-		/* reg += imm */
-		dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
-		dst_reg->imm--;
-	} else if (opcode == BPF_RSH) {
-		/* reg >>= imm
-		 * which means that after right shift, upper bits will be zero
-		 * note that verifier already checked that
-		 * 0 <= imm < 64 for shift insn
+		if (min_val <= BPF_REGISTER_MIN_RANGE)
+			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
+			dst_reg->min_value += min_val;
+		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
+			dst_reg->max_value += max_val;
+		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
+		dst_reg->off = ptr_reg->off;
+		dst_reg->id = ++env->id_gen;
+		if (ptr_reg->type == PTR_TO_PACKET)
+			/* something was added to pkt_ptr, set range to zero */
+			dst_reg->range = 0;
+		break;
+	case BPF_SUB:
+		if (dst_reg == off_reg) {
+			/* scalar -= pointer.  Creates an unknown scalar */
+			if (!env->allow_ptr_leaks)
+				verbose("R%d tried to subtract pointer from scalar\n",
+					dst);
+			return -EACCES;
+		}
+		/* We don't allow subtraction from FP, because (according to
+		 * test_verifier.c test "invalid fp arithmetic", JITs might not
+		 * be able to deal with it.
 		 */
-		dst_reg->imm += insn->imm;
-		if (unlikely(dst_reg->imm > 64))
-			/* some dumb code did:
-			 * r2 = *(u32 *)mem;
-			 * r2 >>= 32;
-			 * and all bits are zero now */
-			dst_reg->imm = 64;
-	} else {
-		/* all other alu ops, means that we don't know what will
-		 * happen to the value, mark it with unknown number of zero bits
+		if (ptr_reg->type == PTR_TO_STACK) {
+			if (!env->allow_ptr_leaks)
+				verbose("R%d subtraction from stack pointer prohibited\n",
+					dst);
+			return -EACCES;
+		}
+		if (known && (ptr_reg->off - min_val ==
+			      (s64)(s32)(ptr_reg->off - min_val))) {
+			/* pointer -= K.  Subtract it from fixed offset */
+			dst_reg->min_value = ptr_reg->min_value;
+			dst_reg->max_value = ptr_reg->max_value;
+			dst_reg->var_off = ptr_reg->var_off;
+			dst_reg->id = ptr_reg->id;
+			dst_reg->off = ptr_reg->off - min_val;
+			break;
+		}
+		/* Subtracting a negative value will just confuse everything.
+		 * This can happen if off_reg is an immediate.
 		 */
-		dst_reg->imm = 0;
-	}
-
-	if (dst_reg->imm < 0) {
-		/* all 64 bits of the register can contain non-zero bits
-		 * and such value cannot be added to ptr_to_packet, since it
-		 * may overflow, mark it as unknown to avoid further eval
+		if ((s64)max_val < 0) {
+			if (!env->allow_ptr_leaks)
+				verbose("R%d tried to subtract negative max_val %lld from pointer\n",
+					dst, (s64)max_val);
+			return -EACCES;
+		}
+		/* A new variable offset is created.  If the subtrahend is known
+		 * nonnegative, then any reg->range we had before is still good.
 		 */
-		dst_reg->imm = 0;
-	}
-	return 0;
-}
-
-static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
-				struct bpf_insn *insn)
-{
-	struct bpf_reg_state *regs = env->cur_state.regs;
-	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
-	struct bpf_reg_state *src_reg = &regs[insn->src_reg];
-	u8 opcode = BPF_OP(insn->code);
-	u64 dst_imm = dst_reg->imm;
-
-	/* dst_reg->type == CONST_IMM here. Simulate execution of insns
-	 * containing ALU ops. Don't care about overflow or negative
-	 * values, just add/sub/... them; registers are in u64.
-	 */
-	if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm += insn->imm;
-	} else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm += src_reg->imm;
-	} else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm -= insn->imm;
-	} else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm -= src_reg->imm;
-	} else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm *= insn->imm;
-	} else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm *= src_reg->imm;
-	} else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm |= insn->imm;
-	} else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm |= src_reg->imm;
-	} else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm &= insn->imm;
-	} else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm &= src_reg->imm;
-	} else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm >>= insn->imm;
-	} else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm >>= src_reg->imm;
-	} else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm <<= insn->imm;
-	} else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm <<= src_reg->imm;
-	} else {
-		mark_reg_unknown_value(regs, insn->dst_reg);
-		goto out;
+		if (max_val >= BPF_REGISTER_MAX_RANGE)
+			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
+			dst_reg->min_value -= max_val;
+		if (min_val <= BPF_REGISTER_MIN_RANGE)
+			dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
+			dst_reg->max_value -= min_val;
+		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
+		dst_reg->off = ptr_reg->off;
+		dst_reg->id = ++env->id_gen;
+		if (ptr_reg->type == PTR_TO_PACKET && min_val < 0)
+			/* something was added to pkt_ptr, set range to zero */
+			dst_reg->range = 0;
+		break;
+	case BPF_AND:
+	case BPF_OR:
+	case BPF_XOR:
+		/* bitwise ops on pointers are troublesome, prohibit for now.
+		 * (However, in principle we could allow some cases, e.g.
+		 * ptr &= ~3 which would reduce min_value by 3.)
+		 */
+		if (!env->allow_ptr_leaks)
+			verbose("R%d bitwise operator %s on pointer prohibited\n",
+				dst, bpf_alu_string[opcode >> 4]);
+		return -EACCES;
+	default:
+		/* other operators (e.g. MUL,LSH) produce non-pointer results */
+		if (!env->allow_ptr_leaks)
+			verbose("R%d pointer arithmetic with %s operator prohibited\n",
+				dst, bpf_alu_string[opcode >> 4]);
+		return -EACCES;
 	}
 
-	dst_reg->imm = dst_imm;
-out:
+	check_reg_overflow(dst_reg);
 	return 0;
 }
 
-static void check_reg_overflow(struct bpf_reg_state *reg)
+static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
+				      struct bpf_insn *insn,
+				      struct bpf_reg_state *dst_reg,
+				      struct bpf_reg_state *src_reg)
 {
-	if (reg->max_value > BPF_REGISTER_MAX_RANGE)
-		reg->max_value = BPF_REGISTER_MAX_RANGE;
-	if (reg->min_value < BPF_REGISTER_MIN_RANGE ||
-	    reg->min_value > BPF_REGISTER_MAX_RANGE)
-		reg->min_value = BPF_REGISTER_MIN_RANGE;
-}
-
-static u32 calc_align(u32 imm)
-{
-	if (!imm)
-		return 1U << 31;
-	return imm - ((imm - 1) & imm);
-}
-
-static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
-				    struct bpf_insn *insn)
-{
-	struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
+	struct bpf_reg_state *regs = env->cur_state.regs;
 	s64 min_val = BPF_REGISTER_MIN_RANGE;
 	u64 max_val = BPF_REGISTER_MAX_RANGE;
 	u8 opcode = BPF_OP(insn->code);
-	u32 dst_align, src_align;
+	bool src_known, dst_known;
 
-	dst_reg = &regs[insn->dst_reg];
-	src_align = 0;
-	if (BPF_SRC(insn->code) == BPF_X) {
-		check_reg_overflow(&regs[insn->src_reg]);
-		min_val = regs[insn->src_reg].min_value;
-		max_val = regs[insn->src_reg].max_value;
-
-		/* If the source register is a random pointer then the
-		 * min_value/max_value values represent the range of the known
-		 * accesses into that value, not the actual min/max value of the
-		 * register itself.  In this case we have to reset the reg range
-		 * values so we know it is not safe to look at.
-		 */
-		if (regs[insn->src_reg].type != CONST_IMM &&
-		    regs[insn->src_reg].type != UNKNOWN_VALUE) {
-			min_val = BPF_REGISTER_MIN_RANGE;
-			max_val = BPF_REGISTER_MAX_RANGE;
-			src_align = 0;
-		} else {
-			src_align = regs[insn->src_reg].min_align;
-		}
-	} else if (insn->imm < BPF_REGISTER_MAX_RANGE &&
-		   (s64)insn->imm > BPF_REGISTER_MIN_RANGE) {
-		min_val = max_val = insn->imm;
-		src_align = calc_align(insn->imm);
+	if (BPF_CLASS(insn->code) != BPF_ALU64) {
+		/* 32-bit ALU ops are (32,32)->64 */
+		coerce_reg_to_32(dst_reg);
+		coerce_reg_to_32(src_reg);
 	}
-
-	dst_align = dst_reg->min_align;
-
-	/* We don't know anything about what was done to this register, mark it
-	 * as unknown.
-	 */
-	if (min_val == BPF_REGISTER_MIN_RANGE &&
-	    max_val == BPF_REGISTER_MAX_RANGE) {
-		reset_reg_range_values(regs, insn->dst_reg);
-		return;
+	if (BPF_CLASS(insn->code) != BPF_ALU64) {
+		/* 32-bit ALU ops are (32,32)->64 */
+		coerce_reg_to_32(dst_reg);
+		coerce_reg_to_32(src_reg);
 	}
-
-	/* If one of our values was at the end of our ranges then we can't just
-	 * do our normal operations to the register, we need to set the values
-	 * to the min/max since they are undefined.
-	 */
-	if (min_val == BPF_REGISTER_MIN_RANGE)
-		dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
-	if (max_val == BPF_REGISTER_MAX_RANGE)
-		dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+	min_val = src_reg->min_value;
+	max_val = src_reg->max_value;
+	src_known = tnum_is_const(src_reg->var_off);
+	dst_known = tnum_is_const(dst_reg->var_off);
 
 	switch (opcode) {
 	case BPF_ADD:
+		if (min_val == BPF_REGISTER_MIN_RANGE)
+			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
 		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
 			dst_reg->min_value += min_val;
+		/* if max_val is MAX_RANGE, this will saturate dst->max */
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
 			dst_reg->max_value += max_val;
-		dst_reg->min_align = min(src_align, dst_align);
+		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg->var_off);
 		break;
 	case BPF_SUB:
+		if (max_val == BPF_REGISTER_MAX_RANGE)
+			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
 		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
-			dst_reg->min_value -= min_val;
+			dst_reg->min_value -= max_val;
+		if (min_val == BPF_REGISTER_MIN_RANGE)
+			dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
-			dst_reg->max_value -= max_val;
-		dst_reg->min_align = min(src_align, dst_align);
+			dst_reg->max_value -= min_val;
+		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg->var_off);
 		break;
 	case BPF_MUL:
-		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
-			dst_reg->min_value *= min_val;
+		if (min_val < 0 || dst_reg->min_value < 0) {
+			/* Ain't nobody got time to multiply that sign */
+			__mark_reg_unknown(dst_reg);
+			break;
+		}
+		dst_reg->min_value *= min_val;
+		/* if max_val is MAX_RANGE, this will saturate dst->max.
+		 * We know MAX_RANGE ** 2 won't overflow a u64, because
+		 * MAX_RANGE itself fits in a u32.
+		 */
+		BUILD_BUG_ON(BPF_REGISTER_MAX_RANGE > (u32)-1);
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
 			dst_reg->max_value *= max_val;
-		dst_reg->min_align = max(src_align, dst_align);
+		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg->var_off);
 		break;
 	case BPF_AND:
-		/* Disallow AND'ing of negative numbers, ain't nobody got time
-		 * for that.  Otherwise the minimum is 0 and the max is the max
-		 * value we could AND against.
+		if (src_known && dst_known) {
+			u64 value = dst_reg->var_off.value & src_reg->var_off.value;
+
+			dst_reg->var_off = tnum_const(value);
+			dst_reg->min_value = dst_reg->max_value = min_t(u64,
+					value, BPF_REGISTER_MAX_RANGE);
+			break;
+		}
+		/* Lose min_value when AND'ing negative numbers, ain't nobody
+		 * got time for that.  Otherwise we get our minimum from the
+		 * var_off, since that's inherently bitwise.
+		 * Our maximum is the minimum of the operands' maxima.
 		 */
-		if (min_val < 0)
+		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg->var_off);
+		if (min_val < 0 && dst_reg->min_value < 0)
 			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
 		else
-			dst_reg->min_value = 0;
-		dst_reg->max_value = max_val;
-		dst_reg->min_align = max(src_align, dst_align);
+			dst_reg->min_value = dst_reg->var_off.value;
+		dst_reg->max_value = min(dst_reg->max_value, max_val);
+		break;
+	case BPF_OR:
+		if (src_known && dst_known) {
+			u64 value = dst_reg->var_off.value | src_reg->var_off.value;
+
+			dst_reg->var_off = tnum_const(value);
+			dst_reg->min_value = dst_reg->max_value = min_t(u64,
+					value, BPF_REGISTER_MAX_RANGE);
+			break;
+		}
+		/* Lose ranges when OR'ing negative numbers, ain't nobody got
+		 * time for that.  Otherwise we get our maximum from the var_off,
+		 * and our minimum is the maximum of the operands' minima.
+		 */
+		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg->var_off);
+		if (min_val < 0 || dst_reg->min_value < 0) {
+			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+			dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+		} else {
+			dst_reg->min_value = max(dst_reg->min_value, min_val);
+			dst_reg->max_value = dst_reg->var_off.value | dst_reg->var_off.mask;
+		}
 		break;
 	case BPF_LSH:
+		if (min_val < 0) {
+			/* LSH by a negative number is undefined */
+			mark_reg_unknown(regs, insn->dst_reg);
+			break;
+		}
 		/* Gotta have special overflow logic here, if we're shifting
 		 * more than MAX_RANGE then just assume we have an invalid
 		 * range.
 		 */
 		if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) {
 			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
-			dst_reg->min_align = 1;
+			dst_reg->var_off = tnum_unknown;
 		} else {
 			if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
 				dst_reg->min_value <<= min_val;
-			if (!dst_reg->min_align)
-				dst_reg->min_align = 1;
-			dst_reg->min_align <<= min_val;
+			if (src_known)
+				dst_reg->var_off = tnum_lshift(dst_reg->var_off, min_val);
+			else
+				dst_reg->var_off = tnum_lshift(tnum_unknown, min_val);
 		}
 		if (max_val > ilog2(BPF_REGISTER_MAX_RANGE))
 			dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
@@ -1860,37 +1931,138 @@  static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 			dst_reg->max_value <<= max_val;
 		break;
 	case BPF_RSH:
-		/* RSH by a negative number is undefined, and the BPF_RSH is an
-		 * unsigned shift, so make the appropriate casts.
-		 */
-		if (min_val < 0 || dst_reg->min_value < 0) {
-			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+		if (min_val < 0) {
+			/* RSH by a negative number is undefined */
+			mark_reg_unknown(regs, insn->dst_reg);
+			break;
+		}
+		/* BPF_RSH is an unsigned shift, so make the appropriate casts */
+		if (dst_reg->min_value < 0) {
+			if (min_val)
+				/* Sign bit will be cleared */
+				dst_reg->min_value = 0;
 		} else {
 			dst_reg->min_value =
 				(u64)(dst_reg->min_value) >> min_val;
 		}
-		if (min_val < 0) {
-			dst_reg->min_align = 1;
-		} else {
-			dst_reg->min_align >>= (u64) min_val;
-			if (!dst_reg->min_align)
-				dst_reg->min_align = 1;
-		}
-		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
-			dst_reg->max_value >>= max_val;
+		if (src_known)
+			dst_reg->var_off = tnum_rshift(dst_reg->var_off, min_val);
+		else
+			dst_reg->var_off = tnum_rshift(tnum_unknown, min_val);
+		if (dst_reg->max_value == BPF_REGISTER_MAX_RANGE)
+			dst_reg->max_value = ~0;
+		dst_reg->max_value >>= max_val;
 		break;
 	default:
-		reset_reg_range_values(regs, insn->dst_reg);
+		mark_reg_unknown(regs, insn->dst_reg);
 		break;
 	}
 
 	check_reg_overflow(dst_reg);
+	return 0;
+}
+
+/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
+ * and var_off.
+ */
+static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
+				   struct bpf_insn *insn)
+{
+	struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg, *src_reg;
+	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
+	u8 opcode = BPF_OP(insn->code);
+	int rc;
+
+	dst_reg = &regs[insn->dst_reg];
+	check_reg_overflow(dst_reg);
+	src_reg = NULL;
+	if (dst_reg->type != SCALAR_VALUE)
+		ptr_reg = dst_reg;
+	if (BPF_SRC(insn->code) == BPF_X) {
+		src_reg = &regs[insn->src_reg];
+		check_reg_overflow(src_reg);
+
+		if (src_reg->type != SCALAR_VALUE) {
+			if (dst_reg->type != SCALAR_VALUE) {
+				/* Combining two pointers by any ALU op yields
+				 * an arbitrary scalar.
+				 */
+				if (!env->allow_ptr_leaks) {
+					verbose("R%d pointer %s pointer prohibited\n",
+						insn->dst_reg,
+						bpf_alu_string[opcode >> 4]);
+					return -EACCES;
+				}
+				mark_reg_unknown(regs, insn->dst_reg);
+				return 0;
+			} else {
+				/* scalar += pointer
+				 * This is legal, but we have to reverse our
+				 * src/dest handling in computing the range
+				 */
+				rc = adjust_ptr_min_max_vals(env, insn,
+							     src_reg, dst_reg);
+				if (rc == -EACCES && env->allow_ptr_leaks) {
+					/* scalar += unknown scalar */
+					__mark_reg_unknown(&off_reg);
+					return adjust_scalar_min_max_vals(
+							env, insn,
+							dst_reg, &off_reg);
+				}
+				return rc;
+			}
+		} else if (ptr_reg) {
+			/* pointer += scalar */
+			rc = adjust_ptr_min_max_vals(env, insn,
+						     dst_reg, src_reg);
+			if (rc == -EACCES && env->allow_ptr_leaks) {
+				/* unknown scalar += scalar */
+				__mark_reg_unknown(dst_reg);
+				return adjust_scalar_min_max_vals(
+						env, insn, dst_reg, src_reg);
+			}
+			return rc;
+		}
+	} else {
+		/* Pretend the src is a reg with a known value, since we only
+		 * need to be able to read from this state.
+		 */
+		off_reg.type = SCALAR_VALUE;
+		off_reg.var_off = tnum_const(insn->imm);
+		off_reg.min_value = insn->imm;
+		off_reg.max_value = insn->imm;
+		src_reg = &off_reg;
+		if (ptr_reg) { /* pointer += K */
+			rc = adjust_ptr_min_max_vals(env, insn,
+						     ptr_reg, src_reg);
+			if (rc == -EACCES && env->allow_ptr_leaks) {
+				/* unknown scalar += K */
+				__mark_reg_unknown(dst_reg);
+				return adjust_scalar_min_max_vals(
+						env, insn, dst_reg, &off_reg);
+			}
+			return rc;
+		}
+	}
+
+	/* Got here implies adding two SCALAR_VALUEs */
+	if (WARN_ON_ONCE(ptr_reg)) {
+		print_verifier_state(&env->cur_state);
+		verbose("verifier internal error: unexpected ptr_reg\n");
+		return -EINVAL;
+	}
+	if (WARN_ON(!src_reg)) {
+		print_verifier_state(&env->cur_state);
+		verbose("verifier internal error: no src_reg\n");
+		return -EINVAL;
+	}
+	return adjust_scalar_min_max_vals(env, insn, dst_reg, src_reg);
 }
 
 /* check validity of 32-bit and 64-bit arithmetic operations */
 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
+	struct bpf_reg_state *regs = env->cur_state.regs;
 	u8 opcode = BPF_OP(insn->code);
 	int err;
 
@@ -1950,11 +2122,6 @@  static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		if (err)
 			return err;
 
-		/* we are setting our register to something new, we need to
-		 * reset its range values.
-		 */
-		reset_reg_range_values(regs, insn->dst_reg);
-
 		if (BPF_SRC(insn->code) == BPF_X) {
 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
 				/* case: R1 = R2
@@ -1962,23 +2129,29 @@  static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 				 */
 				regs[insn->dst_reg] = regs[insn->src_reg];
 			} else {
+				/* R1 = (u32) R2 */
 				if (is_pointer_value(env, insn->src_reg)) {
 					verbose("R%d partial copy of pointer\n",
 						insn->src_reg);
 					return -EACCES;
 				}
-				mark_reg_unknown_value(regs, insn->dst_reg);
+				mark_reg_unknown(regs, insn->dst_reg);
+				/* high 32 bits are known zero.  But this is
+				 * still out of range for max_value, so leave
+				 * that.
+				 */
+				regs[insn->dst_reg].var_off = tnum_cast(
+						regs[insn->dst_reg].var_off, 4);
 			}
 		} else {
 			/* case: R = imm
 			 * remember the value we stored into this reg
 			 */
-			regs[insn->dst_reg].type = CONST_IMM;
-			regs[insn->dst_reg].imm = insn->imm;
-			regs[insn->dst_reg].id = 0;
+			regs[insn->dst_reg].type = SCALAR_VALUE;
+			regs[insn->dst_reg].var_off = tnum_const(insn->imm);
 			regs[insn->dst_reg].max_value = insn->imm;
 			regs[insn->dst_reg].min_value = insn->imm;
-			regs[insn->dst_reg].min_align = calc_align(insn->imm);
+			regs[insn->dst_reg].id = 0;
 		}
 
 	} else if (opcode > BPF_END) {
@@ -2029,68 +2202,7 @@  static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		if (err)
 			return err;
 
-		dst_reg = &regs[insn->dst_reg];
-
-		/* first we want to adjust our ranges. */
-		adjust_reg_min_max_vals(env, insn);
-
-		/* pattern match 'bpf_add Rx, imm' instruction */
-		if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
-		    dst_reg->type == FRAME_PTR && BPF_SRC(insn->code) == BPF_K) {
-			dst_reg->type = PTR_TO_STACK;
-			dst_reg->imm = insn->imm;
-			return 0;
-		} else if (opcode == BPF_ADD &&
-			   BPF_CLASS(insn->code) == BPF_ALU64 &&
-			   dst_reg->type == PTR_TO_STACK &&
-			   ((BPF_SRC(insn->code) == BPF_X &&
-			     regs[insn->src_reg].type == CONST_IMM) ||
-			    BPF_SRC(insn->code) == BPF_K)) {
-			if (BPF_SRC(insn->code) == BPF_X)
-				dst_reg->imm += regs[insn->src_reg].imm;
-			else
-				dst_reg->imm += insn->imm;
-			return 0;
-		} else if (opcode == BPF_ADD &&
-			   BPF_CLASS(insn->code) == BPF_ALU64 &&
-			   (dst_reg->type == PTR_TO_PACKET ||
-			    (BPF_SRC(insn->code) == BPF_X &&
-			     regs[insn->src_reg].type == PTR_TO_PACKET))) {
-			/* ptr_to_packet += K|X */
-			return check_packet_ptr_add(env, insn);
-		} else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
-			   dst_reg->type == UNKNOWN_VALUE &&
-			   env->allow_ptr_leaks) {
-			/* unknown += K|X */
-			return evaluate_reg_alu(env, insn);
-		} else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
-			   dst_reg->type == CONST_IMM &&
-			   env->allow_ptr_leaks) {
-			/* reg_imm += K|X */
-			return evaluate_reg_imm_alu(env, insn);
-		} else if (is_pointer_value(env, insn->dst_reg)) {
-			verbose("R%d pointer arithmetic prohibited\n",
-				insn->dst_reg);
-			return -EACCES;
-		} else if (BPF_SRC(insn->code) == BPF_X &&
-			   is_pointer_value(env, insn->src_reg)) {
-			verbose("R%d pointer arithmetic prohibited\n",
-				insn->src_reg);
-			return -EACCES;
-		}
-
-		/* If we did pointer math on a map value then just set it to our
-		 * PTR_TO_MAP_VALUE_ADJ type so we can deal with any stores or
-		 * loads to this register appropriately, otherwise just mark the
-		 * register as unknown.
-		 */
-		if (env->allow_ptr_leaks &&
-		    BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD &&
-		    (dst_reg->type == PTR_TO_MAP_VALUE ||
-		     dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
-			dst_reg->type = PTR_TO_MAP_VALUE_ADJ;
-		else
-			mark_reg_unknown_value(regs, insn->dst_reg);
+		return adjust_reg_min_max_vals(env, insn);
 	}
 
 	return 0;
@@ -2102,6 +2214,10 @@  static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	struct bpf_reg_state *regs = state->regs, *reg;
 	int i;
 
+	if (dst_reg->off < 0)
+		/* This doesn't give us any range */
+		return;
+
 	/* LLVM can generate two kind of checks:
 	 *
 	 * Type 1:
@@ -2135,20 +2251,21 @@  static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
 			/* keep the maximum range already checked */
-			regs[i].range = max(regs[i].range, dst_reg->off);
+			regs[i].range = max_t(u32, regs[i].range, dst_reg->off);
 
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] != STACK_SPILL)
 			continue;
 		reg = &state->spilled_regs[i / BPF_REG_SIZE];
 		if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
-			reg->range = max(reg->range, dst_reg->off);
+			reg->range = max_t(u32, reg->range, dst_reg->off);
 	}
 }
 
 /* Adjusts the register min/max values in the case that the dst_reg is the
  * variable register that we are working on, and src_reg is a constant or we're
  * simply doing a BPF_K check.
+ * In JEQ/JNE cases we also adjust the var_off values.
  */
 static void reg_set_min_max(struct bpf_reg_state *true_reg,
 			    struct bpf_reg_state *false_reg, u64 val,
@@ -2160,34 +2277,52 @@  static void reg_set_min_max(struct bpf_reg_state *true_reg,
 		 * true then we know for sure.
 		 */
 		true_reg->max_value = true_reg->min_value = val;
+		true_reg->var_off = tnum_const(val);
 		break;
 	case BPF_JNE:
 		/* If this is true we know nothing Jon Snow, but if it is false
 		 * we know the value for sure;
 		 */
 		false_reg->max_value = false_reg->min_value = val;
+		false_reg->var_off = tnum_const(val);
 		break;
 	case BPF_JGT:
-		/* Unsigned comparison, the minimum value is 0. */
-		false_reg->min_value = 0;
-		/* fallthrough */
-	case BPF_JSGT:
-		/* If this is false then we know the maximum val is val,
-		 * otherwise we know the min val is val+1.
+		/* Unsigned comparison, can only tell us about max_value (since
+		 * min_value is signed), unless we learn sign bit.
 		 */
 		false_reg->max_value = val;
+		/* If we're not unsigned-greater-than a positive value, then
+		 * we can't be negative.
+		 */
+		if ((s64)val >= 0 && false_reg->min_value < 0)
+			false_reg->min_value = 0;
+		break;
+	case BPF_JSGT:
+		/* Signed comparison, can only tell us about min_value (since
+		 * max_value is unsigned), unless we already know sign bit.
+		 */
 		true_reg->min_value = val + 1;
+		/* If we're not signed-greater than val, and we're not negative,
+		 * then we can't be unsigned-greater than val either.
+		 */
+		if (false_reg->min_value >= 0)
+			false_reg->max_value = val;
 		break;
 	case BPF_JGE:
-		/* Unsigned comparison, the minimum value is 0. */
-		false_reg->min_value = 0;
-		/* fallthrough */
-	case BPF_JSGE:
-		/* If this is false then we know the maximum value is val - 1,
-		 * otherwise we know the mimimum value is val.
-		 */
 		false_reg->max_value = val - 1;
+		/* If we're not unsigned-ge a positive value, then we can't be
+		 * negative.
+		 */
+		if ((s64)val >= 0 && false_reg->min_value < 0)
+			false_reg->min_value = 0;
+		break;
+	case BPF_JSGE:
 		true_reg->min_value = val;
+		/* If we're not signed-ge val, and we're not negative, then we
+		 * can't be unsigned-ge val either.
+		 */
+		if (false_reg->min_value >= 0)
+			false_reg->max_value = val - 1;
 		break;
 	default:
 		break;
@@ -2197,8 +2332,8 @@  static void reg_set_min_max(struct bpf_reg_state *true_reg,
 	check_reg_overflow(true_reg);
 }
 
-/* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg
- * is the variable reg.
+/* Same as above, but for the case that dst_reg holds a constant and src_reg is
+ * the variable reg.
  */
 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 				struct bpf_reg_state *false_reg, u64 val,
@@ -2210,35 +2345,52 @@  static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 		 * true then we know for sure.
 		 */
 		true_reg->max_value = true_reg->min_value = val;
+		true_reg->var_off = tnum_const(val);
 		break;
 	case BPF_JNE:
 		/* If this is true we know nothing Jon Snow, but if it is false
 		 * we know the value for sure;
 		 */
 		false_reg->max_value = false_reg->min_value = val;
+		false_reg->var_off = tnum_const(val);
 		break;
 	case BPF_JGT:
-		/* Unsigned comparison, the minimum value is 0. */
-		true_reg->min_value = 0;
-		/* fallthrough */
+		/* Unsigned comparison, can only tell us about max_value (since
+		 * min_value is signed), unless we learn sign bit.
+		 */
+		true_reg->max_value = val - 1;
+		/* If a positive value is unsigned-greater-than us, then we
+		 * can't be negative.
+		 */
+		if ((s64)val >= 0 && true_reg->min_value < 0)
+			true_reg->min_value = 0;
+		break;
 	case BPF_JSGT:
-		/*
-		 * If this is false, then the val is <= the register, if it is
-		 * true the register <= to the val.
+		/* Signed comparison, can only tell us about min_value (since
+		 * max_value is unsigned), unless we already know sign bit.
 		 */
 		false_reg->min_value = val;
-		true_reg->max_value = val - 1;
+		/* If val is signed-greater-than us, and we're not negative,
+		 * then val must be unsigned-greater-than us.
+		 */
+		if (true_reg->min_value >= 0)
+			true_reg->max_value = val - 1;
 		break;
 	case BPF_JGE:
-		/* Unsigned comparison, the minimum value is 0. */
-		true_reg->min_value = 0;
-		/* fallthrough */
-	case BPF_JSGE:
-		/* If this is false then constant < register, if it is true then
-		 * the register < constant.
+		true_reg->max_value = val;
+		/* If a positive value is unsigned-ge us, then we can't be
+		 * negative.
 		 */
+		if ((s64)val >= 0 && true_reg->min_value < 0)
+			true_reg->min_value = 0;
+		break;
+	case BPF_JSGE:
 		false_reg->min_value = val + 1;
-		true_reg->max_value = val;
+		/* If val is signed-ge us, and we're not negative, then val
+		 * must be unsigned-ge us.
+		 */
+		if (true_reg->min_value >= 0)
+			true_reg->max_value = val;
 		break;
 	default:
 		break;
@@ -2248,19 +2400,58 @@  static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 	check_reg_overflow(true_reg);
 }
 
+/* Regs are known to be equal, so intersect their min/max/var_off */
+static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
+				  struct bpf_reg_state *dst_reg)
+{
+	src_reg->min_value = dst_reg->min_value = max(src_reg->min_value,
+						      dst_reg->min_value);
+	src_reg->max_value = dst_reg->max_value = min(src_reg->max_value,
+						      dst_reg->max_value);
+	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
+							     dst_reg->var_off);
+	check_reg_overflow(src_reg);
+	check_reg_overflow(dst_reg);
+}
+
+static void reg_combine_min_max(struct bpf_reg_state *true_src,
+				struct bpf_reg_state *true_dst,
+				struct bpf_reg_state *false_src,
+				struct bpf_reg_state *false_dst,
+				u8 opcode)
+{
+	switch (opcode) {
+	case BPF_JEQ:
+		__reg_combine_min_max(true_src, true_dst);
+		break;
+	case BPF_JNE:
+		__reg_combine_min_max(false_src, false_dst);
+	}
+}
+
 static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
-			 enum bpf_reg_type type)
+			 bool is_null)
 {
 	struct bpf_reg_state *reg = &regs[regno];
 
 	if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
-		if (type == UNKNOWN_VALUE) {
-			__mark_reg_unknown_value(regs, regno);
+		/* Old offset (both fixed and variable parts) should
+		 * have been known-zero, because we don't allow pointer
+		 * arithmetic on pointers that might be NULL.
+		 */
+		if (WARN_ON_ONCE(reg->min_value || reg->max_value ||
+				 reg->var_off.value || reg->var_off.mask ||
+				 reg->off)) {
+			reg->min_value = reg->max_value = reg->off = 0;
+			reg->var_off = tnum_const(0);
+		}
+		if (is_null) {
+			reg->type = SCALAR_VALUE;
 		} else if (reg->map_ptr->inner_map_meta) {
 			reg->type = CONST_PTR_TO_MAP;
 			reg->map_ptr = reg->map_ptr->inner_map_meta;
 		} else {
-			reg->type = type;
+			reg->type = PTR_TO_MAP_VALUE;
 		}
 		/* We don't need id from this point onwards anymore, thus we
 		 * should better reset it, so that state pruning has chances
@@ -2274,19 +2465,19 @@  static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
  * be folded together at some point.
  */
 static void mark_map_regs(struct bpf_verifier_state *state, u32 regno,
-			  enum bpf_reg_type type)
+			  bool is_null)
 {
 	struct bpf_reg_state *regs = state->regs;
 	u32 id = regs[regno].id;
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++)
-		mark_map_reg(regs, i, id, type);
+		mark_map_reg(regs, i, id, is_null);
 
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] != STACK_SPILL)
 			continue;
-		mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, type);
+		mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, is_null);
 	}
 }
 
@@ -2336,7 +2527,8 @@  static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	/* detect if R == 0 where R was initialized to zero earlier */
 	if (BPF_SRC(insn->code) == BPF_K &&
 	    (opcode == BPF_JEQ || opcode == BPF_JNE) &&
-	    dst_reg->type == CONST_IMM && dst_reg->imm == insn->imm) {
+	    dst_reg->type == SCALAR_VALUE &&
+	    tnum_equals_const(dst_reg->var_off, insn->imm)) {
 		if (opcode == BPF_JEQ) {
 			/* if (imm == imm) goto pc+off;
 			 * only follow the goto, ignore fall-through
@@ -2358,17 +2550,30 @@  static int check_cond_jmp_op(struct bpf_verifier_env *env,
 
 	/* detect if we are comparing against a constant value so we can adjust
 	 * our min/max values for our dst register.
+	 * this is only legit if both are scalars (or pointers to the same
+	 * object, I suppose, but we don't support that right now), because
+	 * otherwise the different base pointers mean the offsets aren't
+	 * comparable.
 	 */
 	if (BPF_SRC(insn->code) == BPF_X) {
-		if (regs[insn->src_reg].type == CONST_IMM)
-			reg_set_min_max(&other_branch->regs[insn->dst_reg],
-					dst_reg, regs[insn->src_reg].imm,
-					opcode);
-		else if (dst_reg->type == CONST_IMM)
-			reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
-					    &regs[insn->src_reg], dst_reg->imm,
-					    opcode);
-	} else {
+		if (dst_reg->type == SCALAR_VALUE &&
+		    regs[insn->src_reg].type == SCALAR_VALUE) {
+			if (tnum_is_const(regs[insn->src_reg].var_off))
+				reg_set_min_max(&other_branch->regs[insn->dst_reg],
+						dst_reg, regs[insn->src_reg].var_off.value,
+						opcode);
+			else if (tnum_is_const(dst_reg->var_off))
+				reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
+						    &regs[insn->src_reg],
+						    dst_reg->var_off.value, opcode);
+			else if (opcode == BPF_JEQ || opcode == BPF_JNE)
+				/* Comparing for equality, we can combine knowledge */
+				reg_combine_min_max(&other_branch->regs[insn->src_reg],
+						    &other_branch->regs[insn->dst_reg],
+						    &regs[insn->src_reg],
+						    &regs[insn->dst_reg], opcode);
+		}
+	} else if (dst_reg->type == SCALAR_VALUE) {
 		reg_set_min_max(&other_branch->regs[insn->dst_reg],
 					dst_reg, insn->imm, opcode);
 	}
@@ -2380,10 +2585,8 @@  static int check_cond_jmp_op(struct bpf_verifier_env *env,
 		/* Mark all identical map registers in each branch as either
 		 * safe or unknown depending R == 0 or R != 0 conditional.
 		 */
-		mark_map_regs(this_branch, insn->dst_reg,
-			      opcode == BPF_JEQ ? PTR_TO_MAP_VALUE : UNKNOWN_VALUE);
-		mark_map_regs(other_branch, insn->dst_reg,
-			      opcode == BPF_JEQ ? UNKNOWN_VALUE : PTR_TO_MAP_VALUE);
+		mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
+		mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
 		   dst_reg->type == PTR_TO_PACKET &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
@@ -2431,8 +2634,11 @@  static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	if (insn->src_reg == 0) {
 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
 
-		regs[insn->dst_reg].type = CONST_IMM;
-		regs[insn->dst_reg].imm = imm;
+		regs[insn->dst_reg].type = SCALAR_VALUE;
+		regs[insn->dst_reg].min_value = imm;
+		regs[insn->dst_reg].max_value = imm;
+		check_reg_overflow(&regs[insn->dst_reg]);
+		regs[insn->dst_reg].var_off = tnum_const(imm);
 		regs[insn->dst_reg].id = 0;
 		return 0;
 	}
@@ -2514,7 +2720,7 @@  static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	/* mark destination R0 register as readable, since it contains
 	 * the value fetched from the packet
 	 */
-	regs[BPF_REG_0].type = UNKNOWN_VALUE;
+	mark_reg_unknown(regs, BPF_REG_0);
 	return 0;
 }
 
@@ -2717,57 +2923,102 @@  static int check_cfg(struct bpf_verifier_env *env)
 	return ret;
 }
 
-/* the following conditions reduce the number of explored insns
- * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
- */
-static bool compare_ptrs_to_packet(struct bpf_verifier_env *env,
-				   struct bpf_reg_state *old,
-				   struct bpf_reg_state *cur)
+/* check %cur's range satisfies %old's */
+static bool range_within(struct bpf_reg_state *old,
+			 struct bpf_reg_state *cur)
 {
-	if (old->id != cur->id)
-		return false;
+	return old->min_value <= cur->min_value &&
+	       old->max_value >= cur->max_value;
+}
 
-	/* old ptr_to_packet is more conservative, since it allows smaller
-	 * range. Ex:
-	 * old(off=0,r=10) is equal to cur(off=0,r=20), because
-	 * old(off=0,r=10) means that with range=10 the verifier proceeded
-	 * further and found no issues with the program. Now we're in the same
-	 * spot with cur(off=0,r=20), so we're safe too, since anything further
-	 * will only be looking at most 10 bytes after this pointer.
-	 */
-	if (old->off == cur->off && old->range < cur->range)
+/* Returns true if (rold safe implies rcur safe) */
+static bool regsafe(struct bpf_reg_state *rold,
+		    struct bpf_reg_state *rcur,
+		    bool varlen_map_access)
+{
+	if (memcmp(rold, rcur, sizeof(*rold)) == 0)
 		return true;
 
-	/* old(off=20,r=10) is equal to cur(off=22,re=22 or 5 or 0)
-	 * since both cannot be used for packet access and safe(old)
-	 * pointer has smaller off that could be used for further
-	 * 'if (ptr > data_end)' check
-	 * Ex:
-	 * old(off=20,r=10) and cur(off=22,r=22) and cur(off=22,r=0) mean
-	 * that we cannot access the packet.
-	 * The safe range is:
-	 * [ptr, ptr + range - off)
-	 * so whenever off >=range, it means no safe bytes from this pointer.
-	 * When comparing old->off <= cur->off, it means that older code
-	 * went with smaller offset and that offset was later
-	 * used to figure out the safe range after 'if (ptr > data_end)' check
-	 * Say, 'old' state was explored like:
-	 * ... R3(off=0, r=0)
-	 * R4 = R3 + 20
-	 * ... now R4(off=20,r=0)  <-- here
-	 * if (R4 > data_end)
-	 * ... R4(off=20,r=20), R3(off=0,r=20) and R3 can be used to access.
-	 * ... the code further went all the way to bpf_exit.
-	 * Now the 'cur' state at the mark 'here' has R4(off=30,r=0).
-	 * old_R4(off=20,r=0) equal to cur_R4(off=30,r=0), since if the verifier
-	 * goes further, such cur_R4 will give larger safe packet range after
-	 * 'if (R4 > data_end)' and all further insn were already good with r=20,
-	 * so they will be good with r=30 and we can prune the search.
-	 */
-	if (!env->strict_alignment && old->off <= cur->off &&
-	    old->off >= old->range && cur->off >= cur->range)
+	if (rold->type == NOT_INIT)
+		/* explored state can't have used this */
 		return true;
+	if (rcur->type == NOT_INIT)
+		return false;
+	switch (rold->type) {
+	case SCALAR_VALUE:
+		if (rcur->type == SCALAR_VALUE) {
+			/* new val must satisfy old val knowledge */
+			return range_within(rold, rcur) &&
+			       tnum_in(rold->var_off, rcur->var_off);
+		} else {
+			/* if we knew anything about the old value, we're not
+			 * equal, because we can't know anything about the
+			 * scalar value of the pointer in the new value.
+			 */
+			return rold->min_value == BPF_REGISTER_MIN_RANGE &&
+			       rold->max_value == BPF_REGISTER_MAX_RANGE &&
+			       tnum_is_unknown(rold->var_off);
+		}
+	case PTR_TO_MAP_VALUE:
+		if (varlen_map_access) {
+			/* If the new min/max/var_off satisfy the old ones and
+			 * everything else matches, we are OK.
+			 * We don't care about the 'id' value, because nothing
+			 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
+			 */
+			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+			       range_within(rold, rcur) &&
+			       tnum_in(rold->var_off, rcur->var_off);
+		} else {
+			/* If the ranges/var_off were not the same, but
+			 * everything else was and we didn't do a variable
+			 * access into a map then we are a-ok.
+			 */
+			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0;
+		}
+	case PTR_TO_MAP_VALUE_OR_NULL:
+		/* a PTR_TO_MAP_VALUE with no offset (fixed or variable) can
+		 * safely be used as a PTR_TO_MAP_VALUE_OR_NULL into the same
+		 * map.  (We can't do the same thing for a CONST_PTR_TO_MAP,
+		 * because its map_ptr changed when we NULL-checked it.)
+		 */
+		return rcur->type == PTR_TO_MAP_VALUE &&
+		       rcur->map_ptr == rold->map_ptr &&
+		       tnum_equals_const(rcur->var_off, 0) &&
+		       rcur->off == 0;
+	case PTR_TO_PACKET:
+		if (rcur->type != PTR_TO_PACKET)
+			return false;
+		/* We must have at least as much range as the old ptr
+		 * did, so that any accesses which were safe before are
+		 * still safe.  This is true even if old range < old off,
+		 * since someone could have accessed through (ptr - k), or
+		 * even done ptr -= k in a register, to get a safe access.
+		 */
+		if (rold->range > rcur->range)
+			return false;
+		/* If the offsets don't match, we can't trust our alignment;
+		 * nor can we be sure that we won't fall out of range.
+		 */
+		if (rold->off != rcur->off)
+			return false;
+		/* new val must satisfy old val knowledge */
+		return range_within(rold, rcur) &&
+		       tnum_in(rold->var_off, rcur->var_off);
+	case PTR_TO_CTX:
+	case CONST_PTR_TO_MAP:
+	case PTR_TO_STACK:
+	case PTR_TO_PACKET_END:
+		/* Only valid matches are exact, which memcmp() above
+		 * would have accepted
+		 */
+	default:
+		/* Don't know what's going on, just say it's not safe */
+		return false;
+	}
 
+	/* Shouldn't get here; if we do, say it's not safe */
+	WARN_ON_ONCE(1);
 	return false;
 }
 
@@ -2802,43 +3053,11 @@  static bool states_equal(struct bpf_verifier_env *env,
 			 struct bpf_verifier_state *cur)
 {
 	bool varlen_map_access = env->varlen_map_value_access;
-	struct bpf_reg_state *rold, *rcur;
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++) {
-		rold = &old->regs[i];
-		rcur = &cur->regs[i];
-
-		if (memcmp(rold, rcur, sizeof(*rold)) == 0)
-			continue;
-
-		/* If the ranges were not the same, but everything else was and
-		 * we didn't do a variable access into a map then we are a-ok.
-		 */
-		if (!varlen_map_access &&
-		    memcmp(rold, rcur, offsetofend(struct bpf_reg_state, id)) == 0)
-			continue;
-
-		/* If we didn't map access then again we don't care about the
-		 * mismatched range values and it's ok if our old type was
-		 * UNKNOWN and we didn't go to a NOT_INIT'ed reg.
-		 */
-		if (rold->type == NOT_INIT ||
-		    (!varlen_map_access && rold->type == UNKNOWN_VALUE &&
-		     rcur->type != NOT_INIT))
-			continue;
-
-		/* Don't care about the reg->id in this case. */
-		if (rold->type == PTR_TO_MAP_VALUE_OR_NULL &&
-		    rcur->type == PTR_TO_MAP_VALUE_OR_NULL &&
-		    rold->map_ptr == rcur->map_ptr)
-			continue;
-
-		if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
-		    compare_ptrs_to_packet(env, rold, rcur))
-			continue;
-
-		return false;
+		if (!regsafe(&old->regs[i], &cur->regs[i], varlen_map_access))
+			return false;
 	}
 
 	for (i = 0; i < MAX_BPF_STACK; i++) {
@@ -2855,16 +3074,16 @@  static bool states_equal(struct bpf_verifier_env *env,
 			continue;
 		if (old->stack_slot_type[i] != STACK_SPILL)
 			continue;
-		if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE],
-			   &cur->spilled_regs[i / BPF_REG_SIZE],
-			   sizeof(old->spilled_regs[0])))
-			/* when explored and current stack slot types are
-			 * the same, check that stored pointers types
+		if (!regsafe(&old->spilled_regs[i / BPF_REG_SIZE],
+			     &cur->spilled_regs[i / BPF_REG_SIZE],
+			     varlen_map_access))
+			/* when explored and current stack slot are both storing
+			 * spilled registers, check that stored pointers types
 			 * are the same as well.
 			 * Ex: explored safe path could have stored
-			 * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8}
+			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
 			 * but current path has stored:
-			 * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16}
+			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
 			 * such verifier states are not equivalent.
 			 * return false to continue verification of this path
 			 */
@@ -3186,7 +3405,6 @@  static int do_check(struct bpf_verifier_env *env)
 				verbose("invalid BPF_LD mode\n");
 				return -EINVAL;
 			}
-			reset_reg_range_values(regs, insn->dst_reg);
 		} else {
 			verbose("unknown insn class %d\n", class);
 			return -EINVAL;