
[bpf-next,4/6] bpf: Support access to bpf map fields

Message ID: 53fdc8f0c100fc50c8aa5fbc798d659e3dd77e92.1592426215.git.rdna@fb.com
State: Changes Requested
Delegated to: BPF Maintainers
Series: bpf: Support access to bpf map fields

Commit Message

Andrey Ignatov June 17, 2020, 8:43 p.m. UTC
There are multiple use-cases where it is convenient to have access to bpf
map fields, both `struct bpf_map` and map type specific structs such as
`struct bpf_array`, `struct bpf_htab`, etc.

For example, while working with sock arrays it can be necessary to
calculate the key based on map->max_entries (some_hash % max_entries).
Currently this is solved by communicating max_entries via an "out-of-band"
channel, e.g. via an additional map with a known key that holds info about
the target map. That works, but is inconvenient and error-prone when
working with many maps.

In other cases the necessary data is dynamic (i.e. unknown at load time)
and cannot be obtained at all today. For example, while working with a
hash table it can be convenient to know how much of its capacity is
already used (bpf_htab.count.counter in the BPF_F_NO_PREALLOC case).

At the same time the kernel knows this info and can provide it to the bpf
program.

Fill this gap by adding support for accessing bpf map fields from a bpf
program, for both `struct bpf_map` and map type specific fields.

Support is implemented via btf_struct_access() so that a user can define
their own `struct bpf_map` or map type specific struct in their program
with only the necessary fields and the preserve_access_index attribute,
cast a map to this struct, and use a field.

For example:

	struct bpf_map {
		__u32 max_entries;
	} __attribute__((preserve_access_index));

	struct bpf_array {
		struct bpf_map map;
		__u32 elem_size;
	} __attribute__((preserve_access_index));

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 4);
		__type(key, __u32);
		__type(value, __u32);
	} m_array SEC(".maps");

	SEC("cgroup_skb/egress")
	int cg_skb(void *ctx)
	{
		struct bpf_array *array = (struct bpf_array *)&m_array;
		struct bpf_map *map = (struct bpf_map *)&m_array;

		/* .. use map->max_entries or array->map.max_entries .. */
	}
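
A similar cast works for map type specific structs other than
`struct bpf_array`. As a hedged sketch (the m_hash map and the chosen
fields are illustrative; it reuses the `struct bpf_map` definition above,
and any declared field must exist in the kernel's `struct bpf_htab`),
hash map internals could be read the same way:

	struct bpf_htab {
		struct bpf_map map;
		__u32 n_buckets;
	} __attribute__((preserve_access_index));

	struct {
		__uint(type, BPF_MAP_TYPE_HASH);
		__uint(max_entries, 256);
		__type(key, __u32);
		__type(value, __u32);
	} m_hash SEC(".maps");

	SEC("cgroup_skb/egress")
	int cg_skb_hash(void *ctx)
	{
		struct bpf_htab *htab = (struct bpf_htab *)&m_hash;

		/* .. use htab->map.max_entries or htab->n_buckets .. */
		return 1;
	}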

Similarly to other btf_struct_access() use-cases (e.g. struct tcp_sock
in net/ipv4/bpf_tcp_ca.c), the patch allows access to any field of the
corresponding struct. Only reading from map fields is supported.

For btf_struct_access() to work there has to be a way to know the btf id
of the struct that corresponds to a map type. To get the btf id there has
to be a way to get the stringified name of the map-specific struct, such
as "bpf_array", "bpf_htab", etc, for a map type. Such a stringified name
is kept in the new field bpf_map_ops.map_btf_name; btf ids are calculated
once while preparing btf_vmlinux and cached for future use.

While calculating btf ids, struct names are checked for collisions so
that btf_parse_vmlinux() will fail if two map types use different map
structs with the same name. The only known collision, for `struct
bpf_htab` (kernel/bpf/hashtab.c vs net/core/sock_map.c), was fixed
earlier.

If the map_btf_name field is not set for a map type, `struct bpf_map` is
used by default and only common fields are available for that map type.
Only `struct bpf_array` for BPF_MAP_TYPE_ARRAY and `struct bpf_htab` for
BPF_MAP_TYPE_HASH are supported by this patch. Other map types will be
supported separately.

The feature is available only with CONFIG_DEBUG_INFO_BTF=y and is gated
by perfmon_capable() so that unprivileged programs won't have access to
bpf map fields.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
---
 include/linux/bpf.h                           |  8 ++
 include/linux/bpf_verifier.h                  |  1 +
 kernel/bpf/arraymap.c                         |  1 +
 kernel/bpf/btf.c                              | 66 ++++++++++++++++
 kernel/bpf/hashtab.c                          |  1 +
 kernel/bpf/verifier.c                         | 77 +++++++++++++++++--
 .../selftests/bpf/verifier/map_ptr_mixing.c   |  2 +-
 7 files changed, 147 insertions(+), 9 deletions(-)

Comments

Martin KaFai Lau June 18, 2020, 6:18 a.m. UTC | #1
On Wed, Jun 17, 2020 at 01:43:45PM -0700, Andrey Ignatov wrote:
[ ... ]
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index e5c5305e859c..fa21b1e766ae 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -3577,6 +3577,67 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
>  	return ctx_type;
>  }
>  
> +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
> +#define BPF_LINK_TYPE(_id, _name)
> +static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = {
> +#define BPF_MAP_TYPE(_id, _ops) \
> +	[_id] = &_ops,
> +#include <linux/bpf_types.h>
> +#undef BPF_MAP_TYPE
> +};
> +static u32 btf_vmlinux_map_ids[] = {
> +#define BPF_MAP_TYPE(_id, _ops) \
> +	[_id] = (u32)-1,
> +#include <linux/bpf_types.h>
> +#undef BPF_MAP_TYPE
> +};
> +#undef BPF_PROG_TYPE
> +#undef BPF_LINK_TYPE
> +
> +static int btf_vmlinux_map_ids_init(const struct btf *btf,
> +				    struct bpf_verifier_log *log)
> +{
> +	int base_btf_id, btf_id, i;
> +	const char *btf_name;
> +
> +	base_btf_id = btf_find_by_name_kind(btf, "bpf_map", BTF_KIND_STRUCT);
> +	if (base_btf_id < 0)
> +		return base_btf_id;
> +
> +	BUILD_BUG_ON(ARRAY_SIZE(btf_vmlinux_map_ops) !=
> +		     ARRAY_SIZE(btf_vmlinux_map_ids));
> +
> +	for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) {
> +		if (!btf_vmlinux_map_ops[i])
> +			continue;
> +		btf_name = btf_vmlinux_map_ops[i]->map_btf_name;
> +		if (!btf_name) {
> +			btf_vmlinux_map_ids[i] = base_btf_id;
> +			continue;
> +		}
> +		btf_id = btf_find_by_name_kind(btf, btf_name, BTF_KIND_STRUCT);
> +		if (btf_id < 0)
> +			return btf_id;
> +		btf_vmlinux_map_ids[i] = btf_id;
Since map_btf_name is already in map_ops, how about storing map_btf_id in
map_ops also?
btf_id 0 is "void" which is as good as -1, so there is no need
to modify all map_ops to init map_btf_id to -1.

> +		btf_id = btf_find_by_name_kind_next(btf, btf_name,
> +						    BTF_KIND_STRUCT,
> +						    btf_id + 1);
> +		if (btf_id > 0) {
> +			bpf_log(log,
> +				"Ambiguous btf_id for struct %s: %u, %u.\n",
> +				btf_name, btf_vmlinux_map_ids[i], btf_id);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
Andrey Ignatov June 18, 2020, 7:42 p.m. UTC | #2
Martin KaFai Lau <kafai@fb.com> [Wed, 2020-06-17 23:18 -0700]:
> On Wed, Jun 17, 2020 at 01:43:45PM -0700, Andrey Ignatov wrote:
> [ ... ]
> > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > index e5c5305e859c..fa21b1e766ae 100644
> > --- a/kernel/bpf/btf.c
> > +++ b/kernel/bpf/btf.c
> > @@ -3577,6 +3577,67 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
> >  	return ctx_type;
> >  }
> >  
> > +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
> > +#define BPF_LINK_TYPE(_id, _name)
> > +static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = {
> > +#define BPF_MAP_TYPE(_id, _ops) \
> > +	[_id] = &_ops,
> > +#include <linux/bpf_types.h>
> > +#undef BPF_MAP_TYPE
> > +};
> > +static u32 btf_vmlinux_map_ids[] = {
> > +#define BPF_MAP_TYPE(_id, _ops) \
> > +	[_id] = (u32)-1,
> > +#include <linux/bpf_types.h>
> > +#undef BPF_MAP_TYPE
> > +};
> > +#undef BPF_PROG_TYPE
> > +#undef BPF_LINK_TYPE
> > +
> > +static int btf_vmlinux_map_ids_init(const struct btf *btf,
> > +				    struct bpf_verifier_log *log)
> > +{
> > +	int base_btf_id, btf_id, i;
> > +	const char *btf_name;
> > +
> > +	base_btf_id = btf_find_by_name_kind(btf, "bpf_map", BTF_KIND_STRUCT);
> > +	if (base_btf_id < 0)
> > +		return base_btf_id;
> > +
> > +	BUILD_BUG_ON(ARRAY_SIZE(btf_vmlinux_map_ops) !=
> > +		     ARRAY_SIZE(btf_vmlinux_map_ids));
> > +
> > +	for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) {
> > +		if (!btf_vmlinux_map_ops[i])
> > +			continue;
> > +		btf_name = btf_vmlinux_map_ops[i]->map_btf_name;
> > +		if (!btf_name) {
> > +			btf_vmlinux_map_ids[i] = base_btf_id;
> > +			continue;
> > +		}
> > +		btf_id = btf_find_by_name_kind(btf, btf_name, BTF_KIND_STRUCT);
> > +		if (btf_id < 0)
> > +			return btf_id;
> > +		btf_vmlinux_map_ids[i] = btf_id;
> Since map_btf_name is already in map_ops, how about storing map_btf_id in
> map_ops also?
> btf_id 0 is "void" which is as good as -1, so there is no need
> to modify all map_ops to init map_btf_id to -1.

Yeah, btf_id == 0 being a valid id was the reason I used -1.

I realized that having a map type specific struct with btf_id == 0
should be practically impossible, but is it guaranteed to always be
"void", or did it just happen to be that way and could change in the future?

If both this and having one more field in bpf_map_ops is not a problem,
I'll move it to bpf_map_ops.
Martin KaFai Lau June 18, 2020, 9:03 p.m. UTC | #3
On Thu, Jun 18, 2020 at 12:42:36PM -0700, Andrey Ignatov wrote:
> Martin KaFai Lau <kafai@fb.com> [Wed, 2020-06-17 23:18 -0700]:
> > On Wed, Jun 17, 2020 at 01:43:45PM -0700, Andrey Ignatov wrote:
> > [ ... ]
> > > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > > index e5c5305e859c..fa21b1e766ae 100644
> > > --- a/kernel/bpf/btf.c
> > > +++ b/kernel/bpf/btf.c
> > > @@ -3577,6 +3577,67 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
> > >  	return ctx_type;
> > >  }
> > >  
> > > +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
> > > +#define BPF_LINK_TYPE(_id, _name)
> > > +static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = {
> > > +#define BPF_MAP_TYPE(_id, _ops) \
> > > +	[_id] = &_ops,
> > > +#include <linux/bpf_types.h>
> > > +#undef BPF_MAP_TYPE
> > > +};
> > > +static u32 btf_vmlinux_map_ids[] = {
> > > +#define BPF_MAP_TYPE(_id, _ops) \
> > > +	[_id] = (u32)-1,
> > > +#include <linux/bpf_types.h>
> > > +#undef BPF_MAP_TYPE
> > > +};
> > > +#undef BPF_PROG_TYPE
> > > +#undef BPF_LINK_TYPE
> > > +
> > > +static int btf_vmlinux_map_ids_init(const struct btf *btf,
> > > +				    struct bpf_verifier_log *log)
> > > +{
> > > +	int base_btf_id, btf_id, i;
> > > +	const char *btf_name;
> > > +
> > > +	base_btf_id = btf_find_by_name_kind(btf, "bpf_map", BTF_KIND_STRUCT);
> > > +	if (base_btf_id < 0)
> > > +		return base_btf_id;
> > > +
> > > +	BUILD_BUG_ON(ARRAY_SIZE(btf_vmlinux_map_ops) !=
> > > +		     ARRAY_SIZE(btf_vmlinux_map_ids));
> > > +
> > > +	for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) {
> > > +		if (!btf_vmlinux_map_ops[i])
> > > +			continue;
> > > +		btf_name = btf_vmlinux_map_ops[i]->map_btf_name;
> > > +		if (!btf_name) {
> > > +			btf_vmlinux_map_ids[i] = base_btf_id;
> > > +			continue;
> > > +		}
> > > +		btf_id = btf_find_by_name_kind(btf, btf_name, BTF_KIND_STRUCT);
> > > +		if (btf_id < 0)
> > > +			return btf_id;
> > > +		btf_vmlinux_map_ids[i] = btf_id;
> > Since map_btf_name is already in map_ops, how about storing map_btf_id in
> > map_ops also?
> > btf_id 0 is "void" which is as good as -1, so there is no need
> > to modify all map_ops to init map_btf_id to -1.
> 
> Yeah, btf_id == 0 being a valid id was the reason I used -1.
> 
> I realized that having a map type specific struct with btf_id == 0
> should be practically impossible, but is it guaranteed to always be
> "void" or it just happened so and can change in the future?
It is always "void" and cannot be changed in the future.
Andrey Ignatov June 18, 2020, 11:51 p.m. UTC | #4
Andrey Ignatov <rdna@fb.com> [Thu, 2020-06-18 12:42 -0700]:
> Martin KaFai Lau <kafai@fb.com> [Wed, 2020-06-17 23:18 -0700]:
> > On Wed, Jun 17, 2020 at 01:43:45PM -0700, Andrey Ignatov wrote:
> > [ ... ]
> > > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > > index e5c5305e859c..fa21b1e766ae 100644
> > > --- a/kernel/bpf/btf.c
> > > +++ b/kernel/bpf/btf.c
> > > @@ -3577,6 +3577,67 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
> > >  	return ctx_type;
> > >  }
> > >  
> > > +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
> > > +#define BPF_LINK_TYPE(_id, _name)
> > > +static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = {
> > > +#define BPF_MAP_TYPE(_id, _ops) \
> > > +	[_id] = &_ops,
> > > +#include <linux/bpf_types.h>
> > > +#undef BPF_MAP_TYPE
> > > +};
> > > +static u32 btf_vmlinux_map_ids[] = {
> > > +#define BPF_MAP_TYPE(_id, _ops) \
> > > +	[_id] = (u32)-1,
> > > +#include <linux/bpf_types.h>
> > > +#undef BPF_MAP_TYPE
> > > +};
> > > +#undef BPF_PROG_TYPE
> > > +#undef BPF_LINK_TYPE
> > > +
> > > +static int btf_vmlinux_map_ids_init(const struct btf *btf,
> > > +				    struct bpf_verifier_log *log)
> > > +{
> > > +	int base_btf_id, btf_id, i;
> > > +	const char *btf_name;
> > > +
> > > +	base_btf_id = btf_find_by_name_kind(btf, "bpf_map", BTF_KIND_STRUCT);
> > > +	if (base_btf_id < 0)
> > > +		return base_btf_id;
> > > +
> > > +	BUILD_BUG_ON(ARRAY_SIZE(btf_vmlinux_map_ops) !=
> > > +		     ARRAY_SIZE(btf_vmlinux_map_ids));
> > > +
> > > +	for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) {
> > > +		if (!btf_vmlinux_map_ops[i])
> > > +			continue;
> > > +		btf_name = btf_vmlinux_map_ops[i]->map_btf_name;
> > > +		if (!btf_name) {
> > > +			btf_vmlinux_map_ids[i] = base_btf_id;
> > > +			continue;
> > > +		}
> > > +		btf_id = btf_find_by_name_kind(btf, btf_name, BTF_KIND_STRUCT);
> > > +		if (btf_id < 0)
> > > +			return btf_id;
> > > +		btf_vmlinux_map_ids[i] = btf_id;
> > Since map_btf_name is already in map_ops, how about storing map_btf_id in
> > map_ops also?
> > btf_id 0 is "void" which is as good as -1, so there is no need
> > to modify all map_ops to init map_btf_id to -1.
> 
> Yeah, btf_id == 0 being a valid id was the reason I used -1.
> 
> I realized that having a map type specific struct with btf_id == 0
> should be practically impossible, but is it guaranteed to always be
> "void" or it just happened so and can change in the future?
> 
> If both this and having one more field in bpf_map_ops is not a problem,
> I'll move it to bpf_map_ops.

Nope, I can't do it. All `struct bpf_map_ops` are global `const`, i.e.
rodata, and trying to cast `const` away and change them causes a panic.

Simple user space repro:

	% cat 1.c
	#include <stdio.h>
	
	struct map_ops {
		int a;
	};
	
	const struct map_ops ops = {
		.a = 1,
	};
	
	int main(void)
	{
		struct map_ops *ops_rw = (struct map_ops *)&ops;
	
		printf("before a=%d\n", ops_rw->a);
		ops_rw->a = 3;
		printf(" afrer a=%d\n", ops_rw->a);
	}
	% clang -O2 -Wall -Wextra -pedantic -pedantic-errors -g 1.c && ./a.out
	before a=1
	Segmentation fault (core dumped)
	% objdump -t a.out  | grep -w ops
	0000000000400600 g     O .rodata        0000000000000004              ops
Andrii Nakryiko June 19, 2020, 12:07 a.m. UTC | #5
On Thu, Jun 18, 2020 at 4:52 PM Andrey Ignatov <rdna@fb.com> wrote:
>
> Andrey Ignatov <rdna@fb.com> [Thu, 2020-06-18 12:42 -0700]:
> > Martin KaFai Lau <kafai@fb.com> [Wed, 2020-06-17 23:18 -0700]:
> > > On Wed, Jun 17, 2020 at 01:43:45PM -0700, Andrey Ignatov wrote:
> > > [ ... ]
> > > > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > > > index e5c5305e859c..fa21b1e766ae 100644
> > > > --- a/kernel/bpf/btf.c
> > > > +++ b/kernel/bpf/btf.c
> > > > @@ -3577,6 +3577,67 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
> > > >   return ctx_type;
> > > >  }
> > > >
> > > > +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
> > > > +#define BPF_LINK_TYPE(_id, _name)
> > > > +static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = {
> > > > +#define BPF_MAP_TYPE(_id, _ops) \
> > > > + [_id] = &_ops,
> > > > +#include <linux/bpf_types.h>
> > > > +#undef BPF_MAP_TYPE
> > > > +};
> > > > +static u32 btf_vmlinux_map_ids[] = {
> > > > +#define BPF_MAP_TYPE(_id, _ops) \
> > > > + [_id] = (u32)-1,
> > > > +#include <linux/bpf_types.h>
> > > > +#undef BPF_MAP_TYPE
> > > > +};
> > > > +#undef BPF_PROG_TYPE
> > > > +#undef BPF_LINK_TYPE
> > > > +
> > > > +static int btf_vmlinux_map_ids_init(const struct btf *btf,
> > > > +                             struct bpf_verifier_log *log)
> > > > +{
> > > > + int base_btf_id, btf_id, i;
> > > > + const char *btf_name;
> > > > +
> > > > + base_btf_id = btf_find_by_name_kind(btf, "bpf_map", BTF_KIND_STRUCT);
> > > > + if (base_btf_id < 0)
> > > > +         return base_btf_id;
> > > > +
> > > > + BUILD_BUG_ON(ARRAY_SIZE(btf_vmlinux_map_ops) !=
> > > > +              ARRAY_SIZE(btf_vmlinux_map_ids));
> > > > +
> > > > + for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) {
> > > > +         if (!btf_vmlinux_map_ops[i])
> > > > +                 continue;
> > > > +         btf_name = btf_vmlinux_map_ops[i]->map_btf_name;
> > > > +         if (!btf_name) {
> > > > +                 btf_vmlinux_map_ids[i] = base_btf_id;
> > > > +                 continue;
> > > > +         }
> > > > +         btf_id = btf_find_by_name_kind(btf, btf_name, BTF_KIND_STRUCT);
> > > > +         if (btf_id < 0)
> > > > +                 return btf_id;
> > > > +         btf_vmlinux_map_ids[i] = btf_id;
> > > Since map_btf_name is already in map_ops, how about storing map_btf_id in
> > > map_ops also?
> > > btf_id 0 is "void" which is as good as -1, so there is no need
> > > to modify all map_ops to init map_btf_id to -1.
> >
> > Yeah, btf_id == 0 being a valid id was the reason I used -1.
> >
> > I realized that having a map type specific struct with btf_id == 0
> > should be practically impossible, but is it guaranteed to always be
> > "void" or it just happened so and can change in the future?
> >
> > If both this and having one more field in bpf_map_ops is not a problem,
> > I'll move it to bpf_map_ops.
>
> Nope, I can't do it. All `struct bpf_map_ops` are global `const`, i.e.
> rodata and a try cast `const` away and change them causes a panic.
>
> Simple user space repro:
>
>         % cat 1.c
>         #include <stdio.h>
>
>         struct map_ops {
>                 int a;
>         };
>
>         const struct map_ops ops = {
>                 .a = 1,
>         };
>
>         int main(void)
>         {
>                 struct map_ops *ops_rw = (struct map_ops *)&ops;
>
>                 printf("before a=%d\n", ops_rw->a);
>                 ops_rw->a = 3;
>                 printf(" afrer a=%d\n", ops_rw->a);
>         }
>         % clang -O2 -Wall -Wextra -pedantic -pedantic-errors -g 1.c && ./a.out
>         before a=1
>         Segmentation fault (core dumped)
>         % objdump -t a.out  | grep -w ops
>         0000000000400600 g     O .rodata        0000000000000004              ops
>
> --
> Andrey Ignatov

See the trick that helper prototypes do for BTF ids. Fictional example:

static int hash_map_btf_id;

const struct bpf_map_ops hash_map_ops = {
 ...
 .btf_id = &hash_map_btf_id,
};


then *hash_map_ops.btf_id = <final_btf_id>;
Andrey Ignatov June 19, 2020, 12:27 a.m. UTC | #6
Andrii Nakryiko <andrii.nakryiko@gmail.com> [Thu, 2020-06-18 17:08 -0700]:
> On Thu, Jun 18, 2020 at 4:52 PM Andrey Ignatov <rdna@fb.com> wrote:
> >
> > Andrey Ignatov <rdna@fb.com> [Thu, 2020-06-18 12:42 -0700]:
> > > Martin KaFai Lau <kafai@fb.com> [Wed, 2020-06-17 23:18 -0700]:
> > > > On Wed, Jun 17, 2020 at 01:43:45PM -0700, Andrey Ignatov wrote:
> > > > [ ... ]
> > > > > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > > > > index e5c5305e859c..fa21b1e766ae 100644
> > > > > --- a/kernel/bpf/btf.c
> > > > > +++ b/kernel/bpf/btf.c
> > > > > @@ -3577,6 +3577,67 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
> > > > >   return ctx_type;
> > > > >  }
> > > > >
> > > > > +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
> > > > > +#define BPF_LINK_TYPE(_id, _name)
> > > > > +static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = {
> > > > > +#define BPF_MAP_TYPE(_id, _ops) \
> > > > > + [_id] = &_ops,
> > > > > +#include <linux/bpf_types.h>
> > > > > +#undef BPF_MAP_TYPE
> > > > > +};
> > > > > +static u32 btf_vmlinux_map_ids[] = {
> > > > > +#define BPF_MAP_TYPE(_id, _ops) \
> > > > > + [_id] = (u32)-1,
> > > > > +#include <linux/bpf_types.h>
> > > > > +#undef BPF_MAP_TYPE
> > > > > +};
> > > > > +#undef BPF_PROG_TYPE
> > > > > +#undef BPF_LINK_TYPE
> > > > > +
> > > > > +static int btf_vmlinux_map_ids_init(const struct btf *btf,
> > > > > +                             struct bpf_verifier_log *log)
> > > > > +{
> > > > > + int base_btf_id, btf_id, i;
> > > > > + const char *btf_name;
> > > > > +
> > > > > + base_btf_id = btf_find_by_name_kind(btf, "bpf_map", BTF_KIND_STRUCT);
> > > > > + if (base_btf_id < 0)
> > > > > +         return base_btf_id;
> > > > > +
> > > > > + BUILD_BUG_ON(ARRAY_SIZE(btf_vmlinux_map_ops) !=
> > > > > +              ARRAY_SIZE(btf_vmlinux_map_ids));
> > > > > +
> > > > > + for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) {
> > > > > +         if (!btf_vmlinux_map_ops[i])
> > > > > +                 continue;
> > > > > +         btf_name = btf_vmlinux_map_ops[i]->map_btf_name;
> > > > > +         if (!btf_name) {
> > > > > +                 btf_vmlinux_map_ids[i] = base_btf_id;
> > > > > +                 continue;
> > > > > +         }
> > > > > +         btf_id = btf_find_by_name_kind(btf, btf_name, BTF_KIND_STRUCT);
> > > > > +         if (btf_id < 0)
> > > > > +                 return btf_id;
> > > > > +         btf_vmlinux_map_ids[i] = btf_id;
> > > > Since map_btf_name is already in map_ops, how about storing map_btf_id in
> > > > map_ops also?
> > > > btf_id 0 is "void" which is as good as -1, so there is no need
> > > > to modify all map_ops to init map_btf_id to -1.
> > >
> > > Yeah, btf_id == 0 being a valid id was the reason I used -1.
> > >
> > > I realized that having a map type specific struct with btf_id == 0
> > > should be practically impossible, but is it guaranteed to always be
> > > "void" or it just happened so and can change in the future?
> > >
> > > If both this and having one more field in bpf_map_ops is not a problem,
> > > I'll move it to bpf_map_ops.
> >
> > Nope, I can't do it. All `struct bpf_map_ops` are global `const`, i.e.
> > rodata and a try cast `const` away and change them causes a panic.
> >
> > Simple user space repro:
> >
> >         % cat 1.c
> >         #include <stdio.h>
> >
> >         struct map_ops {
> >                 int a;
> >         };
> >
> >         const struct map_ops ops = {
> >                 .a = 1,
> >         };
> >
> >         int main(void)
> >         {
> >                 struct map_ops *ops_rw = (struct map_ops *)&ops;
> >
> >                 printf("before a=%d\n", ops_rw->a);
> >                 ops_rw->a = 3;
> >                 printf(" afrer a=%d\n", ops_rw->a);
> >         }
> >         % clang -O2 -Wall -Wextra -pedantic -pedantic-errors -g 1.c && ./a.out
> >         before a=1
> >         Segmentation fault (core dumped)
> >         % objdump -t a.out  | grep -w ops
> >         0000000000400600 g     O .rodata        0000000000000004              ops
> >
> > --
> > Andrey Ignatov
> 
> See the trick that helper prototypes do for BTF ids. Fictional example:
> 
> static int hash_map_btf_id;
> 
> const struct bpf_map_ops hash_map_opss = {
>  ...
>  .btf_id = &hash_map_btf_id,
> };
> 
> 
> then *hash_map_ops.btf_id = <final_btf_id>;

Yeah, it would work, but IMO it wouldn't make the implementation better
since for every map type I would need to write two additional lines of
code. And whoever adds a new map type will need to repeat this trick.

Yeah, it can be automated with a macro, but IMO it's better to avoid
the work than to automate it.

Martin, Andrii, is there any strong reason to convert to a map_btf_id
field? Or is it a "nice to have" kind of thing? If the latter, I'd prefer
to stick with my approach.
Martin KaFai Lau June 19, 2020, 1:11 a.m. UTC | #7
On Thu, Jun 18, 2020 at 05:27:43PM -0700, Andrey Ignatov wrote:
> Andrii Nakryiko <andrii.nakryiko@gmail.com> [Thu, 2020-06-18 17:08 -0700]:
> > On Thu, Jun 18, 2020 at 4:52 PM Andrey Ignatov <rdna@fb.com> wrote:
> > >
> > > Andrey Ignatov <rdna@fb.com> [Thu, 2020-06-18 12:42 -0700]:
> > > > Martin KaFai Lau <kafai@fb.com> [Wed, 2020-06-17 23:18 -0700]:
> > > > > On Wed, Jun 17, 2020 at 01:43:45PM -0700, Andrey Ignatov wrote:
> > > > > [ ... ]
> > > > > > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > > > > > index e5c5305e859c..fa21b1e766ae 100644
> > > > > > --- a/kernel/bpf/btf.c
> > > > > > +++ b/kernel/bpf/btf.c
> > > > > > @@ -3577,6 +3577,67 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
> > > > > >   return ctx_type;
> > > > > >  }
> > > > > >
> > > > > > +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
> > > > > > +#define BPF_LINK_TYPE(_id, _name)
> > > > > > +static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = {
> > > > > > +#define BPF_MAP_TYPE(_id, _ops) \
> > > > > > + [_id] = &_ops,
> > > > > > +#include <linux/bpf_types.h>
> > > > > > +#undef BPF_MAP_TYPE
> > > > > > +};
> > > > > > +static u32 btf_vmlinux_map_ids[] = {
> > > > > > +#define BPF_MAP_TYPE(_id, _ops) \
> > > > > > + [_id] = (u32)-1,
> > > > > > +#include <linux/bpf_types.h>
> > > > > > +#undef BPF_MAP_TYPE
> > > > > > +};
> > > > > > +#undef BPF_PROG_TYPE
> > > > > > +#undef BPF_LINK_TYPE
> > > > > > +
> > > > > > +static int btf_vmlinux_map_ids_init(const struct btf *btf,
> > > > > > +                             struct bpf_verifier_log *log)
> > > > > > +{
> > > > > > + int base_btf_id, btf_id, i;
> > > > > > + const char *btf_name;
> > > > > > +
> > > > > > + base_btf_id = btf_find_by_name_kind(btf, "bpf_map", BTF_KIND_STRUCT);
> > > > > > + if (base_btf_id < 0)
> > > > > > +         return base_btf_id;
> > > > > > +
> > > > > > + BUILD_BUG_ON(ARRAY_SIZE(btf_vmlinux_map_ops) !=
> > > > > > +              ARRAY_SIZE(btf_vmlinux_map_ids));
> > > > > > +
> > > > > > + for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) {
> > > > > > +         if (!btf_vmlinux_map_ops[i])
> > > > > > +                 continue;
> > > > > > +         btf_name = btf_vmlinux_map_ops[i]->map_btf_name;
> > > > > > +         if (!btf_name) {
> > > > > > +                 btf_vmlinux_map_ids[i] = base_btf_id;
> > > > > > +                 continue;
> > > > > > +         }
> > > > > > +         btf_id = btf_find_by_name_kind(btf, btf_name, BTF_KIND_STRUCT);
> > > > > > +         if (btf_id < 0)
> > > > > > +                 return btf_id;
> > > > > > +         btf_vmlinux_map_ids[i] = btf_id;
> > > > > Since map_btf_name is already in map_ops, how about storing map_btf_id in
> > > > > map_ops also?
> > > > > btf_id 0 is "void" which is as good as -1, so there is no need
> > > > > to modify all map_ops to init map_btf_id to -1.
> > > >
> > > > Yeah, btf_id == 0 being a valid id was the reason I used -1.
> > > >
> > > > I realized that having a map type specific struct with btf_id == 0
> > > > should be practically impossible, but is it guaranteed to always be
> > > > "void" or it just happened so and can change in the future?
> > > >
> > > > If both this and having one more field in bpf_map_ops is not a problem,
> > > > I'll move it to bpf_map_ops.
> > >
> > > Nope, I can't do it. All `struct bpf_map_ops` are global `const`, i.e.
> > > rodata and a try cast `const` away and change them causes a panic.
> > >
> > > Simple user space repro:
> > >
> > >         % cat 1.c
> > >         #include <stdio.h>
> > >
> > >         struct map_ops {
> > >                 int a;
> > >         };
> > >
> > >         const struct map_ops ops = {
> > >                 .a = 1,
> > >         };
> > >
> > >         int main(void)
> > >         {
> > >                 struct map_ops *ops_rw = (struct map_ops *)&ops;
> > >
> > >                 printf("before a=%d\n", ops_rw->a);
> > >                 ops_rw->a = 3;
> > >                 printf(" afrer a=%d\n", ops_rw->a);
> > >         }
> > >         % clang -O2 -Wall -Wextra -pedantic -pedantic-errors -g 1.c && ./a.out
> > >         before a=1
> > >         Segmentation fault (core dumped)
> > >         % objdump -t a.out  | grep -w ops
> > >         0000000000400600 g     O .rodata        0000000000000004              ops
> > >
> > > --
> > > Andrey Ignatov
> > 
> > See the trick that helper prototypes do for BTF ids. Fictional example:
> > 
> > static int hash_map_btf_id;
> > 
> > const struct bpf_map_ops hash_map_opss = {
> >  ...
> >  .btf_id = &hash_map_btf_id,
> > };
> > 
> > 
> > then *hash_map_ops.btf_id = <final_btf_id>;
> 
> Yeah, it would work, but IMO it wouldn't make the implementation better
> since for every map type I would need to write two additional lines of
> code. And whoever adds new map type will need to repeat this trick.
I think bpf_func_proto has already been doing this.  Yonghong's
tcp/udp iter is also doing something similar.

> 
> Yeah, it can be automated with a macro, but IMO it's better to avoid
> the work than to automate it.
> 
> Martin, Andrii is there any strong reason to convert to map_btf_id
> field? Or it's "nice to have" kind of thing? If the latter, I'd prefer
> to stick with my approach.
I just think it will be more natural to be able to directly
access it through a map's related pointer, considering
the map_btf_name is already there.

I don't feel strongly here for now.  I think things will have to change
anyway after quickly peeking at Jiri's patch.  Also,
I take back my comment on the collision check.  I think this check
should belong in Jiri's patch instead (if it is indeed needed).
Renaming the sock_hash is good enough for now.
Andrey Ignatov June 19, 2020, 1:53 a.m. UTC | #8
Martin KaFai Lau <kafai@fb.com> [Thu, 2020-06-18 18:12 -0700]:
> On Thu, Jun 18, 2020 at 05:27:43PM -0700, Andrey Ignatov wrote:
> > Andrii Nakryiko <andrii.nakryiko@gmail.com> [Thu, 2020-06-18 17:08 -0700]:
> > > On Thu, Jun 18, 2020 at 4:52 PM Andrey Ignatov <rdna@fb.com> wrote:
> > > >
> > > > Andrey Ignatov <rdna@fb.com> [Thu, 2020-06-18 12:42 -0700]:
> > > > > Martin KaFai Lau <kafai@fb.com> [Wed, 2020-06-17 23:18 -0700]:
> > > > > > On Wed, Jun 17, 2020 at 01:43:45PM -0700, Andrey Ignatov wrote:
> > > > > > [ ... ]
> > > > > > > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > > > > > > index e5c5305e859c..fa21b1e766ae 100644
> > > > > > > --- a/kernel/bpf/btf.c
> > > > > > > +++ b/kernel/bpf/btf.c
> > > > > > > @@ -3577,6 +3577,67 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
> > > > > > >   return ctx_type;
> > > > > > >  }
> > > > > > >
> > > > > > > +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
> > > > > > > +#define BPF_LINK_TYPE(_id, _name)
> > > > > > > +static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = {
> > > > > > > +#define BPF_MAP_TYPE(_id, _ops) \
> > > > > > > + [_id] = &_ops,
> > > > > > > +#include <linux/bpf_types.h>
> > > > > > > +#undef BPF_MAP_TYPE
> > > > > > > +};
> > > > > > > +static u32 btf_vmlinux_map_ids[] = {
> > > > > > > +#define BPF_MAP_TYPE(_id, _ops) \
> > > > > > > + [_id] = (u32)-1,
> > > > > > > +#include <linux/bpf_types.h>
> > > > > > > +#undef BPF_MAP_TYPE
> > > > > > > +};
> > > > > > > +#undef BPF_PROG_TYPE
> > > > > > > +#undef BPF_LINK_TYPE
> > > > > > > +
> > > > > > > +static int btf_vmlinux_map_ids_init(const struct btf *btf,
> > > > > > > +                             struct bpf_verifier_log *log)
> > > > > > > +{
> > > > > > > + int base_btf_id, btf_id, i;
> > > > > > > + const char *btf_name;
> > > > > > > +
> > > > > > > + base_btf_id = btf_find_by_name_kind(btf, "bpf_map", BTF_KIND_STRUCT);
> > > > > > > + if (base_btf_id < 0)
> > > > > > > +         return base_btf_id;
> > > > > > > +
> > > > > > > + BUILD_BUG_ON(ARRAY_SIZE(btf_vmlinux_map_ops) !=
> > > > > > > +              ARRAY_SIZE(btf_vmlinux_map_ids));
> > > > > > > +
> > > > > > > + for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) {
> > > > > > > +         if (!btf_vmlinux_map_ops[i])
> > > > > > > +                 continue;
> > > > > > > +         btf_name = btf_vmlinux_map_ops[i]->map_btf_name;
> > > > > > > +         if (!btf_name) {
> > > > > > > +                 btf_vmlinux_map_ids[i] = base_btf_id;
> > > > > > > +                 continue;
> > > > > > > +         }
> > > > > > > +         btf_id = btf_find_by_name_kind(btf, btf_name, BTF_KIND_STRUCT);
> > > > > > > +         if (btf_id < 0)
> > > > > > > +                 return btf_id;
> > > > > > > +         btf_vmlinux_map_ids[i] = btf_id;
> > > > > > Since map_btf_name is already in map_ops, how about storing map_btf_id in
> > > > > > map_ops also?
> > > > > > btf_id 0 is "void" which is as good as -1, so there is no need
> > > > > > to modify all map_ops to init map_btf_id to -1.
> > > > >
> > > > > Yeah, btf_id == 0 being a valid id was the reason I used -1.
> > > > >
> > > > > I realized that having a map type specific struct with btf_id == 0
> > > > > should be practically impossible, but is it guaranteed to always be
> > > > > "void" or it just happened so and can change in the future?
> > > > >
> > > > > If both this and having one more field in bpf_map_ops is not a problem,
> > > > > I'll move it to bpf_map_ops.
> > > >
> > > > Nope, I can't do it. All `struct bpf_map_ops` are global `const`, i.e.
> > > > rodata and a try cast `const` away and change them causes a panic.
> > > >
> > > > Simple user space repro:
> > > >
> > > >         % cat 1.c
> > > >         #include <stdio.h>
> > > >
> > > >         struct map_ops {
> > > >                 int a;
> > > >         };
> > > >
> > > >         const struct map_ops ops = {
> > > >                 .a = 1,
> > > >         };
> > > >
> > > >         int main(void)
> > > >         {
> > > >                 struct map_ops *ops_rw = (struct map_ops *)&ops;
> > > >
> > > >                 printf("before a=%d\n", ops_rw->a);
> > > >                 ops_rw->a = 3;
> > > >                 printf(" afrer a=%d\n", ops_rw->a);
> > > >         }
> > > >         % clang -O2 -Wall -Wextra -pedantic -pedantic-errors -g 1.c && ./a.out
> > > >         before a=1
> > > >         Segmentation fault (core dumped)
> > > >         % objdump -t a.out  | grep -w ops
> > > >         0000000000400600 g     O .rodata        0000000000000004              ops
> > > >
> > > > --
> > > > Andrey Ignatov
> > > 
> > > See the trick that helper prototypes do for BTF ids. Fictional example:
> > > 
> > > static int hash_map_btf_id;
> > > 
> > > const struct bpf_map_ops hash_map_opss = {
> > >  ...
> > >  .btf_id = &hash_map_btf_id,
> > > };
> > > 
> > > 
> > > then *hash_map_ops.btf_id = <final_btf_id>;
> > 
> > Yeah, it would work, but IMO it wouldn't make the implementation better
> > since for every map type I would need to write two additional lines of
> > code. And whoever adds new map type will need to repeat this trick.
> I think bpf_func_proto has already been doing this.  Yonghong's
> tcp/udp iter is also doing something similar.

Right. I see bpf_func_proto.btf_id now.

> > Yeah, it can be automated with a macro, but IMO it's better to avoid
> > the work than to automate it.
> > 
> > Martin, Andrii is there any strong reason to convert to map_btf_id
> > field? Or it's "nice to have" kind of thing? If the latter, I'd prefer
> > to stick with my approach.
> I just think it will be more natural to be able to directly
> access it through a map's related pointer, considering
> the map_btf_name is already there.

Ok, let's keep it consistent with other structs then. I'll switch
bpf_map_ops to use the same pointer trick.

> I don't feel strongly here for now.  I think things will have to change
> anyway after quickly peeking at Jiri's patch.  Also,
> I take back on the collision check.  I think this check
> should belong to Jiri's patch instead (if it is indeed needed).
> Renaming the sock_hash is good enough for now.

Good point. I'll remove the duplicates check.
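
For reference, a rough sketch of how that pointer trick could be applied
to bpf_map_ops (hypothetical code, extrapolated from Andrii's example
above; the actual next revision may look different):

	/* include/linux/bpf.h: the ops struct stays const; only the
	 * pointed-to int is written once while btf_vmlinux is parsed.
	 */
	struct bpf_map_ops {
		/* ... existing callbacks ... */
		const char * const map_btf_name;
		int *map_btf_id;
	};

	/* kernel/bpf/hashtab.c */
	static int htab_map_btf_id;
	const struct bpf_map_ops htab_map_ops = {
		/* ... existing ops ... */
		.map_btf_name = "bpf_htab",
		.map_btf_id = &htab_map_btf_id,
	};

	/* in btf_parse_vmlinux(): resolve by name once and store the id */
	*htab_map_ops.map_btf_id = btf_id;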

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 07052d44bca1..60a62b9f6ed5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -92,6 +92,7 @@  struct bpf_map_ops {
 	int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma);
 	__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
 			     struct poll_table_struct *pts);
+	const char * const map_btf_name;
 };
 
 struct bpf_map_memory {
@@ -136,6 +137,8 @@  struct bpf_map {
 	u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */
 };
 
+u32 btf_vmlinux_map_id_by_type(enum bpf_map_type type);
+
 static inline bool map_value_has_spin_lock(const struct bpf_map *map)
 {
 	return map->spin_lock_off >= 0;
@@ -1109,6 +1112,11 @@  static inline bool bpf_allow_ptr_leaks(void)
 	return perfmon_capable();
 }
 
+static inline bool bpf_allow_ptr_to_map_access(void)
+{
+	return perfmon_capable();
+}
+
 static inline bool bpf_bypass_spec_v1(void)
 {
 	return perfmon_capable();
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index ca08db4ffb5f..53c7bd568c5d 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -379,6 +379,7 @@  struct bpf_verifier_env {
 	u32 used_map_cnt;		/* number of used maps */
 	u32 id_gen;			/* used to generate unique reg IDs */
 	bool allow_ptr_leaks;
+	bool allow_ptr_to_map_access;
 	bool bpf_capable;
 	bool bypass_spec_v1;
 	bool bypass_spec_v4;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 11584618e861..bff478799a83 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -510,6 +510,7 @@  const struct bpf_map_ops array_map_ops = {
 	.map_check_btf = array_map_check_btf,
 	.map_lookup_batch = generic_map_lookup_batch,
 	.map_update_batch = generic_map_update_batch,
+	.map_btf_name = "bpf_array",
 };
 
 const struct bpf_map_ops percpu_array_map_ops = {
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index e5c5305e859c..fa21b1e766ae 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3577,6 +3577,67 @@  btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
 	return ctx_type;
 }
 
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
+#define BPF_LINK_TYPE(_id, _name)
+static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = {
+#define BPF_MAP_TYPE(_id, _ops) \
+	[_id] = &_ops,
+#include <linux/bpf_types.h>
+#undef BPF_MAP_TYPE
+};
+static u32 btf_vmlinux_map_ids[] = {
+#define BPF_MAP_TYPE(_id, _ops) \
+	[_id] = (u32)-1,
+#include <linux/bpf_types.h>
+#undef BPF_MAP_TYPE
+};
+#undef BPF_PROG_TYPE
+#undef BPF_LINK_TYPE
+
+static int btf_vmlinux_map_ids_init(const struct btf *btf,
+				    struct bpf_verifier_log *log)
+{
+	int base_btf_id, btf_id, i;
+	const char *btf_name;
+
+	base_btf_id = btf_find_by_name_kind(btf, "bpf_map", BTF_KIND_STRUCT);
+	if (base_btf_id < 0)
+		return base_btf_id;
+
+	BUILD_BUG_ON(ARRAY_SIZE(btf_vmlinux_map_ops) !=
+		     ARRAY_SIZE(btf_vmlinux_map_ids));
+
+	for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) {
+		if (!btf_vmlinux_map_ops[i])
+			continue;
+		btf_name = btf_vmlinux_map_ops[i]->map_btf_name;
+		if (!btf_name) {
+			btf_vmlinux_map_ids[i] = base_btf_id;
+			continue;
+		}
+		btf_id = btf_find_by_name_kind(btf, btf_name, BTF_KIND_STRUCT);
+		if (btf_id < 0)
+			return btf_id;
+		btf_vmlinux_map_ids[i] = btf_id;
+		btf_id = btf_find_by_name_kind_next(btf, btf_name,
+						    BTF_KIND_STRUCT,
+						    btf_id + 1);
+		if (btf_id > 0) {
+			bpf_log(log,
+				"Ambiguous btf_id for struct %s: %u, %u.\n",
+				btf_name, btf_vmlinux_map_ids[i], btf_id);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+u32 btf_vmlinux_map_id_by_type(enum bpf_map_type type)
+{
+	return btf_vmlinux_map_ids[type];
+}
+
 static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
 				     struct btf *btf,
 				     const struct btf_type *t,
@@ -3639,6 +3700,11 @@  struct btf *btf_parse_vmlinux(void)
 	/* btf_parse_vmlinux() runs under bpf_verifier_lock */
 	bpf_ctx_convert.t = btf_type_by_id(btf, btf_id);
 
+	/* find bpf map structs for map_ptr access checking */
+	err = btf_vmlinux_map_ids_init(btf, log);
+	if (err < 0)
+		goto errout;
+
 	bpf_struct_ops_init(btf, log);
 
 	btf_verifier_env_free(env);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index b4b288a3c3c9..5c6344fd1eee 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1625,6 +1625,7 @@  const struct bpf_map_ops htab_map_ops = {
 	.map_gen_lookup = htab_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
 	BATCH_OPS(htab),
+	.map_btf_name = "bpf_htab",
 };
 
 const struct bpf_map_ops htab_lru_map_ops = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 34cde841ab68..9c9817b9e427 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1351,6 +1351,19 @@  static void mark_reg_not_init(struct bpf_verifier_env *env,
 	__mark_reg_not_init(env, regs + regno);
 }
 
+static void mark_btf_ld_reg(struct bpf_verifier_env *env,
+			    struct bpf_reg_state *regs, u32 regno,
+			    enum bpf_reg_type reg_type, u32 btf_id)
+{
+	if (reg_type == SCALAR_VALUE) {
+		mark_reg_unknown(env, regs, regno);
+		return;
+	}
+	mark_reg_known_zero(env, regs, regno);
+	regs[regno].type = PTR_TO_BTF_ID;
+	regs[regno].btf_id = btf_id;
+}
+
 #define DEF_NOT_SUBREG	(0)
 static void init_reg_state(struct bpf_verifier_env *env,
 			   struct bpf_func_state *state)
@@ -3182,19 +3195,63 @@  static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 	if (ret < 0)
 		return ret;
 
-	if (atype == BPF_READ && value_regno >= 0) {
-		if (ret == SCALAR_VALUE) {
-			mark_reg_unknown(env, regs, value_regno);
-			return 0;
-		}
-		mark_reg_known_zero(env, regs, value_regno);
-		regs[value_regno].type = PTR_TO_BTF_ID;
-		regs[value_regno].btf_id = btf_id;
+	if (atype == BPF_READ && value_regno >= 0)
+		mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
+
+	return 0;
+}
+
+static int check_ptr_to_map_access(struct bpf_verifier_env *env,
+				   struct bpf_reg_state *regs,
+				   int regno, int off, int size,
+				   enum bpf_access_type atype,
+				   int value_regno)
+{
+	struct bpf_reg_state *reg = regs + regno;
+	struct bpf_map *map = reg->map_ptr;
+	const struct btf_type *t;
+	const char *tname;
+	u32 btf_id;
+	int ret;
+
+	if (!btf_vmlinux) {
+		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
+		return -ENOTSUPP;
+	}
+
+	t = btf_type_by_id(btf_vmlinux,
+			   btf_vmlinux_map_id_by_type(map->map_type));
+	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+
+	if (!env->allow_ptr_to_map_access) {
+		verbose(env,
+			"%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
+			tname);
+		return -EPERM;
+	}
+
+	if (off < 0) {
+		verbose(env, "R%d is %s invalid negative access: off=%d\n",
+			regno, tname, off);
+		return -EACCES;
+	}
+
+	if (atype != BPF_READ) {
+		verbose(env, "only read from %s is supported\n", tname);
+		return -EACCES;
 	}
 
+	ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
+	if (ret < 0)
+		return ret;
+
+	if (value_regno >= 0)
+		mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
+
 	return 0;
 }
 
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
@@ -3363,6 +3420,9 @@  static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 	} else if (reg->type == PTR_TO_BTF_ID) {
 		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
 					      value_regno);
+	} else if (reg->type == CONST_PTR_TO_MAP) {
+		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
+					      value_regno);
 	} else {
 		verbose(env, "R%d invalid mem access '%s'\n", regno,
 			reg_type_str[reg->type]);
@@ -10946,6 +11006,7 @@  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 		env->strict_alignment = false;
 
 	env->allow_ptr_leaks = bpf_allow_ptr_leaks();
+	env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
 	env->bypass_spec_v1 = bpf_bypass_spec_v1();
 	env->bypass_spec_v4 = bpf_bypass_spec_v4();
 	env->bpf_capable = bpf_capable();
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
index cd26ee6b7b1d..1f2b8c4cb26d 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
@@ -56,7 +56,7 @@ 
 	.fixup_map_in_map = { 16 },
 	.fixup_map_array_48b = { 13 },
 	.result = REJECT,
-	.errstr = "R0 invalid mem access 'map_ptr'",
+	.errstr = "only read from bpf_array is supported",
 },
 {
 	"cond: two branches returning different map pointers for lookup (tail, tail)",