diff mbox series

[bpf-next,v2,11/12] tools: libbpf: allow map reuse

Message ID 20180709175944.32265-12-jakub.kicinski@netronome.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series tools: bpf: extend bpftool prog load | expand

Commit Message

Jakub Kicinski July 9, 2018, 5:59 p.m. UTC
More advanced applications may want to only replace programs without
destroying associated maps.  Allow libbpf users to achieve that.
Instead of always creating all of the maps at load time, expose to
users an API to reconstruct the map object from already existing
map.

The map parameters are read from the kernel and replace the parameters
of the ELF map.  libbpf does not restrict the map replacement, i.e.
the reused map does not have to be compatible with the ELF map
definition.  We relay on the verifier for checking the compatibility
between maps and programs.  The ELF map definition is completely
overwritten by the information read from the kernel, to make sure
libbpf's view of map object corresponds to the actual map.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
 tools/lib/bpf/libbpf.c | 35 +++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h |  1 +
 2 files changed, 36 insertions(+)

Comments

Andrey Ignatov July 9, 2018, 8:22 p.m. UTC | #1
Jakub Kicinski <jakub.kicinski@netronome.com> [Mon, 2018-07-09 11:01 -0700]:
> More advanced applications may want to only replace programs without
> destroying associated maps.  Allow libbpf users to achieve that.
> Instead of always creating all of the maps at load time, expose to
> users an API to reconstruct the map object from already existing
> map.
> 
> The map parameters are read from the kernel and replace the parameters
> of the ELF map.  libbpf does not restrict the map replacement, i.e.
> the reused map does not have to be compatible with the ELF map
> definition.  We relay on the verifier for checking the compatibility
> between maps and programs.  The ELF map definition is completely
> overwritten by the information read from the kernel, to make sure
> libbpf's view of map object corresponds to the actual map.

Thanks for working on this Jakub! I encountered this shortcoming of
libbpf as well and was planning to fix it, but you beat me to it :)


> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
> ---
>  tools/lib/bpf/libbpf.c | 35 +++++++++++++++++++++++++++++++++++
>  tools/lib/bpf/libbpf.h |  1 +
>  2 files changed, 36 insertions(+)
> 
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index b653dbb266c7..c80033fe66c3 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -215,6 +215,7 @@ struct bpf_map {
>  	int fd;
>  	char *name;
>  	size_t offset;
> +	bool fd_preset;

Any reason not to use map->fd itself to identify if fd is present?

fd of every map is set to -1 in bpf_object__init_maps() that, in turn, is
called from __bpf_object__open():

	for (i = 0; i < nr_maps; i++)
		obj->maps[i].fd = -1;

Later it will either contain valid fd that is >= 0, or that same -1, what
should be enough to identify fd presence.


>  	int map_ifindex;
>  	struct bpf_map_def def;
>  	uint32_t btf_key_type_id;
> @@ -1082,6 +1083,34 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
>  	return 0;
>  }
>  
> +int bpf_map__reuse_fd(struct bpf_map *map, int fd)
> +{
> +	struct bpf_map_info info = {};
> +	__u32 len = sizeof(info);
> +	int err;
> +
> +	err = bpf_obj_get_info_by_fd(fd, &info, &len);
> +	if (err)
> +		return err;
> +

Should there be a check that map->fd doesn't contain any valid fd (>= 0)
before rewriting it so that if it does (e.g. because the function is
called after bpf_object__load() by mistake), current map->fd won't be
leaked?


> +	map->fd = dup(fd);

Unfortunately, new descriptor created by dup(2) will not have O_CLOEXEC set, in
contrast to original fd returned by kernel on map creation.

libbpf has other interface shortcomings where it comes up. E.g. struct
bpf_object owns all descriptors it contains (progs, maps) and closes them in
bpf_object__close(). if one wants to open/load ELF, then close it but
keep, say, prog fd to attach it to cgroup some time later, then fd
should be duplicated as well to get a new one not owned by bpf_object.

Currently I use this workaround to avoid time when new fd doesn't have
O_CLOEXEC:

	int new_prog_fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
	if (new_prog_fd < 0 ||
	    dup3(bpf_program__fd(prog), new_prog_fd, O_CLOEXEC) == -1) {
		/* .. handle error .. */
		close(new_prog_fd);
	}
	/* .. use new_prog_fd with O_CLOEXEC set */

Not sure how to simplify it. dup2() has same problem with regard to
O_CLOEXEC.

Use-case: standalone server application that uses libbpf and does
fork()/execve() a lot.


> +	if (map->fd < 0)
> +		return map->fd;
> +	map->fd_preset = true;
> +
> +	free(map->name);
> +	map->name = strdup(info.name);
> +	map->def.type = info.type;
> +	map->def.key_size = info.key_size;
> +	map->def.value_size = info.value_size;
> +	map->def.max_entries = info.max_entries;
> +	map->def.map_flags = info.map_flags;
> +	map->btf_key_type_id = info.btf_key_type_id;
> +	map->btf_value_type_id = info.btf_value_type_id;
> +
> +	return 0;
> +}
> +
>  static int
>  bpf_object__create_maps(struct bpf_object *obj)
>  {
> @@ -1094,6 +1123,12 @@ bpf_object__create_maps(struct bpf_object *obj)
>  		struct bpf_map_def *def = &map->def;
>  		int *pfd = &map->fd;
>  
> +		if (map->fd_preset) {
> +			pr_debug("skip map create (preset) %s: fd=%d\n",
> +				 map->name, map->fd);
> +			continue;
> +		}
> +
>  		create_attr.name = map->name;
>  		create_attr.map_ifindex = map->map_ifindex;
>  		create_attr.map_type = def->type;
> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> index 60593ac44700..8e709a74f47c 100644
> --- a/tools/lib/bpf/libbpf.h
> +++ b/tools/lib/bpf/libbpf.h
> @@ -261,6 +261,7 @@ typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
>  int bpf_map__set_priv(struct bpf_map *map, void *priv,
>  		      bpf_map_clear_priv_t clear_priv);
>  void *bpf_map__priv(struct bpf_map *map);
> +int bpf_map__reuse_fd(struct bpf_map *map, int fd);
>  bool bpf_map__is_offload_neutral(struct bpf_map *map);
>  void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
>  int bpf_map__pin(struct bpf_map *map, const char *path);
> -- 
> 2.17.1
>
Jakub Kicinski July 10, 2018, 2:49 a.m. UTC | #2
On Mon, 9 Jul 2018 13:22:54 -0700, Andrey Ignatov wrote:
> Jakub Kicinski <jakub.kicinski@netronome.com> [Mon, 2018-07-09 11:01 -0700]:
> > More advanced applications may want to only replace programs without
> > destroying associated maps.  Allow libbpf users to achieve that.
> > Instead of always creating all of the maps at load time, expose to
> > users an API to reconstruct the map object from already existing
> > map.
> > 
> > The map parameters are read from the kernel and replace the parameters
> > of the ELF map.  libbpf does not restrict the map replacement, i.e.
> > the reused map does not have to be compatible with the ELF map
> > definition.  We relay on the verifier for checking the compatibility
> > between maps and programs.  The ELF map definition is completely
> > overwritten by the information read from the kernel, to make sure
> > libbpf's view of map object corresponds to the actual map.  
> 
> Thanks for working on this Jakub! I encountered this shortcoming of
> libbpf as well and was planning to fix it, but you beat me to it :)

Ah!  I wish I didn't! :)

> > Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> > Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
> > ---
> >  tools/lib/bpf/libbpf.c | 35 +++++++++++++++++++++++++++++++++++
> >  tools/lib/bpf/libbpf.h |  1 +
> >  2 files changed, 36 insertions(+)
> > 
> > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> > index b653dbb266c7..c80033fe66c3 100644
> > --- a/tools/lib/bpf/libbpf.c
> > +++ b/tools/lib/bpf/libbpf.c
> > @@ -215,6 +215,7 @@ struct bpf_map {
> >  	int fd;
> >  	char *name;
> >  	size_t offset;
> > +	bool fd_preset;  
> 
> Any reason not to use map->fd itself to identify if fd is present?

Note: pre-set, not present.

> fd of every map is set to -1 in bpf_object__init_maps() that, in turn, is
> called from __bpf_object__open():
> 
> 	for (i = 0; i < nr_maps; i++)
> 		obj->maps[i].fd = -1;
> 
> Later it will either contain valid fd that is >= 0, or that same -1, what
> should be enough to identify fd presence.

I thought it to be cleaner to indicate the fd has been pre-set, in case
things get more complicated in the future and fd >= 0 becomes ambiguous.

But no strong preference, should I change?

> >  	int map_ifindex;
> >  	struct bpf_map_def def;
> >  	uint32_t btf_key_type_id;
> > @@ -1082,6 +1083,34 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
> >  	return 0;
> >  }
> >  
> > +int bpf_map__reuse_fd(struct bpf_map *map, int fd)
> > +{
> > +	struct bpf_map_info info = {};
> > +	__u32 len = sizeof(info);
> > +	int err;
> > +
> > +	err = bpf_obj_get_info_by_fd(fd, &info, &len);
> > +	if (err)
> > +		return err;
> > +  
> 
> Should there be a check that map->fd doesn't contain any valid fd (>= 0)
> before rewriting it so that if it does (e.g. because the function is
> called after bpf_object__load() by mistake), current map->fd won't be
> leaked?

Hm.  In my first implementation libbpf just took the passed fd and
didn't do a dup(), the lifetime of the fd remained with the caller.
Having a check will prevent changing the descriptor unless we add some
from of "un-reuse" as well.  Perhaps I should just add a close() in
case fd >= 0?  Or do you prefer a hard error?

> > +	map->fd = dup(fd);  
> 
> Unfortunately, new descriptor created by dup(2) will not have O_CLOEXEC set, in
> contrast to original fd returned by kernel on map creation.
> 
> libbpf has other interface shortcomings where it comes up. E.g. struct
> bpf_object owns all descriptors it contains (progs, maps) and closes them in
> bpf_object__close(). if one wants to open/load ELF, then close it but
> keep, say, prog fd to attach it to cgroup some time later, then fd
> should be duplicated as well to get a new one not owned by bpf_object.
> 
> Currently I use this workaround to avoid time when new fd doesn't have
> O_CLOEXEC:
> 
> 	int new_prog_fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
> 	if (new_prog_fd < 0 ||
> 	    dup3(bpf_program__fd(prog), new_prog_fd, O_CLOEXEC) == -1) {
> 		/* .. handle error .. */
> 		close(new_prog_fd);
> 	}
> 	/* .. use new_prog_fd with O_CLOEXEC set */
> 
> Not sure how to simplify it. dup2() has same problem with regard to
> O_CLOEXEC.
> 
> Use-case: standalone server application that uses libbpf and does
> fork()/execve() a lot.

Good point!  I have no better ideas.  Although being slightly paranoid
I would perhaps use "/" instead of "/dev/null"?  Shouldn't matter?

> > +	if (map->fd < 0)
> > +		return map->fd;
> > +	map->fd_preset = true;
> > +
> > +	free(map->name);
> > +	map->name = strdup(info.name);
> > +	map->def.type = info.type;
> > +	map->def.key_size = info.key_size;
> > +	map->def.value_size = info.value_size;
> > +	map->def.max_entries = info.max_entries;
> > +	map->def.map_flags = info.map_flags;
> > +	map->btf_key_type_id = info.btf_key_type_id;
> > +	map->btf_value_type_id = info.btf_value_type_id;
> > +
> > +	return 0;
> > +}
> > +
> >  static int
> >  bpf_object__create_maps(struct bpf_object *obj)
> >  {
> > @@ -1094,6 +1123,12 @@ bpf_object__create_maps(struct bpf_object *obj)
> >  		struct bpf_map_def *def = &map->def;
> >  		int *pfd = &map->fd;
> >  
> > +		if (map->fd_preset) {
> > +			pr_debug("skip map create (preset) %s: fd=%d\n",
> > +				 map->name, map->fd);
> > +			continue;
> > +		}
> > +
> >  		create_attr.name = map->name;
> >  		create_attr.map_ifindex = map->map_ifindex;
> >  		create_attr.map_type = def->type;
> > diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> > index 60593ac44700..8e709a74f47c 100644
> > --- a/tools/lib/bpf/libbpf.h
> > +++ b/tools/lib/bpf/libbpf.h
> > @@ -261,6 +261,7 @@ typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
> >  int bpf_map__set_priv(struct bpf_map *map, void *priv,
> >  		      bpf_map_clear_priv_t clear_priv);
> >  void *bpf_map__priv(struct bpf_map *map);
> > +int bpf_map__reuse_fd(struct bpf_map *map, int fd);
> >  bool bpf_map__is_offload_neutral(struct bpf_map *map);
> >  void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
> >  int bpf_map__pin(struct bpf_map *map, const char *path);
> > -- 
> > 2.17.1
> >   
>
Andrey Ignatov July 10, 2018, 4:23 a.m. UTC | #3
Jakub Kicinski <jakub.kicinski@netronome.com> [Mon, 2018-07-09 19:49 -0700]:
> On Mon, 9 Jul 2018 13:22:54 -0700, Andrey Ignatov wrote:
> > Jakub Kicinski <jakub.kicinski@netronome.com> [Mon, 2018-07-09 11:01 -0700]:
> > > More advanced applications may want to only replace programs without
> > > destroying associated maps.  Allow libbpf users to achieve that.
> > > Instead of always creating all of the maps at load time, expose to
> > > users an API to reconstruct the map object from already existing
> > > map.
> > > 
> > > The map parameters are read from the kernel and replace the parameters
> > > of the ELF map.  libbpf does not restrict the map replacement, i.e.
> > > the reused map does not have to be compatible with the ELF map
> > > definition.  We relay on the verifier for checking the compatibility
> > > between maps and programs.  The ELF map definition is completely
> > > overwritten by the information read from the kernel, to make sure
> > > libbpf's view of map object corresponds to the actual map.  
> > 
> > Thanks for working on this Jakub! I encountered this shortcoming of
> > libbpf as well and was planning to fix it, but you beat me to it :)
> 
> Ah!  I wish I didn't! :)
> 
> > > Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> > > Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
> > > ---
> > >  tools/lib/bpf/libbpf.c | 35 +++++++++++++++++++++++++++++++++++
> > >  tools/lib/bpf/libbpf.h |  1 +
> > >  2 files changed, 36 insertions(+)
> > > 
> > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> > > index b653dbb266c7..c80033fe66c3 100644
> > > --- a/tools/lib/bpf/libbpf.c
> > > +++ b/tools/lib/bpf/libbpf.c
> > > @@ -215,6 +215,7 @@ struct bpf_map {
> > >  	int fd;
> > >  	char *name;
> > >  	size_t offset;
> > > +	bool fd_preset;  
> > 
> > Any reason not to use map->fd itself to identify if fd is present?
> 
> Note: pre-set, not present.

Oh, sorry, I'm blind :)


> > fd of every map is set to -1 in bpf_object__init_maps() that, in turn, is
> > called from __bpf_object__open():
> > 
> > 	for (i = 0; i < nr_maps; i++)
> > 		obj->maps[i].fd = -1;
> > 
> > Later it will either contain valid fd that is >= 0, or that same -1, what
> > should be enough to identify fd presence.
> 
> I thought it to be cleaner to indicate the fd has been pre-set, in case
> things get more complicated in the future and fd >= 0 becomes ambiguous.
> 
> But no strong preference, should I change?

My preference (not strong either) is to avoid a new field whenever it's
possible. Though if you have a use-case that can't be covered by
(fd >= 0) keeping the field is fine as well.


> > >  	int map_ifindex;
> > >  	struct bpf_map_def def;
> > >  	uint32_t btf_key_type_id;
> > > @@ -1082,6 +1083,34 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
> > >  	return 0;
> > >  }
> > >  
> > > +int bpf_map__reuse_fd(struct bpf_map *map, int fd)
> > > +{
> > > +	struct bpf_map_info info = {};
> > > +	__u32 len = sizeof(info);
> > > +	int err;
> > > +
> > > +	err = bpf_obj_get_info_by_fd(fd, &info, &len);
> > > +	if (err)
> > > +		return err;
> > > +  
> > 
> > Should there be a check that map->fd doesn't contain any valid fd (>= 0)
> > before rewriting it so that if it does (e.g. because the function is
> > called after bpf_object__load() by mistake), current map->fd won't be
> > leaked?
> 
> Hm.  In my first implementation libbpf just took the passed fd and
> didn't do a dup(), the lifetime of the fd remained with the caller.
> Having a check will prevent changing the descriptor unless we add some
> from of "un-reuse" as well.  Perhaps I should just add a close() in
> case fd >= 0?  Or do you prefer a hard error?

Agree, close() in case fd >= 0 should be fine since caller already made it
explicit that they don't care about current fd and there should not be a
reason to hard-fail.


> > > +	map->fd = dup(fd);  
> > 
> > Unfortunately, new descriptor created by dup(2) will not have O_CLOEXEC set, in
> > contrast to original fd returned by kernel on map creation.
> > 
> > libbpf has other interface shortcomings where it comes up. E.g. struct
> > bpf_object owns all descriptors it contains (progs, maps) and closes them in
> > bpf_object__close(). if one wants to open/load ELF, then close it but
> > keep, say, prog fd to attach it to cgroup some time later, then fd
> > should be duplicated as well to get a new one not owned by bpf_object.
> > 
> > Currently I use this workaround to avoid time when new fd doesn't have
> > O_CLOEXEC:
> > 
> > 	int new_prog_fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
> > 	if (new_prog_fd < 0 ||
> > 	    dup3(bpf_program__fd(prog), new_prog_fd, O_CLOEXEC) == -1) {
> > 		/* .. handle error .. */
> > 		close(new_prog_fd);
> > 	}
> > 	/* .. use new_prog_fd with O_CLOEXEC set */
> > 
> > Not sure how to simplify it. dup2() has same problem with regard to
> > O_CLOEXEC.
> > 
> > Use-case: standalone server application that uses libbpf and does
> > fork()/execve() a lot.
> 
> Good point!  I have no better ideas.  Although being slightly paranoid
> I would perhaps use "/" instead of "/dev/null"?  Shouldn't matter?

No strong preferences, important thing is to create fd with O_CLOEXEC
set somehow.

Is it safer to use "/" than "/dev/null"? (trying to understand if I
should change my code as well)


> > > +	if (map->fd < 0)
> > > +		return map->fd;
> > > +	map->fd_preset = true;
> > > +
> > > +	free(map->name);
> > > +	map->name = strdup(info.name);
> > > +	map->def.type = info.type;
> > > +	map->def.key_size = info.key_size;
> > > +	map->def.value_size = info.value_size;
> > > +	map->def.max_entries = info.max_entries;
> > > +	map->def.map_flags = info.map_flags;
> > > +	map->btf_key_type_id = info.btf_key_type_id;
> > > +	map->btf_value_type_id = info.btf_value_type_id;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > >  static int
> > >  bpf_object__create_maps(struct bpf_object *obj)
> > >  {
> > > @@ -1094,6 +1123,12 @@ bpf_object__create_maps(struct bpf_object *obj)
> > >  		struct bpf_map_def *def = &map->def;
> > >  		int *pfd = &map->fd;
> > >  
> > > +		if (map->fd_preset) {
> > > +			pr_debug("skip map create (preset) %s: fd=%d\n",
> > > +				 map->name, map->fd);
> > > +			continue;
> > > +		}
> > > +
> > >  		create_attr.name = map->name;
> > >  		create_attr.map_ifindex = map->map_ifindex;
> > >  		create_attr.map_type = def->type;
> > > diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> > > index 60593ac44700..8e709a74f47c 100644
> > > --- a/tools/lib/bpf/libbpf.h
> > > +++ b/tools/lib/bpf/libbpf.h
> > > @@ -261,6 +261,7 @@ typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
> > >  int bpf_map__set_priv(struct bpf_map *map, void *priv,
> > >  		      bpf_map_clear_priv_t clear_priv);
> > >  void *bpf_map__priv(struct bpf_map *map);
> > > +int bpf_map__reuse_fd(struct bpf_map *map, int fd);
> > >  bool bpf_map__is_offload_neutral(struct bpf_map *map);
> > >  void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
> > >  int bpf_map__pin(struct bpf_map *map, const char *path);
> > > -- 
> > > 2.17.1
> > >   
> > 
>
Jakub Kicinski July 10, 2018, 6:09 p.m. UTC | #4
On Mon, 9 Jul 2018 21:23:20 -0700, Andrey Ignatov wrote:
> Jakub Kicinski <jakub.kicinski@netronome.com> [Mon, 2018-07-09 19:49 -0700]:
> > On Mon, 9 Jul 2018 13:22:54 -0700, Andrey Ignatov wrote:  
> > > Jakub Kicinski <jakub.kicinski@netronome.com> [Mon, 2018-07-09 11:01 -0700]:  
> > > fd of every map is set to -1 in bpf_object__init_maps() that, in turn, is
> > > called from __bpf_object__open():
> > > 
> > > 	for (i = 0; i < nr_maps; i++)
> > > 		obj->maps[i].fd = -1;
> > > 
> > > Later it will either contain valid fd that is >= 0, or that same -1, what
> > > should be enough to identify fd presence.  
> > 
> > I thought it to be cleaner to indicate the fd has been pre-set, in case
> > things get more complicated in the future and fd >= 0 becomes ambiguous.
> > 
> > But no strong preference, should I change?  
> 
> My preference (not strong either) is to avoid a new field whenever it's
> possible. Though if you have a use-case that can't be covered by
> (fd >= 0) keeping the field is fine as well.

Okay, I can change.  I think it may be worthwhile keeping the
information that the map definition was replaced by something that did
not come from the ELF file, but I don't actually have a use for it
right now, so we can just add it back later :)

> > > > +	map->fd = dup(fd);    
> > > 
> > > Unfortunately, new descriptor created by dup(2) will not have O_CLOEXEC set, in
> > > contrast to original fd returned by kernel on map creation.
> > > 
> > > libbpf has other interface shortcomings where it comes up. E.g. struct
> > > bpf_object owns all descriptors it contains (progs, maps) and closes them in
> > > bpf_object__close(). if one wants to open/load ELF, then close it but
> > > keep, say, prog fd to attach it to cgroup some time later, then fd
> > > should be duplicated as well to get a new one not owned by bpf_object.
> > > 
> > > Currently I use this workaround to avoid time when new fd doesn't have
> > > O_CLOEXEC:
> > > 
> > > 	int new_prog_fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
> > > 	if (new_prog_fd < 0 ||
> > > 	    dup3(bpf_program__fd(prog), new_prog_fd, O_CLOEXEC) == -1) {
> > > 		/* .. handle error .. */
> > > 		close(new_prog_fd);
> > > 	}
> > > 	/* .. use new_prog_fd with O_CLOEXEC set */
> > > 
> > > Not sure how to simplify it. dup2() has same problem with regard to
> > > O_CLOEXEC.
> > > 
> > > Use-case: standalone server application that uses libbpf and does
> > > fork()/execve() a lot.  
> > 
> > Good point!  I have no better ideas.  Although being slightly paranoid
> > I would perhaps use "/" instead of "/dev/null"?  Shouldn't matter?  
> 
> No strong preferences, important thing is to create fd with O_CLOEXEC
> set somehow.
> 
> Is it safer to use "/" than "/dev/null"? (trying to understand if I
> should change my code as well)

IDK :)  Could there be a crazy scenario when someone runs chroot or a
very broken system without /dev/null?  / should always be there?

Thanks for all the other reviews, I will update the code accordingly!
Daniel Borkmann July 10, 2018, 6:16 p.m. UTC | #5
On 07/10/2018 08:09 PM, Jakub Kicinski wrote:
> On Mon, 9 Jul 2018 21:23:20 -0700, Andrey Ignatov wrote:
>> Jakub Kicinski <jakub.kicinski@netronome.com> [Mon, 2018-07-09 19:49 -0700]:
>>> On Mon, 9 Jul 2018 13:22:54 -0700, Andrey Ignatov wrote:  
>>>> Jakub Kicinski <jakub.kicinski@netronome.com> [Mon, 2018-07-09 11:01 -0700]:  
>>>> fd of every map is set to -1 in bpf_object__init_maps() that, in turn, is
>>>> called from __bpf_object__open():
>>>>
>>>> 	for (i = 0; i < nr_maps; i++)
>>>> 		obj->maps[i].fd = -1;
>>>>
>>>> Later it will either contain valid fd that is >= 0, or that same -1, what
>>>> should be enough to identify fd presence.  
>>>
>>> I thought it to be cleaner to indicate the fd has been pre-set, in case
>>> things get more complicated in the future and fd >= 0 becomes ambiguous.
>>>
>>> But no strong preference, should I change?  
>>
>> My preference (not strong either) is to avoid a new field whenever it's
>> possible. Though if you have a use-case that can't be covered by
>> (fd >= 0) keeping the field is fine as well.
> 
> Okay, I can change.  I think it may be worthwhile keeping the
> information that the map definition was replaced by something that did
> not come from the ELF file, but I don't actually have a use for it
> right now, so we can just add it back later :)
> 
>>>>> +	map->fd = dup(fd);    
>>>>
>>>> Unfortunately, new descriptor created by dup(2) will not have O_CLOEXEC set, in
>>>> contrast to original fd returned by kernel on map creation.
>>>>
>>>> libbpf has other interface shortcomings where it comes up. E.g. struct
>>>> bpf_object owns all descriptors it contains (progs, maps) and closes them in
>>>> bpf_object__close(). if one wants to open/load ELF, then close it but
>>>> keep, say, prog fd to attach it to cgroup some time later, then fd
>>>> should be duplicated as well to get a new one not owned by bpf_object.
>>>>
>>>> Currently I use this workaround to avoid time when new fd doesn't have
>>>> O_CLOEXEC:
>>>>
>>>> 	int new_prog_fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
>>>> 	if (new_prog_fd < 0 ||
>>>> 	    dup3(bpf_program__fd(prog), new_prog_fd, O_CLOEXEC) == -1) {
>>>> 		/* .. handle error .. */
>>>> 		close(new_prog_fd);
>>>> 	}
>>>> 	/* .. use new_prog_fd with O_CLOEXEC set */
>>>>
>>>> Not sure how to simplify it. dup2() has same problem with regard to
>>>> O_CLOEXEC.
>>>>
>>>> Use-case: standalone server application that uses libbpf and does
>>>> fork()/execve() a lot.  
>>>
>>> Good point!  I have no better ideas.  Although being slightly paranoid
>>> I would perhaps use "/" instead of "/dev/null"?  Shouldn't matter?  
>>
>> No strong preferences, important thing is to create fd with O_CLOEXEC
>> set somehow.
>>
>> Is it safer to use "/" than "/dev/null"? (trying to understand if I
>> should change my code as well)
> 
> IDK :)  Could there be a crazy scenario when someone runs chroot or a
> very broken system without /dev/null?  / should always be there?
> 
> Thanks for all the other reviews, I will update the code accordingly!

Ok, I've tossed the v2 series from patchwork and waiting for your respin
in that case.

Thanks,
Daniel
Jakub Kicinski July 10, 2018, 8:58 p.m. UTC | #6
On Tue, 10 Jul 2018 20:16:46 +0200, Daniel Borkmann wrote:
> On 07/10/2018 08:09 PM, Jakub Kicinski wrote:
> > On Mon, 9 Jul 2018 21:23:20 -0700, Andrey Ignatov wrote:  
> >> Jakub Kicinski <jakub.kicinski@netronome.com> [Mon, 2018-07-09 19:49 -0700]:  
> >>> On Mon, 9 Jul 2018 13:22:54 -0700, Andrey Ignatov wrote:    
> >>>> Jakub Kicinski <jakub.kicinski@netronome.com> [Mon, 2018-07-09 11:01 -0700]:  
> >>>>> +	map->fd = dup(fd);      
> >>>>
> >>>> Unfortunately, new descriptor created by dup(2) will not have O_CLOEXEC set, in
> >>>> contrast to original fd returned by kernel on map creation.
> >>>>
> >>>> libbpf has other interface shortcomings where it comes up. E.g. struct
> >>>> bpf_object owns all descriptors it contains (progs, maps) and closes them in
> >>>> bpf_object__close(). if one wants to open/load ELF, then close it but
> >>>> keep, say, prog fd to attach it to cgroup some time later, then fd
> >>>> should be duplicated as well to get a new one not owned by bpf_object.
> >>>>
> >>>> Currently I use this workaround to avoid time when new fd doesn't have
> >>>> O_CLOEXEC:
> >>>>
> >>>> 	int new_prog_fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
> >>>> 	if (new_prog_fd < 0 ||
> >>>> 	    dup3(bpf_program__fd(prog), new_prog_fd, O_CLOEXEC) == -1) {
> >>>> 		/* .. handle error .. */
> >>>> 		close(new_prog_fd);
> >>>> 	}
> >>>> 	/* .. use new_prog_fd with O_CLOEXEC set */
> >>>>
> >>>> Not sure how to simplify it. dup2() has same problem with regard to
> >>>> O_CLOEXEC.
> >>>>
> >>>> Use-case: standalone server application that uses libbpf and does
> >>>> fork()/execve() a lot.    
> >>>
> >>> Good point!  I have no better ideas.  Although being slightly paranoid
> >>> I would perhaps use "/" instead of "/dev/null"?  Shouldn't matter?    
> >>
> >> No strong preferences, important thing is to create fd with O_CLOEXEC
> >> set somehow.
> >>
> >> Is it safer to use "/" than "/dev/null"? (trying to understand if I
> >> should change my code as well)  
> > 
> > IDK :)  Could there be a crazy scenario when someone runs chroot or a
> > very broken system without /dev/null?  / should always be there?
> > 
> > Thanks for all the other reviews, I will update the code accordingly!  

Oh no, dup3() is under _GNU_SOURCE as well.  libbpf kind of depends
on XSI-compliant strerror_r() semantics, I think I'll have to move the
strerror_r()-dependent code out of the libbpf.c file :(

> Ok, I've tossed the v2 series from patchwork and waiting for your respin
> in that case.

Yes, thank you!
diff mbox series

Patch

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b653dbb266c7..c80033fe66c3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -215,6 +215,7 @@  struct bpf_map {
 	int fd;
 	char *name;
 	size_t offset;
+	bool fd_preset;
 	int map_ifindex;
 	struct bpf_map_def def;
 	uint32_t btf_key_type_id;
@@ -1082,6 +1083,34 @@  static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
 	return 0;
 }
 
+int bpf_map__reuse_fd(struct bpf_map *map, int fd)
+{
+	struct bpf_map_info info = {};
+	__u32 len = sizeof(info);
+	int err;
+
+	err = bpf_obj_get_info_by_fd(fd, &info, &len);
+	if (err)
+		return err;
+
+	map->fd = dup(fd);
+	if (map->fd < 0)
+		return map->fd;
+	map->fd_preset = true;
+
+	free(map->name);
+	map->name = strdup(info.name);
+	map->def.type = info.type;
+	map->def.key_size = info.key_size;
+	map->def.value_size = info.value_size;
+	map->def.max_entries = info.max_entries;
+	map->def.map_flags = info.map_flags;
+	map->btf_key_type_id = info.btf_key_type_id;
+	map->btf_value_type_id = info.btf_value_type_id;
+
+	return 0;
+}
+
 static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
@@ -1094,6 +1123,12 @@  bpf_object__create_maps(struct bpf_object *obj)
 		struct bpf_map_def *def = &map->def;
 		int *pfd = &map->fd;
 
+		if (map->fd_preset) {
+			pr_debug("skip map create (preset) %s: fd=%d\n",
+				 map->name, map->fd);
+			continue;
+		}
+
 		create_attr.name = map->name;
 		create_attr.map_ifindex = map->map_ifindex;
 		create_attr.map_type = def->type;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 60593ac44700..8e709a74f47c 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -261,6 +261,7 @@  typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
 int bpf_map__set_priv(struct bpf_map *map, void *priv,
 		      bpf_map_clear_priv_t clear_priv);
 void *bpf_map__priv(struct bpf_map *map);
+int bpf_map__reuse_fd(struct bpf_map *map, int fd);
 bool bpf_map__is_offload_neutral(struct bpf_map *map);
 void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 int bpf_map__pin(struct bpf_map *map, const char *path);