diff mbox series

[bpf-next,v1,4/7] tools: bpftool: implement map exec command

Message ID 20190320173332.18105-4-alban@kinvolk.io
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series [bpf-next,v1,1/7] tools: bpftool: fix infinite loop | expand

Commit Message

Alban Crequy March 20, 2019, 5:33 p.m. UTC
From: Alban Crequy <alban@kinvolk.io>

The map exec commands allows to open an existing map and pass the file
descriptor to a child process. This enables applications to use an
existing BPF map even when they don't support bpffs.

Example of usage:
    # bpftool map exec pinned /sys/fs/bpf/foo fd 99 cmd -- readlink /proc/self/fd/99
    anon_inode:bpf-map

Documentation and bash completion updated as well.

Signed-off-by: Alban Crequy <alban@kinvolk.io>
---
 .../bpf/bpftool/Documentation/bpftool-map.rst | 11 ++++
 tools/bpf/bpftool/bash-completion/bpftool     | 22 +++++++-
 tools/bpf/bpftool/map.c                       | 53 +++++++++++++++++++
 3 files changed, 85 insertions(+), 1 deletion(-)

Comments

Quentin Monnet March 20, 2019, 7:01 p.m. UTC | #1
2019-03-20 18:33 UTC+0100 ~ Alban Crequy <alban.crequy@gmail.com>
> From: Alban Crequy <alban@kinvolk.io>
> 
> The map exec commands allows to open an existing map and pass the file
> descriptor to a child process. This enables applications to use an
> existing BPF map even when they don't support bpffs.
> 
> Example of usage:
>     # bpftool map exec pinned /sys/fs/bpf/foo fd 99 cmd -- readlink /proc/self/fd/99
>     anon_inode:bpf-map
> 
> Documentation and bash completion updated as well.
> 
> Signed-off-by: Alban Crequy <alban@kinvolk.io>
> ---
>  .../bpf/bpftool/Documentation/bpftool-map.rst | 11 ++++
>  tools/bpf/bpftool/bash-completion/bpftool     | 22 +++++++-
>  tools/bpf/bpftool/map.c                       | 53 +++++++++++++++++++
>  3 files changed, 85 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
> index b685641bfd74..dfd8352fa453 100644
> --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
> +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
> @@ -30,6 +30,7 @@ MAP COMMANDS
>  |	**bpftool** **map getnext**    *MAP* [**key** *DATA*]
>  |	**bpftool** **map delete**     *MAP*  **key** *DATA*
>  |	**bpftool** **map pin**        *MAP*  *FILE*
> +|	**bpftool** **map exec**       *MAP*  **fd** *FD* **cmd** -- *CMD*
>  |	**bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
>  |	**bpftool** **map peek**       *MAP*
>  |	**bpftool** **map push**       *MAP* **value** *VALUE*
> @@ -101,6 +102,10 @@ DESCRIPTION
>  		  contain a dot character ('.'), which is reserved for future
>  		  extensions of *bpffs*.
>  
> +	**bpftool map exec**     *MAP*  **fd** *FD* **cmd** -- *CMD*
> +                  Load map and exec an external command, passing the file
> +                  descriptor.
> +

Please use tabs for indent.

Could you please add a line to explain that the process spawned get the
the link to the map in the file descriptor passed as *FD*? I'm being
pedantic, I know it's not that hard to understand with the examples,
just trying to make things as clear as possible for users reading the page.

I have nothing against the double dash in the examples or the commit
log, but putting it in the description in the command itself makes it
look like it is some kind of mandatory syntax. Would you consider
removing the occurrences above?

We don't really "exec" the map... Just thinking... Would another keyword
like "pass" or "passfd" be more relevant?

>  	**bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
>  		  Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
>  
> @@ -254,6 +259,12 @@ would be lost as soon as bpftool exits).
>    key: 00 00 00 00  value: 22 02 00 00
>    Found 1 element
>  
> +**# bpftool map exec pinned /sys/fs/bpf/foo fd 99 cmd -- readlink /proc/self/fd/99**
> +
> +::
> +
> +  anon_inode:bpf-map
> +
>  SEE ALSO
>  ========
>  	**bpf**\ (2),
> diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
> index 9e37de8bb227..63cd53c4d3a7 100644
> --- a/tools/bpf/bpftool/bash-completion/bpftool
> +++ b/tools/bpf/bpftool/bash-completion/bpftool
> @@ -583,6 +583,26 @@ _bpftool()
>                      fi
>                      return 0
>                      ;;
> +                exec)
> +                    case $prev in
> +                        $command)
> +                            COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
> +                            return 0
> +                            ;;
> +                        fd)
> +                            return 0
> +                            ;;
> +                        cmd)
> +                            COMPREPLY=( $(compgen -c -- "$cur" ) )
> +                            return 0
> +                            ;;
> +                        *)
> +                            _bpftool_once_attr 'fd'
> +                            _bpftool_once_attr 'cmd'
> +                            return 0
> +                            ;;
> +                    esac
> +                    ;;
>                  event_pipe)
>                      case $prev in
>                          $command)
> @@ -610,7 +630,7 @@ _bpftool()
>                      [[ $prev == $object ]] && \
>                          COMPREPLY=( $( compgen -W 'delete dump getnext help \
>                              lookup pin event_pipe show list update create \
> -                            peek push enqueue pop dequeue' -- \
> +                            peek push enqueue pop dequeue exec' -- \
>                              "$cur" ) )
>                      ;;
>              esac
> diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
> index a576f2a019be..768497364cee 100644
> --- a/tools/bpf/bpftool/map.c
> +++ b/tools/bpf/bpftool/map.c
> @@ -883,6 +883,58 @@ static int do_update(int argc, char **argv)
>  	return err;
>  }
>  
> +static int do_exec(int argc, char **argv)
> +{
> +	struct bpf_map_info info = {};
> +	__u32 len = sizeof(info);
> +	int fd, ret;
> +	__u32 outfd = 0;

Reverse-Christmas tree.

> +
> +	if (argc < 2)
> +		usage();
> +
> +	fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
> +	if (fd < 0)
> +		return -1;
> +
> +	while (argc) {
> +		if (is_prefix(*argv, "fd")) {
> +			if (parse_u32_arg(&argc, &argv, &outfd,
> +					  "out fd"))
> +				return -1;
> +		} else if (is_prefix(*argv, "cmd")){
> +			NEXT_ARG();
> +			if (!REQ_ARGS(1))
> +				return -1;
> +
> +			break;
> +		} else {
> +			p_err("unknown arg %s", *argv);
> +			return -1;
> +		}
> +	}
> +
> +	if (outfd == 0) {
> +		p_err("invalid fd");
> +		return -1;
> +	}
> +	if (*argv == NULL) {
> +		p_err("empty command");
> +		return -1;
> +	}
> +
> +

Spurious blank line.

> +	ret = dup2(fd, (int)outfd);
> +	if (ret == -1) {
> +		p_err("dup2 failed: %s", strerror(errno));
> +		return -1;
> +	}
> +
> +	execvp(argv[0], argv);
> +	p_err("execvp failed: %s", strerror(errno));
> +	return -1;
> +}
> +
>  static void print_key_value(struct bpf_map_info *info, void *key,
>  			    void *value)
>  {
> @@ -1355,6 +1407,7 @@ static const struct cmd cmds[] = {
>  	{ "enqueue",	do_update },
>  	{ "pop",	do_pop_dequeue },
>  	{ "dequeue",	do_pop_dequeue },
> +	{ "exec",	do_exec },
>  	{ 0 }
>  };
>  
> 

Please also add the new command in the interactive help (do_help()).

By the way, I like this new command a lot!
Jakub Kicinski March 20, 2019, 9:23 p.m. UTC | #2
On Wed, 20 Mar 2019 18:33:29 +0100, Alban Crequy wrote:
> From: Alban Crequy <alban@kinvolk.io>
> 
> The map exec commands allows to open an existing map and pass the file
> descriptor to a child process. This enables applications to use an
> existing BPF map even when they don't support bpffs.
> 
> Example of usage:
>     # bpftool map exec pinned /sys/fs/bpf/foo fd 99 cmd -- readlink /proc/self/fd/99
>     anon_inode:bpf-map

Would you mind telling us a little more about the use for this feature?
It seems fairly limited.  If it's about probing objects (finding out if
they are a map or a program) perhaps we can add a command just for that?

(I guess bpftool -f isn't really the cleanest way of getting at that
info.)

> Documentation and bash completion updated as well.
> 
> Signed-off-by: Alban Crequy <alban@kinvolk.io>
Alban Crequy March 21, 2019, 9:57 a.m. UTC | #3
On Wed, Mar 20, 2019 at 10:23 PM Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
>
> On Wed, 20 Mar 2019 18:33:29 +0100, Alban Crequy wrote:
> > From: Alban Crequy <alban@kinvolk.io>
> >
> > The map exec commands allows to open an existing map and pass the file
> > descriptor to a child process. This enables applications to use an
> > existing BPF map even when they don't support bpffs.
> >
> > Example of usage:
> >     # bpftool map exec pinned /sys/fs/bpf/foo fd 99 cmd -- readlink /proc/self/fd/99
> >     anon_inode:bpf-map
>
> Would you mind telling us a little more about the use for this feature?
> It seems fairly limited.  If it's about probing objects (finding out if
> they are a map or a program) perhaps we can add a command just for that?

I needed to know the name of the map too. I was preparing a demo based
on python bcc tools (opensnoop) but with added feature that requires
using a pinned map, created and maintained externally. At the moment,
the python API for bcc does not support pinning or using external
maps. Ideally, this should be added in the python API (some discussion
on https://github.com/iovisor/bcc/issues/2223) but meanwhile, I use a
workaround by executing bpftool from the python code.

Arguably, my use case is a temporary hack until we have better support
in python bcc. But other tools implements similar commands to pass
file descriptors between processes: "ip netns exec" and "tc exec bpf".
So I think it could be useful for other scripting use cases.

In my demo, I used the two hacks:
- if the pinned map fd is not given to the python script, re-execute
itself with bpftool:
os.execvp("bpftool", ["bpftool", "map", "exec", "pinned", pin_path,
"fd", "90", "cmd", "--"] + sys.argv)
- once we have the fd 90 (number specified above) of the pinned map in
the python script, overwrite the empty fd created by bcc:
os.dup2(90, 6)
I call dup2() between the bpf map creation and the bpf program
creation. To check which map fd to overwrite, I just call
os.system("bpftool map show fd 6...").


Thanks a lot for the reviews. I'll need some time to address it (maybe
a week or 2).

> (I guess bpftool -f isn't really the cleanest way of getting at that
> info.)
>
> > Documentation and bash completion updated as well.
> >
> > Signed-off-by: Alban Crequy <alban@kinvolk.io>
Jakub Kicinski March 21, 2019, 8:16 p.m. UTC | #4
On Thu, 21 Mar 2019 10:57:32 +0100, Alban Crequy wrote:
> On Wed, Mar 20, 2019 at 10:23 PM Jakub Kicinski
> <jakub.kicinski@netronome.com> wrote:
> >
> > On Wed, 20 Mar 2019 18:33:29 +0100, Alban Crequy wrote:  
> > > From: Alban Crequy <alban@kinvolk.io>
> > >
> > > The map exec commands allows to open an existing map and pass the file
> > > descriptor to a child process. This enables applications to use an
> > > existing BPF map even when they don't support bpffs.
> > >
> > > Example of usage:
> > >     # bpftool map exec pinned /sys/fs/bpf/foo fd 99 cmd -- readlink /proc/self/fd/99
> > >     anon_inode:bpf-map  
> >
> > Would you mind telling us a little more about the use for this feature?
> > It seems fairly limited.  If it's about probing objects (finding out if
> > they are a map or a program) perhaps we can add a command just for that?  
> 
> I needed to know the name of the map too. I was preparing a demo based
> on python bcc tools (opensnoop) but with added feature that requires
> using a pinned map, created and maintained externally. At the moment,
> the python API for bcc does not support pinning or using external
> maps. Ideally, this should be added in the python API (some discussion
> on https://github.com/iovisor/bcc/issues/2223) but meanwhile, I use a
> workaround by executing bpftool from the python code.
> 
> Arguably, my use case is a temporary hack until we have better support
> in python bcc. But other tools implements similar commands to pass
> file descriptors between processes: "ip netns exec" and "tc exec bpf".
> So I think it could be useful for other scripting use cases.

The thing is the receiver of the FD has to be bpf-aware, because there
isn't really much one can do with that file descriptor, in which case
it's kind of strange that the receiver doesn't know how to open a
pinned object..

> In my demo, I used the two hacks:
> - if the pinned map fd is not given to the python script, re-execute
> itself with bpftool:
> os.execvp("bpftool", ["bpftool", "map", "exec", "pinned", pin_path,
> "fd", "90", "cmd", "--"] + sys.argv)
> - once we have the fd 90 (number specified above) of the pinned map in
> the python script, overwrite the empty fd created by bcc:
> os.dup2(90, 6)
> I call dup2() between the bpf map creation and the bpf program
> creation. To check which map fd to overwrite, I just call
> os.system("bpftool map show fd 6...").

I see, thanks for the explanation.  That does indeed seems like a hack.

> Thanks a lot for the reviews. I'll need some time to address it (maybe
> a week or 2).
> 
> > (I guess bpftool -f isn't really the cleanest way of getting at that
> > info.)
> >  
> > > Documentation and bash completion updated as well.
> > >
> > > Signed-off-by: Alban Crequy <alban@kinvolk.io>
diff mbox series

Patch

diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index b685641bfd74..dfd8352fa453 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -30,6 +30,7 @@  MAP COMMANDS
 |	**bpftool** **map getnext**    *MAP* [**key** *DATA*]
 |	**bpftool** **map delete**     *MAP*  **key** *DATA*
 |	**bpftool** **map pin**        *MAP*  *FILE*
+|	**bpftool** **map exec**       *MAP*  **fd** *FD* **cmd** -- *CMD*
 |	**bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
 |	**bpftool** **map peek**       *MAP*
 |	**bpftool** **map push**       *MAP* **value** *VALUE*
@@ -101,6 +102,10 @@  DESCRIPTION
 		  contain a dot character ('.'), which is reserved for future
 		  extensions of *bpffs*.
 
+	**bpftool map exec**     *MAP*  **fd** *FD* **cmd** -- *CMD*
+                  Load map and exec an external command, passing the file
+                  descriptor.
+
 	**bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
 		  Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
 
@@ -254,6 +259,12 @@  would be lost as soon as bpftool exits).
   key: 00 00 00 00  value: 22 02 00 00
   Found 1 element
 
+**# bpftool map exec pinned /sys/fs/bpf/foo fd 99 cmd -- readlink /proc/self/fd/99**
+
+::
+
+  anon_inode:bpf-map
+
 SEE ALSO
 ========
 	**bpf**\ (2),
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 9e37de8bb227..63cd53c4d3a7 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -583,6 +583,26 @@  _bpftool()
                     fi
                     return 0
                     ;;
+                exec)
+                    case $prev in
+                        $command)
+                            COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+                            return 0
+                            ;;
+                        fd)
+                            return 0
+                            ;;
+                        cmd)
+                            COMPREPLY=( $(compgen -c -- "$cur" ) )
+                            return 0
+                            ;;
+                        *)
+                            _bpftool_once_attr 'fd'
+                            _bpftool_once_attr 'cmd'
+                            return 0
+                            ;;
+                    esac
+                    ;;
                 event_pipe)
                     case $prev in
                         $command)
@@ -610,7 +630,7 @@  _bpftool()
                     [[ $prev == $object ]] && \
                         COMPREPLY=( $( compgen -W 'delete dump getnext help \
                             lookup pin event_pipe show list update create \
-                            peek push enqueue pop dequeue' -- \
+                            peek push enqueue pop dequeue exec' -- \
                             "$cur" ) )
                     ;;
             esac
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index a576f2a019be..768497364cee 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -883,6 +883,58 @@  static int do_update(int argc, char **argv)
 	return err;
 }
 
+static int do_exec(int argc, char **argv)
+{
+	struct bpf_map_info info = {};
+	__u32 len = sizeof(info);
+	int fd, ret;
+	__u32 outfd = 0;
+
+	if (argc < 2)
+		usage();
+
+	fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+	if (fd < 0)
+		return -1;
+
+	while (argc) {
+		if (is_prefix(*argv, "fd")) {
+			if (parse_u32_arg(&argc, &argv, &outfd,
+					  "out fd"))
+				return -1;
+		} else if (is_prefix(*argv, "cmd")){
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			break;
+		} else {
+			p_err("unknown arg %s", *argv);
+			return -1;
+		}
+	}
+
+	if (outfd == 0) {
+		p_err("invalid fd");
+		return -1;
+	}
+	if (*argv == NULL) {
+		p_err("empty command");
+		return -1;
+	}
+
+
+	ret = dup2(fd, (int)outfd);
+	if (ret == -1) {
+		p_err("dup2 failed: %s", strerror(errno));
+		return -1;
+	}
+
+	execvp(argv[0], argv);
+	p_err("execvp failed: %s", strerror(errno));
+	return -1;
+}
+
 static void print_key_value(struct bpf_map_info *info, void *key,
 			    void *value)
 {
@@ -1355,6 +1407,7 @@  static const struct cmd cmds[] = {
 	{ "enqueue",	do_update },
 	{ "pop",	do_pop_dequeue },
 	{ "dequeue",	do_pop_dequeue },
+	{ "exec",	do_exec },
 	{ 0 }
 };