diff mbox

[try,2] qemu/tap: add -net tap,dev= option

Message ID 200912081841.45075.arnd@arndb.de
State New
Headers show

Commit Message

Arnd Bergmann Dec. 8, 2009, 5:41 p.m. UTC
In order to support macvtap, we need a way to select the actual
tap endpoint. While it would be nice to pass it by network interface
name, passing the character device is more flexible, and we can
easily do both in the long run.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---

Comments

Mark McLoughlin Dec. 9, 2009, 10:33 a.m. UTC | #1
On Tue, 2009-12-08 at 18:41 +0100, Arnd Bergmann wrote:
> In order to support macvtap, we need a way to select the actual
> tap endpoint. While it would be nice to pass it by network interface
> name, passing the character device is more flexible, and we can
> easily do both in the long run.
> 
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

Looks fine to me

Acked-by: Mark McLoughlin <markmc@redhat.com>

Cheers,
Mark.
Michael S. Tsirkin Dec. 9, 2009, 11:53 a.m. UTC | #2
On Tue, Dec 08, 2009 at 06:41:44PM +0100, Arnd Bergmann wrote:
> In order to support macvtap, we need a way to select the actual
> tap endpoint. While it would be nice to pass it by network interface
> name, passing the character device is more flexible, and we can
> easily do both in the long run.
> 
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
> diff --git a/net.c b/net.c
> index 13bdbb2..deb12fd 100644
> --- a/net.c
> +++ b/net.c
> @@ -955,6 +955,11 @@ static struct {
>              },
>  #ifndef _WIN32
>              {
> +                .name = "dev",
> +                .type = QEMU_OPT_STRING,
> +                .help = "character device pathname",
> +            },
> +            {
>                  .name = "fd",
>                  .type = QEMU_OPT_STRING,
>                  .help = "file descriptor of an already opened tap",
> diff --git a/net/tap-aix.c b/net/tap-aix.c
> index 4bc9f2f..b789d06 100644
> --- a/net/tap-aix.c
> +++ b/net/tap-aix.c
> @@ -25,7 +25,8 @@
>  #include "net/tap.h"
>  #include <stdio.h>
>  
> -int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
> +int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,
> +			int *vnet_hdr, int vnet_hdr_required)
>  {
>      fprintf(stderr, "no tap on AIX\n");
>      return -1;
> diff --git a/net/tap-bsd.c b/net/tap-bsd.c
> index 815997d..09a7780 100644
> --- a/net/tap-bsd.c
> +++ b/net/tap-bsd.c
> @@ -40,7 +40,8 @@
>  #include <util.h>
>  #endif
>  
> -int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
> +int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,
> +			int *vnet_hdr, int vnet_hdr_required)
>  {
>      int fd;
>      char *dev;

Does this compile?

> diff --git a/net/tap-linux.c b/net/tap-linux.c
> index 6af9e82..a6df123 100644
> --- a/net/tap-linux.c
> +++ b/net/tap-linux.c
> @@ -32,14 +32,21 @@
>  #include "sysemu.h"
>  #include "qemu-common.h"
>  
> -int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
> +int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,

dev is never modified, so it should be const char *.

> +			int *vnet_hdr, int vnet_hdr_required)
>  {
>      struct ifreq ifr;
>      int fd, ret;
> +    const char *path;
>  
> -    TFR(fd = open("/dev/net/tun", O_RDWR));
> +    if (dev[0] == '\0')

== 0 checks are ugly. if (*dev) is shorter, and it's a standard C
idiom to detect non-empty string. Or better pass NULL if no device,
then you can just path = dev ? dev : "/dev/net/tun".

> +	path = "/dev/net/tun";
> +    else
> +	path = dev;

Please do not indent by single space.

> +
> +    TFR(fd = open(dev, O_RDWR));
>      if (fd < 0) {
> -        fprintf(stderr, "warning: could not open /dev/net/tun: no virtual network emulation\n");
> +        fprintf(stderr, "warning: could not open %s: no virtual network emulation\n", path);
>          return -1;
>      }
>      memset(&ifr, 0, sizeof(ifr));
> @@ -70,7 +77,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required
>          pstrcpy(ifr.ifr_name, IFNAMSIZ, "tap%d");
>      ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
>      if (ret != 0) {
> -        fprintf(stderr, "warning: could not configure /dev/net/tun: no virtual network emulation\n");
> +        fprintf(stderr, "warning: could not configure %s: no virtual network emulation\n", path);
>          close(fd);
>          return -1;
>      }
> diff --git a/net/tap-solaris.c b/net/tap-solaris.c
> index e14fe36..72abb78 100644
> --- a/net/tap-solaris.c
> +++ b/net/tap-solaris.c
> @@ -171,7 +171,8 @@ static int tap_alloc(char *dev, size_t dev_size)
>      return tap_fd;
>  }
>  
> -int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
> +int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,
> +			int *vnet_hdr, int vnet_hdr_required)
>  {
>      char  dev[10]="";
>      int fd;

Does this compile?

> diff --git a/net/tap.c b/net/tap.c
> index 0d8b424..14ddf65 100644
> --- a/net/tap.c
> +++ b/net/tap.c
> @@ -343,12 +343,17 @@ static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
>  {
>      int fd, vnet_hdr_required;
>      char ifname[128] = {0,};
> +    char dev[128] = {0,};
>      const char *setup_script;
>  
>      if (qemu_opt_get(opts, "ifname")) {
>          pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
>      }
>  
> +    if (qemu_opt_get(opts, "dev")) {
> +        pstrcpy(dev, sizeof(dev), qemu_opt_get(opts, "dev"));
> +    }
> +

While in this case this is unlikely to be a problem in practice, we still
should not add arbitrary limitations on file name lengths.  Just teach
tap_open to get NULL instead of and empty string, or better supply
default /dev/net/tun to the option, and you will not need the strcpy.


>      *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
>      if (qemu_opt_get(opts, "vnet_hdr")) {
>          vnet_hdr_required = *vnet_hdr;
> @@ -356,7 +361,8 @@ static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
>          vnet_hdr_required = 0;
>      }
>  
> -    TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
> +    TFR(fd = tap_open(ifname, sizeof(ifname), dev, sizeof(dev),
> +			vnet_hdr, vnet_hdr_required));
>      if (fd < 0) {
>          return -1;
>      }
> @@ -371,6 +377,7 @@ static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
>      }
>  
>      qemu_opt_set(opts, "ifname", ifname);
> +    qemu_opt_set(opts, "dev", dev);
>  
>      return fd;
>  }
> @@ -382,10 +389,12 @@ int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan
>  
>      if (qemu_opt_get(opts, "fd")) {
>          if (qemu_opt_get(opts, "ifname") ||
> +            qemu_opt_get(opts, "dev") ||
>              qemu_opt_get(opts, "script") ||
>              qemu_opt_get(opts, "downscript") ||
>              qemu_opt_get(opts, "vnet_hdr")) {
> -            qemu_error("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=\n");
> +            qemu_error("ifname=, dev=, script=, downscript= and vnet_hdr= is "
> +		       "invalid with fd=\n");
>              return -1;
>          }
>  
> @@ -425,15 +434,16 @@ int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan
>      if (qemu_opt_get(opts, "fd")) {
>          snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
>      } else {
> -        const char *ifname, *script, *downscript;
> +        const char *ifname, *dev, *script, *downscript;
>  
>          ifname     = qemu_opt_get(opts, "ifname");
> +        dev        = qemu_opt_get(opts, "dev");
>          script     = qemu_opt_get(opts, "script");
>          downscript = qemu_opt_get(opts, "downscript");
>  
>          snprintf(s->nc.info_str, sizeof(s->nc.info_str),
> -                 "ifname=%s,script=%s,downscript=%s",
> -                 ifname, script, downscript);
> +                 "ifname=%s,dev=%s,script=%s,downscript=%s",

This will look strange if dev is not supplied, will it not?
Also, I wonder whether there might be any scripts parsing
info string from monitor, that will get broken by the
extra parameter. How about we only print dev if it
is not the default?

> +                 ifname, dev, script, downscript);
>  
>          if (strcmp(downscript, "no") != 0) {
>              snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
> diff --git a/net/tap.h b/net/tap.h
> index 538a562..ba44363 100644
> --- a/net/tap.h
> +++ b/net/tap.h
> @@ -34,7 +34,8 @@
>  
>  int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan);
>  
> -int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required);
> +int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,
> +			int *vnet_hdr, int vnet_hdr_required);
>  
>  ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
>  
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 1b5781a..7f7aa18 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -810,12 +810,14 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
>      "-net tap[,vlan=n][,name=str],ifname=name\n"
>      "                connect the host TAP network interface to VLAN 'n'\n"
>  #else
> -    "-net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off]\n"
> +    "-net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,dev=str][,script=file]\n"
> +    "        [,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off]\n"
>      "                connect the host TAP network interface to VLAN 'n' and use the\n"
>      "                network scripts 'file' (default=%s)\n"
>      "                and 'dfile' (default=%s);\n"
>      "                use '[down]script=no' to disable script execution;\n"
>      "                use 'fd=h' to connect to an already opened TAP interface\n"
> +    "                use 'dev=str' to open a specific tap character device\n"

please document default /dev/net/tun

>      "                use 'sndbuf=nbytes' to limit the size of the send buffer; the\n"
>      "                default of 'sndbuf=1048576' can be disabled using 'sndbuf=0'\n"
>      "                use vnet_hdr=off to avoid enabling the IFF_VNET_HDR tap flag; use\n"
>
Arnd Bergmann Dec. 9, 2009, 12:33 p.m. UTC | #3
On Wednesday 09 December 2009, Michael S. Tsirkin wrote:
> On Tue, Dec 08, 2009 at 06:41:44PM +0100, Arnd Bergmann wrote:
> > --- a/net/tap-bsd.c
> > +++ b/net/tap-bsd.c
> > @@ -40,7 +40,8 @@
> >  #include <util.h>
> >  #endif
> >  
> > -int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
> > +int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,
> > +			int *vnet_hdr, int vnet_hdr_required)
> >  {
> >      int fd;
> >      char *dev;
> 
> Does this compile?

I don't have a BSD or Solaris machine here, or even just a cross-compiler, so
I could not test. However, I only add two arguments and I did the identical
change in the header file and the linux version of this file, so I am reasonably
confident.
 
> > -int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
> > +int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,
> 
> dev is never modified, so it should be const char *.

ok.

> > +			int *vnet_hdr, int vnet_hdr_required)
> >  {
> >      struct ifreq ifr;
> >      int fd, ret;
> > +    const char *path;
> >  
> > -    TFR(fd = open("/dev/net/tun", O_RDWR));
> > +    if (dev[0] == '\0')
> 
> == 0 checks are ugly. if (*dev) is shorter, and it's a standard C
> idiom to detect non-empty string. Or better pass NULL if no device,
> then you can just path = dev ? dev : "/dev/net/tun".

Agreed in principle, but I was following the style that is already used
in the same function, and I think consistency is more important in
this case.
 
> > +	path = "/dev/net/tun";
> > +    else
> > +	path = dev;
> 
> Please do not indent by single space.

For some reason, this file uses four character indentation, which
I followed for consistency. In the patch this gets mangled when
some lines use only spaces for indentation and others use only
tabs.

I could change to using only spaces for indentation if that's preferred.

> > diff --git a/net/tap.c b/net/tap.c
> > index 0d8b424..14ddf65 100644
> > --- a/net/tap.c
> > +++ b/net/tap.c
> > @@ -343,12 +343,17 @@ static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
> >  {
> >      int fd, vnet_hdr_required;
> >      char ifname[128] = {0,};
> > +    char dev[128] = {0,};
> >      const char *setup_script;
> >  
> >      if (qemu_opt_get(opts, "ifname")) {
> >          pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
> >      }
> >  
> > +    if (qemu_opt_get(opts, "dev")) {
> > +        pstrcpy(dev, sizeof(dev), qemu_opt_get(opts, "dev"));
> > +    }
> > +
> 
> While in this case this is unlikely to be a problem in practice, we still
> should not add arbitrary limitations on file name lengths.  Just teach
> tap_open to get NULL instead of and empty string, or better supply
> default /dev/net/tun to the option, and you will not need the strcpy.

Right, I will do that, or alternatively make dev an input/output argument,
see below.

> >          snprintf(s->nc.info_str, sizeof(s->nc.info_str),
> > -                 "ifname=%s,script=%s,downscript=%s",
> > -                 ifname, script, downscript);
> > +                 "ifname=%s,dev=%s,script=%s,downscript=%s",
> 
> This will look strange if dev is not supplied, will it not?
> Also, I wonder whether there might be any scripts parsing
> info string from monitor, that will get broken by the
> extra parameter. How about we only print dev if it
> is not the default?

Right. I originally planned to return "/dev/net/tun" from tap_open
in that case, but I forgot to put that in. It would also not work well
for Solaris and BSD unless I add untested code there.

I guess it would be consistent to do that, but unless someone insists
on it, I'll just follow your advice here and remove it from being printed.

> > +    "-net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,dev=str][,script=file]\n"
> > +    "        [,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off]\n"
> >      "                connect the host TAP network interface to VLAN 'n' and use the\n"
> >      "                network scripts 'file' (default=%s)\n"
> >      "                and 'dfile' (default=%s);\n"
> >      "                use '[down]script=no' to disable script execution;\n"
> >      "                use 'fd=h' to connect to an already opened TAP interface\n"
> > +    "                use 'dev=str' to open a specific tap character device\n"
> 
> please document default /dev/net/tun

ok.

Thanks for the review!

	Arnd
Christoph Egger Dec. 9, 2009, 1:21 p.m. UTC | #4
On Wednesday 09 December 2009 13:33:36 Arnd Bergmann wrote:
> On Wednesday 09 December 2009, Michael S. Tsirkin wrote:
> > On Tue, Dec 08, 2009 at 06:41:44PM +0100, Arnd Bergmann wrote:
> > > --- a/net/tap-bsd.c
> > > +++ b/net/tap-bsd.c
> > > @@ -40,7 +40,8 @@
> > >  #include <util.h>
> > >  #endif
> > >
> > > -int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int
> > > vnet_hdr_required) +int tap_open(char *ifname, int ifname_size, char
> > > *dev, int dev_size, +			int *vnet_hdr, int vnet_hdr_required)
> > >  {
> > >      int fd;
> > >      char *dev;
> >
> > Does this compile?
>
> I don't have a BSD or Solaris machine here, or even just a cross-compiler,
> so I could not test.

This sounds like a bad joke on a virtualization list.

You can say you don't have an AMD or Intel machine to test hw feature XY but
saying you don't have a machine running a certain OS is prude as you can
always use qemu itself to get that up and running.

Christoph
Michael S. Tsirkin Dec. 9, 2009, 1:23 p.m. UTC | #5
On Wed, Dec 09, 2009 at 01:33:36PM +0100, Arnd Bergmann wrote:
> On Wednesday 09 December 2009, Michael S. Tsirkin wrote:
> > On Tue, Dec 08, 2009 at 06:41:44PM +0100, Arnd Bergmann wrote:
> > > --- a/net/tap-bsd.c
> > > +++ b/net/tap-bsd.c
> > > @@ -40,7 +40,8 @@
> > >  #include <util.h>
> > >  #endif
> > >  
> > > -int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
> > > +int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,
> > > +			int *vnet_hdr, int vnet_hdr_required)
> > >  {
> > >      int fd;
> > >      char *dev;
> > 
> > Does this compile?
> 
> I don't have a BSD or Solaris machine here, or even just a cross-compiler, so
> I could not test.

It should be possible to install in a VM if you really want to,
but that's not my point.

> However, I only add two arguments and I did the identical
> change in the header file and the linux version of this file, so I am reasonably
> confident.

'char *dev' variable has the same name as the new parameter, which
is not legal in C99 and normally triggers compiler warning or error.


> > > -int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
> > > +int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,
> > 
> > dev is never modified, so it should be const char *.
> 
> ok.
> 
> > > +			int *vnet_hdr, int vnet_hdr_required)
> > >  {
> > >      struct ifreq ifr;
> > >      int fd, ret;
> > > +    const char *path;
> > >  
> > > -    TFR(fd = open("/dev/net/tun", O_RDWR));
> > > +    if (dev[0] == '\0')
> > 
> > == 0 checks are ugly. if (*dev) is shorter, and it's a standard C
> > idiom to detect non-empty string. Or better pass NULL if no device,
> > then you can just path = dev ? dev : "/dev/net/tun".
> 
> Agreed in principle, but I was following the style that is already used
> in the same function, and I think consistency is more important in
> this case.

qemu code in general is inconsistent. Let's make new code look sane,
over time most of it will get converted.

> > > +	path = "/dev/net/tun";
> > > +    else
> > > +	path = dev;
> > 
> > Please do not indent by single space.
> 
> For some reason, this file uses four character indentation, which
> I followed for consistency. In the patch this gets mangled when
> some lines use only spaces for indentation and others use only
> tabs.
> 
> I could change to using only spaces for indentation if that's preferred.

Coding style says you should use spaces for indentation.

> > > diff --git a/net/tap.c b/net/tap.c
> > > index 0d8b424..14ddf65 100644
> > > --- a/net/tap.c
> > > +++ b/net/tap.c
> > > @@ -343,12 +343,17 @@ static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
> > >  {
> > >      int fd, vnet_hdr_required;
> > >      char ifname[128] = {0,};
> > > +    char dev[128] = {0,};
> > >      const char *setup_script;
> > >  
> > >      if (qemu_opt_get(opts, "ifname")) {
> > >          pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
> > >      }
> > >  
> > > +    if (qemu_opt_get(opts, "dev")) {
> > > +        pstrcpy(dev, sizeof(dev), qemu_opt_get(opts, "dev"));
> > > +    }
> > > +
> > 
> > While in this case this is unlikely to be a problem in practice, we still
> > should not add arbitrary limitations on file name lengths.  Just teach
> > tap_open to get NULL instead of and empty string, or better supply
> > default /dev/net/tun to the option, and you will not need the strcpy.
> 
> Right, I will do that, or alternatively make dev an input/output argument,
> see below.

input/output will require allocating storage for it,
so it's more work (assuming length of 128 characters is evil).
Not sure it's a good idea.

> > >          snprintf(s->nc.info_str, sizeof(s->nc.info_str),
> > > -                 "ifname=%s,script=%s,downscript=%s",
> > > -                 ifname, script, downscript);
> > > +                 "ifname=%s,dev=%s,script=%s,downscript=%s",
> > 
> > This will look strange if dev is not supplied, will it not?
> > Also, I wonder whether there might be any scripts parsing
> > info string from monitor, that will get broken by the
> > extra parameter. How about we only print dev if it
> > is not the default?
> 
> Right. I originally planned to return "/dev/net/tun" from tap_open
> in that case, but I forgot to put that in. It would also not work well
> for Solaris and BSD unless I add untested code there.

I think it's better to add untested feature than intentionally
skip it. It's easier for people familiar with the platform to
fix a broken feature than to guess what feature needs to be filled in.

If you don't you, should replace ifndef WINDOWS by ifdef LINUX or
something like that.

> I guess it would be consistent to do that, but unless someone insists
> on it, I'll just follow your advice here and remove it from being printed.
> 
> > > +    "-net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,dev=str][,script=file]\n"
> > > +    "        [,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off]\n"
> > >      "                connect the host TAP network interface to VLAN 'n' and use the\n"
> > >      "                network scripts 'file' (default=%s)\n"
> > >      "                and 'dfile' (default=%s);\n"
> > >      "                use '[down]script=no' to disable script execution;\n"
> > >      "                use 'fd=h' to connect to an already opened TAP interface\n"
> > > +    "                use 'dev=str' to open a specific tap character device\n"
> > 
> > please document default /dev/net/tun
> 
> ok.
> 
> Thanks for the review!
> 
> 	Arnd
Anthony Liguori Dec. 9, 2009, 7:39 p.m. UTC | #6
Arnd Bergmann wrote:
> In order to support macvtap, we need a way to select the actual
> tap endpoint. While it would be nice to pass it by network interface
> name, passing the character device is more flexible, and we can
> easily do both in the long run.
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
>   

This isn't really a generic thing and I dislike pretending it is.  This 
is specifically for macvtap.

If we were going to do this, I'd rather introduce a -net macvtap that 
actually allocated the interfaces (similar to how tap works).  The 
problem with this interface is that it's a two stage process.  You have 
to create an interface, then hand the name to qemu.  It would be just as 
easy to hand qemu an fd with a helper.

What's nice about -net tap is that with a little bit of setup 
(/etc/qemu-ifup), it Just Works.  -net tap,dev= does not share this 
property.

I think this is a good place where an exec helper would be a natural fit.

Regards,

Anthony Liguori
Arnd Bergmann Dec. 10, 2009, 8:55 a.m. UTC | #7
On Wednesday 09 December 2009, Anthony Liguori wrote:
> This isn't really a generic thing and I dislike pretending it is.  This 
> is specifically for macvtap.

Well, depending how how things work out with VMD-q and SR-IOV,
I might move the tap interface stuff into a library module and add more
user-level shims for it that deal with the creation of character devices.

Macvtap essentially implements a tun/tap compatible character device,
while leaving out some of the stuff that doesn't make sense there (e.g.
creation of virtual network interfaces). The idea of my patch was
to deal with anything that is a tap protocol implementation.

One thing that is special for macvtap is that the guest MAC address
has to match the MAC address of the macvtap downstream device,
because macvlan filters out all traffic destined for other unicast
addresses.

> If we were going to do this, I'd rather introduce a -net macvtap that 
> actually allocated the interfaces (similar to how tap works).  The 
> problem with this interface is that it's a two stage process.  You have 
> to create an interface, then hand the name to qemu.  It would be just as 
> easy to hand qemu an fd with a helper.

You can't really allocate the device from qemu in a nice way.
I had mostly implemented macvtap support for your -net bridge
helper when I realized this.

Since it uses netlink (with CAP_NET_ADMIN) to create or destroy
the netdev, it first needs to wait for udev to create the device
node (which is not so bad by itself), and then use netlink again
after shutting down to destroy the network device.

> What's nice about -net tap is that with a little bit of setup 
> (/etc/qemu-ifup), it Just Works.  -net tap,dev= does not share this 
> property.

The qemu-ifup stuff IMHO is just a workaround for the problem of
having to defer network setup until after you open the device.

Macvtap doesn't have this problem. In the same way we deal with
other host resources (raw disk access, USB passthrough, /dev/kvm),
you first set up specifically what the user is allowed to do, either
manually or through libvirt and then just use it.

> I think this is a good place where an exec helper would be a natural fit.

IMHO, the exec helper is a good way to abstract away the difference
between tap, macvtap and possibly others based on the host configuration,
but it doesn't really make sense for a separate -net macvtap backend
like you suggested. You've shown that the helper can be used to enforce
user permissions for tun/tap, which lacks this, but for macvtap we can
just use regular user permissions.

	Arnd
diff mbox

Patch

diff --git a/net.c b/net.c
index 13bdbb2..deb12fd 100644
--- a/net.c
+++ b/net.c
@@ -955,6 +955,11 @@  static struct {
             },
 #ifndef _WIN32
             {
+                .name = "dev",
+                .type = QEMU_OPT_STRING,
+                .help = "character device pathname",
+            },
+            {
                 .name = "fd",
                 .type = QEMU_OPT_STRING,
                 .help = "file descriptor of an already opened tap",
diff --git a/net/tap-aix.c b/net/tap-aix.c
index 4bc9f2f..b789d06 100644
--- a/net/tap-aix.c
+++ b/net/tap-aix.c
@@ -25,7 +25,8 @@ 
 #include "net/tap.h"
 #include <stdio.h>
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,
+			int *vnet_hdr, int vnet_hdr_required)
 {
     fprintf(stderr, "no tap on AIX\n");
     return -1;
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 815997d..09a7780 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -40,7 +40,8 @@ 
 #include <util.h>
 #endif
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,
+			int *vnet_hdr, int vnet_hdr_required)
 {
     int fd;
     char *dev;
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 6af9e82..a6df123 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -32,14 +32,21 @@ 
 #include "sysemu.h"
 #include "qemu-common.h"
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,
+			int *vnet_hdr, int vnet_hdr_required)
 {
     struct ifreq ifr;
     int fd, ret;
+    const char *path;
 
-    TFR(fd = open("/dev/net/tun", O_RDWR));
+    if (dev[0] == '\0')
+	path = "/dev/net/tun";
+    else
+	path = dev;
+
+    TFR(fd = open(dev, O_RDWR));
     if (fd < 0) {
-        fprintf(stderr, "warning: could not open /dev/net/tun: no virtual network emulation\n");
+        fprintf(stderr, "warning: could not open %s: no virtual network emulation\n", path);
         return -1;
     }
     memset(&ifr, 0, sizeof(ifr));
@@ -70,7 +77,7 @@  int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required
         pstrcpy(ifr.ifr_name, IFNAMSIZ, "tap%d");
     ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
     if (ret != 0) {
-        fprintf(stderr, "warning: could not configure /dev/net/tun: no virtual network emulation\n");
+        fprintf(stderr, "warning: could not configure %s: no virtual network emulation\n", path);
         close(fd);
         return -1;
     }
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index e14fe36..72abb78 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -171,7 +171,8 @@  static int tap_alloc(char *dev, size_t dev_size)
     return tap_fd;
 }
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,
+			int *vnet_hdr, int vnet_hdr_required)
 {
     char  dev[10]="";
     int fd;
diff --git a/net/tap.c b/net/tap.c
index 0d8b424..14ddf65 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -343,12 +343,17 @@  static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
 {
     int fd, vnet_hdr_required;
     char ifname[128] = {0,};
+    char dev[128] = {0,};
     const char *setup_script;
 
     if (qemu_opt_get(opts, "ifname")) {
         pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
     }
 
+    if (qemu_opt_get(opts, "dev")) {
+        pstrcpy(dev, sizeof(dev), qemu_opt_get(opts, "dev"));
+    }
+
     *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
     if (qemu_opt_get(opts, "vnet_hdr")) {
         vnet_hdr_required = *vnet_hdr;
@@ -356,7 +361,8 @@  static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
         vnet_hdr_required = 0;
     }
 
-    TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
+    TFR(fd = tap_open(ifname, sizeof(ifname), dev, sizeof(dev),
+			vnet_hdr, vnet_hdr_required));
     if (fd < 0) {
         return -1;
     }
@@ -371,6 +377,7 @@  static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
     }
 
     qemu_opt_set(opts, "ifname", ifname);
+    qemu_opt_set(opts, "dev", dev);
 
     return fd;
 }
@@ -382,10 +389,12 @@  int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan
 
     if (qemu_opt_get(opts, "fd")) {
         if (qemu_opt_get(opts, "ifname") ||
+            qemu_opt_get(opts, "dev") ||
             qemu_opt_get(opts, "script") ||
             qemu_opt_get(opts, "downscript") ||
             qemu_opt_get(opts, "vnet_hdr")) {
-            qemu_error("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=\n");
+            qemu_error("ifname=, dev=, script=, downscript= and vnet_hdr= is "
+		       "invalid with fd=\n");
             return -1;
         }
 
@@ -425,15 +434,16 @@  int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan
     if (qemu_opt_get(opts, "fd")) {
         snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
     } else {
-        const char *ifname, *script, *downscript;
+        const char *ifname, *dev, *script, *downscript;
 
         ifname     = qemu_opt_get(opts, "ifname");
+        dev        = qemu_opt_get(opts, "dev");
         script     = qemu_opt_get(opts, "script");
         downscript = qemu_opt_get(opts, "downscript");
 
         snprintf(s->nc.info_str, sizeof(s->nc.info_str),
-                 "ifname=%s,script=%s,downscript=%s",
-                 ifname, script, downscript);
+                 "ifname=%s,dev=%s,script=%s,downscript=%s",
+                 ifname, dev, script, downscript);
 
         if (strcmp(downscript, "no") != 0) {
             snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
diff --git a/net/tap.h b/net/tap.h
index 538a562..ba44363 100644
--- a/net/tap.h
+++ b/net/tap.h
@@ -34,7 +34,8 @@ 
 
 int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan);
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required);
+int tap_open(char *ifname, int ifname_size, char *dev, int dev_size,
+			int *vnet_hdr, int vnet_hdr_required);
 
 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 1b5781a..7f7aa18 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -810,12 +810,14 @@  DEF("net", HAS_ARG, QEMU_OPTION_net,
     "-net tap[,vlan=n][,name=str],ifname=name\n"
     "                connect the host TAP network interface to VLAN 'n'\n"
 #else
-    "-net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off]\n"
+    "-net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,dev=str][,script=file]\n"
+    "        [,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off]\n"
     "                connect the host TAP network interface to VLAN 'n' and use the\n"
     "                network scripts 'file' (default=%s)\n"
     "                and 'dfile' (default=%s);\n"
     "                use '[down]script=no' to disable script execution;\n"
     "                use 'fd=h' to connect to an already opened TAP interface\n"
+    "                use 'dev=str' to open a specific tap character device\n"
     "                use 'sndbuf=nbytes' to limit the size of the send buffer; the\n"
     "                default of 'sndbuf=1048576' can be disabled using 'sndbuf=0'\n"
     "                use vnet_hdr=off to avoid enabling the IFF_VNET_HDR tap flag; use\n"