diff mbox

[v6,1/2] vhost-user: add multi queue support

Message ID 1439360742-2186-2-git-send-email-changchun.ouyang@intel.com
State New
Headers show

Commit Message

Ouyang, Changchun Aug. 12, 2015, 6:25 a.m. UTC
Based on patch by Nikolay Nikolaev:
Vhost-user will implement the multi queue support in a similar way
to what vhost already has - a separate thread for each queue.
To enable the multi queue functionality - a new command line parameter
"queues" is introduced for the vhost-user netdev.

The RESET_OWNER change is based on commit:
   294ce717e0f212ed0763307f3eab72b4a1bdf4d0
If it is reverted, the patch need update for it accordingly.

Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
Changes since v5:
 - fix the message descption for VHOST_RESET_OWNER in vhost-user txt

Changes since v4:
 - remove the unnecessary trailing '\n'

Changes since v3:
 - fix one typo and wrap one long line

Changes since v2:
 - fix vq index issue for set_vring_call
   When it is the case of VHOST_SET_VRING_CALL, The vq_index is not initialized before it is used,
   thus it could be a random value. The random value leads to crash in vhost after passing down
   to vhost, as vhost use this random value to index an array index.
 - fix the typo in the doc and description
 - address vq index for reset_owner

Changes since v1:
 - use s->nc.info_str when bringing up/down the backend

 docs/specs/vhost-user.txt |  7 ++++++-
 hw/net/vhost_net.c        |  3 ++-
 hw/virtio/vhost-user.c    | 11 ++++++++++-
 net/vhost-user.c          | 37 ++++++++++++++++++++++++-------------
 qapi-schema.json          |  6 +++++-
 qemu-options.hx           |  5 +++--
 6 files changed, 50 insertions(+), 19 deletions(-)

Comments

Michael S. Tsirkin Aug. 13, 2015, 9:18 a.m. UTC | #1
On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
> Based on patch by Nikolay Nikolaev:
> Vhost-user will implement the multi queue support in a similar way
> to what vhost already has - a separate thread for each queue.
> To enable the multi queue functionality - a new command line parameter
> "queues" is introduced for the vhost-user netdev.
> 
> The RESET_OWNER change is based on commit:
>    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
> If it is reverted, the patch need update for it accordingly.
> 
> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
> Changes since v5:
>  - fix the message descption for VHOST_RESET_OWNER in vhost-user txt
> 
> Changes since v4:
>  - remove the unnecessary trailing '\n'
> 
> Changes since v3:
>  - fix one typo and wrap one long line
> 
> Changes since v2:
>  - fix vq index issue for set_vring_call
>    When it is the case of VHOST_SET_VRING_CALL, The vq_index is not initialized before it is used,
>    thus it could be a random value. The random value leads to crash in vhost after passing down
>    to vhost, as vhost use this random value to index an array index.
>  - fix the typo in the doc and description
>  - address vq index for reset_owner
> 
> Changes since v1:
>  - use s->nc.info_str when bringing up/down the backend
> 
>  docs/specs/vhost-user.txt |  7 ++++++-
>  hw/net/vhost_net.c        |  3 ++-
>  hw/virtio/vhost-user.c    | 11 ++++++++++-
>  net/vhost-user.c          | 37 ++++++++++++++++++++++++-------------
>  qapi-schema.json          |  6 +++++-
>  qemu-options.hx           |  5 +++--
>  6 files changed, 50 insertions(+), 19 deletions(-)
> 
> diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
> index 70da3b1..9390f89 100644
> --- a/docs/specs/vhost-user.txt
> +++ b/docs/specs/vhost-user.txt
> @@ -135,6 +135,11 @@ As older slaves don't support negotiating protocol features,
>  a feature bit was dedicated for this purpose:
>  #define VHOST_USER_F_PROTOCOL_FEATURES 30
>  
> +Multi queue support
> +-------------------
> +The protocol supports multiple queues by setting all index fields in the sent
> +messages to a properly calculated value.
> +
>  Message types
>  -------------
>  
> @@ -198,7 +203,7 @@ Message types
>  
>        Id: 4
>        Equivalent ioctl: VHOST_RESET_OWNER
> -      Master payload: N/A
> +      Master payload: vring state description
>  
>        Issued when a new connection is about to be closed. The Master will no
>        longer own this connection (and will usually close it).

This is an interface change, isn't it?
We can't make it unconditionally, need to make it dependent
on a protocol flag.


> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 1f25cb3..9cd6c05 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -159,6 +159,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
>  
>      net->dev.nvqs = 2;
>      net->dev.vqs = net->vqs;
> +    net->dev.vq_index = net->nc->queue_index;
>  
>      r = vhost_dev_init(&net->dev, options->opaque,
>                         options->backend_type, options->force);
> @@ -269,7 +270,7 @@ static void vhost_net_stop_one(struct vhost_net *net,
>          for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
>              const VhostOps *vhost_ops = net->dev.vhost_ops;
>              int r = vhost_ops->vhost_call(&net->dev, VHOST_RESET_OWNER,
> -                                          NULL);
> +                                          &file);
>              assert(r >= 0);
>          }
>      }
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index 27ba035..fb11d4c 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -219,7 +219,12 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
>          break;
>  
>      case VHOST_USER_SET_OWNER:
> +        break;
> +
>      case VHOST_USER_RESET_OWNER:
> +        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
> +        msg.state.index += dev->vq_index;
> +        msg.size = sizeof(m.state);
>          break;
>  
>      case VHOST_USER_SET_MEM_TABLE:
> @@ -262,17 +267,20 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
>      case VHOST_USER_SET_VRING_NUM:
>      case VHOST_USER_SET_VRING_BASE:
>          memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
> +        msg.state.index += dev->vq_index;
>          msg.size = sizeof(m.state);
>          break;
>  
>      case VHOST_USER_GET_VRING_BASE:
>          memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
> +        msg.state.index += dev->vq_index;
>          msg.size = sizeof(m.state);
>          need_reply = 1;
>          break;
>  
>      case VHOST_USER_SET_VRING_ADDR:
>          memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
> +        msg.addr.index += dev->vq_index;
>          msg.size = sizeof(m.addr);
>          break;
>  
> @@ -280,7 +288,7 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
>      case VHOST_USER_SET_VRING_CALL:
>      case VHOST_USER_SET_VRING_ERR:
>          file = arg;
> -        msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
> +        msg.u64 = (file->index + dev->vq_index) & VHOST_USER_VRING_IDX_MASK;
>          msg.size = sizeof(m.u64);
>          if (ioeventfd_enabled() && file->fd > 0) {
>              fds[fd_num++] = file->fd;
> @@ -322,6 +330,7 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
>                  error_report("Received bad msg size.");
>                  return -1;
>              }
> +            msg.state.index -= dev->vq_index;
>              memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
>              break;
>          default:
> diff --git a/net/vhost-user.c b/net/vhost-user.c
> index 1d86a2b..904d8af 100644
> --- a/net/vhost-user.c
> +++ b/net/vhost-user.c
> @@ -121,35 +121,39 @@ static void net_vhost_user_event(void *opaque, int event)
>      case CHR_EVENT_OPENED:
>          vhost_user_start(s);
>          net_vhost_link_down(s, false);
> -        error_report("chardev \"%s\" went up", s->chr->label);
> +        error_report("chardev \"%s\" went up", s->nc.info_str);
>          break;
>      case CHR_EVENT_CLOSED:
>          net_vhost_link_down(s, true);
>          vhost_user_stop(s);
> -        error_report("chardev \"%s\" went down", s->chr->label);
> +        error_report("chardev \"%s\" went down", s->nc.info_str);
>          break;
>      }
>  }
>  
>  static int net_vhost_user_init(NetClientState *peer, const char *device,
> -                               const char *name, CharDriverState *chr)
> +                               const char *name, CharDriverState *chr,
> +                               uint32_t queues)
>  {
>      NetClientState *nc;
>      VhostUserState *s;
> +    int i;
>  
> -    nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
> +    for (i = 0; i < queues; i++) {
> +        nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
>  
> -    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
> -             chr->label);
> +        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
> +                 i, chr->label);
>  
> -    s = DO_UPCAST(VhostUserState, nc, nc);
> +        s = DO_UPCAST(VhostUserState, nc, nc);
>  
> -    /* We don't provide a receive callback */
> -    s->nc.receive_disabled = 1;
> -    s->chr = chr;
> -
> -    qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
> +        /* We don't provide a receive callback */
> +        s->nc.receive_disabled = 1;
> +        s->chr = chr;
> +        s->nc.queue_index = i;
>  
> +        qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
> +    }
>      return 0;
>  }
>  
> @@ -225,6 +229,7 @@ static int net_vhost_check_net(QemuOpts *opts, void *opaque)


There are two problems here:

1. we don't really know that the backend
   is able to support the requested number of queues.
   If not, everything will fail, silently.
   A new message to query the # of queues could help, though
   I'm not sure what can be done on failure. Fail connection?

2. each message (e.g. set memory table) is sent multiple times,
   on the same socket.



>  int net_init_vhost_user(const NetClientOptions *opts, const char *name,
>                          NetClientState *peer)
>  {
> +    uint32_t queues;
>      const NetdevVhostUserOptions *vhost_user_opts;
>      CharDriverState *chr;
>  
> @@ -243,6 +248,12 @@ int net_init_vhost_user(const NetClientOptions *opts, const char *name,
>          return -1;
>      }
>  
> +    /* number of queues for multiqueue */
> +    if (vhost_user_opts->has_queues) {
> +        queues = vhost_user_opts->queues;
> +    } else {
> +        queues = 1;
> +    }
>  
> -    return net_vhost_user_init(peer, "vhost_user", name, chr);
> +    return net_vhost_user_init(peer, "vhost_user", name, chr, queues);
>  }
> diff --git a/qapi-schema.json b/qapi-schema.json
> index f97ffa1..51e40ce 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -2444,12 +2444,16 @@
>  #
>  # @vhostforce: #optional vhost on for non-MSIX virtio guests (default: false).
>  #
> +# @queues: #optional number of queues to be created for multiqueue vhost-user
> +#          (default: 1) (Since 2.5)
> +#
>  # Since 2.1
>  ##
>  { 'struct': 'NetdevVhostUserOptions',
>    'data': {
>      'chardev':        'str',
> -    '*vhostforce':    'bool' } }
> +    '*vhostforce':    'bool',
> +    '*queues':        'uint32' } }
>  
>  ##
>  # @NetClientOptions
> diff --git a/qemu-options.hx b/qemu-options.hx
> index ec356f6..dad035e 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -1942,13 +1942,14 @@ The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
>  netdev.  @code{-net} and @code{-device} with parameter @option{vlan} create the
>  required hub automatically.
>  
> -@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> +@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
>  
>  Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
>  be a unix domain socket backed one. The vhost-user uses a specifically defined
>  protocol to pass vhost ioctl replacement messages to an application on the other
>  end of the socket. On non-MSIX guests, the feature can be forced with
> -@var{vhostforce}.
> +@var{vhostforce}. Use 'queues=@var{n}' to specify the number of queues to
> +be created for multiqueue vhost-user.
>  
>  Example:
>  @example
> -- 
> 1.8.4.2
>
Maxime Leroy Aug. 13, 2015, 10:24 a.m. UTC | #2
On Thu, Aug 13, 2015 at 11:18 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
>> Based on patch by Nikolay Nikolaev:
>> Vhost-user will implement the multi queue support in a similar way
>> to what vhost already has - a separate thread for each queue.
>> To enable the multi queue functionality - a new command line parameter
>> "queues" is introduced for the vhost-user netdev.
>>
>> The RESET_OWNER change is based on commit:
>>    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
>> If it is reverted, the patch need update for it accordingly.
>>
>> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>> ---
>> Changes since v5:
>>  - fix the message descption for VHOST_RESET_OWNER in vhost-user txt
>>
>> Changes since v4:
>>  - remove the unnecessary trailing '\n'
>>
>> Changes since v3:
>>  - fix one typo and wrap one long line
>>
>> Changes since v2:
>>  - fix vq index issue for set_vring_call
>>    When it is the case of VHOST_SET_VRING_CALL, The vq_index is not initialized before it is used,
>>    thus it could be a random value. The random value leads to crash in vhost after passing down
>>    to vhost, as vhost use this random value to index an array index.
>>  - fix the typo in the doc and description
>>  - address vq index for reset_owner
>>
>> Changes since v1:
>>  - use s->nc.info_str when bringing up/down the backend
>>
>>  docs/specs/vhost-user.txt |  7 ++++++-
>>  hw/net/vhost_net.c        |  3 ++-
>>  hw/virtio/vhost-user.c    | 11 ++++++++++-
>>  net/vhost-user.c          | 37 ++++++++++++++++++++++++-------------
>>  qapi-schema.json          |  6 +++++-
>>  qemu-options.hx           |  5 +++--
>>  6 files changed, 50 insertions(+), 19 deletions(-)
>>
>> diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
>> index 70da3b1..9390f89 100644
>> --- a/docs/specs/vhost-user.txt
>> +++ b/docs/specs/vhost-user.txt
>> @@ -135,6 +135,11 @@ As older slaves don't support negotiating protocol features,
>>  a feature bit was dedicated for this purpose:
>>  #define VHOST_USER_F_PROTOCOL_FEATURES 30
>>
>> +Multi queue support
>> +-------------------
>> +The protocol supports multiple queues by setting all index fields in the sent
>> +messages to a properly calculated value.
>> +
>>  Message types
>>  -------------
>>
>> @@ -198,7 +203,7 @@ Message types
>>
>>        Id: 4
>>        Equivalent ioctl: VHOST_RESET_OWNER
>> -      Master payload: N/A
>> +      Master payload: vring state description
>>
>>        Issued when a new connection is about to be closed. The Master will no
>>        longer own this connection (and will usually close it).
>
> This is an interface change, isn't it?
> We can't make it unconditionally, need to make it dependent
> on a protocol flag.

Agree. It can potential break vhost-user driver implementation
checking the size of the message. We should not change the vhost-user
protocol without a new protocol flag.

I think the first issue here that VHOST_RESET_OWNER should happen on
vhost_dev_cleanup and not in  vhost_net_stop_one.

VHOST_RESET_OWNER should be the counter part of VHOST_SET_OWNER. So it
don't need to have a payload like VHOST_SET_OWNER.

Thus I agree with this email
(http://lists.nongnu.org/archive/html/qemu-devel/2015-07/msg05971.html)

Maybe should we use an other message to tell to the backend that the
vring is not anymore available in vhost_net_stop_one ?

Maxime
Michael S. Tsirkin Aug. 13, 2015, 10:55 a.m. UTC | #3
On Thu, Aug 13, 2015 at 12:24:16PM +0200, Maxime Leroy wrote:
> On Thu, Aug 13, 2015 at 11:18 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> > On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
> >> Based on patch by Nikolay Nikolaev:
> >> Vhost-user will implement the multi queue support in a similar way
> >> to what vhost already has - a separate thread for each queue.
> >> To enable the multi queue functionality - a new command line parameter
> >> "queues" is introduced for the vhost-user netdev.
> >>
> >> The RESET_OWNER change is based on commit:
> >>    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
> >> If it is reverted, the patch need update for it accordingly.
> >>
> >> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> >> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >> ---
> >> Changes since v5:
> >>  - fix the message descption for VHOST_RESET_OWNER in vhost-user txt
> >>
> >> Changes since v4:
> >>  - remove the unnecessary trailing '\n'
> >>
> >> Changes since v3:
> >>  - fix one typo and wrap one long line
> >>
> >> Changes since v2:
> >>  - fix vq index issue for set_vring_call
> >>    When it is the case of VHOST_SET_VRING_CALL, The vq_index is not initialized before it is used,
> >>    thus it could be a random value. The random value leads to crash in vhost after passing down
> >>    to vhost, as vhost use this random value to index an array index.
> >>  - fix the typo in the doc and description
> >>  - address vq index for reset_owner
> >>
> >> Changes since v1:
> >>  - use s->nc.info_str when bringing up/down the backend
> >>
> >>  docs/specs/vhost-user.txt |  7 ++++++-
> >>  hw/net/vhost_net.c        |  3 ++-
> >>  hw/virtio/vhost-user.c    | 11 ++++++++++-
> >>  net/vhost-user.c          | 37 ++++++++++++++++++++++++-------------
> >>  qapi-schema.json          |  6 +++++-
> >>  qemu-options.hx           |  5 +++--
> >>  6 files changed, 50 insertions(+), 19 deletions(-)
> >>
> >> diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
> >> index 70da3b1..9390f89 100644
> >> --- a/docs/specs/vhost-user.txt
> >> +++ b/docs/specs/vhost-user.txt
> >> @@ -135,6 +135,11 @@ As older slaves don't support negotiating protocol features,
> >>  a feature bit was dedicated for this purpose:
> >>  #define VHOST_USER_F_PROTOCOL_FEATURES 30
> >>
> >> +Multi queue support
> >> +-------------------
> >> +The protocol supports multiple queues by setting all index fields in the sent
> >> +messages to a properly calculated value.
> >> +
> >>  Message types
> >>  -------------
> >>
> >> @@ -198,7 +203,7 @@ Message types
> >>
> >>        Id: 4
> >>        Equivalent ioctl: VHOST_RESET_OWNER
> >> -      Master payload: N/A
> >> +      Master payload: vring state description
> >>
> >>        Issued when a new connection is about to be closed. The Master will no
> >>        longer own this connection (and will usually close it).
> >
> > This is an interface change, isn't it?
> > We can't make it unconditionally, need to make it dependent
> > on a protocol flag.
> 
> Agree. It can potential break vhost-user driver implementation
> checking the size of the message. We should not change the vhost-user
> protocol without a new protocol flag.
> 
> I think the first issue here that VHOST_RESET_OWNER should happen on
> vhost_dev_cleanup and not in  vhost_net_stop_one.
> 
> VHOST_RESET_OWNER should be the counter part of VHOST_SET_OWNER. So it
> don't need to have a payload like VHOST_SET_OWNER.
> 
> Thus I agree with this email
> (http://lists.nongnu.org/archive/html/qemu-devel/2015-07/msg05971.html)
> 
> Maybe should we use an other message to tell to the backend that the
> vring is not anymore available in vhost_net_stop_one ?
> 
> Maxime

I think the cleanest fix is to rename this message to e.g.
VHOST_RESET_DEVICE. This way we won't break existing users.
Ouyang, Changchun Aug. 25, 2015, 3:25 a.m. UTC | #4
Hi Michael,

> -----Original Message-----

> From: snabb-devel@googlegroups.com [mailto:snabb-

> devel@googlegroups.com] On Behalf Of Michael S. Tsirkin

> Sent: Thursday, August 13, 2015 5:19 PM

> To: Ouyang, Changchun

> Cc: qemu-devel@nongnu.org; snabb-devel@googlegroups.com;

> thibaut.collet@6wind.com; n.nikolaev@virtualopensystems.com;

> luke@snabb.co; Long, Thomas

> Subject: [snabb-devel] Re: [PATCH v6 1/2] vhost-user: add multi queue

> support

> 

> On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:

> > Based on patch by Nikolay Nikolaev:

> > Vhost-user will implement the multi queue support in a similar way to

> > what vhost already has - a separate thread for each queue.

> > To enable the multi queue functionality - a new command line parameter

> > "queues" is introduced for the vhost-user netdev.

> >

> > The RESET_OWNER change is based on commit:

> >    294ce717e0f212ed0763307f3eab72b4a1bdf4d0

> > If it is reverted, the patch need update for it accordingly.

> >

> > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>

> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

> > ---

> > Changes since v5:

> >  - fix the message descption for VHOST_RESET_OWNER in vhost-user txt

> >

> > Changes since v4:

> >  - remove the unnecessary trailing '\n'

> >

> > Changes since v3:

> >  - fix one typo and wrap one long line

> >

> > Changes since v2:

> >  - fix vq index issue for set_vring_call

> >    When it is the case of VHOST_SET_VRING_CALL, The vq_index is not

> initialized before it is used,

> >    thus it could be a random value. The random value leads to crash in vhost

> after passing down

> >    to vhost, as vhost use this random value to index an array index.

> >  - fix the typo in the doc and description

> >  - address vq index for reset_owner

> >

> > Changes since v1:

> >  - use s->nc.info_str when bringing up/down the backend

> >

> >  docs/specs/vhost-user.txt |  7 ++++++-

> >  hw/net/vhost_net.c        |  3 ++-

> >  hw/virtio/vhost-user.c    | 11 ++++++++++-

> >  net/vhost-user.c          | 37 ++++++++++++++++++++++++-------------

> >  qapi-schema.json          |  6 +++++-

> >  qemu-options.hx           |  5 +++--

> >  6 files changed, 50 insertions(+), 19 deletions(-)

> >

> > diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt

> > index 70da3b1..9390f89 100644

> > --- a/docs/specs/vhost-user.txt

> > +++ b/docs/specs/vhost-user.txt

> > @@ -135,6 +135,11 @@ As older slaves don't support negotiating

> > protocol features,  a feature bit was dedicated for this purpose:

> >  #define VHOST_USER_F_PROTOCOL_FEATURES 30

> >

> > +Multi queue support

> > +-------------------

> > +The protocol supports multiple queues by setting all index fields in

> > +the sent messages to a properly calculated value.

> > +

> >  Message types

> >  -------------

> >

> > @@ -198,7 +203,7 @@ Message types

> >

> >        Id: 4

> >        Equivalent ioctl: VHOST_RESET_OWNER

> > -      Master payload: N/A

> > +      Master payload: vring state description

> >

> >        Issued when a new connection is about to be closed. The Master will no

> >        longer own this connection (and will usually close it).

> 

> This is an interface change, isn't it?

> We can't make it unconditionally, need to make it dependent on a protocol

> flag.

> 

> 

> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index

> > 1f25cb3..9cd6c05 100644

> > --- a/hw/net/vhost_net.c

> > +++ b/hw/net/vhost_net.c

> > @@ -159,6 +159,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions

> > *options)

> >

> >      net->dev.nvqs = 2;

> >      net->dev.vqs = net->vqs;

> > +    net->dev.vq_index = net->nc->queue_index;

> >

> >      r = vhost_dev_init(&net->dev, options->opaque,

> >                         options->backend_type, options->force); @@

> > -269,7 +270,7 @@ static void vhost_net_stop_one(struct vhost_net *net,

> >          for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {

> >              const VhostOps *vhost_ops = net->dev.vhost_ops;

> >              int r = vhost_ops->vhost_call(&net->dev, VHOST_RESET_OWNER,

> > -                                          NULL);

> > +                                          &file);

> >              assert(r >= 0);

> >          }

> >      }

> > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index

> > 27ba035..fb11d4c 100644

> > --- a/hw/virtio/vhost-user.c

> > +++ b/hw/virtio/vhost-user.c

> > @@ -219,7 +219,12 @@ static int vhost_user_call(struct vhost_dev *dev,

> unsigned long int request,

> >          break;

> >

> >      case VHOST_USER_SET_OWNER:

> > +        break;

> > +

> >      case VHOST_USER_RESET_OWNER:

> > +        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));

> > +        msg.state.index += dev->vq_index;

> > +        msg.size = sizeof(m.state);

> >          break;

> >

> >      case VHOST_USER_SET_MEM_TABLE:

> > @@ -262,17 +267,20 @@ static int vhost_user_call(struct vhost_dev *dev,

> unsigned long int request,

> >      case VHOST_USER_SET_VRING_NUM:

> >      case VHOST_USER_SET_VRING_BASE:

> >          memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));

> > +        msg.state.index += dev->vq_index;

> >          msg.size = sizeof(m.state);

> >          break;

> >

> >      case VHOST_USER_GET_VRING_BASE:

> >          memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));

> > +        msg.state.index += dev->vq_index;

> >          msg.size = sizeof(m.state);

> >          need_reply = 1;

> >          break;

> >

> >      case VHOST_USER_SET_VRING_ADDR:

> >          memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));

> > +        msg.addr.index += dev->vq_index;

> >          msg.size = sizeof(m.addr);

> >          break;

> >

> > @@ -280,7 +288,7 @@ static int vhost_user_call(struct vhost_dev *dev,

> unsigned long int request,

> >      case VHOST_USER_SET_VRING_CALL:

> >      case VHOST_USER_SET_VRING_ERR:

> >          file = arg;

> > -        msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;

> > +        msg.u64 = (file->index + dev->vq_index) &

> > + VHOST_USER_VRING_IDX_MASK;

> >          msg.size = sizeof(m.u64);

> >          if (ioeventfd_enabled() && file->fd > 0) {

> >              fds[fd_num++] = file->fd; @@ -322,6 +330,7 @@ static int

> > vhost_user_call(struct vhost_dev *dev, unsigned long int request,

> >                  error_report("Received bad msg size.");

> >                  return -1;

> >              }

> > +            msg.state.index -= dev->vq_index;

> >              memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));

> >              break;

> >          default:

> > diff --git a/net/vhost-user.c b/net/vhost-user.c index

> > 1d86a2b..904d8af 100644

> > --- a/net/vhost-user.c

> > +++ b/net/vhost-user.c

> > @@ -121,35 +121,39 @@ static void net_vhost_user_event(void *opaque,

> int event)

> >      case CHR_EVENT_OPENED:

> >          vhost_user_start(s);

> >          net_vhost_link_down(s, false);

> > -        error_report("chardev \"%s\" went up", s->chr->label);

> > +        error_report("chardev \"%s\" went up", s->nc.info_str);

> >          break;

> >      case CHR_EVENT_CLOSED:

> >          net_vhost_link_down(s, true);

> >          vhost_user_stop(s);

> > -        error_report("chardev \"%s\" went down", s->chr->label);

> > +        error_report("chardev \"%s\" went down", s->nc.info_str);

> >          break;

> >      }

> >  }

> >

> >  static int net_vhost_user_init(NetClientState *peer, const char *device,

> > -                               const char *name, CharDriverState *chr)

> > +                               const char *name, CharDriverState *chr,

> > +                               uint32_t queues)

> >  {

> >      NetClientState *nc;

> >      VhostUserState *s;

> > +    int i;

> >

> > -    nc = qemu_new_net_client(&net_vhost_user_info, peer, device,

> name);

> > +    for (i = 0; i < queues; i++) {

> > +        nc = qemu_new_net_client(&net_vhost_user_info, peer, device,

> > + name);

> >

> > -    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",

> > -             chr->label);

> > +        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",

> > +                 i, chr->label);

> >

> > -    s = DO_UPCAST(VhostUserState, nc, nc);

> > +        s = DO_UPCAST(VhostUserState, nc, nc);

> >

> > -    /* We don't provide a receive callback */

> > -    s->nc.receive_disabled = 1;

> > -    s->chr = chr;

> > -

> > -    qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event,

> s);

> > +        /* We don't provide a receive callback */

> > +        s->nc.receive_disabled = 1;

> > +        s->chr = chr;

> > +        s->nc.queue_index = i;

> >

> > +        qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event,

> s);

> > +    }

> >      return 0;

> >  }

> >

> > @@ -225,6 +229,7 @@ static int net_vhost_check_net(QemuOpts *opts,

> > void *opaque)

> 

> 

> There are two problems here:

> 

> 1. we don't really know that the backend

>    is able to support the requested number of queues.

>    If not, everything will fail, silently.

>    A new message to query the # of queues could help, though

>    I'm not sure what can be done on failure. Fail connection?

> 

> 2. each message (e.g. set memory table) is sent multiple times,

>    on the same socket.

> 

I think it is tough to resolve these 2 comments, as the current message is either vhost-dev based or virt-queue based,
The multiple queues(pair) feature use multiple vhost-devs to implement itself.
For #1
So the queue number is something should be seen in the upper level of vhost-dev rather than inside the vhost-dev.
For each vhost-net, there are 2 virt-queues, one is for Rx the other is for Tx.
introduce the virt-queue pair number into the vhost-dev? But I don't think it is good, as for each vhost-dev, there is only one
virt-queue pair.

Where should I put the virt-queue pair number to? I don't get the perfect answer till now. Any suggestion is welcome.

Could we assume the vhost backend has the ability to create enough virt-queue pair(e.g. 0x8000 is the max) if qemu require
Vhost backend to do it. If it is correct, we don't need get virt-queue pair number from vhost backend, as vhost backend can
Create all virt-queue pair required by qemu.
The virtio frontend(on guest) has the flexibility to enable which virt-queue according to its own capability, qemu can do it by using
Set_vring_flag message to notify vhost backend.   

For #2 
The memory table message is also vhost-dev based, it wouldn't hurt we send it a few times, vhost backend could
Keep it vhost-dev based too, or keep it once(keep it when first time and ignore in rest messages from the same connected-fd)
Any other good suggestion is welcome too :-)

> 

> 

> >  int net_init_vhost_user(const NetClientOptions *opts, const char *name,

> >                          NetClientState *peer)  {

> > +    uint32_t queues;

> >      const NetdevVhostUserOptions *vhost_user_opts;

> >      CharDriverState *chr;

> >

> > @@ -243,6 +248,12 @@ int net_init_vhost_user(const NetClientOptions

> *opts, const char *name,

> >          return -1;

> >      }

> >

> > +    /* number of queues for multiqueue */

> > +    if (vhost_user_opts->has_queues) {

> > +        queues = vhost_user_opts->queues;

> > +    } else {

> > +        queues = 1;

> > +    }

> >

> > -    return net_vhost_user_init(peer, "vhost_user", name, chr);

> > +    return net_vhost_user_init(peer, "vhost_user", name, chr,

> > + queues);

> >  }

> > diff --git a/qapi-schema.json b/qapi-schema.json index

> > f97ffa1..51e40ce 100644

> > --- a/qapi-schema.json

> > +++ b/qapi-schema.json

> > @@ -2444,12 +2444,16 @@

> >  #

> >  # @vhostforce: #optional vhost on for non-MSIX virtio guests (default:

> false).

> >  #

> > +# @queues: #optional number of queues to be created for multiqueue

> vhost-user

> > +#          (default: 1) (Since 2.5)

> > +#

> >  # Since 2.1

> >  ##

> >  { 'struct': 'NetdevVhostUserOptions',

> >    'data': {

> >      'chardev':        'str',

> > -    '*vhostforce':    'bool' } }

> > +    '*vhostforce':    'bool',

> > +    '*queues':        'uint32' } }

> >

> >  ##

> >  # @NetClientOptions

> > diff --git a/qemu-options.hx b/qemu-options.hx index ec356f6..dad035e

> > 100644

> > --- a/qemu-options.hx

> > +++ b/qemu-options.hx

> > @@ -1942,13 +1942,14 @@ The hubport netdev lets you connect a NIC to a

> > QEMU "vlan" instead of a single  netdev.  @code{-net} and

> > @code{-device} with parameter @option{vlan} create the  required hub

> automatically.

> >

> > -@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]

> > +@item -netdev

> > +vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]

> >

> >  Establish a vhost-user netdev, backed by a chardev @var{id}. The

> > chardev should  be a unix domain socket backed one. The vhost-user

> > uses a specifically defined  protocol to pass vhost ioctl replacement

> > messages to an application on the other  end of the socket. On

> > non-MSIX guests, the feature can be forced with -@var{vhostforce}.

> > +@var{vhostforce}. Use 'queues=@var{n}' to specify the number of

> > +queues to be created for multiqueue vhost-user.

> >

> >  Example:

> >  @example

> > --

> > 1.8.4.2

> >

> 

> --

> You received this message because you are subscribed to the Google Groups

> "Snabb Switch development" group.

> To unsubscribe from this group and stop receiving emails from it, send an

> email to snabb-devel+unsubscribe@googlegroups.com.

> To post to this group, send an email to snabb-devel@googlegroups.com.

> Visit this group at http://groups.google.com/group/snabb-devel.
Michael S. Tsirkin Aug. 27, 2015, 1:05 p.m. UTC | #5
On Tue, Aug 25, 2015 at 03:25:54AM +0000, Ouyang, Changchun wrote:
> Hi Michael,
> 
> > -----Original Message-----
> > From: snabb-devel@googlegroups.com [mailto:snabb-
> > devel@googlegroups.com] On Behalf Of Michael S. Tsirkin
> > Sent: Thursday, August 13, 2015 5:19 PM
> > To: Ouyang, Changchun
> > Cc: qemu-devel@nongnu.org; snabb-devel@googlegroups.com;
> > thibaut.collet@6wind.com; n.nikolaev@virtualopensystems.com;
> > luke@snabb.co; Long, Thomas
> > Subject: [snabb-devel] Re: [PATCH v6 1/2] vhost-user: add multi queue
> > support
> > 
> > On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
> > > Based on patch by Nikolay Nikolaev:
> > > Vhost-user will implement the multi queue support in a similar way to
> > > what vhost already has - a separate thread for each queue.
> > > To enable the multi queue functionality - a new command line parameter
> > > "queues" is introduced for the vhost-user netdev.
> > >
> > > The RESET_OWNER change is based on commit:
> > >    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
> > > If it is reverted, the patch need update for it accordingly.
> > >
> > > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > > ---
> > > Changes since v5:
> > >  - fix the message descption for VHOST_RESET_OWNER in vhost-user txt
> > >
> > > Changes since v4:
> > >  - remove the unnecessary trailing '\n'
> > >
> > > Changes since v3:
> > >  - fix one typo and wrap one long line
> > >
> > > Changes since v2:
> > >  - fix vq index issue for set_vring_call
> > >    When it is the case of VHOST_SET_VRING_CALL, The vq_index is not
> > initialized before it is used,
> > >    thus it could be a random value. The random value leads to crash in vhost
> > after passing down
> > >    to vhost, as vhost use this random value to index an array index.
> > >  - fix the typo in the doc and description
> > >  - address vq index for reset_owner
> > >
> > > Changes since v1:
> > >  - use s->nc.info_str when bringing up/down the backend
> > >
> > >  docs/specs/vhost-user.txt |  7 ++++++-
> > >  hw/net/vhost_net.c        |  3 ++-
> > >  hw/virtio/vhost-user.c    | 11 ++++++++++-
> > >  net/vhost-user.c          | 37 ++++++++++++++++++++++++-------------
> > >  qapi-schema.json          |  6 +++++-
> > >  qemu-options.hx           |  5 +++--
> > >  6 files changed, 50 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
> > > index 70da3b1..9390f89 100644
> > > --- a/docs/specs/vhost-user.txt
> > > +++ b/docs/specs/vhost-user.txt
> > > @@ -135,6 +135,11 @@ As older slaves don't support negotiating
> > > protocol features,  a feature bit was dedicated for this purpose:
> > >  #define VHOST_USER_F_PROTOCOL_FEATURES 30
> > >
> > > +Multi queue support
> > > +-------------------
> > > +The protocol supports multiple queues by setting all index fields in
> > > +the sent messages to a properly calculated value.
> > > +
> > >  Message types
> > >  -------------
> > >
> > > @@ -198,7 +203,7 @@ Message types
> > >
> > >        Id: 4
> > >        Equivalent ioctl: VHOST_RESET_OWNER
> > > -      Master payload: N/A
> > > +      Master payload: vring state description
> > >
> > >        Issued when a new connection is about to be closed. The Master will no
> > >        longer own this connection (and will usually close it).
> > 
> > This is an interface change, isn't it?
> > We can't make it unconditionally, need to make it dependent on a protocol
> > flag.


Pls remember to fix this one.

> > 
> > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index
> > > 1f25cb3..9cd6c05 100644
> > > --- a/hw/net/vhost_net.c
> > > +++ b/hw/net/vhost_net.c
> > > @@ -159,6 +159,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions
> > > *options)
> > >
> > >      net->dev.nvqs = 2;
> > >      net->dev.vqs = net->vqs;
> > > +    net->dev.vq_index = net->nc->queue_index;
> > >
> > >      r = vhost_dev_init(&net->dev, options->opaque,
> > >                         options->backend_type, options->force); @@
> > > -269,7 +270,7 @@ static void vhost_net_stop_one(struct vhost_net *net,
> > >          for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
> > >              const VhostOps *vhost_ops = net->dev.vhost_ops;
> > >              int r = vhost_ops->vhost_call(&net->dev, VHOST_RESET_OWNER,
> > > -                                          NULL);
> > > +                                          &file);
> > >              assert(r >= 0);
> > >          }
> > >      }
> > > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index
> > > 27ba035..fb11d4c 100644
> > > --- a/hw/virtio/vhost-user.c
> > > +++ b/hw/virtio/vhost-user.c
> > > @@ -219,7 +219,12 @@ static int vhost_user_call(struct vhost_dev *dev,
> > unsigned long int request,
> > >          break;
> > >
> > >      case VHOST_USER_SET_OWNER:
> > > +        break;
> > > +
> > >      case VHOST_USER_RESET_OWNER:
> > > +        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
> > > +        msg.state.index += dev->vq_index;
> > > +        msg.size = sizeof(m.state);
> > >          break;
> > >
> > >      case VHOST_USER_SET_MEM_TABLE:
> > > @@ -262,17 +267,20 @@ static int vhost_user_call(struct vhost_dev *dev,
> > unsigned long int request,
> > >      case VHOST_USER_SET_VRING_NUM:
> > >      case VHOST_USER_SET_VRING_BASE:
> > >          memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
> > > +        msg.state.index += dev->vq_index;
> > >          msg.size = sizeof(m.state);
> > >          break;
> > >
> > >      case VHOST_USER_GET_VRING_BASE:
> > >          memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
> > > +        msg.state.index += dev->vq_index;
> > >          msg.size = sizeof(m.state);
> > >          need_reply = 1;
> > >          break;
> > >
> > >      case VHOST_USER_SET_VRING_ADDR:
> > >          memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
> > > +        msg.addr.index += dev->vq_index;
> > >          msg.size = sizeof(m.addr);
> > >          break;
> > >
> > > @@ -280,7 +288,7 @@ static int vhost_user_call(struct vhost_dev *dev,
> > unsigned long int request,
> > >      case VHOST_USER_SET_VRING_CALL:
> > >      case VHOST_USER_SET_VRING_ERR:
> > >          file = arg;
> > > -        msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
> > > +        msg.u64 = (file->index + dev->vq_index) &
> > > + VHOST_USER_VRING_IDX_MASK;
> > >          msg.size = sizeof(m.u64);
> > >          if (ioeventfd_enabled() && file->fd > 0) {
> > >              fds[fd_num++] = file->fd; @@ -322,6 +330,7 @@ static int
> > > vhost_user_call(struct vhost_dev *dev, unsigned long int request,
> > >                  error_report("Received bad msg size.");
> > >                  return -1;
> > >              }
> > > +            msg.state.index -= dev->vq_index;
> > >              memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
> > >              break;
> > >          default:
> > > diff --git a/net/vhost-user.c b/net/vhost-user.c index
> > > 1d86a2b..904d8af 100644
> > > --- a/net/vhost-user.c
> > > +++ b/net/vhost-user.c
> > > @@ -121,35 +121,39 @@ static void net_vhost_user_event(void *opaque,
> > int event)
> > >      case CHR_EVENT_OPENED:
> > >          vhost_user_start(s);
> > >          net_vhost_link_down(s, false);
> > > -        error_report("chardev \"%s\" went up", s->chr->label);
> > > +        error_report("chardev \"%s\" went up", s->nc.info_str);
> > >          break;
> > >      case CHR_EVENT_CLOSED:
> > >          net_vhost_link_down(s, true);
> > >          vhost_user_stop(s);
> > > -        error_report("chardev \"%s\" went down", s->chr->label);
> > > +        error_report("chardev \"%s\" went down", s->nc.info_str);
> > >          break;
> > >      }
> > >  }

BTW this seems pretty hacky: you get multiple messages when one
client connects. Why add multiple event listeners to the same
chat device?


> > >
> > >  static int net_vhost_user_init(NetClientState *peer, const char *device,
> > > -                               const char *name, CharDriverState *chr)
> > > +                               const char *name, CharDriverState *chr,
> > > +                               uint32_t queues)
> > >  {
> > >      NetClientState *nc;
> > >      VhostUserState *s;
> > > +    int i;
> > >
> > > -    nc = qemu_new_net_client(&net_vhost_user_info, peer, device,
> > name);
> > > +    for (i = 0; i < queues; i++) {
> > > +        nc = qemu_new_net_client(&net_vhost_user_info, peer, device,
> > > + name);
> > >
> > > -    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
> > > -             chr->label);
> > > +        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
> > > +                 i, chr->label);
> > >
> > > -    s = DO_UPCAST(VhostUserState, nc, nc);
> > > +        s = DO_UPCAST(VhostUserState, nc, nc);
> > >
> > > -    /* We don't provide a receive callback */
> > > -    s->nc.receive_disabled = 1;
> > > -    s->chr = chr;
> > > -
> > > -    qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event,
> > s);
> > > +        /* We don't provide a receive callback */
> > > +        s->nc.receive_disabled = 1;
> > > +        s->chr = chr;
> > > +        s->nc.queue_index = i;
> > >
> > > +        qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event,
> > s);
> > > +    }
> > >      return 0;
> > >  }
> > >
> > > @@ -225,6 +229,7 @@ static int net_vhost_check_net(QemuOpts *opts,
> > > void *opaque)
> > 
> > 
> > There are two problems here:
> > 
> > 1. we don't really know that the backend
> >    is able to support the requested number of queues.
> >    If not, everything will fail, silently.
> >    A new message to query the # of queues could help, though
> >    I'm not sure what can be done on failure. Fail connection?
> > 
> > 2. each message (e.g. set memory table) is sent multiple times,
> >    on the same socket.
> > 
> I think it is tough to resolve these 2 comments, as the current message is either vhost-dev based or virt-queue based,
> The multiple queues(pair) feature use multiple vhost-devs to implement itself.
> For #1
> So the queue number is something should be seen in the upper level of vhost-dev rather than inside the vhost-dev.
> For each vhost-net, there are 2 virt-queues, one is for Rx the other is for Tx.
> introduce the virt-queue pair number into the vhost-dev? But I don't think it is good, as for each vhost-dev, there is only one
> virt-queue pair.
> 
> Where should I put the virt-queue pair number to? I don't get the perfect answer till now. Any suggestion is welcome.
> 
> Could we assume the vhost backend has the ability to create enough virt-queue pair(e.g. 0x8000 is the max) if qemu require
> Vhost backend to do it. If it is correct, we don't need get virt-queue pair number from vhost backend, as vhost backend can
> Create all virt-queue pair required by qemu.
> The virtio frontend(on guest) has the flexibility to enable which virt-queue according to its own capability, qemu can do it by using
> Set_vring_flag message to notify vhost backend.   

I'm reluctant to agree to this. Implementations tend to get this wrong,
e.g. they would only test with 2 queues and assume everything is OK.
With an explicit message, this seems more robust.

Why is it so hard to implement?  User specifies queues=X.
Can't we simply validate that backend supports this # of queues?


> For #2 
> The memory table message is also vhost-dev based, it wouldn't hurt we send it a few times, vhost backend could
> Keep it vhost-dev based too, or keep it once(keep it when first time and ignore in rest messages from the same connected-fd)
> Any other good suggestion is welcome too :-)

Add code in vhost-user to skip sending the useless messages?  Yes they
seem harmless but implementations tend to develop dependencies on such
bugs, then we get to maintain them forever.

> > 
> > 
> > >  int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> > >                          NetClientState *peer)  {
> > > +    uint32_t queues;
> > >      const NetdevVhostUserOptions *vhost_user_opts;
> > >      CharDriverState *chr;
> > >
> > > @@ -243,6 +248,12 @@ int net_init_vhost_user(const NetClientOptions
> > *opts, const char *name,
> > >          return -1;
> > >      }
> > >
> > > +    /* number of queues for multiqueue */
> > > +    if (vhost_user_opts->has_queues) {
> > > +        queues = vhost_user_opts->queues;
> > > +    } else {
> > > +        queues = 1;
> > > +    }
> > >
> > > -    return net_vhost_user_init(peer, "vhost_user", name, chr);
> > > +    return net_vhost_user_init(peer, "vhost_user", name, chr,
> > > + queues);
> > >  }
> > > diff --git a/qapi-schema.json b/qapi-schema.json index
> > > f97ffa1..51e40ce 100644
> > > --- a/qapi-schema.json
> > > +++ b/qapi-schema.json
> > > @@ -2444,12 +2444,16 @@
> > >  #
> > >  # @vhostforce: #optional vhost on for non-MSIX virtio guests (default:
> > false).
> > >  #
> > > +# @queues: #optional number of queues to be created for multiqueue
> > vhost-user
> > > +#          (default: 1) (Since 2.5)
> > > +#
> > >  # Since 2.1
> > >  ##
> > >  { 'struct': 'NetdevVhostUserOptions',
> > >    'data': {
> > >      'chardev':        'str',
> > > -    '*vhostforce':    'bool' } }
> > > +    '*vhostforce':    'bool',
> > > +    '*queues':        'uint32' } }
> > >
> > >  ##
> > >  # @NetClientOptions
> > > diff --git a/qemu-options.hx b/qemu-options.hx index ec356f6..dad035e
> > > 100644
> > > --- a/qemu-options.hx
> > > +++ b/qemu-options.hx
> > > @@ -1942,13 +1942,14 @@ The hubport netdev lets you connect a NIC to a
> > > QEMU "vlan" instead of a single  netdev.  @code{-net} and
> > > @code{-device} with parameter @option{vlan} create the  required hub
> > automatically.
> > >
> > > -@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> > > +@item -netdev
> > > +vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
> > >
> > >  Establish a vhost-user netdev, backed by a chardev @var{id}. The
> > > chardev should  be a unix domain socket backed one. The vhost-user
> > > uses a specifically defined  protocol to pass vhost ioctl replacement
> > > messages to an application on the other  end of the socket. On
> > > non-MSIX guests, the feature can be forced with -@var{vhostforce}.
> > > +@var{vhostforce}. Use 'queues=@var{n}' to specify the number of
> > > +queues to be created for multiqueue vhost-user.
> > >
> > >  Example:
> > >  @example
> > > --
> > > 1.8.4.2
> > >
> > 
> > --
> > You received this message because you are subscribed to the Google Groups
> > "Snabb Switch development" group.
> > To unsubscribe from this group and stop receiving emails from it, send an
> > email to snabb-devel+unsubscribe@googlegroups.com.
> > To post to this group, send an email to snabb-devel@googlegroups.com.
> > Visit this group at http://groups.google.com/group/snabb-devel.
Ouyang, Changchun Aug. 28, 2015, 1:53 a.m. UTC | #6
Hi Michael,

Thanks for your response!

> -----Original Message-----

> From: snabb-devel@googlegroups.com [mailto:snabb-

> devel@googlegroups.com] On Behalf Of Michael S. Tsirkin

> Sent: Thursday, August 27, 2015 9:05 PM

> To: Ouyang, Changchun

> Cc: snabb-devel@googlegroups.com; qemu-devel@nongnu.org;

> thibaut.collet@6wind.com; n.nikolaev@virtualopensystems.com;

> luke@snabb.co; Long, Thomas; Liu, Yuanhan

> Subject: Re: [snabb-devel] Re: [PATCH v6 1/2] vhost-user: add multi queue

> support

> 

> On Tue, Aug 25, 2015 at 03:25:54AM +0000, Ouyang, Changchun wrote:

> > Hi Michael,

> >

> > > -----Original Message-----

> > > From: snabb-devel@googlegroups.com [mailto:snabb-

> > > devel@googlegroups.com] On Behalf Of Michael S. Tsirkin

> > > Sent: Thursday, August 13, 2015 5:19 PM

> > > To: Ouyang, Changchun

> > > Cc: qemu-devel@nongnu.org; snabb-devel@googlegroups.com;

> > > thibaut.collet@6wind.com; n.nikolaev@virtualopensystems.com;

> > > luke@snabb.co; Long, Thomas

> > > Subject: [snabb-devel] Re: [PATCH v6 1/2] vhost-user: add multi

> > > queue support

> > >

> > > On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:

> > > > Based on patch by Nikolay Nikolaev:

> > > > Vhost-user will implement the multi queue support in a similar way

> > > > to what vhost already has - a separate thread for each queue.

> > > > To enable the multi queue functionality - a new command line

> > > > parameter "queues" is introduced for the vhost-user netdev.

> > > >

> > > > The RESET_OWNER change is based on commit:

> > > >    294ce717e0f212ed0763307f3eab72b4a1bdf4d0

> > > > If it is reverted, the patch need update for it accordingly.

> > > >

> > > > Signed-off-by: Nikolay Nikolaev

> > > > <n.nikolaev@virtualopensystems.com>

> > > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

> > > > ---

> > > > Changes since v5:

> > > >  - fix the message descption for VHOST_RESET_OWNER in vhost-user

> > > > txt

> > > >

> > > > Changes since v4:

> > > >  - remove the unnecessary trailing '\n'

> > > >

> > > > Changes since v3:

> > > >  - fix one typo and wrap one long line

> > > >

> > > > Changes since v2:

> > > >  - fix vq index issue for set_vring_call

> > > >    When it is the case of VHOST_SET_VRING_CALL, The vq_index is

> > > > not

> > > initialized before it is used,

> > > >    thus it could be a random value. The random value leads to

> > > > crash in vhost

> > > after passing down

> > > >    to vhost, as vhost use this random value to index an array index.

> > > >  - fix the typo in the doc and description

> > > >  - address vq index for reset_owner

> > > >

> > > > Changes since v1:

> > > >  - use s->nc.info_str when bringing up/down the backend

> > > >

> > > >  docs/specs/vhost-user.txt |  7 ++++++-

> > > >  hw/net/vhost_net.c        |  3 ++-

> > > >  hw/virtio/vhost-user.c    | 11 ++++++++++-

> > > >  net/vhost-user.c          | 37 ++++++++++++++++++++++++-------------

> > > >  qapi-schema.json          |  6 +++++-

> > > >  qemu-options.hx           |  5 +++--

> > > >  6 files changed, 50 insertions(+), 19 deletions(-)

> > > >

> > > > diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt

> > > > index 70da3b1..9390f89 100644

> > > > --- a/docs/specs/vhost-user.txt

> > > > +++ b/docs/specs/vhost-user.txt

> > > > @@ -135,6 +135,11 @@ As older slaves don't support negotiating

> > > > protocol features,  a feature bit was dedicated for this purpose:

> > > >  #define VHOST_USER_F_PROTOCOL_FEATURES 30

> > > >

> > > > +Multi queue support

> > > > +-------------------

> > > > +The protocol supports multiple queues by setting all index fields

> > > > +in the sent messages to a properly calculated value.

> > > > +

> > > >  Message types

> > > >  -------------

> > > >

> > > > @@ -198,7 +203,7 @@ Message types

> > > >

> > > >        Id: 4

> > > >        Equivalent ioctl: VHOST_RESET_OWNER

> > > > -      Master payload: N/A

> > > > +      Master payload: vring state description

> > > >

> > > >        Issued when a new connection is about to be closed. The Master

> will no

> > > >        longer own this connection (and will usually close it).

> > >

> > > This is an interface change, isn't it?

> > > We can't make it unconditionally, need to make it dependent on a

> > > protocol flag.

> 

> 

> Pls remember to fix this one.

> 

> > >

> > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index

> > > > 1f25cb3..9cd6c05 100644

> > > > --- a/hw/net/vhost_net.c

> > > > +++ b/hw/net/vhost_net.c

> > > > @@ -159,6 +159,7 @@ struct vhost_net

> > > > *vhost_net_init(VhostNetOptions

> > > > *options)

> > > >

> > > >      net->dev.nvqs = 2;

> > > >      net->dev.vqs = net->vqs;

> > > > +    net->dev.vq_index = net->nc->queue_index;

> > > >

> > > >      r = vhost_dev_init(&net->dev, options->opaque,

> > > >                         options->backend_type, options->force); @@

> > > > -269,7 +270,7 @@ static void vhost_net_stop_one(struct vhost_net

> *net,

> > > >          for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {

> > > >              const VhostOps *vhost_ops = net->dev.vhost_ops;

> > > >              int r = vhost_ops->vhost_call(&net->dev, VHOST_RESET_OWNER,

> > > > -                                          NULL);

> > > > +                                          &file);

> > > >              assert(r >= 0);

> > > >          }

> > > >      }

> > > > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index

> > > > 27ba035..fb11d4c 100644

> > > > --- a/hw/virtio/vhost-user.c

> > > > +++ b/hw/virtio/vhost-user.c

> > > > @@ -219,7 +219,12 @@ static int vhost_user_call(struct vhost_dev

> > > > *dev,

> > > unsigned long int request,

> > > >          break;

> > > >

> > > >      case VHOST_USER_SET_OWNER:

> > > > +        break;

> > > > +

> > > >      case VHOST_USER_RESET_OWNER:

> > > > +        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));

> > > > +        msg.state.index += dev->vq_index;

> > > > +        msg.size = sizeof(m.state);

> > > >          break;

> > > >

> > > >      case VHOST_USER_SET_MEM_TABLE:

> > > > @@ -262,17 +267,20 @@ static int vhost_user_call(struct vhost_dev

> > > > *dev,

> > > unsigned long int request,

> > > >      case VHOST_USER_SET_VRING_NUM:

> > > >      case VHOST_USER_SET_VRING_BASE:

> > > >          memcpy(&msg.state, arg, sizeof(struct

> > > > vhost_vring_state));

> > > > +        msg.state.index += dev->vq_index;

> > > >          msg.size = sizeof(m.state);

> > > >          break;

> > > >

> > > >      case VHOST_USER_GET_VRING_BASE:

> > > >          memcpy(&msg.state, arg, sizeof(struct

> > > > vhost_vring_state));

> > > > +        msg.state.index += dev->vq_index;

> > > >          msg.size = sizeof(m.state);

> > > >          need_reply = 1;

> > > >          break;

> > > >

> > > >      case VHOST_USER_SET_VRING_ADDR:

> > > >          memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));

> > > > +        msg.addr.index += dev->vq_index;

> > > >          msg.size = sizeof(m.addr);

> > > >          break;

> > > >

> > > > @@ -280,7 +288,7 @@ static int vhost_user_call(struct vhost_dev

> > > > *dev,

> > > unsigned long int request,

> > > >      case VHOST_USER_SET_VRING_CALL:

> > > >      case VHOST_USER_SET_VRING_ERR:

> > > >          file = arg;

> > > > -        msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;

> > > > +        msg.u64 = (file->index + dev->vq_index) &

> > > > + VHOST_USER_VRING_IDX_MASK;

> > > >          msg.size = sizeof(m.u64);

> > > >          if (ioeventfd_enabled() && file->fd > 0) {

> > > >              fds[fd_num++] = file->fd; @@ -322,6 +330,7 @@ static

> > > > int vhost_user_call(struct vhost_dev *dev, unsigned long int request,

> > > >                  error_report("Received bad msg size.");

> > > >                  return -1;

> > > >              }

> > > > +            msg.state.index -= dev->vq_index;

> > > >              memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));

> > > >              break;

> > > >          default:

> > > > diff --git a/net/vhost-user.c b/net/vhost-user.c index

> > > > 1d86a2b..904d8af 100644

> > > > --- a/net/vhost-user.c

> > > > +++ b/net/vhost-user.c

> > > > @@ -121,35 +121,39 @@ static void net_vhost_user_event(void

> > > > *opaque,

> > > int event)

> > > >      case CHR_EVENT_OPENED:

> > > >          vhost_user_start(s);

> > > >          net_vhost_link_down(s, false);

> > > > -        error_report("chardev \"%s\" went up", s->chr->label);

> > > > +        error_report("chardev \"%s\" went up", s->nc.info_str);

> > > >          break;

> > > >      case CHR_EVENT_CLOSED:

> > > >          net_vhost_link_down(s, true);

> > > >          vhost_user_stop(s);

> > > > -        error_report("chardev \"%s\" went down", s->chr->label);

> > > > +        error_report("chardev \"%s\" went down", s->nc.info_str);

> > > >          break;

> > > >      }

> > > >  }

> 

> BTW this seems pretty hacky: you get multiple messages when one client

> connects. Why add multiple event listeners to the same chat device?

> 

> 

> > > >

> > > >  static int net_vhost_user_init(NetClientState *peer, const char *device,

> > > > -                               const char *name, CharDriverState *chr)

> > > > +                               const char *name, CharDriverState *chr,

> > > > +                               uint32_t queues)

> > > >  {

> > > >      NetClientState *nc;

> > > >      VhostUserState *s;

> > > > +    int i;

> > > >

> > > > -    nc = qemu_new_net_client(&net_vhost_user_info, peer, device,

> > > name);

> > > > +    for (i = 0; i < queues; i++) {

> > > > +        nc = qemu_new_net_client(&net_vhost_user_info, peer,

> > > > + device, name);

> > > >

> > > > -    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",

> > > > -             chr->label);

> > > > +        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",

> > > > +                 i, chr->label);

> > > >

> > > > -    s = DO_UPCAST(VhostUserState, nc, nc);

> > > > +        s = DO_UPCAST(VhostUserState, nc, nc);

> > > >

> > > > -    /* We don't provide a receive callback */

> > > > -    s->nc.receive_disabled = 1;

> > > > -    s->chr = chr;

> > > > -

> > > > -    qemu_chr_add_handlers(s->chr, NULL, NULL,

> net_vhost_user_event,

> > > s);

> > > > +        /* We don't provide a receive callback */

> > > > +        s->nc.receive_disabled = 1;

> > > > +        s->chr = chr;

> > > > +        s->nc.queue_index = i;

> > > >

> > > > +        qemu_chr_add_handlers(s->chr, NULL, NULL,

> > > > + net_vhost_user_event,

> > > s);

> > > > +    }

> > > >      return 0;

> > > >  }

> > > >

> > > > @@ -225,6 +229,7 @@ static int net_vhost_check_net(QemuOpts

> *opts,

> > > > void *opaque)

> > >

> > >

> > > There are two problems here:

> > >

> > > 1. we don't really know that the backend

> > >    is able to support the requested number of queues.

> > >    If not, everything will fail, silently.

> > >    A new message to query the # of queues could help, though

> > >    I'm not sure what can be done on failure. Fail connection?

> > >

> > > 2. each message (e.g. set memory table) is sent multiple times,

> > >    on the same socket.

> > >

> > I think it is tough to resolve these 2 comments, as the current

> > message is either vhost-dev based or virt-queue based, The multiple

> queues(pair) feature use multiple vhost-devs to implement itself.

> > For #1

> > So the queue number is something should be seen in the upper level of

> vhost-dev rather than inside the vhost-dev.

> > For each vhost-net, there are 2 virt-queues, one is for Rx the other is for Tx.

> > introduce the virt-queue pair number into the vhost-dev? But I don't

> > think it is good, as for each vhost-dev, there is only one virt-queue pair.

> >

> > Where should I put the virt-queue pair number to? I don't get the perfect

> answer till now. Any suggestion is welcome.

> >

> > Could we assume the vhost backend has the ability to create enough

> > virt-queue pair(e.g. 0x8000 is the max) if qemu require Vhost backend

> > to do it. If it is correct, we don't need get virt-queue pair number from

> vhost backend, as vhost backend can Create all virt-queue pair required by

> qemu.

> > The virtio frontend(on guest) has the flexibility to enable which virt-queue

> according to its own capability, qemu can do it by using

> > Set_vring_flag message to notify vhost backend.

> 

> I'm reluctant to agree to this. Implementations tend to get this wrong, e.g.

> they would only test with 2 queues and assume everything is OK.

> With an explicit message, this seems more robust.

> 

> Why is it so hard to implement?  User specifies queues=X.

> Can't we simply validate that backend supports this # of queues?


Yes exactly in the qemu, we use qeueus=X to specify the virt-queue pair number.
In my previous comments, I am talking about how to resolve such scenario: 
vhost backend specify the queues=Y(Y != X, e.g. X = 8, Y = 4), how vhost backend tell
qemu that it only has the capability of max virt-queue pair is 4, and qemu need update its
max queue number according to its own parameter and the max value from vhost back end.

Do you think whether this scenario makes sense or not?
If yes, then we should consider extend new message to allow vhost backend communicate 
Its virt-queue number to qemu.
More than that, either we need change codes to allow each net_client has more than 
1 virt-queue pair(==2 virt-queue, one for Rx, the other for Tx), 
Or another better solution for that? 

> 

> 

> > For #2

> > The memory table message is also vhost-dev based, it wouldn't hurt we

> > send it a few times, vhost backend could Keep it vhost-dev based too,

> > or keep it once(keep it when first time and ignore in rest messages

> > from the same connected-fd) Any other good suggestion is welcome too

> > :-)

> 

> Add code in vhost-user to skip sending the useless messages?  Yes they

> seem harmless but implementations tend to develop dependencies on such

> bugs, then we get to maintain them forever.


Considering we use multiple vhost_net(multiple net_client) to fulfill multiple virt-queue pairs.
So only the first virt-queue pair will send the set-memory-table, others won't do it.
Is that ok? so apparently each net client will have difference in message count.
The first net client will have the message of set memory table, others have not. 

Thanks
Changchun

> 

> > >

> > >

> > > >  int net_init_vhost_user(const NetClientOptions *opts, const char

> *name,

> > > >                          NetClientState *peer)  {

> > > > +    uint32_t queues;

> > > >      const NetdevVhostUserOptions *vhost_user_opts;

> > > >      CharDriverState *chr;

> > > >

> > > > @@ -243,6 +248,12 @@ int net_init_vhost_user(const

> > > > NetClientOptions

> > > *opts, const char *name,

> > > >          return -1;

> > > >      }

> > > >

> > > > +    /* number of queues for multiqueue */

> > > > +    if (vhost_user_opts->has_queues) {

> > > > +        queues = vhost_user_opts->queues;

> > > > +    } else {

> > > > +        queues = 1;

> > > > +    }

> > > >

> > > > -    return net_vhost_user_init(peer, "vhost_user", name, chr);

> > > > +    return net_vhost_user_init(peer, "vhost_user", name, chr,

> > > > + queues);

> > > >  }

> > > > diff --git a/qapi-schema.json b/qapi-schema.json index

> > > > f97ffa1..51e40ce 100644

> > > > --- a/qapi-schema.json

> > > > +++ b/qapi-schema.json

> > > > @@ -2444,12 +2444,16 @@

> > > >  #

> > > >  # @vhostforce: #optional vhost on for non-MSIX virtio guests (default:

> > > false).

> > > >  #

> > > > +# @queues: #optional number of queues to be created for

> > > > +multiqueue

> > > vhost-user

> > > > +#          (default: 1) (Since 2.5)

> > > > +#

> > > >  # Since 2.1

> > > >  ##

> > > >  { 'struct': 'NetdevVhostUserOptions',

> > > >    'data': {

> > > >      'chardev':        'str',

> > > > -    '*vhostforce':    'bool' } }

> > > > +    '*vhostforce':    'bool',

> > > > +    '*queues':        'uint32' } }

> > > >

> > > >  ##

> > > >  # @NetClientOptions

> > > > diff --git a/qemu-options.hx b/qemu-options.hx index

> > > > ec356f6..dad035e

> > > > 100644

> > > > --- a/qemu-options.hx

> > > > +++ b/qemu-options.hx

> > > > @@ -1942,13 +1942,14 @@ The hubport netdev lets you connect a NIC

> > > > to a QEMU "vlan" instead of a single  netdev.  @code{-net} and

> > > > @code{-device} with parameter @option{vlan} create the  required

> > > > hub

> > > automatically.

> > > >

> > > > -@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]

> > > > +@item -netdev

> > > > +vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]

> > > >

> > > >  Establish a vhost-user netdev, backed by a chardev @var{id}. The

> > > > chardev should  be a unix domain socket backed one. The vhost-user

> > > > uses a specifically defined  protocol to pass vhost ioctl

> > > > replacement messages to an application on the other  end of the

> > > > socket. On non-MSIX guests, the feature can be forced with -

> @var{vhostforce}.

> > > > +@var{vhostforce}. Use 'queues=@var{n}' to specify the number of

> > > > +queues to be created for multiqueue vhost-user.

> > > >

> > > >  Example:

> > > >  @example

> > > > --

> > > > 1.8.4.2

> > > >

> > >

> > > --

> > > You received this message because you are subscribed to the Google

> > > Groups "Snabb Switch development" group.

> > > To unsubscribe from this group and stop receiving emails from it,

> > > send an email to snabb-devel+unsubscribe@googlegroups.com.

> > > To post to this group, send an email to snabb-devel@googlegroups.com.

> > > Visit this group at http://groups.google.com/group/snabb-devel.

> 

> --

> You received this message because you are subscribed to the Google Groups

> "Snabb Switch development" group.

> To unsubscribe from this group and stop receiving emails from it, send an

> email to snabb-devel+unsubscribe@googlegroups.com.

> To post to this group, send an email to snabb-devel@googlegroups.com.

> Visit this group at http://groups.google.com/group/snabb-devel.
Michael S. Tsirkin Aug. 30, 2015, 6:16 a.m. UTC | #7
On Fri, Aug 28, 2015 at 01:53:39AM +0000, Ouyang, Changchun wrote:
> Hi Michael,
> 
> Thanks for your response!
> 
> > -----Original Message-----
> > From: snabb-devel@googlegroups.com [mailto:snabb-
> > devel@googlegroups.com] On Behalf Of Michael S. Tsirkin
> > Sent: Thursday, August 27, 2015 9:05 PM
> > To: Ouyang, Changchun
> > Cc: snabb-devel@googlegroups.com; qemu-devel@nongnu.org;
> > thibaut.collet@6wind.com; n.nikolaev@virtualopensystems.com;
> > luke@snabb.co; Long, Thomas; Liu, Yuanhan
> > Subject: Re: [snabb-devel] Re: [PATCH v6 1/2] vhost-user: add multi queue
> > support
> > 
> > On Tue, Aug 25, 2015 at 03:25:54AM +0000, Ouyang, Changchun wrote:
> > > Hi Michael,
> > >
> > > > -----Original Message-----
> > > > From: snabb-devel@googlegroups.com [mailto:snabb-
> > > > devel@googlegroups.com] On Behalf Of Michael S. Tsirkin
> > > > Sent: Thursday, August 13, 2015 5:19 PM
> > > > To: Ouyang, Changchun
> > > > Cc: qemu-devel@nongnu.org; snabb-devel@googlegroups.com;
> > > > thibaut.collet@6wind.com; n.nikolaev@virtualopensystems.com;
> > > > luke@snabb.co; Long, Thomas
> > > > Subject: [snabb-devel] Re: [PATCH v6 1/2] vhost-user: add multi
> > > > queue support
> > > >
> > > > On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
> > > > > Based on patch by Nikolay Nikolaev:
> > > > > Vhost-user will implement the multi queue support in a similar way
> > > > > to what vhost already has - a separate thread for each queue.
> > > > > To enable the multi queue functionality - a new command line
> > > > > parameter "queues" is introduced for the vhost-user netdev.
> > > > >
> > > > > The RESET_OWNER change is based on commit:
> > > > >    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
> > > > > If it is reverted, the patch need update for it accordingly.
> > > > >
> > > > > Signed-off-by: Nikolay Nikolaev
> > > > > <n.nikolaev@virtualopensystems.com>
> > > > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > > > > ---
> > > > > Changes since v5:
> > > > >  - fix the message descption for VHOST_RESET_OWNER in vhost-user
> > > > > txt
> > > > >
> > > > > Changes since v4:
> > > > >  - remove the unnecessary trailing '\n'
> > > > >
> > > > > Changes since v3:
> > > > >  - fix one typo and wrap one long line
> > > > >
> > > > > Changes since v2:
> > > > >  - fix vq index issue for set_vring_call
> > > > >    When it is the case of VHOST_SET_VRING_CALL, The vq_index is
> > > > > not
> > > > initialized before it is used,
> > > > >    thus it could be a random value. The random value leads to
> > > > > crash in vhost
> > > > after passing down
> > > > >    to vhost, as vhost use this random value to index an array index.
> > > > >  - fix the typo in the doc and description
> > > > >  - address vq index for reset_owner
> > > > >
> > > > > Changes since v1:
> > > > >  - use s->nc.info_str when bringing up/down the backend
> > > > >
> > > > >  docs/specs/vhost-user.txt |  7 ++++++-
> > > > >  hw/net/vhost_net.c        |  3 ++-
> > > > >  hw/virtio/vhost-user.c    | 11 ++++++++++-
> > > > >  net/vhost-user.c          | 37 ++++++++++++++++++++++++-------------
> > > > >  qapi-schema.json          |  6 +++++-
> > > > >  qemu-options.hx           |  5 +++--
> > > > >  6 files changed, 50 insertions(+), 19 deletions(-)
> > > > >
> > > > > diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
> > > > > index 70da3b1..9390f89 100644
> > > > > --- a/docs/specs/vhost-user.txt
> > > > > +++ b/docs/specs/vhost-user.txt
> > > > > @@ -135,6 +135,11 @@ As older slaves don't support negotiating
> > > > > protocol features,  a feature bit was dedicated for this purpose:
> > > > >  #define VHOST_USER_F_PROTOCOL_FEATURES 30
> > > > >
> > > > > +Multi queue support
> > > > > +-------------------
> > > > > +The protocol supports multiple queues by setting all index fields
> > > > > +in the sent messages to a properly calculated value.
> > > > > +
> > > > >  Message types
> > > > >  -------------
> > > > >
> > > > > @@ -198,7 +203,7 @@ Message types
> > > > >
> > > > >        Id: 4
> > > > >        Equivalent ioctl: VHOST_RESET_OWNER
> > > > > -      Master payload: N/A
> > > > > +      Master payload: vring state description
> > > > >
> > > > >        Issued when a new connection is about to be closed. The Master
> > will no
> > > > >        longer own this connection (and will usually close it).
> > > >
> > > > This is an interface change, isn't it?
> > > > We can't make it unconditionally, need to make it dependent on a
> > > > protocol flag.
> > 
> > 
> > Pls remember to fix this one.
> > 
> > > >
> > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index
> > > > > 1f25cb3..9cd6c05 100644
> > > > > --- a/hw/net/vhost_net.c
> > > > > +++ b/hw/net/vhost_net.c
> > > > > @@ -159,6 +159,7 @@ struct vhost_net
> > > > > *vhost_net_init(VhostNetOptions
> > > > > *options)
> > > > >
> > > > >      net->dev.nvqs = 2;
> > > > >      net->dev.vqs = net->vqs;
> > > > > +    net->dev.vq_index = net->nc->queue_index;
> > > > >
> > > > >      r = vhost_dev_init(&net->dev, options->opaque,
> > > > >                         options->backend_type, options->force); @@
> > > > > -269,7 +270,7 @@ static void vhost_net_stop_one(struct vhost_net
> > *net,
> > > > >          for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
> > > > >              const VhostOps *vhost_ops = net->dev.vhost_ops;
> > > > >              int r = vhost_ops->vhost_call(&net->dev, VHOST_RESET_OWNER,
> > > > > -                                          NULL);
> > > > > +                                          &file);
> > > > >              assert(r >= 0);
> > > > >          }
> > > > >      }
> > > > > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index
> > > > > 27ba035..fb11d4c 100644
> > > > > --- a/hw/virtio/vhost-user.c
> > > > > +++ b/hw/virtio/vhost-user.c
> > > > > @@ -219,7 +219,12 @@ static int vhost_user_call(struct vhost_dev
> > > > > *dev,
> > > > unsigned long int request,
> > > > >          break;
> > > > >
> > > > >      case VHOST_USER_SET_OWNER:
> > > > > +        break;
> > > > > +
> > > > >      case VHOST_USER_RESET_OWNER:
> > > > > +        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
> > > > > +        msg.state.index += dev->vq_index;
> > > > > +        msg.size = sizeof(m.state);
> > > > >          break;
> > > > >
> > > > >      case VHOST_USER_SET_MEM_TABLE:
> > > > > @@ -262,17 +267,20 @@ static int vhost_user_call(struct vhost_dev
> > > > > *dev,
> > > > unsigned long int request,
> > > > >      case VHOST_USER_SET_VRING_NUM:
> > > > >      case VHOST_USER_SET_VRING_BASE:
> > > > >          memcpy(&msg.state, arg, sizeof(struct
> > > > > vhost_vring_state));
> > > > > +        msg.state.index += dev->vq_index;
> > > > >          msg.size = sizeof(m.state);
> > > > >          break;
> > > > >
> > > > >      case VHOST_USER_GET_VRING_BASE:
> > > > >          memcpy(&msg.state, arg, sizeof(struct
> > > > > vhost_vring_state));
> > > > > +        msg.state.index += dev->vq_index;
> > > > >          msg.size = sizeof(m.state);
> > > > >          need_reply = 1;
> > > > >          break;
> > > > >
> > > > >      case VHOST_USER_SET_VRING_ADDR:
> > > > >          memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
> > > > > +        msg.addr.index += dev->vq_index;
> > > > >          msg.size = sizeof(m.addr);
> > > > >          break;
> > > > >
> > > > > @@ -280,7 +288,7 @@ static int vhost_user_call(struct vhost_dev
> > > > > *dev,
> > > > unsigned long int request,
> > > > >      case VHOST_USER_SET_VRING_CALL:
> > > > >      case VHOST_USER_SET_VRING_ERR:
> > > > >          file = arg;
> > > > > -        msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
> > > > > +        msg.u64 = (file->index + dev->vq_index) &
> > > > > + VHOST_USER_VRING_IDX_MASK;
> > > > >          msg.size = sizeof(m.u64);
> > > > >          if (ioeventfd_enabled() && file->fd > 0) {
> > > > >              fds[fd_num++] = file->fd; @@ -322,6 +330,7 @@ static
> > > > > int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
> > > > >                  error_report("Received bad msg size.");
> > > > >                  return -1;
> > > > >              }
> > > > > +            msg.state.index -= dev->vq_index;
> > > > >              memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
> > > > >              break;
> > > > >          default:
> > > > > diff --git a/net/vhost-user.c b/net/vhost-user.c index
> > > > > 1d86a2b..904d8af 100644
> > > > > --- a/net/vhost-user.c
> > > > > +++ b/net/vhost-user.c
> > > > > @@ -121,35 +121,39 @@ static void net_vhost_user_event(void
> > > > > *opaque,
> > > > int event)
> > > > >      case CHR_EVENT_OPENED:
> > > > >          vhost_user_start(s);
> > > > >          net_vhost_link_down(s, false);
> > > > > -        error_report("chardev \"%s\" went up", s->chr->label);
> > > > > +        error_report("chardev \"%s\" went up", s->nc.info_str);
> > > > >          break;
> > > > >      case CHR_EVENT_CLOSED:
> > > > >          net_vhost_link_down(s, true);
> > > > >          vhost_user_stop(s);
> > > > > -        error_report("chardev \"%s\" went down", s->chr->label);
> > > > > +        error_report("chardev \"%s\" went down", s->nc.info_str);
> > > > >          break;
> > > > >      }
> > > > >  }
> > 
> > BTW this seems pretty hacky: you get multiple messages when one client
> > connects. Why add multiple event listeners to the same chat device?
> > 
> > 
> > > > >
> > > > >  static int net_vhost_user_init(NetClientState *peer, const char *device,
> > > > > -                               const char *name, CharDriverState *chr)
> > > > > +                               const char *name, CharDriverState *chr,
> > > > > +                               uint32_t queues)
> > > > >  {
> > > > >      NetClientState *nc;
> > > > >      VhostUserState *s;
> > > > > +    int i;
> > > > >
> > > > > -    nc = qemu_new_net_client(&net_vhost_user_info, peer, device,
> > > > name);
> > > > > +    for (i = 0; i < queues; i++) {
> > > > > +        nc = qemu_new_net_client(&net_vhost_user_info, peer,
> > > > > + device, name);
> > > > >
> > > > > -    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
> > > > > -             chr->label);
> > > > > +        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
> > > > > +                 i, chr->label);
> > > > >
> > > > > -    s = DO_UPCAST(VhostUserState, nc, nc);
> > > > > +        s = DO_UPCAST(VhostUserState, nc, nc);
> > > > >
> > > > > -    /* We don't provide a receive callback */
> > > > > -    s->nc.receive_disabled = 1;
> > > > > -    s->chr = chr;
> > > > > -
> > > > > -    qemu_chr_add_handlers(s->chr, NULL, NULL,
> > net_vhost_user_event,
> > > > s);
> > > > > +        /* We don't provide a receive callback */
> > > > > +        s->nc.receive_disabled = 1;
> > > > > +        s->chr = chr;
> > > > > +        s->nc.queue_index = i;
> > > > >
> > > > > +        qemu_chr_add_handlers(s->chr, NULL, NULL,
> > > > > + net_vhost_user_event,
> > > > s);
> > > > > +    }
> > > > >      return 0;
> > > > >  }
> > > > >
> > > > > @@ -225,6 +229,7 @@ static int net_vhost_check_net(QemuOpts
> > *opts,
> > > > > void *opaque)
> > > >
> > > >
> > > > There are two problems here:
> > > >
> > > > 1. we don't really know that the backend
> > > >    is able to support the requested number of queues.
> > > >    If not, everything will fail, silently.
> > > >    A new message to query the # of queues could help, though
> > > >    I'm not sure what can be done on failure. Fail connection?
> > > >
> > > > 2. each message (e.g. set memory table) is sent multiple times,
> > > >    on the same socket.
> > > >
> > > I think it is tough to resolve these 2 comments, as the current
> > > message is either vhost-dev based or virt-queue based, The multiple
> > queues(pair) feature use multiple vhost-devs to implement itself.
> > > For #1
> > > So the queue number is something should be seen in the upper level of
> > vhost-dev rather than inside the vhost-dev.
> > > For each vhost-net, there are 2 virt-queues, one is for Rx the other is for Tx.
> > > introduce the virt-queue pair number into the vhost-dev? But I don't
> > > think it is good, as for each vhost-dev, there is only one virt-queue pair.
> > >
> > > Where should I put the virt-queue pair number to? I don't get the perfect
> > answer till now. Any suggestion is welcome.
> > >
> > > Could we assume the vhost backend has the ability to create enough
> > > virt-queue pair(e.g. 0x8000 is the max) if qemu require Vhost backend
> > > to do it. If it is correct, we don't need get virt-queue pair number from
> > vhost backend, as vhost backend can Create all virt-queue pair required by
> > qemu.
> > > The virtio frontend(on guest) has the flexibility to enable which virt-queue
> > according to its own capability, qemu can do it by using
> > > Set_vring_flag message to notify vhost backend.
> > 
> > I'm reluctant to agree to this. Implementations tend to get this wrong, e.g.
> > they would only test with 2 queues and assume everything is OK.
> > With an explicit message, this seems more robust.
> > 
> > Why is it so hard to implement?  User specifies queues=X.
> > Can't we simply validate that backend supports this # of queues?
> 
> Yes exactly in the qemu, we use qeueus=X to specify the virt-queue pair number.
> In my previous comments, I am talking about how to resolve such scenario: 
> vhost backend specify the queues=Y(Y != X, e.g. X = 8, Y = 4), how vhost backend tell
> qemu that it only has the capability of max virt-queue pair is 4, and qemu need update its
> max queue number according to its own parameter and the max value from vhost back end.
> 
> Do you think whether this scenario makes sense or not?

No, I think that if we can't support the number requested by user,
QEMU can simply exit.

> If yes, then we should consider extend new message to allow vhost backend communicate 
> Its virt-queue number to qemu.

Yes, conditional on the mq protocol flag.

> More than that, either we need change codes to allow each net_client has more than 
> 1 virt-queue pair(==2 virt-queue, one for Rx, the other for Tx), 
> Or another better solution for that? 

I'm not sure why is that required for the above.

> > 
> > 
> > > For #2
> > > The memory table message is also vhost-dev based, it wouldn't hurt we
> > > send it a few times, vhost backend could Keep it vhost-dev based too,
> > > or keep it once(keep it when first time and ignore in rest messages
> > > from the same connected-fd) Any other good suggestion is welcome too
> > > :-)
> > 
> > Add code in vhost-user to skip sending the useless messages?  Yes they
> > seem harmless but implementations tend to develop dependencies on such
> > bugs, then we get to maintain them forever.
> 
> Considering we use multiple vhost_net(multiple net_client) to fulfill multiple virt-queue pairs.
> So only the first virt-queue pair will send the set-memory-table, others won't do it.
> Is that ok? so apparently each net client will have difference in message count.
> The first net client will have the message of set memory table, others have not. 
> 
> Thanks
> Changchun

But all these clients use the same unix domain socket, correct?
If so, I don't think it matters that internally in qemu they
use different net clients. From the external point of view,
it looks like a single message is sent on a single socket.

> > 
> > > >
> > > >
> > > > >  int net_init_vhost_user(const NetClientOptions *opts, const char
> > *name,
> > > > >                          NetClientState *peer)  {
> > > > > +    uint32_t queues;
> > > > >      const NetdevVhostUserOptions *vhost_user_opts;
> > > > >      CharDriverState *chr;
> > > > >
> > > > > @@ -243,6 +248,12 @@ int net_init_vhost_user(const
> > > > > NetClientOptions
> > > > *opts, const char *name,
> > > > >          return -1;
> > > > >      }
> > > > >
> > > > > +    /* number of queues for multiqueue */
> > > > > +    if (vhost_user_opts->has_queues) {
> > > > > +        queues = vhost_user_opts->queues;
> > > > > +    } else {
> > > > > +        queues = 1;
> > > > > +    }
> > > > >
> > > > > -    return net_vhost_user_init(peer, "vhost_user", name, chr);
> > > > > +    return net_vhost_user_init(peer, "vhost_user", name, chr,
> > > > > + queues);
> > > > >  }
> > > > > diff --git a/qapi-schema.json b/qapi-schema.json index
> > > > > f97ffa1..51e40ce 100644
> > > > > --- a/qapi-schema.json
> > > > > +++ b/qapi-schema.json
> > > > > @@ -2444,12 +2444,16 @@
> > > > >  #
> > > > >  # @vhostforce: #optional vhost on for non-MSIX virtio guests (default:
> > > > false).
> > > > >  #
> > > > > +# @queues: #optional number of queues to be created for
> > > > > +multiqueue
> > > > vhost-user
> > > > > +#          (default: 1) (Since 2.5)
> > > > > +#
> > > > >  # Since 2.1
> > > > >  ##
> > > > >  { 'struct': 'NetdevVhostUserOptions',
> > > > >    'data': {
> > > > >      'chardev':        'str',
> > > > > -    '*vhostforce':    'bool' } }
> > > > > +    '*vhostforce':    'bool',
> > > > > +    '*queues':        'uint32' } }
> > > > >
> > > > >  ##
> > > > >  # @NetClientOptions
> > > > > diff --git a/qemu-options.hx b/qemu-options.hx index
> > > > > ec356f6..dad035e
> > > > > 100644
> > > > > --- a/qemu-options.hx
> > > > > +++ b/qemu-options.hx
> > > > > @@ -1942,13 +1942,14 @@ The hubport netdev lets you connect a NIC
> > > > > to a QEMU "vlan" instead of a single  netdev.  @code{-net} and
> > > > > @code{-device} with parameter @option{vlan} create the  required
> > > > > hub
> > > > automatically.
> > > > >
> > > > > -@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> > > > > +@item -netdev
> > > > > +vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
> > > > >
> > > > >  Establish a vhost-user netdev, backed by a chardev @var{id}. The
> > > > > chardev should  be a unix domain socket backed one. The vhost-user
> > > > > uses a specifically defined  protocol to pass vhost ioctl
> > > > > replacement messages to an application on the other  end of the
> > > > > socket. On non-MSIX guests, the feature can be forced with -
> > @var{vhostforce}.
> > > > > +@var{vhostforce}. Use 'queues=@var{n}' to specify the number of
> > > > > +queues to be created for multiqueue vhost-user.
> > > > >
> > > > >  Example:
> > > > >  @example
> > > > > --
> > > > > 1.8.4.2
> > > > >
> > > >
> > > > --
> > > > You received this message because you are subscribed to the Google
> > > > Groups "Snabb Switch development" group.
> > > > To unsubscribe from this group and stop receiving emails from it,
> > > > send an email to snabb-devel+unsubscribe@googlegroups.com.
> > > > To post to this group, send an email to snabb-devel@googlegroups.com.
> > > > Visit this group at http://groups.google.com/group/snabb-devel.
> > 
> > --
> > You received this message because you are subscribed to the Google Groups
> > "Snabb Switch development" group.
> > To unsubscribe from this group and stop receiving emails from it, send an
> > email to snabb-devel+unsubscribe@googlegroups.com.
> > To post to this group, send an email to snabb-devel@googlegroups.com.
> > Visit this group at http://groups.google.com/group/snabb-devel.
Ouyang, Changchun Aug. 31, 2015, 8:29 a.m. UTC | #8
> -----Original Message-----
> From: Michael S. Tsirkin [mailto:mst@redhat.com]
> Sent: Sunday, August 30, 2015 2:16 PM
> To: Ouyang, Changchun
> Cc: snabb-devel@googlegroups.com; qemu-devel@nongnu.org;
> thibaut.collet@6wind.com; n.nikolaev@virtualopensystems.com;
> luke@snabb.co; Long, Thomas; Liu, Yuanhan
> Subject: Re: [snabb-devel] Re: [PATCH v6 1/2] vhost-user: add multi queue
> support
> 
> On Fri, Aug 28, 2015 at 01:53:39AM +0000, Ouyang, Changchun wrote:
> > Hi Michael,
> >
> > Thanks for your response!
> >
> > > -----Original Message-----
> > > From: snabb-devel@googlegroups.com [mailto:snabb-
> > > devel@googlegroups.com] On Behalf Of Michael S. Tsirkin
> > > Sent: Thursday, August 27, 2015 9:05 PM
> > > To: Ouyang, Changchun
> > > Cc: snabb-devel@googlegroups.com; qemu-devel@nongnu.org;
> > > thibaut.collet@6wind.com; n.nikolaev@virtualopensystems.com;
> > > luke@snabb.co; Long, Thomas; Liu, Yuanhan
> > > Subject: Re: [snabb-devel] Re: [PATCH v6 1/2] vhost-user: add multi
> > > queue support
> > >
> > > On Tue, Aug 25, 2015 at 03:25:54AM +0000, Ouyang, Changchun wrote:
> > > > Hi Michael,
> > > >
> > > > > -----Original Message-----
> > > > > From: snabb-devel@googlegroups.com [mailto:snabb-
> > > > > devel@googlegroups.com] On Behalf Of Michael S. Tsirkin
> > > > > Sent: Thursday, August 13, 2015 5:19 PM
> > > > > To: Ouyang, Changchun
> > > > > Cc: qemu-devel@nongnu.org; snabb-devel@googlegroups.com;
> > > > > thibaut.collet@6wind.com; n.nikolaev@virtualopensystems.com;
> > > > > luke@snabb.co; Long, Thomas
> > > > > Subject: [snabb-devel] Re: [PATCH v6 1/2] vhost-user: add multi
> > > > > queue support
> > > > >
> > > > > On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
> > > > > > Based on patch by Nikolay Nikolaev:
> > > > > > Vhost-user will implement the multi queue support in a similar
> > > > > > way to what vhost already has - a separate thread for each queue.
> > > > > > To enable the multi queue functionality - a new command line
> > > > > > parameter "queues" is introduced for the vhost-user netdev.
> > > > > >
> > > > > > The RESET_OWNER change is based on commit:
> > > > > >    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
> > > > > > If it is reverted, the patch need update for it accordingly.
> > > > > >
> > > > > > Signed-off-by: Nikolay Nikolaev
> > > > > > <n.nikolaev@virtualopensystems.com>
> > > > > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > > > > > ---
> > > > > > Changes since v5:
> > > > > >  - fix the message descption for VHOST_RESET_OWNER in
> > > > > > vhost-user txt
> > > > > >
> > > > > > Changes since v4:
> > > > > >  - remove the unnecessary trailing '\n'
> > > > > >
> > > > > > Changes since v3:
> > > > > >  - fix one typo and wrap one long line
> > > > > >
> > > > > > Changes since v2:
> > > > > >  - fix vq index issue for set_vring_call
> > > > > >    When it is the case of VHOST_SET_VRING_CALL, The vq_index
> > > > > > is not
> > > > > initialized before it is used,
> > > > > >    thus it could be a random value. The random value leads to
> > > > > > crash in vhost
> > > > > after passing down
> > > > > >    to vhost, as vhost use this random value to index an array index.
> > > > > >  - fix the typo in the doc and description
> > > > > >  - address vq index for reset_owner
> > > > > >
> > > > > > Changes since v1:
> > > > > >  - use s->nc.info_str when bringing up/down the backend
> > > > > >
> > > > > >  docs/specs/vhost-user.txt |  7 ++++++-
> > > > > >  hw/net/vhost_net.c        |  3 ++-
> > > > > >  hw/virtio/vhost-user.c    | 11 ++++++++++-
> > > > > >  net/vhost-user.c          | 37 ++++++++++++++++++++++++------------
> -
> > > > > >  qapi-schema.json          |  6 +++++-
> > > > > >  qemu-options.hx           |  5 +++--
> > > > > >  6 files changed, 50 insertions(+), 19 deletions(-)
> > > > > >
> > > > > > diff --git a/docs/specs/vhost-user.txt
> > > > > > b/docs/specs/vhost-user.txt index 70da3b1..9390f89 100644
> > > > > > --- a/docs/specs/vhost-user.txt
> > > > > > +++ b/docs/specs/vhost-user.txt
> > > > > > @@ -135,6 +135,11 @@ As older slaves don't support negotiating
> > > > > > protocol features,  a feature bit was dedicated for this purpose:
> > > > > >  #define VHOST_USER_F_PROTOCOL_FEATURES 30
> > > > > >
> > > > > > +Multi queue support
> > > > > > +-------------------
> > > > > > +The protocol supports multiple queues by setting all index
> > > > > > +fields in the sent messages to a properly calculated value.
> > > > > > +
> > > > > >  Message types
> > > > > >  -------------
> > > > > >
> > > > > > @@ -198,7 +203,7 @@ Message types
> > > > > >
> > > > > >        Id: 4
> > > > > >        Equivalent ioctl: VHOST_RESET_OWNER
> > > > > > -      Master payload: N/A
> > > > > > +      Master payload: vring state description
> > > > > >
> > > > > >        Issued when a new connection is about to be closed. The
> > > > > > Master
> > > will no
> > > > > >        longer own this connection (and will usually close it).
> > > > >
> > > > > This is an interface change, isn't it?
> > > > > We can't make it unconditionally, need to make it dependent on a
> > > > > protocol flag.
> > >
> > >
> > > Pls remember to fix this one.
> > >
> > > > >
> > > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index
> > > > > > 1f25cb3..9cd6c05 100644
> > > > > > --- a/hw/net/vhost_net.c
> > > > > > +++ b/hw/net/vhost_net.c
> > > > > > @@ -159,6 +159,7 @@ struct vhost_net
> > > > > > *vhost_net_init(VhostNetOptions
> > > > > > *options)
> > > > > >
> > > > > >      net->dev.nvqs = 2;
> > > > > >      net->dev.vqs = net->vqs;
> > > > > > +    net->dev.vq_index = net->nc->queue_index;
> > > > > >
> > > > > >      r = vhost_dev_init(&net->dev, options->opaque,
> > > > > >                         options->backend_type,
> > > > > > options->force); @@
> > > > > > -269,7 +270,7 @@ static void vhost_net_stop_one(struct
> > > > > > vhost_net
> > > *net,
> > > > > >          for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
> > > > > >              const VhostOps *vhost_ops = net->dev.vhost_ops;
> > > > > >              int r = vhost_ops->vhost_call(&net->dev,
> VHOST_RESET_OWNER,
> > > > > > -                                          NULL);
> > > > > > +                                          &file);
> > > > > >              assert(r >= 0);
> > > > > >          }
> > > > > >      }
> > > > > > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > > > > > index 27ba035..fb11d4c 100644
> > > > > > --- a/hw/virtio/vhost-user.c
> > > > > > +++ b/hw/virtio/vhost-user.c
> > > > > > @@ -219,7 +219,12 @@ static int vhost_user_call(struct
> > > > > > vhost_dev *dev,
> > > > > unsigned long int request,
> > > > > >          break;
> > > > > >
> > > > > >      case VHOST_USER_SET_OWNER:
> > > > > > +        break;
> > > > > > +
> > > > > >      case VHOST_USER_RESET_OWNER:
> > > > > > +        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
> > > > > > +        msg.state.index += dev->vq_index;
> > > > > > +        msg.size = sizeof(m.state);
> > > > > >          break;
> > > > > >
> > > > > >      case VHOST_USER_SET_MEM_TABLE:
> > > > > > @@ -262,17 +267,20 @@ static int vhost_user_call(struct
> > > > > > vhost_dev *dev,
> > > > > unsigned long int request,
> > > > > >      case VHOST_USER_SET_VRING_NUM:
> > > > > >      case VHOST_USER_SET_VRING_BASE:
> > > > > >          memcpy(&msg.state, arg, sizeof(struct
> > > > > > vhost_vring_state));
> > > > > > +        msg.state.index += dev->vq_index;
> > > > > >          msg.size = sizeof(m.state);
> > > > > >          break;
> > > > > >
> > > > > >      case VHOST_USER_GET_VRING_BASE:
> > > > > >          memcpy(&msg.state, arg, sizeof(struct
> > > > > > vhost_vring_state));
> > > > > > +        msg.state.index += dev->vq_index;
> > > > > >          msg.size = sizeof(m.state);
> > > > > >          need_reply = 1;
> > > > > >          break;
> > > > > >
> > > > > >      case VHOST_USER_SET_VRING_ADDR:
> > > > > >          memcpy(&msg.addr, arg, sizeof(struct
> > > > > > vhost_vring_addr));
> > > > > > +        msg.addr.index += dev->vq_index;
> > > > > >          msg.size = sizeof(m.addr);
> > > > > >          break;
> > > > > >
> > > > > > @@ -280,7 +288,7 @@ static int vhost_user_call(struct
> > > > > > vhost_dev *dev,
> > > > > unsigned long int request,
> > > > > >      case VHOST_USER_SET_VRING_CALL:
> > > > > >      case VHOST_USER_SET_VRING_ERR:
> > > > > >          file = arg;
> > > > > > -        msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
> > > > > > +        msg.u64 = (file->index + dev->vq_index) &
> > > > > > + VHOST_USER_VRING_IDX_MASK;
> > > > > >          msg.size = sizeof(m.u64);
> > > > > >          if (ioeventfd_enabled() && file->fd > 0) {
> > > > > >              fds[fd_num++] = file->fd; @@ -322,6 +330,7 @@
> > > > > > static int vhost_user_call(struct vhost_dev *dev, unsigned long int
> request,
> > > > > >                  error_report("Received bad msg size.");
> > > > > >                  return -1;
> > > > > >              }
> > > > > > +            msg.state.index -= dev->vq_index;
> > > > > >              memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
> > > > > >              break;
> > > > > >          default:
> > > > > > diff --git a/net/vhost-user.c b/net/vhost-user.c index
> > > > > > 1d86a2b..904d8af 100644
> > > > > > --- a/net/vhost-user.c
> > > > > > +++ b/net/vhost-user.c
> > > > > > @@ -121,35 +121,39 @@ static void net_vhost_user_event(void
> > > > > > *opaque,
> > > > > int event)
> > > > > >      case CHR_EVENT_OPENED:
> > > > > >          vhost_user_start(s);
> > > > > >          net_vhost_link_down(s, false);
> > > > > > -        error_report("chardev \"%s\" went up", s->chr->label);
> > > > > > +        error_report("chardev \"%s\" went up",
> > > > > > + s->nc.info_str);
> > > > > >          break;
> > > > > >      case CHR_EVENT_CLOSED:
> > > > > >          net_vhost_link_down(s, true);
> > > > > >          vhost_user_stop(s);
> > > > > > -        error_report("chardev \"%s\" went down", s->chr->label);
> > > > > > +        error_report("chardev \"%s\" went down",
> > > > > > + s->nc.info_str);
> > > > > >          break;
> > > > > >      }
> > > > > >  }
> > >
> > > BTW this seems pretty hacky: you get multiple messages when one
> > > client connects. Why add multiple event listeners to the same chat device?
> > >
> > >
> > > > > >
> > > > > >  static int net_vhost_user_init(NetClientState *peer, const char
> *device,
> > > > > > -                               const char *name, CharDriverState *chr)
> > > > > > +                               const char *name, CharDriverState *chr,
> > > > > > +                               uint32_t queues)
> > > > > >  {
> > > > > >      NetClientState *nc;
> > > > > >      VhostUserState *s;
> > > > > > +    int i;
> > > > > >
> > > > > > -    nc = qemu_new_net_client(&net_vhost_user_info, peer, device,
> > > > > name);
> > > > > > +    for (i = 0; i < queues; i++) {
> > > > > > +        nc = qemu_new_net_client(&net_vhost_user_info, peer,
> > > > > > + device, name);
> > > > > >
> > > > > > -    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
> > > > > > -             chr->label);
> > > > > > +        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d
> to %s",
> > > > > > +                 i, chr->label);
> > > > > >
> > > > > > -    s = DO_UPCAST(VhostUserState, nc, nc);
> > > > > > +        s = DO_UPCAST(VhostUserState, nc, nc);
> > > > > >
> > > > > > -    /* We don't provide a receive callback */
> > > > > > -    s->nc.receive_disabled = 1;
> > > > > > -    s->chr = chr;
> > > > > > -
> > > > > > -    qemu_chr_add_handlers(s->chr, NULL, NULL,
> > > net_vhost_user_event,
> > > > > s);
> > > > > > +        /* We don't provide a receive callback */
> > > > > > +        s->nc.receive_disabled = 1;
> > > > > > +        s->chr = chr;
> > > > > > +        s->nc.queue_index = i;
> > > > > >
> > > > > > +        qemu_chr_add_handlers(s->chr, NULL, NULL,
> > > > > > + net_vhost_user_event,
> > > > > s);
> > > > > > +    }
> > > > > >      return 0;
> > > > > >  }
> > > > > >
> > > > > > @@ -225,6 +229,7 @@ static int net_vhost_check_net(QemuOpts
> > > *opts,
> > > > > > void *opaque)
> > > > >
> > > > >
> > > > > There are two problems here:
> > > > >
> > > > > 1. we don't really know that the backend
> > > > >    is able to support the requested number of queues.
> > > > >    If not, everything will fail, silently.
> > > > >    A new message to query the # of queues could help, though
> > > > >    I'm not sure what can be done on failure. Fail connection?
> > > > >
> > > > > 2. each message (e.g. set memory table) is sent multiple times,
> > > > >    on the same socket.
> > > > >
> > > > I think it is tough to resolve these 2 comments, as the current
> > > > message is either vhost-dev based or virt-queue based, The
> > > > multiple
> > > queues(pair) feature use multiple vhost-devs to implement itself.
> > > > For #1
> > > > So the queue number is something should be seen in the upper level
> > > > of
> > > vhost-dev rather than inside the vhost-dev.
> > > > For each vhost-net, there are 2 virt-queues, one is for Rx the other is
> for Tx.
> > > > introduce the virt-queue pair number into the vhost-dev? But I
> > > > don't think it is good, as for each vhost-dev, there is only one virt-
> queue pair.
> > > >
> > > > Where should I put the virt-queue pair number to? I don't get the
> > > > perfect
> > > answer till now. Any suggestion is welcome.
> > > >
> > > > Could we assume the vhost backend has the ability to create enough
> > > > virt-queue pair(e.g. 0x8000 is the max) if qemu require Vhost
> > > > backend to do it. If it is correct, we don't need get virt-queue
> > > > pair number from
> > > vhost backend, as vhost backend can Create all virt-queue pair
> > > required by qemu.
> > > > The virtio frontend(on guest) has the flexibility to enable which
> > > > virt-queue
> > > according to its own capability, qemu can do it by using
> > > > Set_vring_flag message to notify vhost backend.
> > >
> > > I'm reluctant to agree to this. Implementations tend to get this wrong, e.g.
> > > they would only test with 2 queues and assume everything is OK.
> > > With an explicit message, this seems more robust.
> > >
> > > Why is it so hard to implement?  User specifies queues=X.
> > > Can't we simply validate that backend supports this # of queues?
> >
> > Yes exactly in the qemu, we use qeueus=X to specify the virt-queue pair
> number.
> > In my previous comments, I am talking about how to resolve such scenario:
> > vhost backend specify the queues=Y(Y != X, e.g. X = 8, Y = 4), how
> > vhost backend tell qemu that it only has the capability of max
> > virt-queue pair is 4, and qemu need update its max queue number
> according to its own parameter and the max value from vhost back end.
> >
> > Do you think whether this scenario makes sense or not?
> 
> No, I think that if we can't support the number requested by user, QEMU can
> simply exit.

Well, both values are input from user, for some unknown reason, they are different.
When startup the vhost backend, user specify 4 queues, like:
Vhost-user ...  -queues 4      # Y=4
When startup the guest, user specify 8 queues, like:
Qemu-kvm ... -queues 8      # X=8

Vhost backend can't support the queues number as much as the guest/qemu requires,
I think you mean we should let qemu exit in such a case, right?   

Then how about the other case:
When startup the vhost backend, user specify 8 queues, like:
Vhost-user ...  -queues 8      # Y=8
When startup the guest, user specify 4 queues, like:
Qemu-kvm ... -queues 4      # X=4

Vhost backend CAN support the queues number as much as the guest/qemu requires,
And qemu don't want enable all of them(8 in the case), just part of them(only 4 from 8).
Will qemu exit or go forward(of course with 4 queues)? 

> 
> > If yes, then we should consider extend new message to allow vhost
> > backend communicate Its virt-queue number to qemu.
> 
> Yes, conditional on the mq protocol flag.
> 
> > More than that, either we need change codes to allow each net_client
> > has more than
> > 1 virt-queue pair(==2 virt-queue, one for Rx, the other for Tx), Or
> > another better solution for that?
> 
> I'm not sure why is that required for the above.
> 
> > >
> > >
> > > > For #2
> > > > The memory table message is also vhost-dev based, it wouldn't hurt
> > > > we send it a few times, vhost backend could Keep it vhost-dev
> > > > based too, or keep it once(keep it when first time and ignore in
> > > > rest messages from the same connected-fd) Any other good
> > > > suggestion is welcome too
> > > > :-)
> > >
> > > Add code in vhost-user to skip sending the useless messages?  Yes
> > > they seem harmless but implementations tend to develop dependencies
> > > on such bugs, then we get to maintain them forever.
> >
> > Considering we use multiple vhost_net(multiple net_client) to fulfill
> multiple virt-queue pairs.
> > So only the first virt-queue pair will send the set-memory-table, others
> won't do it.
> > Is that ok? so apparently each net client will have difference in message
> count.
> > The first net client will have the message of set memory table, others have
> not.
> >
> > Thanks
> > Changchun
> 
> But all these clients use the same unix domain socket, correct?
Yes, correct

> If so, I don't think it matters that internally in qemu they use different net
> clients. From the external point of view, it looks like a single message is sent
> on a single socket.
> 
> > >
> > > > >
> > > > >
> > > > > >  int net_init_vhost_user(const NetClientOptions *opts, const
> > > > > > char
> > > *name,
> > > > > >                          NetClientState *peer)  {
> > > > > > +    uint32_t queues;
> > > > > >      const NetdevVhostUserOptions *vhost_user_opts;
> > > > > >      CharDriverState *chr;
> > > > > >
> > > > > > @@ -243,6 +248,12 @@ int net_init_vhost_user(const
> > > > > > NetClientOptions
> > > > > *opts, const char *name,
> > > > > >          return -1;
> > > > > >      }
> > > > > >
> > > > > > +    /* number of queues for multiqueue */
> > > > > > +    if (vhost_user_opts->has_queues) {
> > > > > > +        queues = vhost_user_opts->queues;
> > > > > > +    } else {
> > > > > > +        queues = 1;
> > > > > > +    }
> > > > > >
> > > > > > -    return net_vhost_user_init(peer, "vhost_user", name, chr);
> > > > > > +    return net_vhost_user_init(peer, "vhost_user", name, chr,
> > > > > > + queues);
> > > > > >  }
> > > > > > diff --git a/qapi-schema.json b/qapi-schema.json index
> > > > > > f97ffa1..51e40ce 100644
> > > > > > --- a/qapi-schema.json
> > > > > > +++ b/qapi-schema.json
> > > > > > @@ -2444,12 +2444,16 @@
> > > > > >  #
> > > > > >  # @vhostforce: #optional vhost on for non-MSIX virtio guests
> (default:
> > > > > false).
> > > > > >  #
> > > > > > +# @queues: #optional number of queues to be created for
> > > > > > +multiqueue
> > > > > vhost-user
> > > > > > +#          (default: 1) (Since 2.5)
> > > > > > +#
> > > > > >  # Since 2.1
> > > > > >  ##
> > > > > >  { 'struct': 'NetdevVhostUserOptions',
> > > > > >    'data': {
> > > > > >      'chardev':        'str',
> > > > > > -    '*vhostforce':    'bool' } }
> > > > > > +    '*vhostforce':    'bool',
> > > > > > +    '*queues':        'uint32' } }
> > > > > >
> > > > > >  ##
> > > > > >  # @NetClientOptions
> > > > > > diff --git a/qemu-options.hx b/qemu-options.hx index
> > > > > > ec356f6..dad035e
> > > > > > 100644
> > > > > > --- a/qemu-options.hx
> > > > > > +++ b/qemu-options.hx
> > > > > > @@ -1942,13 +1942,14 @@ The hubport netdev lets you connect a
> > > > > > NIC to a QEMU "vlan" instead of a single  netdev.  @code{-net}
> > > > > > and @code{-device} with parameter @option{vlan} create the
> > > > > > required hub
> > > > > automatically.
> > > > > >
> > > > > > -@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> > > > > > +@item -netdev
> > > > > > +vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
> > > > > >
> > > > > >  Establish a vhost-user netdev, backed by a chardev @var{id}.
> > > > > > The chardev should  be a unix domain socket backed one. The
> > > > > > vhost-user uses a specifically defined  protocol to pass vhost
> > > > > > ioctl replacement messages to an application on the other  end
> > > > > > of the socket. On non-MSIX guests, the feature can be forced
> > > > > > with -
> > > @var{vhostforce}.
> > > > > > +@var{vhostforce}. Use 'queues=@var{n}' to specify the number
> > > > > > +of queues to be created for multiqueue vhost-user.
> > > > > >
> > > > > >  Example:
> > > > > >  @example
> > > > > > --
> > > > > > 1.8.4.2
> > > > > >
> > > > >
> > > > > --
> > > > > You received this message because you are subscribed to the
> > > > > Google Groups "Snabb Switch development" group.
> > > > > To unsubscribe from this group and stop receiving emails from
> > > > > it, send an email to snabb-devel+unsubscribe@googlegroups.com.
> > > > > To post to this group, send an email to snabb-
> devel@googlegroups.com.
> > > > > Visit this group at http://groups.google.com/group/snabb-devel.
> > >
> > > --
> > > You received this message because you are subscribed to the Google
> > > Groups "Snabb Switch development" group.
> > > To unsubscribe from this group and stop receiving emails from it,
> > > send an email to snabb-devel+unsubscribe@googlegroups.com.
> > > To post to this group, send an email to snabb-devel@googlegroups.com.
> > > Visit this group at http://groups.google.com/group/snabb-devel.
Michael S. Tsirkin Aug. 31, 2015, 11:30 a.m. UTC | #9
On Mon, Aug 31, 2015 at 08:29:39AM +0000, Ouyang, Changchun wrote:
> 
> 
> > -----Original Message-----
> > From: Michael S. Tsirkin [mailto:mst@redhat.com]
> > Sent: Sunday, August 30, 2015 2:16 PM
> > To: Ouyang, Changchun
> > Cc: snabb-devel@googlegroups.com; qemu-devel@nongnu.org;
> > thibaut.collet@6wind.com; n.nikolaev@virtualopensystems.com;
> > luke@snabb.co; Long, Thomas; Liu, Yuanhan
> > Subject: Re: [snabb-devel] Re: [PATCH v6 1/2] vhost-user: add multi queue
> > support
> > 
> > On Fri, Aug 28, 2015 at 01:53:39AM +0000, Ouyang, Changchun wrote:
> > > Hi Michael,
> > >
> > > Thanks for your response!
> > >
> > > > -----Original Message-----
> > > > From: snabb-devel@googlegroups.com [mailto:snabb-
> > > > devel@googlegroups.com] On Behalf Of Michael S. Tsirkin
> > > > Sent: Thursday, August 27, 2015 9:05 PM
> > > > To: Ouyang, Changchun
> > > > Cc: snabb-devel@googlegroups.com; qemu-devel@nongnu.org;
> > > > thibaut.collet@6wind.com; n.nikolaev@virtualopensystems.com;
> > > > luke@snabb.co; Long, Thomas; Liu, Yuanhan
> > > > Subject: Re: [snabb-devel] Re: [PATCH v6 1/2] vhost-user: add multi
> > > > queue support
> > > >
> > > > On Tue, Aug 25, 2015 at 03:25:54AM +0000, Ouyang, Changchun wrote:
> > > > > Hi Michael,
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: snabb-devel@googlegroups.com [mailto:snabb-
> > > > > > devel@googlegroups.com] On Behalf Of Michael S. Tsirkin
> > > > > > Sent: Thursday, August 13, 2015 5:19 PM
> > > > > > To: Ouyang, Changchun
> > > > > > Cc: qemu-devel@nongnu.org; snabb-devel@googlegroups.com;
> > > > > > thibaut.collet@6wind.com; n.nikolaev@virtualopensystems.com;
> > > > > > luke@snabb.co; Long, Thomas
> > > > > > Subject: [snabb-devel] Re: [PATCH v6 1/2] vhost-user: add multi
> > > > > > queue support
> > > > > >
> > > > > > On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
> > > > > > > Based on patch by Nikolay Nikolaev:
> > > > > > > Vhost-user will implement the multi queue support in a similar
> > > > > > > way to what vhost already has - a separate thread for each queue.
> > > > > > > To enable the multi queue functionality - a new command line
> > > > > > > parameter "queues" is introduced for the vhost-user netdev.
> > > > > > >
> > > > > > > The RESET_OWNER change is based on commit:
> > > > > > >    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
> > > > > > > If it is reverted, the patch need update for it accordingly.
> > > > > > >
> > > > > > > Signed-off-by: Nikolay Nikolaev
> > > > > > > <n.nikolaev@virtualopensystems.com>
> > > > > > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > > > > > > ---
> > > > > > > Changes since v5:
> > > > > > >  - fix the message descption for VHOST_RESET_OWNER in
> > > > > > > vhost-user txt
> > > > > > >
> > > > > > > Changes since v4:
> > > > > > >  - remove the unnecessary trailing '\n'
> > > > > > >
> > > > > > > Changes since v3:
> > > > > > >  - fix one typo and wrap one long line
> > > > > > >
> > > > > > > Changes since v2:
> > > > > > >  - fix vq index issue for set_vring_call
> > > > > > >    When it is the case of VHOST_SET_VRING_CALL, The vq_index
> > > > > > > is not
> > > > > > initialized before it is used,
> > > > > > >    thus it could be a random value. The random value leads to
> > > > > > > crash in vhost
> > > > > > after passing down
> > > > > > >    to vhost, as vhost use this random value to index an array index.
> > > > > > >  - fix the typo in the doc and description
> > > > > > >  - address vq index for reset_owner
> > > > > > >
> > > > > > > Changes since v1:
> > > > > > >  - use s->nc.info_str when bringing up/down the backend
> > > > > > >
> > > > > > >  docs/specs/vhost-user.txt |  7 ++++++-
> > > > > > >  hw/net/vhost_net.c        |  3 ++-
> > > > > > >  hw/virtio/vhost-user.c    | 11 ++++++++++-
> > > > > > >  net/vhost-user.c          | 37 ++++++++++++++++++++++++------------
> > -
> > > > > > >  qapi-schema.json          |  6 +++++-
> > > > > > >  qemu-options.hx           |  5 +++--
> > > > > > >  6 files changed, 50 insertions(+), 19 deletions(-)
> > > > > > >
> > > > > > > diff --git a/docs/specs/vhost-user.txt
> > > > > > > b/docs/specs/vhost-user.txt index 70da3b1..9390f89 100644
> > > > > > > --- a/docs/specs/vhost-user.txt
> > > > > > > +++ b/docs/specs/vhost-user.txt
> > > > > > > @@ -135,6 +135,11 @@ As older slaves don't support negotiating
> > > > > > > protocol features,  a feature bit was dedicated for this purpose:
> > > > > > >  #define VHOST_USER_F_PROTOCOL_FEATURES 30
> > > > > > >
> > > > > > > +Multi queue support
> > > > > > > +-------------------
> > > > > > > +The protocol supports multiple queues by setting all index
> > > > > > > +fields in the sent messages to a properly calculated value.
> > > > > > > +
> > > > > > >  Message types
> > > > > > >  -------------
> > > > > > >
> > > > > > > @@ -198,7 +203,7 @@ Message types
> > > > > > >
> > > > > > >        Id: 4
> > > > > > >        Equivalent ioctl: VHOST_RESET_OWNER
> > > > > > > -      Master payload: N/A
> > > > > > > +      Master payload: vring state description
> > > > > > >
> > > > > > >        Issued when a new connection is about to be closed. The
> > > > > > > Master
> > > > will no
> > > > > > >        longer own this connection (and will usually close it).
> > > > > >
> > > > > > This is an interface change, isn't it?
> > > > > > We can't make it unconditionally, need to make it dependent on a
> > > > > > protocol flag.
> > > >
> > > >
> > > > Pls remember to fix this one.
> > > >
> > > > > >
> > > > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index
> > > > > > > 1f25cb3..9cd6c05 100644
> > > > > > > --- a/hw/net/vhost_net.c
> > > > > > > +++ b/hw/net/vhost_net.c
> > > > > > > @@ -159,6 +159,7 @@ struct vhost_net
> > > > > > > *vhost_net_init(VhostNetOptions
> > > > > > > *options)
> > > > > > >
> > > > > > >      net->dev.nvqs = 2;
> > > > > > >      net->dev.vqs = net->vqs;
> > > > > > > +    net->dev.vq_index = net->nc->queue_index;
> > > > > > >
> > > > > > >      r = vhost_dev_init(&net->dev, options->opaque,
> > > > > > >                         options->backend_type,
> > > > > > > options->force); @@
> > > > > > > -269,7 +270,7 @@ static void vhost_net_stop_one(struct
> > > > > > > vhost_net
> > > > *net,
> > > > > > >          for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
> > > > > > >              const VhostOps *vhost_ops = net->dev.vhost_ops;
> > > > > > >              int r = vhost_ops->vhost_call(&net->dev,
> > VHOST_RESET_OWNER,
> > > > > > > -                                          NULL);
> > > > > > > +                                          &file);
> > > > > > >              assert(r >= 0);
> > > > > > >          }
> > > > > > >      }
> > > > > > > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > > > > > > index 27ba035..fb11d4c 100644
> > > > > > > --- a/hw/virtio/vhost-user.c
> > > > > > > +++ b/hw/virtio/vhost-user.c
> > > > > > > @@ -219,7 +219,12 @@ static int vhost_user_call(struct
> > > > > > > vhost_dev *dev,
> > > > > > unsigned long int request,
> > > > > > >          break;
> > > > > > >
> > > > > > >      case VHOST_USER_SET_OWNER:
> > > > > > > +        break;
> > > > > > > +
> > > > > > >      case VHOST_USER_RESET_OWNER:
> > > > > > > +        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
> > > > > > > +        msg.state.index += dev->vq_index;
> > > > > > > +        msg.size = sizeof(m.state);
> > > > > > >          break;
> > > > > > >
> > > > > > >      case VHOST_USER_SET_MEM_TABLE:
> > > > > > > @@ -262,17 +267,20 @@ static int vhost_user_call(struct
> > > > > > > vhost_dev *dev,
> > > > > > unsigned long int request,
> > > > > > >      case VHOST_USER_SET_VRING_NUM:
> > > > > > >      case VHOST_USER_SET_VRING_BASE:
> > > > > > >          memcpy(&msg.state, arg, sizeof(struct
> > > > > > > vhost_vring_state));
> > > > > > > +        msg.state.index += dev->vq_index;
> > > > > > >          msg.size = sizeof(m.state);
> > > > > > >          break;
> > > > > > >
> > > > > > >      case VHOST_USER_GET_VRING_BASE:
> > > > > > >          memcpy(&msg.state, arg, sizeof(struct
> > > > > > > vhost_vring_state));
> > > > > > > +        msg.state.index += dev->vq_index;
> > > > > > >          msg.size = sizeof(m.state);
> > > > > > >          need_reply = 1;
> > > > > > >          break;
> > > > > > >
> > > > > > >      case VHOST_USER_SET_VRING_ADDR:
> > > > > > >          memcpy(&msg.addr, arg, sizeof(struct
> > > > > > > vhost_vring_addr));
> > > > > > > +        msg.addr.index += dev->vq_index;
> > > > > > >          msg.size = sizeof(m.addr);
> > > > > > >          break;
> > > > > > >
> > > > > > > @@ -280,7 +288,7 @@ static int vhost_user_call(struct
> > > > > > > vhost_dev *dev,
> > > > > > unsigned long int request,
> > > > > > >      case VHOST_USER_SET_VRING_CALL:
> > > > > > >      case VHOST_USER_SET_VRING_ERR:
> > > > > > >          file = arg;
> > > > > > > -        msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
> > > > > > > +        msg.u64 = (file->index + dev->vq_index) &
> > > > > > > + VHOST_USER_VRING_IDX_MASK;
> > > > > > >          msg.size = sizeof(m.u64);
> > > > > > >          if (ioeventfd_enabled() && file->fd > 0) {
> > > > > > >              fds[fd_num++] = file->fd; @@ -322,6 +330,7 @@
> > > > > > > static int vhost_user_call(struct vhost_dev *dev, unsigned long int
> > request,
> > > > > > >                  error_report("Received bad msg size.");
> > > > > > >                  return -1;
> > > > > > >              }
> > > > > > > +            msg.state.index -= dev->vq_index;
> > > > > > >              memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
> > > > > > >              break;
> > > > > > >          default:
> > > > > > > diff --git a/net/vhost-user.c b/net/vhost-user.c index
> > > > > > > 1d86a2b..904d8af 100644
> > > > > > > --- a/net/vhost-user.c
> > > > > > > +++ b/net/vhost-user.c
> > > > > > > @@ -121,35 +121,39 @@ static void net_vhost_user_event(void
> > > > > > > *opaque,
> > > > > > int event)
> > > > > > >      case CHR_EVENT_OPENED:
> > > > > > >          vhost_user_start(s);
> > > > > > >          net_vhost_link_down(s, false);
> > > > > > > -        error_report("chardev \"%s\" went up", s->chr->label);
> > > > > > > +        error_report("chardev \"%s\" went up",
> > > > > > > + s->nc.info_str);
> > > > > > >          break;
> > > > > > >      case CHR_EVENT_CLOSED:
> > > > > > >          net_vhost_link_down(s, true);
> > > > > > >          vhost_user_stop(s);
> > > > > > > -        error_report("chardev \"%s\" went down", s->chr->label);
> > > > > > > +        error_report("chardev \"%s\" went down",
> > > > > > > + s->nc.info_str);
> > > > > > >          break;
> > > > > > >      }
> > > > > > >  }
> > > >
> > > > BTW this seems pretty hacky: you get multiple messages when one
> > > > client connects. Why add multiple event listeners to the same chat device?
> > > >
> > > >
> > > > > > >
> > > > > > >  static int net_vhost_user_init(NetClientState *peer, const char
> > *device,
> > > > > > > -                               const char *name, CharDriverState *chr)
> > > > > > > +                               const char *name, CharDriverState *chr,
> > > > > > > +                               uint32_t queues)
> > > > > > >  {
> > > > > > >      NetClientState *nc;
> > > > > > >      VhostUserState *s;
> > > > > > > +    int i;
> > > > > > >
> > > > > > > -    nc = qemu_new_net_client(&net_vhost_user_info, peer, device,
> > > > > > name);
> > > > > > > +    for (i = 0; i < queues; i++) {
> > > > > > > +        nc = qemu_new_net_client(&net_vhost_user_info, peer,
> > > > > > > + device, name);
> > > > > > >
> > > > > > > -    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
> > > > > > > -             chr->label);
> > > > > > > +        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d
> > to %s",
> > > > > > > +                 i, chr->label);
> > > > > > >
> > > > > > > -    s = DO_UPCAST(VhostUserState, nc, nc);
> > > > > > > +        s = DO_UPCAST(VhostUserState, nc, nc);
> > > > > > >
> > > > > > > -    /* We don't provide a receive callback */
> > > > > > > -    s->nc.receive_disabled = 1;
> > > > > > > -    s->chr = chr;
> > > > > > > -
> > > > > > > -    qemu_chr_add_handlers(s->chr, NULL, NULL,
> > > > net_vhost_user_event,
> > > > > > s);
> > > > > > > +        /* We don't provide a receive callback */
> > > > > > > +        s->nc.receive_disabled = 1;
> > > > > > > +        s->chr = chr;
> > > > > > > +        s->nc.queue_index = i;
> > > > > > >
> > > > > > > +        qemu_chr_add_handlers(s->chr, NULL, NULL,
> > > > > > > + net_vhost_user_event,
> > > > > > s);
> > > > > > > +    }
> > > > > > >      return 0;
> > > > > > >  }
> > > > > > >
> > > > > > > @@ -225,6 +229,7 @@ static int net_vhost_check_net(QemuOpts
> > > > *opts,
> > > > > > > void *opaque)
> > > > > >
> > > > > >
> > > > > > There are two problems here:
> > > > > >
> > > > > > 1. we don't really know that the backend
> > > > > >    is able to support the requested number of queues.
> > > > > >    If not, everything will fail, silently.
> > > > > >    A new message to query the # of queues could help, though
> > > > > >    I'm not sure what can be done on failure. Fail connection?
> > > > > >
> > > > > > 2. each message (e.g. set memory table) is sent multiple times,
> > > > > >    on the same socket.
> > > > > >
> > > > > I think it is tough to resolve these 2 comments, as the current
> > > > > message is either vhost-dev based or virt-queue based, The
> > > > > multiple
> > > > queues(pair) feature use multiple vhost-devs to implement itself.
> > > > > For #1
> > > > > So the queue number is something should be seen in the upper level
> > > > > of
> > > > vhost-dev rather than inside the vhost-dev.
> > > > > For each vhost-net, there are 2 virt-queues, one is for Rx the other is
> > for Tx.
> > > > > introduce the virt-queue pair number into the vhost-dev? But I
> > > > > don't think it is good, as for each vhost-dev, there is only one virt-
> > queue pair.
> > > > >
> > > > > Where should I put the virt-queue pair number to? I don't get the
> > > > > perfect
> > > > answer till now. Any suggestion is welcome.
> > > > >
> > > > > Could we assume the vhost backend has the ability to create enough
> > > > > virt-queue pair(e.g. 0x8000 is the max) if qemu require Vhost
> > > > > backend to do it. If it is correct, we don't need get virt-queue
> > > > > pair number from
> > > > vhost backend, as vhost backend can Create all virt-queue pair
> > > > required by qemu.
> > > > > The virtio frontend(on guest) has the flexibility to enable which
> > > > > virt-queue
> > > > according to its own capability, qemu can do it by using
> > > > > Set_vring_flag message to notify vhost backend.
> > > >
> > > > I'm reluctant to agree to this. Implementations tend to get this wrong, e.g.
> > > > they would only test with 2 queues and assume everything is OK.
> > > > With an explicit message, this seems more robust.
> > > >
> > > > Why is it so hard to implement?  User specifies queues=X.
> > > > Can't we simply validate that backend supports this # of queues?
> > >
> > > Yes exactly in the qemu, we use qeueus=X to specify the virt-queue pair
> > number.
> > > In my previous comments, I am talking about how to resolve such scenario:
> > > vhost backend specify the queues=Y(Y != X, e.g. X = 8, Y = 4), how
> > > vhost backend tell qemu that it only has the capability of max
> > > virt-queue pair is 4, and qemu need update its max queue number
> > according to its own parameter and the max value from vhost back end.
> > >
> > > Do you think whether this scenario makes sense or not?
> > 
> > No, I think that if we can't support the number requested by user, QEMU can
> > simply exit.
> 
> Well, both values are input from user, for some unknown reason, they are different.
> When startup the vhost backend, user specify 4 queues, like:
> Vhost-user ...  -queues 4      # Y=4
> When startup the guest, user specify 8 queues, like:
> Qemu-kvm ... -queues 8      # X=8
> 
> Vhost backend can't support the queues number as much as the guest/qemu requires,
> I think you mean we should let qemu exit in such a case, right?   

Exactly.

> Then how about the other case:
> When startup the vhost backend, user specify 8 queues, like:
> Vhost-user ...  -queues 8      # Y=8
> When startup the guest, user specify 4 queues, like:
> Qemu-kvm ... -queues 4      # X=4
> 
> Vhost backend CAN support the queues number as much as the guest/qemu requires,
> And qemu don't want enable all of them(8 in the case), just part of them(only 4 from 8).
> Will qemu exit or go forward(of course with 4 queues)? 

I think it must go forward since guest does not have to
configure all queues anyway - and that's built in to virtio spec.

Note that # of queues in actual use in fact changes dynamically
too, without restaring the device.

> > 
> > > If yes, then we should consider extend new message to allow vhost
> > > backend communicate Its virt-queue number to qemu.
> > 
> > Yes, conditional on the mq protocol flag.
> > 
> > > More than that, either we need change codes to allow each net_client
> > > has more than
> > > 1 virt-queue pair(==2 virt-queue, one for Rx, the other for Tx), Or
> > > another better solution for that?
> > 
> > I'm not sure why is that required for the above.
> > 
> > > >
> > > >
> > > > > For #2
> > > > > The memory table message is also vhost-dev based, it wouldn't hurt
> > > > > we send it a few times, vhost backend could Keep it vhost-dev
> > > > > based too, or keep it once(keep it when first time and ignore in
> > > > > rest messages from the same connected-fd) Any other good
> > > > > suggestion is welcome too
> > > > > :-)
> > > >
> > > > Add code in vhost-user to skip sending the useless messages?  Yes
> > > > they seem harmless but implementations tend to develop dependencies
> > > > on such bugs, then we get to maintain them forever.
> > >
> > > Considering we use multiple vhost_net(multiple net_client) to fulfill
> > multiple virt-queue pairs.
> > > So only the first virt-queue pair will send the set-memory-table, others
> > won't do it.
> > > Is that ok? so apparently each net client will have difference in message
> > count.
> > > The first net client will have the message of set memory table, others have
> > not.
> > >
> > > Thanks
> > > Changchun
> > 
> > But all these clients use the same unix domain socket, correct?
> Yes, correct
> 
> > If so, I don't think it matters that internally in qemu they use different net
> > clients. From the external point of view, it looks like a single message is sent
> > on a single socket.
> > 
> > > >
> > > > > >
> > > > > >
> > > > > > >  int net_init_vhost_user(const NetClientOptions *opts, const
> > > > > > > char
> > > > *name,
> > > > > > >                          NetClientState *peer)  {
> > > > > > > +    uint32_t queues;
> > > > > > >      const NetdevVhostUserOptions *vhost_user_opts;
> > > > > > >      CharDriverState *chr;
> > > > > > >
> > > > > > > @@ -243,6 +248,12 @@ int net_init_vhost_user(const
> > > > > > > NetClientOptions
> > > > > > *opts, const char *name,
> > > > > > >          return -1;
> > > > > > >      }
> > > > > > >
> > > > > > > +    /* number of queues for multiqueue */
> > > > > > > +    if (vhost_user_opts->has_queues) {
> > > > > > > +        queues = vhost_user_opts->queues;
> > > > > > > +    } else {
> > > > > > > +        queues = 1;
> > > > > > > +    }
> > > > > > >
> > > > > > > -    return net_vhost_user_init(peer, "vhost_user", name, chr);
> > > > > > > +    return net_vhost_user_init(peer, "vhost_user", name, chr,
> > > > > > > + queues);
> > > > > > >  }
> > > > > > > diff --git a/qapi-schema.json b/qapi-schema.json index
> > > > > > > f97ffa1..51e40ce 100644
> > > > > > > --- a/qapi-schema.json
> > > > > > > +++ b/qapi-schema.json
> > > > > > > @@ -2444,12 +2444,16 @@
> > > > > > >  #
> > > > > > >  # @vhostforce: #optional vhost on for non-MSIX virtio guests
> > (default:
> > > > > > false).
> > > > > > >  #
> > > > > > > +# @queues: #optional number of queues to be created for
> > > > > > > +multiqueue
> > > > > > vhost-user
> > > > > > > +#          (default: 1) (Since 2.5)
> > > > > > > +#
> > > > > > >  # Since 2.1
> > > > > > >  ##
> > > > > > >  { 'struct': 'NetdevVhostUserOptions',
> > > > > > >    'data': {
> > > > > > >      'chardev':        'str',
> > > > > > > -    '*vhostforce':    'bool' } }
> > > > > > > +    '*vhostforce':    'bool',
> > > > > > > +    '*queues':        'uint32' } }
> > > > > > >
> > > > > > >  ##
> > > > > > >  # @NetClientOptions
> > > > > > > diff --git a/qemu-options.hx b/qemu-options.hx index
> > > > > > > ec356f6..dad035e
> > > > > > > 100644
> > > > > > > --- a/qemu-options.hx
> > > > > > > +++ b/qemu-options.hx
> > > > > > > @@ -1942,13 +1942,14 @@ The hubport netdev lets you connect a
> > > > > > > NIC to a QEMU "vlan" instead of a single  netdev.  @code{-net}
> > > > > > > and @code{-device} with parameter @option{vlan} create the
> > > > > > > required hub
> > > > > > automatically.
> > > > > > >
> > > > > > > -@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> > > > > > > +@item -netdev
> > > > > > > +vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
> > > > > > >
> > > > > > >  Establish a vhost-user netdev, backed by a chardev @var{id}.
> > > > > > > The chardev should  be a unix domain socket backed one. The
> > > > > > > vhost-user uses a specifically defined  protocol to pass vhost
> > > > > > > ioctl replacement messages to an application on the other  end
> > > > > > > of the socket. On non-MSIX guests, the feature can be forced
> > > > > > > with -
> > > > @var{vhostforce}.
> > > > > > > +@var{vhostforce}. Use 'queues=@var{n}' to specify the number
> > > > > > > +of queues to be created for multiqueue vhost-user.
> > > > > > >
> > > > > > >  Example:
> > > > > > >  @example
> > > > > > > --
> > > > > > > 1.8.4.2
> > > > > > >
> > > > > >
> > > > > > --
> > > > > > You received this message because you are subscribed to the
> > > > > > Google Groups "Snabb Switch development" group.
> > > > > > To unsubscribe from this group and stop receiving emails from
> > > > > > it, send an email to snabb-devel+unsubscribe@googlegroups.com.
> > > > > > To post to this group, send an email to snabb-
> > devel@googlegroups.com.
> > > > > > Visit this group at http://groups.google.com/group/snabb-devel.
> > > >
> > > > --
> > > > You received this message because you are subscribed to the Google
> > > > Groups "Snabb Switch development" group.
> > > > To unsubscribe from this group and stop receiving emails from it,
> > > > send an email to snabb-devel+unsubscribe@googlegroups.com.
> > > > To post to this group, send an email to snabb-devel@googlegroups.com.
> > > > Visit this group at http://groups.google.com/group/snabb-devel.
Eric Blake Aug. 31, 2015, 3:04 p.m. UTC | #10
On 08/31/2015 05:30 AM, Michael S. Tsirkin wrote:
> On Mon, Aug 31, 2015 at 08:29:39AM +0000, Ouyang, Changchun wrote:

>>>>>>> On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
>>>>>>>> Based on patch by Nikolay Nikolaev:

When you start to have this much nested quoting,...

>>>>>>>> -269,7 +270,7 @@ static void vhost_net_stop_one(struct
>>>>>>>> vhost_net
>>>>> *net,
>>>>>>>>          for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
>>>>>>>>              const VhostOps *vhost_ops = net->dev.vhost_ops;
>>>>>>>>              int r = vhost_ops->vhost_call(&net->dev,
>>> VHOST_RESET_OWNER,

...line wrapping of the quotes gets funky and starts mis-attributing
portions of lines to the wrong quote level.

>> Vhost backend can't support the queues number as much as the guest/qemu requires,
>> I think you mean we should let qemu exit in such a case, right?   
> 
> Exactly.

And mis-attribution makes it that much harder to find the real content
being added further down in the middle of 28k of mostly-quoted message.

Please remember that it is okay to trim messages to the relevant
portions, to make it easier for readers to quickly get at the new content.
Yuanhan Liu Sept. 1, 2015, 9:13 a.m. UTC | #11
On Thu, Aug 13, 2015 at 12:18:38PM +0300, Michael S. Tsirkin wrote:
> On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
> > Based on patch by Nikolay Nikolaev:
> > Vhost-user will implement the multi queue support in a similar way
> > to what vhost already has - a separate thread for each queue.
> > To enable the multi queue functionality - a new command line parameter
> > "queues" is introduced for the vhost-user netdev.
> > 
> > The RESET_OWNER change is based on commit:
> >    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
> > If it is reverted, the patch need update for it accordingly.
> > 
> > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
[snip...]
> > @@ -198,7 +203,7 @@ Message types
> >  
> >        Id: 4
> >        Equivalent ioctl: VHOST_RESET_OWNER
> > -      Master payload: N/A
> > +      Master payload: vring state description
> >  
> >        Issued when a new connection is about to be closed. The Master will no
> >        longer own this connection (and will usually close it).
> 
> This is an interface change, isn't it?
> We can't make it unconditionally, need to make it dependent
> on a protocol flag.

Hi Michael,

I'm wondering why we need a payload here, as we don't do that for
VHOST_SET_OWNER. I mean, stopping one or few queue pairs when a
connect is about to be close doesn't make sense to me. Instead,
we should clean up all queue pair when VHOST_RESET_OWNER message
is received, right?

> 
> 
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 1f25cb3..9cd6c05 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
[snip...]
> >  static int net_vhost_user_init(NetClientState *peer, const char *device,
> > -                               const char *name, CharDriverState *chr)
> > +                               const char *name, CharDriverState *chr,
> > +                               uint32_t queues)
> >  {
> >      NetClientState *nc;
> >      VhostUserState *s;
> > +    int i;
> >  
> > -    nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
> > +    for (i = 0; i < queues; i++) {
> > +        nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
> >  
> > -    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
> > -             chr->label);
> > +        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
> > +                 i, chr->label);
> >  
> > -    s = DO_UPCAST(VhostUserState, nc, nc);
> > +        s = DO_UPCAST(VhostUserState, nc, nc);
> >  
> > -    /* We don't provide a receive callback */
> > -    s->nc.receive_disabled = 1;
> > -    s->chr = chr;
> > -
> > -    qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
> > +        /* We don't provide a receive callback */
> > +        s->nc.receive_disabled = 1;
> > +        s->chr = chr;
> > +        s->nc.queue_index = i;
> >  
> > +        qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
> > +    }
> >      return 0;
> >  }
> >  
> > @@ -225,6 +229,7 @@ static int net_vhost_check_net(QemuOpts *opts, void *opaque)
> 
> 
> There are two problems here:
> 
> 1. we don't really know that the backend
>    is able to support the requested number of queues.
>    If not, everything will fail, silently.
>    A new message to query the # of queues could help, though
>    I'm not sure what can be done on failure. Fail connection?

What I'm thinking is we may do:

- introduce a feature flag, for indicating we support MQ or not.

  We query this flag only when # of queues given is > 1. We exit
  if it not matches.

- invoke vhost_dev init repeatedly for # of queues given, unless
  something wrong happened, which basically means the backend
  can not support such # of queues; we then quit.

  We could, as you suggested, add an another message to query
  the max # queues the backend support. However, judging we have
  to check the return value of setting up a single queue pair,
  which already gives feedback when the backed is not able to
  support requested # of queues, we could save such message,
  though it's easy to implement :)

> 
> 2. each message (e.g. set memory table) is sent multiple times,
>    on the same socket.

Yeah, for there is a single socket opening there, it's not necessary
to send messages like SET_MEM_TABLE multiple times. But for other
messages that relate to to a specific vring, we have to send N times,
don't we?

So, I'm wondering could we categorize the message in two types: vring
specific and none-vring specific. For vring specific, we send it N
times, with the vhost_dev->vq_index telling which one queue pair
we have interest.

For none-vring specific, we just send it once for first queue pair
(vhost_dev->queue == 0), just like what we did for tap: we launch
qemu-ifup/down script only for the first queue pair.

Comments? (And sorry if I made some silly comments, as I'm pretty
new to this community, say just have read about 2 weeks code).

	--yliu

> 
> 
> 
> >  int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> >                          NetClientState *peer)
> >  {
> > +    uint32_t queues;
> >      const NetdevVhostUserOptions *vhost_user_opts;
> >      CharDriverState *chr;
> >  
> > @@ -243,6 +248,12 @@ int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> >          return -1;
> >      }
> >  
> > +    /* number of queues for multiqueue */
> > +    if (vhost_user_opts->has_queues) {
> > +        queues = vhost_user_opts->queues;
> > +    } else {
> > +        queues = 1;
> > +    }
> >  
> > -    return net_vhost_user_init(peer, "vhost_user", name, chr);
> > +    return net_vhost_user_init(peer, "vhost_user", name, chr, queues);
> >  }
> > diff --git a/qapi-schema.json b/qapi-schema.json
> > index f97ffa1..51e40ce 100644
> > --- a/qapi-schema.json
> > +++ b/qapi-schema.json
> > @@ -2444,12 +2444,16 @@
> >  #
> >  # @vhostforce: #optional vhost on for non-MSIX virtio guests (default: false).
> >  #
> > +# @queues: #optional number of queues to be created for multiqueue vhost-user
> > +#          (default: 1) (Since 2.5)
> > +#
> >  # Since 2.1
> >  ##
> >  { 'struct': 'NetdevVhostUserOptions',
> >    'data': {
> >      'chardev':        'str',
> > -    '*vhostforce':    'bool' } }
> > +    '*vhostforce':    'bool',
> > +    '*queues':        'uint32' } }
> >  
> >  ##
> >  # @NetClientOptions
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index ec356f6..dad035e 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -1942,13 +1942,14 @@ The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
> >  netdev.  @code{-net} and @code{-device} with parameter @option{vlan} create the
> >  required hub automatically.
> >  
> > -@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> > +@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
> >  
> >  Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
> >  be a unix domain socket backed one. The vhost-user uses a specifically defined
> >  protocol to pass vhost ioctl replacement messages to an application on the other
> >  end of the socket. On non-MSIX guests, the feature can be forced with
> > -@var{vhostforce}.
> > +@var{vhostforce}. Use 'queues=@var{n}' to specify the number of queues to
> > +be created for multiqueue vhost-user.
> >  
> >  Example:
> >  @example
> > -- 
> > 1.8.4.2
> >
Yuanhan Liu Sept. 1, 2015, 9:20 a.m. UTC | #12
On Mon, Aug 31, 2015 at 02:30:06PM +0300, Michael S. Tsirkin wrote:
[snip..] 
> > Then how about the other case:
> > When startup the vhost backend, user specify 8 queues, like:
> > Vhost-user ...  -queues 8      # Y=8
> > When startup the guest, user specify 4 queues, like:
> > Qemu-kvm ... -queues 4      # X=4
> > 
> > Vhost backend CAN support the queues number as much as the guest/qemu requires,
> > And qemu don't want enable all of them(8 in the case), just part of them(only 4 from 8).
> > Will qemu exit or go forward(of course with 4 queues)? 
> 
> I think it must go forward since guest does not have to
> configure all queues anyway - and that's built in to virtio spec.
> 
> Note that # of queues in actual use in fact changes dynamically
> too, without restaring the device.

May I know how do you do that? I mean, is there a public interface, like
by some commands? Or, by sending vhost messages?

	--yliu
Michael S. Tsirkin Sept. 1, 2015, 9:41 a.m. UTC | #13
On Tue, Sep 01, 2015 at 05:20:06PM +0800, Yuanhan Liu wrote:
> On Mon, Aug 31, 2015 at 02:30:06PM +0300, Michael S. Tsirkin wrote:
> [snip..] 
> > > Then how about the other case:
> > > When startup the vhost backend, user specify 8 queues, like:
> > > Vhost-user ...  -queues 8      # Y=8
> > > When startup the guest, user specify 4 queues, like:
> > > Qemu-kvm ... -queues 4      # X=4
> > > 
> > > Vhost backend CAN support the queues number as much as the guest/qemu requires,
> > > And qemu don't want enable all of them(8 in the case), just part of them(only 4 from 8).
> > > Will qemu exit or go forward(of course with 4 queues)? 
> > 
> > I think it must go forward since guest does not have to
> > configure all queues anyway - and that's built in to virtio spec.
> > 
> > Note that # of queues in actual use in fact changes dynamically
> > too, without restaring the device.
> 
> May I know how do you do that? I mean, is there a public interface, like
> by some commands? Or, by sending vhost messages?
> 
> 	--yliu

Using ethtool within guest. In fact, guests boot with a single queue,
you need to use ethtool to enable mq, specifying the # of queues.
Michael S. Tsirkin Sept. 1, 2015, 10:07 a.m. UTC | #14
On Tue, Sep 01, 2015 at 05:13:50PM +0800, Yuanhan Liu wrote:
> On Thu, Aug 13, 2015 at 12:18:38PM +0300, Michael S. Tsirkin wrote:
> > On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
> > > Based on patch by Nikolay Nikolaev:
> > > Vhost-user will implement the multi queue support in a similar way
> > > to what vhost already has - a separate thread for each queue.
> > > To enable the multi queue functionality - a new command line parameter
> > > "queues" is introduced for the vhost-user netdev.
> > > 
> > > The RESET_OWNER change is based on commit:
> > >    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
> > > If it is reverted, the patch need update for it accordingly.
> > > 
> > > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> [snip...]
> > > @@ -198,7 +203,7 @@ Message types
> > >  
> > >        Id: 4
> > >        Equivalent ioctl: VHOST_RESET_OWNER
> > > -      Master payload: N/A
> > > +      Master payload: vring state description
> > >  
> > >        Issued when a new connection is about to be closed. The Master will no
> > >        longer own this connection (and will usually close it).
> > 
> > This is an interface change, isn't it?
> > We can't make it unconditionally, need to make it dependent
> > on a protocol flag.
> 
> Hi Michael,
> 
> I'm wondering why we need a payload here, as we don't do that for
> VHOST_SET_OWNER. I mean, stopping one or few queue pairs when a
> connect is about to be close doesn't make sense to me. Instead,
> we should clean up all queue pair when VHOST_RESET_OWNER message
> is received, right?

We really should rename VHOST_RESET_OWNER to VHOST_RESET_DEVICE.

And I agree, I don't think it needs a payload.


> > 
> > 
> > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > index 1f25cb3..9cd6c05 100644
> > > --- a/hw/net/vhost_net.c
> > > +++ b/hw/net/vhost_net.c
> [snip...]
> > >  static int net_vhost_user_init(NetClientState *peer, const char *device,
> > > -                               const char *name, CharDriverState *chr)
> > > +                               const char *name, CharDriverState *chr,
> > > +                               uint32_t queues)
> > >  {
> > >      NetClientState *nc;
> > >      VhostUserState *s;
> > > +    int i;
> > >  
> > > -    nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
> > > +    for (i = 0; i < queues; i++) {
> > > +        nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
> > >  
> > > -    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
> > > -             chr->label);
> > > +        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
> > > +                 i, chr->label);
> > >  
> > > -    s = DO_UPCAST(VhostUserState, nc, nc);
> > > +        s = DO_UPCAST(VhostUserState, nc, nc);
> > >  
> > > -    /* We don't provide a receive callback */
> > > -    s->nc.receive_disabled = 1;
> > > -    s->chr = chr;
> > > -
> > > -    qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
> > > +        /* We don't provide a receive callback */
> > > +        s->nc.receive_disabled = 1;
> > > +        s->chr = chr;
> > > +        s->nc.queue_index = i;
> > >  
> > > +        qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
> > > +    }
> > >      return 0;
> > >  }
> > >  
> > > @@ -225,6 +229,7 @@ static int net_vhost_check_net(QemuOpts *opts, void *opaque)
> > 
> > 
> > There are two problems here:
> > 
> > 1. we don't really know that the backend
> >    is able to support the requested number of queues.
> >    If not, everything will fail, silently.
> >    A new message to query the # of queues could help, though
> >    I'm not sure what can be done on failure. Fail connection?
> 
> What I'm thinking is we may do:
> 
> - introduce a feature flag, for indicating we support MQ or not.
> 
>   We query this flag only when # of queues given is > 1. We exit
>   if it not matches.
> 
> - invoke vhost_dev init repeatedly for # of queues given, unless
>   something wrong happened, which basically means the backend
>   can not support such # of queues; we then quit.
> 
>   We could, as you suggested, add an another message to query
>   the max # queues the backend support. However, judging we have
>   to check the return value of setting up a single queue pair,
>   which already gives feedback when the backed is not able to
>   support requested # of queues, we could save such message,
>   though it's easy to implement :)

Problem is, we only setup queues when device is started,
that is when guest is running.

Doing this at connect would mean we don't start the VM
that we can't then support.

> > 
> > 2. each message (e.g. set memory table) is sent multiple times,
> >    on the same socket.
> 
> Yeah, for there is a single socket opening there, it's not necessary
> to send messages like SET_MEM_TABLE multiple times. But for other
> messages that relate to to a specific vring, we have to send N times,
> don't we?

We need to set up each vring, sure.


> So, I'm wondering could we categorize the message in two types: vring
> specific and none-vring specific. For vring specific, we send it N
> times, with the vhost_dev->vq_index telling which one queue pair
> we have interest.
> 
> For none-vring specific, we just send it once for first queue pair
> (vhost_dev->queue == 0), just like what we did for tap: we launch
> qemu-ifup/down script only for the first queue pair.

Sounds reasonable. Make this all internal to vhost user:
no need for common vhost code to know about this distinction.

> Comments? (And sorry if I made some silly comments, as I'm pretty
> new to this community, say just have read about 2 weeks code).
> 
> 	--yliu
> 
> > 
> > 
> > 
> > >  int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> > >                          NetClientState *peer)
> > >  {
> > > +    uint32_t queues;
> > >      const NetdevVhostUserOptions *vhost_user_opts;
> > >      CharDriverState *chr;
> > >  
> > > @@ -243,6 +248,12 @@ int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> > >          return -1;
> > >      }
> > >  
> > > +    /* number of queues for multiqueue */
> > > +    if (vhost_user_opts->has_queues) {
> > > +        queues = vhost_user_opts->queues;
> > > +    } else {
> > > +        queues = 1;
> > > +    }
> > >  
> > > -    return net_vhost_user_init(peer, "vhost_user", name, chr);
> > > +    return net_vhost_user_init(peer, "vhost_user", name, chr, queues);
> > >  }
> > > diff --git a/qapi-schema.json b/qapi-schema.json
> > > index f97ffa1..51e40ce 100644
> > > --- a/qapi-schema.json
> > > +++ b/qapi-schema.json
> > > @@ -2444,12 +2444,16 @@
> > >  #
> > >  # @vhostforce: #optional vhost on for non-MSIX virtio guests (default: false).
> > >  #
> > > +# @queues: #optional number of queues to be created for multiqueue vhost-user
> > > +#          (default: 1) (Since 2.5)
> > > +#
> > >  # Since 2.1
> > >  ##
> > >  { 'struct': 'NetdevVhostUserOptions',
> > >    'data': {
> > >      'chardev':        'str',
> > > -    '*vhostforce':    'bool' } }
> > > +    '*vhostforce':    'bool',
> > > +    '*queues':        'uint32' } }
> > >  
> > >  ##
> > >  # @NetClientOptions
> > > diff --git a/qemu-options.hx b/qemu-options.hx
> > > index ec356f6..dad035e 100644
> > > --- a/qemu-options.hx
> > > +++ b/qemu-options.hx
> > > @@ -1942,13 +1942,14 @@ The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
> > >  netdev.  @code{-net} and @code{-device} with parameter @option{vlan} create the
> > >  required hub automatically.
> > >  
> > > -@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> > > +@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
> > >  
> > >  Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
> > >  be a unix domain socket backed one. The vhost-user uses a specifically defined
> > >  protocol to pass vhost ioctl replacement messages to an application on the other
> > >  end of the socket. On non-MSIX guests, the feature can be forced with
> > > -@var{vhostforce}.
> > > +@var{vhostforce}. Use 'queues=@var{n}' to specify the number of queues to
> > > +be created for multiqueue vhost-user.
> > >  
> > >  Example:
> > >  @example
> > > -- 
> > > 1.8.4.2
> > >
Yuanhan Liu Sept. 1, 2015, 12:15 p.m. UTC | #15
On Tue, Sep 01, 2015 at 01:07:11PM +0300, Michael S. Tsirkin wrote:
> On Tue, Sep 01, 2015 at 05:13:50PM +0800, Yuanhan Liu wrote:
> > On Thu, Aug 13, 2015 at 12:18:38PM +0300, Michael S. Tsirkin wrote:
> > > On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
> > > > Based on patch by Nikolay Nikolaev:
> > > > Vhost-user will implement the multi queue support in a similar way
> > > > to what vhost already has - a separate thread for each queue.
> > > > To enable the multi queue functionality - a new command line parameter
> > > > "queues" is introduced for the vhost-user netdev.
> > > > 
> > > > The RESET_OWNER change is based on commit:
> > > >    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
> > > > If it is reverted, the patch need update for it accordingly.
> > > > 
> > > > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> > > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > [snip...]
> > > > @@ -198,7 +203,7 @@ Message types
> > > >  
> > > >        Id: 4
> > > >        Equivalent ioctl: VHOST_RESET_OWNER
> > > > -      Master payload: N/A
> > > > +      Master payload: vring state description
> > > >  
> > > >        Issued when a new connection is about to be closed. The Master will no
> > > >        longer own this connection (and will usually close it).
> > > 
> > > This is an interface change, isn't it?
> > > We can't make it unconditionally, need to make it dependent
> > > on a protocol flag.
> > 
> > Hi Michael,
> > 
> > I'm wondering why we need a payload here, as we don't do that for
> > VHOST_SET_OWNER. I mean, stopping one or few queue pairs when a
> > connect is about to be close doesn't make sense to me. Instead,
> > we should clean up all queue pair when VHOST_RESET_OWNER message
> > is received, right?
> 
> We really should rename VHOST_RESET_OWNER to VHOST_RESET_DEVICE.

Yeah, second that.

BTW, can we simply do the name convertion, just changing VHOST_RESET_OWNER
to VHOST_RESET_DEVICE(or VHOST_STOP_DEVICE). I guess it's doable in
theory as far as we don't change the number. I somehow feel it's not a
good practice.

Maybe we could make it as a new vhost message, and mark the old one
as obsolete? That doesn't sound perfect, either, as it reserves a number
for a message we will not use any more.

Also, we may rename VHOST_SET_OWNER to VHOST_INIT_DEVICE?

> And I agree, I don't think it needs a payload.

Good to know.

> 
> 
> > > 
> > > 
> > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > > index 1f25cb3..9cd6c05 100644
> > > > --- a/hw/net/vhost_net.c
> > > > +++ b/hw/net/vhost_net.c
> > [snip...]
> > > >  static int net_vhost_user_init(NetClientState *peer, const char *device,
> > > > -                               const char *name, CharDriverState *chr)
> > > > +                               const char *name, CharDriverState *chr,
> > > > +                               uint32_t queues)
> > > >  {
> > > >      NetClientState *nc;
> > > >      VhostUserState *s;
> > > > +    int i;
> > > >  
> > > > -    nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
> > > > +    for (i = 0; i < queues; i++) {
> > > > +        nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
> > > >  
> > > > -    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
> > > > -             chr->label);
> > > > +        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
> > > > +                 i, chr->label);
> > > >  
> > > > -    s = DO_UPCAST(VhostUserState, nc, nc);
> > > > +        s = DO_UPCAST(VhostUserState, nc, nc);
> > > >  
> > > > -    /* We don't provide a receive callback */
> > > > -    s->nc.receive_disabled = 1;
> > > > -    s->chr = chr;
> > > > -
> > > > -    qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
> > > > +        /* We don't provide a receive callback */
> > > > +        s->nc.receive_disabled = 1;
> > > > +        s->chr = chr;
> > > > +        s->nc.queue_index = i;
> > > >  
> > > > +        qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
> > > > +    }
> > > >      return 0;
> > > >  }
> > > >  
> > > > @@ -225,6 +229,7 @@ static int net_vhost_check_net(QemuOpts *opts, void *opaque)
> > > 
> > > 
> > > There are two problems here:
> > > 
> > > 1. we don't really know that the backend
> > >    is able to support the requested number of queues.
> > >    If not, everything will fail, silently.
> > >    A new message to query the # of queues could help, though
> > >    I'm not sure what can be done on failure. Fail connection?
> > 
> > What I'm thinking is we may do:
> > 
> > - introduce a feature flag, for indicating we support MQ or not.
> > 
> >   We query this flag only when # of queues given is > 1. We exit
> >   if it not matches.
> > 
> > - invoke vhost_dev init repeatedly for # of queues given, unless
> >   something wrong happened, which basically means the backend
> >   can not support such # of queues; we then quit.
> > 
> >   We could, as you suggested, add an another message to query
> >   the max # queues the backend support. However, judging we have
> >   to check the return value of setting up a single queue pair,
> >   which already gives feedback when the backed is not able to
> >   support requested # of queues, we could save such message,
> >   though it's easy to implement :)
> 
> Problem is, we only setup queues when device is started,
> that is when guest is running.

So we couldn't simply invoke 'exit()', right?

> 
> Doing this at connect would mean we don't start the VM
> that we can't then support.

Sorry, I'm a bit confused then. You just said that we setup queues
when guest is running, but now you were saying that VM hasn't been
started yet at connect time. As far as I know, we setup queues when
the socket is connected. So, isn't it contradictory in your sayings?

> 
> > > 
> > > 2. each message (e.g. set memory table) is sent multiple times,
> > >    on the same socket.
> > 
> > Yeah, for there is a single socket opening there, it's not necessary
> > to send messages like SET_MEM_TABLE multiple times. But for other
> > messages that relate to to a specific vring, we have to send N times,
> > don't we?
> 
> We need to set up each vring, sure.
> 
> 
> > So, I'm wondering could we categorize the message in two types: vring
> > specific and none-vring specific. For vring specific, we send it N
> > times, with the vhost_dev->vq_index telling which one queue pair
> > we have interest.
> > 
> > For none-vring specific, we just send it once for first queue pair
> > (vhost_dev->queue == 0), just like what we did for tap: we launch
> > qemu-ifup/down script only for the first queue pair.
> 
> Sounds reasonable. Make this all internal to vhost user:
> no need for common vhost code to know about this distinction.

Good to know and I'll keep it in mind.

Thanks for your comments.


	--yliu
> 
> > Comments? (And sorry if I made some silly comments, as I'm pretty
> > new to this community, say just have read about 2 weeks code).
> > 
> > 	--yliu
> > 
> > > 
> > > 
> > > 
> > > >  int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> > > >                          NetClientState *peer)
> > > >  {
> > > > +    uint32_t queues;
> > > >      const NetdevVhostUserOptions *vhost_user_opts;
> > > >      CharDriverState *chr;
> > > >  
> > > > @@ -243,6 +248,12 @@ int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> > > >          return -1;
> > > >      }
> > > >  
> > > > +    /* number of queues for multiqueue */
> > > > +    if (vhost_user_opts->has_queues) {
> > > > +        queues = vhost_user_opts->queues;
> > > > +    } else {
> > > > +        queues = 1;
> > > > +    }
> > > >  
> > > > -    return net_vhost_user_init(peer, "vhost_user", name, chr);
> > > > +    return net_vhost_user_init(peer, "vhost_user", name, chr, queues);
> > > >  }
> > > > diff --git a/qapi-schema.json b/qapi-schema.json
> > > > index f97ffa1..51e40ce 100644
> > > > --- a/qapi-schema.json
> > > > +++ b/qapi-schema.json
> > > > @@ -2444,12 +2444,16 @@
> > > >  #
> > > >  # @vhostforce: #optional vhost on for non-MSIX virtio guests (default: false).
> > > >  #
> > > > +# @queues: #optional number of queues to be created for multiqueue vhost-user
> > > > +#          (default: 1) (Since 2.5)
> > > > +#
> > > >  # Since 2.1
> > > >  ##
> > > >  { 'struct': 'NetdevVhostUserOptions',
> > > >    'data': {
> > > >      'chardev':        'str',
> > > > -    '*vhostforce':    'bool' } }
> > > > +    '*vhostforce':    'bool',
> > > > +    '*queues':        'uint32' } }
> > > >  
> > > >  ##
> > > >  # @NetClientOptions
> > > > diff --git a/qemu-options.hx b/qemu-options.hx
> > > > index ec356f6..dad035e 100644
> > > > --- a/qemu-options.hx
> > > > +++ b/qemu-options.hx
> > > > @@ -1942,13 +1942,14 @@ The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
> > > >  netdev.  @code{-net} and @code{-device} with parameter @option{vlan} create the
> > > >  required hub automatically.
> > > >  
> > > > -@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> > > > +@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
> > > >  
> > > >  Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
> > > >  be a unix domain socket backed one. The vhost-user uses a specifically defined
> > > >  protocol to pass vhost ioctl replacement messages to an application on the other
> > > >  end of the socket. On non-MSIX guests, the feature can be forced with
> > > > -@var{vhostforce}.
> > > > +@var{vhostforce}. Use 'queues=@var{n}' to specify the number of queues to
> > > > +be created for multiqueue vhost-user.
> > > >  
> > > >  Example:
> > > >  @example
> > > > -- 
> > > > 1.8.4.2
> > > >
Yuanhan Liu Sept. 1, 2015, 12:16 p.m. UTC | #16
On Tue, Sep 01, 2015 at 12:41:49PM +0300, Michael S. Tsirkin wrote:
> On Tue, Sep 01, 2015 at 05:20:06PM +0800, Yuanhan Liu wrote:
> > On Mon, Aug 31, 2015 at 02:30:06PM +0300, Michael S. Tsirkin wrote:
> > [snip..] 
> > > > Then how about the other case:
> > > > When startup the vhost backend, user specify 8 queues, like:
> > > > Vhost-user ...  -queues 8      # Y=8
> > > > When startup the guest, user specify 4 queues, like:
> > > > Qemu-kvm ... -queues 4      # X=4
> > > > 
> > > > Vhost backend CAN support the queues number as much as the guest/qemu requires,
> > > > And qemu don't want enable all of them(8 in the case), just part of them(only 4 from 8).
> > > > Will qemu exit or go forward(of course with 4 queues)? 
> > > 
> > > I think it must go forward since guest does not have to
> > > configure all queues anyway - and that's built in to virtio spec.
> > > 
> > > Note that # of queues in actual use in fact changes dynamically
> > > too, without restaring the device.
> > 
> > May I know how do you do that? I mean, is there a public interface, like
> > by some commands? Or, by sending vhost messages?
> > 
> > 	--yliu
> 
> Using ethtool within guest. In fact, guests boot with a single queue,
> you need to use ethtool to enable mq, specifying the # of queues.

Thanks for the info.

	--yliu
Michael S. Tsirkin Sept. 1, 2015, 2:10 p.m. UTC | #17
On Tue, Sep 01, 2015 at 08:15:15PM +0800, Yuanhan Liu wrote:
> On Tue, Sep 01, 2015 at 01:07:11PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Sep 01, 2015 at 05:13:50PM +0800, Yuanhan Liu wrote:
> > > On Thu, Aug 13, 2015 at 12:18:38PM +0300, Michael S. Tsirkin wrote:
> > > > On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
> > > > > Based on patch by Nikolay Nikolaev:
> > > > > Vhost-user will implement the multi queue support in a similar way
> > > > > to what vhost already has - a separate thread for each queue.
> > > > > To enable the multi queue functionality - a new command line parameter
> > > > > "queues" is introduced for the vhost-user netdev.
> > > > > 
> > > > > The RESET_OWNER change is based on commit:
> > > > >    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
> > > > > If it is reverted, the patch need update for it accordingly.
> > > > > 
> > > > > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> > > > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > > [snip...]
> > > > > @@ -198,7 +203,7 @@ Message types
> > > > >  
> > > > >        Id: 4
> > > > >        Equivalent ioctl: VHOST_RESET_OWNER
> > > > > -      Master payload: N/A
> > > > > +      Master payload: vring state description
> > > > >  
> > > > >        Issued when a new connection is about to be closed. The Master will no
> > > > >        longer own this connection (and will usually close it).
> > > > 
> > > > This is an interface change, isn't it?
> > > > We can't make it unconditionally, need to make it dependent
> > > > on a protocol flag.
> > > 
> > > Hi Michael,
> > > 
> > > I'm wondering why we need a payload here, as we don't do that for
> > > VHOST_SET_OWNER. I mean, stopping one or few queue pairs when a
> > > connect is about to be close doesn't make sense to me. Instead,
> > > we should clean up all queue pair when VHOST_RESET_OWNER message
> > > is received, right?
> > 
> > We really should rename VHOST_RESET_OWNER to VHOST_RESET_DEVICE.
> 
> Yeah, second that.
> 
> BTW, can we simply do the name convertion, just changing VHOST_RESET_OWNER
> to VHOST_RESET_DEVICE(or VHOST_STOP_DEVICE). I guess it's doable in
> theory as far as we don't change the number. I somehow feel it's not a
> good practice.

I think just renaming is fine, we are not changing the protocol
at all.

> Maybe we could make it as a new vhost message, and mark the old one
> as obsolete? That doesn't sound perfect, either, as it reserves a number
> for a message we will not use any more.
> 
> Also, we may rename VHOST_SET_OWNER to VHOST_INIT_DEVICE?

I think VHOST_SET_OWNER specified who's the master?

> > And I agree, I don't think it needs a payload.
> 
> Good to know.
> 
> > 
> > 
> > > > 
> > > > 
> > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > > > index 1f25cb3..9cd6c05 100644
> > > > > --- a/hw/net/vhost_net.c
> > > > > +++ b/hw/net/vhost_net.c
> > > [snip...]
> > > > >  static int net_vhost_user_init(NetClientState *peer, const char *device,
> > > > > -                               const char *name, CharDriverState *chr)
> > > > > +                               const char *name, CharDriverState *chr,
> > > > > +                               uint32_t queues)
> > > > >  {
> > > > >      NetClientState *nc;
> > > > >      VhostUserState *s;
> > > > > +    int i;
> > > > >  
> > > > > -    nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
> > > > > +    for (i = 0; i < queues; i++) {
> > > > > +        nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
> > > > >  
> > > > > -    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
> > > > > -             chr->label);
> > > > > +        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
> > > > > +                 i, chr->label);
> > > > >  
> > > > > -    s = DO_UPCAST(VhostUserState, nc, nc);
> > > > > +        s = DO_UPCAST(VhostUserState, nc, nc);
> > > > >  
> > > > > -    /* We don't provide a receive callback */
> > > > > -    s->nc.receive_disabled = 1;
> > > > > -    s->chr = chr;
> > > > > -
> > > > > -    qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
> > > > > +        /* We don't provide a receive callback */
> > > > > +        s->nc.receive_disabled = 1;
> > > > > +        s->chr = chr;
> > > > > +        s->nc.queue_index = i;
> > > > >  
> > > > > +        qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
> > > > > +    }
> > > > >      return 0;
> > > > >  }
> > > > >  
> > > > > @@ -225,6 +229,7 @@ static int net_vhost_check_net(QemuOpts *opts, void *opaque)
> > > > 
> > > > 
> > > > There are two problems here:
> > > > 
> > > > 1. we don't really know that the backend
> > > >    is able to support the requested number of queues.
> > > >    If not, everything will fail, silently.
> > > >    A new message to query the # of queues could help, though
> > > >    I'm not sure what can be done on failure. Fail connection?
> > > 
> > > What I'm thinking is we may do:
> > > 
> > > - introduce a feature flag, for indicating we support MQ or not.
> > > 
> > >   We query this flag only when # of queues given is > 1. We exit
> > >   if it not matches.
> > > 
> > > - invoke vhost_dev init repeatedly for # of queues given, unless
> > >   something wrong happened, which basically means the backend
> > >   can not support such # of queues; we then quit.
> > > 
> > >   We could, as you suggested, add an another message to query
> > >   the max # queues the backend support. However, judging we have
> > >   to check the return value of setting up a single queue pair,
> > >   which already gives feedback when the backed is not able to
> > >   support requested # of queues, we could save such message,
> > >   though it's easy to implement :)
> > 
> > Problem is, we only setup queues when device is started,
> > that is when guest is running.
> 
> So we couldn't simply invoke 'exit()', right?
> 
> > 
> > Doing this at connect would mean we don't start the VM
> > that we can't then support.
> 
> Sorry, I'm a bit confused then. You just said that we setup queues
> when guest is running, but now you were saying that VM hasn't been
> started yet at connect time. As far as I know, we setup queues when
> the socket is connected. So, isn't it contradictory in your sayings?
> 
> > 
> > > > 
> > > > 2. each message (e.g. set memory table) is sent multiple times,
> > > >    on the same socket.
> > > 
> > > Yeah, for there is a single socket opening there, it's not necessary
> > > to send messages like SET_MEM_TABLE multiple times. But for other
> > > messages that relate to to a specific vring, we have to send N times,
> > > don't we?
> > 
> > We need to set up each vring, sure.
> > 
> > 
> > > So, I'm wondering could we categorize the message in two types: vring
> > > specific and none-vring specific. For vring specific, we send it N
> > > times, with the vhost_dev->vq_index telling which one queue pair
> > > we have interest.
> > > 
> > > For none-vring specific, we just send it once for first queue pair
> > > (vhost_dev->queue == 0), just like what we did for tap: we launch
> > > qemu-ifup/down script only for the first queue pair.
> > 
> > Sounds reasonable. Make this all internal to vhost user:
> > no need for common vhost code to know about this distinction.
> 
> Good to know and I'll keep it in mind.
> 
> Thanks for your comments.
> 
> 
> 	--yliu
> > 
> > > Comments? (And sorry if I made some silly comments, as I'm pretty
> > > new to this community, say just have read about 2 weeks code).
> > > 
> > > 	--yliu
> > > 
> > > > 
> > > > 
> > > > 
> > > > >  int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> > > > >                          NetClientState *peer)
> > > > >  {
> > > > > +    uint32_t queues;
> > > > >      const NetdevVhostUserOptions *vhost_user_opts;
> > > > >      CharDriverState *chr;
> > > > >  
> > > > > @@ -243,6 +248,12 @@ int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> > > > >          return -1;
> > > > >      }
> > > > >  
> > > > > +    /* number of queues for multiqueue */
> > > > > +    if (vhost_user_opts->has_queues) {
> > > > > +        queues = vhost_user_opts->queues;
> > > > > +    } else {
> > > > > +        queues = 1;
> > > > > +    }
> > > > >  
> > > > > -    return net_vhost_user_init(peer, "vhost_user", name, chr);
> > > > > +    return net_vhost_user_init(peer, "vhost_user", name, chr, queues);
> > > > >  }
> > > > > diff --git a/qapi-schema.json b/qapi-schema.json
> > > > > index f97ffa1..51e40ce 100644
> > > > > --- a/qapi-schema.json
> > > > > +++ b/qapi-schema.json
> > > > > @@ -2444,12 +2444,16 @@
> > > > >  #
> > > > >  # @vhostforce: #optional vhost on for non-MSIX virtio guests (default: false).
> > > > >  #
> > > > > +# @queues: #optional number of queues to be created for multiqueue vhost-user
> > > > > +#          (default: 1) (Since 2.5)
> > > > > +#
> > > > >  # Since 2.1
> > > > >  ##
> > > > >  { 'struct': 'NetdevVhostUserOptions',
> > > > >    'data': {
> > > > >      'chardev':        'str',
> > > > > -    '*vhostforce':    'bool' } }
> > > > > +    '*vhostforce':    'bool',
> > > > > +    '*queues':        'uint32' } }
> > > > >  
> > > > >  ##
> > > > >  # @NetClientOptions
> > > > > diff --git a/qemu-options.hx b/qemu-options.hx
> > > > > index ec356f6..dad035e 100644
> > > > > --- a/qemu-options.hx
> > > > > +++ b/qemu-options.hx
> > > > > @@ -1942,13 +1942,14 @@ The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
> > > > >  netdev.  @code{-net} and @code{-device} with parameter @option{vlan} create the
> > > > >  required hub automatically.
> > > > >  
> > > > > -@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> > > > > +@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
> > > > >  
> > > > >  Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
> > > > >  be a unix domain socket backed one. The vhost-user uses a specifically defined
> > > > >  protocol to pass vhost ioctl replacement messages to an application on the other
> > > > >  end of the socket. On non-MSIX guests, the feature can be forced with
> > > > > -@var{vhostforce}.
> > > > > +@var{vhostforce}. Use 'queues=@var{n}' to specify the number of queues to
> > > > > +be created for multiqueue vhost-user.
> > > > >  
> > > > >  Example:
> > > > >  @example
> > > > > -- 
> > > > > 1.8.4.2
> > > > >
Ouyang, Changchun Sept. 2, 2015, 5:45 a.m. UTC | #18
> -----Original Message-----
> From: Michael S. Tsirkin [mailto:mst@redhat.com]
> Sent: Tuesday, September 1, 2015 10:10 PM
> To: Yuanhan Liu
> Cc: snabb-devel@googlegroups.com; thibaut.collet@6wind.com; qemu-
> devel@nongnu.org; n.nikolaev@virtualopensystems.com; luke@snabb.co;
> Long, Thomas; Ouyang, Changchun
> Subject: Re: [Qemu-devel] [PATCH v6 1/2] vhost-user: add multi queue
> support
> 
> On Tue, Sep 01, 2015 at 08:15:15PM +0800, Yuanhan Liu wrote:
> > On Tue, Sep 01, 2015 at 01:07:11PM +0300, Michael S. Tsirkin wrote:
> > > On Tue, Sep 01, 2015 at 05:13:50PM +0800, Yuanhan Liu wrote:
> > > > On Thu, Aug 13, 2015 at 12:18:38PM +0300, Michael S. Tsirkin wrote:
> > > > > On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
> > > > > > Based on patch by Nikolay Nikolaev:
> > > > > > Vhost-user will implement the multi queue support in a similar
> > > > > > way to what vhost already has - a separate thread for each queue.
> > > > > > To enable the multi queue functionality - a new command line
> > > > > > parameter "queues" is introduced for the vhost-user netdev.
> > > > > >
> > > > > > The RESET_OWNER change is based on commit:
> > > > > >    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
> > > > > > If it is reverted, the patch need update for it accordingly.
> > > > > >
> > > > > > Signed-off-by: Nikolay Nikolaev
> > > > > > <n.nikolaev@virtualopensystems.com>
> > > > > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > > > [snip...]
> > > > > > @@ -198,7 +203,7 @@ Message types
> > > > > >
> > > > > >        Id: 4
> > > > > >        Equivalent ioctl: VHOST_RESET_OWNER
> > > > > > -      Master payload: N/A
> > > > > > +      Master payload: vring state description
> > > > > >
> > > > > >        Issued when a new connection is about to be closed. The Master
> will no
> > > > > >        longer own this connection (and will usually close it).
> > > > >
> > > > > This is an interface change, isn't it?
> > > > > We can't make it unconditionally, need to make it dependent on a
> > > > > protocol flag.
> > > >
> > > > Hi Michael,
> > > >
> > > > I'm wondering why we need a payload here, as we don't do that for
> > > > VHOST_SET_OWNER. I mean, stopping one or few queue pairs when a
> > > > connect is about to be close doesn't make sense to me. Instead, we
> > > > should clean up all queue pair when VHOST_RESET_OWNER message is
> > > > received, right?
> > >
> > > We really should rename VHOST_RESET_OWNER to
> VHOST_RESET_DEVICE.
> >
> > Yeah, second that.
> >
> > BTW, can we simply do the name convertion, just changing
> > VHOST_RESET_OWNER to VHOST_RESET_DEVICE(or VHOST_STOP_DEVICE).
> I guess
> > it's doable in theory as far as we don't change the number. I somehow
> > feel it's not a good practice.
> 
> I think just renaming is fine, we are not changing the protocol at all.

Agree, till now, VHOST_RESET_OWNER is not used pretty much, so no backward compatibility issue.
Renaming it is enough.

> 
> > Maybe we could make it as a new vhost message, and mark the old one as
> > obsolete? That doesn't sound perfect, either, as it reserves a number
> > for a message we will not use any more.
> >
> > Also, we may rename VHOST_SET_OWNER to VHOST_INIT_DEVICE?
> 
> I think VHOST_SET_OWNER specified who's the master?
> 

I think VHOST_SET_OWNER is also the message for socket, not for each vring.

> > > And I agree, I don't think it needs a payload.
> >
> > Good to know.
> >
> > >
> > >
> > > > >
> > > > >
> > > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index
> > > > > > 1f25cb3..9cd6c05 100644
> > > > > > --- a/hw/net/vhost_net.c
> > > > > > +++ b/hw/net/vhost_net.c
> > > > [snip...]
> > > > > >  static int net_vhost_user_init(NetClientState *peer, const char
> *device,
> > > > > > -                               const char *name, CharDriverState *chr)
> > > > > > +                               const char *name, CharDriverState *chr,
> > > > > > +                               uint32_t queues)
> > > > > >  {
> > > > > >      NetClientState *nc;
> > > > > >      VhostUserState *s;
> > > > > > +    int i;
> > > > > >
> > > > > > -    nc = qemu_new_net_client(&net_vhost_user_info, peer, device,
> name);
> > > > > > +    for (i = 0; i < queues; i++) {
> > > > > > +        nc = qemu_new_net_client(&net_vhost_user_info, peer,
> > > > > > + device, name);
> > > > > >
> > > > > > -    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
> > > > > > -             chr->label);
> > > > > > +        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d
> to %s",
> > > > > > +                 i, chr->label);
> > > > > >
> > > > > > -    s = DO_UPCAST(VhostUserState, nc, nc);
> > > > > > +        s = DO_UPCAST(VhostUserState, nc, nc);
> > > > > >
> > > > > > -    /* We don't provide a receive callback */
> > > > > > -    s->nc.receive_disabled = 1;
> > > > > > -    s->chr = chr;
> > > > > > -
> > > > > > -    qemu_chr_add_handlers(s->chr, NULL, NULL,
> net_vhost_user_event, s);
> > > > > > +        /* We don't provide a receive callback */
> > > > > > +        s->nc.receive_disabled = 1;
> > > > > > +        s->chr = chr;
> > > > > > +        s->nc.queue_index = i;
> > > > > >
> > > > > > +        qemu_chr_add_handlers(s->chr, NULL, NULL,
> net_vhost_user_event, s);
> > > > > > +    }
> > > > > >      return 0;
> > > > > >  }
> > > > > >
> > > > > > @@ -225,6 +229,7 @@ static int net_vhost_check_net(QemuOpts
> > > > > > *opts, void *opaque)
> > > > >
> > > > >
> > > > > There are two problems here:
> > > > >
> > > > > 1. we don't really know that the backend
> > > > >    is able to support the requested number of queues.
> > > > >    If not, everything will fail, silently.
> > > > >    A new message to query the # of queues could help, though
> > > > >    I'm not sure what can be done on failure. Fail connection?
> > > >
> > > > What I'm thinking is we may do:
> > > >
> > > > - introduce a feature flag, for indicating we support MQ or not.
> > > >
> > > >   We query this flag only when # of queues given is > 1. We exit
> > > >   if it not matches.
> > > >
> > > > - invoke vhost_dev init repeatedly for # of queues given, unless
> > > >   something wrong happened, which basically means the backend
> > > >   can not support such # of queues; we then quit.
> > > >
> > > >   We could, as you suggested, add an another message to query
> > > >   the max # queues the backend support. However, judging we have
> > > >   to check the return value of setting up a single queue pair,
> > > >   which already gives feedback when the backed is not able to
> > > >   support requested # of queues, we could save such message,
> > > >   though it's easy to implement :)
> > >
> > > Problem is, we only setup queues when device is started, that is
> > > when guest is running.
> >
> > So we couldn't simply invoke 'exit()', right?
> >
> > >
> > > Doing this at connect would mean we don't start the VM that we can't
> > > then support.
> >
> > Sorry, I'm a bit confused then. You just said that we setup queues
> > when guest is running, but now you were saying that VM hasn't been
> > started yet at connect time. As far as I know, we setup queues when
> > the socket is connected. So, isn't it contradictory in your sayings?
> >
> > >
> > > > >
> > > > > 2. each message (e.g. set memory table) is sent multiple times,
> > > > >    on the same socket.
> > > >
> > > > Yeah, for there is a single socket opening there, it's not
> > > > necessary to send messages like SET_MEM_TABLE multiple times. But
> > > > for other messages that relate to to a specific vring, we have to
> > > > send N times, don't we?
> > >
> > > We need to set up each vring, sure.
> > >
> > >
> > > > So, I'm wondering could we categorize the message in two types:
> > > > vring specific and none-vring specific. For vring specific, we
> > > > send it N times, with the vhost_dev->vq_index telling which one
> > > > queue pair we have interest.
> > > >
> > > > For none-vring specific, we just send it once for first queue pair
> > > > (vhost_dev->queue == 0), just like what we did for tap: we launch
> > > > qemu-ifup/down script only for the first queue pair.
> > >
> > > Sounds reasonable. Make this all internal to vhost user:
> > > no need for common vhost code to know about this distinction.
> >
> > Good to know and I'll keep it in mind.
> >
> > Thanks for your comments.
> >
> >
> > 	--yliu
> > >
> > > > Comments? (And sorry if I made some silly comments, as I'm pretty
> > > > new to this community, say just have read about 2 weeks code).
> > > >
> > > > 	--yliu
> > > >
> > > > >
> > > > >
> > > > >
> > > > > >  int net_init_vhost_user(const NetClientOptions *opts, const char
> *name,
> > > > > >                          NetClientState *peer)  {
> > > > > > +    uint32_t queues;
> > > > > >      const NetdevVhostUserOptions *vhost_user_opts;
> > > > > >      CharDriverState *chr;
> > > > > >
> > > > > > @@ -243,6 +248,12 @@ int net_init_vhost_user(const
> NetClientOptions *opts, const char *name,
> > > > > >          return -1;
> > > > > >      }
> > > > > >
> > > > > > +    /* number of queues for multiqueue */
> > > > > > +    if (vhost_user_opts->has_queues) {
> > > > > > +        queues = vhost_user_opts->queues;
> > > > > > +    } else {
> > > > > > +        queues = 1;
> > > > > > +    }
> > > > > >
> > > > > > -    return net_vhost_user_init(peer, "vhost_user", name, chr);
> > > > > > +    return net_vhost_user_init(peer, "vhost_user", name, chr,
> > > > > > + queues);
> > > > > >  }
> > > > > > diff --git a/qapi-schema.json b/qapi-schema.json index
> > > > > > f97ffa1..51e40ce 100644
> > > > > > --- a/qapi-schema.json
> > > > > > +++ b/qapi-schema.json
> > > > > > @@ -2444,12 +2444,16 @@
> > > > > >  #
> > > > > >  # @vhostforce: #optional vhost on for non-MSIX virtio guests
> (default: false).
> > > > > >  #
> > > > > > +# @queues: #optional number of queues to be created for
> multiqueue vhost-user
> > > > > > +#          (default: 1) (Since 2.5)
> > > > > > +#
> > > > > >  # Since 2.1
> > > > > >  ##
> > > > > >  { 'struct': 'NetdevVhostUserOptions',
> > > > > >    'data': {
> > > > > >      'chardev':        'str',
> > > > > > -    '*vhostforce':    'bool' } }
> > > > > > +    '*vhostforce':    'bool',
> > > > > > +    '*queues':        'uint32' } }
> > > > > >
> > > > > >  ##
> > > > > >  # @NetClientOptions
> > > > > > diff --git a/qemu-options.hx b/qemu-options.hx index
> > > > > > ec356f6..dad035e 100644
> > > > > > --- a/qemu-options.hx
> > > > > > +++ b/qemu-options.hx
> > > > > > @@ -1942,13 +1942,14 @@ The hubport netdev lets you connect a
> > > > > > NIC to a QEMU "vlan" instead of a single  netdev.  @code{-net}
> > > > > > and @code{-device} with parameter @option{vlan} create the
> required hub automatically.
> > > > > >
> > > > > > -@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> > > > > > +@item -netdev
> > > > > > +vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
> > > > > >
> > > > > >  Establish a vhost-user netdev, backed by a chardev @var{id}.
> > > > > > The chardev should  be a unix domain socket backed one. The
> > > > > > vhost-user uses a specifically defined  protocol to pass vhost
> > > > > > ioctl replacement messages to an application on the other  end
> > > > > > of the socket. On non-MSIX guests, the feature can be forced with -
> @var{vhostforce}.
> > > > > > +@var{vhostforce}. Use 'queues=@var{n}' to specify the number
> > > > > > +of queues to be created for multiqueue vhost-user.
> > > > > >
> > > > > >  Example:
> > > > > >  @example
> > > > > > --
> > > > > > 1.8.4.2
> > > > > >
Michael S. Tsirkin Sept. 2, 2015, 12:10 p.m. UTC | #19
On Wed, Sep 02, 2015 at 05:45:18AM +0000, Ouyang, Changchun wrote:
> 
> 
> > -----Original Message-----
> > From: Michael S. Tsirkin [mailto:mst@redhat.com]
> > Sent: Tuesday, September 1, 2015 10:10 PM
> > To: Yuanhan Liu
> > Cc: snabb-devel@googlegroups.com; thibaut.collet@6wind.com; qemu-
> > devel@nongnu.org; n.nikolaev@virtualopensystems.com; luke@snabb.co;
> > Long, Thomas; Ouyang, Changchun
> > Subject: Re: [Qemu-devel] [PATCH v6 1/2] vhost-user: add multi queue
> > support
> > 
> > On Tue, Sep 01, 2015 at 08:15:15PM +0800, Yuanhan Liu wrote:
> > > On Tue, Sep 01, 2015 at 01:07:11PM +0300, Michael S. Tsirkin wrote:
> > > > On Tue, Sep 01, 2015 at 05:13:50PM +0800, Yuanhan Liu wrote:
> > > > > On Thu, Aug 13, 2015 at 12:18:38PM +0300, Michael S. Tsirkin wrote:
> > > > > > On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
> > > > > > > Based on patch by Nikolay Nikolaev:
> > > > > > > Vhost-user will implement the multi queue support in a similar
> > > > > > > way to what vhost already has - a separate thread for each queue.
> > > > > > > To enable the multi queue functionality - a new command line
> > > > > > > parameter "queues" is introduced for the vhost-user netdev.
> > > > > > >
> > > > > > > The RESET_OWNER change is based on commit:
> > > > > > >    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
> > > > > > > If it is reverted, the patch need update for it accordingly.
> > > > > > >
> > > > > > > Signed-off-by: Nikolay Nikolaev
> > > > > > > <n.nikolaev@virtualopensystems.com>
> > > > > > > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > > > > [snip...]
> > > > > > > @@ -198,7 +203,7 @@ Message types
> > > > > > >
> > > > > > >        Id: 4
> > > > > > >        Equivalent ioctl: VHOST_RESET_OWNER
> > > > > > > -      Master payload: N/A
> > > > > > > +      Master payload: vring state description
> > > > > > >
> > > > > > >        Issued when a new connection is about to be closed. The Master
> > will no
> > > > > > >        longer own this connection (and will usually close it).
> > > > > >
> > > > > > This is an interface change, isn't it?
> > > > > > We can't make it unconditionally, need to make it dependent on a
> > > > > > protocol flag.
> > > > >
> > > > > Hi Michael,
> > > > >
> > > > > I'm wondering why we need a payload here, as we don't do that for
> > > > > VHOST_SET_OWNER. I mean, stopping one or few queue pairs when a
> > > > > connect is about to be close doesn't make sense to me. Instead, we
> > > > > should clean up all queue pair when VHOST_RESET_OWNER message is
> > > > > received, right?
> > > >
> > > > We really should rename VHOST_RESET_OWNER to
> > VHOST_RESET_DEVICE.
> > >
> > > Yeah, second that.
> > >
> > > BTW, can we simply do the name convertion, just changing
> > > VHOST_RESET_OWNER to VHOST_RESET_DEVICE(or VHOST_STOP_DEVICE).
> > I guess
> > > it's doable in theory as far as we don't change the number. I somehow
> > > feel it's not a good practice.
> > 
> > I think just renaming is fine, we are not changing the protocol at all.
> 
> Agree, till now, VHOST_RESET_OWNER is not used pretty much, so no backward compatibility issue.
> Renaming it is enough.
> 
> > 
> > > Maybe we could make it as a new vhost message, and mark the old one as
> > > obsolete? That doesn't sound perfect, either, as it reserves a number
> > > for a message we will not use any more.
> > >
> > > Also, we may rename VHOST_SET_OWNER to VHOST_INIT_DEVICE?
> > 
> > I think VHOST_SET_OWNER specified who's the master?
> > 
> 
> I think VHOST_SET_OWNER is also the message for socket, not for each vring.

Exactly. Documentation says it sets the sender as the master of the socket.

> > > > And I agree, I don't think it needs a payload.
> > >
> > > Good to know.
> > >
> > > >
> > > >
> > > > > >
> > > > > >
> > > > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index
> > > > > > > 1f25cb3..9cd6c05 100644
> > > > > > > --- a/hw/net/vhost_net.c
> > > > > > > +++ b/hw/net/vhost_net.c
> > > > > [snip...]
> > > > > > >  static int net_vhost_user_init(NetClientState *peer, const char
> > *device,
> > > > > > > -                               const char *name, CharDriverState *chr)
> > > > > > > +                               const char *name, CharDriverState *chr,
> > > > > > > +                               uint32_t queues)
> > > > > > >  {
> > > > > > >      NetClientState *nc;
> > > > > > >      VhostUserState *s;
> > > > > > > +    int i;
> > > > > > >
> > > > > > > -    nc = qemu_new_net_client(&net_vhost_user_info, peer, device,
> > name);
> > > > > > > +    for (i = 0; i < queues; i++) {
> > > > > > > +        nc = qemu_new_net_client(&net_vhost_user_info, peer,
> > > > > > > + device, name);
> > > > > > >
> > > > > > > -    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
> > > > > > > -             chr->label);
> > > > > > > +        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d
> > to %s",
> > > > > > > +                 i, chr->label);
> > > > > > >
> > > > > > > -    s = DO_UPCAST(VhostUserState, nc, nc);
> > > > > > > +        s = DO_UPCAST(VhostUserState, nc, nc);
> > > > > > >
> > > > > > > -    /* We don't provide a receive callback */
> > > > > > > -    s->nc.receive_disabled = 1;
> > > > > > > -    s->chr = chr;
> > > > > > > -
> > > > > > > -    qemu_chr_add_handlers(s->chr, NULL, NULL,
> > net_vhost_user_event, s);
> > > > > > > +        /* We don't provide a receive callback */
> > > > > > > +        s->nc.receive_disabled = 1;
> > > > > > > +        s->chr = chr;
> > > > > > > +        s->nc.queue_index = i;
> > > > > > >
> > > > > > > +        qemu_chr_add_handlers(s->chr, NULL, NULL,
> > net_vhost_user_event, s);
> > > > > > > +    }
> > > > > > >      return 0;
> > > > > > >  }
> > > > > > >
> > > > > > > @@ -225,6 +229,7 @@ static int net_vhost_check_net(QemuOpts
> > > > > > > *opts, void *opaque)
> > > > > >
> > > > > >
> > > > > > There are two problems here:
> > > > > >
> > > > > > 1. we don't really know that the backend
> > > > > >    is able to support the requested number of queues.
> > > > > >    If not, everything will fail, silently.
> > > > > >    A new message to query the # of queues could help, though
> > > > > >    I'm not sure what can be done on failure. Fail connection?
> > > > >
> > > > > What I'm thinking is we may do:
> > > > >
> > > > > - introduce a feature flag, for indicating we support MQ or not.
> > > > >
> > > > >   We query this flag only when # of queues given is > 1. We exit
> > > > >   if it not matches.
> > > > >
> > > > > - invoke vhost_dev init repeatedly for # of queues given, unless
> > > > >   something wrong happened, which basically means the backend
> > > > >   can not support such # of queues; we then quit.
> > > > >
> > > > >   We could, as you suggested, add an another message to query
> > > > >   the max # queues the backend support. However, judging we have
> > > > >   to check the return value of setting up a single queue pair,
> > > > >   which already gives feedback when the backed is not able to
> > > > >   support requested # of queues, we could save such message,
> > > > >   though it's easy to implement :)
> > > >
> > > > Problem is, we only setup queues when device is started, that is
> > > > when guest is running.
> > >
> > > So we couldn't simply invoke 'exit()', right?
> > >
> > > >
> > > > Doing this at connect would mean we don't start the VM that we can't
> > > > then support.
> > >
> > > Sorry, I'm a bit confused then. You just said that we setup queues
> > > when guest is running, but now you were saying that VM hasn't been
> > > started yet at connect time. As far as I know, we setup queues when
> > > the socket is connected. So, isn't it contradictory in your sayings?
> > >
> > > >
> > > > > >
> > > > > > 2. each message (e.g. set memory table) is sent multiple times,
> > > > > >    on the same socket.
> > > > >
> > > > > Yeah, for there is a single socket opening there, it's not
> > > > > necessary to send messages like SET_MEM_TABLE multiple times. But
> > > > > for other messages that relate to to a specific vring, we have to
> > > > > send N times, don't we?
> > > >
> > > > We need to set up each vring, sure.
> > > >
> > > >
> > > > > So, I'm wondering could we categorize the message in two types:
> > > > > vring specific and none-vring specific. For vring specific, we
> > > > > send it N times, with the vhost_dev->vq_index telling which one
> > > > > queue pair we have interest.
> > > > >
> > > > > For none-vring specific, we just send it once for first queue pair
> > > > > (vhost_dev->queue == 0), just like what we did for tap: we launch
> > > > > qemu-ifup/down script only for the first queue pair.
> > > >
> > > > Sounds reasonable. Make this all internal to vhost user:
> > > > no need for common vhost code to know about this distinction.
> > >
> > > Good to know and I'll keep it in mind.
> > >
> > > Thanks for your comments.
> > >
> > >
> > > 	--yliu
> > > >
> > > > > Comments? (And sorry if I made some silly comments, as I'm pretty
> > > > > new to this community, say just have read about 2 weeks code).
> > > > >
> > > > > 	--yliu
> > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > > >  int net_init_vhost_user(const NetClientOptions *opts, const char
> > *name,
> > > > > > >                          NetClientState *peer)  {
> > > > > > > +    uint32_t queues;
> > > > > > >      const NetdevVhostUserOptions *vhost_user_opts;
> > > > > > >      CharDriverState *chr;
> > > > > > >
> > > > > > > @@ -243,6 +248,12 @@ int net_init_vhost_user(const
> > NetClientOptions *opts, const char *name,
> > > > > > >          return -1;
> > > > > > >      }
> > > > > > >
> > > > > > > +    /* number of queues for multiqueue */
> > > > > > > +    if (vhost_user_opts->has_queues) {
> > > > > > > +        queues = vhost_user_opts->queues;
> > > > > > > +    } else {
> > > > > > > +        queues = 1;
> > > > > > > +    }
> > > > > > >
> > > > > > > -    return net_vhost_user_init(peer, "vhost_user", name, chr);
> > > > > > > +    return net_vhost_user_init(peer, "vhost_user", name, chr,
> > > > > > > + queues);
> > > > > > >  }
> > > > > > > diff --git a/qapi-schema.json b/qapi-schema.json index
> > > > > > > f97ffa1..51e40ce 100644
> > > > > > > --- a/qapi-schema.json
> > > > > > > +++ b/qapi-schema.json
> > > > > > > @@ -2444,12 +2444,16 @@
> > > > > > >  #
> > > > > > >  # @vhostforce: #optional vhost on for non-MSIX virtio guests
> > (default: false).
> > > > > > >  #
> > > > > > > +# @queues: #optional number of queues to be created for
> > multiqueue vhost-user
> > > > > > > +#          (default: 1) (Since 2.5)
> > > > > > > +#
> > > > > > >  # Since 2.1
> > > > > > >  ##
> > > > > > >  { 'struct': 'NetdevVhostUserOptions',
> > > > > > >    'data': {
> > > > > > >      'chardev':        'str',
> > > > > > > -    '*vhostforce':    'bool' } }
> > > > > > > +    '*vhostforce':    'bool',
> > > > > > > +    '*queues':        'uint32' } }
> > > > > > >
> > > > > > >  ##
> > > > > > >  # @NetClientOptions
> > > > > > > diff --git a/qemu-options.hx b/qemu-options.hx index
> > > > > > > ec356f6..dad035e 100644
> > > > > > > --- a/qemu-options.hx
> > > > > > > +++ b/qemu-options.hx
> > > > > > > @@ -1942,13 +1942,14 @@ The hubport netdev lets you connect a
> > > > > > > NIC to a QEMU "vlan" instead of a single  netdev.  @code{-net}
> > > > > > > and @code{-device} with parameter @option{vlan} create the
> > required hub automatically.
> > > > > > >
> > > > > > > -@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> > > > > > > +@item -netdev
> > > > > > > +vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
> > > > > > >
> > > > > > >  Establish a vhost-user netdev, backed by a chardev @var{id}.
> > > > > > > The chardev should  be a unix domain socket backed one. The
> > > > > > > vhost-user uses a specifically defined  protocol to pass vhost
> > > > > > > ioctl replacement messages to an application on the other  end
> > > > > > > of the socket. On non-MSIX guests, the feature can be forced with -
> > @var{vhostforce}.
> > > > > > > +@var{vhostforce}. Use 'queues=@var{n}' to specify the number
> > > > > > > +of queues to be created for multiqueue vhost-user.
> > > > > > >
> > > > > > >  Example:
> > > > > > >  @example
> > > > > > > --
> > > > > > > 1.8.4.2
> > > > > > >
Marcel Apfelbaum Sept. 7, 2015, 11:07 a.m. UTC | #20
On 08/13/2015 12:18 PM, Michael S. Tsirkin wrote:
> On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
>> Based on patch by Nikolay Nikolaev:
>> Vhost-user will implement the multi queue support in a similar way
>> to what vhost already has - a separate thread for each queue.
>> To enable the multi queue functionality - a new command line parameter
>> "queues" is introduced for the vhost-user netdev.
>>
>> The RESET_OWNER change is based on commit:
>>     294ce717e0f212ed0763307f3eab72b4a1bdf4d0
>> If it is reverted, the patch need update for it accordingly.
>>
>> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>> ---
>> Changes since v5:
>>   - fix the message descption for VHOST_RESET_OWNER in vhost-user txt
>>
>> Changes since v4:
>>   - remove the unnecessary trailing '\n'
>>
>> Changes since v3:
>>   - fix one typo and wrap one long line
>>
>> Changes since v2:
>>   - fix vq index issue for set_vring_call
>>     When it is the case of VHOST_SET_VRING_CALL, The vq_index is not initialized before it is used,
>>     thus it could be a random value. The random value leads to crash in vhost after passing down
>>     to vhost, as vhost use this random value to index an array index.
>>   - fix the typo in the doc and description
>>   - address vq index for reset_owner
>>
>> Changes since v1:
>>   - use s->nc.info_str when bringing up/down the backend
>>
>>   docs/specs/vhost-user.txt |  7 ++++++-
>>   hw/net/vhost_net.c        |  3 ++-
>>   hw/virtio/vhost-user.c    | 11 ++++++++++-
>>   net/vhost-user.c          | 37 ++++++++++++++++++++++++-------------
>>   qapi-schema.json          |  6 +++++-
>>   qemu-options.hx           |  5 +++--
>>   6 files changed, 50 insertions(+), 19 deletions(-)
>>

[...]

>
>
> There are two problems here:
>
> 1. we don't really know that the backend
>     is able to support the requested number of queues.
>     If not, everything will fail, silently.
>     A new message to query the # of queues could help, though
>     I'm not sure what can be done on failure. Fail connection?
>
> 2. each message (e.g. set memory table) is sent multiple times,
>     on the same socket.

Hi,

Actually each queue has its own vhost_dev device which in turn has his own
memory mappings. Because of this VHOST_SET_MEM_TABLE should be sent for each queue.

Should we change it to VHOST_SET_VRING_MEM_TABLE? Or maybe I got this wrong...

Thanks,
Marcel




>
>
>

[...]
Michael S. Tsirkin Sept. 7, 2015, 12:26 p.m. UTC | #21
On Mon, Sep 07, 2015 at 02:07:38PM +0300, Marcel Apfelbaum wrote:
> On 08/13/2015 12:18 PM, Michael S. Tsirkin wrote:
> >On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
> >>Based on patch by Nikolay Nikolaev:
> >>Vhost-user will implement the multi queue support in a similar way
> >>to what vhost already has - a separate thread for each queue.
> >>To enable the multi queue functionality - a new command line parameter
> >>"queues" is introduced for the vhost-user netdev.
> >>
> >>The RESET_OWNER change is based on commit:
> >>    294ce717e0f212ed0763307f3eab72b4a1bdf4d0
> >>If it is reverted, the patch need update for it accordingly.
> >>
> >>Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> >>Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>---
> >>Changes since v5:
> >>  - fix the message descption for VHOST_RESET_OWNER in vhost-user txt
> >>
> >>Changes since v4:
> >>  - remove the unnecessary trailing '\n'
> >>
> >>Changes since v3:
> >>  - fix one typo and wrap one long line
> >>
> >>Changes since v2:
> >>  - fix vq index issue for set_vring_call
> >>    When it is the case of VHOST_SET_VRING_CALL, The vq_index is not initialized before it is used,
> >>    thus it could be a random value. The random value leads to crash in vhost after passing down
> >>    to vhost, as vhost use this random value to index an array index.
> >>  - fix the typo in the doc and description
> >>  - address vq index for reset_owner
> >>
> >>Changes since v1:
> >>  - use s->nc.info_str when bringing up/down the backend
> >>
> >>  docs/specs/vhost-user.txt |  7 ++++++-
> >>  hw/net/vhost_net.c        |  3 ++-
> >>  hw/virtio/vhost-user.c    | 11 ++++++++++-
> >>  net/vhost-user.c          | 37 ++++++++++++++++++++++++-------------
> >>  qapi-schema.json          |  6 +++++-
> >>  qemu-options.hx           |  5 +++--
> >>  6 files changed, 50 insertions(+), 19 deletions(-)
> >>
> 
> [...]
> 
> >
> >
> >There are two problems here:
> >
> >1. we don't really know that the backend
> >    is able to support the requested number of queues.
> >    If not, everything will fail, silently.
> >    A new message to query the # of queues could help, though
> >    I'm not sure what can be done on failure. Fail connection?
> >
> >2. each message (e.g. set memory table) is sent multiple times,
> >    on the same socket.
> 
> Hi,
> 
> Actually each queue has its own vhost_dev device which in turn has his own
> memory mappings. Because of this VHOST_SET_MEM_TABLE should be sent for each queue.
> 
> Should we change it to VHOST_SET_VRING_MEM_TABLE? Or maybe I got this wrong...
> 
> Thanks,
> Marcel
> 

You got it wrong, the table is the same for all rings.

> 
> 
> >
> >
> >
> 
> [...]
Marcel Apfelbaum Sept. 7, 2015, 1:08 p.m. UTC | #22
On 09/07/2015 03:26 PM, Michael S. Tsirkin wrote:
> On Mon, Sep 07, 2015 at 02:07:38PM +0300, Marcel Apfelbaum wrote:
>> On 08/13/2015 12:18 PM, Michael S. Tsirkin wrote:
>>> On Wed, Aug 12, 2015 at 02:25:41PM +0800, Ouyang Changchun wrote:
>>>> Based on patch by Nikolay Nikolaev:
>>>> Vhost-user will implement the multi queue support in a similar way
>>>> to what vhost already has - a separate thread for each queue.
>>>> To enable the multi queue functionality - a new command line parameter
>>>> "queues" is introduced for the vhost-user netdev.
>>>>
>>>> The RESET_OWNER change is based on commit:
>>>>     294ce717e0f212ed0763307f3eab72b4a1bdf4d0
>>>> If it is reverted, the patch need update for it accordingly.
>>>>
>>>> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>>> ---
>>>> Changes since v5:
>>>>   - fix the message descption for VHOST_RESET_OWNER in vhost-user txt
>>>>
>>>> Changes since v4:
>>>>   - remove the unnecessary trailing '\n'
>>>>
>>>> Changes since v3:
>>>>   - fix one typo and wrap one long line
>>>>
>>>> Changes since v2:
>>>>   - fix vq index issue for set_vring_call
>>>>     When it is the case of VHOST_SET_VRING_CALL, The vq_index is not initialized before it is used,
>>>>     thus it could be a random value. The random value leads to crash in vhost after passing down
>>>>     to vhost, as vhost use this random value to index an array index.
>>>>   - fix the typo in the doc and description
>>>>   - address vq index for reset_owner
>>>>
>>>> Changes since v1:
>>>>   - use s->nc.info_str when bringing up/down the backend
>>>>
>>>>   docs/specs/vhost-user.txt |  7 ++++++-
>>>>   hw/net/vhost_net.c        |  3 ++-
>>>>   hw/virtio/vhost-user.c    | 11 ++++++++++-
>>>>   net/vhost-user.c          | 37 ++++++++++++++++++++++++-------------
>>>>   qapi-schema.json          |  6 +++++-
>>>>   qemu-options.hx           |  5 +++--
>>>>   6 files changed, 50 insertions(+), 19 deletions(-)
>>>>
>>
>> [...]
>>
>>>
>>>
>>> There are two problems here:
>>>
>>> 1. we don't really know that the backend
>>>     is able to support the requested number of queues.
>>>     If not, everything will fail, silently.
>>>     A new message to query the # of queues could help, though
>>>     I'm not sure what can be done on failure. Fail connection?
>>>
>>> 2. each message (e.g. set memory table) is sent multiple times,
>>>     on the same socket.
>>
>> Hi,
>>
>> Actually each queue has its own vhost_dev device which in turn has his own
>> memory mappings. Because of this VHOST_SET_MEM_TABLE should be sent for each queue.
>>
>> Should we change it to VHOST_SET_VRING_MEM_TABLE? Or maybe I got this wrong...
>>
>> Thanks,
>> Marcel
>>
>
> You got it wrong, the table is the same for all rings.

OK, thanks. So the backend, in this case DPDK, maps it different per each queue.
Here is an example for 2 queues:

  VHOST_CONFIG: read message VHOST_USER_SET_MEM_TABLE
  VHOST_CONFIG: mapped region 0 fd:165 to 0x2aac40000000 sz:0xa0000 off:0x0
  VHOST_CONFIG: mapped region 1 fd:166 to 0x2aac80000000 sz:0x80000000 off:0xc0000
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_NUM
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_BASE
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_ADDR
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_KICK
  VHOST_CONFIG: vring kick idx:0 file:167 VHOST_CONFIG: virtio isn't ready for processing.
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_NUM
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_BASE
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_ADDR
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_KICK
  VHOST_CONFIG: vring kick idx:1 file:168
  VHOST_CONFIG: virtio isn't ready for processing.
  VHOST_CONFIG: read message VHOST_USER_SET_FEATURES
  VHOST_CONFIG: read message VHOST_USER_SET_MEM_TABLE
  VHOST_CONFIG: mapped region 0 fd:169 to 0x2aad00000000 sz:0xa0000 off:0x0
  VHOST_CONFIG: mapped region 1 fd:170 to 0x2aad40000000 sz:0x80000000 off:0xc0000
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_NUM
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_BASE
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_ADDR
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_KICK
  VHOST_CONFIG: vring kick idx:2 file:171
  VHOST_CONFIG: virtio isn't ready for processing.
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_NUM
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_BASE
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_ADDR
  VHOST_CONFIG: read message VHOST_USER_SET_VRING_KICK
  VHOST_CONFIG: vring kick idx:3 file:172
  VHOST_CONFIG: virtio is now ready for processing.

DPDK expects this message per queue and maps it to a different address.
Trying to send it only once will leave the second queue without the mapping.

I'll try to fix this on dpdk side.

Thanks,
Marcel


>
>>
>>
>>>
>>>
>>>
>>
>> [...]
diff mbox

Patch

diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
index 70da3b1..9390f89 100644
--- a/docs/specs/vhost-user.txt
+++ b/docs/specs/vhost-user.txt
@@ -135,6 +135,11 @@  As older slaves don't support negotiating protocol features,
 a feature bit was dedicated for this purpose:
 #define VHOST_USER_F_PROTOCOL_FEATURES 30
 
+Multi queue support
+-------------------
+The protocol supports multiple queues by setting all index fields in the sent
+messages to a properly calculated value.
+
 Message types
 -------------
 
@@ -198,7 +203,7 @@  Message types
 
       Id: 4
       Equivalent ioctl: VHOST_RESET_OWNER
-      Master payload: N/A
+      Master payload: vring state description
 
       Issued when a new connection is about to be closed. The Master will no
       longer own this connection (and will usually close it).
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 1f25cb3..9cd6c05 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -159,6 +159,7 @@  struct vhost_net *vhost_net_init(VhostNetOptions *options)
 
     net->dev.nvqs = 2;
     net->dev.vqs = net->vqs;
+    net->dev.vq_index = net->nc->queue_index;
 
     r = vhost_dev_init(&net->dev, options->opaque,
                        options->backend_type, options->force);
@@ -269,7 +270,7 @@  static void vhost_net_stop_one(struct vhost_net *net,
         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
             const VhostOps *vhost_ops = net->dev.vhost_ops;
             int r = vhost_ops->vhost_call(&net->dev, VHOST_RESET_OWNER,
-                                          NULL);
+                                          &file);
             assert(r >= 0);
         }
     }
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 27ba035..fb11d4c 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -219,7 +219,12 @@  static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
         break;
 
     case VHOST_USER_SET_OWNER:
+        break;
+
     case VHOST_USER_RESET_OWNER:
+        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
+        msg.state.index += dev->vq_index;
+        msg.size = sizeof(m.state);
         break;
 
     case VHOST_USER_SET_MEM_TABLE:
@@ -262,17 +267,20 @@  static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
     case VHOST_USER_SET_VRING_NUM:
     case VHOST_USER_SET_VRING_BASE:
         memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
+        msg.state.index += dev->vq_index;
         msg.size = sizeof(m.state);
         break;
 
     case VHOST_USER_GET_VRING_BASE:
         memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
+        msg.state.index += dev->vq_index;
         msg.size = sizeof(m.state);
         need_reply = 1;
         break;
 
     case VHOST_USER_SET_VRING_ADDR:
         memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
+        msg.addr.index += dev->vq_index;
         msg.size = sizeof(m.addr);
         break;
 
@@ -280,7 +288,7 @@  static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
     case VHOST_USER_SET_VRING_CALL:
     case VHOST_USER_SET_VRING_ERR:
         file = arg;
-        msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
+        msg.u64 = (file->index + dev->vq_index) & VHOST_USER_VRING_IDX_MASK;
         msg.size = sizeof(m.u64);
         if (ioeventfd_enabled() && file->fd > 0) {
             fds[fd_num++] = file->fd;
@@ -322,6 +330,7 @@  static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
                 error_report("Received bad msg size.");
                 return -1;
             }
+            msg.state.index -= dev->vq_index;
             memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
             break;
         default:
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 1d86a2b..904d8af 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -121,35 +121,39 @@  static void net_vhost_user_event(void *opaque, int event)
     case CHR_EVENT_OPENED:
         vhost_user_start(s);
         net_vhost_link_down(s, false);
-        error_report("chardev \"%s\" went up", s->chr->label);
+        error_report("chardev \"%s\" went up", s->nc.info_str);
         break;
     case CHR_EVENT_CLOSED:
         net_vhost_link_down(s, true);
         vhost_user_stop(s);
-        error_report("chardev \"%s\" went down", s->chr->label);
+        error_report("chardev \"%s\" went down", s->nc.info_str);
         break;
     }
 }
 
 static int net_vhost_user_init(NetClientState *peer, const char *device,
-                               const char *name, CharDriverState *chr)
+                               const char *name, CharDriverState *chr,
+                               uint32_t queues)
 {
     NetClientState *nc;
     VhostUserState *s;
+    int i;
 
-    nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
+    for (i = 0; i < queues; i++) {
+        nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
 
-    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
-             chr->label);
+        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
+                 i, chr->label);
 
-    s = DO_UPCAST(VhostUserState, nc, nc);
+        s = DO_UPCAST(VhostUserState, nc, nc);
 
-    /* We don't provide a receive callback */
-    s->nc.receive_disabled = 1;
-    s->chr = chr;
-
-    qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
+        /* We don't provide a receive callback */
+        s->nc.receive_disabled = 1;
+        s->chr = chr;
+        s->nc.queue_index = i;
 
+        qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
+    }
     return 0;
 }
 
@@ -225,6 +229,7 @@  static int net_vhost_check_net(QemuOpts *opts, void *opaque)
 int net_init_vhost_user(const NetClientOptions *opts, const char *name,
                         NetClientState *peer)
 {
+    uint32_t queues;
     const NetdevVhostUserOptions *vhost_user_opts;
     CharDriverState *chr;
 
@@ -243,6 +248,12 @@  int net_init_vhost_user(const NetClientOptions *opts, const char *name,
         return -1;
     }
 
+    /* number of queues for multiqueue */
+    if (vhost_user_opts->has_queues) {
+        queues = vhost_user_opts->queues;
+    } else {
+        queues = 1;
+    }
 
-    return net_vhost_user_init(peer, "vhost_user", name, chr);
+    return net_vhost_user_init(peer, "vhost_user", name, chr, queues);
 }
diff --git a/qapi-schema.json b/qapi-schema.json
index f97ffa1..51e40ce 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2444,12 +2444,16 @@ 
 #
 # @vhostforce: #optional vhost on for non-MSIX virtio guests (default: false).
 #
+# @queues: #optional number of queues to be created for multiqueue vhost-user
+#          (default: 1) (Since 2.5)
+#
 # Since 2.1
 ##
 { 'struct': 'NetdevVhostUserOptions',
   'data': {
     'chardev':        'str',
-    '*vhostforce':    'bool' } }
+    '*vhostforce':    'bool',
+    '*queues':        'uint32' } }
 
 ##
 # @NetClientOptions
diff --git a/qemu-options.hx b/qemu-options.hx
index ec356f6..dad035e 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1942,13 +1942,14 @@  The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
 netdev.  @code{-net} and @code{-device} with parameter @option{vlan} create the
 required hub automatically.
 
-@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
+@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
 
 Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
 be a unix domain socket backed one. The vhost-user uses a specifically defined
 protocol to pass vhost ioctl replacement messages to an application on the other
 end of the socket. On non-MSIX guests, the feature can be forced with
-@var{vhostforce}.
+@var{vhostforce}. Use 'queues=@var{n}' to specify the number of queues to
+be created for multiqueue vhost-user.
 
 Example:
 @example