diff mbox series

[v15,3/8] net/vmnet: implement shared mode (vmnet-shared)

Message ID 20220225171402.64861-4-Vladislav.Yaroshchuk@jetbrains.com
State New
Headers show
Series Add vmnet.framework based network backend | expand

Commit Message

Vladislav Yaroshchuk Feb. 25, 2022, 5:13 p.m. UTC
Interaction with vmnet.framework in different modes
differs only on configuration stage, so we can create
common `send`, `receive`, etc. procedures and reuse them.

vmnet.framework supports iov, but writing more than
one iov into vmnet interface fails with
'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
one and passing it to vmnet works fine. That's the
reason why receive_iov() left unimplemented. But it still
works with good enough performance having .receive()
net/vmnet: implement shared mode (vmnet-shared)

Interaction with vmnet.framework in different modes
differs only on configuration stage, so we can create
common `send`, `receive`, etc. procedures and reuse them.

vmnet.framework supports iov, but writing more than
one iov into vmnet interface fails with
'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
one and passing it to vmnet works fine. That's the
reason why receive_iov() left unimplemented. But it still
works with good enough performance having .receive()
implemented only.

Also, there is no way to unsubscribe from vmnet packages
receiving except registering and unregistering event
callback or simply drop packages just ignoring and
not processing them when related flag is set. Here we do
using the second way.

Signed-off-by: Phillip Tennen <phillip@axleos.com>
Signed-off-by: Vladislav Yaroshchuk <Vladislav.Yaroshchuk@jetbrains.com>
---
 net/vmnet-common.m | 302 +++++++++++++++++++++++++++++++++++++++++++++
 net/vmnet-shared.c |  94 +++++++++++++-
 net/vmnet_int.h    |  39 +++++-
 3 files changed, 430 insertions(+), 5 deletions(-)

Comments

Akihiko Odaki Feb. 25, 2022, 5:46 p.m. UTC | #1
On 2022/02/26 2:13, Vladislav Yaroshchuk wrote:
> Interaction with vmnet.framework in different modes
> differs only on configuration stage, so we can create
> common `send`, `receive`, etc. procedures and reuse them.
> 
> vmnet.framework supports iov, but writing more than
> one iov into vmnet interface fails with
> 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
> one and passing it to vmnet works fine. That's the
> reason why receive_iov() left unimplemented. But it still
> works with good enough performance having .receive()
> net/vmnet: implement shared mode (vmnet-shared)
> 
> Interaction with vmnet.framework in different modes
> differs only on configuration stage, so we can create
> common `send`, `receive`, etc. procedures and reuse them.
> 
> vmnet.framework supports iov, but writing more than
> one iov into vmnet interface fails with
> 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
> one and passing it to vmnet works fine. That's the
> reason why receive_iov() left unimplemented. But it still
> works with good enough performance having .receive()
> implemented only.
> 
> Also, there is no way to unsubscribe from vmnet packages
> receiving except registering and unregistering event
> callback or simply drop packages just ignoring and
> not processing them when related flag is set. Here we do
> using the second way.
> 
> Signed-off-by: Phillip Tennen <phillip@axleos.com>
> Signed-off-by: Vladislav Yaroshchuk <Vladislav.Yaroshchuk@jetbrains.com>

Thank you for persistently working on this.

> ---
>   net/vmnet-common.m | 302 +++++++++++++++++++++++++++++++++++++++++++++
>   net/vmnet-shared.c |  94 +++++++++++++-
>   net/vmnet_int.h    |  39 +++++-
>   3 files changed, 430 insertions(+), 5 deletions(-)
> 
> diff --git a/net/vmnet-common.m b/net/vmnet-common.m
> index 56612c72ce..2f70921cae 100644
> --- a/net/vmnet-common.m
> +++ b/net/vmnet-common.m
> @@ -10,6 +10,8 @@
>    */
>   
>   #include "qemu/osdep.h"
> +#include "qemu/main-loop.h"
> +#include "qemu/log.h"
>   #include "qapi/qapi-types-net.h"
>   #include "vmnet_int.h"
>   #include "clients.h"
> @@ -17,4 +19,304 @@
>   #include "qapi/error.h"
>   
>   #include <vmnet/vmnet.h>
> +#include <dispatch/dispatch.h>
>   
> +
> +static inline void vmnet_set_send_bh_scheduled(VmnetCommonState *s,
> +                                               bool enable)
> +{
> +    qatomic_set(&s->send_scheduled, enable);
> +}
> +
> +
> +static inline bool vmnet_is_send_bh_scheduled(VmnetCommonState *s)
> +{
> +    return qatomic_load_acquire(&s->send_scheduled);
> +}
> +
> +
> +static inline void vmnet_set_send_enabled(VmnetCommonState *s,
> +                                          bool enable)
> +{
> +    if (enable) {
> +        vmnet_interface_set_event_callback(
> +            s->vmnet_if,
> +            VMNET_INTERFACE_PACKETS_AVAILABLE,
> +            s->if_queue,
> +            ^(interface_event_t event_id, xpc_object_t event) {
> +                assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE);
> +                /*
> +                 * This function is being called from a non qemu thread, so
> +                 * we only schedule a BH, and do the rest of the io completion
> +                 * handling from vmnet_send_bh() which runs in a qemu context.
> +                 *
> +                 * Avoid scheduling multiple bottom halves
> +                 */
> +                if (!vmnet_is_send_bh_scheduled(s)) {
> +                    vmnet_set_send_bh_scheduled(s, true);

It can be interrupted between vmnet_is_send_bh_scheduled and 
vmnet_set_send_bh_scheduled, which leads to data race.

> +                    qemu_bh_schedule(s->send_bh);
> +                }
> +            });
> +    } else {
> +        vmnet_interface_set_event_callback(
> +            s->vmnet_if,
> +            VMNET_INTERFACE_PACKETS_AVAILABLE,
> +            NULL,
> +            NULL);
> +    }
> +}
> +
> +
> +static void vmnet_send_completed(NetClientState *nc, ssize_t len)
> +{
> +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
> +    vmnet_set_send_enabled(s, true);
> +}
> +
> +
> +static void vmnet_send_bh(void *opaque)
> +{
> +    NetClientState *nc = (NetClientState *) opaque;
> +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
> +
> +    struct iovec *iov = s->iov_buf;
> +    struct vmpktdesc *packets = s->packets_buf;
> +    int pkt_cnt;
> +    int i;
> +
> +    vmnet_return_t status;
> +    ssize_t size;
> +
> +    /* read as many packets as present */
> +    pkt_cnt = VMNET_PACKETS_LIMIT;

There could be more than VMNET_PACKETS_LIMIT. You may call vmnet_read in 
a loop.

> +    for (i = 0; i < pkt_cnt; ++i) {
> +        packets[i].vm_pkt_size = s->max_packet_size;
> +        packets[i].vm_pkt_iovcnt = 1;
> +        packets[i].vm_flags = 0;
> +    }
> +
> +    status = vmnet_read(s->vmnet_if, packets, &pkt_cnt);
> +    if (status != VMNET_SUCCESS) {
> +        error_printf("vmnet: read failed: %s\n",
> +                     vmnet_status_map_str(status));
> +        goto done;
> +    }
> +
> +    for (i = 0; i < pkt_cnt; ++i) {
> +        size = qemu_send_packet_async(nc,
> +                                      iov[i].iov_base,
> +                                      packets[i].vm_pkt_size,
> +                                      vmnet_send_completed);
> +        if (size == 0) {
> +            vmnet_set_send_enabled(s, false);
> +            goto done;
> +        } else if (size < 0) {
> +            break;
> +        }

goto is not needed here. "break" when size <= 0.

> +    }
> +
> +done:
> +    vmnet_set_send_bh_scheduled(s, false);
> +}
> +
> +
> +static void vmnet_bufs_init(VmnetCommonState *s)
> +{
> +    struct vmpktdesc *packets = s->packets_buf;
> +    struct iovec *iov = s->iov_buf;
> +    int i;
> +
> +    for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
> +        iov[i].iov_len = s->max_packet_size;
> +        iov[i].iov_base = g_malloc0(iov[i].iov_len);
> +        packets[i].vm_pkt_iov = iov + i;
> +    }
> +}
> +
> +
> +const char *vmnet_status_map_str(vmnet_return_t status)
> +{
> +    switch (status) {
> +    case VMNET_SUCCESS:
> +        return "success";
> +    case VMNET_FAILURE:
> +        return "general failure (possibly not enough privileges)";
> +    case VMNET_MEM_FAILURE:
> +        return "memory allocation failure";
> +    case VMNET_INVALID_ARGUMENT:
> +        return "invalid argument specified";
> +    case VMNET_SETUP_INCOMPLETE:
> +        return "interface setup is not complete";
> +    case VMNET_INVALID_ACCESS:
> +        return "invalid access, permission denied";
> +    case VMNET_PACKET_TOO_BIG:
> +        return "packet size is larger than MTU";
> +    case VMNET_BUFFER_EXHAUSTED:
> +        return "buffers exhausted in kernel";
> +    case VMNET_TOO_MANY_PACKETS:
> +        return "packet count exceeds limit";
> +#if defined(MAC_OS_VERSION_11_0) && \
> +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
> +    case VMNET_SHARING_SERVICE_BUSY:
> +        return "conflict, sharing service is in use";
> +#endif
> +    default:
> +        return "unknown vmnet error";
> +    }
> +}
> +
> +
> +int vmnet_if_create(NetClientState *nc,
> +                    xpc_object_t if_desc,
> +                    Error **errp)
> +{
> +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);;

Duplicate semicolons.

> +    dispatch_semaphore_t if_created_sem = dispatch_semaphore_create(0);

if_created_sem leaks.

> +    __block vmnet_return_t if_status;
> +
> +    s->if_queue = dispatch_queue_create(
> +        "org.qemu.vmnet.if_queue",
> +        DISPATCH_QUEUE_SERIAL
> +    );
> +
> +    xpc_dictionary_set_bool(
> +        if_desc,
> +        vmnet_allocate_mac_address_key,
> +        false
> +    );
> +#ifdef DEBUG
> +    qemu_log("vmnet.start.interface_desc:\n");
> +    xpc_dictionary_apply(if_desc,
> +                         ^bool(const char *k, xpc_object_t v) {
> +                             char *desc = xpc_copy_description(v);
> +                             qemu_log("  %s=%s\n", k, desc);
> +                             free(desc);
> +                             return true;
> +                         });
> +#endif /* DEBUG */
> +
> +    s->vmnet_if = vmnet_start_interface(
> +        if_desc,
> +        s->if_queue,
> +        ^(vmnet_return_t status, xpc_object_t interface_param) {
> +            if_status = status;
> +            if (status != VMNET_SUCCESS || !interface_param) {
> +                dispatch_semaphore_signal(if_created_sem);
> +                return;
> +            }
> +
> +#ifdef DEBUG
> +            qemu_log("vmnet.start.interface_param:\n");
> +            xpc_dictionary_apply(interface_param,
> +                                 ^bool(const char *k, xpc_object_t v) {
> +                                     char *desc = xpc_copy_description(v);
> +                                     qemu_log("  %s=%s\n", k, desc);
> +                                     free(desc);
> +                                     return true;
> +                                 });
> +#endif /* DEBUG */
> +
> +            s->mtu = xpc_dictionary_get_uint64(
> +                interface_param,
> +                vmnet_mtu_key);
> +            s->max_packet_size = xpc_dictionary_get_uint64(
> +                interface_param,
> +                vmnet_max_packet_size_key);
> +
> +            dispatch_semaphore_signal(if_created_sem);
> +        });
> +
> +    if (s->vmnet_if == NULL) {
> +        error_setg(errp,
> +                   "unable to create interface "
> +                   "with requested params");

You don't need line breaks here. Breaking a string into a few would also 
makes it a bit hard to grep.

> +        return -1;

s->if_queue leaks.

> +    }
> +
> +    dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER);
> +
> +    if (if_status != VMNET_SUCCESS) {
> +        error_setg(errp,
> +                   "cannot create vmnet interface: %s",
> +                   vmnet_status_map_str(if_status));
> +        return -1;
> +    }
> +
> +    s->send_bh = aio_bh_new(qemu_get_aio_context(), vmnet_send_bh, nc);
> +    vmnet_bufs_init(s);
> +    vmnet_set_send_bh_scheduled(s, false);
> +    vmnet_set_send_enabled(s, true);
> +    return 0;
> +}
> +
> +
> +ssize_t vmnet_receive_common(NetClientState *nc,
> +                             const uint8_t *buf,
> +                             size_t size)
> +{
> +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
> +    struct vmpktdesc packet;
> +    struct iovec iov;
> +    int pkt_cnt;
> +    vmnet_return_t if_status;
> +
> +    if (size > s->max_packet_size) {
> +        warn_report("vmnet: packet is too big, %zu > %llu\n",

Use PRIu64.

> +                    packet.vm_pkt_size,
> +                    s->max_packet_size);
> +        return -1;
> +    }
> +
> +    iov.iov_base = (char *) buf;
> +    iov.iov_len = size;
> +
> +    packet.vm_pkt_iovcnt = 1;
> +    packet.vm_flags = 0;
> +    packet.vm_pkt_size = size;
> +    packet.vm_pkt_iov = &iov;
> +    pkt_cnt = 1;
> +
> +    if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt);
> +    if (if_status != VMNET_SUCCESS) {
> +        error_report("vmnet: write error: %s\n",
> +                     vmnet_status_map_str(if_status));

Why don't return -1?

> +    }
> +
> +    if (if_status == VMNET_SUCCESS && pkt_cnt) {
> +        return size;
> +    }
> +    return 0;
> +}
> +
> +
> +void vmnet_cleanup_common(NetClientState *nc)
> +{
> +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);;

Duplicate semicolons.

> +    dispatch_semaphore_t if_created_sem;
> +
> +    qemu_purge_queued_packets(nc); > +    vmnet_set_send_bh_scheduled(s, true);
> +    vmnet_set_send_enabled(s, false);
> +
> +    if (s->vmnet_if == NULL) {
> +        return;
> +    }
> +
> +    if_created_sem = dispatch_semaphore_create(0);
> +    vmnet_stop_interface(
> +        s->vmnet_if,
> +        s->if_queue,
> +        ^(vmnet_return_t status) {
> +            assert(status == VMNET_SUCCESS);
> +            dispatch_semaphore_signal(if_created_sem);
> +        });
> +    dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER);
> +
> +    qemu_bh_delete(s->send_bh);
> +    dispatch_release(if_created_sem);
> +    dispatch_release(s->if_queue);
> +
> +    for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
> +        g_free(s->iov_buf[i].iov_base);
> +    }
> +}
> diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c
> index f07afaaf21..66f66c034b 100644
> --- a/net/vmnet-shared.c
> +++ b/net/vmnet-shared.c
> @@ -10,16 +10,102 @@
>   
>   #include "qemu/osdep.h"
>   #include "qapi/qapi-types-net.h"
> +#include "qapi/error.h"
>   #include "vmnet_int.h"
>   #include "clients.h"
> -#include "qemu/error-report.h"
> -#include "qapi/error.h"
>   
>   #include <vmnet/vmnet.h>
>   
> +typedef struct VmnetSharedState {
> +    VmnetCommonState cs;
> +} VmnetSharedState;
> +
> +
> +static bool validate_options(const Netdev *netdev, Error **errp)
> +{
> +    const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared);
> +
> +#if !defined(MAC_OS_VERSION_11_0) || \
> +    MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0
> +    if (options->has_isolated) {
> +        error_setg(errp,
> +                   "vmnet-shared.isolated feature is "
> +                   "unavailable: outdated vmnet.framework API");
> +        return false;
> +    }
> +#endif
> +
> +    if ((options->has_start_address ||
> +         options->has_end_address ||
> +         options->has_subnet_mask) &&
> +        !(options->has_start_address &&
> +          options->has_end_address &&
> +          options->has_subnet_mask)) {
> +        error_setg(errp,
> +                   "'start-address', 'end-address', 'subnet-mask' "
> +                   "should be provided together"
> +        );
> +        return false;
> +    }
> +
> +    return true;
> +}
> +
> +static xpc_object_t build_if_desc(const Netdev *netdev)
> +{
> +    const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared);
> +    xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0);
> +
> +    xpc_dictionary_set_uint64(
> +        if_desc,
> +        vmnet_operation_mode_key,
> +        VMNET_SHARED_MODE
> +    );
> +
> +    if (options->has_nat66_prefix) {
> +        xpc_dictionary_set_string(if_desc,
> +                                  vmnet_nat66_prefix_key,
> +                                  options->nat66_prefix);
> +    }
> +
> +    if (options->has_start_address) {
> +        xpc_dictionary_set_string(if_desc,
> +                                  vmnet_start_address_key,
> +                                  options->start_address);
> +        xpc_dictionary_set_string(if_desc,
> +                                  vmnet_end_address_key,
> +                                  options->end_address);
> +        xpc_dictionary_set_string(if_desc,
> +                                  vmnet_subnet_mask_key,
> +                                  options->subnet_mask);
> +    }
> +
> +#if defined(MAC_OS_VERSION_11_0) && \
> +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
> +    xpc_dictionary_set_bool(
> +        if_desc,
> +        vmnet_enable_isolation_key,
> +        options->isolated
> +    );
> +#endif
> +
> +    return if_desc;
> +}
> +
> +static NetClientInfo net_vmnet_shared_info = {
> +    .type = NET_CLIENT_DRIVER_VMNET_SHARED,
> +    .size = sizeof(VmnetSharedState),
> +    .receive = vmnet_receive_common,
> +    .cleanup = vmnet_cleanup_common,
> +};
> +
>   int net_init_vmnet_shared(const Netdev *netdev, const char *name,
>                             NetClientState *peer, Error **errp)
>   {
> -  error_setg(errp, "vmnet-shared is not implemented yet");
> -  return -1;
> +    NetClientState *nc = qemu_new_net_client(&net_vmnet_shared_info,
> +                                             peer, "vmnet-shared", name);
> +    if (!validate_options(netdev, errp)) {
> +        g_assert_not_reached();

g_assert_not_reached is for debugging purpose and may be dropped 
depending on the build option.

> +    }
> +    return vmnet_if_create(nc, build_if_desc(netdev), errp);
>   }
> diff --git a/net/vmnet_int.h b/net/vmnet_int.h
> index aac4d5af64..acfe3a88c0 100644
> --- a/net/vmnet_int.h
> +++ b/net/vmnet_int.h
> @@ -15,11 +15,48 @@
>   #include "clients.h"
>   
>   #include <vmnet/vmnet.h>
> +#include <dispatch/dispatch.h>
> +
> +/**
> + *  From vmnet.framework documentation
> + *
> + *  Each read/write call allows up to 200 packets to be
> + *  read or written for a maximum of 256KB.
> + *
> + *  Each packet written should be a complete
> + *  ethernet frame.
> + *
> + *  https://developer.apple.com/documentation/vmnet
> + */
> +#define VMNET_PACKETS_LIMIT 200
>   
>   typedef struct VmnetCommonState {
> -  NetClientState nc;
> +    NetClientState nc;
> +    interface_ref vmnet_if;
> +
> +    bool send_scheduled;
>   
> +    uint64_t mtu;
> +    uint64_t max_packet_size;
> +
> +    struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT];
> +    struct iovec iov_buf[VMNET_PACKETS_LIMIT];
> +
> +    dispatch_queue_t if_queue;
> +
> +    QEMUBH *send_bh;
>   } VmnetCommonState;
>   
> +const char *vmnet_status_map_str(vmnet_return_t status);
> +
> +int vmnet_if_create(NetClientState *nc,
> +                    xpc_object_t if_desc,
> +                    Error **errp);
> +
> +ssize_t vmnet_receive_common(NetClientState *nc,
> +                             const uint8_t *buf,
> +                             size_t size);
> +
> +void vmnet_cleanup_common(NetClientState *nc);
>   
>   #endif /* VMNET_INT_H */
Vladislav Yaroshchuk Feb. 26, 2022, 8:37 a.m. UTC | #2
Hi Akihiko,

On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki <akihiko.odaki@gmail.com>
wrote:

> On 2022/02/26 2:13, Vladislav Yaroshchuk wrote:
> > Interaction with vmnet.framework in different modes
> > differs only on configuration stage, so we can create
> > common `send`, `receive`, etc. procedures and reuse them.
> >
> > vmnet.framework supports iov, but writing more than
> > one iov into vmnet interface fails with
> > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
> > one and passing it to vmnet works fine. That's the
> > reason why receive_iov() left unimplemented. But it still
> > works with good enough performance having .receive()
> > net/vmnet: implement shared mode (vmnet-shared)
> >
> > Interaction with vmnet.framework in different modes
> > differs only on configuration stage, so we can create
> > common `send`, `receive`, etc. procedures and reuse them.
> >
> > vmnet.framework supports iov, but writing more than
> > one iov into vmnet interface fails with
> > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
> > one and passing it to vmnet works fine. That's the
> > reason why receive_iov() left unimplemented. But it still
> > works with good enough performance having .receive()
> > implemented only.
> >
> > Also, there is no way to unsubscribe from vmnet packages
> > receiving except registering and unregistering event
> > callback or simply drop packages just ignoring and
> > not processing them when related flag is set. Here we do
> > using the second way.
> >
> > Signed-off-by: Phillip Tennen <phillip@axleos.com>
> > Signed-off-by: Vladislav Yaroshchuk <Vladislav.Yaroshchuk@jetbrains.com>
>
> Thank you for persistently working on this.
>
> > ---
> >   net/vmnet-common.m | 302 +++++++++++++++++++++++++++++++++++++++++++++
> >   net/vmnet-shared.c |  94 +++++++++++++-
> >   net/vmnet_int.h    |  39 +++++-
> >   3 files changed, 430 insertions(+), 5 deletions(-)
> >
> > diff --git a/net/vmnet-common.m b/net/vmnet-common.m
> > index 56612c72ce..2f70921cae 100644
> > --- a/net/vmnet-common.m
> > +++ b/net/vmnet-common.m
> > @@ -10,6 +10,8 @@
> >    */
> >
> >   #include "qemu/osdep.h"
> > +#include "qemu/main-loop.h"
> > +#include "qemu/log.h"
> >   #include "qapi/qapi-types-net.h"
> >   #include "vmnet_int.h"
> >   #include "clients.h"
> > @@ -17,4 +19,304 @@
> >   #include "qapi/error.h"
> >
> >   #include <vmnet/vmnet.h>
> > +#include <dispatch/dispatch.h>
> >
> > +
> > +static inline void vmnet_set_send_bh_scheduled(VmnetCommonState *s,
> > +                                               bool enable)
> > +{
> > +    qatomic_set(&s->send_scheduled, enable);
> > +}
> > +
> > +
> > +static inline bool vmnet_is_send_bh_scheduled(VmnetCommonState *s)
> > +{
> > +    return qatomic_load_acquire(&s->send_scheduled);
> > +}
> > +
> > +
> > +static inline void vmnet_set_send_enabled(VmnetCommonState *s,
> > +                                          bool enable)
> > +{
> > +    if (enable) {
> > +        vmnet_interface_set_event_callback(
> > +            s->vmnet_if,
> > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
> > +            s->if_queue,
> > +            ^(interface_event_t event_id, xpc_object_t event) {
> > +                assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE);
> > +                /*
> > +                 * This function is being called from a non qemu
> thread, so
> > +                 * we only schedule a BH, and do the rest of the io
> completion
> > +                 * handling from vmnet_send_bh() which runs in a qemu
> context.
> > +                 *
> > +                 * Avoid scheduling multiple bottom halves
> > +                 */
> > +                if (!vmnet_is_send_bh_scheduled(s)) {
> > +                    vmnet_set_send_bh_scheduled(s, true);
>
> It can be interrupted between vmnet_is_send_bh_scheduled and
> vmnet_set_send_bh_scheduled, which leads to data race.
>
>
Sorry, I did not clearly understand what you meant. Since this
callback (block) is submitted on DISPATCH_QUEUE_SERIAL,
only one instance of the callback will be executed at any
moment of time.
https://developer.apple.com/documentation/dispatch/dispatch_queue_serial

Also this is the only place where we schedule a bottom half.

After we set the 'send_scheduled' flag, all the other
callback  blocks will do nothing (skip the if block) until
the bottom half is executed and reset 'send_scheduled'.
I don't see any races here :(

Correct me if I'm wrong please.


> > +                    qemu_bh_schedule(s->send_bh);
> > +                }
> > +            });
> > +    } else {
> > +        vmnet_interface_set_event_callback(
> > +            s->vmnet_if,
> > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
> > +            NULL,
> > +            NULL);
> > +    }
> > +}
> > +
> > +
> > +static void vmnet_send_completed(NetClientState *nc, ssize_t len)
> > +{
> > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
> > +    vmnet_set_send_enabled(s, true);
> > +}
> > +
> > +
> > +static void vmnet_send_bh(void *opaque)
> > +{
> > +    NetClientState *nc = (NetClientState *) opaque;
> > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
> > +
> > +    struct iovec *iov = s->iov_buf;
> > +    struct vmpktdesc *packets = s->packets_buf;
> > +    int pkt_cnt;
> > +    int i;
> > +
> > +    vmnet_return_t status;
> > +    ssize_t size;
> > +
> > +    /* read as many packets as present */
> > +    pkt_cnt = VMNET_PACKETS_LIMIT;
>
> There could be more than VMNET_PACKETS_LIMIT. You may call vmnet_read in
> a loop.
>
> > +    for (i = 0; i < pkt_cnt; ++i) {
> > +        packets[i].vm_pkt_size = s->max_packet_size;
> > +        packets[i].vm_pkt_iovcnt = 1;
> > +        packets[i].vm_flags = 0;
> > +    }
> > +
> > +    status = vmnet_read(s->vmnet_if, packets, &pkt_cnt);
> > +    if (status != VMNET_SUCCESS) {
> > +        error_printf("vmnet: read failed: %s\n",
> > +                     vmnet_status_map_str(status));
> > +        goto done;
> > +    }
> > +
> > +    for (i = 0; i < pkt_cnt; ++i) {
> > +        size = qemu_send_packet_async(nc,
> > +                                      iov[i].iov_base,
> > +                                      packets[i].vm_pkt_size,
> > +                                      vmnet_send_completed);
> > +        if (size == 0) {
> > +            vmnet_set_send_enabled(s, false);
> > +            goto done;
> > +        } else if (size < 0) {
> > +            break;
> > +        }
>
> goto is not needed here. "break" when size <= 0.
>
> > +    }
> > +
> > +done:
> > +    vmnet_set_send_bh_scheduled(s, false);
> > +}
> > +
> > +
> > +static void vmnet_bufs_init(VmnetCommonState *s)
> > +{
> > +    struct vmpktdesc *packets = s->packets_buf;
> > +    struct iovec *iov = s->iov_buf;
> > +    int i;
> > +
> > +    for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
> > +        iov[i].iov_len = s->max_packet_size;
> > +        iov[i].iov_base = g_malloc0(iov[i].iov_len);
> > +        packets[i].vm_pkt_iov = iov + i;
> > +    }
> > +}
> > +
> > +
> > +const char *vmnet_status_map_str(vmnet_return_t status)
> > +{
> > +    switch (status) {
> > +    case VMNET_SUCCESS:
> > +        return "success";
> > +    case VMNET_FAILURE:
> > +        return "general failure (possibly not enough privileges)";
> > +    case VMNET_MEM_FAILURE:
> > +        return "memory allocation failure";
> > +    case VMNET_INVALID_ARGUMENT:
> > +        return "invalid argument specified";
> > +    case VMNET_SETUP_INCOMPLETE:
> > +        return "interface setup is not complete";
> > +    case VMNET_INVALID_ACCESS:
> > +        return "invalid access, permission denied";
> > +    case VMNET_PACKET_TOO_BIG:
> > +        return "packet size is larger than MTU";
> > +    case VMNET_BUFFER_EXHAUSTED:
> > +        return "buffers exhausted in kernel";
> > +    case VMNET_TOO_MANY_PACKETS:
> > +        return "packet count exceeds limit";
> > +#if defined(MAC_OS_VERSION_11_0) && \
> > +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
> > +    case VMNET_SHARING_SERVICE_BUSY:
> > +        return "conflict, sharing service is in use";
> > +#endif
> > +    default:
> > +        return "unknown vmnet error";
> > +    }
> > +}
> > +
> > +
> > +int vmnet_if_create(NetClientState *nc,
> > +                    xpc_object_t if_desc,
> > +                    Error **errp)
> > +{
> > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);;
>
> Duplicate semicolons.
>
> > +    dispatch_semaphore_t if_created_sem = dispatch_semaphore_create(0);
>
> if_created_sem leaks.
>
> > +    __block vmnet_return_t if_status;
> > +
> > +    s->if_queue = dispatch_queue_create(
> > +        "org.qemu.vmnet.if_queue",
> > +        DISPATCH_QUEUE_SERIAL
> > +    );
> > +
> > +    xpc_dictionary_set_bool(
> > +        if_desc,
> > +        vmnet_allocate_mac_address_key,
> > +        false
> > +    );
> > +#ifdef DEBUG
> > +    qemu_log("vmnet.start.interface_desc:\n");
> > +    xpc_dictionary_apply(if_desc,
> > +                         ^bool(const char *k, xpc_object_t v) {
> > +                             char *desc = xpc_copy_description(v);
> > +                             qemu_log("  %s=%s\n", k, desc);
> > +                             free(desc);
> > +                             return true;
> > +                         });
> > +#endif /* DEBUG */
> > +
> > +    s->vmnet_if = vmnet_start_interface(
> > +        if_desc,
> > +        s->if_queue,
> > +        ^(vmnet_return_t status, xpc_object_t interface_param) {
> > +            if_status = status;
> > +            if (status != VMNET_SUCCESS || !interface_param) {
> > +                dispatch_semaphore_signal(if_created_sem);
> > +                return;
> > +            }
> > +
> > +#ifdef DEBUG
> > +            qemu_log("vmnet.start.interface_param:\n");
> > +            xpc_dictionary_apply(interface_param,
> > +                                 ^bool(const char *k, xpc_object_t v) {
> > +                                     char *desc =
> xpc_copy_description(v);
> > +                                     qemu_log("  %s=%s\n", k, desc);
> > +                                     free(desc);
> > +                                     return true;
> > +                                 });
> > +#endif /* DEBUG */
> > +
> > +            s->mtu = xpc_dictionary_get_uint64(
> > +                interface_param,
> > +                vmnet_mtu_key);
> > +            s->max_packet_size = xpc_dictionary_get_uint64(
> > +                interface_param,
> > +                vmnet_max_packet_size_key);
> > +
> > +            dispatch_semaphore_signal(if_created_sem);
> > +        });
> > +
> > +    if (s->vmnet_if == NULL) {
> > +        error_setg(errp,
> > +                   "unable to create interface "
> > +                   "with requested params");
>
> You don't need line breaks here. Breaking a string into a few would also
> makes it a bit hard to grep.
>
> > +        return -1;
>
> s->if_queue leaks.
>
> > +    }
> > +
> > +    dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER);
> > +
> > +    if (if_status != VMNET_SUCCESS) {
> > +        error_setg(errp,
> > +                   "cannot create vmnet interface: %s",
> > +                   vmnet_status_map_str(if_status));
> > +        return -1;
> > +    }
> > +
> > +    s->send_bh = aio_bh_new(qemu_get_aio_context(), vmnet_send_bh, nc);
> > +    vmnet_bufs_init(s);
> > +    vmnet_set_send_bh_scheduled(s, false);
> > +    vmnet_set_send_enabled(s, true);
> > +    return 0;
> > +}
> > +
> > +
> > +ssize_t vmnet_receive_common(NetClientState *nc,
> > +                             const uint8_t *buf,
> > +                             size_t size)
> > +{
> > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
> > +    struct vmpktdesc packet;
> > +    struct iovec iov;
> > +    int pkt_cnt;
> > +    vmnet_return_t if_status;
> > +
> > +    if (size > s->max_packet_size) {
> > +        warn_report("vmnet: packet is too big, %zu > %llu\n",
>
> Use PRIu64.
>
> > +                    packet.vm_pkt_size,
> > +                    s->max_packet_size);
> > +        return -1;
> > +    }
> > +
> > +    iov.iov_base = (char *) buf;
> > +    iov.iov_len = size;
> > +
> > +    packet.vm_pkt_iovcnt = 1;
> > +    packet.vm_flags = 0;
> > +    packet.vm_pkt_size = size;
> > +    packet.vm_pkt_iov = &iov;
> > +    pkt_cnt = 1;
> > +
> > +    if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt);
> > +    if (if_status != VMNET_SUCCESS) {
> > +        error_report("vmnet: write error: %s\n",
> > +                     vmnet_status_map_str(if_status));
>
> Why don't return -1?
>
> > +    }
> > +
> > +    if (if_status == VMNET_SUCCESS && pkt_cnt) {
> > +        return size;
> > +    }
> > +    return 0;
> > +}
> > +
> > +
> > +void vmnet_cleanup_common(NetClientState *nc)
> > +{
> > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);;
>
> Duplicate semicolons.
>
> > +    dispatch_semaphore_t if_created_sem;
> > +
> > +    qemu_purge_queued_packets(nc); > +
> vmnet_set_send_bh_scheduled(s, true);
> > +    vmnet_set_send_enabled(s, false);
> > +
> > +    if (s->vmnet_if == NULL) {
> > +        return;
> > +    }
> > +
> > +    if_created_sem = dispatch_semaphore_create(0);
> > +    vmnet_stop_interface(
> > +        s->vmnet_if,
> > +        s->if_queue,
> > +        ^(vmnet_return_t status) {
> > +            assert(status == VMNET_SUCCESS);
> > +            dispatch_semaphore_signal(if_created_sem);
> > +        });
> > +    dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER);
> > +
> > +    qemu_bh_delete(s->send_bh);
> > +    dispatch_release(if_created_sem);
> > +    dispatch_release(s->if_queue);
> > +
> > +    for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
> > +        g_free(s->iov_buf[i].iov_base);
> > +    }
> > +}
> > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c
> > index f07afaaf21..66f66c034b 100644
> > --- a/net/vmnet-shared.c
> > +++ b/net/vmnet-shared.c
> > @@ -10,16 +10,102 @@
> >
> >   #include "qemu/osdep.h"
> >   #include "qapi/qapi-types-net.h"
> > +#include "qapi/error.h"
> >   #include "vmnet_int.h"
> >   #include "clients.h"
> > -#include "qemu/error-report.h"
> > -#include "qapi/error.h"
> >
> >   #include <vmnet/vmnet.h>
> >
> > +typedef struct VmnetSharedState {
> > +    VmnetCommonState cs;
> > +} VmnetSharedState;
> > +
> > +
> > +static bool validate_options(const Netdev *netdev, Error **errp)
> > +{
> > +    const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared);
> > +
> > +#if !defined(MAC_OS_VERSION_11_0) || \
> > +    MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0
> > +    if (options->has_isolated) {
> > +        error_setg(errp,
> > +                   "vmnet-shared.isolated feature is "
> > +                   "unavailable: outdated vmnet.framework API");
> > +        return false;
> > +    }
> > +#endif
> > +
> > +    if ((options->has_start_address ||
> > +         options->has_end_address ||
> > +         options->has_subnet_mask) &&
> > +        !(options->has_start_address &&
> > +          options->has_end_address &&
> > +          options->has_subnet_mask)) {
> > +        error_setg(errp,
> > +                   "'start-address', 'end-address', 'subnet-mask' "
> > +                   "should be provided together"
> > +        );
> > +        return false;
> > +    }
> > +
> > +    return true;
> > +}
> > +
> > +static xpc_object_t build_if_desc(const Netdev *netdev)
> > +{
> > +    const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared);
> > +    xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0);
> > +
> > +    xpc_dictionary_set_uint64(
> > +        if_desc,
> > +        vmnet_operation_mode_key,
> > +        VMNET_SHARED_MODE
> > +    );
> > +
> > +    if (options->has_nat66_prefix) {
> > +        xpc_dictionary_set_string(if_desc,
> > +                                  vmnet_nat66_prefix_key,
> > +                                  options->nat66_prefix);
> > +    }
> > +
> > +    if (options->has_start_address) {
> > +        xpc_dictionary_set_string(if_desc,
> > +                                  vmnet_start_address_key,
> > +                                  options->start_address);
> > +        xpc_dictionary_set_string(if_desc,
> > +                                  vmnet_end_address_key,
> > +                                  options->end_address);
> > +        xpc_dictionary_set_string(if_desc,
> > +                                  vmnet_subnet_mask_key,
> > +                                  options->subnet_mask);
> > +    }
> > +
> > +#if defined(MAC_OS_VERSION_11_0) && \
> > +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
> > +    xpc_dictionary_set_bool(
> > +        if_desc,
> > +        vmnet_enable_isolation_key,
> > +        options->isolated
> > +    );
> > +#endif
> > +
> > +    return if_desc;
> > +}
> > +
> > +static NetClientInfo net_vmnet_shared_info = {
> > +    .type = NET_CLIENT_DRIVER_VMNET_SHARED,
> > +    .size = sizeof(VmnetSharedState),
> > +    .receive = vmnet_receive_common,
> > +    .cleanup = vmnet_cleanup_common,
> > +};
> > +
> >   int net_init_vmnet_shared(const Netdev *netdev, const char *name,
> >                             NetClientState *peer, Error **errp)
> >   {
> > -  error_setg(errp, "vmnet-shared is not implemented yet");
> > -  return -1;
> > +    NetClientState *nc = qemu_new_net_client(&net_vmnet_shared_info,
> > +                                             peer, "vmnet-shared",
> name);
> > +    if (!validate_options(netdev, errp)) {
> > +        g_assert_not_reached();
>
> g_assert_not_reached is for debugging purpose and may be dropped
> depending on the build option.
>
> > +    }
> > +    return vmnet_if_create(nc, build_if_desc(netdev), errp);
> >   }
> > diff --git a/net/vmnet_int.h b/net/vmnet_int.h
> > index aac4d5af64..acfe3a88c0 100644
> > --- a/net/vmnet_int.h
> > +++ b/net/vmnet_int.h
> > @@ -15,11 +15,48 @@
> >   #include "clients.h"
> >
> >   #include <vmnet/vmnet.h>
> > +#include <dispatch/dispatch.h>
> > +
> > +/**
> > + *  From vmnet.framework documentation
> > + *
> > + *  Each read/write call allows up to 200 packets to be
> > + *  read or written for a maximum of 256KB.
> > + *
> > + *  Each packet written should be a complete
> > + *  ethernet frame.
> > + *
> > + *  https://developer.apple.com/documentation/vmnet
> > + */
> > +#define VMNET_PACKETS_LIMIT 200
> >
> >   typedef struct VmnetCommonState {
> > -  NetClientState nc;
> > +    NetClientState nc;
> > +    interface_ref vmnet_if;
> > +
> > +    bool send_scheduled;
> >
> > +    uint64_t mtu;
> > +    uint64_t max_packet_size;
> > +
> > +    struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT];
> > +    struct iovec iov_buf[VMNET_PACKETS_LIMIT];
> > +
> > +    dispatch_queue_t if_queue;
> > +
> > +    QEMUBH *send_bh;
> >   } VmnetCommonState;
> >
> > +const char *vmnet_status_map_str(vmnet_return_t status);
> > +
> > +int vmnet_if_create(NetClientState *nc,
> > +                    xpc_object_t if_desc,
> > +                    Error **errp);
> > +
> > +ssize_t vmnet_receive_common(NetClientState *nc,
> > +                             const uint8_t *buf,
> > +                             size_t size);
> > +
> > +void vmnet_cleanup_common(NetClientState *nc);
> >
> >   #endif /* VMNET_INT_H */
>
>
Other issues will be fixed and submitted later,
thank you!

Regards,
Vladislav Yaroshchuk
Akihiko Odaki Feb. 26, 2022, 9:16 a.m. UTC | #3
On 2022/02/26 17:37, Vladislav Yaroshchuk wrote:
> 
> Hi Akihiko,
> 
> On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki <akihiko.odaki@gmail.com 
> <mailto:akihiko.odaki@gmail.com>> wrote:
> 
>     On 2022/02/26 2:13, Vladislav Yaroshchuk wrote:
>      > Interaction with vmnet.framework in different modes
>      > differs only on configuration stage, so we can create
>      > common `send`, `receive`, etc. procedures and reuse them.
>      >
>      > vmnet.framework supports iov, but writing more than
>      > one iov into vmnet interface fails with
>      > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
>      > one and passing it to vmnet works fine. That's the
>      > reason why receive_iov() left unimplemented. But it still
>      > works with good enough performance having .receive()
>      > net/vmnet: implement shared mode (vmnet-shared)
>      >
>      > Interaction with vmnet.framework in different modes
>      > differs only on configuration stage, so we can create
>      > common `send`, `receive`, etc. procedures and reuse them.
>      >
>      > vmnet.framework supports iov, but writing more than
>      > one iov into vmnet interface fails with
>      > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
>      > one and passing it to vmnet works fine. That's the
>      > reason why receive_iov() left unimplemented. But it still
>      > works with good enough performance having .receive()
>      > implemented only.
>      >
>      > Also, there is no way to unsubscribe from vmnet packages
>      > receiving except registering and unregistering event
>      > callback or simply drop packages just ignoring and
>      > not processing them when related flag is set. Here we do
>      > using the second way.
>      >
>      > Signed-off-by: Phillip Tennen <phillip@axleos.com
>     <mailto:phillip@axleos.com>>
>      > Signed-off-by: Vladislav Yaroshchuk
>     <Vladislav.Yaroshchuk@jetbrains.com
>     <mailto:Vladislav.Yaroshchuk@jetbrains.com>>
> 
>     Thank you for persistently working on this.
> 
>      > ---
>      >   net/vmnet-common.m | 302
>     +++++++++++++++++++++++++++++++++++++++++++++
>      >   net/vmnet-shared.c |  94 +++++++++++++-
>      >   net/vmnet_int.h    |  39 +++++-
>      >   3 files changed, 430 insertions(+), 5 deletions(-)
>      >
>      > diff --git a/net/vmnet-common.m b/net/vmnet-common.m
>      > index 56612c72ce..2f70921cae 100644
>      > --- a/net/vmnet-common.m
>      > +++ b/net/vmnet-common.m
>      > @@ -10,6 +10,8 @@
>      >    */
>      >
>      >   #include "qemu/osdep.h"
>      > +#include "qemu/main-loop.h"
>      > +#include "qemu/log.h"
>      >   #include "qapi/qapi-types-net.h"
>      >   #include "vmnet_int.h"
>      >   #include "clients.h"
>      > @@ -17,4 +19,304 @@
>      >   #include "qapi/error.h"
>      >
>      >   #include <vmnet/vmnet.h>
>      > +#include <dispatch/dispatch.h>
>      >
>      > +
>      > +static inline void vmnet_set_send_bh_scheduled(VmnetCommonState *s,
>      > +                                               bool enable)
>      > +{
>      > +    qatomic_set(&s->send_scheduled, enable);
>      > +}
>      > +
>      > +
>      > +static inline bool vmnet_is_send_bh_scheduled(VmnetCommonState *s)
>      > +{
>      > +    return qatomic_load_acquire(&s->send_scheduled);
>      > +}
>      > +
>      > +
>      > +static inline void vmnet_set_send_enabled(VmnetCommonState *s,
>      > +                                          bool enable)
>      > +{
>      > +    if (enable) {
>      > +        vmnet_interface_set_event_callback(
>      > +            s->vmnet_if,
>      > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
>      > +            s->if_queue,
>      > +            ^(interface_event_t event_id, xpc_object_t event) {
>      > +                assert(event_id ==
>     VMNET_INTERFACE_PACKETS_AVAILABLE);
>      > +                /*
>      > +                 * This function is being called from a non qemu
>     thread, so
>      > +                 * we only schedule a BH, and do the rest of the
>     io completion
>      > +                 * handling from vmnet_send_bh() which runs in a
>     qemu context.
>      > +                 *
>      > +                 * Avoid scheduling multiple bottom halves
>      > +                 */
>      > +                if (!vmnet_is_send_bh_scheduled(s)) {
>      > +                    vmnet_set_send_bh_scheduled(s, true);
> 
>     It can be interrupted between vmnet_is_send_bh_scheduled and
>     vmnet_set_send_bh_scheduled, which leads to data race.
> 
> 
> Sorry, I did not clearly understand what you meant. Since this
> callback (block) is submitted on DISPATCH_QUEUE_SERIAL,
> only one instance of the callback will be executed at any
> moment of time.
> https://developer.apple.com/documentation/dispatch/dispatch_queue_serial 
> <https://developer.apple.com/documentation/dispatch/dispatch_queue_serial>
> 
> Also this is the only place where we schedule a bottom half.
> 
> After we set the 'send_scheduled' flag, all the other
> callback  blocks will do nothing (skip the if block) until
> the bottom half is executed and reset 'send_scheduled'.
> I don't see any races here :(
> 
> Correct me if I'm wrong please.

My explanation that the interruption between vmnet_is_send_bh_scheduled 
and vmnet_set_send_bh_scheduled is problematic was actually misleading.

The problem is that vmnet_send_bh resets 'send_scheduled' after calling 
vmnet_read. If we got another packet after vmnet_read, it would be 
silently ignored.

By the way, I forgot to mention another problem: vmnet_send_completed 
calls vmnet_set_send_enabled, but the other packets in the buffer may 
not be sent yet. Also, unregistering callbacks in vmnet_set_send_enabled 
is probably not enough to stop the callback being fired since some 
dispatch blocks are already in the dispatch queue.

Regards,
Akihiko Odaki

> 
>      > +                    qemu_bh_schedule(s->send_bh);
>      > +                }
>      > +            });
>      > +    } else {
>      > +        vmnet_interface_set_event_callback(
>      > +            s->vmnet_if,
>      > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
>      > +            NULL,
>      > +            NULL);
>      > +    }
>      > +}
>      > +
>      > +
>      > +static void vmnet_send_completed(NetClientState *nc, ssize_t len)
>      > +{
>      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
>      > +    vmnet_set_send_enabled(s, true);
>      > +}
>      > +
>      > +
>      > +static void vmnet_send_bh(void *opaque)
>      > +{
>      > +    NetClientState *nc = (NetClientState *) opaque;
>      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
>      > +
>      > +    struct iovec *iov = s->iov_buf;
>      > +    struct vmpktdesc *packets = s->packets_buf;
>      > +    int pkt_cnt;
>      > +    int i;
>      > +
>      > +    vmnet_return_t status;
>      > +    ssize_t size;
>      > +
>      > +    /* read as many packets as present */
>      > +    pkt_cnt = VMNET_PACKETS_LIMIT;
> 
>     There could be more than VMNET_PACKETS_LIMIT. You may call
>     vmnet_read in
>     a loop.
> 
>      > +    for (i = 0; i < pkt_cnt; ++i) {
>      > +        packets[i].vm_pkt_size = s->max_packet_size;
>      > +        packets[i].vm_pkt_iovcnt = 1;
>      > +        packets[i].vm_flags = 0;
>      > +    }
>      > +
>      > +    status = vmnet_read(s->vmnet_if, packets, &pkt_cnt);
>      > +    if (status != VMNET_SUCCESS) {
>      > +        error_printf("vmnet: read failed: %s\n",
>      > +                     vmnet_status_map_str(status));
>      > +        goto done;
>      > +    }
>      > +
>      > +    for (i = 0; i < pkt_cnt; ++i) {
>      > +        size = qemu_send_packet_async(nc,
>      > +                                      iov[i].iov_base,
>      > +                                      packets[i].vm_pkt_size,
>      > +                                      vmnet_send_completed);
>      > +        if (size == 0) {
>      > +            vmnet_set_send_enabled(s, false);
>      > +            goto done;
>      > +        } else if (size < 0) {
>      > +            break;
>      > +        }
> 
>     goto is not needed here. "break" when size <= 0.
> 
>      > +    }
>      > +
>      > +done:
>      > +    vmnet_set_send_bh_scheduled(s, false);
>      > +}
>      > +
>      > +
>      > +static void vmnet_bufs_init(VmnetCommonState *s)
>      > +{
>      > +    struct vmpktdesc *packets = s->packets_buf;
>      > +    struct iovec *iov = s->iov_buf;
>      > +    int i;
>      > +
>      > +    for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
>      > +        iov[i].iov_len = s->max_packet_size;
>      > +        iov[i].iov_base = g_malloc0(iov[i].iov_len);
>      > +        packets[i].vm_pkt_iov = iov + i;
>      > +    }
>      > +}
>      > +
>      > +
>      > +const char *vmnet_status_map_str(vmnet_return_t status)
>      > +{
>      > +    switch (status) {
>      > +    case VMNET_SUCCESS:
>      > +        return "success";
>      > +    case VMNET_FAILURE:
>      > +        return "general failure (possibly not enough privileges)";
>      > +    case VMNET_MEM_FAILURE:
>      > +        return "memory allocation failure";
>      > +    case VMNET_INVALID_ARGUMENT:
>      > +        return "invalid argument specified";
>      > +    case VMNET_SETUP_INCOMPLETE:
>      > +        return "interface setup is not complete";
>      > +    case VMNET_INVALID_ACCESS:
>      > +        return "invalid access, permission denied";
>      > +    case VMNET_PACKET_TOO_BIG:
>      > +        return "packet size is larger than MTU";
>      > +    case VMNET_BUFFER_EXHAUSTED:
>      > +        return "buffers exhausted in kernel";
>      > +    case VMNET_TOO_MANY_PACKETS:
>      > +        return "packet count exceeds limit";
>      > +#if defined(MAC_OS_VERSION_11_0) && \
>      > +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
>      > +    case VMNET_SHARING_SERVICE_BUSY:
>      > +        return "conflict, sharing service is in use";
>      > +#endif
>      > +    default:
>      > +        return "unknown vmnet error";
>      > +    }
>      > +}
>      > +
>      > +
>      > +int vmnet_if_create(NetClientState *nc,
>      > +                    xpc_object_t if_desc,
>      > +                    Error **errp)
>      > +{
>      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);;
> 
>     Duplicate semicolons.
> 
>      > +    dispatch_semaphore_t if_created_sem =
>     dispatch_semaphore_create(0);
> 
>     if_created_sem leaks.
> 
>      > +    __block vmnet_return_t if_status;
>      > +
>      > +    s->if_queue = dispatch_queue_create(
>      > +        "org.qemu.vmnet.if_queue",
>      > +        DISPATCH_QUEUE_SERIAL
>      > +    );
>      > +
>      > +    xpc_dictionary_set_bool(
>      > +        if_desc,
>      > +        vmnet_allocate_mac_address_key,
>      > +        false
>      > +    );
>      > +#ifdef DEBUG
>      > +    qemu_log("vmnet.start.interface_desc:\n");
>      > +    xpc_dictionary_apply(if_desc,
>      > +                         ^bool(const char *k, xpc_object_t v) {
>      > +                             char *desc = xpc_copy_description(v);
>      > +                             qemu_log("  %s=%s\n", k, desc);
>      > +                             free(desc);
>      > +                             return true;
>      > +                         });
>      > +#endif /* DEBUG */
>      > +
>      > +    s->vmnet_if = vmnet_start_interface(
>      > +        if_desc,
>      > +        s->if_queue,
>      > +        ^(vmnet_return_t status, xpc_object_t interface_param) {
>      > +            if_status = status;
>      > +            if (status != VMNET_SUCCESS || !interface_param) {
>      > +                dispatch_semaphore_signal(if_created_sem);
>      > +                return;
>      > +            }
>      > +
>      > +#ifdef DEBUG
>      > +            qemu_log("vmnet.start.interface_param:\n");
>      > +            xpc_dictionary_apply(interface_param,
>      > +                                 ^bool(const char *k,
>     xpc_object_t v) {
>      > +                                     char *desc =
>     xpc_copy_description(v);
>      > +                                     qemu_log("  %s=%s\n", k, desc);
>      > +                                     free(desc);
>      > +                                     return true;
>      > +                                 });
>      > +#endif /* DEBUG */
>      > +
>      > +            s->mtu = xpc_dictionary_get_uint64(
>      > +                interface_param,
>      > +                vmnet_mtu_key);
>      > +            s->max_packet_size = xpc_dictionary_get_uint64(
>      > +                interface_param,
>      > +                vmnet_max_packet_size_key);
>      > +
>      > +            dispatch_semaphore_signal(if_created_sem);
>      > +        });
>      > +
>      > +    if (s->vmnet_if == NULL) {
>      > +        error_setg(errp,
>      > +                   "unable to create interface "
>      > +                   "with requested params");
> 
>     You don't need line breaks here. Breaking a string into a few would
>     also
>     makes it a bit hard to grep.
> 
>      > +        return -1;
> 
>     s->if_queue leaks.
> 
>      > +    }
>      > +
>      > +    dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER);
>      > +
>      > +    if (if_status != VMNET_SUCCESS) {
>      > +        error_setg(errp,
>      > +                   "cannot create vmnet interface: %s",
>      > +                   vmnet_status_map_str(if_status));
>      > +        return -1;
>      > +    }
>      > +
>      > +    s->send_bh = aio_bh_new(qemu_get_aio_context(),
>     vmnet_send_bh, nc);
>      > +    vmnet_bufs_init(s);
>      > +    vmnet_set_send_bh_scheduled(s, false);
>      > +    vmnet_set_send_enabled(s, true);
>      > +    return 0;
>      > +}
>      > +
>      > +
>      > +ssize_t vmnet_receive_common(NetClientState *nc,
>      > +                             const uint8_t *buf,
>      > +                             size_t size)
>      > +{
>      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
>      > +    struct vmpktdesc packet;
>      > +    struct iovec iov;
>      > +    int pkt_cnt;
>      > +    vmnet_return_t if_status;
>      > +
>      > +    if (size > s->max_packet_size) {
>      > +        warn_report("vmnet: packet is too big, %zu > %llu\n",
> 
>     Use PRIu64.
> 
>      > +                    packet.vm_pkt_size,
>      > +                    s->max_packet_size);
>      > +        return -1;
>      > +    }
>      > +
>      > +    iov.iov_base = (char *) buf;
>      > +    iov.iov_len = size;
>      > +
>      > +    packet.vm_pkt_iovcnt = 1;
>      > +    packet.vm_flags = 0;
>      > +    packet.vm_pkt_size = size;
>      > +    packet.vm_pkt_iov = &iov;
>      > +    pkt_cnt = 1;
>      > +
>      > +    if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt);
>      > +    if (if_status != VMNET_SUCCESS) {
>      > +        error_report("vmnet: write error: %s\n",
>      > +                     vmnet_status_map_str(if_status));
> 
>     Why don't return -1?
> 
>      > +    }
>      > +
>      > +    if (if_status == VMNET_SUCCESS && pkt_cnt) {
>      > +        return size;
>      > +    }
>      > +    return 0;
>      > +}
>      > +
>      > +
>      > +void vmnet_cleanup_common(NetClientState *nc)
>      > +{
>      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);;
> 
>     Duplicate semicolons.
> 
>      > +    dispatch_semaphore_t if_created_sem;
>      > +
>      > +    qemu_purge_queued_packets(nc); > +   
>     vmnet_set_send_bh_scheduled(s, true);
>      > +    vmnet_set_send_enabled(s, false);
>      > +
>      > +    if (s->vmnet_if == NULL) {
>      > +        return;
>      > +    }
>      > +
>      > +    if_created_sem = dispatch_semaphore_create(0);
>      > +    vmnet_stop_interface(
>      > +        s->vmnet_if,
>      > +        s->if_queue,
>      > +        ^(vmnet_return_t status) {
>      > +            assert(status == VMNET_SUCCESS);
>      > +            dispatch_semaphore_signal(if_created_sem);
>      > +        });
>      > +    dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER);
>      > +
>      > +    qemu_bh_delete(s->send_bh);
>      > +    dispatch_release(if_created_sem);
>      > +    dispatch_release(s->if_queue);
>      > +
>      > +    for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
>      > +        g_free(s->iov_buf[i].iov_base);
>      > +    }
>      > +}
>      > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c
>      > index f07afaaf21..66f66c034b 100644
>      > --- a/net/vmnet-shared.c
>      > +++ b/net/vmnet-shared.c
>      > @@ -10,16 +10,102 @@
>      >
>      >   #include "qemu/osdep.h"
>      >   #include "qapi/qapi-types-net.h"
>      > +#include "qapi/error.h"
>      >   #include "vmnet_int.h"
>      >   #include "clients.h"
>      > -#include "qemu/error-report.h"
>      > -#include "qapi/error.h"
>      >
>      >   #include <vmnet/vmnet.h>
>      >
>      > +typedef struct VmnetSharedState {
>      > +    VmnetCommonState cs;
>      > +} VmnetSharedState;
>      > +
>      > +
>      > +static bool validate_options(const Netdev *netdev, Error **errp)
>      > +{
>      > +    const NetdevVmnetSharedOptions *options =
>     &(netdev->u.vmnet_shared);
>      > +
>      > +#if !defined(MAC_OS_VERSION_11_0) || \
>      > +    MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0
>      > +    if (options->has_isolated) {
>      > +        error_setg(errp,
>      > +                   "vmnet-shared.isolated feature is "
>      > +                   "unavailable: outdated vmnet.framework API");
>      > +        return false;
>      > +    }
>      > +#endif
>      > +
>      > +    if ((options->has_start_address ||
>      > +         options->has_end_address ||
>      > +         options->has_subnet_mask) &&
>      > +        !(options->has_start_address &&
>      > +          options->has_end_address &&
>      > +          options->has_subnet_mask)) {
>      > +        error_setg(errp,
>      > +                   "'start-address', 'end-address', 'subnet-mask' "
>      > +                   "should be provided together"
>      > +        );
>      > +        return false;
>      > +    }
>      > +
>      > +    return true;
>      > +}
>      > +
>      > +static xpc_object_t build_if_desc(const Netdev *netdev)
>      > +{
>      > +    const NetdevVmnetSharedOptions *options =
>     &(netdev->u.vmnet_shared);
>      > +    xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0);
>      > +
>      > +    xpc_dictionary_set_uint64(
>      > +        if_desc,
>      > +        vmnet_operation_mode_key,
>      > +        VMNET_SHARED_MODE
>      > +    );
>      > +
>      > +    if (options->has_nat66_prefix) {
>      > +        xpc_dictionary_set_string(if_desc,
>      > +                                  vmnet_nat66_prefix_key,
>      > +                                  options->nat66_prefix);
>      > +    }
>      > +
>      > +    if (options->has_start_address) {
>      > +        xpc_dictionary_set_string(if_desc,
>      > +                                  vmnet_start_address_key,
>      > +                                  options->start_address);
>      > +        xpc_dictionary_set_string(if_desc,
>      > +                                  vmnet_end_address_key,
>      > +                                  options->end_address);
>      > +        xpc_dictionary_set_string(if_desc,
>      > +                                  vmnet_subnet_mask_key,
>      > +                                  options->subnet_mask);
>      > +    }
>      > +
>      > +#if defined(MAC_OS_VERSION_11_0) && \
>      > +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
>      > +    xpc_dictionary_set_bool(
>      > +        if_desc,
>      > +        vmnet_enable_isolation_key,
>      > +        options->isolated
>      > +    );
>      > +#endif
>      > +
>      > +    return if_desc;
>      > +}
>      > +
>      > +static NetClientInfo net_vmnet_shared_info = {
>      > +    .type = NET_CLIENT_DRIVER_VMNET_SHARED,
>      > +    .size = sizeof(VmnetSharedState),
>      > +    .receive = vmnet_receive_common,
>      > +    .cleanup = vmnet_cleanup_common,
>      > +};
>      > +
>      >   int net_init_vmnet_shared(const Netdev *netdev, const char *name,
>      >                             NetClientState *peer, Error **errp)
>      >   {
>      > -  error_setg(errp, "vmnet-shared is not implemented yet");
>      > -  return -1;
>      > +    NetClientState *nc = qemu_new_net_client(&net_vmnet_shared_info,
>      > +                                             peer,
>     "vmnet-shared", name);
>      > +    if (!validate_options(netdev, errp)) {
>      > +        g_assert_not_reached();
> 
>     g_assert_not_reached is for debugging purpose and may be dropped
>     depending on the build option.
> 
>      > +    }
>      > +    return vmnet_if_create(nc, build_if_desc(netdev), errp);
>      >   }
>      > diff --git a/net/vmnet_int.h b/net/vmnet_int.h
>      > index aac4d5af64..acfe3a88c0 100644
>      > --- a/net/vmnet_int.h
>      > +++ b/net/vmnet_int.h
>      > @@ -15,11 +15,48 @@
>      >   #include "clients.h"
>      >
>      >   #include <vmnet/vmnet.h>
>      > +#include <dispatch/dispatch.h>
>      > +
>      > +/**
>      > + *  From vmnet.framework documentation
>      > + *
>      > + *  Each read/write call allows up to 200 packets to be
>      > + *  read or written for a maximum of 256KB.
>      > + *
>      > + *  Each packet written should be a complete
>      > + *  ethernet frame.
>      > + *
>      > + * https://developer.apple.com/documentation/vmnet
>     <https://developer.apple.com/documentation/vmnet>
>      > + */
>      > +#define VMNET_PACKETS_LIMIT 200
>      >
>      >   typedef struct VmnetCommonState {
>      > -  NetClientState nc;
>      > +    NetClientState nc;
>      > +    interface_ref vmnet_if;
>      > +
>      > +    bool send_scheduled;
>      >
>      > +    uint64_t mtu;
>      > +    uint64_t max_packet_size;
>      > +
>      > +    struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT];
>      > +    struct iovec iov_buf[VMNET_PACKETS_LIMIT];
>      > +
>      > +    dispatch_queue_t if_queue;
>      > +
>      > +    QEMUBH *send_bh;
>      >   } VmnetCommonState;
>      >
>      > +const char *vmnet_status_map_str(vmnet_return_t status);
>      > +
>      > +int vmnet_if_create(NetClientState *nc,
>      > +                    xpc_object_t if_desc,
>      > +                    Error **errp);
>      > +
>      > +ssize_t vmnet_receive_common(NetClientState *nc,
>      > +                             const uint8_t *buf,
>      > +                             size_t size);
>      > +
>      > +void vmnet_cleanup_common(NetClientState *nc);
>      >
>      >   #endif /* VMNET_INT_H */
> 
> 
> Other issues will be fixed and submitted later,
> thank you!
> 
> Regards,
> Vladislav Yaroshchuk
Vladislav Yaroshchuk Feb. 26, 2022, 11:33 a.m. UTC | #4
On Sat, Feb 26, 2022 at 12:16 PM Akihiko Odaki <akihiko.odaki@gmail.com>
wrote:

> On 2022/02/26 17:37, Vladislav Yaroshchuk wrote:
> >
> > Hi Akihiko,
> >
> > On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki <akihiko.odaki@gmail.com
> > <mailto:akihiko.odaki@gmail.com>> wrote:
> >
> >     On 2022/02/26 2:13, Vladislav Yaroshchuk wrote:
> >      > Interaction with vmnet.framework in different modes
> >      > differs only on configuration stage, so we can create
> >      > common `send`, `receive`, etc. procedures and reuse them.
> >      >
> >      > vmnet.framework supports iov, but writing more than
> >      > one iov into vmnet interface fails with
> >      > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
> >      > one and passing it to vmnet works fine. That's the
> >      > reason why receive_iov() left unimplemented. But it still
> >      > works with good enough performance having .receive()
> >      > net/vmnet: implement shared mode (vmnet-shared)
> >      >
> >      > Interaction with vmnet.framework in different modes
> >      > differs only on configuration stage, so we can create
> >      > common `send`, `receive`, etc. procedures and reuse them.
> >      >
> >      > vmnet.framework supports iov, but writing more than
> >      > one iov into vmnet interface fails with
> >      > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
> >      > one and passing it to vmnet works fine. That's the
> >      > reason why receive_iov() left unimplemented. But it still
> >      > works with good enough performance having .receive()
> >      > implemented only.
> >      >
> >      > Also, there is no way to unsubscribe from vmnet packages
> >      > receiving except registering and unregistering event
> >      > callback or simply drop packages just ignoring and
> >      > not processing them when related flag is set. Here we do
> >      > using the second way.
> >      >
> >      > Signed-off-by: Phillip Tennen <phillip@axleos.com
> >     <mailto:phillip@axleos.com>>
> >      > Signed-off-by: Vladislav Yaroshchuk
> >     <Vladislav.Yaroshchuk@jetbrains.com
> >     <mailto:Vladislav.Yaroshchuk@jetbrains.com>>
> >
> >     Thank you for persistently working on this.
> >
> >      > ---
> >      >   net/vmnet-common.m | 302
> >     +++++++++++++++++++++++++++++++++++++++++++++
> >      >   net/vmnet-shared.c |  94 +++++++++++++-
> >      >   net/vmnet_int.h    |  39 +++++-
> >      >   3 files changed, 430 insertions(+), 5 deletions(-)
> >      >
> >      > diff --git a/net/vmnet-common.m b/net/vmnet-common.m
> >      > index 56612c72ce..2f70921cae 100644
> >      > --- a/net/vmnet-common.m
> >      > +++ b/net/vmnet-common.m
> >      > @@ -10,6 +10,8 @@
> >      >    */
> >      >
> >      >   #include "qemu/osdep.h"
> >      > +#include "qemu/main-loop.h"
> >      > +#include "qemu/log.h"
> >      >   #include "qapi/qapi-types-net.h"
> >      >   #include "vmnet_int.h"
> >      >   #include "clients.h"
> >      > @@ -17,4 +19,304 @@
> >      >   #include "qapi/error.h"
> >      >
> >      >   #include <vmnet/vmnet.h>
> >      > +#include <dispatch/dispatch.h>
> >      >
> >      > +
> >      > +static inline void vmnet_set_send_bh_scheduled(VmnetCommonState
> *s,
> >      > +                                               bool enable)
> >      > +{
> >      > +    qatomic_set(&s->send_scheduled, enable);
> >      > +}
> >      > +
> >      > +
> >      > +static inline bool vmnet_is_send_bh_scheduled(VmnetCommonState
> *s)
> >      > +{
> >      > +    return qatomic_load_acquire(&s->send_scheduled);
> >      > +}
> >      > +
> >      > +
> >      > +static inline void vmnet_set_send_enabled(VmnetCommonState *s,
> >      > +                                          bool enable)
> >      > +{
> >      > +    if (enable) {
> >      > +        vmnet_interface_set_event_callback(
> >      > +            s->vmnet_if,
> >      > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
> >      > +            s->if_queue,
> >      > +            ^(interface_event_t event_id, xpc_object_t event) {
> >      > +                assert(event_id ==
> >     VMNET_INTERFACE_PACKETS_AVAILABLE);
> >      > +                /*
> >      > +                 * This function is being called from a non qemu
> >     thread, so
> >      > +                 * we only schedule a BH, and do the rest of the
> >     io completion
> >      > +                 * handling from vmnet_send_bh() which runs in a
> >     qemu context.
> >      > +                 *
> >      > +                 * Avoid scheduling multiple bottom halves
> >      > +                 */
> >      > +                if (!vmnet_is_send_bh_scheduled(s)) {
> >      > +                    vmnet_set_send_bh_scheduled(s, true);
> >
> >     It can be interrupted between vmnet_is_send_bh_scheduled and
> >     vmnet_set_send_bh_scheduled, which leads to data race.
> >
> >
> > Sorry, I did not clearly understand what you meant. Since this
> > callback (block) is submitted on DISPATCH_QUEUE_SERIAL,
> > only one instance of the callback will be executed at any
> > moment of time.
> > https://developer.apple.com/documentation/dispatch/dispatch_queue_serial
> > <
> https://developer.apple.com/documentation/dispatch/dispatch_queue_serial>
> >
> > Also this is the only place where we schedule a bottom half.
> >
> > After we set the 'send_scheduled' flag, all the other
> > callback  blocks will do nothing (skip the if block) until
> > the bottom half is executed and reset 'send_scheduled'.
> > I don't see any races here :(
> >
> > Correct me if I'm wrong please.
>
> My explanation that the interruption between vmnet_is_send_bh_scheduled
> and vmnet_set_send_bh_scheduled is problematic was actually misleading.
>
> The problem is that vmnet_send_bh resets 'send_scheduled' after calling
> vmnet_read. If we got another packet after vmnet_read, it would be
> silently ignored.
>
> By the way, I forgot to mention another problem: vmnet_send_completed
> calls vmnet_set_send_enabled, but the other packets in the buffer may
> not be sent yet. Also, unregistering callbacks in vmnet_set_send_enabled
> is probably not enough to stop the callback being fired since some
> dispatch blocks are already in the dispatch queue.
>
>
Now I understand what you mean, thanks.
What do you think about the workaround? For me
it is quite difficult question how to synchronize qemu with
vmnet thread, especially with completion callback.



> Regards,
> Akihiko Odaki
>
> >
> >      > +                    qemu_bh_schedule(s->send_bh);
> >      > +                }
> >      > +            });
> >      > +    } else {
> >      > +        vmnet_interface_set_event_callback(
> >      > +            s->vmnet_if,
> >      > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
> >      > +            NULL,
> >      > +            NULL);
> >      > +    }
> >      > +}
> >      > +
> >      > +
> >      > +static void vmnet_send_completed(NetClientState *nc, ssize_t len)
> >      > +{
> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
> >      > +    vmnet_set_send_enabled(s, true);
> >      > +}
> >      > +
> >      > +
> >      > +static void vmnet_send_bh(void *opaque)
> >      > +{
> >      > +    NetClientState *nc = (NetClientState *) opaque;
> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
> >      > +
> >      > +    struct iovec *iov = s->iov_buf;
> >      > +    struct vmpktdesc *packets = s->packets_buf;
> >      > +    int pkt_cnt;
> >      > +    int i;
> >      > +
> >      > +    vmnet_return_t status;
> >      > +    ssize_t size;
> >      > +
> >      > +    /* read as many packets as present */
> >      > +    pkt_cnt = VMNET_PACKETS_LIMIT;
> >
> >     There could be more than VMNET_PACKETS_LIMIT. You may call
> >     vmnet_read in
> >     a loop.
> >
> >      > +    for (i = 0; i < pkt_cnt; ++i) {
> >      > +        packets[i].vm_pkt_size = s->max_packet_size;
> >      > +        packets[i].vm_pkt_iovcnt = 1;
> >      > +        packets[i].vm_flags = 0;
> >      > +    }
> >      > +
> >      > +    status = vmnet_read(s->vmnet_if, packets, &pkt_cnt);
> >      > +    if (status != VMNET_SUCCESS) {
> >      > +        error_printf("vmnet: read failed: %s\n",
> >      > +                     vmnet_status_map_str(status));
> >      > +        goto done;
> >      > +    }
> >      > +
> >      > +    for (i = 0; i < pkt_cnt; ++i) {
> >      > +        size = qemu_send_packet_async(nc,
> >      > +                                      iov[i].iov_base,
> >      > +                                      packets[i].vm_pkt_size,
> >      > +                                      vmnet_send_completed);
> >      > +        if (size == 0) {
> >      > +            vmnet_set_send_enabled(s, false);
> >      > +            goto done;
> >      > +        } else if (size < 0) {
> >      > +            break;
> >      > +        }
> >
> >     goto is not needed here. "break" when size <= 0.
> >
> >      > +    }
> >      > +
> >      > +done:
> >      > +    vmnet_set_send_bh_scheduled(s, false);
> >      > +}
> >      > +
> >      > +
> >      > +static void vmnet_bufs_init(VmnetCommonState *s)
> >      > +{
> >      > +    struct vmpktdesc *packets = s->packets_buf;
> >      > +    struct iovec *iov = s->iov_buf;
> >      > +    int i;
> >      > +
> >      > +    for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
> >      > +        iov[i].iov_len = s->max_packet_size;
> >      > +        iov[i].iov_base = g_malloc0(iov[i].iov_len);
> >      > +        packets[i].vm_pkt_iov = iov + i;
> >      > +    }
> >      > +}
> >      > +
> >      > +
> >      > +const char *vmnet_status_map_str(vmnet_return_t status)
> >      > +{
> >      > +    switch (status) {
> >      > +    case VMNET_SUCCESS:
> >      > +        return "success";
> >      > +    case VMNET_FAILURE:
> >      > +        return "general failure (possibly not enough
> privileges)";
> >      > +    case VMNET_MEM_FAILURE:
> >      > +        return "memory allocation failure";
> >      > +    case VMNET_INVALID_ARGUMENT:
> >      > +        return "invalid argument specified";
> >      > +    case VMNET_SETUP_INCOMPLETE:
> >      > +        return "interface setup is not complete";
> >      > +    case VMNET_INVALID_ACCESS:
> >      > +        return "invalid access, permission denied";
> >      > +    case VMNET_PACKET_TOO_BIG:
> >      > +        return "packet size is larger than MTU";
> >      > +    case VMNET_BUFFER_EXHAUSTED:
> >      > +        return "buffers exhausted in kernel";
> >      > +    case VMNET_TOO_MANY_PACKETS:
> >      > +        return "packet count exceeds limit";
> >      > +#if defined(MAC_OS_VERSION_11_0) && \
> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
> >      > +    case VMNET_SHARING_SERVICE_BUSY:
> >      > +        return "conflict, sharing service is in use";
> >      > +#endif
> >      > +    default:
> >      > +        return "unknown vmnet error";
> >      > +    }
> >      > +}
> >      > +
> >      > +
> >      > +int vmnet_if_create(NetClientState *nc,
> >      > +                    xpc_object_t if_desc,
> >      > +                    Error **errp)
> >      > +{
> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);;
> >
> >     Duplicate semicolons.
> >
> >      > +    dispatch_semaphore_t if_created_sem =
> >     dispatch_semaphore_create(0);
> >
> >     if_created_sem leaks.
> >
> >      > +    __block vmnet_return_t if_status;
> >      > +
> >      > +    s->if_queue = dispatch_queue_create(
> >      > +        "org.qemu.vmnet.if_queue",
> >      > +        DISPATCH_QUEUE_SERIAL
> >      > +    );
> >      > +
> >      > +    xpc_dictionary_set_bool(
> >      > +        if_desc,
> >      > +        vmnet_allocate_mac_address_key,
> >      > +        false
> >      > +    );
> >      > +#ifdef DEBUG
> >      > +    qemu_log("vmnet.start.interface_desc:\n");
> >      > +    xpc_dictionary_apply(if_desc,
> >      > +                         ^bool(const char *k, xpc_object_t v) {
> >      > +                             char *desc =
> xpc_copy_description(v);
> >      > +                             qemu_log("  %s=%s\n", k, desc);
> >      > +                             free(desc);
> >      > +                             return true;
> >      > +                         });
> >      > +#endif /* DEBUG */
> >      > +
> >      > +    s->vmnet_if = vmnet_start_interface(
> >      > +        if_desc,
> >      > +        s->if_queue,
> >      > +        ^(vmnet_return_t status, xpc_object_t interface_param) {
> >      > +            if_status = status;
> >      > +            if (status != VMNET_SUCCESS || !interface_param) {
> >      > +                dispatch_semaphore_signal(if_created_sem);
> >      > +                return;
> >      > +            }
> >      > +
> >      > +#ifdef DEBUG
> >      > +            qemu_log("vmnet.start.interface_param:\n");
> >      > +            xpc_dictionary_apply(interface_param,
> >      > +                                 ^bool(const char *k,
> >     xpc_object_t v) {
> >      > +                                     char *desc =
> >     xpc_copy_description(v);
> >      > +                                     qemu_log("  %s=%s\n", k,
> desc);
> >      > +                                     free(desc);
> >      > +                                     return true;
> >      > +                                 });
> >      > +#endif /* DEBUG */
> >      > +
> >      > +            s->mtu = xpc_dictionary_get_uint64(
> >      > +                interface_param,
> >      > +                vmnet_mtu_key);
> >      > +            s->max_packet_size = xpc_dictionary_get_uint64(
> >      > +                interface_param,
> >      > +                vmnet_max_packet_size_key);
> >      > +
> >      > +            dispatch_semaphore_signal(if_created_sem);
> >      > +        });
> >      > +
> >      > +    if (s->vmnet_if == NULL) {
> >      > +        error_setg(errp,
> >      > +                   "unable to create interface "
> >      > +                   "with requested params");
> >
> >     You don't need line breaks here. Breaking a string into a few would
> >     also
> >     makes it a bit hard to grep.
> >
> >      > +        return -1;
> >
> >     s->if_queue leaks.
> >
> >      > +    }
> >      > +
> >      > +    dispatch_semaphore_wait(if_created_sem,
> DISPATCH_TIME_FOREVER);
> >      > +
> >      > +    if (if_status != VMNET_SUCCESS) {
> >      > +        error_setg(errp,
> >      > +                   "cannot create vmnet interface: %s",
> >      > +                   vmnet_status_map_str(if_status));
> >      > +        return -1;
> >      > +    }
> >      > +
> >      > +    s->send_bh = aio_bh_new(qemu_get_aio_context(),
> >     vmnet_send_bh, nc);
> >      > +    vmnet_bufs_init(s);
> >      > +    vmnet_set_send_bh_scheduled(s, false);
> >      > +    vmnet_set_send_enabled(s, true);
> >      > +    return 0;
> >      > +}
> >      > +
> >      > +
> >      > +ssize_t vmnet_receive_common(NetClientState *nc,
> >      > +                             const uint8_t *buf,
> >      > +                             size_t size)
> >      > +{
> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
> >      > +    struct vmpktdesc packet;
> >      > +    struct iovec iov;
> >      > +    int pkt_cnt;
> >      > +    vmnet_return_t if_status;
> >      > +
> >      > +    if (size > s->max_packet_size) {
> >      > +        warn_report("vmnet: packet is too big, %zu > %llu\n",
> >
> >     Use PRIu64.
> >
> >      > +                    packet.vm_pkt_size,
> >      > +                    s->max_packet_size);
> >      > +        return -1;
> >      > +    }
> >      > +
> >      > +    iov.iov_base = (char *) buf;
> >      > +    iov.iov_len = size;
> >      > +
> >      > +    packet.vm_pkt_iovcnt = 1;
> >      > +    packet.vm_flags = 0;
> >      > +    packet.vm_pkt_size = size;
> >      > +    packet.vm_pkt_iov = &iov;
> >      > +    pkt_cnt = 1;
> >      > +
> >      > +    if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt);
> >      > +    if (if_status != VMNET_SUCCESS) {
> >      > +        error_report("vmnet: write error: %s\n",
> >      > +                     vmnet_status_map_str(if_status));
> >
> >     Why don't return -1?
> >
> >      > +    }
> >      > +
> >      > +    if (if_status == VMNET_SUCCESS && pkt_cnt) {
> >      > +        return size;
> >      > +    }
> >      > +    return 0;
> >      > +}
> >      > +
> >      > +
> >      > +void vmnet_cleanup_common(NetClientState *nc)
> >      > +{
> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);;
> >
> >     Duplicate semicolons.
> >
> >      > +    dispatch_semaphore_t if_created_sem;
> >      > +
> >      > +    qemu_purge_queued_packets(nc); > +
> >     vmnet_set_send_bh_scheduled(s, true);
> >      > +    vmnet_set_send_enabled(s, false);
> >      > +
> >      > +    if (s->vmnet_if == NULL) {
> >      > +        return;
> >      > +    }
> >      > +
> >      > +    if_created_sem = dispatch_semaphore_create(0);
> >      > +    vmnet_stop_interface(
> >      > +        s->vmnet_if,
> >      > +        s->if_queue,
> >      > +        ^(vmnet_return_t status) {
> >      > +            assert(status == VMNET_SUCCESS);
> >      > +            dispatch_semaphore_signal(if_created_sem);
> >      > +        });
> >      > +    dispatch_semaphore_wait(if_created_sem,
> DISPATCH_TIME_FOREVER);
> >      > +
> >      > +    qemu_bh_delete(s->send_bh);
> >      > +    dispatch_release(if_created_sem);
> >      > +    dispatch_release(s->if_queue);
> >      > +
> >      > +    for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
> >      > +        g_free(s->iov_buf[i].iov_base);
> >      > +    }
> >      > +}
> >      > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c
> >      > index f07afaaf21..66f66c034b 100644
> >      > --- a/net/vmnet-shared.c
> >      > +++ b/net/vmnet-shared.c
> >      > @@ -10,16 +10,102 @@
> >      >
> >      >   #include "qemu/osdep.h"
> >      >   #include "qapi/qapi-types-net.h"
> >      > +#include "qapi/error.h"
> >      >   #include "vmnet_int.h"
> >      >   #include "clients.h"
> >      > -#include "qemu/error-report.h"
> >      > -#include "qapi/error.h"
> >      >
> >      >   #include <vmnet/vmnet.h>
> >      >
> >      > +typedef struct VmnetSharedState {
> >      > +    VmnetCommonState cs;
> >      > +} VmnetSharedState;
> >      > +
> >      > +
> >      > +static bool validate_options(const Netdev *netdev, Error **errp)
> >      > +{
> >      > +    const NetdevVmnetSharedOptions *options =
> >     &(netdev->u.vmnet_shared);
> >      > +
> >      > +#if !defined(MAC_OS_VERSION_11_0) || \
> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0
> >      > +    if (options->has_isolated) {
> >      > +        error_setg(errp,
> >      > +                   "vmnet-shared.isolated feature is "
> >      > +                   "unavailable: outdated vmnet.framework API");
> >      > +        return false;
> >      > +    }
> >      > +#endif
> >      > +
> >      > +    if ((options->has_start_address ||
> >      > +         options->has_end_address ||
> >      > +         options->has_subnet_mask) &&
> >      > +        !(options->has_start_address &&
> >      > +          options->has_end_address &&
> >      > +          options->has_subnet_mask)) {
> >      > +        error_setg(errp,
> >      > +                   "'start-address', 'end-address',
> 'subnet-mask' "
> >      > +                   "should be provided together"
> >      > +        );
> >      > +        return false;
> >      > +    }
> >      > +
> >      > +    return true;
> >      > +}
> >      > +
> >      > +static xpc_object_t build_if_desc(const Netdev *netdev)
> >      > +{
> >      > +    const NetdevVmnetSharedOptions *options =
> >     &(netdev->u.vmnet_shared);
> >      > +    xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0);
> >      > +
> >      > +    xpc_dictionary_set_uint64(
> >      > +        if_desc,
> >      > +        vmnet_operation_mode_key,
> >      > +        VMNET_SHARED_MODE
> >      > +    );
> >      > +
> >      > +    if (options->has_nat66_prefix) {
> >      > +        xpc_dictionary_set_string(if_desc,
> >      > +                                  vmnet_nat66_prefix_key,
> >      > +                                  options->nat66_prefix);
> >      > +    }
> >      > +
> >      > +    if (options->has_start_address) {
> >      > +        xpc_dictionary_set_string(if_desc,
> >      > +                                  vmnet_start_address_key,
> >      > +                                  options->start_address);
> >      > +        xpc_dictionary_set_string(if_desc,
> >      > +                                  vmnet_end_address_key,
> >      > +                                  options->end_address);
> >      > +        xpc_dictionary_set_string(if_desc,
> >      > +                                  vmnet_subnet_mask_key,
> >      > +                                  options->subnet_mask);
> >      > +    }
> >      > +
> >      > +#if defined(MAC_OS_VERSION_11_0) && \
> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
> >      > +    xpc_dictionary_set_bool(
> >      > +        if_desc,
> >      > +        vmnet_enable_isolation_key,
> >      > +        options->isolated
> >      > +    );
> >      > +#endif
> >      > +
> >      > +    return if_desc;
> >      > +}
> >      > +
> >      > +static NetClientInfo net_vmnet_shared_info = {
> >      > +    .type = NET_CLIENT_DRIVER_VMNET_SHARED,
> >      > +    .size = sizeof(VmnetSharedState),
> >      > +    .receive = vmnet_receive_common,
> >      > +    .cleanup = vmnet_cleanup_common,
> >      > +};
> >      > +
> >      >   int net_init_vmnet_shared(const Netdev *netdev, const char
> *name,
> >      >                             NetClientState *peer, Error **errp)
> >      >   {
> >      > -  error_setg(errp, "vmnet-shared is not implemented yet");
> >      > -  return -1;
> >      > +    NetClientState *nc =
> qemu_new_net_client(&net_vmnet_shared_info,
> >      > +                                             peer,
> >     "vmnet-shared", name);
> >      > +    if (!validate_options(netdev, errp)) {
> >      > +        g_assert_not_reached();
> >
> >     g_assert_not_reached is for debugging purpose and may be dropped
> >     depending on the build option.
> >
> >      > +    }
> >      > +    return vmnet_if_create(nc, build_if_desc(netdev), errp);
> >      >   }
> >      > diff --git a/net/vmnet_int.h b/net/vmnet_int.h
> >      > index aac4d5af64..acfe3a88c0 100644
> >      > --- a/net/vmnet_int.h
> >      > +++ b/net/vmnet_int.h
> >      > @@ -15,11 +15,48 @@
> >      >   #include "clients.h"
> >      >
> >      >   #include <vmnet/vmnet.h>
> >      > +#include <dispatch/dispatch.h>
> >      > +
> >      > +/**
> >      > + *  From vmnet.framework documentation
> >      > + *
> >      > + *  Each read/write call allows up to 200 packets to be
> >      > + *  read or written for a maximum of 256KB.
> >      > + *
> >      > + *  Each packet written should be a complete
> >      > + *  ethernet frame.
> >      > + *
> >      > + * https://developer.apple.com/documentation/vmnet
> >     <https://developer.apple.com/documentation/vmnet>
> >      > + */
> >      > +#define VMNET_PACKETS_LIMIT 200
> >      >
> >      >   typedef struct VmnetCommonState {
> >      > -  NetClientState nc;
> >      > +    NetClientState nc;
> >      > +    interface_ref vmnet_if;
> >      > +
> >      > +    bool send_scheduled;
> >      >
> >      > +    uint64_t mtu;
> >      > +    uint64_t max_packet_size;
> >      > +
> >      > +    struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT];
> >      > +    struct iovec iov_buf[VMNET_PACKETS_LIMIT];
> >      > +
> >      > +    dispatch_queue_t if_queue;
> >      > +
> >      > +    QEMUBH *send_bh;
> >      >   } VmnetCommonState;
> >      >
> >      > +const char *vmnet_status_map_str(vmnet_return_t status);
> >      > +
> >      > +int vmnet_if_create(NetClientState *nc,
> >      > +                    xpc_object_t if_desc,
> >      > +                    Error **errp);
> >      > +
> >      > +ssize_t vmnet_receive_common(NetClientState *nc,
> >      > +                             const uint8_t *buf,
> >      > +                             size_t size);
> >      > +
> >      > +void vmnet_cleanup_common(NetClientState *nc);
> >      >
> >      >   #endif /* VMNET_INT_H */
> >
> >
> > Other issues will be fixed and submitted later,
> > thank you!
> >
> > Regards,
> > Vladislav Yaroshchuk
>
>
Regards,
Vladislav Yaroshchuk
Akihiko Odaki Feb. 26, 2022, 12:26 p.m. UTC | #5
On Sat, Feb 26, 2022 at 8:33 PM Vladislav Yaroshchuk
<vladislav.yaroshchuk@jetbrains.com> wrote:
>
>
>
> On Sat, Feb 26, 2022 at 12:16 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
>>
>> On 2022/02/26 17:37, Vladislav Yaroshchuk wrote:
>> >
>> > Hi Akihiko,
>> >
>> > On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki <akihiko.odaki@gmail.com
>> > <mailto:akihiko.odaki@gmail.com>> wrote:
>> >
>> >     On 2022/02/26 2:13, Vladislav Yaroshchuk wrote:
>> >      > Interaction with vmnet.framework in different modes
>> >      > differs only on configuration stage, so we can create
>> >      > common `send`, `receive`, etc. procedures and reuse them.
>> >      >
>> >      > vmnet.framework supports iov, but writing more than
>> >      > one iov into vmnet interface fails with
>> >      > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
>> >      > one and passing it to vmnet works fine. That's the
>> >      > reason why receive_iov() left unimplemented. But it still
>> >      > works with good enough performance having .receive()
>> >      > net/vmnet: implement shared mode (vmnet-shared)
>> >      >
>> >      > Interaction with vmnet.framework in different modes
>> >      > differs only on configuration stage, so we can create
>> >      > common `send`, `receive`, etc. procedures and reuse them.
>> >      >
>> >      > vmnet.framework supports iov, but writing more than
>> >      > one iov into vmnet interface fails with
>> >      > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
>> >      > one and passing it to vmnet works fine. That's the
>> >      > reason why receive_iov() left unimplemented. But it still
>> >      > works with good enough performance having .receive()
>> >      > implemented only.
>> >      >
>> >      > Also, there is no way to unsubscribe from vmnet packages
>> >      > receiving except registering and unregistering event
>> >      > callback or simply drop packages just ignoring and
>> >      > not processing them when related flag is set. Here we do
>> >      > using the second way.
>> >      >
>> >      > Signed-off-by: Phillip Tennen <phillip@axleos.com
>> >     <mailto:phillip@axleos.com>>
>> >      > Signed-off-by: Vladislav Yaroshchuk
>> >     <Vladislav.Yaroshchuk@jetbrains.com
>> >     <mailto:Vladislav.Yaroshchuk@jetbrains.com>>
>> >
>> >     Thank you for persistently working on this.
>> >
>> >      > ---
>> >      >   net/vmnet-common.m | 302
>> >     +++++++++++++++++++++++++++++++++++++++++++++
>> >      >   net/vmnet-shared.c |  94 +++++++++++++-
>> >      >   net/vmnet_int.h    |  39 +++++-
>> >      >   3 files changed, 430 insertions(+), 5 deletions(-)
>> >      >
>> >      > diff --git a/net/vmnet-common.m b/net/vmnet-common.m
>> >      > index 56612c72ce..2f70921cae 100644
>> >      > --- a/net/vmnet-common.m
>> >      > +++ b/net/vmnet-common.m
>> >      > @@ -10,6 +10,8 @@
>> >      >    */
>> >      >
>> >      >   #include "qemu/osdep.h"
>> >      > +#include "qemu/main-loop.h"
>> >      > +#include "qemu/log.h"
>> >      >   #include "qapi/qapi-types-net.h"
>> >      >   #include "vmnet_int.h"
>> >      >   #include "clients.h"
>> >      > @@ -17,4 +19,304 @@
>> >      >   #include "qapi/error.h"
>> >      >
>> >      >   #include <vmnet/vmnet.h>
>> >      > +#include <dispatch/dispatch.h>
>> >      >
>> >      > +
>> >      > +static inline void vmnet_set_send_bh_scheduled(VmnetCommonState *s,
>> >      > +                                               bool enable)
>> >      > +{
>> >      > +    qatomic_set(&s->send_scheduled, enable);
>> >      > +}
>> >      > +
>> >      > +
>> >      > +static inline bool vmnet_is_send_bh_scheduled(VmnetCommonState *s)
>> >      > +{
>> >      > +    return qatomic_load_acquire(&s->send_scheduled);
>> >      > +}
>> >      > +
>> >      > +
>> >      > +static inline void vmnet_set_send_enabled(VmnetCommonState *s,
>> >      > +                                          bool enable)
>> >      > +{
>> >      > +    if (enable) {
>> >      > +        vmnet_interface_set_event_callback(
>> >      > +            s->vmnet_if,
>> >      > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
>> >      > +            s->if_queue,
>> >      > +            ^(interface_event_t event_id, xpc_object_t event) {
>> >      > +                assert(event_id ==
>> >     VMNET_INTERFACE_PACKETS_AVAILABLE);
>> >      > +                /*
>> >      > +                 * This function is being called from a non qemu
>> >     thread, so
>> >      > +                 * we only schedule a BH, and do the rest of the
>> >     io completion
>> >      > +                 * handling from vmnet_send_bh() which runs in a
>> >     qemu context.
>> >      > +                 *
>> >      > +                 * Avoid scheduling multiple bottom halves
>> >      > +                 */
>> >      > +                if (!vmnet_is_send_bh_scheduled(s)) {
>> >      > +                    vmnet_set_send_bh_scheduled(s, true);
>> >
>> >     It can be interrupted between vmnet_is_send_bh_scheduled and
>> >     vmnet_set_send_bh_scheduled, which leads to data race.
>> >
>> >
>> > Sorry, I did not clearly understand what you meant. Since this
>> > callback (block) is submitted on DISPATCH_QUEUE_SERIAL,
>> > only one instance of the callback will be executed at any
>> > moment of time.
>> > https://developer.apple.com/documentation/dispatch/dispatch_queue_serial
>> > <https://developer.apple.com/documentation/dispatch/dispatch_queue_serial>
>> >
>> > Also this is the only place where we schedule a bottom half.
>> >
>> > After we set the 'send_scheduled' flag, all the other
>> > callback  blocks will do nothing (skip the if block) until
>> > the bottom half is executed and reset 'send_scheduled'.
>> > I don't see any races here :(
>> >
>> > Correct me if I'm wrong please.
>>
>> My explanation that the interruption between vmnet_is_send_bh_scheduled
>> and vmnet_set_send_bh_scheduled is problematic was actually misleading.
>>
>> The problem is that vmnet_send_bh resets 'send_scheduled' after calling
>> vmnet_read. If we got another packet after vmnet_read, it would be
>> silently ignored.
>>
>> By the way, I forgot to mention another problem: vmnet_send_completed
>> calls vmnet_set_send_enabled, but the other packets in the buffer may
>> not be sent yet. Also, unregistering callbacks in vmnet_set_send_enabled
>> is probably not enough to stop the callback being fired since some
>> dispatch blocks are already in the dispatch queue.
>>
>
> Now I understand what you mean, thanks.
> What do you think about the workaround? For me
> it is quite difficult question how to synchronize qemu with
> vmnet thread, especially with completion callback.

You may add a new field to represent the number of packets being sent
in the buffer. The field must be maintained by vmnet_send_bh and
vmnet_send_completed, which are on the iothread. vmnet_send_bh should
do nothing if the field is greater than 0 at the beginning of the
function. vmnet_send_completed should call
qemu_bh_schedule(s->send_bh).

vmnet_set_send_enabled and send_scheduled can be simply removed.
qemu_bh_schedule ensures there is no duplicate scheduling.

Regards,
Akihiko Odaki

>
>
>>
>> Regards,
>> Akihiko Odaki
>>
>> >
>> >      > +                    qemu_bh_schedule(s->send_bh);
>> >      > +                }
>> >      > +            });
>> >      > +    } else {
>> >      > +        vmnet_interface_set_event_callback(
>> >      > +            s->vmnet_if,
>> >      > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
>> >      > +            NULL,
>> >      > +            NULL);
>> >      > +    }
>> >      > +}
>> >      > +
>> >      > +
>> >      > +static void vmnet_send_completed(NetClientState *nc, ssize_t len)
>> >      > +{
>> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
>> >      > +    vmnet_set_send_enabled(s, true);
>> >      > +}
>> >      > +
>> >      > +
>> >      > +static void vmnet_send_bh(void *opaque)
>> >      > +{
>> >      > +    NetClientState *nc = (NetClientState *) opaque;
>> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
>> >      > +
>> >      > +    struct iovec *iov = s->iov_buf;
>> >      > +    struct vmpktdesc *packets = s->packets_buf;
>> >      > +    int pkt_cnt;
>> >      > +    int i;
>> >      > +
>> >      > +    vmnet_return_t status;
>> >      > +    ssize_t size;
>> >      > +
>> >      > +    /* read as many packets as present */
>> >      > +    pkt_cnt = VMNET_PACKETS_LIMIT;
>> >
>> >     There could be more than VMNET_PACKETS_LIMIT. You may call
>> >     vmnet_read in
>> >     a loop.
>> >
>> >      > +    for (i = 0; i < pkt_cnt; ++i) {
>> >      > +        packets[i].vm_pkt_size = s->max_packet_size;
>> >      > +        packets[i].vm_pkt_iovcnt = 1;
>> >      > +        packets[i].vm_flags = 0;
>> >      > +    }
>> >      > +
>> >      > +    status = vmnet_read(s->vmnet_if, packets, &pkt_cnt);
>> >      > +    if (status != VMNET_SUCCESS) {
>> >      > +        error_printf("vmnet: read failed: %s\n",
>> >      > +                     vmnet_status_map_str(status));
>> >      > +        goto done;
>> >      > +    }
>> >      > +
>> >      > +    for (i = 0; i < pkt_cnt; ++i) {
>> >      > +        size = qemu_send_packet_async(nc,
>> >      > +                                      iov[i].iov_base,
>> >      > +                                      packets[i].vm_pkt_size,
>> >      > +                                      vmnet_send_completed);
>> >      > +        if (size == 0) {
>> >      > +            vmnet_set_send_enabled(s, false);
>> >      > +            goto done;
>> >      > +        } else if (size < 0) {
>> >      > +            break;
>> >      > +        }
>> >
>> >     goto is not needed here. "break" when size <= 0.
>> >
>> >      > +    }
>> >      > +
>> >      > +done:
>> >      > +    vmnet_set_send_bh_scheduled(s, false);
>> >      > +}
>> >      > +
>> >      > +
>> >      > +static void vmnet_bufs_init(VmnetCommonState *s)
>> >      > +{
>> >      > +    struct vmpktdesc *packets = s->packets_buf;
>> >      > +    struct iovec *iov = s->iov_buf;
>> >      > +    int i;
>> >      > +
>> >      > +    for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
>> >      > +        iov[i].iov_len = s->max_packet_size;
>> >      > +        iov[i].iov_base = g_malloc0(iov[i].iov_len);
>> >      > +        packets[i].vm_pkt_iov = iov + i;
>> >      > +    }
>> >      > +}
>> >      > +
>> >      > +
>> >      > +const char *vmnet_status_map_str(vmnet_return_t status)
>> >      > +{
>> >      > +    switch (status) {
>> >      > +    case VMNET_SUCCESS:
>> >      > +        return "success";
>> >      > +    case VMNET_FAILURE:
>> >      > +        return "general failure (possibly not enough privileges)";
>> >      > +    case VMNET_MEM_FAILURE:
>> >      > +        return "memory allocation failure";
>> >      > +    case VMNET_INVALID_ARGUMENT:
>> >      > +        return "invalid argument specified";
>> >      > +    case VMNET_SETUP_INCOMPLETE:
>> >      > +        return "interface setup is not complete";
>> >      > +    case VMNET_INVALID_ACCESS:
>> >      > +        return "invalid access, permission denied";
>> >      > +    case VMNET_PACKET_TOO_BIG:
>> >      > +        return "packet size is larger than MTU";
>> >      > +    case VMNET_BUFFER_EXHAUSTED:
>> >      > +        return "buffers exhausted in kernel";
>> >      > +    case VMNET_TOO_MANY_PACKETS:
>> >      > +        return "packet count exceeds limit";
>> >      > +#if defined(MAC_OS_VERSION_11_0) && \
>> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
>> >      > +    case VMNET_SHARING_SERVICE_BUSY:
>> >      > +        return "conflict, sharing service is in use";
>> >      > +#endif
>> >      > +    default:
>> >      > +        return "unknown vmnet error";
>> >      > +    }
>> >      > +}
>> >      > +
>> >      > +
>> >      > +int vmnet_if_create(NetClientState *nc,
>> >      > +                    xpc_object_t if_desc,
>> >      > +                    Error **errp)
>> >      > +{
>> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);;
>> >
>> >     Duplicate semicolons.
>> >
>> >      > +    dispatch_semaphore_t if_created_sem =
>> >     dispatch_semaphore_create(0);
>> >
>> >     if_created_sem leaks.
>> >
>> >      > +    __block vmnet_return_t if_status;
>> >      > +
>> >      > +    s->if_queue = dispatch_queue_create(
>> >      > +        "org.qemu.vmnet.if_queue",
>> >      > +        DISPATCH_QUEUE_SERIAL
>> >      > +    );
>> >      > +
>> >      > +    xpc_dictionary_set_bool(
>> >      > +        if_desc,
>> >      > +        vmnet_allocate_mac_address_key,
>> >      > +        false
>> >      > +    );
>> >      > +#ifdef DEBUG
>> >      > +    qemu_log("vmnet.start.interface_desc:\n");
>> >      > +    xpc_dictionary_apply(if_desc,
>> >      > +                         ^bool(const char *k, xpc_object_t v) {
>> >      > +                             char *desc = xpc_copy_description(v);
>> >      > +                             qemu_log("  %s=%s\n", k, desc);
>> >      > +                             free(desc);
>> >      > +                             return true;
>> >      > +                         });
>> >      > +#endif /* DEBUG */
>> >      > +
>> >      > +    s->vmnet_if = vmnet_start_interface(
>> >      > +        if_desc,
>> >      > +        s->if_queue,
>> >      > +        ^(vmnet_return_t status, xpc_object_t interface_param) {
>> >      > +            if_status = status;
>> >      > +            if (status != VMNET_SUCCESS || !interface_param) {
>> >      > +                dispatch_semaphore_signal(if_created_sem);
>> >      > +                return;
>> >      > +            }
>> >      > +
>> >      > +#ifdef DEBUG
>> >      > +            qemu_log("vmnet.start.interface_param:\n");
>> >      > +            xpc_dictionary_apply(interface_param,
>> >      > +                                 ^bool(const char *k,
>> >     xpc_object_t v) {
>> >      > +                                     char *desc =
>> >     xpc_copy_description(v);
>> >      > +                                     qemu_log("  %s=%s\n", k, desc);
>> >      > +                                     free(desc);
>> >      > +                                     return true;
>> >      > +                                 });
>> >      > +#endif /* DEBUG */
>> >      > +
>> >      > +            s->mtu = xpc_dictionary_get_uint64(
>> >      > +                interface_param,
>> >      > +                vmnet_mtu_key);
>> >      > +            s->max_packet_size = xpc_dictionary_get_uint64(
>> >      > +                interface_param,
>> >      > +                vmnet_max_packet_size_key);
>> >      > +
>> >      > +            dispatch_semaphore_signal(if_created_sem);
>> >      > +        });
>> >      > +
>> >      > +    if (s->vmnet_if == NULL) {
>> >      > +        error_setg(errp,
>> >      > +                   "unable to create interface "
>> >      > +                   "with requested params");
>> >
>> >     You don't need line breaks here. Breaking a string into a few would
>> >     also
>> >     makes it a bit hard to grep.
>> >
>> >      > +        return -1;
>> >
>> >     s->if_queue leaks.
>> >
>> >      > +    }
>> >      > +
>> >      > +    dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER);
>> >      > +
>> >      > +    if (if_status != VMNET_SUCCESS) {
>> >      > +        error_setg(errp,
>> >      > +                   "cannot create vmnet interface: %s",
>> >      > +                   vmnet_status_map_str(if_status));
>> >      > +        return -1;
>> >      > +    }
>> >      > +
>> >      > +    s->send_bh = aio_bh_new(qemu_get_aio_context(),
>> >     vmnet_send_bh, nc);
>> >      > +    vmnet_bufs_init(s);
>> >      > +    vmnet_set_send_bh_scheduled(s, false);
>> >      > +    vmnet_set_send_enabled(s, true);
>> >      > +    return 0;
>> >      > +}
>> >      > +
>> >      > +
>> >      > +ssize_t vmnet_receive_common(NetClientState *nc,
>> >      > +                             const uint8_t *buf,
>> >      > +                             size_t size)
>> >      > +{
>> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
>> >      > +    struct vmpktdesc packet;
>> >      > +    struct iovec iov;
>> >      > +    int pkt_cnt;
>> >      > +    vmnet_return_t if_status;
>> >      > +
>> >      > +    if (size > s->max_packet_size) {
>> >      > +        warn_report("vmnet: packet is too big, %zu > %llu\n",
>> >
>> >     Use PRIu64.
>> >
>> >      > +                    packet.vm_pkt_size,
>> >      > +                    s->max_packet_size);
>> >      > +        return -1;
>> >      > +    }
>> >      > +
>> >      > +    iov.iov_base = (char *) buf;
>> >      > +    iov.iov_len = size;
>> >      > +
>> >      > +    packet.vm_pkt_iovcnt = 1;
>> >      > +    packet.vm_flags = 0;
>> >      > +    packet.vm_pkt_size = size;
>> >      > +    packet.vm_pkt_iov = &iov;
>> >      > +    pkt_cnt = 1;
>> >      > +
>> >      > +    if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt);
>> >      > +    if (if_status != VMNET_SUCCESS) {
>> >      > +        error_report("vmnet: write error: %s\n",
>> >      > +                     vmnet_status_map_str(if_status));
>> >
>> >     Why don't return -1?
>> >
>> >      > +    }
>> >      > +
>> >      > +    if (if_status == VMNET_SUCCESS && pkt_cnt) {
>> >      > +        return size;
>> >      > +    }
>> >      > +    return 0;
>> >      > +}
>> >      > +
>> >      > +
>> >      > +void vmnet_cleanup_common(NetClientState *nc)
>> >      > +{
>> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);;
>> >
>> >     Duplicate semicolons.
>> >
>> >      > +    dispatch_semaphore_t if_created_sem;
>> >      > +
>> >      > +    qemu_purge_queued_packets(nc); > +
>> >     vmnet_set_send_bh_scheduled(s, true);
>> >      > +    vmnet_set_send_enabled(s, false);
>> >      > +
>> >      > +    if (s->vmnet_if == NULL) {
>> >      > +        return;
>> >      > +    }
>> >      > +
>> >      > +    if_created_sem = dispatch_semaphore_create(0);
>> >      > +    vmnet_stop_interface(
>> >      > +        s->vmnet_if,
>> >      > +        s->if_queue,
>> >      > +        ^(vmnet_return_t status) {
>> >      > +            assert(status == VMNET_SUCCESS);
>> >      > +            dispatch_semaphore_signal(if_created_sem);
>> >      > +        });
>> >      > +    dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER);
>> >      > +
>> >      > +    qemu_bh_delete(s->send_bh);
>> >      > +    dispatch_release(if_created_sem);
>> >      > +    dispatch_release(s->if_queue);
>> >      > +
>> >      > +    for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
>> >      > +        g_free(s->iov_buf[i].iov_base);
>> >      > +    }
>> >      > +}
>> >      > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c
>> >      > index f07afaaf21..66f66c034b 100644
>> >      > --- a/net/vmnet-shared.c
>> >      > +++ b/net/vmnet-shared.c
>> >      > @@ -10,16 +10,102 @@
>> >      >
>> >      >   #include "qemu/osdep.h"
>> >      >   #include "qapi/qapi-types-net.h"
>> >      > +#include "qapi/error.h"
>> >      >   #include "vmnet_int.h"
>> >      >   #include "clients.h"
>> >      > -#include "qemu/error-report.h"
>> >      > -#include "qapi/error.h"
>> >      >
>> >      >   #include <vmnet/vmnet.h>
>> >      >
>> >      > +typedef struct VmnetSharedState {
>> >      > +    VmnetCommonState cs;
>> >      > +} VmnetSharedState;
>> >      > +
>> >      > +
>> >      > +static bool validate_options(const Netdev *netdev, Error **errp)
>> >      > +{
>> >      > +    const NetdevVmnetSharedOptions *options =
>> >     &(netdev->u.vmnet_shared);
>> >      > +
>> >      > +#if !defined(MAC_OS_VERSION_11_0) || \
>> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0
>> >      > +    if (options->has_isolated) {
>> >      > +        error_setg(errp,
>> >      > +                   "vmnet-shared.isolated feature is "
>> >      > +                   "unavailable: outdated vmnet.framework API");
>> >      > +        return false;
>> >      > +    }
>> >      > +#endif
>> >      > +
>> >      > +    if ((options->has_start_address ||
>> >      > +         options->has_end_address ||
>> >      > +         options->has_subnet_mask) &&
>> >      > +        !(options->has_start_address &&
>> >      > +          options->has_end_address &&
>> >      > +          options->has_subnet_mask)) {
>> >      > +        error_setg(errp,
>> >      > +                   "'start-address', 'end-address', 'subnet-mask' "
>> >      > +                   "should be provided together"
>> >      > +        );
>> >      > +        return false;
>> >      > +    }
>> >      > +
>> >      > +    return true;
>> >      > +}
>> >      > +
>> >      > +static xpc_object_t build_if_desc(const Netdev *netdev)
>> >      > +{
>> >      > +    const NetdevVmnetSharedOptions *options =
>> >     &(netdev->u.vmnet_shared);
>> >      > +    xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0);
>> >      > +
>> >      > +    xpc_dictionary_set_uint64(
>> >      > +        if_desc,
>> >      > +        vmnet_operation_mode_key,
>> >      > +        VMNET_SHARED_MODE
>> >      > +    );
>> >      > +
>> >      > +    if (options->has_nat66_prefix) {
>> >      > +        xpc_dictionary_set_string(if_desc,
>> >      > +                                  vmnet_nat66_prefix_key,
>> >      > +                                  options->nat66_prefix);
>> >      > +    }
>> >      > +
>> >      > +    if (options->has_start_address) {
>> >      > +        xpc_dictionary_set_string(if_desc,
>> >      > +                                  vmnet_start_address_key,
>> >      > +                                  options->start_address);
>> >      > +        xpc_dictionary_set_string(if_desc,
>> >      > +                                  vmnet_end_address_key,
>> >      > +                                  options->end_address);
>> >      > +        xpc_dictionary_set_string(if_desc,
>> >      > +                                  vmnet_subnet_mask_key,
>> >      > +                                  options->subnet_mask);
>> >      > +    }
>> >      > +
>> >      > +#if defined(MAC_OS_VERSION_11_0) && \
>> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
>> >      > +    xpc_dictionary_set_bool(
>> >      > +        if_desc,
>> >      > +        vmnet_enable_isolation_key,
>> >      > +        options->isolated
>> >      > +    );
>> >      > +#endif
>> >      > +
>> >      > +    return if_desc;
>> >      > +}
>> >      > +
>> >      > +static NetClientInfo net_vmnet_shared_info = {
>> >      > +    .type = NET_CLIENT_DRIVER_VMNET_SHARED,
>> >      > +    .size = sizeof(VmnetSharedState),
>> >      > +    .receive = vmnet_receive_common,
>> >      > +    .cleanup = vmnet_cleanup_common,
>> >      > +};
>> >      > +
>> >      >   int net_init_vmnet_shared(const Netdev *netdev, const char *name,
>> >      >                             NetClientState *peer, Error **errp)
>> >      >   {
>> >      > -  error_setg(errp, "vmnet-shared is not implemented yet");
>> >      > -  return -1;
>> >      > +    NetClientState *nc = qemu_new_net_client(&net_vmnet_shared_info,
>> >      > +                                             peer,
>> >     "vmnet-shared", name);
>> >      > +    if (!validate_options(netdev, errp)) {
>> >      > +        g_assert_not_reached();
>> >
>> >     g_assert_not_reached is for debugging purpose and may be dropped
>> >     depending on the build option.
>> >
>> >      > +    }
>> >      > +    return vmnet_if_create(nc, build_if_desc(netdev), errp);
>> >      >   }
>> >      > diff --git a/net/vmnet_int.h b/net/vmnet_int.h
>> >      > index aac4d5af64..acfe3a88c0 100644
>> >      > --- a/net/vmnet_int.h
>> >      > +++ b/net/vmnet_int.h
>> >      > @@ -15,11 +15,48 @@
>> >      >   #include "clients.h"
>> >      >
>> >      >   #include <vmnet/vmnet.h>
>> >      > +#include <dispatch/dispatch.h>
>> >      > +
>> >      > +/**
>> >      > + *  From vmnet.framework documentation
>> >      > + *
>> >      > + *  Each read/write call allows up to 200 packets to be
>> >      > + *  read or written for a maximum of 256KB.
>> >      > + *
>> >      > + *  Each packet written should be a complete
>> >      > + *  ethernet frame.
>> >      > + *
>> >      > + * https://developer.apple.com/documentation/vmnet
>> >     <https://developer.apple.com/documentation/vmnet>
>> >      > + */
>> >      > +#define VMNET_PACKETS_LIMIT 200
>> >      >
>> >      >   typedef struct VmnetCommonState {
>> >      > -  NetClientState nc;
>> >      > +    NetClientState nc;
>> >      > +    interface_ref vmnet_if;
>> >      > +
>> >      > +    bool send_scheduled;
>> >      >
>> >      > +    uint64_t mtu;
>> >      > +    uint64_t max_packet_size;
>> >      > +
>> >      > +    struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT];
>> >      > +    struct iovec iov_buf[VMNET_PACKETS_LIMIT];
>> >      > +
>> >      > +    dispatch_queue_t if_queue;
>> >      > +
>> >      > +    QEMUBH *send_bh;
>> >      >   } VmnetCommonState;
>> >      >
>> >      > +const char *vmnet_status_map_str(vmnet_return_t status);
>> >      > +
>> >      > +int vmnet_if_create(NetClientState *nc,
>> >      > +                    xpc_object_t if_desc,
>> >      > +                    Error **errp);
>> >      > +
>> >      > +ssize_t vmnet_receive_common(NetClientState *nc,
>> >      > +                             const uint8_t *buf,
>> >      > +                             size_t size);
>> >      > +
>> >      > +void vmnet_cleanup_common(NetClientState *nc);
>> >      >
>> >      >   #endif /* VMNET_INT_H */
>> >
>> >
>> > Other issues will be fixed and submitted later,
>> > thank you!
>> >
>> > Regards,
>> > Vladislav Yaroshchuk
>>
>
> Regards,
> Vladislav Yaroshchuk
Vladislav Yaroshchuk Feb. 28, 2022, 11:59 a.m. UTC | #6
On Sat, Feb 26, 2022 at 3:27 PM Akihiko Odaki <akihiko.odaki@gmail.com>
wrote:

> On Sat, Feb 26, 2022 at 8:33 PM Vladislav Yaroshchuk
> <vladislav.yaroshchuk@jetbrains.com> wrote:
> >
> >
> >
> > On Sat, Feb 26, 2022 at 12:16 PM Akihiko Odaki <akihiko.odaki@gmail.com>
> wrote:
> >>
> >> On 2022/02/26 17:37, Vladislav Yaroshchuk wrote:
> >> >
> >> > Hi Akihiko,
> >> >
> >> > On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki <
> akihiko.odaki@gmail.com
> >> > <mailto:akihiko.odaki@gmail.com>> wrote:
> >> >
> >> >     On 2022/02/26 2:13, Vladislav Yaroshchuk wrote:
> >> >      > Interaction with vmnet.framework in different modes
> >> >      > differs only on configuration stage, so we can create
> >> >      > common `send`, `receive`, etc. procedures and reuse them.
> >> >      >
> >> >      > vmnet.framework supports iov, but writing more than
> >> >      > one iov into vmnet interface fails with
> >> >      > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
> >> >      > one and passing it to vmnet works fine. That's the
> >> >      > reason why receive_iov() left unimplemented. But it still
> >> >      > works with good enough performance having .receive()
> >> >      > net/vmnet: implement shared mode (vmnet-shared)
> >> >      >
> >> >      > Interaction with vmnet.framework in different modes
> >> >      > differs only on configuration stage, so we can create
> >> >      > common `send`, `receive`, etc. procedures and reuse them.
> >> >      >
> >> >      > vmnet.framework supports iov, but writing more than
> >> >      > one iov into vmnet interface fails with
> >> >      > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
> >> >      > one and passing it to vmnet works fine. That's the
> >> >      > reason why receive_iov() left unimplemented. But it still
> >> >      > works with good enough performance having .receive()
> >> >      > implemented only.
> >> >      >
> >> >      > Also, there is no way to unsubscribe from vmnet packages
> >> >      > receiving except registering and unregistering event
> >> >      > callback or simply drop packages just ignoring and
> >> >      > not processing them when related flag is set. Here we do
> >> >      > using the second way.
> >> >      >
> >> >      > Signed-off-by: Phillip Tennen <phillip@axleos.com
> >> >     <mailto:phillip@axleos.com>>
> >> >      > Signed-off-by: Vladislav Yaroshchuk
> >> >     <Vladislav.Yaroshchuk@jetbrains.com
> >> >     <mailto:Vladislav.Yaroshchuk@jetbrains.com>>
> >> >
> >> >     Thank you for persistently working on this.
> >> >
> >> >      > ---
> >> >      >   net/vmnet-common.m | 302
> >> >     +++++++++++++++++++++++++++++++++++++++++++++
> >> >      >   net/vmnet-shared.c |  94 +++++++++++++-
> >> >      >   net/vmnet_int.h    |  39 +++++-
> >> >      >   3 files changed, 430 insertions(+), 5 deletions(-)
> >> >      >
> >> >      > diff --git a/net/vmnet-common.m b/net/vmnet-common.m
> >> >      > index 56612c72ce..2f70921cae 100644
> >> >      > --- a/net/vmnet-common.m
> >> >      > +++ b/net/vmnet-common.m
> >> >      > @@ -10,6 +10,8 @@
> >> >      >    */
> >> >      >
> >> >      >   #include "qemu/osdep.h"
> >> >      > +#include "qemu/main-loop.h"
> >> >      > +#include "qemu/log.h"
> >> >      >   #include "qapi/qapi-types-net.h"
> >> >      >   #include "vmnet_int.h"
> >> >      >   #include "clients.h"
> >> >      > @@ -17,4 +19,304 @@
> >> >      >   #include "qapi/error.h"
> >> >      >
> >> >      >   #include <vmnet/vmnet.h>
> >> >      > +#include <dispatch/dispatch.h>
> >> >      >
> >> >      > +
> >> >      > +static inline void
> vmnet_set_send_bh_scheduled(VmnetCommonState *s,
> >> >      > +                                               bool enable)
> >> >      > +{
> >> >      > +    qatomic_set(&s->send_scheduled, enable);
> >> >      > +}
> >> >      > +
> >> >      > +
> >> >      > +static inline bool
> vmnet_is_send_bh_scheduled(VmnetCommonState *s)
> >> >      > +{
> >> >      > +    return qatomic_load_acquire(&s->send_scheduled);
> >> >      > +}
> >> >      > +
> >> >      > +
> >> >      > +static inline void vmnet_set_send_enabled(VmnetCommonState *s,
> >> >      > +                                          bool enable)
> >> >      > +{
> >> >      > +    if (enable) {
> >> >      > +        vmnet_interface_set_event_callback(
> >> >      > +            s->vmnet_if,
> >> >      > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
> >> >      > +            s->if_queue,
> >> >      > +            ^(interface_event_t event_id, xpc_object_t event)
> {
> >> >      > +                assert(event_id ==
> >> >     VMNET_INTERFACE_PACKETS_AVAILABLE);
> >> >      > +                /*
> >> >      > +                 * This function is being called from a non
> qemu
> >> >     thread, so
> >> >      > +                 * we only schedule a BH, and do the rest of
> the
> >> >     io completion
> >> >      > +                 * handling from vmnet_send_bh() which runs
> in a
> >> >     qemu context.
> >> >      > +                 *
> >> >      > +                 * Avoid scheduling multiple bottom halves
> >> >      > +                 */
> >> >      > +                if (!vmnet_is_send_bh_scheduled(s)) {
> >> >      > +                    vmnet_set_send_bh_scheduled(s, true);
> >> >
> >> >     It can be interrupted between vmnet_is_send_bh_scheduled and
> >> >     vmnet_set_send_bh_scheduled, which leads to data race.
> >> >
> >> >
> >> > Sorry, I did not clearly understand what you meant. Since this
> >> > callback (block) is submitted on DISPATCH_QUEUE_SERIAL,
> >> > only one instance of the callback will be executed at any
> >> > moment of time.
> >> >
> https://developer.apple.com/documentation/dispatch/dispatch_queue_serial
> >> > <
> https://developer.apple.com/documentation/dispatch/dispatch_queue_serial>
> >> >
> >> > Also this is the only place where we schedule a bottom half.
> >> >
> >> > After we set the 'send_scheduled' flag, all the other
> >> > callback  blocks will do nothing (skip the if block) until
> >> > the bottom half is executed and reset 'send_scheduled'.
> >> > I don't see any races here :(
> >> >
> >> > Correct me if I'm wrong please.
> >>
> >> My explanation that the interruption between vmnet_is_send_bh_scheduled
> >> and vmnet_set_send_bh_scheduled is problematic was actually misleading.
> >>
> >> The problem is that vmnet_send_bh resets 'send_scheduled' after calling
> >> vmnet_read. If we got another packet after vmnet_read, it would be
> >> silently ignored.
> >>
> >> By the way, I forgot to mention another problem: vmnet_send_completed
> >> calls vmnet_set_send_enabled, but the other packets in the buffer may
> >> not be sent yet. Also, unregistering callbacks in vmnet_set_send_enabled
> >> is probably not enough to stop the callback being fired since some
> >> dispatch blocks are already in the dispatch queue.
> >>
> >
> > Now I understand what you mean, thanks.
> > What do you think about the workaround? For me
> > it is quite difficult question how to synchronize qemu with
> > vmnet thread, especially with completion callback.
>
> You may add a new field to represent the number of packets being sent
> in the buffer. The field must be maintained by vmnet_send_bh and
> vmnet_send_completed, which are on the iothread. vmnet_send_bh should
> do nothing if the field is greater than 0 at the beginning of the
> function. vmnet_send_completed should call
> qemu_bh_schedule(s->send_bh).
>
>
Thank you for the idea! Sounds meaningful to
schedule a bottom half in vmnet thread and do the
rest of things in iothread with no concurrency.

Not sure that only one field is enough, cause
we may have two states on bh execution start:
1. There are packets in vmnet buffer s->packets_buf
    that were rejected by qemu_send_async and waiting
    to be sent. If this happens, we should complete sending
    these waiting packets with qemu_send_async firstly,
    and after that we should call vmnet_read to get
    new ones and send them to QEMU;
2. There are no packets in s->packets_buf to be sent to
    qemu, we only need to get new packets from vmnet
    with vmnet_read and send them to QEMU

It can be done kinda this way:
```
struct s:
    send_bh:                    bh
    packets_buf:              array[packet]
    ## Three new fields
    send_enabled:           bool
    packets_send_pos:    int
    packets_batch_size:  int

fun bh_send(s):
    ## Send disabled - do nothing
    if not s->send_enabled:
        return

    ## If some packets are waiting to be sent in
    ## s->packets_buf - send them
    if s->packets_send_pos < s->packets_batch_size:
        if not qemu_send_wrapper(s):
            return

    ## Try to read new packets from vmnet
    s->packets_send_pos = 0
    s->packets_batch_size = 0
    s->packets_buf = vmnet_read(&s->packets_batch_size)
    if s->packets_batch_size > 0:
        # If something received - process them
        qemu_send_wrapper(s)

fun qemu_send_wrapper(s):
    for i in s->packets_send_pos to s->packets_batch_size:
        size = qemu_send_async(s->packets_buf[i],
                                                  vmnet_send_completed)
        if size == 0:
            ## Disable packets processing until vmnet_send_completed
            ## Save the state: packets to be sent now in s->packets_buf
            ## in range s->packets_send_pos..s->packets_batch_size
            s->send_enabled = false
            s->packets_send_pos = i + 1
            break

        if size < 0:
            ## On error just drop the packets
            s->packets_send_pos = 0
            s->packets_batch_size = 0
            break

     return s->send_enabled


fun vmnet_send_completed(s):
    ## Complete sending packets from s->packets_buf
    s->send_enabled = true
    qemu_bh_schedule(s->send_bh)

## From callback we only scheduling the bh
vmnet.set_callback(callback = s: qemu_bh_schedule(s->send_bh))
```

I think a simpler implementation may exist, but currently
I see only this approach with the send_enabled flag and
two more fields to save packets sending state.

vmnet_set_send_enabled and send_scheduled can be simply removed.
> qemu_bh_schedule ensures there is no duplicate scheduling.
>
>
Yep, my mistake. Previously I used schedule_oneshot which
creates a new bh for each call which causes duplicate scheduling.


Regards,
> Akihiko Odaki
>
>
> >
> >>
> >> Regards,
> >> Akihiko Odaki
> >>
> >> >
> >> >      > +                    qemu_bh_schedule(s->send_bh);
> >> >      > +                }
> >> >      > +            });
> >> >      > +    } else {
> >> >      > +        vmnet_interface_set_event_callback(
> >> >      > +            s->vmnet_if,
> >> >      > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
> >> >      > +            NULL,
> >> >      > +            NULL);
> >> >      > +    }
> >> >      > +}
> >> >      > +
> >> >      > +
> >> >      > +static void vmnet_send_completed(NetClientState *nc, ssize_t
> len)
> >> >      > +{
> >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
> >> >      > +    vmnet_set_send_enabled(s, true);
> >> >      > +}
> >> >      > +
> >> >      > +
> >> >      > +static void vmnet_send_bh(void *opaque)
> >> >      > +{
> >> >      > +    NetClientState *nc = (NetClientState *) opaque;
> >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
> >> >      > +
> >> >      > +    struct iovec *iov = s->iov_buf;
> >> >      > +    struct vmpktdesc *packets = s->packets_buf;
> >> >      > +    int pkt_cnt;
> >> >      > +    int i;
> >> >      > +
> >> >      > +    vmnet_return_t status;
> >> >      > +    ssize_t size;
> >> >      > +
> >> >      > +    /* read as many packets as present */
> >> >      > +    pkt_cnt = VMNET_PACKETS_LIMIT;
> >> >
> >> >     There could be more than VMNET_PACKETS_LIMIT. You may call
> >> >     vmnet_read in
> >> >     a loop.
> >> >
> >> >      > +    for (i = 0; i < pkt_cnt; ++i) {
> >> >      > +        packets[i].vm_pkt_size = s->max_packet_size;
> >> >      > +        packets[i].vm_pkt_iovcnt = 1;
> >> >      > +        packets[i].vm_flags = 0;
> >> >      > +    }
> >> >      > +
> >> >      > +    status = vmnet_read(s->vmnet_if, packets, &pkt_cnt);
> >> >      > +    if (status != VMNET_SUCCESS) {
> >> >      > +        error_printf("vmnet: read failed: %s\n",
> >> >      > +                     vmnet_status_map_str(status));
> >> >      > +        goto done;
> >> >      > +    }
> >> >      > +
> >> >      > +    for (i = 0; i < pkt_cnt; ++i) {
> >> >      > +        size = qemu_send_packet_async(nc,
> >> >      > +                                      iov[i].iov_base,
> >> >      > +                                      packets[i].vm_pkt_size,
> >> >      > +                                      vmnet_send_completed);
> >> >      > +        if (size == 0) {
> >> >      > +            vmnet_set_send_enabled(s, false);
> >> >      > +            goto done;
> >> >      > +        } else if (size < 0) {
> >> >      > +            break;
> >> >      > +        }
> >> >
> >> >     goto is not needed here. "break" when size <= 0.
> >> >
> >> >      > +    }
> >> >      > +
> >> >      > +done:
> >> >      > +    vmnet_set_send_bh_scheduled(s, false);
> >> >      > +}
> >> >      > +
> >> >      > +
> >> >      > +static void vmnet_bufs_init(VmnetCommonState *s)
> >> >      > +{
> >> >      > +    struct vmpktdesc *packets = s->packets_buf;
> >> >      > +    struct iovec *iov = s->iov_buf;
> >> >      > +    int i;
> >> >      > +
> >> >      > +    for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
> >> >      > +        iov[i].iov_len = s->max_packet_size;
> >> >      > +        iov[i].iov_base = g_malloc0(iov[i].iov_len);
> >> >      > +        packets[i].vm_pkt_iov = iov + i;
> >> >      > +    }
> >> >      > +}
> >> >      > +
> >> >      > +
> >> >      > +const char *vmnet_status_map_str(vmnet_return_t status)
> >> >      > +{
> >> >      > +    switch (status) {
> >> >      > +    case VMNET_SUCCESS:
> >> >      > +        return "success";
> >> >      > +    case VMNET_FAILURE:
> >> >      > +        return "general failure (possibly not enough
> privileges)";
> >> >      > +    case VMNET_MEM_FAILURE:
> >> >      > +        return "memory allocation failure";
> >> >      > +    case VMNET_INVALID_ARGUMENT:
> >> >      > +        return "invalid argument specified";
> >> >      > +    case VMNET_SETUP_INCOMPLETE:
> >> >      > +        return "interface setup is not complete";
> >> >      > +    case VMNET_INVALID_ACCESS:
> >> >      > +        return "invalid access, permission denied";
> >> >      > +    case VMNET_PACKET_TOO_BIG:
> >> >      > +        return "packet size is larger than MTU";
> >> >      > +    case VMNET_BUFFER_EXHAUSTED:
> >> >      > +        return "buffers exhausted in kernel";
> >> >      > +    case VMNET_TOO_MANY_PACKETS:
> >> >      > +        return "packet count exceeds limit";
> >> >      > +#if defined(MAC_OS_VERSION_11_0) && \
> >> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
> >> >      > +    case VMNET_SHARING_SERVICE_BUSY:
> >> >      > +        return "conflict, sharing service is in use";
> >> >      > +#endif
> >> >      > +    default:
> >> >      > +        return "unknown vmnet error";
> >> >      > +    }
> >> >      > +}
> >> >      > +
> >> >      > +
> >> >      > +int vmnet_if_create(NetClientState *nc,
> >> >      > +                    xpc_object_t if_desc,
> >> >      > +                    Error **errp)
> >> >      > +{
> >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc,
> nc);;
> >> >
> >> >     Duplicate semicolons.
> >> >
> >> >      > +    dispatch_semaphore_t if_created_sem =
> >> >     dispatch_semaphore_create(0);
> >> >
> >> >     if_created_sem leaks.
> >> >
> >> >      > +    __block vmnet_return_t if_status;
> >> >      > +
> >> >      > +    s->if_queue = dispatch_queue_create(
> >> >      > +        "org.qemu.vmnet.if_queue",
> >> >      > +        DISPATCH_QUEUE_SERIAL
> >> >      > +    );
> >> >      > +
> >> >      > +    xpc_dictionary_set_bool(
> >> >      > +        if_desc,
> >> >      > +        vmnet_allocate_mac_address_key,
> >> >      > +        false
> >> >      > +    );
> >> >      > +#ifdef DEBUG
> >> >      > +    qemu_log("vmnet.start.interface_desc:\n");
> >> >      > +    xpc_dictionary_apply(if_desc,
> >> >      > +                         ^bool(const char *k, xpc_object_t v)
> {
> >> >      > +                             char *desc =
> xpc_copy_description(v);
> >> >      > +                             qemu_log("  %s=%s\n", k, desc);
> >> >      > +                             free(desc);
> >> >      > +                             return true;
> >> >      > +                         });
> >> >      > +#endif /* DEBUG */
> >> >      > +
> >> >      > +    s->vmnet_if = vmnet_start_interface(
> >> >      > +        if_desc,
> >> >      > +        s->if_queue,
> >> >      > +        ^(vmnet_return_t status, xpc_object_t
> interface_param) {
> >> >      > +            if_status = status;
> >> >      > +            if (status != VMNET_SUCCESS || !interface_param) {
> >> >      > +                dispatch_semaphore_signal(if_created_sem);
> >> >      > +                return;
> >> >      > +            }
> >> >      > +
> >> >      > +#ifdef DEBUG
> >> >      > +            qemu_log("vmnet.start.interface_param:\n");
> >> >      > +            xpc_dictionary_apply(interface_param,
> >> >      > +                                 ^bool(const char *k,
> >> >     xpc_object_t v) {
> >> >      > +                                     char *desc =
> >> >     xpc_copy_description(v);
> >> >      > +                                     qemu_log("  %s=%s\n", k,
> desc);
> >> >      > +                                     free(desc);
> >> >      > +                                     return true;
> >> >      > +                                 });
> >> >      > +#endif /* DEBUG */
> >> >      > +
> >> >      > +            s->mtu = xpc_dictionary_get_uint64(
> >> >      > +                interface_param,
> >> >      > +                vmnet_mtu_key);
> >> >      > +            s->max_packet_size = xpc_dictionary_get_uint64(
> >> >      > +                interface_param,
> >> >      > +                vmnet_max_packet_size_key);
> >> >      > +
> >> >      > +            dispatch_semaphore_signal(if_created_sem);
> >> >      > +        });
> >> >      > +
> >> >      > +    if (s->vmnet_if == NULL) {
> >> >      > +        error_setg(errp,
> >> >      > +                   "unable to create interface "
> >> >      > +                   "with requested params");
> >> >
> >> >     You don't need line breaks here. Breaking a string into a few
> would
> >> >     also
> >> >     makes it a bit hard to grep.
> >> >
> >> >      > +        return -1;
> >> >
> >> >     s->if_queue leaks.
> >> >
> >> >      > +    }
> >> >      > +
> >> >      > +    dispatch_semaphore_wait(if_created_sem,
> DISPATCH_TIME_FOREVER);
> >> >      > +
> >> >      > +    if (if_status != VMNET_SUCCESS) {
> >> >      > +        error_setg(errp,
> >> >      > +                   "cannot create vmnet interface: %s",
> >> >      > +                   vmnet_status_map_str(if_status));
> >> >      > +        return -1;
> >> >      > +    }
> >> >      > +
> >> >      > +    s->send_bh = aio_bh_new(qemu_get_aio_context(),
> >> >     vmnet_send_bh, nc);
> >> >      > +    vmnet_bufs_init(s);
> >> >      > +    vmnet_set_send_bh_scheduled(s, false);
> >> >      > +    vmnet_set_send_enabled(s, true);
> >> >      > +    return 0;
> >> >      > +}
> >> >      > +
> >> >      > +
> >> >      > +ssize_t vmnet_receive_common(NetClientState *nc,
> >> >      > +                             const uint8_t *buf,
> >> >      > +                             size_t size)
> >> >      > +{
> >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
> >> >      > +    struct vmpktdesc packet;
> >> >      > +    struct iovec iov;
> >> >      > +    int pkt_cnt;
> >> >      > +    vmnet_return_t if_status;
> >> >      > +
> >> >      > +    if (size > s->max_packet_size) {
> >> >      > +        warn_report("vmnet: packet is too big, %zu > %llu\n",
> >> >
> >> >     Use PRIu64.
> >> >
> >> >      > +                    packet.vm_pkt_size,
> >> >      > +                    s->max_packet_size);
> >> >      > +        return -1;
> >> >      > +    }
> >> >      > +
> >> >      > +    iov.iov_base = (char *) buf;
> >> >      > +    iov.iov_len = size;
> >> >      > +
> >> >      > +    packet.vm_pkt_iovcnt = 1;
> >> >      > +    packet.vm_flags = 0;
> >> >      > +    packet.vm_pkt_size = size;
> >> >      > +    packet.vm_pkt_iov = &iov;
> >> >      > +    pkt_cnt = 1;
> >> >      > +
> >> >      > +    if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt);
> >> >      > +    if (if_status != VMNET_SUCCESS) {
> >> >      > +        error_report("vmnet: write error: %s\n",
> >> >      > +                     vmnet_status_map_str(if_status));
> >> >
> >> >     Why don't return -1?
> >> >
> >> >      > +    }
> >> >      > +
> >> >      > +    if (if_status == VMNET_SUCCESS && pkt_cnt) {
> >> >      > +        return size;
> >> >      > +    }
> >> >      > +    return 0;
> >> >      > +}
> >> >      > +
> >> >      > +
> >> >      > +void vmnet_cleanup_common(NetClientState *nc)
> >> >      > +{
> >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc,
> nc);;
> >> >
> >> >     Duplicate semicolons.
> >> >
> >> >      > +    dispatch_semaphore_t if_created_sem;
> >> >      > +
> >> >      > +    qemu_purge_queued_packets(nc); > +
> >> >     vmnet_set_send_bh_scheduled(s, true);
> >> >      > +    vmnet_set_send_enabled(s, false);
> >> >      > +
> >> >      > +    if (s->vmnet_if == NULL) {
> >> >      > +        return;
> >> >      > +    }
> >> >      > +
> >> >      > +    if_created_sem = dispatch_semaphore_create(0);
> >> >      > +    vmnet_stop_interface(
> >> >      > +        s->vmnet_if,
> >> >      > +        s->if_queue,
> >> >      > +        ^(vmnet_return_t status) {
> >> >      > +            assert(status == VMNET_SUCCESS);
> >> >      > +            dispatch_semaphore_signal(if_created_sem);
> >> >      > +        });
> >> >      > +    dispatch_semaphore_wait(if_created_sem,
> DISPATCH_TIME_FOREVER);
> >> >      > +
> >> >      > +    qemu_bh_delete(s->send_bh);
> >> >      > +    dispatch_release(if_created_sem);
> >> >      > +    dispatch_release(s->if_queue);
> >> >      > +
> >> >      > +    for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
> >> >      > +        g_free(s->iov_buf[i].iov_base);
> >> >      > +    }
> >> >      > +}
> >> >      > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c
> >> >      > index f07afaaf21..66f66c034b 100644
> >> >      > --- a/net/vmnet-shared.c
> >> >      > +++ b/net/vmnet-shared.c
> >> >      > @@ -10,16 +10,102 @@
> >> >      >
> >> >      >   #include "qemu/osdep.h"
> >> >      >   #include "qapi/qapi-types-net.h"
> >> >      > +#include "qapi/error.h"
> >> >      >   #include "vmnet_int.h"
> >> >      >   #include "clients.h"
> >> >      > -#include "qemu/error-report.h"
> >> >      > -#include "qapi/error.h"
> >> >      >
> >> >      >   #include <vmnet/vmnet.h>
> >> >      >
> >> >      > +typedef struct VmnetSharedState {
> >> >      > +    VmnetCommonState cs;
> >> >      > +} VmnetSharedState;
> >> >      > +
> >> >      > +
> >> >      > +static bool validate_options(const Netdev *netdev, Error
> **errp)
> >> >      > +{
> >> >      > +    const NetdevVmnetSharedOptions *options =
> >> >     &(netdev->u.vmnet_shared);
> >> >      > +
> >> >      > +#if !defined(MAC_OS_VERSION_11_0) || \
> >> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0
> >> >      > +    if (options->has_isolated) {
> >> >      > +        error_setg(errp,
> >> >      > +                   "vmnet-shared.isolated feature is "
> >> >      > +                   "unavailable: outdated vmnet.framework
> API");
> >> >      > +        return false;
> >> >      > +    }
> >> >      > +#endif
> >> >      > +
> >> >      > +    if ((options->has_start_address ||
> >> >      > +         options->has_end_address ||
> >> >      > +         options->has_subnet_mask) &&
> >> >      > +        !(options->has_start_address &&
> >> >      > +          options->has_end_address &&
> >> >      > +          options->has_subnet_mask)) {
> >> >      > +        error_setg(errp,
> >> >      > +                   "'start-address', 'end-address',
> 'subnet-mask' "
> >> >      > +                   "should be provided together"
> >> >      > +        );
> >> >      > +        return false;
> >> >      > +    }
> >> >      > +
> >> >      > +    return true;
> >> >      > +}
> >> >      > +
> >> >      > +static xpc_object_t build_if_desc(const Netdev *netdev)
> >> >      > +{
> >> >      > +    const NetdevVmnetSharedOptions *options =
> >> >     &(netdev->u.vmnet_shared);
> >> >      > +    xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL,
> 0);
> >> >      > +
> >> >      > +    xpc_dictionary_set_uint64(
> >> >      > +        if_desc,
> >> >      > +        vmnet_operation_mode_key,
> >> >      > +        VMNET_SHARED_MODE
> >> >      > +    );
> >> >      > +
> >> >      > +    if (options->has_nat66_prefix) {
> >> >      > +        xpc_dictionary_set_string(if_desc,
> >> >      > +                                  vmnet_nat66_prefix_key,
> >> >      > +                                  options->nat66_prefix);
> >> >      > +    }
> >> >      > +
> >> >      > +    if (options->has_start_address) {
> >> >      > +        xpc_dictionary_set_string(if_desc,
> >> >      > +                                  vmnet_start_address_key,
> >> >      > +                                  options->start_address);
> >> >      > +        xpc_dictionary_set_string(if_desc,
> >> >      > +                                  vmnet_end_address_key,
> >> >      > +                                  options->end_address);
> >> >      > +        xpc_dictionary_set_string(if_desc,
> >> >      > +                                  vmnet_subnet_mask_key,
> >> >      > +                                  options->subnet_mask);
> >> >      > +    }
> >> >      > +
> >> >      > +#if defined(MAC_OS_VERSION_11_0) && \
> >> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
> >> >      > +    xpc_dictionary_set_bool(
> >> >      > +        if_desc,
> >> >      > +        vmnet_enable_isolation_key,
> >> >      > +        options->isolated
> >> >      > +    );
> >> >      > +#endif
> >> >      > +
> >> >      > +    return if_desc;
> >> >      > +}
> >> >      > +
> >> >      > +static NetClientInfo net_vmnet_shared_info = {
> >> >      > +    .type = NET_CLIENT_DRIVER_VMNET_SHARED,
> >> >      > +    .size = sizeof(VmnetSharedState),
> >> >      > +    .receive = vmnet_receive_common,
> >> >      > +    .cleanup = vmnet_cleanup_common,
> >> >      > +};
> >> >      > +
> >> >      >   int net_init_vmnet_shared(const Netdev *netdev, const char
> *name,
> >> >      >                             NetClientState *peer, Error **errp)
> >> >      >   {
> >> >      > -  error_setg(errp, "vmnet-shared is not implemented yet");
> >> >      > -  return -1;
> >> >      > +    NetClientState *nc =
> qemu_new_net_client(&net_vmnet_shared_info,
> >> >      > +                                             peer,
> >> >     "vmnet-shared", name);
> >> >      > +    if (!validate_options(netdev, errp)) {
> >> >      > +        g_assert_not_reached();
> >> >
> >> >     g_assert_not_reached is for debugging purpose and may be dropped
> >> >     depending on the build option.
> >> >
> >> >      > +    }
> >> >      > +    return vmnet_if_create(nc, build_if_desc(netdev), errp);
> >> >      >   }
> >> >      > diff --git a/net/vmnet_int.h b/net/vmnet_int.h
> >> >      > index aac4d5af64..acfe3a88c0 100644
> >> >      > --- a/net/vmnet_int.h
> >> >      > +++ b/net/vmnet_int.h
> >> >      > @@ -15,11 +15,48 @@
> >> >      >   #include "clients.h"
> >> >      >
> >> >      >   #include <vmnet/vmnet.h>
> >> >      > +#include <dispatch/dispatch.h>
> >> >      > +
> >> >      > +/**
> >> >      > + *  From vmnet.framework documentation
> >> >      > + *
> >> >      > + *  Each read/write call allows up to 200 packets to be
> >> >      > + *  read or written for a maximum of 256KB.
> >> >      > + *
> >> >      > + *  Each packet written should be a complete
> >> >      > + *  ethernet frame.
> >> >      > + *
> >> >      > + * https://developer.apple.com/documentation/vmnet
> >> >     <https://developer.apple.com/documentation/vmnet>
> >> >      > + */
> >> >      > +#define VMNET_PACKETS_LIMIT 200
> >> >      >
> >> >      >   typedef struct VmnetCommonState {
> >> >      > -  NetClientState nc;
> >> >      > +    NetClientState nc;
> >> >      > +    interface_ref vmnet_if;
> >> >      > +
> >> >      > +    bool send_scheduled;
> >> >      >
> >> >      > +    uint64_t mtu;
> >> >      > +    uint64_t max_packet_size;
> >> >      > +
> >> >      > +    struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT];
> >> >      > +    struct iovec iov_buf[VMNET_PACKETS_LIMIT];
> >> >      > +
> >> >      > +    dispatch_queue_t if_queue;
> >> >      > +
> >> >      > +    QEMUBH *send_bh;
> >> >      >   } VmnetCommonState;
> >> >      >
> >> >      > +const char *vmnet_status_map_str(vmnet_return_t status);
> >> >      > +
> >> >      > +int vmnet_if_create(NetClientState *nc,
> >> >      > +                    xpc_object_t if_desc,
> >> >      > +                    Error **errp);
> >> >      > +
> >> >      > +ssize_t vmnet_receive_common(NetClientState *nc,
> >> >      > +                             const uint8_t *buf,
> >> >      > +                             size_t size);
> >> >      > +
> >> >      > +void vmnet_cleanup_common(NetClientState *nc);
> >> >      >
> >> >      >   #endif /* VMNET_INT_H */
> >> >
> >> >
> >> > Other issues will be fixed and submitted later,
> >> > thank you!
> >> >
> >> > Regards,
> >> > Vladislav Yaroshchuk
> >>
> >
> > Regards,
> > Vladislav Yaroshchuk
>

Best Regards,

Vladislav Yaroshchuk
Akihiko Odaki March 1, 2022, 5:52 a.m. UTC | #7
On 2022/02/28 20:59, Vladislav Yaroshchuk wrote:
> 
> 
> On Sat, Feb 26, 2022 at 3:27 PM Akihiko Odaki <akihiko.odaki@gmail.com 
> <mailto:akihiko.odaki@gmail.com>> wrote:
> 
>     On Sat, Feb 26, 2022 at 8:33 PM Vladislav Yaroshchuk
>     <vladislav.yaroshchuk@jetbrains.com
>     <mailto:vladislav.yaroshchuk@jetbrains.com>> wrote:
>      >
>      >
>      >
>      > On Sat, Feb 26, 2022 at 12:16 PM Akihiko Odaki
>     <akihiko.odaki@gmail.com <mailto:akihiko.odaki@gmail.com>> wrote:
>      >>
>      >> On 2022/02/26 17:37, Vladislav Yaroshchuk wrote:
>      >> >
>      >> > Hi Akihiko,
>      >> >
>      >> > On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki
>     <akihiko.odaki@gmail.com <mailto:akihiko.odaki@gmail.com>
>      >> > <mailto:akihiko.odaki@gmail.com
>     <mailto:akihiko.odaki@gmail.com>>> wrote:
>      >> >
>      >> >     On 2022/02/26 2:13, Vladislav Yaroshchuk wrote:
>      >> >      > Interaction with vmnet.framework in different modes
>      >> >      > differs only on configuration stage, so we can create
>      >> >      > common `send`, `receive`, etc. procedures and reuse them.
>      >> >      >
>      >> >      > vmnet.framework supports iov, but writing more than
>      >> >      > one iov into vmnet interface fails with
>      >> >      > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
>      >> >      > one and passing it to vmnet works fine. That's the
>      >> >      > reason why receive_iov() left unimplemented. But it still
>      >> >      > works with good enough performance having .receive()
>      >> >      > net/vmnet: implement shared mode (vmnet-shared)
>      >> >      >
>      >> >      > Interaction with vmnet.framework in different modes
>      >> >      > differs only on configuration stage, so we can create
>      >> >      > common `send`, `receive`, etc. procedures and reuse them.
>      >> >      >
>      >> >      > vmnet.framework supports iov, but writing more than
>      >> >      > one iov into vmnet interface fails with
>      >> >      > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
>      >> >      > one and passing it to vmnet works fine. That's the
>      >> >      > reason why receive_iov() left unimplemented. But it still
>      >> >      > works with good enough performance having .receive()
>      >> >      > implemented only.
>      >> >      >
>      >> >      > Also, there is no way to unsubscribe from vmnet packages
>      >> >      > receiving except registering and unregistering event
>      >> >      > callback or simply drop packages just ignoring and
>      >> >      > not processing them when related flag is set. Here we do
>      >> >      > using the second way.
>      >> >      >
>      >> >      > Signed-off-by: Phillip Tennen <phillip@axleos.com
>     <mailto:phillip@axleos.com>
>      >> >     <mailto:phillip@axleos.com <mailto:phillip@axleos.com>>>
>      >> >      > Signed-off-by: Vladislav Yaroshchuk
>      >> >     <Vladislav.Yaroshchuk@jetbrains.com
>     <mailto:Vladislav.Yaroshchuk@jetbrains.com>
>      >> >     <mailto:Vladislav.Yaroshchuk@jetbrains.com
>     <mailto:Vladislav.Yaroshchuk@jetbrains.com>>>
>      >> >
>      >> >     Thank you for persistently working on this.
>      >> >
>      >> >      > ---
>      >> >      >   net/vmnet-common.m | 302
>      >> >     +++++++++++++++++++++++++++++++++++++++++++++
>      >> >      >   net/vmnet-shared.c |  94 +++++++++++++-
>      >> >      >   net/vmnet_int.h    |  39 +++++-
>      >> >      >   3 files changed, 430 insertions(+), 5 deletions(-)
>      >> >      >
>      >> >      > diff --git a/net/vmnet-common.m b/net/vmnet-common.m
>      >> >      > index 56612c72ce..2f70921cae 100644
>      >> >      > --- a/net/vmnet-common.m
>      >> >      > +++ b/net/vmnet-common.m
>      >> >      > @@ -10,6 +10,8 @@
>      >> >      >    */
>      >> >      >
>      >> >      >   #include "qemu/osdep.h"
>      >> >      > +#include "qemu/main-loop.h"
>      >> >      > +#include "qemu/log.h"
>      >> >      >   #include "qapi/qapi-types-net.h"
>      >> >      >   #include "vmnet_int.h"
>      >> >      >   #include "clients.h"
>      >> >      > @@ -17,4 +19,304 @@
>      >> >      >   #include "qapi/error.h"
>      >> >      >
>      >> >      >   #include <vmnet/vmnet.h>
>      >> >      > +#include <dispatch/dispatch.h>
>      >> >      >
>      >> >      > +
>      >> >      > +static inline void
>     vmnet_set_send_bh_scheduled(VmnetCommonState *s,
>      >> >      > +                                               bool
>     enable)
>      >> >      > +{
>      >> >      > +    qatomic_set(&s->send_scheduled, enable);
>      >> >      > +}
>      >> >      > +
>      >> >      > +
>      >> >      > +static inline bool
>     vmnet_is_send_bh_scheduled(VmnetCommonState *s)
>      >> >      > +{
>      >> >      > +    return qatomic_load_acquire(&s->send_scheduled);
>      >> >      > +}
>      >> >      > +
>      >> >      > +
>      >> >      > +static inline void
>     vmnet_set_send_enabled(VmnetCommonState *s,
>      >> >      > +                                          bool enable)
>      >> >      > +{
>      >> >      > +    if (enable) {
>      >> >      > +        vmnet_interface_set_event_callback(
>      >> >      > +            s->vmnet_if,
>      >> >      > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
>      >> >      > +            s->if_queue,
>      >> >      > +            ^(interface_event_t event_id, xpc_object_t
>     event) {
>      >> >      > +                assert(event_id ==
>      >> >     VMNET_INTERFACE_PACKETS_AVAILABLE);
>      >> >      > +                /*
>      >> >      > +                 * This function is being called from
>     a non qemu
>      >> >     thread, so
>      >> >      > +                 * we only schedule a BH, and do the
>     rest of the
>      >> >     io completion
>      >> >      > +                 * handling from vmnet_send_bh() which
>     runs in a
>      >> >     qemu context.
>      >> >      > +                 *
>      >> >      > +                 * Avoid scheduling multiple bottom halves
>      >> >      > +                 */
>      >> >      > +                if (!vmnet_is_send_bh_scheduled(s)) {
>      >> >      > +                    vmnet_set_send_bh_scheduled(s, true);
>      >> >
>      >> >     It can be interrupted between vmnet_is_send_bh_scheduled and
>      >> >     vmnet_set_send_bh_scheduled, which leads to data race.
>      >> >
>      >> >
>      >> > Sorry, I did not clearly understand what you meant. Since this
>      >> > callback (block) is submitted on DISPATCH_QUEUE_SERIAL,
>      >> > only one instance of the callback will be executed at any
>      >> > moment of time.
>      >> >
>     https://developer.apple.com/documentation/dispatch/dispatch_queue_serial
>     <https://developer.apple.com/documentation/dispatch/dispatch_queue_serial>
>      >> >
>     <https://developer.apple.com/documentation/dispatch/dispatch_queue_serial
>     <https://developer.apple.com/documentation/dispatch/dispatch_queue_serial>>
>      >> >
>      >> > Also this is the only place where we schedule a bottom half.
>      >> >
>      >> > After we set the 'send_scheduled' flag, all the other
>      >> > callback  blocks will do nothing (skip the if block) until
>      >> > the bottom half is executed and reset 'send_scheduled'.
>      >> > I don't see any races here :(
>      >> >
>      >> > Correct me if I'm wrong please.
>      >>
>      >> My explanation that the interruption between
>     vmnet_is_send_bh_scheduled
>      >> and vmnet_set_send_bh_scheduled is problematic was actually
>     misleading.
>      >>
>      >> The problem is that vmnet_send_bh resets 'send_scheduled' after
>     calling
>      >> vmnet_read. If we got another packet after vmnet_read, it would be
>      >> silently ignored.
>      >>
>      >> By the way, I forgot to mention another problem:
>     vmnet_send_completed
>      >> calls vmnet_set_send_enabled, but the other packets in the
>     buffer may
>      >> not be sent yet. Also, unregistering callbacks in
>     vmnet_set_send_enabled
>      >> is probably not enough to stop the callback being fired since some
>      >> dispatch blocks are already in the dispatch queue.
>      >>
>      >
>      > Now I understand what you mean, thanks.
>      > What do you think about the workaround? For me
>      > it is quite difficult question how to synchronize qemu with
>      > vmnet thread, especially with completion callback.
> 
>     You may add a new field to represent the number of packets being sent
>     in the buffer. The field must be maintained by vmnet_send_bh and
>     vmnet_send_completed, which are on the iothread. vmnet_send_bh should
>     do nothing if the field is greater than 0 at the beginning of the
>     function. vmnet_send_completed should call
>     qemu_bh_schedule(s->send_bh).
> 
> 
> Thank you for the idea! Sounds meaningful to
> schedule a bottom half in vmnet thread and do the
> rest of things in iothread with no concurrency.
> 
> Not sure that only one field is enough, cause
> we may have two states on bh execution start:
> 1. There are packets in vmnet buffer s->packets_buf
>      that were rejected by qemu_send_async and waiting
>      to be sent. If this happens, we should complete sending
>      these waiting packets with qemu_send_async firstly,
>      and after that we should call vmnet_read to get
>      new ones and send them to QEMU;
> 2. There are no packets in s->packets_buf to be sent to
>      qemu, we only need to get new packets from vmnet
>      with vmnet_read and send them to QEMU

In case 1, you should just keep calling qemu_send_packet_async. Actually 
qemu_send_packet_async adds the packet to its internal queue and calls 
the callback when it is consumed.

> 
> It can be done kinda this way:
> ```
> struct s:
>      send_bh:                    bh
>      packets_buf:              array[packet]
>      ## Three new fields
>      send_enabled:           bool
>      packets_send_pos:    int
>      packets_batch_size:  int
> 
> fun bh_send(s):
>      ## Send disabled - do nothing
>      if not s->send_enabled:
>          return
>      ## If some packets are waiting to be sent in
>      ## s->packets_buf - send them
>      if s->packets_send_pos < s->packets_batch_size:
>          if not qemu_send_wrapper(s):
>              return
> 
>      ## Try to read new packets from vmnet
>      s->packets_send_pos = 0
>      s->packets_batch_size = 0
>      s->packets_buf = vmnet_read(&s->packets_batch_size)
>      if s->packets_batch_size > 0:
>          # If something received - process them
>          qemu_send_wrapper(s)
> fun qemu_send_wrapper(s):
>      for i in s->packets_send_pos to s->packets_batch_size:
>          size = qemu_send_async(s->packets_buf[i],
>                                                    vmnet_send_completed)
>          if size == 0:
>              ## Disable packets processing until vmnet_send_completed
>              ## Save the state: packets to be sent now in s->packets_buf
>              ## in range s->packets_send_pos..s->packets_batch_size
>              s->send_enabled = false
>              s->packets_send_pos = i + 1
>              break
>          if size < 0:
>              ## On error just drop the packets
>              s->packets_send_pos = 0
>              s->packets_batch_size = 0
>              break
> 
>       return s->send_enabled
> 
> 
> fun vmnet_send_completed(s):
>      ## Complete sending packets from s->packets_buf
>      s->send_enabled = true
>      qemu_bh_schedule(s->send_bh)
> 
> ## From callback we only scheduling the bh
> vmnet.set_callback(callback = s: qemu_bh_schedule(s->send_bh))
> ```
> 
> I think a simpler implementation may exist, but currently
> I see only this approach with the send_enabled flag and
> two more fields to save packets sending state.
> 
>     vmnet_set_send_enabled and send_scheduled can be simply removed.
>     qemu_bh_schedule ensures there is no duplicate scheduling.
> 
> Yep, my mistake. Previously I used schedule_oneshot which
> creates a new bh for each call which causes duplicate scheduling.
> 
> 
>     Regards,
>     Akihiko Odaki
> 
>      >
>      >
>      >>
>      >> Regards,
>      >> Akihiko Odaki
>      >>
>      >> >
>      >> >      > +                    qemu_bh_schedule(s->send_bh);
>      >> >      > +                }
>      >> >      > +            });
>      >> >      > +    } else {
>      >> >      > +        vmnet_interface_set_event_callback(
>      >> >      > +            s->vmnet_if,
>      >> >      > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
>      >> >      > +            NULL,
>      >> >      > +            NULL);
>      >> >      > +    }
>      >> >      > +}
>      >> >      > +
>      >> >      > +
>      >> >      > +static void vmnet_send_completed(NetClientState *nc,
>     ssize_t len)
>      >> >      > +{
>      >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState,
>     nc, nc);
>      >> >      > +    vmnet_set_send_enabled(s, true);
>      >> >      > +}
>      >> >      > +
>      >> >      > +
>      >> >      > +static void vmnet_send_bh(void *opaque)
>      >> >      > +{
>      >> >      > +    NetClientState *nc = (NetClientState *) opaque;
>      >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState,
>     nc, nc);
>      >> >      > +
>      >> >      > +    struct iovec *iov = s->iov_buf;
>      >> >      > +    struct vmpktdesc *packets = s->packets_buf;
>      >> >      > +    int pkt_cnt;
>      >> >      > +    int i;
>      >> >      > +
>      >> >      > +    vmnet_return_t status;
>      >> >      > +    ssize_t size;
>      >> >      > +
>      >> >      > +    /* read as many packets as present */
>      >> >      > +    pkt_cnt = VMNET_PACKETS_LIMIT;
>      >> >
>      >> >     There could be more than VMNET_PACKETS_LIMIT. You may call
>      >> >     vmnet_read in
>      >> >     a loop.
>      >> >
>      >> >      > +    for (i = 0; i < pkt_cnt; ++i) {
>      >> >      > +        packets[i].vm_pkt_size = s->max_packet_size;
>      >> >      > +        packets[i].vm_pkt_iovcnt = 1;
>      >> >      > +        packets[i].vm_flags = 0;
>      >> >      > +    }
>      >> >      > +
>      >> >      > +    status = vmnet_read(s->vmnet_if, packets, &pkt_cnt);
>      >> >      > +    if (status != VMNET_SUCCESS) {
>      >> >      > +        error_printf("vmnet: read failed: %s\n",
>      >> >      > +                     vmnet_status_map_str(status));
>      >> >      > +        goto done;
>      >> >      > +    }
>      >> >      > +
>      >> >      > +    for (i = 0; i < pkt_cnt; ++i) {
>      >> >      > +        size = qemu_send_packet_async(nc,
>      >> >      > +                                      iov[i].iov_base,
>      >> >      > +                                     
>     packets[i].vm_pkt_size,
>      >> >      > +                                     
>     vmnet_send_completed);
>      >> >      > +        if (size == 0) {
>      >> >      > +            vmnet_set_send_enabled(s, false);
>      >> >      > +            goto done;
>      >> >      > +        } else if (size < 0) {
>      >> >      > +            break;
>      >> >      > +        }
>      >> >
>      >> >     goto is not needed here. "break" when size <= 0.
>      >> >
>      >> >      > +    }
>      >> >      > +
>      >> >      > +done:
>      >> >      > +    vmnet_set_send_bh_scheduled(s, false);
>      >> >      > +}
>      >> >      > +
>      >> >      > +
>      >> >      > +static void vmnet_bufs_init(VmnetCommonState *s)
>      >> >      > +{
>      >> >      > +    struct vmpktdesc *packets = s->packets_buf;
>      >> >      > +    struct iovec *iov = s->iov_buf;
>      >> >      > +    int i;
>      >> >      > +
>      >> >      > +    for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
>      >> >      > +        iov[i].iov_len = s->max_packet_size;
>      >> >      > +        iov[i].iov_base = g_malloc0(iov[i].iov_len);
>      >> >      > +        packets[i].vm_pkt_iov = iov + i;
>      >> >      > +    }
>      >> >      > +}
>      >> >      > +
>      >> >      > +
>      >> >      > +const char *vmnet_status_map_str(vmnet_return_t status)
>      >> >      > +{
>      >> >      > +    switch (status) {
>      >> >      > +    case VMNET_SUCCESS:
>      >> >      > +        return "success";
>      >> >      > +    case VMNET_FAILURE:
>      >> >      > +        return "general failure (possibly not enough
>     privileges)";
>      >> >      > +    case VMNET_MEM_FAILURE:
>      >> >      > +        return "memory allocation failure";
>      >> >      > +    case VMNET_INVALID_ARGUMENT:
>      >> >      > +        return "invalid argument specified";
>      >> >      > +    case VMNET_SETUP_INCOMPLETE:
>      >> >      > +        return "interface setup is not complete";
>      >> >      > +    case VMNET_INVALID_ACCESS:
>      >> >      > +        return "invalid access, permission denied";
>      >> >      > +    case VMNET_PACKET_TOO_BIG:
>      >> >      > +        return "packet size is larger than MTU";
>      >> >      > +    case VMNET_BUFFER_EXHAUSTED:
>      >> >      > +        return "buffers exhausted in kernel";
>      >> >      > +    case VMNET_TOO_MANY_PACKETS:
>      >> >      > +        return "packet count exceeds limit";
>      >> >      > +#if defined(MAC_OS_VERSION_11_0) && \
>      >> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
>      >> >      > +    case VMNET_SHARING_SERVICE_BUSY:
>      >> >      > +        return "conflict, sharing service is in use";
>      >> >      > +#endif
>      >> >      > +    default:
>      >> >      > +        return "unknown vmnet error";
>      >> >      > +    }
>      >> >      > +}
>      >> >      > +
>      >> >      > +
>      >> >      > +int vmnet_if_create(NetClientState *nc,
>      >> >      > +                    xpc_object_t if_desc,
>      >> >      > +                    Error **errp)
>      >> >      > +{
>      >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState,
>     nc, nc);;
>      >> >
>      >> >     Duplicate semicolons.
>      >> >
>      >> >      > +    dispatch_semaphore_t if_created_sem =
>      >> >     dispatch_semaphore_create(0);
>      >> >
>      >> >     if_created_sem leaks.
>      >> >
>      >> >      > +    __block vmnet_return_t if_status;
>      >> >      > +
>      >> >      > +    s->if_queue = dispatch_queue_create(
>      >> >      > +        "org.qemu.vmnet.if_queue",
>      >> >      > +        DISPATCH_QUEUE_SERIAL
>      >> >      > +    );
>      >> >      > +
>      >> >      > +    xpc_dictionary_set_bool(
>      >> >      > +        if_desc,
>      >> >      > +        vmnet_allocate_mac_address_key,
>      >> >      > +        false
>      >> >      > +    );
>      >> >      > +#ifdef DEBUG
>      >> >      > +    qemu_log("vmnet.start.interface_desc:\n");
>      >> >      > +    xpc_dictionary_apply(if_desc,
>      >> >      > +                         ^bool(const char *k,
>     xpc_object_t v) {
>      >> >      > +                             char *desc =
>     xpc_copy_description(v);
>      >> >      > +                             qemu_log("  %s=%s\n", k,
>     desc);
>      >> >      > +                             free(desc);
>      >> >      > +                             return true;
>      >> >      > +                         });
>      >> >      > +#endif /* DEBUG */
>      >> >      > +
>      >> >      > +    s->vmnet_if = vmnet_start_interface(
>      >> >      > +        if_desc,
>      >> >      > +        s->if_queue,
>      >> >      > +        ^(vmnet_return_t status, xpc_object_t
>     interface_param) {
>      >> >      > +            if_status = status;
>      >> >      > +            if (status != VMNET_SUCCESS ||
>     !interface_param) {
>      >> >      > +                dispatch_semaphore_signal(if_created_sem);
>      >> >      > +                return;
>      >> >      > +            }
>      >> >      > +
>      >> >      > +#ifdef DEBUG
>      >> >      > +            qemu_log("vmnet.start.interface_param:\n");
>      >> >      > +            xpc_dictionary_apply(interface_param,
>      >> >      > +                                 ^bool(const char *k,
>      >> >     xpc_object_t v) {
>      >> >      > +                                     char *desc =
>      >> >     xpc_copy_description(v);
>      >> >      > +                                     qemu_log(" 
>     %s=%s\n", k, desc);
>      >> >      > +                                     free(desc);
>      >> >      > +                                     return true;
>      >> >      > +                                 });
>      >> >      > +#endif /* DEBUG */
>      >> >      > +
>      >> >      > +            s->mtu = xpc_dictionary_get_uint64(
>      >> >      > +                interface_param,
>      >> >      > +                vmnet_mtu_key);
>      >> >      > +            s->max_packet_size =
>     xpc_dictionary_get_uint64(
>      >> >      > +                interface_param,
>      >> >      > +                vmnet_max_packet_size_key);
>      >> >      > +
>      >> >      > +            dispatch_semaphore_signal(if_created_sem);
>      >> >      > +        });
>      >> >      > +
>      >> >      > +    if (s->vmnet_if == NULL) {
>      >> >      > +        error_setg(errp,
>      >> >      > +                   "unable to create interface "
>      >> >      > +                   "with requested params");
>      >> >
>      >> >     You don't need line breaks here. Breaking a string into a
>     few would
>      >> >     also
>      >> >     makes it a bit hard to grep.
>      >> >
>      >> >      > +        return -1;
>      >> >
>      >> >     s->if_queue leaks.
>      >> >
>      >> >      > +    }
>      >> >      > +
>      >> >      > +    dispatch_semaphore_wait(if_created_sem,
>     DISPATCH_TIME_FOREVER);
>      >> >      > +
>      >> >      > +    if (if_status != VMNET_SUCCESS) {
>      >> >      > +        error_setg(errp,
>      >> >      > +                   "cannot create vmnet interface: %s",
>      >> >      > +                   vmnet_status_map_str(if_status));
>      >> >      > +        return -1;
>      >> >      > +    }
>      >> >      > +
>      >> >      > +    s->send_bh = aio_bh_new(qemu_get_aio_context(),
>      >> >     vmnet_send_bh, nc);
>      >> >      > +    vmnet_bufs_init(s);
>      >> >      > +    vmnet_set_send_bh_scheduled(s, false);
>      >> >      > +    vmnet_set_send_enabled(s, true);
>      >> >      > +    return 0;
>      >> >      > +}
>      >> >      > +
>      >> >      > +
>      >> >      > +ssize_t vmnet_receive_common(NetClientState *nc,
>      >> >      > +                             const uint8_t *buf,
>      >> >      > +                             size_t size)
>      >> >      > +{
>      >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState,
>     nc, nc);
>      >> >      > +    struct vmpktdesc packet;
>      >> >      > +    struct iovec iov;
>      >> >      > +    int pkt_cnt;
>      >> >      > +    vmnet_return_t if_status;
>      >> >      > +
>      >> >      > +    if (size > s->max_packet_size) {
>      >> >      > +        warn_report("vmnet: packet is too big, %zu >
>     %llu\n",
>      >> >
>      >> >     Use PRIu64.
>      >> >
>      >> >      > +                    packet.vm_pkt_size,
>      >> >      > +                    s->max_packet_size);
>      >> >      > +        return -1;
>      >> >      > +    }
>      >> >      > +
>      >> >      > +    iov.iov_base = (char *) buf;
>      >> >      > +    iov.iov_len = size;
>      >> >      > +
>      >> >      > +    packet.vm_pkt_iovcnt = 1;
>      >> >      > +    packet.vm_flags = 0;
>      >> >      > +    packet.vm_pkt_size = size;
>      >> >      > +    packet.vm_pkt_iov = &iov;
>      >> >      > +    pkt_cnt = 1;
>      >> >      > +
>      >> >      > +    if_status = vmnet_write(s->vmnet_if, &packet,
>     &pkt_cnt);
>      >> >      > +    if (if_status != VMNET_SUCCESS) {
>      >> >      > +        error_report("vmnet: write error: %s\n",
>      >> >      > +                     vmnet_status_map_str(if_status));
>      >> >
>      >> >     Why don't return -1?
>      >> >
>      >> >      > +    }
>      >> >      > +
>      >> >      > +    if (if_status == VMNET_SUCCESS && pkt_cnt) {
>      >> >      > +        return size;
>      >> >      > +    }
>      >> >      > +    return 0;
>      >> >      > +}
>      >> >      > +
>      >> >      > +
>      >> >      > +void vmnet_cleanup_common(NetClientState *nc)
>      >> >      > +{
>      >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState,
>     nc, nc);;
>      >> >
>      >> >     Duplicate semicolons.
>      >> >
>      >> >      > +    dispatch_semaphore_t if_created_sem;
>      >> >      > +
>      >> >      > +    qemu_purge_queued_packets(nc); > +
>      >> >     vmnet_set_send_bh_scheduled(s, true);
>      >> >      > +    vmnet_set_send_enabled(s, false);
>      >> >      > +
>      >> >      > +    if (s->vmnet_if == NULL) {
>      >> >      > +        return;
>      >> >      > +    }
>      >> >      > +
>      >> >      > +    if_created_sem = dispatch_semaphore_create(0);
>      >> >      > +    vmnet_stop_interface(
>      >> >      > +        s->vmnet_if,
>      >> >      > +        s->if_queue,
>      >> >      > +        ^(vmnet_return_t status) {
>      >> >      > +            assert(status == VMNET_SUCCESS);
>      >> >      > +            dispatch_semaphore_signal(if_created_sem);
>      >> >      > +        });
>      >> >      > +    dispatch_semaphore_wait(if_created_sem,
>     DISPATCH_TIME_FOREVER);
>      >> >      > +
>      >> >      > +    qemu_bh_delete(s->send_bh);
>      >> >      > +    dispatch_release(if_created_sem);
>      >> >      > +    dispatch_release(s->if_queue);
>      >> >      > +
>      >> >      > +    for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
>      >> >      > +        g_free(s->iov_buf[i].iov_base);
>      >> >      > +    }
>      >> >      > +}
>      >> >      > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c
>      >> >      > index f07afaaf21..66f66c034b 100644
>      >> >      > --- a/net/vmnet-shared.c
>      >> >      > +++ b/net/vmnet-shared.c
>      >> >      > @@ -10,16 +10,102 @@
>      >> >      >
>      >> >      >   #include "qemu/osdep.h"
>      >> >      >   #include "qapi/qapi-types-net.h"
>      >> >      > +#include "qapi/error.h"
>      >> >      >   #include "vmnet_int.h"
>      >> >      >   #include "clients.h"
>      >> >      > -#include "qemu/error-report.h"
>      >> >      > -#include "qapi/error.h"
>      >> >      >
>      >> >      >   #include <vmnet/vmnet.h>
>      >> >      >
>      >> >      > +typedef struct VmnetSharedState {
>      >> >      > +    VmnetCommonState cs;
>      >> >      > +} VmnetSharedState;
>      >> >      > +
>      >> >      > +
>      >> >      > +static bool validate_options(const Netdev *netdev,
>     Error **errp)
>      >> >      > +{
>      >> >      > +    const NetdevVmnetSharedOptions *options =
>      >> >     &(netdev->u.vmnet_shared);
>      >> >      > +
>      >> >      > +#if !defined(MAC_OS_VERSION_11_0) || \
>      >> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0
>      >> >      > +    if (options->has_isolated) {
>      >> >      > +        error_setg(errp,
>      >> >      > +                   "vmnet-shared.isolated feature is "
>      >> >      > +                   "unavailable: outdated
>     vmnet.framework API");
>      >> >      > +        return false;
>      >> >      > +    }
>      >> >      > +#endif
>      >> >      > +
>      >> >      > +    if ((options->has_start_address ||
>      >> >      > +         options->has_end_address ||
>      >> >      > +         options->has_subnet_mask) &&
>      >> >      > +        !(options->has_start_address &&
>      >> >      > +          options->has_end_address &&
>      >> >      > +          options->has_subnet_mask)) {
>      >> >      > +        error_setg(errp,
>      >> >      > +                   "'start-address', 'end-address',
>     'subnet-mask' "
>      >> >      > +                   "should be provided together"
>      >> >      > +        );
>      >> >      > +        return false;
>      >> >      > +    }
>      >> >      > +
>      >> >      > +    return true;
>      >> >      > +}
>      >> >      > +
>      >> >      > +static xpc_object_t build_if_desc(const Netdev *netdev)
>      >> >      > +{
>      >> >      > +    const NetdevVmnetSharedOptions *options =
>      >> >     &(netdev->u.vmnet_shared);
>      >> >      > +    xpc_object_t if_desc = xpc_dictionary_create(NULL,
>     NULL, 0);
>      >> >      > +
>      >> >      > +    xpc_dictionary_set_uint64(
>      >> >      > +        if_desc,
>      >> >      > +        vmnet_operation_mode_key,
>      >> >      > +        VMNET_SHARED_MODE
>      >> >      > +    );
>      >> >      > +
>      >> >      > +    if (options->has_nat66_prefix) {
>      >> >      > +        xpc_dictionary_set_string(if_desc,
>      >> >      > +                                  vmnet_nat66_prefix_key,
>      >> >      > +                                  options->nat66_prefix);
>      >> >      > +    }
>      >> >      > +
>      >> >      > +    if (options->has_start_address) {
>      >> >      > +        xpc_dictionary_set_string(if_desc,
>      >> >      > +                                  vmnet_start_address_key,
>      >> >      > +                                  options->start_address);
>      >> >      > +        xpc_dictionary_set_string(if_desc,
>      >> >      > +                                  vmnet_end_address_key,
>      >> >      > +                                  options->end_address);
>      >> >      > +        xpc_dictionary_set_string(if_desc,
>      >> >      > +                                  vmnet_subnet_mask_key,
>      >> >      > +                                  options->subnet_mask);
>      >> >      > +    }
>      >> >      > +
>      >> >      > +#if defined(MAC_OS_VERSION_11_0) && \
>      >> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
>      >> >      > +    xpc_dictionary_set_bool(
>      >> >      > +        if_desc,
>      >> >      > +        vmnet_enable_isolation_key,
>      >> >      > +        options->isolated
>      >> >      > +    );
>      >> >      > +#endif
>      >> >      > +
>      >> >      > +    return if_desc;
>      >> >      > +}
>      >> >      > +
>      >> >      > +static NetClientInfo net_vmnet_shared_info = {
>      >> >      > +    .type = NET_CLIENT_DRIVER_VMNET_SHARED,
>      >> >      > +    .size = sizeof(VmnetSharedState),
>      >> >      > +    .receive = vmnet_receive_common,
>      >> >      > +    .cleanup = vmnet_cleanup_common,
>      >> >      > +};
>      >> >      > +
>      >> >      >   int net_init_vmnet_shared(const Netdev *netdev, const
>     char *name,
>      >> >      >                             NetClientState *peer, Error
>     **errp)
>      >> >      >   {
>      >> >      > -  error_setg(errp, "vmnet-shared is not implemented yet");
>      >> >      > -  return -1;
>      >> >      > +    NetClientState *nc =
>     qemu_new_net_client(&net_vmnet_shared_info,
>      >> >      > +                                             peer,
>      >> >     "vmnet-shared", name);
>      >> >      > +    if (!validate_options(netdev, errp)) {
>      >> >      > +        g_assert_not_reached();
>      >> >
>      >> >     g_assert_not_reached is for debugging purpose and may be
>     dropped
>      >> >     depending on the build option.
>      >> >
>      >> >      > +    }
>      >> >      > +    return vmnet_if_create(nc, build_if_desc(netdev),
>     errp);
>      >> >      >   }
>      >> >      > diff --git a/net/vmnet_int.h b/net/vmnet_int.h
>      >> >      > index aac4d5af64..acfe3a88c0 100644
>      >> >      > --- a/net/vmnet_int.h
>      >> >      > +++ b/net/vmnet_int.h
>      >> >      > @@ -15,11 +15,48 @@
>      >> >      >   #include "clients.h"
>      >> >      >
>      >> >      >   #include <vmnet/vmnet.h>
>      >> >      > +#include <dispatch/dispatch.h>
>      >> >      > +
>      >> >      > +/**
>      >> >      > + *  From vmnet.framework documentation
>      >> >      > + *
>      >> >      > + *  Each read/write call allows up to 200 packets to be
>      >> >      > + *  read or written for a maximum of 256KB.
>      >> >      > + *
>      >> >      > + *  Each packet written should be a complete
>      >> >      > + *  ethernet frame.
>      >> >      > + *
>      >> >      > + * https://developer.apple.com/documentation/vmnet
>     <https://developer.apple.com/documentation/vmnet>
>      >> >     <https://developer.apple.com/documentation/vmnet
>     <https://developer.apple.com/documentation/vmnet>>
>      >> >      > + */
>      >> >      > +#define VMNET_PACKETS_LIMIT 200
>      >> >      >
>      >> >      >   typedef struct VmnetCommonState {
>      >> >      > -  NetClientState nc;
>      >> >      > +    NetClientState nc;
>      >> >      > +    interface_ref vmnet_if;
>      >> >      > +
>      >> >      > +    bool send_scheduled;
>      >> >      >
>      >> >      > +    uint64_t mtu;
>      >> >      > +    uint64_t max_packet_size;
>      >> >      > +
>      >> >      > +    struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT];
>      >> >      > +    struct iovec iov_buf[VMNET_PACKETS_LIMIT];
>      >> >      > +
>      >> >      > +    dispatch_queue_t if_queue;
>      >> >      > +
>      >> >      > +    QEMUBH *send_bh;
>      >> >      >   } VmnetCommonState;
>      >> >      >
>      >> >      > +const char *vmnet_status_map_str(vmnet_return_t status);
>      >> >      > +
>      >> >      > +int vmnet_if_create(NetClientState *nc,
>      >> >      > +                    xpc_object_t if_desc,
>      >> >      > +                    Error **errp);
>      >> >      > +
>      >> >      > +ssize_t vmnet_receive_common(NetClientState *nc,
>      >> >      > +                             const uint8_t *buf,
>      >> >      > +                             size_t size);
>      >> >      > +
>      >> >      > +void vmnet_cleanup_common(NetClientState *nc);
>      >> >      >
>      >> >      >   #endif /* VMNET_INT_H */
>      >> >
>      >> >
>      >> > Other issues will be fixed and submitted later,
>      >> > thank you!
>      >> >
>      >> > Regards,
>      >> > Vladislav Yaroshchuk
>      >>
>      >
>      > Regards,
>      > Vladislav Yaroshchuk
> 
> 
> Best Regards,
> 
> Vladislav Yaroshchuk
Vladislav Yaroshchuk March 1, 2022, 8:09 a.m. UTC | #8
On Tue, Mar 1, 2022 at 8:52 AM Akihiko Odaki <akihiko.odaki@gmail.com>
wrote:

> On 2022/02/28 20:59, Vladislav Yaroshchuk wrote:
> >
> >
> > On Sat, Feb 26, 2022 at 3:27 PM Akihiko Odaki <akihiko.odaki@gmail.com
> > <mailto:akihiko.odaki@gmail.com>> wrote:
> >
> >     On Sat, Feb 26, 2022 at 8:33 PM Vladislav Yaroshchuk
> >     <vladislav.yaroshchuk@jetbrains.com
> >     <mailto:vladislav.yaroshchuk@jetbrains.com>> wrote:
> >      >
> >      >
> >      >
> >      > On Sat, Feb 26, 2022 at 12:16 PM Akihiko Odaki
> >     <akihiko.odaki@gmail.com <mailto:akihiko.odaki@gmail.com>> wrote:
> >      >>
> >      >> On 2022/02/26 17:37, Vladislav Yaroshchuk wrote:
> >      >> >
> >      >> > Hi Akihiko,
> >      >> >
> >      >> > On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki
> >     <akihiko.odaki@gmail.com <mailto:akihiko.odaki@gmail.com>
> >      >> > <mailto:akihiko.odaki@gmail.com
> >     <mailto:akihiko.odaki@gmail.com>>> wrote:
> >      >> >
> >      >> >     On 2022/02/26 2:13, Vladislav Yaroshchuk wrote:
> >      >> >      > Interaction with vmnet.framework in different modes
> >      >> >      > differs only on configuration stage, so we can create
> >      >> >      > common `send`, `receive`, etc. procedures and reuse
> them.
> >      >> >      >
> >      >> >      > vmnet.framework supports iov, but writing more than
> >      >> >      > one iov into vmnet interface fails with
> >      >> >      > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
> >      >> >      > one and passing it to vmnet works fine. That's the
> >      >> >      > reason why receive_iov() left unimplemented. But it
> still
> >      >> >      > works with good enough performance having .receive()
> >      >> >      > net/vmnet: implement shared mode (vmnet-shared)
> >      >> >      >
> >      >> >      > Interaction with vmnet.framework in different modes
> >      >> >      > differs only on configuration stage, so we can create
> >      >> >      > common `send`, `receive`, etc. procedures and reuse
> them.
> >      >> >      >
> >      >> >      > vmnet.framework supports iov, but writing more than
> >      >> >      > one iov into vmnet interface fails with
> >      >> >      > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into
> >      >> >      > one and passing it to vmnet works fine. That's the
> >      >> >      > reason why receive_iov() left unimplemented. But it
> still
> >      >> >      > works with good enough performance having .receive()
> >      >> >      > implemented only.
> >      >> >      >
> >      >> >      > Also, there is no way to unsubscribe from vmnet packages
> >      >> >      > receiving except registering and unregistering event
> >      >> >      > callback or simply drop packages just ignoring and
> >      >> >      > not processing them when related flag is set. Here we do
> >      >> >      > using the second way.
> >      >> >      >
> >      >> >      > Signed-off-by: Phillip Tennen <phillip@axleos.com
> >     <mailto:phillip@axleos.com>
> >      >> >     <mailto:phillip@axleos.com <mailto:phillip@axleos.com>>>
> >      >> >      > Signed-off-by: Vladislav Yaroshchuk
> >      >> >     <Vladislav.Yaroshchuk@jetbrains.com
> >     <mailto:Vladislav.Yaroshchuk@jetbrains.com>
> >      >> >     <mailto:Vladislav.Yaroshchuk@jetbrains.com
> >     <mailto:Vladislav.Yaroshchuk@jetbrains.com>>>
> >      >> >
> >      >> >     Thank you for persistently working on this.
> >      >> >
> >      >> >      > ---
> >      >> >      >   net/vmnet-common.m | 302
> >      >> >     +++++++++++++++++++++++++++++++++++++++++++++
> >      >> >      >   net/vmnet-shared.c |  94 +++++++++++++-
> >      >> >      >   net/vmnet_int.h    |  39 +++++-
> >      >> >      >   3 files changed, 430 insertions(+), 5 deletions(-)
> >      >> >      >
> >      >> >      > diff --git a/net/vmnet-common.m b/net/vmnet-common.m
> >      >> >      > index 56612c72ce..2f70921cae 100644
> >      >> >      > --- a/net/vmnet-common.m
> >      >> >      > +++ b/net/vmnet-common.m
> >      >> >      > @@ -10,6 +10,8 @@
> >      >> >      >    */
> >      >> >      >
> >      >> >      >   #include "qemu/osdep.h"
> >      >> >      > +#include "qemu/main-loop.h"
> >      >> >      > +#include "qemu/log.h"
> >      >> >      >   #include "qapi/qapi-types-net.h"
> >      >> >      >   #include "vmnet_int.h"
> >      >> >      >   #include "clients.h"
> >      >> >      > @@ -17,4 +19,304 @@
> >      >> >      >   #include "qapi/error.h"
> >      >> >      >
> >      >> >      >   #include <vmnet/vmnet.h>
> >      >> >      > +#include <dispatch/dispatch.h>
> >      >> >      >
> >      >> >      > +
> >      >> >      > +static inline void
> >     vmnet_set_send_bh_scheduled(VmnetCommonState *s,
> >      >> >      > +                                               bool
> >     enable)
> >      >> >      > +{
> >      >> >      > +    qatomic_set(&s->send_scheduled, enable);
> >      >> >      > +}
> >      >> >      > +
> >      >> >      > +
> >      >> >      > +static inline bool
> >     vmnet_is_send_bh_scheduled(VmnetCommonState *s)
> >      >> >      > +{
> >      >> >      > +    return qatomic_load_acquire(&s->send_scheduled);
> >      >> >      > +}
> >      >> >      > +
> >      >> >      > +
> >      >> >      > +static inline void
> >     vmnet_set_send_enabled(VmnetCommonState *s,
> >      >> >      > +                                          bool enable)
> >      >> >      > +{
> >      >> >      > +    if (enable) {
> >      >> >      > +        vmnet_interface_set_event_callback(
> >      >> >      > +            s->vmnet_if,
> >      >> >      > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
> >      >> >      > +            s->if_queue,
> >      >> >      > +            ^(interface_event_t event_id, xpc_object_t
> >     event) {
> >      >> >      > +                assert(event_id ==
> >      >> >     VMNET_INTERFACE_PACKETS_AVAILABLE);
> >      >> >      > +                /*
> >      >> >      > +                 * This function is being called from
> >     a non qemu
> >      >> >     thread, so
> >      >> >      > +                 * we only schedule a BH, and do the
> >     rest of the
> >      >> >     io completion
> >      >> >      > +                 * handling from vmnet_send_bh() which
> >     runs in a
> >      >> >     qemu context.
> >      >> >      > +                 *
> >      >> >      > +                 * Avoid scheduling multiple bottom
> halves
> >      >> >      > +                 */
> >      >> >      > +                if (!vmnet_is_send_bh_scheduled(s)) {
> >      >> >      > +                    vmnet_set_send_bh_scheduled(s,
> true);
> >      >> >
> >      >> >     It can be interrupted between vmnet_is_send_bh_scheduled
> and
> >      >> >     vmnet_set_send_bh_scheduled, which leads to data race.
> >      >> >
> >      >> >
> >      >> > Sorry, I did not clearly understand what you meant. Since this
> >      >> > callback (block) is submitted on DISPATCH_QUEUE_SERIAL,
> >      >> > only one instance of the callback will be executed at any
> >      >> > moment of time.
> >      >> >
> >
> https://developer.apple.com/documentation/dispatch/dispatch_queue_serial
> >     <
> https://developer.apple.com/documentation/dispatch/dispatch_queue_serial>
> >      >> >
> >     <
> https://developer.apple.com/documentation/dispatch/dispatch_queue_serial
> >     <
> https://developer.apple.com/documentation/dispatch/dispatch_queue_serial>>
> >      >> >
> >      >> > Also this is the only place where we schedule a bottom half.
> >      >> >
> >      >> > After we set the 'send_scheduled' flag, all the other
> >      >> > callback  blocks will do nothing (skip the if block) until
> >      >> > the bottom half is executed and reset 'send_scheduled'.
> >      >> > I don't see any races here :(
> >      >> >
> >      >> > Correct me if I'm wrong please.
> >      >>
> >      >> My explanation that the interruption between
> >     vmnet_is_send_bh_scheduled
> >      >> and vmnet_set_send_bh_scheduled is problematic was actually
> >     misleading.
> >      >>
> >      >> The problem is that vmnet_send_bh resets 'send_scheduled' after
> >     calling
> >      >> vmnet_read. If we got another packet after vmnet_read, it would
> be
> >      >> silently ignored.
> >      >>
> >      >> By the way, I forgot to mention another problem:
> >     vmnet_send_completed
> >      >> calls vmnet_set_send_enabled, but the other packets in the
> >     buffer may
> >      >> not be sent yet. Also, unregistering callbacks in
> >     vmnet_set_send_enabled
> >      >> is probably not enough to stop the callback being fired since
> some
> >      >> dispatch blocks are already in the dispatch queue.
> >      >>
> >      >
> >      > Now I understand what you mean, thanks.
> >      > What do you think about the workaround? For me
> >      > it is quite difficult question how to synchronize qemu with
> >      > vmnet thread, especially with completion callback.
> >
> >     You may add a new field to represent the number of packets being sent
> >     in the buffer. The field must be maintained by vmnet_send_bh and
> >     vmnet_send_completed, which are on the iothread. vmnet_send_bh should
> >     do nothing if the field is greater than 0 at the beginning of the
> >     function. vmnet_send_completed should call
> >     qemu_bh_schedule(s->send_bh).
> >
> >
> > Thank you for the idea! Sounds meaningful to
> > schedule a bottom half in vmnet thread and do the
> > rest of things in iothread with no concurrency.
> >
> > Not sure that only one field is enough, cause
> > we may have two states on bh execution start:
> > 1. There are packets in vmnet buffer s->packets_buf
> >      that were rejected by qemu_send_async and waiting
> >      to be sent. If this happens, we should complete sending
> >      these waiting packets with qemu_send_async firstly,
> >      and after that we should call vmnet_read to get
> >      new ones and send them to QEMU;
> > 2. There are no packets in s->packets_buf to be sent to
> >      qemu, we only need to get new packets from vmnet
> >      with vmnet_read and send them to QEMU
>
> In case 1, you should just keep calling qemu_send_packet_async. Actually
> qemu_send_packet_async adds the packet to its internal queue and calls
> the callback when it is consumed.
>
>
I'm not sure we can keep calling qemu_send_packet_async,
because as docs from net/queue.c says:

/* [...]
 * If a sent callback is provided to send(), the caller must handle a
 * zero return from the delivery handler by not sending any more packets
 * until we have invoked the callback. Only in that case will we queue
 * the packet.
 *
 * If a sent callback isn't provided, we just drop the packet to avoid
 * unbounded queueing.
 */

So after we did vmnet_read and read N packets
into temporary s->packets_buf, we begin calling
qemu_send_packet_async. If it returns 0 - it says
"no more packets until sent_cb called please".
At this moment we have N packets in s->packets_buf
and already queued K < N of them. But, packets K..N
are not queued and keep waiting for sent_cb to be sent
with qemu_send_packet_async.
Thus when sent_cb called, we should finish
our transfer of packets K..N from s->packets_buf
to qemu calling qemu_send_packet_async.
I meant this.

But also it's possible that while waiting for sent_cb
some new packet(s) has been received and it's
ready to be read with vmnet_read. To handle this,
when sent_cb is called and we're done with
packets K..N in s->packets_buf, we should also
query vmnet_read to check whether something
new is present.


Best Regards,

Vladislav Yaroshchuk

>
> > It can be done kinda this way:
> > ```
> > struct s:
> >      send_bh:                    bh
> >      packets_buf:              array[packet]
> >      ## Three new fields
> >      send_enabled:           bool
> >      packets_send_pos:    int
> >      packets_batch_size:  int
> >
> > fun bh_send(s):
> >      ## Send disabled - do nothing
> >      if not s->send_enabled:
> >          return
> >      ## If some packets are waiting to be sent in
> >      ## s->packets_buf - send them
> >      if s->packets_send_pos < s->packets_batch_size:
> >          if not qemu_send_wrapper(s):
> >              return
> >
> >      ## Try to read new packets from vmnet
> >      s->packets_send_pos = 0
> >      s->packets_batch_size = 0
> >      s->packets_buf = vmnet_read(&s->packets_batch_size)
> >      if s->packets_batch_size > 0:
> >          # If something received - process them
> >          qemu_send_wrapper(s)
> > fun qemu_send_wrapper(s):
> >      for i in s->packets_send_pos to s->packets_batch_size:
> >          size = qemu_send_async(s->packets_buf[i],
> >                                                    vmnet_send_completed)
> >          if size == 0:
> >              ## Disable packets processing until vmnet_send_completed
> >              ## Save the state: packets to be sent now in s->packets_buf
> >              ## in range s->packets_send_pos..s->packets_batch_size
> >              s->send_enabled = false
> >              s->packets_send_pos = i + 1
> >              break
> >          if size < 0:
> >              ## On error just drop the packets
> >              s->packets_send_pos = 0
> >              s->packets_batch_size = 0
> >              break
> >
> >       return s->send_enabled
> >
> >
> > fun vmnet_send_completed(s):
> >      ## Complete sending packets from s->packets_buf
> >      s->send_enabled = true
> >      qemu_bh_schedule(s->send_bh)
> >
> > ## From callback we only scheduling the bh
> > vmnet.set_callback(callback = s: qemu_bh_schedule(s->send_bh))
> > ```
> >
> > I think a simpler implementation may exist, but currently
> > I see only this approach with the send_enabled flag and
> > two more fields to save packets sending state.
> >
> >     vmnet_set_send_enabled and send_scheduled can be simply removed.
> >     qemu_bh_schedule ensures there is no duplicate scheduling.
> >
> > Yep, my mistake. Previously I used schedule_oneshot which
> > creates a new bh for each call which causes duplicate scheduling.
> >
> >
> >     Regards,
> >     Akihiko Odaki
> >
> >      >
> >      >
> >      >>
> >      >> Regards,
> >      >> Akihiko Odaki
> >      >>
> >      >> >
> >      >> >      > +                    qemu_bh_schedule(s->send_bh);
> >      >> >      > +                }
> >      >> >      > +            });
> >      >> >      > +    } else {
> >      >> >      > +        vmnet_interface_set_event_callback(
> >      >> >      > +            s->vmnet_if,
> >      >> >      > +            VMNET_INTERFACE_PACKETS_AVAILABLE,
> >      >> >      > +            NULL,
> >      >> >      > +            NULL);
> >      >> >      > +    }
> >      >> >      > +}
> >      >> >      > +
> >      >> >      > +
> >      >> >      > +static void vmnet_send_completed(NetClientState *nc,
> >     ssize_t len)
> >      >> >      > +{
> >      >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState,
> >     nc, nc);
> >      >> >      > +    vmnet_set_send_enabled(s, true);
> >      >> >      > +}
> >      >> >      > +
> >      >> >      > +
> >      >> >      > +static void vmnet_send_bh(void *opaque)
> >      >> >      > +{
> >      >> >      > +    NetClientState *nc = (NetClientState *) opaque;
> >      >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState,
> >     nc, nc);
> >      >> >      > +
> >      >> >      > +    struct iovec *iov = s->iov_buf;
> >      >> >      > +    struct vmpktdesc *packets = s->packets_buf;
> >      >> >      > +    int pkt_cnt;
> >      >> >      > +    int i;
> >      >> >      > +
> >      >> >      > +    vmnet_return_t status;
> >      >> >      > +    ssize_t size;
> >      >> >      > +
> >      >> >      > +    /* read as many packets as present */
> >      >> >      > +    pkt_cnt = VMNET_PACKETS_LIMIT;
> >      >> >
> >      >> >     There could be more than VMNET_PACKETS_LIMIT. You may call
> >      >> >     vmnet_read in
> >      >> >     a loop.
> >      >> >
> >      >> >      > +    for (i = 0; i < pkt_cnt; ++i) {
> >      >> >      > +        packets[i].vm_pkt_size = s->max_packet_size;
> >      >> >      > +        packets[i].vm_pkt_iovcnt = 1;
> >      >> >      > +        packets[i].vm_flags = 0;
> >      >> >      > +    }
> >      >> >      > +
> >      >> >      > +    status = vmnet_read(s->vmnet_if, packets,
> &pkt_cnt);
> >      >> >      > +    if (status != VMNET_SUCCESS) {
> >      >> >      > +        error_printf("vmnet: read failed: %s\n",
> >      >> >      > +                     vmnet_status_map_str(status));
> >      >> >      > +        goto done;
> >      >> >      > +    }
> >      >> >      > +
> >      >> >      > +    for (i = 0; i < pkt_cnt; ++i) {
> >      >> >      > +        size = qemu_send_packet_async(nc,
> >      >> >      > +                                      iov[i].iov_base,
> >      >> >      > +
> >     packets[i].vm_pkt_size,
> >      >> >      > +
> >     vmnet_send_completed);
> >      >> >      > +        if (size == 0) {
> >      >> >      > +            vmnet_set_send_enabled(s, false);
> >      >> >      > +            goto done;
> >      >> >      > +        } else if (size < 0) {
> >      >> >      > +            break;
> >      >> >      > +        }
> >      >> >
> >      >> >     goto is not needed here. "break" when size <= 0.
> >      >> >
> >      >> >      > +    }
> >      >> >      > +
> >      >> >      > +done:
> >      >> >      > +    vmnet_set_send_bh_scheduled(s, false);
> >      >> >      > +}
> >      >> >      > +
> >      >> >      > +
> >      >> >      > +static void vmnet_bufs_init(VmnetCommonState *s)
> >      >> >      > +{
> >      >> >      > +    struct vmpktdesc *packets = s->packets_buf;
> >      >> >      > +    struct iovec *iov = s->iov_buf;
> >      >> >      > +    int i;
> >      >> >      > +
> >      >> >      > +    for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
> >      >> >      > +        iov[i].iov_len = s->max_packet_size;
> >      >> >      > +        iov[i].iov_base = g_malloc0(iov[i].iov_len);
> >      >> >      > +        packets[i].vm_pkt_iov = iov + i;
> >      >> >      > +    }
> >      >> >      > +}
> >      >> >      > +
> >      >> >      > +
> >      >> >      > +const char *vmnet_status_map_str(vmnet_return_t status)
> >      >> >      > +{
> >      >> >      > +    switch (status) {
> >      >> >      > +    case VMNET_SUCCESS:
> >      >> >      > +        return "success";
> >      >> >      > +    case VMNET_FAILURE:
> >      >> >      > +        return "general failure (possibly not enough
> >     privileges)";
> >      >> >      > +    case VMNET_MEM_FAILURE:
> >      >> >      > +        return "memory allocation failure";
> >      >> >      > +    case VMNET_INVALID_ARGUMENT:
> >      >> >      > +        return "invalid argument specified";
> >      >> >      > +    case VMNET_SETUP_INCOMPLETE:
> >      >> >      > +        return "interface setup is not complete";
> >      >> >      > +    case VMNET_INVALID_ACCESS:
> >      >> >      > +        return "invalid access, permission denied";
> >      >> >      > +    case VMNET_PACKET_TOO_BIG:
> >      >> >      > +        return "packet size is larger than MTU";
> >      >> >      > +    case VMNET_BUFFER_EXHAUSTED:
> >      >> >      > +        return "buffers exhausted in kernel";
> >      >> >      > +    case VMNET_TOO_MANY_PACKETS:
> >      >> >      > +        return "packet count exceeds limit";
> >      >> >      > +#if defined(MAC_OS_VERSION_11_0) && \
> >      >> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED >=
> MAC_OS_VERSION_11_0
> >      >> >      > +    case VMNET_SHARING_SERVICE_BUSY:
> >      >> >      > +        return "conflict, sharing service is in use";
> >      >> >      > +#endif
> >      >> >      > +    default:
> >      >> >      > +        return "unknown vmnet error";
> >      >> >      > +    }
> >      >> >      > +}
> >      >> >      > +
> >      >> >      > +
> >      >> >      > +int vmnet_if_create(NetClientState *nc,
> >      >> >      > +                    xpc_object_t if_desc,
> >      >> >      > +                    Error **errp)
> >      >> >      > +{
> >      >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState,
> >     nc, nc);;
> >      >> >
> >      >> >     Duplicate semicolons.
> >      >> >
> >      >> >      > +    dispatch_semaphore_t if_created_sem =
> >      >> >     dispatch_semaphore_create(0);
> >      >> >
> >      >> >     if_created_sem leaks.
> >      >> >
> >      >> >      > +    __block vmnet_return_t if_status;
> >      >> >      > +
> >      >> >      > +    s->if_queue = dispatch_queue_create(
> >      >> >      > +        "org.qemu.vmnet.if_queue",
> >      >> >      > +        DISPATCH_QUEUE_SERIAL
> >      >> >      > +    );
> >      >> >      > +
> >      >> >      > +    xpc_dictionary_set_bool(
> >      >> >      > +        if_desc,
> >      >> >      > +        vmnet_allocate_mac_address_key,
> >      >> >      > +        false
> >      >> >      > +    );
> >      >> >      > +#ifdef DEBUG
> >      >> >      > +    qemu_log("vmnet.start.interface_desc:\n");
> >      >> >      > +    xpc_dictionary_apply(if_desc,
> >      >> >      > +                         ^bool(const char *k,
> >     xpc_object_t v) {
> >      >> >      > +                             char *desc =
> >     xpc_copy_description(v);
> >      >> >      > +                             qemu_log("  %s=%s\n", k,
> >     desc);
> >      >> >      > +                             free(desc);
> >      >> >      > +                             return true;
> >      >> >      > +                         });
> >      >> >      > +#endif /* DEBUG */
> >      >> >      > +
> >      >> >      > +    s->vmnet_if = vmnet_start_interface(
> >      >> >      > +        if_desc,
> >      >> >      > +        s->if_queue,
> >      >> >      > +        ^(vmnet_return_t status, xpc_object_t
> >     interface_param) {
> >      >> >      > +            if_status = status;
> >      >> >      > +            if (status != VMNET_SUCCESS ||
> >     !interface_param) {
> >      >> >      > +
> dispatch_semaphore_signal(if_created_sem);
> >      >> >      > +                return;
> >      >> >      > +            }
> >      >> >      > +
> >      >> >      > +#ifdef DEBUG
> >      >> >      > +            qemu_log("vmnet.start.interface_param:\n");
> >      >> >      > +            xpc_dictionary_apply(interface_param,
> >      >> >      > +                                 ^bool(const char *k,
> >      >> >     xpc_object_t v) {
> >      >> >      > +                                     char *desc =
> >      >> >     xpc_copy_description(v);
> >      >> >      > +                                     qemu_log("
> >     %s=%s\n", k, desc);
> >      >> >      > +                                     free(desc);
> >      >> >      > +                                     return true;
> >      >> >      > +                                 });
> >      >> >      > +#endif /* DEBUG */
> >      >> >      > +
> >      >> >      > +            s->mtu = xpc_dictionary_get_uint64(
> >      >> >      > +                interface_param,
> >      >> >      > +                vmnet_mtu_key);
> >      >> >      > +            s->max_packet_size =
> >     xpc_dictionary_get_uint64(
> >      >> >      > +                interface_param,
> >      >> >      > +                vmnet_max_packet_size_key);
> >      >> >      > +
> >      >> >      > +            dispatch_semaphore_signal(if_created_sem);
> >      >> >      > +        });
> >      >> >      > +
> >      >> >      > +    if (s->vmnet_if == NULL) {
> >      >> >      > +        error_setg(errp,
> >      >> >      > +                   "unable to create interface "
> >      >> >      > +                   "with requested params");
> >      >> >
> >      >> >     You don't need line breaks here. Breaking a string into a
> >     few would
> >      >> >     also
> >      >> >     makes it a bit hard to grep.
> >      >> >
> >      >> >      > +        return -1;
> >      >> >
> >      >> >     s->if_queue leaks.
> >      >> >
> >      >> >      > +    }
> >      >> >      > +
> >      >> >      > +    dispatch_semaphore_wait(if_created_sem,
> >     DISPATCH_TIME_FOREVER);
> >      >> >      > +
> >      >> >      > +    if (if_status != VMNET_SUCCESS) {
> >      >> >      > +        error_setg(errp,
> >      >> >      > +                   "cannot create vmnet interface: %s",
> >      >> >      > +                   vmnet_status_map_str(if_status));
> >      >> >      > +        return -1;
> >      >> >      > +    }
> >      >> >      > +
> >      >> >      > +    s->send_bh = aio_bh_new(qemu_get_aio_context(),
> >      >> >     vmnet_send_bh, nc);
> >      >> >      > +    vmnet_bufs_init(s);
> >      >> >      > +    vmnet_set_send_bh_scheduled(s, false);
> >      >> >      > +    vmnet_set_send_enabled(s, true);
> >      >> >      > +    return 0;
> >      >> >      > +}
> >      >> >      > +
> >      >> >      > +
> >      >> >      > +ssize_t vmnet_receive_common(NetClientState *nc,
> >      >> >      > +                             const uint8_t *buf,
> >      >> >      > +                             size_t size)
> >      >> >      > +{
> >      >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState,
> >     nc, nc);
> >      >> >      > +    struct vmpktdesc packet;
> >      >> >      > +    struct iovec iov;
> >      >> >      > +    int pkt_cnt;
> >      >> >      > +    vmnet_return_t if_status;
> >      >> >      > +
> >      >> >      > +    if (size > s->max_packet_size) {
> >      >> >      > +        warn_report("vmnet: packet is too big, %zu >
> >     %llu\n",
> >      >> >
> >      >> >     Use PRIu64.
> >      >> >
> >      >> >      > +                    packet.vm_pkt_size,
> >      >> >      > +                    s->max_packet_size);
> >      >> >      > +        return -1;
> >      >> >      > +    }
> >      >> >      > +
> >      >> >      > +    iov.iov_base = (char *) buf;
> >      >> >      > +    iov.iov_len = size;
> >      >> >      > +
> >      >> >      > +    packet.vm_pkt_iovcnt = 1;
> >      >> >      > +    packet.vm_flags = 0;
> >      >> >      > +    packet.vm_pkt_size = size;
> >      >> >      > +    packet.vm_pkt_iov = &iov;
> >      >> >      > +    pkt_cnt = 1;
> >      >> >      > +
> >      >> >      > +    if_status = vmnet_write(s->vmnet_if, &packet,
> >     &pkt_cnt);
> >      >> >      > +    if (if_status != VMNET_SUCCESS) {
> >      >> >      > +        error_report("vmnet: write error: %s\n",
> >      >> >      > +                     vmnet_status_map_str(if_status));
> >      >> >
> >      >> >     Why don't return -1?
> >      >> >
> >      >> >      > +    }
> >      >> >      > +
> >      >> >      > +    if (if_status == VMNET_SUCCESS && pkt_cnt) {
> >      >> >      > +        return size;
> >      >> >      > +    }
> >      >> >      > +    return 0;
> >      >> >      > +}
> >      >> >      > +
> >      >> >      > +
> >      >> >      > +void vmnet_cleanup_common(NetClientState *nc)
> >      >> >      > +{
> >      >> >      > +    VmnetCommonState *s = DO_UPCAST(VmnetCommonState,
> >     nc, nc);;
> >      >> >
> >      >> >     Duplicate semicolons.
> >      >> >
> >      >> >      > +    dispatch_semaphore_t if_created_sem;
> >      >> >      > +
> >      >> >      > +    qemu_purge_queued_packets(nc); > +
> >      >> >     vmnet_set_send_bh_scheduled(s, true);
> >      >> >      > +    vmnet_set_send_enabled(s, false);
> >      >> >      > +
> >      >> >      > +    if (s->vmnet_if == NULL) {
> >      >> >      > +        return;
> >      >> >      > +    }
> >      >> >      > +
> >      >> >      > +    if_created_sem = dispatch_semaphore_create(0);
> >      >> >      > +    vmnet_stop_interface(
> >      >> >      > +        s->vmnet_if,
> >      >> >      > +        s->if_queue,
> >      >> >      > +        ^(vmnet_return_t status) {
> >      >> >      > +            assert(status == VMNET_SUCCESS);
> >      >> >      > +            dispatch_semaphore_signal(if_created_sem);
> >      >> >      > +        });
> >      >> >      > +    dispatch_semaphore_wait(if_created_sem,
> >     DISPATCH_TIME_FOREVER);
> >      >> >      > +
> >      >> >      > +    qemu_bh_delete(s->send_bh);
> >      >> >      > +    dispatch_release(if_created_sem);
> >      >> >      > +    dispatch_release(s->if_queue);
> >      >> >      > +
> >      >> >      > +    for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
> >      >> >      > +        g_free(s->iov_buf[i].iov_base);
> >      >> >      > +    }
> >      >> >      > +}
> >      >> >      > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c
> >      >> >      > index f07afaaf21..66f66c034b 100644
> >      >> >      > --- a/net/vmnet-shared.c
> >      >> >      > +++ b/net/vmnet-shared.c
> >      >> >      > @@ -10,16 +10,102 @@
> >      >> >      >
> >      >> >      >   #include "qemu/osdep.h"
> >      >> >      >   #include "qapi/qapi-types-net.h"
> >      >> >      > +#include "qapi/error.h"
> >      >> >      >   #include "vmnet_int.h"
> >      >> >      >   #include "clients.h"
> >      >> >      > -#include "qemu/error-report.h"
> >      >> >      > -#include "qapi/error.h"
> >      >> >      >
> >      >> >      >   #include <vmnet/vmnet.h>
> >      >> >      >
> >      >> >      > +typedef struct VmnetSharedState {
> >      >> >      > +    VmnetCommonState cs;
> >      >> >      > +} VmnetSharedState;
> >      >> >      > +
> >      >> >      > +
> >      >> >      > +static bool validate_options(const Netdev *netdev,
> >     Error **errp)
> >      >> >      > +{
> >      >> >      > +    const NetdevVmnetSharedOptions *options =
> >      >> >     &(netdev->u.vmnet_shared);
> >      >> >      > +
> >      >> >      > +#if !defined(MAC_OS_VERSION_11_0) || \
> >      >> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0
> >      >> >      > +    if (options->has_isolated) {
> >      >> >      > +        error_setg(errp,
> >      >> >      > +                   "vmnet-shared.isolated feature is "
> >      >> >      > +                   "unavailable: outdated
> >     vmnet.framework API");
> >      >> >      > +        return false;
> >      >> >      > +    }
> >      >> >      > +#endif
> >      >> >      > +
> >      >> >      > +    if ((options->has_start_address ||
> >      >> >      > +         options->has_end_address ||
> >      >> >      > +         options->has_subnet_mask) &&
> >      >> >      > +        !(options->has_start_address &&
> >      >> >      > +          options->has_end_address &&
> >      >> >      > +          options->has_subnet_mask)) {
> >      >> >      > +        error_setg(errp,
> >      >> >      > +                   "'start-address', 'end-address',
> >     'subnet-mask' "
> >      >> >      > +                   "should be provided together"
> >      >> >      > +        );
> >      >> >      > +        return false;
> >      >> >      > +    }
> >      >> >      > +
> >      >> >      > +    return true;
> >      >> >      > +}
> >      >> >      > +
> >      >> >      > +static xpc_object_t build_if_desc(const Netdev *netdev)
> >      >> >      > +{
> >      >> >      > +    const NetdevVmnetSharedOptions *options =
> >      >> >     &(netdev->u.vmnet_shared);
> >      >> >      > +    xpc_object_t if_desc = xpc_dictionary_create(NULL,
> >     NULL, 0);
> >      >> >      > +
> >      >> >      > +    xpc_dictionary_set_uint64(
> >      >> >      > +        if_desc,
> >      >> >      > +        vmnet_operation_mode_key,
> >      >> >      > +        VMNET_SHARED_MODE
> >      >> >      > +    );
> >      >> >      > +
> >      >> >      > +    if (options->has_nat66_prefix) {
> >      >> >      > +        xpc_dictionary_set_string(if_desc,
> >      >> >      > +
> vmnet_nat66_prefix_key,
> >      >> >      > +
> options->nat66_prefix);
> >      >> >      > +    }
> >      >> >      > +
> >      >> >      > +    if (options->has_start_address) {
> >      >> >      > +        xpc_dictionary_set_string(if_desc,
> >      >> >      > +
> vmnet_start_address_key,
> >      >> >      > +
> options->start_address);
> >      >> >      > +        xpc_dictionary_set_string(if_desc,
> >      >> >      > +
> vmnet_end_address_key,
> >      >> >      > +
> options->end_address);
> >      >> >      > +        xpc_dictionary_set_string(if_desc,
> >      >> >      > +
> vmnet_subnet_mask_key,
> >      >> >      > +
> options->subnet_mask);
> >      >> >      > +    }
> >      >> >      > +
> >      >> >      > +#if defined(MAC_OS_VERSION_11_0) && \
> >      >> >      > +    MAC_OS_X_VERSION_MIN_REQUIRED >=
> MAC_OS_VERSION_11_0
> >      >> >      > +    xpc_dictionary_set_bool(
> >      >> >      > +        if_desc,
> >      >> >      > +        vmnet_enable_isolation_key,
> >      >> >      > +        options->isolated
> >      >> >      > +    );
> >      >> >      > +#endif
> >      >> >      > +
> >      >> >      > +    return if_desc;
> >      >> >      > +}
> >      >> >      > +
> >      >> >      > +static NetClientInfo net_vmnet_shared_info = {
> >      >> >      > +    .type = NET_CLIENT_DRIVER_VMNET_SHARED,
> >      >> >      > +    .size = sizeof(VmnetSharedState),
> >      >> >      > +    .receive = vmnet_receive_common,
> >      >> >      > +    .cleanup = vmnet_cleanup_common,
> >      >> >      > +};
> >      >> >      > +
> >      >> >      >   int net_init_vmnet_shared(const Netdev *netdev, const
> >     char *name,
> >      >> >      >                             NetClientState *peer, Error
> >     **errp)
> >      >> >      >   {
> >      >> >      > -  error_setg(errp, "vmnet-shared is not implemented
> yet");
> >      >> >      > -  return -1;
> >      >> >      > +    NetClientState *nc =
> >     qemu_new_net_client(&net_vmnet_shared_info,
> >      >> >      > +                                             peer,
> >      >> >     "vmnet-shared", name);
> >      >> >      > +    if (!validate_options(netdev, errp)) {
> >      >> >      > +        g_assert_not_reached();
> >      >> >
> >      >> >     g_assert_not_reached is for debugging purpose and may be
> >     dropped
> >      >> >     depending on the build option.
> >      >> >
> >      >> >      > +    }
> >      >> >      > +    return vmnet_if_create(nc, build_if_desc(netdev),
> >     errp);
> >      >> >      >   }
> >      >> >      > diff --git a/net/vmnet_int.h b/net/vmnet_int.h
> >      >> >      > index aac4d5af64..acfe3a88c0 100644
> >      >> >      > --- a/net/vmnet_int.h
> >      >> >      > +++ b/net/vmnet_int.h
> >      >> >      > @@ -15,11 +15,48 @@
> >      >> >      >   #include "clients.h"
> >      >> >      >
> >      >> >      >   #include <vmnet/vmnet.h>
> >      >> >      > +#include <dispatch/dispatch.h>
> >      >> >      > +
> >      >> >      > +/**
> >      >> >      > + *  From vmnet.framework documentation
> >      >> >      > + *
> >      >> >      > + *  Each read/write call allows up to 200 packets to be
> >      >> >      > + *  read or written for a maximum of 256KB.
> >      >> >      > + *
> >      >> >      > + *  Each packet written should be a complete
> >      >> >      > + *  ethernet frame.
> >      >> >      > + *
> >      >> >      > + * https://developer.apple.com/documentation/vmnet
> >     <https://developer.apple.com/documentation/vmnet>
> >      >> >     <https://developer.apple.com/documentation/vmnet
> >     <https://developer.apple.com/documentation/vmnet>>
> >      >> >      > + */
> >      >> >      > +#define VMNET_PACKETS_LIMIT 200
> >      >> >      >
> >      >> >      >   typedef struct VmnetCommonState {
> >      >> >      > -  NetClientState nc;
> >      >> >      > +    NetClientState nc;
> >      >> >      > +    interface_ref vmnet_if;
> >      >> >      > +
> >      >> >      > +    bool send_scheduled;
> >      >> >      >
> >      >> >      > +    uint64_t mtu;
> >      >> >      > +    uint64_t max_packet_size;
> >      >> >      > +
> >      >> >      > +    struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT];
> >      >> >      > +    struct iovec iov_buf[VMNET_PACKETS_LIMIT];
> >      >> >      > +
> >      >> >      > +    dispatch_queue_t if_queue;
> >      >> >      > +
> >      >> >      > +    QEMUBH *send_bh;
> >      >> >      >   } VmnetCommonState;
> >      >> >      >
> >      >> >      > +const char *vmnet_status_map_str(vmnet_return_t
> status);
> >      >> >      > +
> >      >> >      > +int vmnet_if_create(NetClientState *nc,
> >      >> >      > +                    xpc_object_t if_desc,
> >      >> >      > +                    Error **errp);
> >      >> >      > +
> >      >> >      > +ssize_t vmnet_receive_common(NetClientState *nc,
> >      >> >      > +                             const uint8_t *buf,
> >      >> >      > +                             size_t size);
> >      >> >      > +
> >      >> >      > +void vmnet_cleanup_common(NetClientState *nc);
> >      >> >      >
> >      >> >      >   #endif /* VMNET_INT_H */
> >      >> >
> >      >> >
> >      >> > Other issues will be fixed and submitted later,
> >      >> > thank you!
> >      >> >
> >      >> > Regards,
> >      >> > Vladislav Yaroshchuk
> >      >>
> >      >
> >      > Regards,
> >      > Vladislav Yaroshchuk
> >
> >
> > Best Regards,
> >
> > Vladislav Yaroshchuk
>
>
Akihiko Odaki March 1, 2022, 8:21 a.m. UTC | #9
On 2022/03/01 17:09, Vladislav Yaroshchuk wrote:
>      > Not sure that only one field is enough, cause
>      > we may have two states on bh execution start:
>      > 1. There are packets in vmnet buffer s->packets_buf
>      >      that were rejected by qemu_send_async and waiting
>      >      to be sent. If this happens, we should complete sending
>      >      these waiting packets with qemu_send_async firstly,
>      >      and after that we should call vmnet_read to get
>      >      new ones and send them to QEMU;
>      > 2. There are no packets in s->packets_buf to be sent to
>      >      qemu, we only need to get new packets from vmnet
>      >      with vmnet_read and send them to QEMU
> 
>     In case 1, you should just keep calling qemu_send_packet_async.
>     Actually
>     qemu_send_packet_async adds the packet to its internal queue and calls
>     the callback when it is consumed.
> 
> 
> I'm not sure we can keep calling qemu_send_packet_async,
> because as docs from net/queue.c says:
> 
> /* [...]
>   * If a sent callback is provided to send(), the caller must handle a
>   * zero return from the delivery handler by not sending any more packets
>   * until we have invoked the callback. Only in that case will we queue
>   * the packet.
>   *
>   * If a sent callback isn't provided, we just drop the packet to avoid
>   * unbounded queueing.
>   */
> 
> So after we did vmnet_read and read N packets
> into temporary s->packets_buf, we begin calling
> qemu_send_packet_async. If it returns 0 - it says
> "no more packets until sent_cb called please".
> At this moment we have N packets in s->packets_buf
> and already queued K < N of them. But, packets K..N
> are not queued and keep waiting for sent_cb to be sent
> with qemu_send_packet_async.
> Thus when sent_cb called, we should finish
> our transfer of packets K..N from s->packets_buf
> to qemu calling qemu_send_packet_async.
> I meant this.

I missed the comment. The description is contradicting with the actual 
code; qemu_net_queue_send_iov appends the packet to the queue whenever 
it cannot send one immediately.

Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if 
calling qemu_send_packet_async is allowed after it returns 0?

Regards,
Akihiko Odaki
Vladislav Yaroshchuk March 3, 2022, 3:43 p.m. UTC | #10
On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com>
wrote:

> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote:
> >      > Not sure that only one field is enough, cause
> >      > we may have two states on bh execution start:
> >      > 1. There are packets in vmnet buffer s->packets_buf
> >      >      that were rejected by qemu_send_async and waiting
> >      >      to be sent. If this happens, we should complete sending
> >      >      these waiting packets with qemu_send_async firstly,
> >      >      and after that we should call vmnet_read to get
> >      >      new ones and send them to QEMU;
> >      > 2. There are no packets in s->packets_buf to be sent to
> >      >      qemu, we only need to get new packets from vmnet
> >      >      with vmnet_read and send them to QEMU
> >
> >     In case 1, you should just keep calling qemu_send_packet_async.
> >     Actually
> >     qemu_send_packet_async adds the packet to its internal queue and
> calls
> >     the callback when it is consumed.
> >
> >
> > I'm not sure we can keep calling qemu_send_packet_async,
> > because as docs from net/queue.c says:
> >
> > /* [...]
> >   * If a sent callback is provided to send(), the caller must handle a
> >   * zero return from the delivery handler by not sending any more packets
> >   * until we have invoked the callback. Only in that case will we queue
> >   * the packet.
> >   *
> >   * If a sent callback isn't provided, we just drop the packet to avoid
> >   * unbounded queueing.
> >   */
> >
> > So after we did vmnet_read and read N packets
> > into temporary s->packets_buf, we begin calling
> > qemu_send_packet_async. If it returns 0 - it says
> > "no more packets until sent_cb called please".
> > At this moment we have N packets in s->packets_buf
> > and already queued K < N of them. But, packets K..N
> > are not queued and keep waiting for sent_cb to be sent
> > with qemu_send_packet_async.
> > Thus when sent_cb called, we should finish
> > our transfer of packets K..N from s->packets_buf
> > to qemu calling qemu_send_packet_async.
> > I meant this.
>
> I missed the comment. The description is contradicting with the actual
> code; qemu_net_queue_send_iov appends the packet to the queue whenever
> it cannot send one immediately.
>
>
Yes, it appends, but (net/queue.c):
*  qemu_net_queue_send tries to deliver the packet
    immediately. If the packet cannot be delivered, the
    qemu_net_queue_append is called and 0 is returned
    to say the caller "the receiver is not ready, hold on";
*  qemu_net_queue_append does a probe before adding
    the packet to the queue:
    if (queue->nq_count >= queue->nq_maxlen && !sent_cb) {
        return; /* drop if queue full and no callback */
    }

The queue is not infinite, so we have three cases:
1. The queue is not full -> append the packet, no
    problems here
2. The queue is full, no callback -> we cannot notify
    a caller when we're ready, so just drop the packet
    if we can't append it.
3. The queue is full, callback present -> we can notify
    a caller when we are ready, so "let's queue this packet,
    but expect no more (!) packets is sent until I call
    sent_cb when the queue is ready"

Therefore if we provide a callback and keep sending
packets if 0 is returned, this may cause unlimited(!)
queue growth. To prevent this, we should stop sending
packets and wait for notification callback to continue.

I don't see any contradiction with that comment.

Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if
> calling qemu_send_packet_async is allowed after it returns 0?
>
>
It may be wrong, but I think it's not allowed to send
packets after qemu_send_packet_async returns 0.

Jason Wang, can you confirm please?

Best Regards,

Vladislav Yaroshchuk


> Regards,
> Akihiko Odaki
>
Akihiko Odaki March 3, 2022, 6:34 p.m. UTC | #11
On Fri, Mar 4, 2022 at 12:43 AM Vladislav Yaroshchuk
<vladislav.yaroshchuk@jetbrains.com> wrote:
>
>
>
> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
>>
>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote:
>> >      > Not sure that only one field is enough, cause
>> >      > we may have two states on bh execution start:
>> >      > 1. There are packets in vmnet buffer s->packets_buf
>> >      >      that were rejected by qemu_send_async and waiting
>> >      >      to be sent. If this happens, we should complete sending
>> >      >      these waiting packets with qemu_send_async firstly,
>> >      >      and after that we should call vmnet_read to get
>> >      >      new ones and send them to QEMU;
>> >      > 2. There are no packets in s->packets_buf to be sent to
>> >      >      qemu, we only need to get new packets from vmnet
>> >      >      with vmnet_read and send them to QEMU
>> >
>> >     In case 1, you should just keep calling qemu_send_packet_async.
>> >     Actually
>> >     qemu_send_packet_async adds the packet to its internal queue and calls
>> >     the callback when it is consumed.
>> >
>> >
>> > I'm not sure we can keep calling qemu_send_packet_async,
>> > because as docs from net/queue.c says:
>> >
>> > /* [...]
>> >   * If a sent callback is provided to send(), the caller must handle a
>> >   * zero return from the delivery handler by not sending any more packets
>> >   * until we have invoked the callback. Only in that case will we queue
>> >   * the packet.
>> >   *
>> >   * If a sent callback isn't provided, we just drop the packet to avoid
>> >   * unbounded queueing.
>> >   */
>> >
>> > So after we did vmnet_read and read N packets
>> > into temporary s->packets_buf, we begin calling
>> > qemu_send_packet_async. If it returns 0 - it says
>> > "no more packets until sent_cb called please".
>> > At this moment we have N packets in s->packets_buf
>> > and already queued K < N of them. But, packets K..N
>> > are not queued and keep waiting for sent_cb to be sent
>> > with qemu_send_packet_async.
>> > Thus when sent_cb called, we should finish
>> > our transfer of packets K..N from s->packets_buf
>> > to qemu calling qemu_send_packet_async.
>> > I meant this.
>>
>> I missed the comment. The description is contradicting with the actual
>> code; qemu_net_queue_send_iov appends the packet to the queue whenever
>> it cannot send one immediately.
>>
>
> Yes, it appends, but (net/queue.c):
> *  qemu_net_queue_send tries to deliver the packet
>     immediately. If the packet cannot be delivered, the
>     qemu_net_queue_append is called and 0 is returned
>     to say the caller "the receiver is not ready, hold on";
> *  qemu_net_queue_append does a probe before adding
>     the packet to the queue:
>     if (queue->nq_count >= queue->nq_maxlen && !sent_cb) {
>         return; /* drop if queue full and no callback */
>     }
>
> The queue is not infinite, so we have three cases:
> 1. The queue is not full -> append the packet, no
>     problems here
> 2. The queue is full, no callback -> we cannot notify
>     a caller when we're ready, so just drop the packet
>     if we can't append it.
> 3. The queue is full, callback present -> we can notify
>     a caller when we are ready, so "let's queue this packet,
>     but expect no more (!) packets is sent until I call
>     sent_cb when the queue is ready"
>
> Therefore if we provide a callback and keep sending
> packets if 0 is returned, this may cause unlimited(!)
> queue growth. To prevent this, we should stop sending
> packets and wait for notification callback to continue.
>
> I don't see any contradiction with that comment.
>
>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if
>> calling qemu_send_packet_async is allowed after it returns 0?
>>
>
> It may be wrong, but I think it's not allowed to send
> packets after qemu_send_packet_async returns 0.
>
> Jason Wang, can you confirm please?
>
> Best Regards,
>
> Vladislav Yaroshchuk
>
>>
>> Regards,
>> Akihiko Odaki
>
>
>

The unlimited queue growth would not happen if you stop calling
vmnet_read after qemu_send_packet_async returns 0. So I think the
comment should be amended to say something like:
"Once qemu_send_packet_async returns 0, the client should stop reading
more packets from the underlying NIC to prevent infinite growth of the
queue until the last callback gets called."

The unique feature of vmnet is that it can read multiple packets at
once, and I guess it is the reason why the comment in net/queue.c
missed the case. But this is all my guess so I need confirmation from
the maintainer.

Regards,
Akihiko Odaki
Jason Wang March 4, 2022, 1:37 a.m. UTC | #12
On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk
<vladislav.yaroshchuk@jetbrains.com> wrote:
>
>
>
> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
>>
>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote:
>> >      > Not sure that only one field is enough, cause
>> >      > we may have two states on bh execution start:
>> >      > 1. There are packets in vmnet buffer s->packets_buf
>> >      >      that were rejected by qemu_send_async and waiting
>> >      >      to be sent. If this happens, we should complete sending
>> >      >      these waiting packets with qemu_send_async firstly,
>> >      >      and after that we should call vmnet_read to get
>> >      >      new ones and send them to QEMU;
>> >      > 2. There are no packets in s->packets_buf to be sent to
>> >      >      qemu, we only need to get new packets from vmnet
>> >      >      with vmnet_read and send them to QEMU
>> >
>> >     In case 1, you should just keep calling qemu_send_packet_async.
>> >     Actually
>> >     qemu_send_packet_async adds the packet to its internal queue and calls
>> >     the callback when it is consumed.
>> >
>> >
>> > I'm not sure we can keep calling qemu_send_packet_async,
>> > because as docs from net/queue.c says:
>> >
>> > /* [...]
>> >   * If a sent callback is provided to send(), the caller must handle a
>> >   * zero return from the delivery handler by not sending any more packets
>> >   * until we have invoked the callback. Only in that case will we queue
>> >   * the packet.
>> >   *
>> >   * If a sent callback isn't provided, we just drop the packet to avoid
>> >   * unbounded queueing.
>> >   */
>> >
>> > So after we did vmnet_read and read N packets
>> > into temporary s->packets_buf, we begin calling
>> > qemu_send_packet_async. If it returns 0 - it says
>> > "no more packets until sent_cb called please".
>> > At this moment we have N packets in s->packets_buf
>> > and already queued K < N of them. But, packets K..N
>> > are not queued and keep waiting for sent_cb to be sent
>> > with qemu_send_packet_async.
>> > Thus when sent_cb called, we should finish
>> > our transfer of packets K..N from s->packets_buf
>> > to qemu calling qemu_send_packet_async.
>> > I meant this.
>>
>> I missed the comment. The description is contradicting with the actual
>> code; qemu_net_queue_send_iov appends the packet to the queue whenever
>> it cannot send one immediately.
>>
>
> Yes, it appends, but (net/queue.c):
> *  qemu_net_queue_send tries to deliver the packet
>     immediately. If the packet cannot be delivered, the
>     qemu_net_queue_append is called and 0 is returned
>     to say the caller "the receiver is not ready, hold on";
> *  qemu_net_queue_append does a probe before adding
>     the packet to the queue:
>     if (queue->nq_count >= queue->nq_maxlen && !sent_cb) {
>         return; /* drop if queue full and no callback */
>     }
>
> The queue is not infinite, so we have three cases:
> 1. The queue is not full -> append the packet, no
>     problems here
> 2. The queue is full, no callback -> we cannot notify
>     a caller when we're ready, so just drop the packet
>     if we can't append it.
> 3. The queue is full, callback present -> we can notify
>     a caller when we are ready, so "let's queue this packet,
>     but expect no more (!) packets is sent until I call
>     sent_cb when the queue is ready"
>
> Therefore if we provide a callback and keep sending
> packets if 0 is returned, this may cause unlimited(!)
> queue growth. To prevent this, we should stop sending
> packets and wait for notification callback to continue.

Right.

>
> I don't see any contradiction with that comment.
>
>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if
>> calling qemu_send_packet_async is allowed after it returns 0?
>>
>
> It may be wrong, but I think it's not allowed to send
> packets after qemu_send_packet_async returns 0.
>
> Jason Wang, can you confirm please?

With a cb, we can't do this. All users with cb will disable the source
polling and depend on the cb to re-read the polling.
(tap/l2tpv3/socket).

Without a cb, we can. As analyzed above, qemu_net_queue_append() can
limit the number of packets queued in this case.

Thanks

>
> Best Regards,
>
> Vladislav Yaroshchuk
>
>>
>> Regards,
>> Akihiko Odaki
>
>
>
Akihiko Odaki March 4, 2022, 4:37 a.m. UTC | #13
On 2022/03/04 10:37, Jason Wang wrote:
> On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk
> <vladislav.yaroshchuk@jetbrains.com> wrote:
>>
>>
>>
>> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
>>>
>>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote:
>>>>       > Not sure that only one field is enough, cause
>>>>       > we may have two states on bh execution start:
>>>>       > 1. There are packets in vmnet buffer s->packets_buf
>>>>       >      that were rejected by qemu_send_async and waiting
>>>>       >      to be sent. If this happens, we should complete sending
>>>>       >      these waiting packets with qemu_send_async firstly,
>>>>       >      and after that we should call vmnet_read to get
>>>>       >      new ones and send them to QEMU;
>>>>       > 2. There are no packets in s->packets_buf to be sent to
>>>>       >      qemu, we only need to get new packets from vmnet
>>>>       >      with vmnet_read and send them to QEMU
>>>>
>>>>      In case 1, you should just keep calling qemu_send_packet_async.
>>>>      Actually
>>>>      qemu_send_packet_async adds the packet to its internal queue and calls
>>>>      the callback when it is consumed.
>>>>
>>>>
>>>> I'm not sure we can keep calling qemu_send_packet_async,
>>>> because as docs from net/queue.c says:
>>>>
>>>> /* [...]
>>>>    * If a sent callback is provided to send(), the caller must handle a
>>>>    * zero return from the delivery handler by not sending any more packets
>>>>    * until we have invoked the callback. Only in that case will we queue
>>>>    * the packet.
>>>>    *
>>>>    * If a sent callback isn't provided, we just drop the packet to avoid
>>>>    * unbounded queueing.
>>>>    */
>>>>
>>>> So after we did vmnet_read and read N packets
>>>> into temporary s->packets_buf, we begin calling
>>>> qemu_send_packet_async. If it returns 0 - it says
>>>> "no more packets until sent_cb called please".
>>>> At this moment we have N packets in s->packets_buf
>>>> and already queued K < N of them. But, packets K..N
>>>> are not queued and keep waiting for sent_cb to be sent
>>>> with qemu_send_packet_async.
>>>> Thus when sent_cb called, we should finish
>>>> our transfer of packets K..N from s->packets_buf
>>>> to qemu calling qemu_send_packet_async.
>>>> I meant this.
>>>
>>> I missed the comment. The description is contradicting with the actual
>>> code; qemu_net_queue_send_iov appends the packet to the queue whenever
>>> it cannot send one immediately.
>>>
>>
>> Yes, it appends, but (net/queue.c):
>> *  qemu_net_queue_send tries to deliver the packet
>>      immediately. If the packet cannot be delivered, the
>>      qemu_net_queue_append is called and 0 is returned
>>      to say the caller "the receiver is not ready, hold on";
>> *  qemu_net_queue_append does a probe before adding
>>      the packet to the queue:
>>      if (queue->nq_count >= queue->nq_maxlen && !sent_cb) {
>>          return; /* drop if queue full and no callback */
>>      }
>>
>> The queue is not infinite, so we have three cases:
>> 1. The queue is not full -> append the packet, no
>>      problems here
>> 2. The queue is full, no callback -> we cannot notify
>>      a caller when we're ready, so just drop the packet
>>      if we can't append it.
>> 3. The queue is full, callback present -> we can notify
>>      a caller when we are ready, so "let's queue this packet,
>>      but expect no more (!) packets is sent until I call
>>      sent_cb when the queue is ready"
>>
>> Therefore if we provide a callback and keep sending
>> packets if 0 is returned, this may cause unlimited(!)
>> queue growth. To prevent this, we should stop sending
>> packets and wait for notification callback to continue.
> 
> Right.
> 
>>
>> I don't see any contradiction with that comment.
>>
>>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if
>>> calling qemu_send_packet_async is allowed after it returns 0?
>>>
>>
>> It may be wrong, but I think it's not allowed to send
>> packets after qemu_send_packet_async returns 0.
>>
>> Jason Wang, can you confirm please?
> 
> With a cb, we can't do this. All users with cb will disable the source
> polling and depend on the cb to re-read the polling.
> (tap/l2tpv3/socket).
> 
> Without a cb, we can. As analyzed above, qemu_net_queue_append() can
> limit the number of packets queued in this case.

vmnet can read multiple packets at once. What about such a case? Isn't 
calling qemu_send_packet_async for already read packet and stopping 
reading more fine?

Regards,
Akihiko Odaki

> 
> Thanks
> 
>>
>> Best Regards,
>>
>> Vladislav Yaroshchuk
>>
>>>
>>> Regards,
>>> Akihiko Odaki
>>
>>
>>
>
Akihiko Odaki March 4, 2022, 4:39 a.m. UTC | #14
On 2022/03/04 3:34, Akihiko Odaki wrote:
> On Fri, Mar 4, 2022 at 12:43 AM Vladislav Yaroshchuk
> <vladislav.yaroshchuk@jetbrains.com> wrote:
>>
>>
>>
>> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
>>>
>>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote:
>>>>       > Not sure that only one field is enough, cause
>>>>       > we may have two states on bh execution start:
>>>>       > 1. There are packets in vmnet buffer s->packets_buf
>>>>       >      that were rejected by qemu_send_async and waiting
>>>>       >      to be sent. If this happens, we should complete sending
>>>>       >      these waiting packets with qemu_send_async firstly,
>>>>       >      and after that we should call vmnet_read to get
>>>>       >      new ones and send them to QEMU;
>>>>       > 2. There are no packets in s->packets_buf to be sent to
>>>>       >      qemu, we only need to get new packets from vmnet
>>>>       >      with vmnet_read and send them to QEMU
>>>>
>>>>      In case 1, you should just keep calling qemu_send_packet_async.
>>>>      Actually
>>>>      qemu_send_packet_async adds the packet to its internal queue and calls
>>>>      the callback when it is consumed.
>>>>
>>>>
>>>> I'm not sure we can keep calling qemu_send_packet_async,
>>>> because as docs from net/queue.c says:
>>>>
>>>> /* [...]
>>>>    * If a sent callback is provided to send(), the caller must handle a
>>>>    * zero return from the delivery handler by not sending any more packets
>>>>    * until we have invoked the callback. Only in that case will we queue
>>>>    * the packet.
>>>>    *
>>>>    * If a sent callback isn't provided, we just drop the packet to avoid
>>>>    * unbounded queueing.
>>>>    */
>>>>
>>>> So after we did vmnet_read and read N packets
>>>> into temporary s->packets_buf, we begin calling
>>>> qemu_send_packet_async. If it returns 0 - it says
>>>> "no more packets until sent_cb called please".
>>>> At this moment we have N packets in s->packets_buf
>>>> and already queued K < N of them. But, packets K..N
>>>> are not queued and keep waiting for sent_cb to be sent
>>>> with qemu_send_packet_async.
>>>> Thus when sent_cb called, we should finish
>>>> our transfer of packets K..N from s->packets_buf
>>>> to qemu calling qemu_send_packet_async.
>>>> I meant this.
>>>
>>> I missed the comment. The description is contradicting with the actual
>>> code; qemu_net_queue_send_iov appends the packet to the queue whenever
>>> it cannot send one immediately.
>>>
>>
>> Yes, it appends, but (net/queue.c):
>> *  qemu_net_queue_send tries to deliver the packet
>>      immediately. If the packet cannot be delivered, the
>>      qemu_net_queue_append is called and 0 is returned
>>      to say the caller "the receiver is not ready, hold on";
>> *  qemu_net_queue_append does a probe before adding
>>      the packet to the queue:
>>      if (queue->nq_count >= queue->nq_maxlen && !sent_cb) {
>>          return; /* drop if queue full and no callback */
>>      }
>>
>> The queue is not infinite, so we have three cases:
>> 1. The queue is not full -> append the packet, no
>>      problems here
>> 2. The queue is full, no callback -> we cannot notify
>>      a caller when we're ready, so just drop the packet
>>      if we can't append it.
>> 3. The queue is full, callback present -> we can notify
>>      a caller when we are ready, so "let's queue this packet,
>>      but expect no more (!) packets is sent until I call
>>      sent_cb when the queue is ready"
>>
>> Therefore if we provide a callback and keep sending
>> packets if 0 is returned, this may cause unlimited(!)
>> queue growth. To prevent this, we should stop sending
>> packets and wait for notification callback to continue.
>>
>> I don't see any contradiction with that comment.
>>
>>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if
>>> calling qemu_send_packet_async is allowed after it returns 0?
>>>
>>
>> It may be wrong, but I think it's not allowed to send
>> packets after qemu_send_packet_async returns 0.
>>
>> Jason Wang, can you confirm please?
>>
>> Best Regards,
>>
>> Vladislav Yaroshchuk
>>
>>>
>>> Regards,
>>> Akihiko Odaki
>>
>>
>>
> 
> The unlimited queue growth would not happen if you stop calling
> vmnet_read after qemu_send_packet_async returns 0. So I think the
> comment should be amended to say something like:
> "Once qemu_send_packet_async returns 0, the client should stop reading
> more packets from the underlying NIC to prevent infinite growth of the
> queue until the last callback gets called."
> 
> The unique feature of vmnet is that it can read multiple packets at
> once, and I guess it is the reason why the comment in net/queue.c
> missed the case. But this is all my guess so I need confirmation from
> the maintainer.
> 
> Regards,
> Akihiko Odaki

I forgot to include Jason Wang to To for this email.
Jason Wang March 7, 2022, 3:59 a.m. UTC | #15
On Fri, Mar 4, 2022 at 12:37 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
>
> On 2022/03/04 10:37, Jason Wang wrote:
> > On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk
> > <vladislav.yaroshchuk@jetbrains.com> wrote:
> >>
> >>
> >>
> >> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
> >>>
> >>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote:
> >>>>       > Not sure that only one field is enough, cause
> >>>>       > we may have two states on bh execution start:
> >>>>       > 1. There are packets in vmnet buffer s->packets_buf
> >>>>       >      that were rejected by qemu_send_async and waiting
> >>>>       >      to be sent. If this happens, we should complete sending
> >>>>       >      these waiting packets with qemu_send_async firstly,
> >>>>       >      and after that we should call vmnet_read to get
> >>>>       >      new ones and send them to QEMU;
> >>>>       > 2. There are no packets in s->packets_buf to be sent to
> >>>>       >      qemu, we only need to get new packets from vmnet
> >>>>       >      with vmnet_read and send them to QEMU
> >>>>
> >>>>      In case 1, you should just keep calling qemu_send_packet_async.
> >>>>      Actually
> >>>>      qemu_send_packet_async adds the packet to its internal queue and calls
> >>>>      the callback when it is consumed.
> >>>>
> >>>>
> >>>> I'm not sure we can keep calling qemu_send_packet_async,
> >>>> because as docs from net/queue.c says:
> >>>>
> >>>> /* [...]
> >>>>    * If a sent callback is provided to send(), the caller must handle a
> >>>>    * zero return from the delivery handler by not sending any more packets
> >>>>    * until we have invoked the callback. Only in that case will we queue
> >>>>    * the packet.
> >>>>    *
> >>>>    * If a sent callback isn't provided, we just drop the packet to avoid
> >>>>    * unbounded queueing.
> >>>>    */
> >>>>
> >>>> So after we did vmnet_read and read N packets
> >>>> into temporary s->packets_buf, we begin calling
> >>>> qemu_send_packet_async. If it returns 0 - it says
> >>>> "no more packets until sent_cb called please".
> >>>> At this moment we have N packets in s->packets_buf
> >>>> and already queued K < N of them. But, packets K..N
> >>>> are not queued and keep waiting for sent_cb to be sent
> >>>> with qemu_send_packet_async.
> >>>> Thus when sent_cb called, we should finish
> >>>> our transfer of packets K..N from s->packets_buf
> >>>> to qemu calling qemu_send_packet_async.
> >>>> I meant this.
> >>>
> >>> I missed the comment. The description is contradicting with the actual
> >>> code; qemu_net_queue_send_iov appends the packet to the queue whenever
> >>> it cannot send one immediately.
> >>>
> >>
> >> Yes, it appends, but (net/queue.c):
> >> *  qemu_net_queue_send tries to deliver the packet
> >>      immediately. If the packet cannot be delivered, the
> >>      qemu_net_queue_append is called and 0 is returned
> >>      to say the caller "the receiver is not ready, hold on";
> >> *  qemu_net_queue_append does a probe before adding
> >>      the packet to the queue:
> >>      if (queue->nq_count >= queue->nq_maxlen && !sent_cb) {
> >>          return; /* drop if queue full and no callback */
> >>      }
> >>
> >> The queue is not infinite, so we have three cases:
> >> 1. The queue is not full -> append the packet, no
> >>      problems here
> >> 2. The queue is full, no callback -> we cannot notify
> >>      a caller when we're ready, so just drop the packet
> >>      if we can't append it.
> >> 3. The queue is full, callback present -> we can notify
> >>      a caller when we are ready, so "let's queue this packet,
> >>      but expect no more (!) packets is sent until I call
> >>      sent_cb when the queue is ready"
> >>
> >> Therefore if we provide a callback and keep sending
> >> packets if 0 is returned, this may cause unlimited(!)
> >> queue growth. To prevent this, we should stop sending
> >> packets and wait for notification callback to continue.
> >
> > Right.
> >
> >>
> >> I don't see any contradiction with that comment.
> >>
> >>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if
> >>> calling qemu_send_packet_async is allowed after it returns 0?
> >>>
> >>
> >> It may be wrong, but I think it's not allowed to send
> >> packets after qemu_send_packet_async returns 0.
> >>
> >> Jason Wang, can you confirm please?
> >
> > With a cb, we can't do this. All users with cb will disable the source
> > polling and depend on the cb to re-read the polling.
> > (tap/l2tpv3/socket).
> >
> > Without a cb, we can. As analyzed above, qemu_net_queue_append() can
> > limit the number of packets queued in this case.
>
> vmnet can read multiple packets at once. What about such a case? Isn't
> calling qemu_send_packet_async for already read packet and stopping
> reading more fine?

It should be fine, I remember I've asked whether or not the source
could be disabled but I get the answer that it can't.

Thanks

>
> Regards,
> Akihiko Odaki
>
> >
> > Thanks
> >
> >>
> >> Best Regards,
> >>
> >> Vladislav Yaroshchuk
> >>
> >>>
> >>> Regards,
> >>> Akihiko Odaki
> >>
> >>
> >>
> >
>
Akihiko Odaki March 7, 2022, 4:25 a.m. UTC | #16
On Mon, Mar 7, 2022 at 12:59 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Fri, Mar 4, 2022 at 12:37 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
> >
> > On 2022/03/04 10:37, Jason Wang wrote:
> > > On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk
> > > <vladislav.yaroshchuk@jetbrains.com> wrote:
> > >>
> > >>
> > >>
> > >> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
> > >>>
> > >>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote:
> > >>>>       > Not sure that only one field is enough, cause
> > >>>>       > we may have two states on bh execution start:
> > >>>>       > 1. There are packets in vmnet buffer s->packets_buf
> > >>>>       >      that were rejected by qemu_send_async and waiting
> > >>>>       >      to be sent. If this happens, we should complete sending
> > >>>>       >      these waiting packets with qemu_send_async firstly,
> > >>>>       >      and after that we should call vmnet_read to get
> > >>>>       >      new ones and send them to QEMU;
> > >>>>       > 2. There are no packets in s->packets_buf to be sent to
> > >>>>       >      qemu, we only need to get new packets from vmnet
> > >>>>       >      with vmnet_read and send them to QEMU
> > >>>>
> > >>>>      In case 1, you should just keep calling qemu_send_packet_async.
> > >>>>      Actually
> > >>>>      qemu_send_packet_async adds the packet to its internal queue and calls
> > >>>>      the callback when it is consumed.
> > >>>>
> > >>>>
> > >>>> I'm not sure we can keep calling qemu_send_packet_async,
> > >>>> because as docs from net/queue.c says:
> > >>>>
> > >>>> /* [...]
> > >>>>    * If a sent callback is provided to send(), the caller must handle a
> > >>>>    * zero return from the delivery handler by not sending any more packets
> > >>>>    * until we have invoked the callback. Only in that case will we queue
> > >>>>    * the packet.
> > >>>>    *
> > >>>>    * If a sent callback isn't provided, we just drop the packet to avoid
> > >>>>    * unbounded queueing.
> > >>>>    */
> > >>>>
> > >>>> So after we did vmnet_read and read N packets
> > >>>> into temporary s->packets_buf, we begin calling
> > >>>> qemu_send_packet_async. If it returns 0 - it says
> > >>>> "no more packets until sent_cb called please".
> > >>>> At this moment we have N packets in s->packets_buf
> > >>>> and already queued K < N of them. But, packets K..N
> > >>>> are not queued and keep waiting for sent_cb to be sent
> > >>>> with qemu_send_packet_async.
> > >>>> Thus when sent_cb called, we should finish
> > >>>> our transfer of packets K..N from s->packets_buf
> > >>>> to qemu calling qemu_send_packet_async.
> > >>>> I meant this.
> > >>>
> > >>> I missed the comment. The description is contradicting with the actual
> > >>> code; qemu_net_queue_send_iov appends the packet to the queue whenever
> > >>> it cannot send one immediately.
> > >>>
> > >>
> > >> Yes, it appends, but (net/queue.c):
> > >> *  qemu_net_queue_send tries to deliver the packet
> > >>      immediately. If the packet cannot be delivered, the
> > >>      qemu_net_queue_append is called and 0 is returned
> > >>      to say the caller "the receiver is not ready, hold on";
> > >> *  qemu_net_queue_append does a probe before adding
> > >>      the packet to the queue:
> > >>      if (queue->nq_count >= queue->nq_maxlen && !sent_cb) {
> > >>          return; /* drop if queue full and no callback */
> > >>      }
> > >>
> > >> The queue is not infinite, so we have three cases:
> > >> 1. The queue is not full -> append the packet, no
> > >>      problems here
> > >> 2. The queue is full, no callback -> we cannot notify
> > >>      a caller when we're ready, so just drop the packet
> > >>      if we can't append it.
> > >> 3. The queue is full, callback present -> we can notify
> > >>      a caller when we are ready, so "let's queue this packet,
> > >>      but expect no more (!) packets is sent until I call
> > >>      sent_cb when the queue is ready"
> > >>
> > >> Therefore if we provide a callback and keep sending
> > >> packets if 0 is returned, this may cause unlimited(!)
> > >> queue growth. To prevent this, we should stop sending
> > >> packets and wait for notification callback to continue.
> > >
> > > Right.
> > >
> > >>
> > >> I don't see any contradiction with that comment.
> > >>
> > >>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if
> > >>> calling qemu_send_packet_async is allowed after it returns 0?
> > >>>
> > >>
> > >> It may be wrong, but I think it's not allowed to send
> > >> packets after qemu_send_packet_async returns 0.
> > >>
> > >> Jason Wang, can you confirm please?
> > >
> > > With a cb, we can't do this. All users with cb will disable the source
> > > polling and depend on the cb to re-read the polling.
> > > (tap/l2tpv3/socket).
> > >
> > > Without a cb, we can. As analyzed above, qemu_net_queue_append() can
> > > limit the number of packets queued in this case.
> >
> > vmnet can read multiple packets at once. What about such a case? Isn't
> > calling qemu_send_packet_async for already read packet and stopping
> > reading more fine?
>
> It should be fine, I remember I've asked whether or not the source
> could be disabled but I get the answer that it can't.
>
> Thanks

It can, but there could be more packets before disabling the source
because vmnet reads multiple packets with one call. The procedure is:
call qemu_send_packet_async with a callback, and if it returns 0,
disable the source and keep calling qemu_send_packet_async with a
callback until all the packets already read are queued. Is this kind
of procedure allowed?

Regards,
Akihiko Odaki

>
> >
> > Regards,
> > Akihiko Odaki
> >
> > >
> > > Thanks
> > >
> > >>
> > >> Best Regards,
> > >>
> > >> Vladislav Yaroshchuk
> > >>
> > >>>
> > >>> Regards,
> > >>> Akihiko Odaki
> > >>
> > >>
> > >>
> > >
> >
>
Jason Wang March 7, 2022, 4:51 a.m. UTC | #17
On Mon, Mar 7, 2022 at 12:25 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
>
> On Mon, Mar 7, 2022 at 12:59 PM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Fri, Mar 4, 2022 at 12:37 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
> > >
> > > On 2022/03/04 10:37, Jason Wang wrote:
> > > > On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk
> > > > <vladislav.yaroshchuk@jetbrains.com> wrote:
> > > >>
> > > >>
> > > >>
> > > >> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
> > > >>>
> > > >>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote:
> > > >>>>       > Not sure that only one field is enough, cause
> > > >>>>       > we may have two states on bh execution start:
> > > >>>>       > 1. There are packets in vmnet buffer s->packets_buf
> > > >>>>       >      that were rejected by qemu_send_async and waiting
> > > >>>>       >      to be sent. If this happens, we should complete sending
> > > >>>>       >      these waiting packets with qemu_send_async firstly,
> > > >>>>       >      and after that we should call vmnet_read to get
> > > >>>>       >      new ones and send them to QEMU;
> > > >>>>       > 2. There are no packets in s->packets_buf to be sent to
> > > >>>>       >      qemu, we only need to get new packets from vmnet
> > > >>>>       >      with vmnet_read and send them to QEMU
> > > >>>>
> > > >>>>      In case 1, you should just keep calling qemu_send_packet_async.
> > > >>>>      Actually
> > > >>>>      qemu_send_packet_async adds the packet to its internal queue and calls
> > > >>>>      the callback when it is consumed.
> > > >>>>
> > > >>>>
> > > >>>> I'm not sure we can keep calling qemu_send_packet_async,
> > > >>>> because as docs from net/queue.c says:
> > > >>>>
> > > >>>> /* [...]
> > > >>>>    * If a sent callback is provided to send(), the caller must handle a
> > > >>>>    * zero return from the delivery handler by not sending any more packets
> > > >>>>    * until we have invoked the callback. Only in that case will we queue
> > > >>>>    * the packet.
> > > >>>>    *
> > > >>>>    * If a sent callback isn't provided, we just drop the packet to avoid
> > > >>>>    * unbounded queueing.
> > > >>>>    */
> > > >>>>
> > > >>>> So after we did vmnet_read and read N packets
> > > >>>> into temporary s->packets_buf, we begin calling
> > > >>>> qemu_send_packet_async. If it returns 0 - it says
> > > >>>> "no more packets until sent_cb called please".
> > > >>>> At this moment we have N packets in s->packets_buf
> > > >>>> and already queued K < N of them. But, packets K..N
> > > >>>> are not queued and keep waiting for sent_cb to be sent
> > > >>>> with qemu_send_packet_async.
> > > >>>> Thus when sent_cb called, we should finish
> > > >>>> our transfer of packets K..N from s->packets_buf
> > > >>>> to qemu calling qemu_send_packet_async.
> > > >>>> I meant this.
> > > >>>
> > > >>> I missed the comment. The description is contradicting with the actual
> > > >>> code; qemu_net_queue_send_iov appends the packet to the queue whenever
> > > >>> it cannot send one immediately.
> > > >>>
> > > >>
> > > >> Yes, it appends, but (net/queue.c):
> > > >> *  qemu_net_queue_send tries to deliver the packet
> > > >>      immediately. If the packet cannot be delivered, the
> > > >>      qemu_net_queue_append is called and 0 is returned
> > > >>      to say the caller "the receiver is not ready, hold on";
> > > >> *  qemu_net_queue_append does a probe before adding
> > > >>      the packet to the queue:
> > > >>      if (queue->nq_count >= queue->nq_maxlen && !sent_cb) {
> > > >>          return; /* drop if queue full and no callback */
> > > >>      }
> > > >>
> > > >> The queue is not infinite, so we have three cases:
> > > >> 1. The queue is not full -> append the packet, no
> > > >>      problems here
> > > >> 2. The queue is full, no callback -> we cannot notify
> > > >>      a caller when we're ready, so just drop the packet
> > > >>      if we can't append it.
> > > >> 3. The queue is full, callback present -> we can notify
> > > >>      a caller when we are ready, so "let's queue this packet,
> > > >>      but expect no more (!) packets is sent until I call
> > > >>      sent_cb when the queue is ready"
> > > >>
> > > >> Therefore if we provide a callback and keep sending
> > > >> packets if 0 is returned, this may cause unlimited(!)
> > > >> queue growth. To prevent this, we should stop sending
> > > >> packets and wait for notification callback to continue.
> > > >
> > > > Right.
> > > >
> > > >>
> > > >> I don't see any contradiction with that comment.
> > > >>
> > > >>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if
> > > >>> calling qemu_send_packet_async is allowed after it returns 0?
> > > >>>
> > > >>
> > > >> It may be wrong, but I think it's not allowed to send
> > > >> packets after qemu_send_packet_async returns 0.
> > > >>
> > > >> Jason Wang, can you confirm please?
> > > >
> > > > With a cb, we can't do this. All users with cb will disable the source
> > > > polling and depend on the cb to re-read the polling.
> > > > (tap/l2tpv3/socket).
> > > >
> > > > Without a cb, we can. As analyzed above, qemu_net_queue_append() can
> > > > limit the number of packets queued in this case.
> > >
> > > vmnet can read multiple packets at once. What about such a case? Isn't
> > > calling qemu_send_packet_async for already read packet and stopping
> > > reading more fine?
> >
> > It should be fine, I remember I've asked whether or not the source
> > could be disabled but I get the answer that it can't.
> >
> > Thanks
>
> It can, but there could be more packets before disabling the source
> because vmnet reads multiple packets with one call. The procedure is:
> call qemu_send_packet_async with a callback, and if it returns 0,
> disable the source and keep calling qemu_send_packet_async with a
> callback until all the packets already read are queued. Is this kind
> of procedure allowed?

So it might have several issues if we do this:

1) there's no guarantee that the following call to
qemu_send_packet_async() will always return 0, this could happen if
the device has some rx buffer after the first call to
qemu_send_packet_asnyc(). Then we may end up with out-of order
delivery.
2) calling with a cb may suppress the queue limit check, buggy driver
may cause unlimited number of packets to be queued under heavy traffic

It looks to me we'd better store those pending buffers in the vmnet,
and when sent_cb is triggered, start from those buffers first. (Or
simply drop the packets)

Thanks


>
> Regards,
> Akihiko Odaki
>
> >
> > >
> > > Regards,
> > > Akihiko Odaki
> > >
> > > >
> > > > Thanks
> > > >
> > > >>
> > > >> Best Regards,
> > > >>
> > > >> Vladislav Yaroshchuk
> > > >>
> > > >>>
> > > >>> Regards,
> > > >>> Akihiko Odaki
> > > >>
> > > >>
> > > >>
> > > >
> > >
> >
>
Akihiko Odaki March 7, 2022, 6:54 a.m. UTC | #18
On Mon, Mar 7, 2022 at 1:52 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Mon, Mar 7, 2022 at 12:25 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
> >
> > On Mon, Mar 7, 2022 at 12:59 PM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Fri, Mar 4, 2022 at 12:37 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
> > > >
> > > > On 2022/03/04 10:37, Jason Wang wrote:
> > > > > On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk
> > > > > <vladislav.yaroshchuk@jetbrains.com> wrote:
> > > > >>
> > > > >>
> > > > >>
> > > > >> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote:
> > > > >>>
> > > > >>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote:
> > > > >>>>       > Not sure that only one field is enough, cause
> > > > >>>>       > we may have two states on bh execution start:
> > > > >>>>       > 1. There are packets in vmnet buffer s->packets_buf
> > > > >>>>       >      that were rejected by qemu_send_async and waiting
> > > > >>>>       >      to be sent. If this happens, we should complete sending
> > > > >>>>       >      these waiting packets with qemu_send_async firstly,
> > > > >>>>       >      and after that we should call vmnet_read to get
> > > > >>>>       >      new ones and send them to QEMU;
> > > > >>>>       > 2. There are no packets in s->packets_buf to be sent to
> > > > >>>>       >      qemu, we only need to get new packets from vmnet
> > > > >>>>       >      with vmnet_read and send them to QEMU
> > > > >>>>
> > > > >>>>      In case 1, you should just keep calling qemu_send_packet_async.
> > > > >>>>      Actually
> > > > >>>>      qemu_send_packet_async adds the packet to its internal queue and calls
> > > > >>>>      the callback when it is consumed.
> > > > >>>>
> > > > >>>>
> > > > >>>> I'm not sure we can keep calling qemu_send_packet_async,
> > > > >>>> because as docs from net/queue.c says:
> > > > >>>>
> > > > >>>> /* [...]
> > > > >>>>    * If a sent callback is provided to send(), the caller must handle a
> > > > >>>>    * zero return from the delivery handler by not sending any more packets
> > > > >>>>    * until we have invoked the callback. Only in that case will we queue
> > > > >>>>    * the packet.
> > > > >>>>    *
> > > > >>>>    * If a sent callback isn't provided, we just drop the packet to avoid
> > > > >>>>    * unbounded queueing.
> > > > >>>>    */
> > > > >>>>
> > > > >>>> So after we did vmnet_read and read N packets
> > > > >>>> into temporary s->packets_buf, we begin calling
> > > > >>>> qemu_send_packet_async. If it returns 0 - it says
> > > > >>>> "no more packets until sent_cb called please".
> > > > >>>> At this moment we have N packets in s->packets_buf
> > > > >>>> and already queued K < N of them. But, packets K..N
> > > > >>>> are not queued and keep waiting for sent_cb to be sent
> > > > >>>> with qemu_send_packet_async.
> > > > >>>> Thus when sent_cb called, we should finish
> > > > >>>> our transfer of packets K..N from s->packets_buf
> > > > >>>> to qemu calling qemu_send_packet_async.
> > > > >>>> I meant this.
> > > > >>>
> > > > >>> I missed the comment. The description is contradicting with the actual
> > > > >>> code; qemu_net_queue_send_iov appends the packet to the queue whenever
> > > > >>> it cannot send one immediately.
> > > > >>>
> > > > >>
> > > > >> Yes, it appends, but (net/queue.c):
> > > > >> *  qemu_net_queue_send tries to deliver the packet
> > > > >>      immediately. If the packet cannot be delivered, the
> > > > >>      qemu_net_queue_append is called and 0 is returned
> > > > >>      to say the caller "the receiver is not ready, hold on";
> > > > >> *  qemu_net_queue_append does a probe before adding
> > > > >>      the packet to the queue:
> > > > >>      if (queue->nq_count >= queue->nq_maxlen && !sent_cb) {
> > > > >>          return; /* drop if queue full and no callback */
> > > > >>      }
> > > > >>
> > > > >> The queue is not infinite, so we have three cases:
> > > > >> 1. The queue is not full -> append the packet, no
> > > > >>      problems here
> > > > >> 2. The queue is full, no callback -> we cannot notify
> > > > >>      a caller when we're ready, so just drop the packet
> > > > >>      if we can't append it.
> > > > >> 3. The queue is full, callback present -> we can notify
> > > > >>      a caller when we are ready, so "let's queue this packet,
> > > > >>      but expect no more (!) packets is sent until I call
> > > > >>      sent_cb when the queue is ready"
> > > > >>
> > > > >> Therefore if we provide a callback and keep sending
> > > > >> packets if 0 is returned, this may cause unlimited(!)
> > > > >> queue growth. To prevent this, we should stop sending
> > > > >> packets and wait for notification callback to continue.
> > > > >
> > > > > Right.
> > > > >
> > > > >>
> > > > >> I don't see any contradiction with that comment.
> > > > >>
> > > > >>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if
> > > > >>> calling qemu_send_packet_async is allowed after it returns 0?
> > > > >>>
> > > > >>
> > > > >> It may be wrong, but I think it's not allowed to send
> > > > >> packets after qemu_send_packet_async returns 0.
> > > > >>
> > > > >> Jason Wang, can you confirm please?
> > > > >
> > > > > With a cb, we can't do this. All users with cb will disable the source
> > > > > polling and depend on the cb to re-read the polling.
> > > > > (tap/l2tpv3/socket).
> > > > >
> > > > > Without a cb, we can. As analyzed above, qemu_net_queue_append() can
> > > > > limit the number of packets queued in this case.
> > > >
> > > > vmnet can read multiple packets at once. What about such a case? Isn't
> > > > calling qemu_send_packet_async for already read packet and stopping
> > > > reading more fine?
> > >
> > > It should be fine, I remember I've asked whether or not the source
> > > could be disabled but I get the answer that it can't.
> > >
> > > Thanks
> >
> > It can, but there could be more packets before disabling the source
> > because vmnet reads multiple packets with one call. The procedure is:
> > call qemu_send_packet_async with a callback, and if it returns 0,
> > disable the source and keep calling qemu_send_packet_async with a
> > callback until all the packets already read are queued. Is this kind
> > of procedure allowed?
>
> So it might have several issues if we do this:
>
> 1) there's no guarantee that the following call to
> qemu_send_packet_async() will always return 0, this could happen if
> the device has some rx buffer after the first call to
> qemu_send_packet_asnyc(). Then we may end up with out-of order
> delivery.
> 2) calling with a cb may suppress the queue limit check, buggy driver
> may cause unlimited number of packets to be queued under heavy traffic
>
> It looks to me we'd better store those pending buffers in the vmnet,
> and when sent_cb is triggered, start from those buffers first. (Or
> simply drop the packets)

Thanks for the explanation. Vladislav had an idea of such an
implementation so it would be the option we would choose. Let me
review the design again.

On 2022/02/28 20:59, Vladislav Yaroshchuk wrote:
> It can be done kinda this way:
> ```
> struct s:
>      send_bh:                    bh
>      packets_buf:              array[packet]
>      ## Three new fields
>      send_enabled:           bool

send_enabled can be eliminated. When it is enabled, packets_send_pos
and packets_batch_size must be equal. They must not be equal
otherwise. packets_send_pos must represent the position of the packet
which is not sent yet, possibly in the process of sending.
vmnet_send_completed must call qemu_send_wrapper before scheduling
send_bh. bh_send should do nothing if s->packets_send_pos <
s->packets_batch_size.

>      packets_send_pos:    int
>      packets_batch_size:  int

The name packets_batch_size lacks the context that it is used for
sending. The relation between the two members is not obvious either.
Perhaps they could be named packets_send_current_pos and
packets_send_end_pos.

>
> fun bh_send(s):
>      ## Send disabled - do nothing
>      if not s->send_enabled:
>          return
>      ## If some packets are waiting to be sent in
>      ## s->packets_buf - send them
>      if s->packets_send_pos < s->packets_batch_size:
>          if not qemu_send_wrapper(s):
>              return
>
>      ## Try to read new packets from vmnet
>      s->packets_send_pos = 0
>      s->packets_batch_size = 0
>      s->packets_buf = vmnet_read(&s->packets_batch_size)
>      if s->packets_batch_size > 0:
>          # If something received - process them
>          qemu_send_wrapper(s)
> fun qemu_send_wrapper(s):
>      for i in s->packets_send_pos to s->packets_batch_size:
>          size = qemu_send_async(s->packets_buf[i],
>                                                    vmnet_send_completed)
>          if size == 0:
>              ## Disable packets processing until vmnet_send_completed
>              ## Save the state: packets to be sent now in s->packets_buf
>              ## in range s->packets_send_pos..s->packets_batch_size
>              s->send_enabled = false
>              s->packets_send_pos = i + 1
>              break
>          if size < 0:
>              ## On error just drop the packets
>              s->packets_send_pos = 0
>              s->packets_batch_size = 0

We could ignore just one packet. Although it is unlikely, something
specific to the packet may be wrong and other packets may be fine.

>              break
>
>       return s->send_enabled
>
>
> fun vmnet_send_completed(s):
>      ## Complete sending packets from s->packets_buf
>      s->send_enabled = true
>      qemu_bh_schedule(s->send_bh)
>
> ## From callback we only scheduling the bh
> vmnet.set_callback(callback = s: qemu_bh_schedule(s->send_bh))
> ```
>
> I think a simpler implementation may exist, but currently
> I see only this approach with the send_enabled flag and
> two more fields to save packets sending state.

Aside from the send_enabled flag, I agree it is the simplest.

Regards,
Akihiko Odaki
diff mbox series

Patch

diff --git a/net/vmnet-common.m b/net/vmnet-common.m
index 56612c72ce..2f70921cae 100644
--- a/net/vmnet-common.m
+++ b/net/vmnet-common.m
@@ -10,6 +10,8 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/log.h"
 #include "qapi/qapi-types-net.h"
 #include "vmnet_int.h"
 #include "clients.h"
@@ -17,4 +19,304 @@ 
 #include "qapi/error.h"
 
 #include <vmnet/vmnet.h>
+#include <dispatch/dispatch.h>
 
+
+static inline void vmnet_set_send_bh_scheduled(VmnetCommonState *s,
+                                               bool enable)
+{
+    qatomic_set(&s->send_scheduled, enable);
+}
+
+
+static inline bool vmnet_is_send_bh_scheduled(VmnetCommonState *s)
+{
+    return qatomic_load_acquire(&s->send_scheduled);
+}
+
+
+static inline void vmnet_set_send_enabled(VmnetCommonState *s,
+                                          bool enable)
+{
+    if (enable) {
+        vmnet_interface_set_event_callback(
+            s->vmnet_if,
+            VMNET_INTERFACE_PACKETS_AVAILABLE,
+            s->if_queue,
+            ^(interface_event_t event_id, xpc_object_t event) {
+                assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE);
+                /*
+                 * This function is being called from a non qemu thread, so
+                 * we only schedule a BH, and do the rest of the io completion
+                 * handling from vmnet_send_bh() which runs in a qemu context.
+                 *
+                 * Avoid scheduling multiple bottom halves
+                 */
+                if (!vmnet_is_send_bh_scheduled(s)) {
+                    vmnet_set_send_bh_scheduled(s, true);
+                    qemu_bh_schedule(s->send_bh);
+                }
+            });
+    } else {
+        vmnet_interface_set_event_callback(
+            s->vmnet_if,
+            VMNET_INTERFACE_PACKETS_AVAILABLE,
+            NULL,
+            NULL);
+    }
+}
+
+
+static void vmnet_send_completed(NetClientState *nc, ssize_t len)
+{
+    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
+    vmnet_set_send_enabled(s, true);
+}
+
+
+static void vmnet_send_bh(void *opaque)
+{
+    NetClientState *nc = (NetClientState *) opaque;
+    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
+
+    struct iovec *iov = s->iov_buf;
+    struct vmpktdesc *packets = s->packets_buf;
+    int pkt_cnt;
+    int i;
+
+    vmnet_return_t status;
+    ssize_t size;
+
+    /* read as many packets as present */
+    pkt_cnt = VMNET_PACKETS_LIMIT;
+    for (i = 0; i < pkt_cnt; ++i) {
+        packets[i].vm_pkt_size = s->max_packet_size;
+        packets[i].vm_pkt_iovcnt = 1;
+        packets[i].vm_flags = 0;
+    }
+
+    status = vmnet_read(s->vmnet_if, packets, &pkt_cnt);
+    if (status != VMNET_SUCCESS) {
+        error_printf("vmnet: read failed: %s\n",
+                     vmnet_status_map_str(status));
+        goto done;
+    }
+
+    for (i = 0; i < pkt_cnt; ++i) {
+        size = qemu_send_packet_async(nc,
+                                      iov[i].iov_base,
+                                      packets[i].vm_pkt_size,
+                                      vmnet_send_completed);
+        if (size == 0) {
+            vmnet_set_send_enabled(s, false);
+            goto done;
+        } else if (size < 0) {
+            break;
+        }
+    }
+
+done:
+    vmnet_set_send_bh_scheduled(s, false);
+}
+
+
+static void vmnet_bufs_init(VmnetCommonState *s)
+{
+    struct vmpktdesc *packets = s->packets_buf;
+    struct iovec *iov = s->iov_buf;
+    int i;
+
+    for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
+        iov[i].iov_len = s->max_packet_size;
+        iov[i].iov_base = g_malloc0(iov[i].iov_len);
+        packets[i].vm_pkt_iov = iov + i;
+    }
+}
+
+
+const char *vmnet_status_map_str(vmnet_return_t status)
+{
+    switch (status) {
+    case VMNET_SUCCESS:
+        return "success";
+    case VMNET_FAILURE:
+        return "general failure (possibly not enough privileges)";
+    case VMNET_MEM_FAILURE:
+        return "memory allocation failure";
+    case VMNET_INVALID_ARGUMENT:
+        return "invalid argument specified";
+    case VMNET_SETUP_INCOMPLETE:
+        return "interface setup is not complete";
+    case VMNET_INVALID_ACCESS:
+        return "invalid access, permission denied";
+    case VMNET_PACKET_TOO_BIG:
+        return "packet size is larger than MTU";
+    case VMNET_BUFFER_EXHAUSTED:
+        return "buffers exhausted in kernel";
+    case VMNET_TOO_MANY_PACKETS:
+        return "packet count exceeds limit";
+#if defined(MAC_OS_VERSION_11_0) && \
+    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
+    case VMNET_SHARING_SERVICE_BUSY:
+        return "conflict, sharing service is in use";
+#endif
+    default:
+        return "unknown vmnet error";
+    }
+}
+
+
+int vmnet_if_create(NetClientState *nc,
+                    xpc_object_t if_desc,
+                    Error **errp)
+{
+    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);;
+    dispatch_semaphore_t if_created_sem = dispatch_semaphore_create(0);
+    __block vmnet_return_t if_status;
+
+    s->if_queue = dispatch_queue_create(
+        "org.qemu.vmnet.if_queue",
+        DISPATCH_QUEUE_SERIAL
+    );
+
+    xpc_dictionary_set_bool(
+        if_desc,
+        vmnet_allocate_mac_address_key,
+        false
+    );
+#ifdef DEBUG
+    qemu_log("vmnet.start.interface_desc:\n");
+    xpc_dictionary_apply(if_desc,
+                         ^bool(const char *k, xpc_object_t v) {
+                             char *desc = xpc_copy_description(v);
+                             qemu_log("  %s=%s\n", k, desc);
+                             free(desc);
+                             return true;
+                         });
+#endif /* DEBUG */
+
+    s->vmnet_if = vmnet_start_interface(
+        if_desc,
+        s->if_queue,
+        ^(vmnet_return_t status, xpc_object_t interface_param) {
+            if_status = status;
+            if (status != VMNET_SUCCESS || !interface_param) {
+                dispatch_semaphore_signal(if_created_sem);
+                return;
+            }
+
+#ifdef DEBUG
+            qemu_log("vmnet.start.interface_param:\n");
+            xpc_dictionary_apply(interface_param,
+                                 ^bool(const char *k, xpc_object_t v) {
+                                     char *desc = xpc_copy_description(v);
+                                     qemu_log("  %s=%s\n", k, desc);
+                                     free(desc);
+                                     return true;
+                                 });
+#endif /* DEBUG */
+
+            s->mtu = xpc_dictionary_get_uint64(
+                interface_param,
+                vmnet_mtu_key);
+            s->max_packet_size = xpc_dictionary_get_uint64(
+                interface_param,
+                vmnet_max_packet_size_key);
+
+            dispatch_semaphore_signal(if_created_sem);
+        });
+
+    if (s->vmnet_if == NULL) {
+        error_setg(errp,
+                   "unable to create interface "
+                   "with requested params");
+        return -1;
+    }
+
+    dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER);
+
+    if (if_status != VMNET_SUCCESS) {
+        error_setg(errp,
+                   "cannot create vmnet interface: %s",
+                   vmnet_status_map_str(if_status));
+        return -1;
+    }
+
+    s->send_bh = aio_bh_new(qemu_get_aio_context(), vmnet_send_bh, nc);
+    vmnet_bufs_init(s);
+    vmnet_set_send_bh_scheduled(s, false);
+    vmnet_set_send_enabled(s, true);
+    return 0;
+}
+
+
+ssize_t vmnet_receive_common(NetClientState *nc,
+                             const uint8_t *buf,
+                             size_t size)
+{
+    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
+    struct vmpktdesc packet;
+    struct iovec iov;
+    int pkt_cnt;
+    vmnet_return_t if_status;
+
+    if (size > s->max_packet_size) {
+        warn_report("vmnet: packet is too big, %zu > %llu\n",
+                    packet.vm_pkt_size,
+                    s->max_packet_size);
+        return -1;
+    }
+
+    iov.iov_base = (char *) buf;
+    iov.iov_len = size;
+
+    packet.vm_pkt_iovcnt = 1;
+    packet.vm_flags = 0;
+    packet.vm_pkt_size = size;
+    packet.vm_pkt_iov = &iov;
+    pkt_cnt = 1;
+
+    if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt);
+    if (if_status != VMNET_SUCCESS) {
+        error_report("vmnet: write error: %s\n",
+                     vmnet_status_map_str(if_status));
+    }
+
+    if (if_status == VMNET_SUCCESS && pkt_cnt) {
+        return size;
+    }
+    return 0;
+}
+
+
+void vmnet_cleanup_common(NetClientState *nc)
+{
+    VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);;
+    dispatch_semaphore_t if_created_sem;
+
+    qemu_purge_queued_packets(nc);
+    vmnet_set_send_bh_scheduled(s, true);
+    vmnet_set_send_enabled(s, false);
+
+    if (s->vmnet_if == NULL) {
+        return;
+    }
+
+    if_created_sem = dispatch_semaphore_create(0);
+    vmnet_stop_interface(
+        s->vmnet_if,
+        s->if_queue,
+        ^(vmnet_return_t status) {
+            assert(status == VMNET_SUCCESS);
+            dispatch_semaphore_signal(if_created_sem);
+        });
+    dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER);
+
+    qemu_bh_delete(s->send_bh);
+    dispatch_release(if_created_sem);
+    dispatch_release(s->if_queue);
+
+    for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
+        g_free(s->iov_buf[i].iov_base);
+    }
+}
diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c
index f07afaaf21..66f66c034b 100644
--- a/net/vmnet-shared.c
+++ b/net/vmnet-shared.c
@@ -10,16 +10,102 @@ 
 
 #include "qemu/osdep.h"
 #include "qapi/qapi-types-net.h"
+#include "qapi/error.h"
 #include "vmnet_int.h"
 #include "clients.h"
-#include "qemu/error-report.h"
-#include "qapi/error.h"
 
 #include <vmnet/vmnet.h>
 
+typedef struct VmnetSharedState {
+    VmnetCommonState cs;
+} VmnetSharedState;
+
+
+static bool validate_options(const Netdev *netdev, Error **errp)
+{
+    const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared);
+
+#if !defined(MAC_OS_VERSION_11_0) || \
+    MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0
+    if (options->has_isolated) {
+        error_setg(errp,
+                   "vmnet-shared.isolated feature is "
+                   "unavailable: outdated vmnet.framework API");
+        return false;
+    }
+#endif
+
+    if ((options->has_start_address ||
+         options->has_end_address ||
+         options->has_subnet_mask) &&
+        !(options->has_start_address &&
+          options->has_end_address &&
+          options->has_subnet_mask)) {
+        error_setg(errp,
+                   "'start-address', 'end-address', 'subnet-mask' "
+                   "should be provided together"
+        );
+        return false;
+    }
+
+    return true;
+}
+
+static xpc_object_t build_if_desc(const Netdev *netdev)
+{
+    const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared);
+    xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0);
+
+    xpc_dictionary_set_uint64(
+        if_desc,
+        vmnet_operation_mode_key,
+        VMNET_SHARED_MODE
+    );
+
+    if (options->has_nat66_prefix) {
+        xpc_dictionary_set_string(if_desc,
+                                  vmnet_nat66_prefix_key,
+                                  options->nat66_prefix);
+    }
+
+    if (options->has_start_address) {
+        xpc_dictionary_set_string(if_desc,
+                                  vmnet_start_address_key,
+                                  options->start_address);
+        xpc_dictionary_set_string(if_desc,
+                                  vmnet_end_address_key,
+                                  options->end_address);
+        xpc_dictionary_set_string(if_desc,
+                                  vmnet_subnet_mask_key,
+                                  options->subnet_mask);
+    }
+
+#if defined(MAC_OS_VERSION_11_0) && \
+    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
+    xpc_dictionary_set_bool(
+        if_desc,
+        vmnet_enable_isolation_key,
+        options->isolated
+    );
+#endif
+
+    return if_desc;
+}
+
+static NetClientInfo net_vmnet_shared_info = {
+    .type = NET_CLIENT_DRIVER_VMNET_SHARED,
+    .size = sizeof(VmnetSharedState),
+    .receive = vmnet_receive_common,
+    .cleanup = vmnet_cleanup_common,
+};
+
 int net_init_vmnet_shared(const Netdev *netdev, const char *name,
                           NetClientState *peer, Error **errp)
 {
-  error_setg(errp, "vmnet-shared is not implemented yet");
-  return -1;
+    NetClientState *nc = qemu_new_net_client(&net_vmnet_shared_info,
+                                             peer, "vmnet-shared", name);
+    if (!validate_options(netdev, errp)) {
+        g_assert_not_reached();
+    }
+    return vmnet_if_create(nc, build_if_desc(netdev), errp);
 }
diff --git a/net/vmnet_int.h b/net/vmnet_int.h
index aac4d5af64..acfe3a88c0 100644
--- a/net/vmnet_int.h
+++ b/net/vmnet_int.h
@@ -15,11 +15,48 @@ 
 #include "clients.h"
 
 #include <vmnet/vmnet.h>
+#include <dispatch/dispatch.h>
+
+/**
+ *  From vmnet.framework documentation
+ *
+ *  Each read/write call allows up to 200 packets to be
+ *  read or written for a maximum of 256KB.
+ *
+ *  Each packet written should be a complete
+ *  ethernet frame.
+ *
+ *  https://developer.apple.com/documentation/vmnet
+ */
+#define VMNET_PACKETS_LIMIT 200
 
 typedef struct VmnetCommonState {
-  NetClientState nc;
+    NetClientState nc;
+    interface_ref vmnet_if;
+
+    bool send_scheduled;
 
+    uint64_t mtu;
+    uint64_t max_packet_size;
+
+    struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT];
+    struct iovec iov_buf[VMNET_PACKETS_LIMIT];
+
+    dispatch_queue_t if_queue;
+
+    QEMUBH *send_bh;
 } VmnetCommonState;
 
+const char *vmnet_status_map_str(vmnet_return_t status);
+
+int vmnet_if_create(NetClientState *nc,
+                    xpc_object_t if_desc,
+                    Error **errp);
+
+ssize_t vmnet_receive_common(NetClientState *nc,
+                             const uint8_t *buf,
+                             size_t size);
+
+void vmnet_cleanup_common(NetClientState *nc);
 
 #endif /* VMNET_INT_H */