Message ID | 20220225171402.64861-4-Vladislav.Yaroshchuk@jetbrains.com |
---|---|
State | New |
Headers | show |
Series | Add vmnet.framework based network backend | expand |
On 2022/02/26 2:13, Vladislav Yaroshchuk wrote: > Interaction with vmnet.framework in different modes > differs only on configuration stage, so we can create > common `send`, `receive`, etc. procedures and reuse them. > > vmnet.framework supports iov, but writing more than > one iov into vmnet interface fails with > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into > one and passing it to vmnet works fine. That's the > reason why receive_iov() left unimplemented. But it still > works with good enough performance having .receive() > net/vmnet: implement shared mode (vmnet-shared) > > Interaction with vmnet.framework in different modes > differs only on configuration stage, so we can create > common `send`, `receive`, etc. procedures and reuse them. > > vmnet.framework supports iov, but writing more than > one iov into vmnet interface fails with > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into > one and passing it to vmnet works fine. That's the > reason why receive_iov() left unimplemented. But it still > works with good enough performance having .receive() > implemented only. > > Also, there is no way to unsubscribe from vmnet packages > receiving except registering and unregistering event > callback or simply drop packages just ignoring and > not processing them when related flag is set. Here we do > using the second way. > > Signed-off-by: Phillip Tennen <phillip@axleos.com> > Signed-off-by: Vladislav Yaroshchuk <Vladislav.Yaroshchuk@jetbrains.com> Thank you for persistently working on this. 
> --- > net/vmnet-common.m | 302 +++++++++++++++++++++++++++++++++++++++++++++ > net/vmnet-shared.c | 94 +++++++++++++- > net/vmnet_int.h | 39 +++++- > 3 files changed, 430 insertions(+), 5 deletions(-) > > diff --git a/net/vmnet-common.m b/net/vmnet-common.m > index 56612c72ce..2f70921cae 100644 > --- a/net/vmnet-common.m > +++ b/net/vmnet-common.m > @@ -10,6 +10,8 @@ > */ > > #include "qemu/osdep.h" > +#include "qemu/main-loop.h" > +#include "qemu/log.h" > #include "qapi/qapi-types-net.h" > #include "vmnet_int.h" > #include "clients.h" > @@ -17,4 +19,304 @@ > #include "qapi/error.h" > > #include <vmnet/vmnet.h> > +#include <dispatch/dispatch.h> > > + > +static inline void vmnet_set_send_bh_scheduled(VmnetCommonState *s, > + bool enable) > +{ > + qatomic_set(&s->send_scheduled, enable); > +} > + > + > +static inline bool vmnet_is_send_bh_scheduled(VmnetCommonState *s) > +{ > + return qatomic_load_acquire(&s->send_scheduled); > +} > + > + > +static inline void vmnet_set_send_enabled(VmnetCommonState *s, > + bool enable) > +{ > + if (enable) { > + vmnet_interface_set_event_callback( > + s->vmnet_if, > + VMNET_INTERFACE_PACKETS_AVAILABLE, > + s->if_queue, > + ^(interface_event_t event_id, xpc_object_t event) { > + assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE); > + /* > + * This function is being called from a non qemu thread, so > + * we only schedule a BH, and do the rest of the io completion > + * handling from vmnet_send_bh() which runs in a qemu context. > + * > + * Avoid scheduling multiple bottom halves > + */ > + if (!vmnet_is_send_bh_scheduled(s)) { > + vmnet_set_send_bh_scheduled(s, true); It can be interrupted between vmnet_is_send_bh_scheduled and vmnet_set_send_bh_scheduled, which leads to data race. 
> + qemu_bh_schedule(s->send_bh); > + } > + }); > + } else { > + vmnet_interface_set_event_callback( > + s->vmnet_if, > + VMNET_INTERFACE_PACKETS_AVAILABLE, > + NULL, > + NULL); > + } > +} > + > + > +static void vmnet_send_completed(NetClientState *nc, ssize_t len) > +{ > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > + vmnet_set_send_enabled(s, true); > +} > + > + > +static void vmnet_send_bh(void *opaque) > +{ > + NetClientState *nc = (NetClientState *) opaque; > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > + > + struct iovec *iov = s->iov_buf; > + struct vmpktdesc *packets = s->packets_buf; > + int pkt_cnt; > + int i; > + > + vmnet_return_t status; > + ssize_t size; > + > + /* read as many packets as present */ > + pkt_cnt = VMNET_PACKETS_LIMIT; There could be more than VMNET_PACKETS_LIMIT. You may call vmnet_read in a loop. > + for (i = 0; i < pkt_cnt; ++i) { > + packets[i].vm_pkt_size = s->max_packet_size; > + packets[i].vm_pkt_iovcnt = 1; > + packets[i].vm_flags = 0; > + } > + > + status = vmnet_read(s->vmnet_if, packets, &pkt_cnt); > + if (status != VMNET_SUCCESS) { > + error_printf("vmnet: read failed: %s\n", > + vmnet_status_map_str(status)); > + goto done; > + } > + > + for (i = 0; i < pkt_cnt; ++i) { > + size = qemu_send_packet_async(nc, > + iov[i].iov_base, > + packets[i].vm_pkt_size, > + vmnet_send_completed); > + if (size == 0) { > + vmnet_set_send_enabled(s, false); > + goto done; > + } else if (size < 0) { > + break; > + } goto is not needed here. "break" when size <= 0. 
> + } > + > +done: > + vmnet_set_send_bh_scheduled(s, false); > +} > + > + > +static void vmnet_bufs_init(VmnetCommonState *s) > +{ > + struct vmpktdesc *packets = s->packets_buf; > + struct iovec *iov = s->iov_buf; > + int i; > + > + for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > + iov[i].iov_len = s->max_packet_size; > + iov[i].iov_base = g_malloc0(iov[i].iov_len); > + packets[i].vm_pkt_iov = iov + i; > + } > +} > + > + > +const char *vmnet_status_map_str(vmnet_return_t status) > +{ > + switch (status) { > + case VMNET_SUCCESS: > + return "success"; > + case VMNET_FAILURE: > + return "general failure (possibly not enough privileges)"; > + case VMNET_MEM_FAILURE: > + return "memory allocation failure"; > + case VMNET_INVALID_ARGUMENT: > + return "invalid argument specified"; > + case VMNET_SETUP_INCOMPLETE: > + return "interface setup is not complete"; > + case VMNET_INVALID_ACCESS: > + return "invalid access, permission denied"; > + case VMNET_PACKET_TOO_BIG: > + return "packet size is larger than MTU"; > + case VMNET_BUFFER_EXHAUSTED: > + return "buffers exhausted in kernel"; > + case VMNET_TOO_MANY_PACKETS: > + return "packet count exceeds limit"; > +#if defined(MAC_OS_VERSION_11_0) && \ > + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 > + case VMNET_SHARING_SERVICE_BUSY: > + return "conflict, sharing service is in use"; > +#endif > + default: > + return "unknown vmnet error"; > + } > +} > + > + > +int vmnet_if_create(NetClientState *nc, > + xpc_object_t if_desc, > + Error **errp) > +{ > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);; Duplicate semicolons. > + dispatch_semaphore_t if_created_sem = dispatch_semaphore_create(0); if_created_sem leaks. 
> + __block vmnet_return_t if_status; > + > + s->if_queue = dispatch_queue_create( > + "org.qemu.vmnet.if_queue", > + DISPATCH_QUEUE_SERIAL > + ); > + > + xpc_dictionary_set_bool( > + if_desc, > + vmnet_allocate_mac_address_key, > + false > + ); > +#ifdef DEBUG > + qemu_log("vmnet.start.interface_desc:\n"); > + xpc_dictionary_apply(if_desc, > + ^bool(const char *k, xpc_object_t v) { > + char *desc = xpc_copy_description(v); > + qemu_log(" %s=%s\n", k, desc); > + free(desc); > + return true; > + }); > +#endif /* DEBUG */ > + > + s->vmnet_if = vmnet_start_interface( > + if_desc, > + s->if_queue, > + ^(vmnet_return_t status, xpc_object_t interface_param) { > + if_status = status; > + if (status != VMNET_SUCCESS || !interface_param) { > + dispatch_semaphore_signal(if_created_sem); > + return; > + } > + > +#ifdef DEBUG > + qemu_log("vmnet.start.interface_param:\n"); > + xpc_dictionary_apply(interface_param, > + ^bool(const char *k, xpc_object_t v) { > + char *desc = xpc_copy_description(v); > + qemu_log(" %s=%s\n", k, desc); > + free(desc); > + return true; > + }); > +#endif /* DEBUG */ > + > + s->mtu = xpc_dictionary_get_uint64( > + interface_param, > + vmnet_mtu_key); > + s->max_packet_size = xpc_dictionary_get_uint64( > + interface_param, > + vmnet_max_packet_size_key); > + > + dispatch_semaphore_signal(if_created_sem); > + }); > + > + if (s->vmnet_if == NULL) { > + error_setg(errp, > + "unable to create interface " > + "with requested params"); You don't need line breaks here. Breaking a string into a few would also makes it a bit hard to grep. > + return -1; s->if_queue leaks. 
> + } > + > + dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER); > + > + if (if_status != VMNET_SUCCESS) { > + error_setg(errp, > + "cannot create vmnet interface: %s", > + vmnet_status_map_str(if_status)); > + return -1; > + } > + > + s->send_bh = aio_bh_new(qemu_get_aio_context(), vmnet_send_bh, nc); > + vmnet_bufs_init(s); > + vmnet_set_send_bh_scheduled(s, false); > + vmnet_set_send_enabled(s, true); > + return 0; > +} > + > + > +ssize_t vmnet_receive_common(NetClientState *nc, > + const uint8_t *buf, > + size_t size) > +{ > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > + struct vmpktdesc packet; > + struct iovec iov; > + int pkt_cnt; > + vmnet_return_t if_status; > + > + if (size > s->max_packet_size) { > + warn_report("vmnet: packet is too big, %zu > %llu\n", Use PRIu64. > + packet.vm_pkt_size, > + s->max_packet_size); > + return -1; > + } > + > + iov.iov_base = (char *) buf; > + iov.iov_len = size; > + > + packet.vm_pkt_iovcnt = 1; > + packet.vm_flags = 0; > + packet.vm_pkt_size = size; > + packet.vm_pkt_iov = &iov; > + pkt_cnt = 1; > + > + if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt); > + if (if_status != VMNET_SUCCESS) { > + error_report("vmnet: write error: %s\n", > + vmnet_status_map_str(if_status)); Why don't return -1? > + } > + > + if (if_status == VMNET_SUCCESS && pkt_cnt) { > + return size; > + } > + return 0; > +} > + > + > +void vmnet_cleanup_common(NetClientState *nc) > +{ > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);; Duplicate semicolons. 
> + dispatch_semaphore_t if_created_sem; > + > + qemu_purge_queued_packets(nc); > + vmnet_set_send_bh_scheduled(s, true); > + vmnet_set_send_enabled(s, false); > + > + if (s->vmnet_if == NULL) { > + return; > + } > + > + if_created_sem = dispatch_semaphore_create(0); > + vmnet_stop_interface( > + s->vmnet_if, > + s->if_queue, > + ^(vmnet_return_t status) { > + assert(status == VMNET_SUCCESS); > + dispatch_semaphore_signal(if_created_sem); > + }); > + dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER); > + > + qemu_bh_delete(s->send_bh); > + dispatch_release(if_created_sem); > + dispatch_release(s->if_queue); > + > + for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > + g_free(s->iov_buf[i].iov_base); > + } > +} > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c > index f07afaaf21..66f66c034b 100644 > --- a/net/vmnet-shared.c > +++ b/net/vmnet-shared.c > @@ -10,16 +10,102 @@ > > #include "qemu/osdep.h" > #include "qapi/qapi-types-net.h" > +#include "qapi/error.h" > #include "vmnet_int.h" > #include "clients.h" > -#include "qemu/error-report.h" > -#include "qapi/error.h" > > #include <vmnet/vmnet.h> > > +typedef struct VmnetSharedState { > + VmnetCommonState cs; > +} VmnetSharedState; > + > + > +static bool validate_options(const Netdev *netdev, Error **errp) > +{ > + const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared); > + > +#if !defined(MAC_OS_VERSION_11_0) || \ > + MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 > + if (options->has_isolated) { > + error_setg(errp, > + "vmnet-shared.isolated feature is " > + "unavailable: outdated vmnet.framework API"); > + return false; > + } > +#endif > + > + if ((options->has_start_address || > + options->has_end_address || > + options->has_subnet_mask) && > + !(options->has_start_address && > + options->has_end_address && > + options->has_subnet_mask)) { > + error_setg(errp, > + "'start-address', 'end-address', 'subnet-mask' " > + "should be provided together" > + ); > + return false; > + } > 
+ > + return true; > +} > + > +static xpc_object_t build_if_desc(const Netdev *netdev) > +{ > + const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared); > + xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0); > + > + xpc_dictionary_set_uint64( > + if_desc, > + vmnet_operation_mode_key, > + VMNET_SHARED_MODE > + ); > + > + if (options->has_nat66_prefix) { > + xpc_dictionary_set_string(if_desc, > + vmnet_nat66_prefix_key, > + options->nat66_prefix); > + } > + > + if (options->has_start_address) { > + xpc_dictionary_set_string(if_desc, > + vmnet_start_address_key, > + options->start_address); > + xpc_dictionary_set_string(if_desc, > + vmnet_end_address_key, > + options->end_address); > + xpc_dictionary_set_string(if_desc, > + vmnet_subnet_mask_key, > + options->subnet_mask); > + } > + > +#if defined(MAC_OS_VERSION_11_0) && \ > + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 > + xpc_dictionary_set_bool( > + if_desc, > + vmnet_enable_isolation_key, > + options->isolated > + ); > +#endif > + > + return if_desc; > +} > + > +static NetClientInfo net_vmnet_shared_info = { > + .type = NET_CLIENT_DRIVER_VMNET_SHARED, > + .size = sizeof(VmnetSharedState), > + .receive = vmnet_receive_common, > + .cleanup = vmnet_cleanup_common, > +}; > + > int net_init_vmnet_shared(const Netdev *netdev, const char *name, > NetClientState *peer, Error **errp) > { > - error_setg(errp, "vmnet-shared is not implemented yet"); > - return -1; > + NetClientState *nc = qemu_new_net_client(&net_vmnet_shared_info, > + peer, "vmnet-shared", name); > + if (!validate_options(netdev, errp)) { > + g_assert_not_reached(); g_assert_not_reached is for debugging purpose and may be dropped depending on the build option. 
> + } > + return vmnet_if_create(nc, build_if_desc(netdev), errp); > } > diff --git a/net/vmnet_int.h b/net/vmnet_int.h > index aac4d5af64..acfe3a88c0 100644 > --- a/net/vmnet_int.h > +++ b/net/vmnet_int.h > @@ -15,11 +15,48 @@ > #include "clients.h" > > #include <vmnet/vmnet.h> > +#include <dispatch/dispatch.h> > + > +/** > + * From vmnet.framework documentation > + * > + * Each read/write call allows up to 200 packets to be > + * read or written for a maximum of 256KB. > + * > + * Each packet written should be a complete > + * ethernet frame. > + * > + * https://developer.apple.com/documentation/vmnet > + */ > +#define VMNET_PACKETS_LIMIT 200 > > typedef struct VmnetCommonState { > - NetClientState nc; > + NetClientState nc; > + interface_ref vmnet_if; > + > + bool send_scheduled; > > + uint64_t mtu; > + uint64_t max_packet_size; > + > + struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT]; > + struct iovec iov_buf[VMNET_PACKETS_LIMIT]; > + > + dispatch_queue_t if_queue; > + > + QEMUBH *send_bh; > } VmnetCommonState; > > +const char *vmnet_status_map_str(vmnet_return_t status); > + > +int vmnet_if_create(NetClientState *nc, > + xpc_object_t if_desc, > + Error **errp); > + > +ssize_t vmnet_receive_common(NetClientState *nc, > + const uint8_t *buf, > + size_t size); > + > +void vmnet_cleanup_common(NetClientState *nc); > > #endif /* VMNET_INT_H */
Hi Akihiko, On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > On 2022/02/26 2:13, Vladislav Yaroshchuk wrote: > > Interaction with vmnet.framework in different modes > > differs only on configuration stage, so we can create > > common `send`, `receive`, etc. procedures and reuse them. > > > > vmnet.framework supports iov, but writing more than > > one iov into vmnet interface fails with > > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into > > one and passing it to vmnet works fine. That's the > > reason why receive_iov() left unimplemented. But it still > > works with good enough performance having .receive() > > net/vmnet: implement shared mode (vmnet-shared) > > > > Interaction with vmnet.framework in different modes > > differs only on configuration stage, so we can create > > common `send`, `receive`, etc. procedures and reuse them. > > > > vmnet.framework supports iov, but writing more than > > one iov into vmnet interface fails with > > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into > > one and passing it to vmnet works fine. That's the > > reason why receive_iov() left unimplemented. But it still > > works with good enough performance having .receive() > > implemented only. > > > > Also, there is no way to unsubscribe from vmnet packages > > receiving except registering and unregistering event > > callback or simply drop packages just ignoring and > > not processing them when related flag is set. Here we do > > using the second way. > > > > Signed-off-by: Phillip Tennen <phillip@axleos.com> > > Signed-off-by: Vladislav Yaroshchuk <Vladislav.Yaroshchuk@jetbrains.com> > > Thank you for persistently working on this. 
> > > --- > > net/vmnet-common.m | 302 +++++++++++++++++++++++++++++++++++++++++++++ > > net/vmnet-shared.c | 94 +++++++++++++- > > net/vmnet_int.h | 39 +++++- > > 3 files changed, 430 insertions(+), 5 deletions(-) > > > > diff --git a/net/vmnet-common.m b/net/vmnet-common.m > > index 56612c72ce..2f70921cae 100644 > > --- a/net/vmnet-common.m > > +++ b/net/vmnet-common.m > > @@ -10,6 +10,8 @@ > > */ > > > > #include "qemu/osdep.h" > > +#include "qemu/main-loop.h" > > +#include "qemu/log.h" > > #include "qapi/qapi-types-net.h" > > #include "vmnet_int.h" > > #include "clients.h" > > @@ -17,4 +19,304 @@ > > #include "qapi/error.h" > > > > #include <vmnet/vmnet.h> > > +#include <dispatch/dispatch.h> > > > > + > > +static inline void vmnet_set_send_bh_scheduled(VmnetCommonState *s, > > + bool enable) > > +{ > > + qatomic_set(&s->send_scheduled, enable); > > +} > > + > > + > > +static inline bool vmnet_is_send_bh_scheduled(VmnetCommonState *s) > > +{ > > + return qatomic_load_acquire(&s->send_scheduled); > > +} > > + > > + > > +static inline void vmnet_set_send_enabled(VmnetCommonState *s, > > + bool enable) > > +{ > > + if (enable) { > > + vmnet_interface_set_event_callback( > > + s->vmnet_if, > > + VMNET_INTERFACE_PACKETS_AVAILABLE, > > + s->if_queue, > > + ^(interface_event_t event_id, xpc_object_t event) { > > + assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE); > > + /* > > + * This function is being called from a non qemu > thread, so > > + * we only schedule a BH, and do the rest of the io > completion > > + * handling from vmnet_send_bh() which runs in a qemu > context. > > + * > > + * Avoid scheduling multiple bottom halves > > + */ > > + if (!vmnet_is_send_bh_scheduled(s)) { > > + vmnet_set_send_bh_scheduled(s, true); > > It can be interrupted between vmnet_is_send_bh_scheduled and > vmnet_set_send_bh_scheduled, which leads to data race. > > Sorry, I did not clearly understand what you meant. 
Since this callback (block) is submitted on DISPATCH_QUEUE_SERIAL, only one instance of the callback will be executed at any moment of time. https://developer.apple.com/documentation/dispatch/dispatch_queue_serial Also this is the only place where we schedule a bottom half. After we set the 'send_scheduled' flag, all the other callback blocks will do nothing (skip the if block) until the bottom half is executed and reset 'send_scheduled'. I don't see any races here :( Correct me if I'm wrong please. > > + qemu_bh_schedule(s->send_bh); > > + } > > + }); > > + } else { > > + vmnet_interface_set_event_callback( > > + s->vmnet_if, > > + VMNET_INTERFACE_PACKETS_AVAILABLE, > > + NULL, > > + NULL); > > + } > > +} > > + > > + > > +static void vmnet_send_completed(NetClientState *nc, ssize_t len) > > +{ > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > > + vmnet_set_send_enabled(s, true); > > +} > > + > > + > > +static void vmnet_send_bh(void *opaque) > > +{ > > + NetClientState *nc = (NetClientState *) opaque; > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > > + > > + struct iovec *iov = s->iov_buf; > > + struct vmpktdesc *packets = s->packets_buf; > > + int pkt_cnt; > > + int i; > > + > > + vmnet_return_t status; > > + ssize_t size; > > + > > + /* read as many packets as present */ > > + pkt_cnt = VMNET_PACKETS_LIMIT; > > There could be more than VMNET_PACKETS_LIMIT. You may call vmnet_read in > a loop. 
> > > + for (i = 0; i < pkt_cnt; ++i) { > > + packets[i].vm_pkt_size = s->max_packet_size; > > + packets[i].vm_pkt_iovcnt = 1; > > + packets[i].vm_flags = 0; > > + } > > + > > + status = vmnet_read(s->vmnet_if, packets, &pkt_cnt); > > + if (status != VMNET_SUCCESS) { > > + error_printf("vmnet: read failed: %s\n", > > + vmnet_status_map_str(status)); > > + goto done; > > + } > > + > > + for (i = 0; i < pkt_cnt; ++i) { > > + size = qemu_send_packet_async(nc, > > + iov[i].iov_base, > > + packets[i].vm_pkt_size, > > + vmnet_send_completed); > > + if (size == 0) { > > + vmnet_set_send_enabled(s, false); > > + goto done; > > + } else if (size < 0) { > > + break; > > + } > > goto is not needed here. "break" when size <= 0. > > > + } > > + > > +done: > > + vmnet_set_send_bh_scheduled(s, false); > > +} > > + > > + > > +static void vmnet_bufs_init(VmnetCommonState *s) > > +{ > > + struct vmpktdesc *packets = s->packets_buf; > > + struct iovec *iov = s->iov_buf; > > + int i; > > + > > + for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > > + iov[i].iov_len = s->max_packet_size; > > + iov[i].iov_base = g_malloc0(iov[i].iov_len); > > + packets[i].vm_pkt_iov = iov + i; > > + } > > +} > > + > > + > > +const char *vmnet_status_map_str(vmnet_return_t status) > > +{ > > + switch (status) { > > + case VMNET_SUCCESS: > > + return "success"; > > + case VMNET_FAILURE: > > + return "general failure (possibly not enough privileges)"; > > + case VMNET_MEM_FAILURE: > > + return "memory allocation failure"; > > + case VMNET_INVALID_ARGUMENT: > > + return "invalid argument specified"; > > + case VMNET_SETUP_INCOMPLETE: > > + return "interface setup is not complete"; > > + case VMNET_INVALID_ACCESS: > > + return "invalid access, permission denied"; > > + case VMNET_PACKET_TOO_BIG: > > + return "packet size is larger than MTU"; > > + case VMNET_BUFFER_EXHAUSTED: > > + return "buffers exhausted in kernel"; > > + case VMNET_TOO_MANY_PACKETS: > > + return "packet count exceeds limit"; > > +#if 
defined(MAC_OS_VERSION_11_0) && \ > > + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 > > + case VMNET_SHARING_SERVICE_BUSY: > > + return "conflict, sharing service is in use"; > > +#endif > > + default: > > + return "unknown vmnet error"; > > + } > > +} > > + > > + > > +int vmnet_if_create(NetClientState *nc, > > + xpc_object_t if_desc, > > + Error **errp) > > +{ > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);; > > Duplicate semicolons. > > > + dispatch_semaphore_t if_created_sem = dispatch_semaphore_create(0); > > if_created_sem leaks. > > > + __block vmnet_return_t if_status; > > + > > + s->if_queue = dispatch_queue_create( > > + "org.qemu.vmnet.if_queue", > > + DISPATCH_QUEUE_SERIAL > > + ); > > + > > + xpc_dictionary_set_bool( > > + if_desc, > > + vmnet_allocate_mac_address_key, > > + false > > + ); > > +#ifdef DEBUG > > + qemu_log("vmnet.start.interface_desc:\n"); > > + xpc_dictionary_apply(if_desc, > > + ^bool(const char *k, xpc_object_t v) { > > + char *desc = xpc_copy_description(v); > > + qemu_log(" %s=%s\n", k, desc); > > + free(desc); > > + return true; > > + }); > > +#endif /* DEBUG */ > > + > > + s->vmnet_if = vmnet_start_interface( > > + if_desc, > > + s->if_queue, > > + ^(vmnet_return_t status, xpc_object_t interface_param) { > > + if_status = status; > > + if (status != VMNET_SUCCESS || !interface_param) { > > + dispatch_semaphore_signal(if_created_sem); > > + return; > > + } > > + > > +#ifdef DEBUG > > + qemu_log("vmnet.start.interface_param:\n"); > > + xpc_dictionary_apply(interface_param, > > + ^bool(const char *k, xpc_object_t v) { > > + char *desc = > xpc_copy_description(v); > > + qemu_log(" %s=%s\n", k, desc); > > + free(desc); > > + return true; > > + }); > > +#endif /* DEBUG */ > > + > > + s->mtu = xpc_dictionary_get_uint64( > > + interface_param, > > + vmnet_mtu_key); > > + s->max_packet_size = xpc_dictionary_get_uint64( > > + interface_param, > > + vmnet_max_packet_size_key); > > + > > + 
dispatch_semaphore_signal(if_created_sem); > > + }); > > + > > + if (s->vmnet_if == NULL) { > > + error_setg(errp, > > + "unable to create interface " > > + "with requested params"); > > You don't need line breaks here. Breaking a string into a few would also > makes it a bit hard to grep. > > > + return -1; > > s->if_queue leaks. > > > + } > > + > > + dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER); > > + > > + if (if_status != VMNET_SUCCESS) { > > + error_setg(errp, > > + "cannot create vmnet interface: %s", > > + vmnet_status_map_str(if_status)); > > + return -1; > > + } > > + > > + s->send_bh = aio_bh_new(qemu_get_aio_context(), vmnet_send_bh, nc); > > + vmnet_bufs_init(s); > > + vmnet_set_send_bh_scheduled(s, false); > > + vmnet_set_send_enabled(s, true); > > + return 0; > > +} > > + > > + > > +ssize_t vmnet_receive_common(NetClientState *nc, > > + const uint8_t *buf, > > + size_t size) > > +{ > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > > + struct vmpktdesc packet; > > + struct iovec iov; > > + int pkt_cnt; > > + vmnet_return_t if_status; > > + > > + if (size > s->max_packet_size) { > > + warn_report("vmnet: packet is too big, %zu > %llu\n", > > Use PRIu64. > > > + packet.vm_pkt_size, > > + s->max_packet_size); > > + return -1; > > + } > > + > > + iov.iov_base = (char *) buf; > > + iov.iov_len = size; > > + > > + packet.vm_pkt_iovcnt = 1; > > + packet.vm_flags = 0; > > + packet.vm_pkt_size = size; > > + packet.vm_pkt_iov = &iov; > > + pkt_cnt = 1; > > + > > + if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt); > > + if (if_status != VMNET_SUCCESS) { > > + error_report("vmnet: write error: %s\n", > > + vmnet_status_map_str(if_status)); > > Why don't return -1? 
> > > + } > > + > > + if (if_status == VMNET_SUCCESS && pkt_cnt) { > > + return size; > > + } > > + return 0; > > +} > > + > > + > > +void vmnet_cleanup_common(NetClientState *nc) > > +{ > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);; > > Duplicate semicolons. > > > + dispatch_semaphore_t if_created_sem; > > + > > + qemu_purge_queued_packets(nc); > + > vmnet_set_send_bh_scheduled(s, true); > > + vmnet_set_send_enabled(s, false); > > + > > + if (s->vmnet_if == NULL) { > > + return; > > + } > > + > > + if_created_sem = dispatch_semaphore_create(0); > > + vmnet_stop_interface( > > + s->vmnet_if, > > + s->if_queue, > > + ^(vmnet_return_t status) { > > + assert(status == VMNET_SUCCESS); > > + dispatch_semaphore_signal(if_created_sem); > > + }); > > + dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER); > > + > > + qemu_bh_delete(s->send_bh); > > + dispatch_release(if_created_sem); > > + dispatch_release(s->if_queue); > > + > > + for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > > + g_free(s->iov_buf[i].iov_base); > > + } > > +} > > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c > > index f07afaaf21..66f66c034b 100644 > > --- a/net/vmnet-shared.c > > +++ b/net/vmnet-shared.c > > @@ -10,16 +10,102 @@ > > > > #include "qemu/osdep.h" > > #include "qapi/qapi-types-net.h" > > +#include "qapi/error.h" > > #include "vmnet_int.h" > > #include "clients.h" > > -#include "qemu/error-report.h" > > -#include "qapi/error.h" > > > > #include <vmnet/vmnet.h> > > > > +typedef struct VmnetSharedState { > > + VmnetCommonState cs; > > +} VmnetSharedState; > > + > > + > > +static bool validate_options(const Netdev *netdev, Error **errp) > > +{ > > + const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared); > > + > > +#if !defined(MAC_OS_VERSION_11_0) || \ > > + MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 > > + if (options->has_isolated) { > > + error_setg(errp, > > + "vmnet-shared.isolated feature is " > > + "unavailable: outdated 
vmnet.framework API"); > > + return false; > > + } > > +#endif > > + > > + if ((options->has_start_address || > > + options->has_end_address || > > + options->has_subnet_mask) && > > + !(options->has_start_address && > > + options->has_end_address && > > + options->has_subnet_mask)) { > > + error_setg(errp, > > + "'start-address', 'end-address', 'subnet-mask' " > > + "should be provided together" > > + ); > > + return false; > > + } > > + > > + return true; > > +} > > + > > +static xpc_object_t build_if_desc(const Netdev *netdev) > > +{ > > + const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared); > > + xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0); > > + > > + xpc_dictionary_set_uint64( > > + if_desc, > > + vmnet_operation_mode_key, > > + VMNET_SHARED_MODE > > + ); > > + > > + if (options->has_nat66_prefix) { > > + xpc_dictionary_set_string(if_desc, > > + vmnet_nat66_prefix_key, > > + options->nat66_prefix); > > + } > > + > > + if (options->has_start_address) { > > + xpc_dictionary_set_string(if_desc, > > + vmnet_start_address_key, > > + options->start_address); > > + xpc_dictionary_set_string(if_desc, > > + vmnet_end_address_key, > > + options->end_address); > > + xpc_dictionary_set_string(if_desc, > > + vmnet_subnet_mask_key, > > + options->subnet_mask); > > + } > > + > > +#if defined(MAC_OS_VERSION_11_0) && \ > > + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 > > + xpc_dictionary_set_bool( > > + if_desc, > > + vmnet_enable_isolation_key, > > + options->isolated > > + ); > > +#endif > > + > > + return if_desc; > > +} > > + > > +static NetClientInfo net_vmnet_shared_info = { > > + .type = NET_CLIENT_DRIVER_VMNET_SHARED, > > + .size = sizeof(VmnetSharedState), > > + .receive = vmnet_receive_common, > > + .cleanup = vmnet_cleanup_common, > > +}; > > + > > int net_init_vmnet_shared(const Netdev *netdev, const char *name, > > NetClientState *peer, Error **errp) > > { > > - error_setg(errp, "vmnet-shared is not implemented yet"); > > 
- return -1; > > + NetClientState *nc = qemu_new_net_client(&net_vmnet_shared_info, > > + peer, "vmnet-shared", > name); > > + if (!validate_options(netdev, errp)) { > > + g_assert_not_reached(); > > g_assert_not_reached is for debugging purpose and may be dropped > depending on the build option. > > > + } > > + return vmnet_if_create(nc, build_if_desc(netdev), errp); > > } > > diff --git a/net/vmnet_int.h b/net/vmnet_int.h > > index aac4d5af64..acfe3a88c0 100644 > > --- a/net/vmnet_int.h > > +++ b/net/vmnet_int.h > > @@ -15,11 +15,48 @@ > > #include "clients.h" > > > > #include <vmnet/vmnet.h> > > +#include <dispatch/dispatch.h> > > + > > +/** > > + * From vmnet.framework documentation > > + * > > + * Each read/write call allows up to 200 packets to be > > + * read or written for a maximum of 256KB. > > + * > > + * Each packet written should be a complete > > + * ethernet frame. > > + * > > + * https://developer.apple.com/documentation/vmnet > > + */ > > +#define VMNET_PACKETS_LIMIT 200 > > > > typedef struct VmnetCommonState { > > - NetClientState nc; > > + NetClientState nc; > > + interface_ref vmnet_if; > > + > > + bool send_scheduled; > > > > + uint64_t mtu; > > + uint64_t max_packet_size; > > + > > + struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT]; > > + struct iovec iov_buf[VMNET_PACKETS_LIMIT]; > > + > > + dispatch_queue_t if_queue; > > + > > + QEMUBH *send_bh; > > } VmnetCommonState; > > > > +const char *vmnet_status_map_str(vmnet_return_t status); > > + > > +int vmnet_if_create(NetClientState *nc, > > + xpc_object_t if_desc, > > + Error **errp); > > + > > +ssize_t vmnet_receive_common(NetClientState *nc, > > + const uint8_t *buf, > > + size_t size); > > + > > +void vmnet_cleanup_common(NetClientState *nc); > > > > #endif /* VMNET_INT_H */ > > Other issues will be fixed and submitted later, thank you! Regards, Vladislav Yaroshchuk
On 2022/02/26 17:37, Vladislav Yaroshchuk wrote: > > Hi Akihiko, > > On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki <akihiko.odaki@gmail.com > <mailto:akihiko.odaki@gmail.com>> wrote: > > On 2022/02/26 2:13, Vladislav Yaroshchuk wrote: > > Interaction with vmnet.framework in different modes > > differs only on configuration stage, so we can create > > common `send`, `receive`, etc. procedures and reuse them. > > > > vmnet.framework supports iov, but writing more than > > one iov into vmnet interface fails with > > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into > > one and passing it to vmnet works fine. That's the > > reason why receive_iov() left unimplemented. But it still > > works with good enough performance having .receive() > > net/vmnet: implement shared mode (vmnet-shared) > > > > Interaction with vmnet.framework in different modes > > differs only on configuration stage, so we can create > > common `send`, `receive`, etc. procedures and reuse them. > > > > vmnet.framework supports iov, but writing more than > > one iov into vmnet interface fails with > > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into > > one and passing it to vmnet works fine. That's the > > reason why receive_iov() left unimplemented. But it still > > works with good enough performance having .receive() > > implemented only. > > > > Also, there is no way to unsubscribe from vmnet packages > > receiving except registering and unregistering event > > callback or simply drop packages just ignoring and > > not processing them when related flag is set. Here we do > > using the second way. > > > > Signed-off-by: Phillip Tennen <phillip@axleos.com > <mailto:phillip@axleos.com>> > > Signed-off-by: Vladislav Yaroshchuk > <Vladislav.Yaroshchuk@jetbrains.com > <mailto:Vladislav.Yaroshchuk@jetbrains.com>> > > Thank you for persistently working on this. 
> > > --- > >  net/vmnet-common.m | 302 > +++++++++++++++++++++++++++++++++++++++++++++ > >  net/vmnet-shared.c | 94 +++++++++++++- > >  net/vmnet_int.h  | 39 +++++- > >  3 files changed, 430 insertions(+), 5 deletions(-) > > > > diff --git a/net/vmnet-common.m b/net/vmnet-common.m > > index 56612c72ce..2f70921cae 100644 > > --- a/net/vmnet-common.m > > +++ b/net/vmnet-common.m > > @@ -10,6 +10,8 @@ > >  */ > > > >  #include "qemu/osdep.h" > > +#include "qemu/main-loop.h" > > +#include "qemu/log.h" > >  #include "qapi/qapi-types-net.h" > >  #include "vmnet_int.h" > >  #include "clients.h" > > @@ -17,4 +19,304 @@ > >  #include "qapi/error.h" > > > >  #include <vmnet/vmnet.h> > > +#include <dispatch/dispatch.h> > > > > + > > +static inline void vmnet_set_send_bh_scheduled(VmnetCommonState *s, > > +                        bool enable) > > +{ > > +  qatomic_set(&s->send_scheduled, enable); > > +} > > + > > + > > +static inline bool vmnet_is_send_bh_scheduled(VmnetCommonState *s) > > +{ > > +  return qatomic_load_acquire(&s->send_scheduled); > > +} > > + > > + > > +static inline void vmnet_set_send_enabled(VmnetCommonState *s, > > +                     bool enable) > > +{ > > +  if (enable) { > > +    vmnet_interface_set_event_callback( > > +      s->vmnet_if, > > +      VMNET_INTERFACE_PACKETS_AVAILABLE, > > +      s->if_queue, > > +      ^(interface_event_t event_id, xpc_object_t event) { > > +        assert(event_id == > VMNET_INTERFACE_PACKETS_AVAILABLE); > > +        /* > > +         * This function is being called from a non qemu > thread, so > > +         * we only schedule a BH, and do the rest of the > io completion > > +         * handling from vmnet_send_bh() which runs in a > qemu context. 
> > +         * > > +         * Avoid scheduling multiple bottom halves > > +         */ > > +        if (!vmnet_is_send_bh_scheduled(s)) { > > +          vmnet_set_send_bh_scheduled(s, true); > > It can be interrupted between vmnet_is_send_bh_scheduled and > vmnet_set_send_bh_scheduled, which leads to data race. > > > Sorry, I did not clearly understand what you meant. Since this > callback (block) is submitted on DISPATCH_QUEUE_SERIAL, > only one instance of the callback will be executed at any > moment of time. > https://developer.apple.com/documentation/dispatch/dispatch_queue_serial > <https://developer.apple.com/documentation/dispatch/dispatch_queue_serial> > > Also this is the only place where we schedule a bottom half. > > After we set the 'send_scheduled' flag, all the other > callback blocks will do nothing (skip the if block) until > the bottom half is executed and reset 'send_scheduled'. > I don't see any races here :( > > Correct me if I'm wrong please. My explanation that the interruption between vmnet_is_send_bh_scheduled and vmnet_set_send_bh_scheduled is problematic was actually misleading. The problem is that vmnet_send_bh resets 'send_scheduled' after calling vmnet_read. If we got another packet after vmnet_read, it would be silently ignored. By the way, I forgot to mention another problem: vmnet_send_completed calls vmnet_set_send_enabled, but the other packets in the buffer may not be sent yet. Also, unregistering callbacks in vmnet_set_send_enabled is probably not enough to stop the callback from being fired since some dispatch blocks are already in the dispatch queue. 
Regards, Akihiko Odaki > > > +          qemu_bh_schedule(s->send_bh); > > +        } > > +      }); > > +  } else { > > +    vmnet_interface_set_event_callback( > > +      s->vmnet_if, > > +      VMNET_INTERFACE_PACKETS_AVAILABLE, > > +      NULL, > > +      NULL); > > +  } > > +} > > + > > + > > +static void vmnet_send_completed(NetClientState *nc, ssize_t len) > > +{ > > +  VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > > +  vmnet_set_send_enabled(s, true); > > +} > > + > > + > > +static void vmnet_send_bh(void *opaque) > > +{ > > +  NetClientState *nc = (NetClientState *) opaque; > > +  VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > > + > > +  struct iovec *iov = s->iov_buf; > > +  struct vmpktdesc *packets = s->packets_buf; > > +  int pkt_cnt; > > +  int i; > > + > > +  vmnet_return_t status; > > +  ssize_t size; > > + > > +  /* read as many packets as present */ > > +  pkt_cnt = VMNET_PACKETS_LIMIT; > > There could be more than VMNET_PACKETS_LIMIT. You may call > vmnet_read in > a loop. > > > +  for (i = 0; i < pkt_cnt; ++i) { > > +    packets[i].vm_pkt_size = s->max_packet_size; > > +    packets[i].vm_pkt_iovcnt = 1; > > +    packets[i].vm_flags = 0; > > +  } > > + > > +  status = vmnet_read(s->vmnet_if, packets, &pkt_cnt); > > +  if (status != VMNET_SUCCESS) { > > +    error_printf("vmnet: read failed: %s\n", > > +           vmnet_status_map_str(status)); > > +    goto done; > > +  } > > + > > +  for (i = 0; i < pkt_cnt; ++i) { > > +    size = qemu_send_packet_async(nc, > > +                   iov[i].iov_base, > > +                   packets[i].vm_pkt_size, > > +                   vmnet_send_completed); > > +    if (size == 0) { > > +      vmnet_set_send_enabled(s, false); > > +      goto done; > > +    } else if (size < 0) { > > +      break; > > +    } > > goto is not needed here. "break" when size <= 0. 
> > > +  } > > + > > +done: > > +  vmnet_set_send_bh_scheduled(s, false); > > +} > > + > > + > > +static void vmnet_bufs_init(VmnetCommonState *s) > > +{ > > +  struct vmpktdesc *packets = s->packets_buf; > > +  struct iovec *iov = s->iov_buf; > > +  int i; > > + > > +  for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > > +    iov[i].iov_len = s->max_packet_size; > > +    iov[i].iov_base = g_malloc0(iov[i].iov_len); > > +    packets[i].vm_pkt_iov = iov + i; > > +  } > > +} > > + > > + > > +const char *vmnet_status_map_str(vmnet_return_t status) > > +{ > > +  switch (status) { > > +  case VMNET_SUCCESS: > > +    return "success"; > > +  case VMNET_FAILURE: > > +    return "general failure (possibly not enough privileges)"; > > +  case VMNET_MEM_FAILURE: > > +    return "memory allocation failure"; > > +  case VMNET_INVALID_ARGUMENT: > > +    return "invalid argument specified"; > > +  case VMNET_SETUP_INCOMPLETE: > > +    return "interface setup is not complete"; > > +  case VMNET_INVALID_ACCESS: > > +    return "invalid access, permission denied"; > > +  case VMNET_PACKET_TOO_BIG: > > +    return "packet size is larger than MTU"; > > +  case VMNET_BUFFER_EXHAUSTED: > > +    return "buffers exhausted in kernel"; > > +  case VMNET_TOO_MANY_PACKETS: > > +    return "packet count exceeds limit"; > > +#if defined(MAC_OS_VERSION_11_0) && \ > > +  MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 > > +  case VMNET_SHARING_SERVICE_BUSY: > > +    return "conflict, sharing service is in use"; > > +#endif > > +  default: > > +    return "unknown vmnet error"; > > +  } > > +} > > + > > + > > +int vmnet_if_create(NetClientState *nc, > > +          xpc_object_t if_desc, > > +          Error **errp) > > +{ > > +  VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);; > > Duplicate semicolons. > > > +  dispatch_semaphore_t if_created_sem = > dispatch_semaphore_create(0); > > if_created_sem leaks. 
> > > +  __block vmnet_return_t if_status; > > + > > +  s->if_queue = dispatch_queue_create( > > +    "org.qemu.vmnet.if_queue", > > +    DISPATCH_QUEUE_SERIAL > > +  ); > > + > > +  xpc_dictionary_set_bool( > > +    if_desc, > > +    vmnet_allocate_mac_address_key, > > +    false > > +  ); > > +#ifdef DEBUG > > +  qemu_log("vmnet.start.interface_desc:\n"); > > +  xpc_dictionary_apply(if_desc, > > +             ^bool(const char *k, xpc_object_t v) { > > +               char *desc = xpc_copy_description(v); > > +               qemu_log(" %s=%s\n", k, desc); > > +               free(desc); > > +               return true; > > +             }); > > +#endif /* DEBUG */ > > + > > +  s->vmnet_if = vmnet_start_interface( > > +    if_desc, > > +    s->if_queue, > > +    ^(vmnet_return_t status, xpc_object_t interface_param) { > > +      if_status = status; > > +      if (status != VMNET_SUCCESS || !interface_param) { > > +        dispatch_semaphore_signal(if_created_sem); > > +        return; > > +      } > > + > > +#ifdef DEBUG > > +      qemu_log("vmnet.start.interface_param:\n"); > > +      xpc_dictionary_apply(interface_param, > > +                 ^bool(const char *k, > xpc_object_t v) { > > +                   char *desc = > xpc_copy_description(v); > > +                   qemu_log(" %s=%s\n", k, desc); > > +                   free(desc); > > +                   return true; > > +                 }); > > +#endif /* DEBUG */ > > + > > +      s->mtu = xpc_dictionary_get_uint64( > > +        interface_param, > > +        vmnet_mtu_key); > > +      s->max_packet_size = xpc_dictionary_get_uint64( > > +        interface_param, > > +        vmnet_max_packet_size_key); > > + > > +      dispatch_semaphore_signal(if_created_sem); > > +    }); > > + > > +  if (s->vmnet_if == NULL) { > > +    error_setg(errp, > > +          "unable to create interface " > > +          "with requested params"); > > You don't need line breaks here. 
Breaking a string into a few would > also > makes it a bit hard to grep. > > > +    return -1; > > s->if_queue leaks. > > > +  } > > + > > +  dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER); > > + > > +  if (if_status != VMNET_SUCCESS) { > > +    error_setg(errp, > > +          "cannot create vmnet interface: %s", > > +          vmnet_status_map_str(if_status)); > > +    return -1; > > +  } > > + > > +  s->send_bh = aio_bh_new(qemu_get_aio_context(), > vmnet_send_bh, nc); > > +  vmnet_bufs_init(s); > > +  vmnet_set_send_bh_scheduled(s, false); > > +  vmnet_set_send_enabled(s, true); > > +  return 0; > > +} > > + > > + > > +ssize_t vmnet_receive_common(NetClientState *nc, > > +               const uint8_t *buf, > > +               size_t size) > > +{ > > +  VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > > +  struct vmpktdesc packet; > > +  struct iovec iov; > > +  int pkt_cnt; > > +  vmnet_return_t if_status; > > + > > +  if (size > s->max_packet_size) { > > +    warn_report("vmnet: packet is too big, %zu > %llu\n", > > Use PRIu64. > > > +          packet.vm_pkt_size, > > +          s->max_packet_size); > > +    return -1; > > +  } > > + > > +  iov.iov_base = (char *) buf; > > +  iov.iov_len = size; > > + > > +  packet.vm_pkt_iovcnt = 1; > > +  packet.vm_flags = 0; > > +  packet.vm_pkt_size = size; > > +  packet.vm_pkt_iov = &iov; > > +  pkt_cnt = 1; > > + > > +  if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt); > > +  if (if_status != VMNET_SUCCESS) { > > +    error_report("vmnet: write error: %s\n", > > +           vmnet_status_map_str(if_status)); > > Why don't return -1? > > > +  } > > + > > +  if (if_status == VMNET_SUCCESS && pkt_cnt) { > > +    return size; > > +  } > > +  return 0; > > +} > > + > > + > > +void vmnet_cleanup_common(NetClientState *nc) > > +{ > > +  VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);; > > Duplicate semicolons. 
> > > +  dispatch_semaphore_t if_created_sem; > > + > > +  qemu_purge_queued_packets(nc); > + > vmnet_set_send_bh_scheduled(s, true); > > +  vmnet_set_send_enabled(s, false); > > + > > +  if (s->vmnet_if == NULL) { > > +    return; > > +  } > > + > > +  if_created_sem = dispatch_semaphore_create(0); > > +  vmnet_stop_interface( > > +    s->vmnet_if, > > +    s->if_queue, > > +    ^(vmnet_return_t status) { > > +      assert(status == VMNET_SUCCESS); > > +      dispatch_semaphore_signal(if_created_sem); > > +    }); > > +  dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER); > > + > > +  qemu_bh_delete(s->send_bh); > > +  dispatch_release(if_created_sem); > > +  dispatch_release(s->if_queue); > > + > > +  for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > > +    g_free(s->iov_buf[i].iov_base); > > +  } > > +} > > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c > > index f07afaaf21..66f66c034b 100644 > > --- a/net/vmnet-shared.c > > +++ b/net/vmnet-shared.c > > @@ -10,16 +10,102 @@ > > > >  #include "qemu/osdep.h" > >  #include "qapi/qapi-types-net.h" > > +#include "qapi/error.h" > >  #include "vmnet_int.h" > >  #include "clients.h" > > -#include "qemu/error-report.h" > > -#include "qapi/error.h" > > > >  #include <vmnet/vmnet.h> > > > > +typedef struct VmnetSharedState { > > +  VmnetCommonState cs; > > +} VmnetSharedState; > > + > > + > > +static bool validate_options(const Netdev *netdev, Error **errp) > > +{ > > +  const NetdevVmnetSharedOptions *options = > &(netdev->u.vmnet_shared); > > + > > +#if !defined(MAC_OS_VERSION_11_0) || \ > > +  MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 > > +  if (options->has_isolated) { > > +    error_setg(errp, > > +          "vmnet-shared.isolated feature is " > > +          "unavailable: outdated vmnet.framework API"); > > +    return false; > > +  } > > +#endif > > + > > +  if ((options->has_start_address || > > +     options->has_end_address || > > +     options->has_subnet_mask) && > > +    
!(options->has_start_address && > > +     options->has_end_address && > > +     options->has_subnet_mask)) { > > +    error_setg(errp, > > +          "'start-address', 'end-address', 'subnet-mask' " > > +          "should be provided together" > > +    ); > > +    return false; > > +  } > > + > > +  return true; > > +} > > + > > +static xpc_object_t build_if_desc(const Netdev *netdev) > > +{ > > +  const NetdevVmnetSharedOptions *options = > &(netdev->u.vmnet_shared); > > +  xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0); > > + > > +  xpc_dictionary_set_uint64( > > +    if_desc, > > +    vmnet_operation_mode_key, > > +    VMNET_SHARED_MODE > > +  ); > > + > > +  if (options->has_nat66_prefix) { > > +    xpc_dictionary_set_string(if_desc, > > +                 vmnet_nat66_prefix_key, > > +                 options->nat66_prefix); > > +  } > > + > > +  if (options->has_start_address) { > > +    xpc_dictionary_set_string(if_desc, > > +                 vmnet_start_address_key, > > +                 options->start_address); > > +    xpc_dictionary_set_string(if_desc, > > +                 vmnet_end_address_key, > > +                 options->end_address); > > +    xpc_dictionary_set_string(if_desc, > > +                 vmnet_subnet_mask_key, > > +                 options->subnet_mask); > > +  } > > + > > +#if defined(MAC_OS_VERSION_11_0) && \ > > +  MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 > > +  xpc_dictionary_set_bool( > > +    if_desc, > > +    vmnet_enable_isolation_key, > > +    options->isolated > > +  ); > > +#endif > > + > > +  return if_desc; > > +} > > + > > +static NetClientInfo net_vmnet_shared_info = { > > +  .type = NET_CLIENT_DRIVER_VMNET_SHARED, > > +  .size = sizeof(VmnetSharedState), > > +  .receive = vmnet_receive_common, > > +  .cleanup = vmnet_cleanup_common, > > +}; > > + > >  int net_init_vmnet_shared(const Netdev *netdev, const char *name, > >               NetClientState *peer, Error **errp) > >  { > > - error_setg(errp, 
"vmnet-shared is not implemented yet"); > > - return -1; > > +  NetClientState *nc = qemu_new_net_client(&net_vmnet_shared_info, > > +                       peer, > "vmnet-shared", name); > > +  if (!validate_options(netdev, errp)) { > > +    g_assert_not_reached(); > > g_assert_not_reached is for debugging purpose and may be dropped > depending on the build option. > > > +  } > > +  return vmnet_if_create(nc, build_if_desc(netdev), errp); > >  } > > diff --git a/net/vmnet_int.h b/net/vmnet_int.h > > index aac4d5af64..acfe3a88c0 100644 > > --- a/net/vmnet_int.h > > +++ b/net/vmnet_int.h > > @@ -15,11 +15,48 @@ > >  #include "clients.h" > > > >  #include <vmnet/vmnet.h> > > +#include <dispatch/dispatch.h> > > + > > +/** > > + * From vmnet.framework documentation > > + * > > + * Each read/write call allows up to 200 packets to be > > + * read or written for a maximum of 256KB. > > + * > > + * Each packet written should be a complete > > + * ethernet frame. > > + * > > + * https://developer.apple.com/documentation/vmnet > <https://developer.apple.com/documentation/vmnet> > > + */ > > +#define VMNET_PACKETS_LIMIT 200 > > > >  typedef struct VmnetCommonState { > > - NetClientState nc; > > +  NetClientState nc; > > +  interface_ref vmnet_if; > > + > > +  bool send_scheduled; > > > > +  uint64_t mtu; > > +  uint64_t max_packet_size; > > + > > +  struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT]; > > +  struct iovec iov_buf[VMNET_PACKETS_LIMIT]; > > + > > +  dispatch_queue_t if_queue; > > + > > +  QEMUBH *send_bh; > >  } VmnetCommonState; > > > > +const char *vmnet_status_map_str(vmnet_return_t status); > > + > > +int vmnet_if_create(NetClientState *nc, > > +          xpc_object_t if_desc, > > +          Error **errp); > > + > > +ssize_t vmnet_receive_common(NetClientState *nc, > > +               const uint8_t *buf, > > +               size_t size); > > + > > +void vmnet_cleanup_common(NetClientState *nc); > > > >  #endif /* VMNET_INT_H */ > > > Other issues will be fixed 
and submitted later, > thank you! > > Regards, > Vladislav Yaroshchuk
On Sat, Feb 26, 2022 at 12:16 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > On 2022/02/26 17:37, Vladislav Yaroshchuk wrote: > > > > Hi Akihiko, > > > > On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki <akihiko.odaki@gmail.com > > <mailto:akihiko.odaki@gmail.com>> wrote: > > > > On 2022/02/26 2:13, Vladislav Yaroshchuk wrote: > > > Interaction with vmnet.framework in different modes > > > differs only on configuration stage, so we can create > > > common `send`, `receive`, etc. procedures and reuse them. > > > > > > vmnet.framework supports iov, but writing more than > > > one iov into vmnet interface fails with > > > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into > > > one and passing it to vmnet works fine. That's the > > > reason why receive_iov() left unimplemented. But it still > > > works with good enough performance having .receive() > > > net/vmnet: implement shared mode (vmnet-shared) > > > > > > Interaction with vmnet.framework in different modes > > > differs only on configuration stage, so we can create > > > common `send`, `receive`, etc. procedures and reuse them. > > > > > > vmnet.framework supports iov, but writing more than > > > one iov into vmnet interface fails with > > > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into > > > one and passing it to vmnet works fine. That's the > > > reason why receive_iov() left unimplemented. But it still > > > works with good enough performance having .receive() > > > implemented only. > > > > > > Also, there is no way to unsubscribe from vmnet packages > > > receiving except registering and unregistering event > > > callback or simply drop packages just ignoring and > > > not processing them when related flag is set. Here we do > > > using the second way. 
> > > > > > Signed-off-by: Phillip Tennen <phillip@axleos.com > > <mailto:phillip@axleos.com>> > > > Signed-off-by: Vladislav Yaroshchuk > > <Vladislav.Yaroshchuk@jetbrains.com > > <mailto:Vladislav.Yaroshchuk@jetbrains.com>> > > > > Thank you for persistently working on this. > > > > > --- > > > net/vmnet-common.m | 302 > > +++++++++++++++++++++++++++++++++++++++++++++ > > > net/vmnet-shared.c | 94 +++++++++++++- > > > net/vmnet_int.h | 39 +++++- > > > 3 files changed, 430 insertions(+), 5 deletions(-) > > > > > > diff --git a/net/vmnet-common.m b/net/vmnet-common.m > > > index 56612c72ce..2f70921cae 100644 > > > --- a/net/vmnet-common.m > > > +++ b/net/vmnet-common.m > > > @@ -10,6 +10,8 @@ > > > */ > > > > > > #include "qemu/osdep.h" > > > +#include "qemu/main-loop.h" > > > +#include "qemu/log.h" > > > #include "qapi/qapi-types-net.h" > > > #include "vmnet_int.h" > > > #include "clients.h" > > > @@ -17,4 +19,304 @@ > > > #include "qapi/error.h" > > > > > > #include <vmnet/vmnet.h> > > > +#include <dispatch/dispatch.h> > > > > > > + > > > +static inline void vmnet_set_send_bh_scheduled(VmnetCommonState > *s, > > > + bool enable) > > > +{ > > > + qatomic_set(&s->send_scheduled, enable); > > > +} > > > + > > > + > > > +static inline bool vmnet_is_send_bh_scheduled(VmnetCommonState > *s) > > > +{ > > > + return qatomic_load_acquire(&s->send_scheduled); > > > +} > > > + > > > + > > > +static inline void vmnet_set_send_enabled(VmnetCommonState *s, > > > + bool enable) > > > +{ > > > + if (enable) { > > > + vmnet_interface_set_event_callback( > > > + s->vmnet_if, > > > + VMNET_INTERFACE_PACKETS_AVAILABLE, > > > + s->if_queue, > > > + ^(interface_event_t event_id, xpc_object_t event) { > > > + assert(event_id == > > VMNET_INTERFACE_PACKETS_AVAILABLE); > > > + /* > > > + * This function is being called from a non qemu > > thread, so > > > + * we only schedule a BH, and do the rest of the > > io completion > > > + * handling from vmnet_send_bh() which runs in a > > qemu 
context. > > > + * > > > + * Avoid scheduling multiple bottom halves > > > + */ > > > + if (!vmnet_is_send_bh_scheduled(s)) { > > > + vmnet_set_send_bh_scheduled(s, true); > > > > It can be interrupted between vmnet_is_send_bh_scheduled and > > vmnet_set_send_bh_scheduled, which leads to data race. > > > > > > Sorry, I did not clearly understand what you meant. Since this > > callback (block) is submitted on DISPATCH_QUEUE_SERIAL, > > only one instance of the callback will be executed at any > > moment of time. > > https://developer.apple.com/documentation/dispatch/dispatch_queue_serial > > < https://developer.apple.com/documentation/dispatch/dispatch_queue_serial> > > > > Also this is the only place where we schedule a bottom half. > > > > After we set the 'send_scheduled' flag, all the other > > callback blocks will do nothing (skip the if block) until > > the bottom half is executed and reset 'send_scheduled'. > > I don't see any races here :( > > > > Correct me if I'm wrong please. > > My explanation that the interruption between vmnet_is_send_bh_scheduled > and vmnet_set_send_bh_scheduled is problematic was actually misleading. > > The problem is that vmnet_send_bh resets 'send_scheduled' after calling > vmnet_read. If we got another packet after vmnet_read, it would be > silently ignored. > > By the way, I forgot to mention another problem: vmnet_send_completed > calls vmnet_set_send_enabled, but the other packets in the buffer may > not be sent yet. Also, unregistering callbacks in vmnet_set_send_enabled > is probably not enough to stop the callback being fired since some > dispatch blocks are already in the dispatch queue. > > Now I understand what you mean, thanks. What do you think about the workaround? For me it is quite a difficult question how to synchronize QEMU with the vmnet thread, especially with the completion callback. 
> Regards, > Akihiko Odaki > > > > > > + qemu_bh_schedule(s->send_bh); > > > + } > > > + }); > > > + } else { > > > + vmnet_interface_set_event_callback( > > > + s->vmnet_if, > > > + VMNET_INTERFACE_PACKETS_AVAILABLE, > > > + NULL, > > > + NULL); > > > + } > > > +} > > > + > > > + > > > +static void vmnet_send_completed(NetClientState *nc, ssize_t len) > > > +{ > > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > > > + vmnet_set_send_enabled(s, true); > > > +} > > > + > > > + > > > +static void vmnet_send_bh(void *opaque) > > > +{ > > > + NetClientState *nc = (NetClientState *) opaque; > > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > > > + > > > + struct iovec *iov = s->iov_buf; > > > + struct vmpktdesc *packets = s->packets_buf; > > > + int pkt_cnt; > > > + int i; > > > + > > > + vmnet_return_t status; > > > + ssize_t size; > > > + > > > + /* read as many packets as present */ > > > + pkt_cnt = VMNET_PACKETS_LIMIT; > > > > There could be more than VMNET_PACKETS_LIMIT. You may call > > vmnet_read in > > a loop. > > > > > + for (i = 0; i < pkt_cnt; ++i) { > > > + packets[i].vm_pkt_size = s->max_packet_size; > > > + packets[i].vm_pkt_iovcnt = 1; > > > + packets[i].vm_flags = 0; > > > + } > > > + > > > + status = vmnet_read(s->vmnet_if, packets, &pkt_cnt); > > > + if (status != VMNET_SUCCESS) { > > > + error_printf("vmnet: read failed: %s\n", > > > + vmnet_status_map_str(status)); > > > + goto done; > > > + } > > > + > > > + for (i = 0; i < pkt_cnt; ++i) { > > > + size = qemu_send_packet_async(nc, > > > + iov[i].iov_base, > > > + packets[i].vm_pkt_size, > > > + vmnet_send_completed); > > > + if (size == 0) { > > > + vmnet_set_send_enabled(s, false); > > > + goto done; > > > + } else if (size < 0) { > > > + break; > > > + } > > > > goto is not needed here. "break" when size <= 0. 
> > > > > + } > > > + > > > +done: > > > + vmnet_set_send_bh_scheduled(s, false); > > > +} > > > + > > > + > > > +static void vmnet_bufs_init(VmnetCommonState *s) > > > +{ > > > + struct vmpktdesc *packets = s->packets_buf; > > > + struct iovec *iov = s->iov_buf; > > > + int i; > > > + > > > + for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > > > + iov[i].iov_len = s->max_packet_size; > > > + iov[i].iov_base = g_malloc0(iov[i].iov_len); > > > + packets[i].vm_pkt_iov = iov + i; > > > + } > > > +} > > > + > > > + > > > +const char *vmnet_status_map_str(vmnet_return_t status) > > > +{ > > > + switch (status) { > > > + case VMNET_SUCCESS: > > > + return "success"; > > > + case VMNET_FAILURE: > > > + return "general failure (possibly not enough > privileges)"; > > > + case VMNET_MEM_FAILURE: > > > + return "memory allocation failure"; > > > + case VMNET_INVALID_ARGUMENT: > > > + return "invalid argument specified"; > > > + case VMNET_SETUP_INCOMPLETE: > > > + return "interface setup is not complete"; > > > + case VMNET_INVALID_ACCESS: > > > + return "invalid access, permission denied"; > > > + case VMNET_PACKET_TOO_BIG: > > > + return "packet size is larger than MTU"; > > > + case VMNET_BUFFER_EXHAUSTED: > > > + return "buffers exhausted in kernel"; > > > + case VMNET_TOO_MANY_PACKETS: > > > + return "packet count exceeds limit"; > > > +#if defined(MAC_OS_VERSION_11_0) && \ > > > + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 > > > + case VMNET_SHARING_SERVICE_BUSY: > > > + return "conflict, sharing service is in use"; > > > +#endif > > > + default: > > > + return "unknown vmnet error"; > > > + } > > > +} > > > + > > > + > > > +int vmnet_if_create(NetClientState *nc, > > > + xpc_object_t if_desc, > > > + Error **errp) > > > +{ > > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);; > > > > Duplicate semicolons. > > > > > + dispatch_semaphore_t if_created_sem = > > dispatch_semaphore_create(0); > > > > if_created_sem leaks. 
> > > > > + __block vmnet_return_t if_status; > > > + > > > + s->if_queue = dispatch_queue_create( > > > + "org.qemu.vmnet.if_queue", > > > + DISPATCH_QUEUE_SERIAL > > > + ); > > > + > > > + xpc_dictionary_set_bool( > > > + if_desc, > > > + vmnet_allocate_mac_address_key, > > > + false > > > + ); > > > +#ifdef DEBUG > > > + qemu_log("vmnet.start.interface_desc:\n"); > > > + xpc_dictionary_apply(if_desc, > > > + ^bool(const char *k, xpc_object_t v) { > > > + char *desc = > xpc_copy_description(v); > > > + qemu_log(" %s=%s\n", k, desc); > > > + free(desc); > > > + return true; > > > + }); > > > +#endif /* DEBUG */ > > > + > > > + s->vmnet_if = vmnet_start_interface( > > > + if_desc, > > > + s->if_queue, > > > + ^(vmnet_return_t status, xpc_object_t interface_param) { > > > + if_status = status; > > > + if (status != VMNET_SUCCESS || !interface_param) { > > > + dispatch_semaphore_signal(if_created_sem); > > > + return; > > > + } > > > + > > > +#ifdef DEBUG > > > + qemu_log("vmnet.start.interface_param:\n"); > > > + xpc_dictionary_apply(interface_param, > > > + ^bool(const char *k, > > xpc_object_t v) { > > > + char *desc = > > xpc_copy_description(v); > > > + qemu_log(" %s=%s\n", k, > desc); > > > + free(desc); > > > + return true; > > > + }); > > > +#endif /* DEBUG */ > > > + > > > + s->mtu = xpc_dictionary_get_uint64( > > > + interface_param, > > > + vmnet_mtu_key); > > > + s->max_packet_size = xpc_dictionary_get_uint64( > > > + interface_param, > > > + vmnet_max_packet_size_key); > > > + > > > + dispatch_semaphore_signal(if_created_sem); > > > + }); > > > + > > > + if (s->vmnet_if == NULL) { > > > + error_setg(errp, > > > + "unable to create interface " > > > + "with requested params"); > > > > You don't need line breaks here. Breaking a string into a few would > > also > > makes it a bit hard to grep. > > > > > + return -1; > > > > s->if_queue leaks. 
> > > > > + } > > > + > > > + dispatch_semaphore_wait(if_created_sem, > DISPATCH_TIME_FOREVER); > > > + > > > + if (if_status != VMNET_SUCCESS) { > > > + error_setg(errp, > > > + "cannot create vmnet interface: %s", > > > + vmnet_status_map_str(if_status)); > > > + return -1; > > > + } > > > + > > > + s->send_bh = aio_bh_new(qemu_get_aio_context(), > > vmnet_send_bh, nc); > > > + vmnet_bufs_init(s); > > > + vmnet_set_send_bh_scheduled(s, false); > > > + vmnet_set_send_enabled(s, true); > > > + return 0; > > > +} > > > + > > > + > > > +ssize_t vmnet_receive_common(NetClientState *nc, > > > + const uint8_t *buf, > > > + size_t size) > > > +{ > > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > > > + struct vmpktdesc packet; > > > + struct iovec iov; > > > + int pkt_cnt; > > > + vmnet_return_t if_status; > > > + > > > + if (size > s->max_packet_size) { > > > + warn_report("vmnet: packet is too big, %zu > %llu\n", > > > > Use PRIu64. > > > > > + packet.vm_pkt_size, > > > + s->max_packet_size); > > > + return -1; > > > + } > > > + > > > + iov.iov_base = (char *) buf; > > > + iov.iov_len = size; > > > + > > > + packet.vm_pkt_iovcnt = 1; > > > + packet.vm_flags = 0; > > > + packet.vm_pkt_size = size; > > > + packet.vm_pkt_iov = &iov; > > > + pkt_cnt = 1; > > > + > > > + if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt); > > > + if (if_status != VMNET_SUCCESS) { > > > + error_report("vmnet: write error: %s\n", > > > + vmnet_status_map_str(if_status)); > > > > Why don't return -1? > > > > > + } > > > + > > > + if (if_status == VMNET_SUCCESS && pkt_cnt) { > > > + return size; > > > + } > > > + return 0; > > > +} > > > + > > > + > > > +void vmnet_cleanup_common(NetClientState *nc) > > > +{ > > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);; > > > > Duplicate semicolons. 
> > > > > + dispatch_semaphore_t if_created_sem; > > > + > > > + qemu_purge_queued_packets(nc); > + > > vmnet_set_send_bh_scheduled(s, true); > > > + vmnet_set_send_enabled(s, false); > > > + > > > + if (s->vmnet_if == NULL) { > > > + return; > > > + } > > > + > > > + if_created_sem = dispatch_semaphore_create(0); > > > + vmnet_stop_interface( > > > + s->vmnet_if, > > > + s->if_queue, > > > + ^(vmnet_return_t status) { > > > + assert(status == VMNET_SUCCESS); > > > + dispatch_semaphore_signal(if_created_sem); > > > + }); > > > + dispatch_semaphore_wait(if_created_sem, > DISPATCH_TIME_FOREVER); > > > + > > > + qemu_bh_delete(s->send_bh); > > > + dispatch_release(if_created_sem); > > > + dispatch_release(s->if_queue); > > > + > > > + for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > > > + g_free(s->iov_buf[i].iov_base); > > > + } > > > +} > > > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c > > > index f07afaaf21..66f66c034b 100644 > > > --- a/net/vmnet-shared.c > > > +++ b/net/vmnet-shared.c > > > @@ -10,16 +10,102 @@ > > > > > > #include "qemu/osdep.h" > > > #include "qapi/qapi-types-net.h" > > > +#include "qapi/error.h" > > > #include "vmnet_int.h" > > > #include "clients.h" > > > -#include "qemu/error-report.h" > > > -#include "qapi/error.h" > > > > > > #include <vmnet/vmnet.h> > > > > > > +typedef struct VmnetSharedState { > > > + VmnetCommonState cs; > > > +} VmnetSharedState; > > > + > > > + > > > +static bool validate_options(const Netdev *netdev, Error **errp) > > > +{ > > > + const NetdevVmnetSharedOptions *options = > > &(netdev->u.vmnet_shared); > > > + > > > +#if !defined(MAC_OS_VERSION_11_0) || \ > > > + MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 > > > + if (options->has_isolated) { > > > + error_setg(errp, > > > + "vmnet-shared.isolated feature is " > > > + "unavailable: outdated vmnet.framework API"); > > > + return false; > > > + } > > > +#endif > > > + > > > + if ((options->has_start_address || > > > + options->has_end_address || > > 
> + options->has_subnet_mask) && > > > + !(options->has_start_address && > > > + options->has_end_address && > > > + options->has_subnet_mask)) { > > > + error_setg(errp, > > > + "'start-address', 'end-address', > 'subnet-mask' " > > > + "should be provided together" > > > + ); > > > + return false; > > > + } > > > + > > > + return true; > > > +} > > > + > > > +static xpc_object_t build_if_desc(const Netdev *netdev) > > > +{ > > > + const NetdevVmnetSharedOptions *options = > > &(netdev->u.vmnet_shared); > > > + xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0); > > > + > > > + xpc_dictionary_set_uint64( > > > + if_desc, > > > + vmnet_operation_mode_key, > > > + VMNET_SHARED_MODE > > > + ); > > > + > > > + if (options->has_nat66_prefix) { > > > + xpc_dictionary_set_string(if_desc, > > > + vmnet_nat66_prefix_key, > > > + options->nat66_prefix); > > > + } > > > + > > > + if (options->has_start_address) { > > > + xpc_dictionary_set_string(if_desc, > > > + vmnet_start_address_key, > > > + options->start_address); > > > + xpc_dictionary_set_string(if_desc, > > > + vmnet_end_address_key, > > > + options->end_address); > > > + xpc_dictionary_set_string(if_desc, > > > + vmnet_subnet_mask_key, > > > + options->subnet_mask); > > > + } > > > + > > > +#if defined(MAC_OS_VERSION_11_0) && \ > > > + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 > > > + xpc_dictionary_set_bool( > > > + if_desc, > > > + vmnet_enable_isolation_key, > > > + options->isolated > > > + ); > > > +#endif > > > + > > > + return if_desc; > > > +} > > > + > > > +static NetClientInfo net_vmnet_shared_info = { > > > + .type = NET_CLIENT_DRIVER_VMNET_SHARED, > > > + .size = sizeof(VmnetSharedState), > > > + .receive = vmnet_receive_common, > > > + .cleanup = vmnet_cleanup_common, > > > +}; > > > + > > > int net_init_vmnet_shared(const Netdev *netdev, const char > *name, > > > NetClientState *peer, Error **errp) > > > { > > > - error_setg(errp, "vmnet-shared is not implemented yet"); > > > - 
return -1; > > > + NetClientState *nc = > qemu_new_net_client(&net_vmnet_shared_info, > > > + peer, > > "vmnet-shared", name); > > > + if (!validate_options(netdev, errp)) { > > > + g_assert_not_reached(); > > > > g_assert_not_reached is for debugging purpose and may be dropped > > depending on the build option. > > > > > + } > > > + return vmnet_if_create(nc, build_if_desc(netdev), errp); > > > } > > > diff --git a/net/vmnet_int.h b/net/vmnet_int.h > > > index aac4d5af64..acfe3a88c0 100644 > > > --- a/net/vmnet_int.h > > > +++ b/net/vmnet_int.h > > > @@ -15,11 +15,48 @@ > > > #include "clients.h" > > > > > > #include <vmnet/vmnet.h> > > > +#include <dispatch/dispatch.h> > > > + > > > +/** > > > + * From vmnet.framework documentation > > > + * > > > + * Each read/write call allows up to 200 packets to be > > > + * read or written for a maximum of 256KB. > > > + * > > > + * Each packet written should be a complete > > > + * ethernet frame. > > > + * > > > + * https://developer.apple.com/documentation/vmnet > > <https://developer.apple.com/documentation/vmnet> > > > + */ > > > +#define VMNET_PACKETS_LIMIT 200 > > > > > > typedef struct VmnetCommonState { > > > - NetClientState nc; > > > + NetClientState nc; > > > + interface_ref vmnet_if; > > > + > > > + bool send_scheduled; > > > > > > + uint64_t mtu; > > > + uint64_t max_packet_size; > > > + > > > + struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT]; > > > + struct iovec iov_buf[VMNET_PACKETS_LIMIT]; > > > + > > > + dispatch_queue_t if_queue; > > > + > > > + QEMUBH *send_bh; > > > } VmnetCommonState; > > > > > > +const char *vmnet_status_map_str(vmnet_return_t status); > > > + > > > +int vmnet_if_create(NetClientState *nc, > > > + xpc_object_t if_desc, > > > + Error **errp); > > > + > > > +ssize_t vmnet_receive_common(NetClientState *nc, > > > + const uint8_t *buf, > > > + size_t size); > > > + > > > +void vmnet_cleanup_common(NetClientState *nc); > > > > > > #endif /* VMNET_INT_H */ > > > > > > Other issues will be 
fixed and submitted later, > > thank you! > > > > Regards, > > Vladislav Yaroshchuk > > Regards, Vladislav Yaroshchuk
On Sat, Feb 26, 2022 at 8:33 PM Vladislav Yaroshchuk <vladislav.yaroshchuk@jetbrains.com> wrote: > > > > On Sat, Feb 26, 2022 at 12:16 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: >> >> On 2022/02/26 17:37, Vladislav Yaroshchuk wrote: >> > >> > Hi Akihiko, >> > >> > On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki <akihiko.odaki@gmail.com >> > <mailto:akihiko.odaki@gmail.com>> wrote: >> > >> > On 2022/02/26 2:13, Vladislav Yaroshchuk wrote: >> > > Interaction with vmnet.framework in different modes >> > > differs only on configuration stage, so we can create >> > > common `send`, `receive`, etc. procedures and reuse them. >> > > >> > > vmnet.framework supports iov, but writing more than >> > > one iov into vmnet interface fails with >> > > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into >> > > one and passing it to vmnet works fine. That's the >> > > reason why receive_iov() left unimplemented. But it still >> > > works with good enough performance having .receive() >> > > net/vmnet: implement shared mode (vmnet-shared) >> > > >> > > Interaction with vmnet.framework in different modes >> > > differs only on configuration stage, so we can create >> > > common `send`, `receive`, etc. procedures and reuse them. >> > > >> > > vmnet.framework supports iov, but writing more than >> > > one iov into vmnet interface fails with >> > > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into >> > > one and passing it to vmnet works fine. That's the >> > > reason why receive_iov() left unimplemented. But it still >> > > works with good enough performance having .receive() >> > > implemented only. >> > > >> > > Also, there is no way to unsubscribe from vmnet packages >> > > receiving except registering and unregistering event >> > > callback or simply drop packages just ignoring and >> > > not processing them when related flag is set. Here we do >> > > using the second way. 
>> > > >> > > Signed-off-by: Phillip Tennen <phillip@axleos.com >> > <mailto:phillip@axleos.com>> >> > > Signed-off-by: Vladislav Yaroshchuk >> > <Vladislav.Yaroshchuk@jetbrains.com >> > <mailto:Vladislav.Yaroshchuk@jetbrains.com>> >> > >> > Thank you for persistently working on this. >> > >> > > --- >> > > net/vmnet-common.m | 302 >> > +++++++++++++++++++++++++++++++++++++++++++++ >> > > net/vmnet-shared.c | 94 +++++++++++++- >> > > net/vmnet_int.h | 39 +++++- >> > > 3 files changed, 430 insertions(+), 5 deletions(-) >> > > >> > > diff --git a/net/vmnet-common.m b/net/vmnet-common.m >> > > index 56612c72ce..2f70921cae 100644 >> > > --- a/net/vmnet-common.m >> > > +++ b/net/vmnet-common.m >> > > @@ -10,6 +10,8 @@ >> > > */ >> > > >> > > #include "qemu/osdep.h" >> > > +#include "qemu/main-loop.h" >> > > +#include "qemu/log.h" >> > > #include "qapi/qapi-types-net.h" >> > > #include "vmnet_int.h" >> > > #include "clients.h" >> > > @@ -17,4 +19,304 @@ >> > > #include "qapi/error.h" >> > > >> > > #include <vmnet/vmnet.h> >> > > +#include <dispatch/dispatch.h> >> > > >> > > + >> > > +static inline void vmnet_set_send_bh_scheduled(VmnetCommonState *s, >> > > + bool enable) >> > > +{ >> > > + qatomic_set(&s->send_scheduled, enable); >> > > +} >> > > + >> > > + >> > > +static inline bool vmnet_is_send_bh_scheduled(VmnetCommonState *s) >> > > +{ >> > > + return qatomic_load_acquire(&s->send_scheduled); >> > > +} >> > > + >> > > + >> > > +static inline void vmnet_set_send_enabled(VmnetCommonState *s, >> > > + bool enable) >> > > +{ >> > > + if (enable) { >> > > + vmnet_interface_set_event_callback( >> > > + s->vmnet_if, >> > > + VMNET_INTERFACE_PACKETS_AVAILABLE, >> > > + s->if_queue, >> > > + ^(interface_event_t event_id, xpc_object_t event) { >> > > + assert(event_id == >> > VMNET_INTERFACE_PACKETS_AVAILABLE); >> > > + /* >> > > + * This function is being called from a non qemu >> > thread, so >> > > + * we only schedule a BH, and do the rest of the >> > io completion >> > 
> + * handling from vmnet_send_bh() which runs in a >> > qemu context. >> > > + * >> > > + * Avoid scheduling multiple bottom halves >> > > + */ >> > > + if (!vmnet_is_send_bh_scheduled(s)) { >> > > + vmnet_set_send_bh_scheduled(s, true); >> > >> > It can be interrupted between vmnet_is_send_bh_scheduled and >> > vmnet_set_send_bh_scheduled, which leads to data race. >> > >> > >> > Sorry, I did not clearly understand what you meant. Since this >> > callback (block) is submitted on DISPATCH_QUEUE_SERIAL, >> > only one instance of the callback will be executed at any >> > moment of time. >> > https://developer.apple.com/documentation/dispatch/dispatch_queue_serial >> > <https://developer.apple.com/documentation/dispatch/dispatch_queue_serial> >> > >> > Also this is the only place where we schedule a bottom half. >> > >> > After we set the 'send_scheduled' flag, all the other >> > callback blocks will do nothing (skip the if block) until >> > the bottom half is executed and reset 'send_scheduled'. >> > I don't see any races here :( >> > >> > Correct me if I'm wrong please. >> >> My explanation that the interruption between vmnet_is_send_bh_scheduled >> and vmnet_set_send_bh_scheduled is problematic was actually misleading. >> >> The problem is that vmnet_send_bh resets 'send_scheduled' after calling >> vmnet_read. If we got another packet after vmnet_read, it would be >> silently ignored. >> >> By the way, I forgot to mention another problem: vmnet_send_completed >> calls vmnet_set_send_enabled, but the other packets in the buffer may >> not be sent yet. Also, unregistering callbacks in vmnet_set_send_enabled >> is probably not enough to stop the callback being fired since some >> dispatch blocks are already in the dispatch queue. >> > > Now I understand what you mean, thanks. > What do you think about the workaround? For me > it is quite difficult question how to synchronize qemu with > vmnet thread, especially with completion callback. 
You may add a new field to represent the number of packets being sent in the buffer. The field must be maintained by vmnet_send_bh and vmnet_send_completed, which are on the iothread. vmnet_send_bh should do nothing if the field is greater than 0 at the beginning of the function. vmnet_send_completed should call qemu_bh_schedule(s->send_bh). vmnet_set_send_enabled and send_scheduled can be simply removed. qemu_bh_schedule ensures there is no duplicate scheduling. Regards, Akihiko Odaki > > >> >> Regards, >> Akihiko Odaki >> >> > >> > > + qemu_bh_schedule(s->send_bh); >> > > + } >> > > + }); >> > > + } else { >> > > + vmnet_interface_set_event_callback( >> > > + s->vmnet_if, >> > > + VMNET_INTERFACE_PACKETS_AVAILABLE, >> > > + NULL, >> > > + NULL); >> > > + } >> > > +} >> > > + >> > > + >> > > +static void vmnet_send_completed(NetClientState *nc, ssize_t len) >> > > +{ >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); >> > > + vmnet_set_send_enabled(s, true); >> > > +} >> > > + >> > > + >> > > +static void vmnet_send_bh(void *opaque) >> > > +{ >> > > + NetClientState *nc = (NetClientState *) opaque; >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); >> > > + >> > > + struct iovec *iov = s->iov_buf; >> > > + struct vmpktdesc *packets = s->packets_buf; >> > > + int pkt_cnt; >> > > + int i; >> > > + >> > > + vmnet_return_t status; >> > > + ssize_t size; >> > > + >> > > + /* read as many packets as present */ >> > > + pkt_cnt = VMNET_PACKETS_LIMIT; >> > >> > There could be more than VMNET_PACKETS_LIMIT. You may call >> > vmnet_read in >> > a loop. 
>> > >> > > + for (i = 0; i < pkt_cnt; ++i) { >> > > + packets[i].vm_pkt_size = s->max_packet_size; >> > > + packets[i].vm_pkt_iovcnt = 1; >> > > + packets[i].vm_flags = 0; >> > > + } >> > > + >> > > + status = vmnet_read(s->vmnet_if, packets, &pkt_cnt); >> > > + if (status != VMNET_SUCCESS) { >> > > + error_printf("vmnet: read failed: %s\n", >> > > + vmnet_status_map_str(status)); >> > > + goto done; >> > > + } >> > > + >> > > + for (i = 0; i < pkt_cnt; ++i) { >> > > + size = qemu_send_packet_async(nc, >> > > + iov[i].iov_base, >> > > + packets[i].vm_pkt_size, >> > > + vmnet_send_completed); >> > > + if (size == 0) { >> > > + vmnet_set_send_enabled(s, false); >> > > + goto done; >> > > + } else if (size < 0) { >> > > + break; >> > > + } >> > >> > goto is not needed here. "break" when size <= 0. >> > >> > > + } >> > > + >> > > +done: >> > > + vmnet_set_send_bh_scheduled(s, false); >> > > +} >> > > + >> > > + >> > > +static void vmnet_bufs_init(VmnetCommonState *s) >> > > +{ >> > > + struct vmpktdesc *packets = s->packets_buf; >> > > + struct iovec *iov = s->iov_buf; >> > > + int i; >> > > + >> > > + for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) { >> > > + iov[i].iov_len = s->max_packet_size; >> > > + iov[i].iov_base = g_malloc0(iov[i].iov_len); >> > > + packets[i].vm_pkt_iov = iov + i; >> > > + } >> > > +} >> > > + >> > > + >> > > +const char *vmnet_status_map_str(vmnet_return_t status) >> > > +{ >> > > + switch (status) { >> > > + case VMNET_SUCCESS: >> > > + return "success"; >> > > + case VMNET_FAILURE: >> > > + return "general failure (possibly not enough privileges)"; >> > > + case VMNET_MEM_FAILURE: >> > > + return "memory allocation failure"; >> > > + case VMNET_INVALID_ARGUMENT: >> > > + return "invalid argument specified"; >> > > + case VMNET_SETUP_INCOMPLETE: >> > > + return "interface setup is not complete"; >> > > + case VMNET_INVALID_ACCESS: >> > > + return "invalid access, permission denied"; >> > > + case VMNET_PACKET_TOO_BIG: >> > > + return "packet 
size is larger than MTU"; >> > > + case VMNET_BUFFER_EXHAUSTED: >> > > + return "buffers exhausted in kernel"; >> > > + case VMNET_TOO_MANY_PACKETS: >> > > + return "packet count exceeds limit"; >> > > +#if defined(MAC_OS_VERSION_11_0) && \ >> > > + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 >> > > + case VMNET_SHARING_SERVICE_BUSY: >> > > + return "conflict, sharing service is in use"; >> > > +#endif >> > > + default: >> > > + return "unknown vmnet error"; >> > > + } >> > > +} >> > > + >> > > + >> > > +int vmnet_if_create(NetClientState *nc, >> > > + xpc_object_t if_desc, >> > > + Error **errp) >> > > +{ >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);; >> > >> > Duplicate semicolons. >> > >> > > + dispatch_semaphore_t if_created_sem = >> > dispatch_semaphore_create(0); >> > >> > if_created_sem leaks. >> > >> > > + __block vmnet_return_t if_status; >> > > + >> > > + s->if_queue = dispatch_queue_create( >> > > + "org.qemu.vmnet.if_queue", >> > > + DISPATCH_QUEUE_SERIAL >> > > + ); >> > > + >> > > + xpc_dictionary_set_bool( >> > > + if_desc, >> > > + vmnet_allocate_mac_address_key, >> > > + false >> > > + ); >> > > +#ifdef DEBUG >> > > + qemu_log("vmnet.start.interface_desc:\n"); >> > > + xpc_dictionary_apply(if_desc, >> > > + ^bool(const char *k, xpc_object_t v) { >> > > + char *desc = xpc_copy_description(v); >> > > + qemu_log(" %s=%s\n", k, desc); >> > > + free(desc); >> > > + return true; >> > > + }); >> > > +#endif /* DEBUG */ >> > > + >> > > + s->vmnet_if = vmnet_start_interface( >> > > + if_desc, >> > > + s->if_queue, >> > > + ^(vmnet_return_t status, xpc_object_t interface_param) { >> > > + if_status = status; >> > > + if (status != VMNET_SUCCESS || !interface_param) { >> > > + dispatch_semaphore_signal(if_created_sem); >> > > + return; >> > > + } >> > > + >> > > +#ifdef DEBUG >> > > + qemu_log("vmnet.start.interface_param:\n"); >> > > + xpc_dictionary_apply(interface_param, >> > > + ^bool(const char *k, >> > xpc_object_t v) { >> 
> > + char *desc = >> > xpc_copy_description(v); >> > > + qemu_log(" %s=%s\n", k, desc); >> > > + free(desc); >> > > + return true; >> > > + }); >> > > +#endif /* DEBUG */ >> > > + >> > > + s->mtu = xpc_dictionary_get_uint64( >> > > + interface_param, >> > > + vmnet_mtu_key); >> > > + s->max_packet_size = xpc_dictionary_get_uint64( >> > > + interface_param, >> > > + vmnet_max_packet_size_key); >> > > + >> > > + dispatch_semaphore_signal(if_created_sem); >> > > + }); >> > > + >> > > + if (s->vmnet_if == NULL) { >> > > + error_setg(errp, >> > > + "unable to create interface " >> > > + "with requested params"); >> > >> > You don't need line breaks here. Breaking a string into a few would >> > also >> > makes it a bit hard to grep. >> > >> > > + return -1; >> > >> > s->if_queue leaks. >> > >> > > + } >> > > + >> > > + dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER); >> > > + >> > > + if (if_status != VMNET_SUCCESS) { >> > > + error_setg(errp, >> > > + "cannot create vmnet interface: %s", >> > > + vmnet_status_map_str(if_status)); >> > > + return -1; >> > > + } >> > > + >> > > + s->send_bh = aio_bh_new(qemu_get_aio_context(), >> > vmnet_send_bh, nc); >> > > + vmnet_bufs_init(s); >> > > + vmnet_set_send_bh_scheduled(s, false); >> > > + vmnet_set_send_enabled(s, true); >> > > + return 0; >> > > +} >> > > + >> > > + >> > > +ssize_t vmnet_receive_common(NetClientState *nc, >> > > + const uint8_t *buf, >> > > + size_t size) >> > > +{ >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); >> > > + struct vmpktdesc packet; >> > > + struct iovec iov; >> > > + int pkt_cnt; >> > > + vmnet_return_t if_status; >> > > + >> > > + if (size > s->max_packet_size) { >> > > + warn_report("vmnet: packet is too big, %zu > %llu\n", >> > >> > Use PRIu64. 
>> > >> > > + packet.vm_pkt_size, >> > > + s->max_packet_size); >> > > + return -1; >> > > + } >> > > + >> > > + iov.iov_base = (char *) buf; >> > > + iov.iov_len = size; >> > > + >> > > + packet.vm_pkt_iovcnt = 1; >> > > + packet.vm_flags = 0; >> > > + packet.vm_pkt_size = size; >> > > + packet.vm_pkt_iov = &iov; >> > > + pkt_cnt = 1; >> > > + >> > > + if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt); >> > > + if (if_status != VMNET_SUCCESS) { >> > > + error_report("vmnet: write error: %s\n", >> > > + vmnet_status_map_str(if_status)); >> > >> > Why don't return -1? >> > >> > > + } >> > > + >> > > + if (if_status == VMNET_SUCCESS && pkt_cnt) { >> > > + return size; >> > > + } >> > > + return 0; >> > > +} >> > > + >> > > + >> > > +void vmnet_cleanup_common(NetClientState *nc) >> > > +{ >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);; >> > >> > Duplicate semicolons. >> > >> > > + dispatch_semaphore_t if_created_sem; >> > > + >> > > + qemu_purge_queued_packets(nc); > + >> > vmnet_set_send_bh_scheduled(s, true); >> > > + vmnet_set_send_enabled(s, false); >> > > + >> > > + if (s->vmnet_if == NULL) { >> > > + return; >> > > + } >> > > + >> > > + if_created_sem = dispatch_semaphore_create(0); >> > > + vmnet_stop_interface( >> > > + s->vmnet_if, >> > > + s->if_queue, >> > > + ^(vmnet_return_t status) { >> > > + assert(status == VMNET_SUCCESS); >> > > + dispatch_semaphore_signal(if_created_sem); >> > > + }); >> > > + dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER); >> > > + >> > > + qemu_bh_delete(s->send_bh); >> > > + dispatch_release(if_created_sem); >> > > + dispatch_release(s->if_queue); >> > > + >> > > + for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) { >> > > + g_free(s->iov_buf[i].iov_base); >> > > + } >> > > +} >> > > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c >> > > index f07afaaf21..66f66c034b 100644 >> > > --- a/net/vmnet-shared.c >> > > +++ b/net/vmnet-shared.c >> > > @@ -10,16 +10,102 @@ >> > > >> > > #include 
"qemu/osdep.h" >> > > #include "qapi/qapi-types-net.h" >> > > +#include "qapi/error.h" >> > > #include "vmnet_int.h" >> > > #include "clients.h" >> > > -#include "qemu/error-report.h" >> > > -#include "qapi/error.h" >> > > >> > > #include <vmnet/vmnet.h> >> > > >> > > +typedef struct VmnetSharedState { >> > > + VmnetCommonState cs; >> > > +} VmnetSharedState; >> > > + >> > > + >> > > +static bool validate_options(const Netdev *netdev, Error **errp) >> > > +{ >> > > + const NetdevVmnetSharedOptions *options = >> > &(netdev->u.vmnet_shared); >> > > + >> > > +#if !defined(MAC_OS_VERSION_11_0) || \ >> > > + MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 >> > > + if (options->has_isolated) { >> > > + error_setg(errp, >> > > + "vmnet-shared.isolated feature is " >> > > + "unavailable: outdated vmnet.framework API"); >> > > + return false; >> > > + } >> > > +#endif >> > > + >> > > + if ((options->has_start_address || >> > > + options->has_end_address || >> > > + options->has_subnet_mask) && >> > > + !(options->has_start_address && >> > > + options->has_end_address && >> > > + options->has_subnet_mask)) { >> > > + error_setg(errp, >> > > + "'start-address', 'end-address', 'subnet-mask' " >> > > + "should be provided together" >> > > + ); >> > > + return false; >> > > + } >> > > + >> > > + return true; >> > > +} >> > > + >> > > +static xpc_object_t build_if_desc(const Netdev *netdev) >> > > +{ >> > > + const NetdevVmnetSharedOptions *options = >> > &(netdev->u.vmnet_shared); >> > > + xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0); >> > > + >> > > + xpc_dictionary_set_uint64( >> > > + if_desc, >> > > + vmnet_operation_mode_key, >> > > + VMNET_SHARED_MODE >> > > + ); >> > > + >> > > + if (options->has_nat66_prefix) { >> > > + xpc_dictionary_set_string(if_desc, >> > > + vmnet_nat66_prefix_key, >> > > + options->nat66_prefix); >> > > + } >> > > + >> > > + if (options->has_start_address) { >> > > + xpc_dictionary_set_string(if_desc, >> > > + 
vmnet_start_address_key, >> > > + options->start_address); >> > > + xpc_dictionary_set_string(if_desc, >> > > + vmnet_end_address_key, >> > > + options->end_address); >> > > + xpc_dictionary_set_string(if_desc, >> > > + vmnet_subnet_mask_key, >> > > + options->subnet_mask); >> > > + } >> > > + >> > > +#if defined(MAC_OS_VERSION_11_0) && \ >> > > + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 >> > > + xpc_dictionary_set_bool( >> > > + if_desc, >> > > + vmnet_enable_isolation_key, >> > > + options->isolated >> > > + ); >> > > +#endif >> > > + >> > > + return if_desc; >> > > +} >> > > + >> > > +static NetClientInfo net_vmnet_shared_info = { >> > > + .type = NET_CLIENT_DRIVER_VMNET_SHARED, >> > > + .size = sizeof(VmnetSharedState), >> > > + .receive = vmnet_receive_common, >> > > + .cleanup = vmnet_cleanup_common, >> > > +}; >> > > + >> > > int net_init_vmnet_shared(const Netdev *netdev, const char *name, >> > > NetClientState *peer, Error **errp) >> > > { >> > > - error_setg(errp, "vmnet-shared is not implemented yet"); >> > > - return -1; >> > > + NetClientState *nc = qemu_new_net_client(&net_vmnet_shared_info, >> > > + peer, >> > "vmnet-shared", name); >> > > + if (!validate_options(netdev, errp)) { >> > > + g_assert_not_reached(); >> > >> > g_assert_not_reached is for debugging purpose and may be dropped >> > depending on the build option. >> > >> > > + } >> > > + return vmnet_if_create(nc, build_if_desc(netdev), errp); >> > > } >> > > diff --git a/net/vmnet_int.h b/net/vmnet_int.h >> > > index aac4d5af64..acfe3a88c0 100644 >> > > --- a/net/vmnet_int.h >> > > +++ b/net/vmnet_int.h >> > > @@ -15,11 +15,48 @@ >> > > #include "clients.h" >> > > >> > > #include <vmnet/vmnet.h> >> > > +#include <dispatch/dispatch.h> >> > > + >> > > +/** >> > > + * From vmnet.framework documentation >> > > + * >> > > + * Each read/write call allows up to 200 packets to be >> > > + * read or written for a maximum of 256KB. 
>> > > + * >> > > + * Each packet written should be a complete >> > > + * ethernet frame. >> > > + * >> > > + * https://developer.apple.com/documentation/vmnet >> > <https://developer.apple.com/documentation/vmnet> >> > > + */ >> > > +#define VMNET_PACKETS_LIMIT 200 >> > > >> > > typedef struct VmnetCommonState { >> > > - NetClientState nc; >> > > + NetClientState nc; >> > > + interface_ref vmnet_if; >> > > + >> > > + bool send_scheduled; >> > > >> > > + uint64_t mtu; >> > > + uint64_t max_packet_size; >> > > + >> > > + struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT]; >> > > + struct iovec iov_buf[VMNET_PACKETS_LIMIT]; >> > > + >> > > + dispatch_queue_t if_queue; >> > > + >> > > + QEMUBH *send_bh; >> > > } VmnetCommonState; >> > > >> > > +const char *vmnet_status_map_str(vmnet_return_t status); >> > > + >> > > +int vmnet_if_create(NetClientState *nc, >> > > + xpc_object_t if_desc, >> > > + Error **errp); >> > > + >> > > +ssize_t vmnet_receive_common(NetClientState *nc, >> > > + const uint8_t *buf, >> > > + size_t size); >> > > + >> > > +void vmnet_cleanup_common(NetClientState *nc); >> > > >> > > #endif /* VMNET_INT_H */ >> > >> > >> > Other issues will be fixed and submitted later, >> > thank you! >> > >> > Regards, >> > Vladislav Yaroshchuk >> > > Regards, > Vladislav Yaroshchuk
On Sat, Feb 26, 2022 at 3:27 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > On Sat, Feb 26, 2022 at 8:33 PM Vladislav Yaroshchuk > <vladislav.yaroshchuk@jetbrains.com> wrote: > > > > > > > > On Sat, Feb 26, 2022 at 12:16 PM Akihiko Odaki <akihiko.odaki@gmail.com> > wrote: > >> > >> On 2022/02/26 17:37, Vladislav Yaroshchuk wrote: > >> > > >> > Hi Akihiko, > >> > > >> > On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki < > akihiko.odaki@gmail.com > >> > <mailto:akihiko.odaki@gmail.com>> wrote: > >> > > >> > On 2022/02/26 2:13, Vladislav Yaroshchuk wrote: > >> > > Interaction with vmnet.framework in different modes > >> > > differs only on configuration stage, so we can create > >> > > common `send`, `receive`, etc. procedures and reuse them. > >> > > > >> > > vmnet.framework supports iov, but writing more than > >> > > one iov into vmnet interface fails with > >> > > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into > >> > > one and passing it to vmnet works fine. That's the > >> > > reason why receive_iov() left unimplemented. But it still > >> > > works with good enough performance having .receive() > >> > > net/vmnet: implement shared mode (vmnet-shared) > >> > > > >> > > Interaction with vmnet.framework in different modes > >> > > differs only on configuration stage, so we can create > >> > > common `send`, `receive`, etc. procedures and reuse them. > >> > > > >> > > vmnet.framework supports iov, but writing more than > >> > > one iov into vmnet interface fails with > >> > > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into > >> > > one and passing it to vmnet works fine. That's the > >> > > reason why receive_iov() left unimplemented. But it still > >> > > works with good enough performance having .receive() > >> > > implemented only. 
> >> > > > >> > > Also, there is no way to unsubscribe from vmnet packages > >> > > receiving except registering and unregistering event > >> > > callback or simply drop packages just ignoring and > >> > > not processing them when related flag is set. Here we do > >> > > using the second way. > >> > > > >> > > Signed-off-by: Phillip Tennen <phillip@axleos.com > >> > <mailto:phillip@axleos.com>> > >> > > Signed-off-by: Vladislav Yaroshchuk > >> > <Vladislav.Yaroshchuk@jetbrains.com > >> > <mailto:Vladislav.Yaroshchuk@jetbrains.com>> > >> > > >> > Thank you for persistently working on this. > >> > > >> > > --- > >> > > net/vmnet-common.m | 302 > >> > +++++++++++++++++++++++++++++++++++++++++++++ > >> > > net/vmnet-shared.c | 94 +++++++++++++- > >> > > net/vmnet_int.h | 39 +++++- > >> > > 3 files changed, 430 insertions(+), 5 deletions(-) > >> > > > >> > > diff --git a/net/vmnet-common.m b/net/vmnet-common.m > >> > > index 56612c72ce..2f70921cae 100644 > >> > > --- a/net/vmnet-common.m > >> > > +++ b/net/vmnet-common.m > >> > > @@ -10,6 +10,8 @@ > >> > > */ > >> > > > >> > > #include "qemu/osdep.h" > >> > > +#include "qemu/main-loop.h" > >> > > +#include "qemu/log.h" > >> > > #include "qapi/qapi-types-net.h" > >> > > #include "vmnet_int.h" > >> > > #include "clients.h" > >> > > @@ -17,4 +19,304 @@ > >> > > #include "qapi/error.h" > >> > > > >> > > #include <vmnet/vmnet.h> > >> > > +#include <dispatch/dispatch.h> > >> > > > >> > > + > >> > > +static inline void > vmnet_set_send_bh_scheduled(VmnetCommonState *s, > >> > > + bool enable) > >> > > +{ > >> > > + qatomic_set(&s->send_scheduled, enable); > >> > > +} > >> > > + > >> > > + > >> > > +static inline bool > vmnet_is_send_bh_scheduled(VmnetCommonState *s) > >> > > +{ > >> > > + return qatomic_load_acquire(&s->send_scheduled); > >> > > +} > >> > > + > >> > > + > >> > > +static inline void vmnet_set_send_enabled(VmnetCommonState *s, > >> > > + bool enable) > >> > > +{ > >> > > + if (enable) { > >> > > + 
vmnet_interface_set_event_callback( > >> > > + s->vmnet_if, > >> > > + VMNET_INTERFACE_PACKETS_AVAILABLE, > >> > > + s->if_queue, > >> > > + ^(interface_event_t event_id, xpc_object_t event) > { > >> > > + assert(event_id == > >> > VMNET_INTERFACE_PACKETS_AVAILABLE); > >> > > + /* > >> > > + * This function is being called from a non > qemu > >> > thread, so > >> > > + * we only schedule a BH, and do the rest of > the > >> > io completion > >> > > + * handling from vmnet_send_bh() which runs > in a > >> > qemu context. > >> > > + * > >> > > + * Avoid scheduling multiple bottom halves > >> > > + */ > >> > > + if (!vmnet_is_send_bh_scheduled(s)) { > >> > > + vmnet_set_send_bh_scheduled(s, true); > >> > > >> > It can be interrupted between vmnet_is_send_bh_scheduled and > >> > vmnet_set_send_bh_scheduled, which leads to data race. > >> > > >> > > >> > Sorry, I did not clearly understand what you meant. Since this > >> > callback (block) is submitted on DISPATCH_QUEUE_SERIAL, > >> > only one instance of the callback will be executed at any > >> > moment of time. > >> > > https://developer.apple.com/documentation/dispatch/dispatch_queue_serial > >> > < > https://developer.apple.com/documentation/dispatch/dispatch_queue_serial> > >> > > >> > Also this is the only place where we schedule a bottom half. > >> > > >> > After we set the 'send_scheduled' flag, all the other > >> > callback blocks will do nothing (skip the if block) until > >> > the bottom half is executed and reset 'send_scheduled'. > >> > I don't see any races here :( > >> > > >> > Correct me if I'm wrong please. > >> > >> My explanation that the interruption between vmnet_is_send_bh_scheduled > >> and vmnet_set_send_bh_scheduled is problematic was actually misleading. > >> > >> The problem is that vmnet_send_bh resets 'send_scheduled' after calling > >> vmnet_read. If we got another packet after vmnet_read, it would be > >> silently ignored. 
> >> > >> By the way, I forgot to mention another problem: vmnet_send_completed > >> calls vmnet_set_send_enabled, but the other packets in the buffer may > >> not be sent yet. Also, unregistering callbacks in vmnet_set_send_enabled > >> is probably not enough to stop the callback being fired since some > >> dispatch blocks are already in the dispatch queue. > >> > > > > Now I understand what you mean, thanks. > > What do you think about the workaround? For me > > it is quite difficult question how to synchronize qemu with > > vmnet thread, especially with completion callback. > > You may add a new field to represent the number of packets being sent > in the buffer. The field must be maintained by vmnet_send_bh and > vmnet_send_completed, which are on the iothread. vmnet_send_bh should > do nothing if the field is greater than 0 at the beginning of the > function. vmnet_send_completed should call > qemu_bh_schedule(s->send_bh). > > Thank you for the idea! It sounds reasonable to schedule a bottom half in the vmnet thread and do the rest of the work in the iothread with no concurrency. I am not sure that only one field is enough, because we may have two states at bh execution start: 1. There are packets in the vmnet buffer s->packets_buf that were rejected by qemu_send_async and are waiting to be sent. If this happens, we should first finish sending these waiting packets with qemu_send_async, and after that we should call vmnet_read to get new ones and send them to QEMU; 2.
There are no packets in s->packets_buf to be sent to QEMU, so we only need to get new packets from vmnet with vmnet_read and send them to QEMU. It can be done roughly this way: ``` struct s: send_bh: bh packets_buf: array[packet] ## Three new fields send_enabled: bool packets_send_pos: int packets_batch_size: int fun bh_send(s): ## Send disabled - do nothing if not s->send_enabled: return ## If some packets are waiting to be sent in ## s->packets_buf - send them if s->packets_send_pos < s->packets_batch_size: if not qemu_send_wrapper(s): return ## Try to read new packets from vmnet s->packets_send_pos = 0 s->packets_batch_size = 0 s->packets_buf = vmnet_read(&s->packets_batch_size) if s->packets_batch_size > 0: # If something received - process them qemu_send_wrapper(s) fun qemu_send_wrapper(s): for i in s->packets_send_pos to s->packets_batch_size: size = qemu_send_async(s->packets_buf[i], vmnet_send_completed) if size == 0: ## Disable packets processing until vmnet_send_completed ## Save the state: packets to be sent now in s->packets_buf ## in range s->packets_send_pos..s->packets_batch_size s->send_enabled = false s->packets_send_pos = i + 1 break if size < 0: ## On error just drop the packets s->packets_send_pos = 0 s->packets_batch_size = 0 break return s->send_enabled fun vmnet_send_completed(s): ## Complete sending packets from s->packets_buf s->send_enabled = true qemu_bh_schedule(s->send_bh) ## From the callback we only schedule the bh vmnet.set_callback(callback = s: qemu_bh_schedule(s->send_bh)) ``` I think a simpler implementation may exist, but currently I see only this approach with the send_enabled flag and two more fields to save the packet-sending state. vmnet_set_send_enabled and send_scheduled can be simply removed. > qemu_bh_schedule ensures there is no duplicate scheduling. > > Yep, my mistake. Previously I used schedule_oneshot, which creates a new bh for each call and causes duplicate scheduling.
Regards, > Akihiko Odaki > > > > > >> > >> Regards, > >> Akihiko Odaki > >> > >> > > >> > > + qemu_bh_schedule(s->send_bh); > >> > > + } > >> > > + }); > >> > > + } else { > >> > > + vmnet_interface_set_event_callback( > >> > > + s->vmnet_if, > >> > > + VMNET_INTERFACE_PACKETS_AVAILABLE, > >> > > + NULL, > >> > > + NULL); > >> > > + } > >> > > +} > >> > > + > >> > > + > >> > > +static void vmnet_send_completed(NetClientState *nc, ssize_t > len) > >> > > +{ > >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > >> > > + vmnet_set_send_enabled(s, true); > >> > > +} > >> > > + > >> > > + > >> > > +static void vmnet_send_bh(void *opaque) > >> > > +{ > >> > > + NetClientState *nc = (NetClientState *) opaque; > >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > >> > > + > >> > > + struct iovec *iov = s->iov_buf; > >> > > + struct vmpktdesc *packets = s->packets_buf; > >> > > + int pkt_cnt; > >> > > + int i; > >> > > + > >> > > + vmnet_return_t status; > >> > > + ssize_t size; > >> > > + > >> > > + /* read as many packets as present */ > >> > > + pkt_cnt = VMNET_PACKETS_LIMIT; > >> > > >> > There could be more than VMNET_PACKETS_LIMIT. You may call > >> > vmnet_read in > >> > a loop. 
> >> > > >> > > + for (i = 0; i < pkt_cnt; ++i) { > >> > > + packets[i].vm_pkt_size = s->max_packet_size; > >> > > + packets[i].vm_pkt_iovcnt = 1; > >> > > + packets[i].vm_flags = 0; > >> > > + } > >> > > + > >> > > + status = vmnet_read(s->vmnet_if, packets, &pkt_cnt); > >> > > + if (status != VMNET_SUCCESS) { > >> > > + error_printf("vmnet: read failed: %s\n", > >> > > + vmnet_status_map_str(status)); > >> > > + goto done; > >> > > + } > >> > > + > >> > > + for (i = 0; i < pkt_cnt; ++i) { > >> > > + size = qemu_send_packet_async(nc, > >> > > + iov[i].iov_base, > >> > > + packets[i].vm_pkt_size, > >> > > + vmnet_send_completed); > >> > > + if (size == 0) { > >> > > + vmnet_set_send_enabled(s, false); > >> > > + goto done; > >> > > + } else if (size < 0) { > >> > > + break; > >> > > + } > >> > > >> > goto is not needed here. "break" when size <= 0. > >> > > >> > > + } > >> > > + > >> > > +done: > >> > > + vmnet_set_send_bh_scheduled(s, false); > >> > > +} > >> > > + > >> > > + > >> > > +static void vmnet_bufs_init(VmnetCommonState *s) > >> > > +{ > >> > > + struct vmpktdesc *packets = s->packets_buf; > >> > > + struct iovec *iov = s->iov_buf; > >> > > + int i; > >> > > + > >> > > + for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > >> > > + iov[i].iov_len = s->max_packet_size; > >> > > + iov[i].iov_base = g_malloc0(iov[i].iov_len); > >> > > + packets[i].vm_pkt_iov = iov + i; > >> > > + } > >> > > +} > >> > > + > >> > > + > >> > > +const char *vmnet_status_map_str(vmnet_return_t status) > >> > > +{ > >> > > + switch (status) { > >> > > + case VMNET_SUCCESS: > >> > > + return "success"; > >> > > + case VMNET_FAILURE: > >> > > + return "general failure (possibly not enough > privileges)"; > >> > > + case VMNET_MEM_FAILURE: > >> > > + return "memory allocation failure"; > >> > > + case VMNET_INVALID_ARGUMENT: > >> > > + return "invalid argument specified"; > >> > > + case VMNET_SETUP_INCOMPLETE: > >> > > + return "interface setup is not complete"; > >> > > + case 
VMNET_INVALID_ACCESS: > >> > > + return "invalid access, permission denied"; > >> > > + case VMNET_PACKET_TOO_BIG: > >> > > + return "packet size is larger than MTU"; > >> > > + case VMNET_BUFFER_EXHAUSTED: > >> > > + return "buffers exhausted in kernel"; > >> > > + case VMNET_TOO_MANY_PACKETS: > >> > > + return "packet count exceeds limit"; > >> > > +#if defined(MAC_OS_VERSION_11_0) && \ > >> > > + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 > >> > > + case VMNET_SHARING_SERVICE_BUSY: > >> > > + return "conflict, sharing service is in use"; > >> > > +#endif > >> > > + default: > >> > > + return "unknown vmnet error"; > >> > > + } > >> > > +} > >> > > + > >> > > + > >> > > +int vmnet_if_create(NetClientState *nc, > >> > > + xpc_object_t if_desc, > >> > > + Error **errp) > >> > > +{ > >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, > nc);; > >> > > >> > Duplicate semicolons. > >> > > >> > > + dispatch_semaphore_t if_created_sem = > >> > dispatch_semaphore_create(0); > >> > > >> > if_created_sem leaks. 
> >> > > >> > > + __block vmnet_return_t if_status; > >> > > + > >> > > + s->if_queue = dispatch_queue_create( > >> > > + "org.qemu.vmnet.if_queue", > >> > > + DISPATCH_QUEUE_SERIAL > >> > > + ); > >> > > + > >> > > + xpc_dictionary_set_bool( > >> > > + if_desc, > >> > > + vmnet_allocate_mac_address_key, > >> > > + false > >> > > + ); > >> > > +#ifdef DEBUG > >> > > + qemu_log("vmnet.start.interface_desc:\n"); > >> > > + xpc_dictionary_apply(if_desc, > >> > > + ^bool(const char *k, xpc_object_t v) > { > >> > > + char *desc = > xpc_copy_description(v); > >> > > + qemu_log(" %s=%s\n", k, desc); > >> > > + free(desc); > >> > > + return true; > >> > > + }); > >> > > +#endif /* DEBUG */ > >> > > + > >> > > + s->vmnet_if = vmnet_start_interface( > >> > > + if_desc, > >> > > + s->if_queue, > >> > > + ^(vmnet_return_t status, xpc_object_t > interface_param) { > >> > > + if_status = status; > >> > > + if (status != VMNET_SUCCESS || !interface_param) { > >> > > + dispatch_semaphore_signal(if_created_sem); > >> > > + return; > >> > > + } > >> > > + > >> > > +#ifdef DEBUG > >> > > + qemu_log("vmnet.start.interface_param:\n"); > >> > > + xpc_dictionary_apply(interface_param, > >> > > + ^bool(const char *k, > >> > xpc_object_t v) { > >> > > + char *desc = > >> > xpc_copy_description(v); > >> > > + qemu_log(" %s=%s\n", k, > desc); > >> > > + free(desc); > >> > > + return true; > >> > > + }); > >> > > +#endif /* DEBUG */ > >> > > + > >> > > + s->mtu = xpc_dictionary_get_uint64( > >> > > + interface_param, > >> > > + vmnet_mtu_key); > >> > > + s->max_packet_size = xpc_dictionary_get_uint64( > >> > > + interface_param, > >> > > + vmnet_max_packet_size_key); > >> > > + > >> > > + dispatch_semaphore_signal(if_created_sem); > >> > > + }); > >> > > + > >> > > + if (s->vmnet_if == NULL) { > >> > > + error_setg(errp, > >> > > + "unable to create interface " > >> > > + "with requested params"); > >> > > >> > You don't need line breaks here. 
Breaking a string into a few > would > >> > also > >> > makes it a bit hard to grep. > >> > > >> > > + return -1; > >> > > >> > s->if_queue leaks. > >> > > >> > > + } > >> > > + > >> > > + dispatch_semaphore_wait(if_created_sem, > DISPATCH_TIME_FOREVER); > >> > > + > >> > > + if (if_status != VMNET_SUCCESS) { > >> > > + error_setg(errp, > >> > > + "cannot create vmnet interface: %s", > >> > > + vmnet_status_map_str(if_status)); > >> > > + return -1; > >> > > + } > >> > > + > >> > > + s->send_bh = aio_bh_new(qemu_get_aio_context(), > >> > vmnet_send_bh, nc); > >> > > + vmnet_bufs_init(s); > >> > > + vmnet_set_send_bh_scheduled(s, false); > >> > > + vmnet_set_send_enabled(s, true); > >> > > + return 0; > >> > > +} > >> > > + > >> > > + > >> > > +ssize_t vmnet_receive_common(NetClientState *nc, > >> > > + const uint8_t *buf, > >> > > + size_t size) > >> > > +{ > >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); > >> > > + struct vmpktdesc packet; > >> > > + struct iovec iov; > >> > > + int pkt_cnt; > >> > > + vmnet_return_t if_status; > >> > > + > >> > > + if (size > s->max_packet_size) { > >> > > + warn_report("vmnet: packet is too big, %zu > %llu\n", > >> > > >> > Use PRIu64. > >> > > >> > > + packet.vm_pkt_size, > >> > > + s->max_packet_size); > >> > > + return -1; > >> > > + } > >> > > + > >> > > + iov.iov_base = (char *) buf; > >> > > + iov.iov_len = size; > >> > > + > >> > > + packet.vm_pkt_iovcnt = 1; > >> > > + packet.vm_flags = 0; > >> > > + packet.vm_pkt_size = size; > >> > > + packet.vm_pkt_iov = &iov; > >> > > + pkt_cnt = 1; > >> > > + > >> > > + if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt); > >> > > + if (if_status != VMNET_SUCCESS) { > >> > > + error_report("vmnet: write error: %s\n", > >> > > + vmnet_status_map_str(if_status)); > >> > > >> > Why don't return -1? 
> >> > > >> > > + } > >> > > + > >> > > + if (if_status == VMNET_SUCCESS && pkt_cnt) { > >> > > + return size; > >> > > + } > >> > > + return 0; > >> > > +} > >> > > + > >> > > + > >> > > +void vmnet_cleanup_common(NetClientState *nc) > >> > > +{ > >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, > nc);; > >> > > >> > Duplicate semicolons. > >> > > >> > > + dispatch_semaphore_t if_created_sem; > >> > > + > >> > > + qemu_purge_queued_packets(nc); > + > >> > vmnet_set_send_bh_scheduled(s, true); > >> > > + vmnet_set_send_enabled(s, false); > >> > > + > >> > > + if (s->vmnet_if == NULL) { > >> > > + return; > >> > > + } > >> > > + > >> > > + if_created_sem = dispatch_semaphore_create(0); > >> > > + vmnet_stop_interface( > >> > > + s->vmnet_if, > >> > > + s->if_queue, > >> > > + ^(vmnet_return_t status) { > >> > > + assert(status == VMNET_SUCCESS); > >> > > + dispatch_semaphore_signal(if_created_sem); > >> > > + }); > >> > > + dispatch_semaphore_wait(if_created_sem, > DISPATCH_TIME_FOREVER); > >> > > + > >> > > + qemu_bh_delete(s->send_bh); > >> > > + dispatch_release(if_created_sem); > >> > > + dispatch_release(s->if_queue); > >> > > + > >> > > + for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > >> > > + g_free(s->iov_buf[i].iov_base); > >> > > + } > >> > > +} > >> > > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c > >> > > index f07afaaf21..66f66c034b 100644 > >> > > --- a/net/vmnet-shared.c > >> > > +++ b/net/vmnet-shared.c > >> > > @@ -10,16 +10,102 @@ > >> > > > >> > > #include "qemu/osdep.h" > >> > > #include "qapi/qapi-types-net.h" > >> > > +#include "qapi/error.h" > >> > > #include "vmnet_int.h" > >> > > #include "clients.h" > >> > > -#include "qemu/error-report.h" > >> > > -#include "qapi/error.h" > >> > > > >> > > #include <vmnet/vmnet.h> > >> > > > >> > > +typedef struct VmnetSharedState { > >> > > + VmnetCommonState cs; > >> > > +} VmnetSharedState; > >> > > + > >> > > + > >> > > +static bool validate_options(const Netdev *netdev, Error 
> **errp) > >> > > +{ > >> > > + const NetdevVmnetSharedOptions *options = > >> > &(netdev->u.vmnet_shared); > >> > > + > >> > > +#if !defined(MAC_OS_VERSION_11_0) || \ > >> > > + MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 > >> > > + if (options->has_isolated) { > >> > > + error_setg(errp, > >> > > + "vmnet-shared.isolated feature is " > >> > > + "unavailable: outdated vmnet.framework > API"); > >> > > + return false; > >> > > + } > >> > > +#endif > >> > > + > >> > > + if ((options->has_start_address || > >> > > + options->has_end_address || > >> > > + options->has_subnet_mask) && > >> > > + !(options->has_start_address && > >> > > + options->has_end_address && > >> > > + options->has_subnet_mask)) { > >> > > + error_setg(errp, > >> > > + "'start-address', 'end-address', > 'subnet-mask' " > >> > > + "should be provided together" > >> > > + ); > >> > > + return false; > >> > > + } > >> > > + > >> > > + return true; > >> > > +} > >> > > + > >> > > +static xpc_object_t build_if_desc(const Netdev *netdev) > >> > > +{ > >> > > + const NetdevVmnetSharedOptions *options = > >> > &(netdev->u.vmnet_shared); > >> > > + xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, > 0); > >> > > + > >> > > + xpc_dictionary_set_uint64( > >> > > + if_desc, > >> > > + vmnet_operation_mode_key, > >> > > + VMNET_SHARED_MODE > >> > > + ); > >> > > + > >> > > + if (options->has_nat66_prefix) { > >> > > + xpc_dictionary_set_string(if_desc, > >> > > + vmnet_nat66_prefix_key, > >> > > + options->nat66_prefix); > >> > > + } > >> > > + > >> > > + if (options->has_start_address) { > >> > > + xpc_dictionary_set_string(if_desc, > >> > > + vmnet_start_address_key, > >> > > + options->start_address); > >> > > + xpc_dictionary_set_string(if_desc, > >> > > + vmnet_end_address_key, > >> > > + options->end_address); > >> > > + xpc_dictionary_set_string(if_desc, > >> > > + vmnet_subnet_mask_key, > >> > > + options->subnet_mask); > >> > > + } > >> > > + > >> > > +#if 
defined(MAC_OS_VERSION_11_0) && \ > >> > > + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 > >> > > + xpc_dictionary_set_bool( > >> > > + if_desc, > >> > > + vmnet_enable_isolation_key, > >> > > + options->isolated > >> > > + ); > >> > > +#endif > >> > > + > >> > > + return if_desc; > >> > > +} > >> > > + > >> > > +static NetClientInfo net_vmnet_shared_info = { > >> > > + .type = NET_CLIENT_DRIVER_VMNET_SHARED, > >> > > + .size = sizeof(VmnetSharedState), > >> > > + .receive = vmnet_receive_common, > >> > > + .cleanup = vmnet_cleanup_common, > >> > > +}; > >> > > + > >> > > int net_init_vmnet_shared(const Netdev *netdev, const char > *name, > >> > > NetClientState *peer, Error **errp) > >> > > { > >> > > - error_setg(errp, "vmnet-shared is not implemented yet"); > >> > > - return -1; > >> > > + NetClientState *nc = > qemu_new_net_client(&net_vmnet_shared_info, > >> > > + peer, > >> > "vmnet-shared", name); > >> > > + if (!validate_options(netdev, errp)) { > >> > > + g_assert_not_reached(); > >> > > >> > g_assert_not_reached is for debugging purpose and may be dropped > >> > depending on the build option. > >> > > >> > > + } > >> > > + return vmnet_if_create(nc, build_if_desc(netdev), errp); > >> > > } > >> > > diff --git a/net/vmnet_int.h b/net/vmnet_int.h > >> > > index aac4d5af64..acfe3a88c0 100644 > >> > > --- a/net/vmnet_int.h > >> > > +++ b/net/vmnet_int.h > >> > > @@ -15,11 +15,48 @@ > >> > > #include "clients.h" > >> > > > >> > > #include <vmnet/vmnet.h> > >> > > +#include <dispatch/dispatch.h> > >> > > + > >> > > +/** > >> > > + * From vmnet.framework documentation > >> > > + * > >> > > + * Each read/write call allows up to 200 packets to be > >> > > + * read or written for a maximum of 256KB. > >> > > + * > >> > > + * Each packet written should be a complete > >> > > + * ethernet frame. 
> >> > > + * > >> > > + * https://developer.apple.com/documentation/vmnet > >> > <https://developer.apple.com/documentation/vmnet> > >> > > + */ > >> > > +#define VMNET_PACKETS_LIMIT 200 > >> > > > >> > > typedef struct VmnetCommonState { > >> > > - NetClientState nc; > >> > > + NetClientState nc; > >> > > + interface_ref vmnet_if; > >> > > + > >> > > + bool send_scheduled; > >> > > > >> > > + uint64_t mtu; > >> > > + uint64_t max_packet_size; > >> > > + > >> > > + struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT]; > >> > > + struct iovec iov_buf[VMNET_PACKETS_LIMIT]; > >> > > + > >> > > + dispatch_queue_t if_queue; > >> > > + > >> > > + QEMUBH *send_bh; > >> > > } VmnetCommonState; > >> > > > >> > > +const char *vmnet_status_map_str(vmnet_return_t status); > >> > > + > >> > > +int vmnet_if_create(NetClientState *nc, > >> > > + xpc_object_t if_desc, > >> > > + Error **errp); > >> > > + > >> > > +ssize_t vmnet_receive_common(NetClientState *nc, > >> > > + const uint8_t *buf, > >> > > + size_t size); > >> > > + > >> > > +void vmnet_cleanup_common(NetClientState *nc); > >> > > > >> > > #endif /* VMNET_INT_H */ > >> > > >> > > >> > Other issues will be fixed and submitted later, > >> > thank you! > >> > > >> > Regards, > >> > Vladislav Yaroshchuk > >> > > > > Regards, > > Vladislav Yaroshchuk > Best Regards, Vladislav Yaroshchuk
On 2022/02/28 20:59, Vladislav Yaroshchuk wrote: > > > On Sat, Feb 26, 2022 at 3:27 PM Akihiko Odaki <akihiko.odaki@gmail.com > <mailto:akihiko.odaki@gmail.com>> wrote: > > On Sat, Feb 26, 2022 at 8:33 PM Vladislav Yaroshchuk > <vladislav.yaroshchuk@jetbrains.com > <mailto:vladislav.yaroshchuk@jetbrains.com>> wrote: > > > > > > > > On Sat, Feb 26, 2022 at 12:16 PM Akihiko Odaki > <akihiko.odaki@gmail.com <mailto:akihiko.odaki@gmail.com>> wrote: > >> > >> On 2022/02/26 17:37, Vladislav Yaroshchuk wrote: > >> > > >> > Hi Akihiko, > >> > > >> > On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki > <akihiko.odaki@gmail.com <mailto:akihiko.odaki@gmail.com> > >> > <mailto:akihiko.odaki@gmail.com > <mailto:akihiko.odaki@gmail.com>>> wrote: > >> > > >> >   On 2022/02/26 2:13, Vladislav Yaroshchuk wrote: > >> >   > Interaction with vmnet.framework in different modes > >> >   > differs only on configuration stage, so we can create > >> >   > common `send`, `receive`, etc. procedures and reuse them. > >> >   > > >> >   > vmnet.framework supports iov, but writing more than > >> >   > one iov into vmnet interface fails with > >> >   > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into > >> >   > one and passing it to vmnet works fine. That's the > >> >   > reason why receive_iov() left unimplemented. But it still > >> >   > works with good enough performance having .receive() > >> >   > net/vmnet: implement shared mode (vmnet-shared) > >> >   > > >> >   > Interaction with vmnet.framework in different modes > >> >   > differs only on configuration stage, so we can create > >> >   > common `send`, `receive`, etc. procedures and reuse them. > >> >   > > >> >   > vmnet.framework supports iov, but writing more than > >> >   > one iov into vmnet interface fails with > >> >   > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into > >> >   > one and passing it to vmnet works fine. That's the > >> >   > reason why receive_iov() left unimplemented. 
But it still > >> >   > works with good enough performance having .receive() > >> >   > implemented only. > >> >   > > >> >   > Also, there is no way to unsubscribe from vmnet packages > >> >   > receiving except registering and unregistering event > >> >   > callback or simply drop packages just ignoring and > >> >   > not processing them when related flag is set. Here we do > >> >   > using the second way. > >> >   > > >> >   > Signed-off-by: Phillip Tennen <phillip@axleos.com > <mailto:phillip@axleos.com> > >> >   <mailto:phillip@axleos.com <mailto:phillip@axleos.com>>> > >> >   > Signed-off-by: Vladislav Yaroshchuk > >> >   <Vladislav.Yaroshchuk@jetbrains.com > <mailto:Vladislav.Yaroshchuk@jetbrains.com> > >> >   <mailto:Vladislav.Yaroshchuk@jetbrains.com > <mailto:Vladislav.Yaroshchuk@jetbrains.com>>> > >> > > >> >   Thank you for persistently working on this. > >> > > >> >   > --- > >> >   >  net/vmnet-common.m | 302 > >> >   +++++++++++++++++++++++++++++++++++++++++++++ > >> >   >  net/vmnet-shared.c | 94 +++++++++++++- > >> >   >  net/vmnet_int.h  | 39 +++++- > >> >   >  3 files changed, 430 insertions(+), 5 deletions(-) > >> >   > > >> >   > diff --git a/net/vmnet-common.m b/net/vmnet-common.m > >> >   > index 56612c72ce..2f70921cae 100644 > >> >   > --- a/net/vmnet-common.m > >> >   > +++ b/net/vmnet-common.m > >> >   > @@ -10,6 +10,8 @@ > >> >   >  */ > >> >   > > >> >   >  #include "qemu/osdep.h" > >> >   > +#include "qemu/main-loop.h" > >> >   > +#include "qemu/log.h" > >> >   >  #include "qapi/qapi-types-net.h" > >> >   >  #include "vmnet_int.h" > >> >   >  #include "clients.h" > >> >   > @@ -17,4 +19,304 @@ > >> >   >  #include "qapi/error.h" > >> >   > > >> >   >  #include <vmnet/vmnet.h> > >> >   > +#include <dispatch/dispatch.h> > >> >   > > >> >   > + > >> >   > +static inline void > vmnet_set_send_bh_scheduled(VmnetCommonState *s, > >> >   > +                        bool > enable) > >> >   > +{ > >> >   > +  qatomic_set(&s->send_scheduled, 
enable); > >> >   > +} > >> >   > + > >> >   > + > >> >   > +static inline bool > vmnet_is_send_bh_scheduled(VmnetCommonState *s) > >> >   > +{ > >> >   > +  return qatomic_load_acquire(&s->send_scheduled); > >> >   > +} > >> >   > + > >> >   > + > >> >   > +static inline void > vmnet_set_send_enabled(VmnetCommonState *s, > >> >   > +                     bool enable) > >> >   > +{ > >> >   > +  if (enable) { > >> >   > +    vmnet_interface_set_event_callback( > >> >   > +      s->vmnet_if, > >> >   > +      VMNET_INTERFACE_PACKETS_AVAILABLE, > >> >   > +      s->if_queue, > >> >   > +      ^(interface_event_t event_id, xpc_object_t > event) { > >> >   > +        assert(event_id == > >> >   VMNET_INTERFACE_PACKETS_AVAILABLE); > >> >   > +        /* > >> >   > +         * This function is being called from > a non qemu > >> >   thread, so > >> >   > +         * we only schedule a BH, and do the > rest of the > >> >   io completion > >> >   > +         * handling from vmnet_send_bh() which > runs in a > >> >   qemu context. > >> >   > +         * > >> >   > +         * Avoid scheduling multiple bottom halves > >> >   > +         */ > >> >   > +        if (!vmnet_is_send_bh_scheduled(s)) { > >> >   > +          vmnet_set_send_bh_scheduled(s, true); > >> > > >> >   It can be interrupted between vmnet_is_send_bh_scheduled and > >> >   vmnet_set_send_bh_scheduled, which leads to data race. > >> > > >> > > >> > Sorry, I did not clearly understand what you meant. Since this > >> > callback (block) is submitted on DISPATCH_QUEUE_SERIAL, > >> > only one instance of the callback will be executed at any > >> > moment of time. 
> >> > > https://developer.apple.com/documentation/dispatch/dispatch_queue_serial > <https://developer.apple.com/documentation/dispatch/dispatch_queue_serial> > >> > > <https://developer.apple.com/documentation/dispatch/dispatch_queue_serial > <https://developer.apple.com/documentation/dispatch/dispatch_queue_serial>> > >> > > >> > Also this is the only place where we schedule a bottom half. > >> > > >> > After we set the 'send_scheduled' flag, all the other > >> > callback blocks will do nothing (skip the if block) until > >> > the bottom half is executed and reset 'send_scheduled'. > >> > I don't see any races here :( > >> > > >> > Correct me if I'm wrong please. > >> > >> My explanation that the interruption between > vmnet_is_send_bh_scheduled > >> and vmnet_set_send_bh_scheduled is problematic was actually > misleading. > >> > >> The problem is that vmnet_send_bh resets 'send_scheduled' after > calling > >> vmnet_read. If we got another packet after vmnet_read, it would be > >> silently ignored. > >> > >> By the way, I forgot to mention another problem: > vmnet_send_completed > >> calls vmnet_set_send_enabled, but the other packets in the > buffer may > >> not be sent yet. Also, unregistering callbacks in > vmnet_set_send_enabled > >> is probably not enough to stop the callback being fired since some > >> dispatch blocks are already in the dispatch queue. > >> > > > > Now I understand what you mean, thanks. > > What do you think about the workaround? For me > > it is quite difficult question how to synchronize qemu with > > vmnet thread, especially with completion callback. > > You may add a new field to represent the number of packets being sent > in the buffer. The field must be maintained by vmnet_send_bh and > vmnet_send_completed, which are on the iothread. vmnet_send_bh should > do nothing if the field is greater than 0 at the beginning of the > function. vmnet_send_completed should call > qemu_bh_schedule(s->send_bh). > > > Thank you for the idea! 
Sounds meaningful to > schedule a bottom half in vmnet thread and do the > rest of things in iothread with no concurrency. > > Not sure that only one field is enough, cause > we may have two states on bh execution start: > 1. There are packets in vmnet buffer s->packets_buf >   that were rejected by qemu_send_async and waiting >   to be sent. If this happens, we should complete sending >   these waiting packets with qemu_send_async firstly, >   and after that we should call vmnet_read to get >   new ones and send them to QEMU; > 2. There are no packets in s->packets_buf to be sent to >   qemu, we only need to get new packets from vmnet >   with vmnet_read and send them to QEMU In case 1, you should just keep calling qemu_send_packet_async. Actually qemu_send_packet_async adds the packet to its internal queue and calls the callback when it is consumed. > > It can be done kinda this way: > ``` > struct s: >   send_bh:          bh >   packets_buf:       array[packet] >   ## Three new fields >   send_enabled:      bool >   packets_send_pos:  int >   packets_batch_size: int > > fun bh_send(s): >   ## Send disabled - do nothing >   if not s->send_enabled: >     return >   ## If some packets are waiting to be sent in >   ## s->packets_buf - send them >   if s->packets_send_pos < s->packets_batch_size: >     if not qemu_send_wrapper(s): >       return > >   ## Try to read new packets from vmnet >   s->packets_send_pos = 0 >   s->packets_batch_size = 0 >   s->packets_buf = vmnet_read(&s->packets_batch_size) >   if s->packets_batch_size > 0: >     # If something received - process them >     qemu_send_wrapper(s) > fun qemu_send_wrapper(s): >   for i in s->packets_send_pos to s->packets_batch_size: >     size = qemu_send_async(s->packets_buf[i], >                          vmnet_send_completed) >     if size == 0: >       ## Disable packets processing until vmnet_send_completed >       ## Save the state: packets to be sent now in s->packets_buf >       ## in range 
s->packets_send_pos..s->packets_batch_size >       s->send_enabled = false >       s->packets_send_pos = i + 1 >       break >     if size < 0: >       ## On error just drop the packets >       s->packets_send_pos = 0 >       s->packets_batch_size = 0 >       break > >    return s->send_enabled > > > fun vmnet_send_completed(s): >   ## Complete sending packets from s->packets_buf >   s->send_enabled = true >   qemu_bh_schedule(s->send_bh) > > ## From callback we only scheduling the bh > vmnet.set_callback(callback = s: qemu_bh_schedule(s->send_bh)) > ``` > > I think a simpler implementation may exist, but currently > I see only this approach with the send_enabled flag and > two more fields to save packets sending state. > > vmnet_set_send_enabled and send_scheduled can be simply removed. > qemu_bh_schedule ensures there is no duplicate scheduling. > > Yep, my mistake. Previously I used schedule_oneshot which > creates a new bh for each call which causes duplicate scheduling. > > > Regards, > Akihiko Odaki > > > > > > >> > >> Regards, > >> Akihiko Odaki > >> > >> > > >> >   > +          qemu_bh_schedule(s->send_bh); > >> >   > +        } > >> >   > +      }); > >> >   > +  } else { > >> >   > +    vmnet_interface_set_event_callback( > >> >   > +      s->vmnet_if, > >> >   > +      VMNET_INTERFACE_PACKETS_AVAILABLE, > >> >   > +      NULL, > >> >   > +      NULL); > >> >   > +  } > >> >   > +} > >> >   > + > >> >   > + > >> >   > +static void vmnet_send_completed(NetClientState *nc, > ssize_t len) > >> >   > +{ > >> >   > +  VmnetCommonState *s = DO_UPCAST(VmnetCommonState, > nc, nc); > >> >   > +  vmnet_set_send_enabled(s, true); > >> >   > +} > >> >   > + > >> >   > + > >> >   > +static void vmnet_send_bh(void *opaque) > >> >   > +{ > >> >   > +  NetClientState *nc = (NetClientState *) opaque; > >> >   > +  VmnetCommonState *s = DO_UPCAST(VmnetCommonState, > nc, nc); > >> >   > + > >> >   > +  struct iovec *iov = s->iov_buf; > >> >   > +  struct vmpktdesc *packets 
= s->packets_buf; > >> >   > +  int pkt_cnt; > >> >   > +  int i; > >> >   > + > >> >   > +  vmnet_return_t status; > >> >   > +  ssize_t size; > >> >   > + > >> >   > +  /* read as many packets as present */ > >> >   > +  pkt_cnt = VMNET_PACKETS_LIMIT; > >> > > >> >   There could be more than VMNET_PACKETS_LIMIT. You may call > >> >   vmnet_read in > >> >   a loop. > >> > > >> >   > +  for (i = 0; i < pkt_cnt; ++i) { > >> >   > +    packets[i].vm_pkt_size = s->max_packet_size; > >> >   > +    packets[i].vm_pkt_iovcnt = 1; > >> >   > +    packets[i].vm_flags = 0; > >> >   > +  } > >> >   > + > >> >   > +  status = vmnet_read(s->vmnet_if, packets, &pkt_cnt); > >> >   > +  if (status != VMNET_SUCCESS) { > >> >   > +    error_printf("vmnet: read failed: %s\n", > >> >   > +           vmnet_status_map_str(status)); > >> >   > +    goto done; > >> >   > +  } > >> >   > + > >> >   > +  for (i = 0; i < pkt_cnt; ++i) { > >> >   > +    size = qemu_send_packet_async(nc, > >> >   > +                   iov[i].iov_base, > >> >   > + > packets[i].vm_pkt_size, > >> >   > + > vmnet_send_completed); > >> >   > +    if (size == 0) { > >> >   > +      vmnet_set_send_enabled(s, false); > >> >   > +      goto done; > >> >   > +    } else if (size < 0) { > >> >   > +      break; > >> >   > +    } > >> > > >> >   goto is not needed here. "break" when size <= 0. 
> >> > > >> >   > +  } > >> >   > + > >> >   > +done: > >> >   > +  vmnet_set_send_bh_scheduled(s, false); > >> >   > +} > >> >   > + > >> >   > + > >> >   > +static void vmnet_bufs_init(VmnetCommonState *s) > >> >   > +{ > >> >   > +  struct vmpktdesc *packets = s->packets_buf; > >> >   > +  struct iovec *iov = s->iov_buf; > >> >   > +  int i; > >> >   > + > >> >   > +  for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > >> >   > +    iov[i].iov_len = s->max_packet_size; > >> >   > +    iov[i].iov_base = g_malloc0(iov[i].iov_len); > >> >   > +    packets[i].vm_pkt_iov = iov + i; > >> >   > +  } > >> >   > +} > >> >   > + > >> >   > + > >> >   > +const char *vmnet_status_map_str(vmnet_return_t status) > >> >   > +{ > >> >   > +  switch (status) { > >> >   > +  case VMNET_SUCCESS: > >> >   > +    return "success"; > >> >   > +  case VMNET_FAILURE: > >> >   > +    return "general failure (possibly not enough > privileges)"; > >> >   > +  case VMNET_MEM_FAILURE: > >> >   > +    return "memory allocation failure"; > >> >   > +  case VMNET_INVALID_ARGUMENT: > >> >   > +    return "invalid argument specified"; > >> >   > +  case VMNET_SETUP_INCOMPLETE: > >> >   > +    return "interface setup is not complete"; > >> >   > +  case VMNET_INVALID_ACCESS: > >> >   > +    return "invalid access, permission denied"; > >> >   > +  case VMNET_PACKET_TOO_BIG: > >> >   > +    return "packet size is larger than MTU"; > >> >   > +  case VMNET_BUFFER_EXHAUSTED: > >> >   > +    return "buffers exhausted in kernel"; > >> >   > +  case VMNET_TOO_MANY_PACKETS: > >> >   > +    return "packet count exceeds limit"; > >> >   > +#if defined(MAC_OS_VERSION_11_0) && \ > >> >   > +  MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 > >> >   > +  case VMNET_SHARING_SERVICE_BUSY: > >> >   > +    return "conflict, sharing service is in use"; > >> >   > +#endif > >> >   > +  default: > >> >   > +    return "unknown vmnet error"; > >> >   > +  } > >> >   > +} > >> >   > + > >> >   > + > >> >   > +int 
vmnet_if_create(NetClientState *nc, > >> >   > +          xpc_object_t if_desc, > >> >   > +          Error **errp) > >> >   > +{ > >> >   > +  VmnetCommonState *s = DO_UPCAST(VmnetCommonState, > nc, nc);; > >> > > >> >   Duplicate semicolons. > >> > > >> >   > +  dispatch_semaphore_t if_created_sem = > >> >   dispatch_semaphore_create(0); > >> > > >> >   if_created_sem leaks. > >> > > >> >   > +  __block vmnet_return_t if_status; > >> >   > + > >> >   > +  s->if_queue = dispatch_queue_create( > >> >   > +    "org.qemu.vmnet.if_queue", > >> >   > +    DISPATCH_QUEUE_SERIAL > >> >   > +  ); > >> >   > + > >> >   > +  xpc_dictionary_set_bool( > >> >   > +    if_desc, > >> >   > +    vmnet_allocate_mac_address_key, > >> >   > +    false > >> >   > +  ); > >> >   > +#ifdef DEBUG > >> >   > +  qemu_log("vmnet.start.interface_desc:\n"); > >> >   > +  xpc_dictionary_apply(if_desc, > >> >   > +             ^bool(const char *k, > xpc_object_t v) { > >> >   > +               char *desc = > xpc_copy_description(v); > >> >   > +               qemu_log(" %s=%s\n", k, > desc); > >> >   > +               free(desc); > >> >   > +               return true; > >> >   > +             }); > >> >   > +#endif /* DEBUG */ > >> >   > + > >> >   > +  s->vmnet_if = vmnet_start_interface( > >> >   > +    if_desc, > >> >   > +    s->if_queue, > >> >   > +    ^(vmnet_return_t status, xpc_object_t > interface_param) { > >> >   > +      if_status = status; > >> >   > +      if (status != VMNET_SUCCESS || > !interface_param) { > >> >   > +        dispatch_semaphore_signal(if_created_sem); > >> >   > +        return; > >> >   > +      } > >> >   > + > >> >   > +#ifdef DEBUG > >> >   > +      qemu_log("vmnet.start.interface_param:\n"); > >> >   > +      xpc_dictionary_apply(interface_param, > >> >   > +                 ^bool(const char *k, > >> >   xpc_object_t v) { > >> >   > +                   char *desc = > >> >   xpc_copy_description(v); > >> >   > +                   qemu_log(" > %s=%s\n", k, 
desc); > >> >   > +                   free(desc); > >> >   > +                   return true; > >> >   > +                 }); > >> >   > +#endif /* DEBUG */ > >> >   > + > >> >   > +      s->mtu = xpc_dictionary_get_uint64( > >> >   > +        interface_param, > >> >   > +        vmnet_mtu_key); > >> >   > +      s->max_packet_size = > xpc_dictionary_get_uint64( > >> >   > +        interface_param, > >> >   > +        vmnet_max_packet_size_key); > >> >   > + > >> >   > +      dispatch_semaphore_signal(if_created_sem); > >> >   > +    }); > >> >   > + > >> >   > +  if (s->vmnet_if == NULL) { > >> >   > +    error_setg(errp, > >> >   > +          "unable to create interface " > >> >   > +          "with requested params"); > >> > > >> >   You don't need line breaks here. Breaking a string into a > few would > >> >   also > >> >   makes it a bit hard to grep. > >> > > >> >   > +    return -1; > >> > > >> >   s->if_queue leaks. > >> > > >> >   > +  } > >> >   > + > >> >   > +  dispatch_semaphore_wait(if_created_sem, > DISPATCH_TIME_FOREVER); > >> >   > + > >> >   > +  if (if_status != VMNET_SUCCESS) { > >> >   > +    error_setg(errp, > >> >   > +          "cannot create vmnet interface: %s", > >> >   > +          vmnet_status_map_str(if_status)); > >> >   > +    return -1; > >> >   > +  } > >> >   > + > >> >   > +  s->send_bh = aio_bh_new(qemu_get_aio_context(), > >> >   vmnet_send_bh, nc); > >> >   > +  vmnet_bufs_init(s); > >> >   > +  vmnet_set_send_bh_scheduled(s, false); > >> >   > +  vmnet_set_send_enabled(s, true); > >> >   > +  return 0; > >> >   > +} > >> >   > + > >> >   > + > >> >   > +ssize_t vmnet_receive_common(NetClientState *nc, > >> >   > +               const uint8_t *buf, > >> >   > +               size_t size) > >> >   > +{ > >> >   > +  VmnetCommonState *s = DO_UPCAST(VmnetCommonState, > nc, nc); > >> >   > +  struct vmpktdesc packet; > >> >   > +  struct iovec iov; > >> >   > +  int pkt_cnt; > >> >   > +  vmnet_return_t if_status; > >> >   > + > 
>> >   > +  if (size > s->max_packet_size) { > >> >   > +    warn_report("vmnet: packet is too big, %zu > > %llu\n", > >> > > >> >   Use PRIu64. > >> > > >> >   > +          packet.vm_pkt_size, > >> >   > +          s->max_packet_size); > >> >   > +    return -1; > >> >   > +  } > >> >   > + > >> >   > +  iov.iov_base = (char *) buf; > >> >   > +  iov.iov_len = size; > >> >   > + > >> >   > +  packet.vm_pkt_iovcnt = 1; > >> >   > +  packet.vm_flags = 0; > >> >   > +  packet.vm_pkt_size = size; > >> >   > +  packet.vm_pkt_iov = &iov; > >> >   > +  pkt_cnt = 1; > >> >   > + > >> >   > +  if_status = vmnet_write(s->vmnet_if, &packet, > &pkt_cnt); > >> >   > +  if (if_status != VMNET_SUCCESS) { > >> >   > +    error_report("vmnet: write error: %s\n", > >> >   > +           vmnet_status_map_str(if_status)); > >> > > >> >   Why don't return -1? > >> > > >> >   > +  } > >> >   > + > >> >   > +  if (if_status == VMNET_SUCCESS && pkt_cnt) { > >> >   > +    return size; > >> >   > +  } > >> >   > +  return 0; > >> >   > +} > >> >   > + > >> >   > + > >> >   > +void vmnet_cleanup_common(NetClientState *nc) > >> >   > +{ > >> >   > +  VmnetCommonState *s = DO_UPCAST(VmnetCommonState, > nc, nc);; > >> > > >> >   Duplicate semicolons. 
> >> > > >> >   > +  dispatch_semaphore_t if_created_sem; > >> >   > + > >> >   > +  qemu_purge_queued_packets(nc); > + > >> >   vmnet_set_send_bh_scheduled(s, true); > >> >   > +  vmnet_set_send_enabled(s, false); > >> >   > + > >> >   > +  if (s->vmnet_if == NULL) { > >> >   > +    return; > >> >   > +  } > >> >   > + > >> >   > +  if_created_sem = dispatch_semaphore_create(0); > >> >   > +  vmnet_stop_interface( > >> >   > +    s->vmnet_if, > >> >   > +    s->if_queue, > >> >   > +    ^(vmnet_return_t status) { > >> >   > +      assert(status == VMNET_SUCCESS); > >> >   > +      dispatch_semaphore_signal(if_created_sem); > >> >   > +    }); > >> >   > +  dispatch_semaphore_wait(if_created_sem, > DISPATCH_TIME_FOREVER); > >> >   > + > >> >   > +  qemu_bh_delete(s->send_bh); > >> >   > +  dispatch_release(if_created_sem); > >> >   > +  dispatch_release(s->if_queue); > >> >   > + > >> >   > +  for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > >> >   > +    g_free(s->iov_buf[i].iov_base); > >> >   > +  } > >> >   > +} > >> >   > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c > >> >   > index f07afaaf21..66f66c034b 100644 > >> >   > --- a/net/vmnet-shared.c > >> >   > +++ b/net/vmnet-shared.c > >> >   > @@ -10,16 +10,102 @@ > >> >   > > >> >   >  #include "qemu/osdep.h" > >> >   >  #include "qapi/qapi-types-net.h" > >> >   > +#include "qapi/error.h" > >> >   >  #include "vmnet_int.h" > >> >   >  #include "clients.h" > >> >   > -#include "qemu/error-report.h" > >> >   > -#include "qapi/error.h" > >> >   > > >> >   >  #include <vmnet/vmnet.h> > >> >   > > >> >   > +typedef struct VmnetSharedState { > >> >   > +  VmnetCommonState cs; > >> >   > +} VmnetSharedState; > >> >   > + > >> >   > + > >> >   > +static bool validate_options(const Netdev *netdev, > Error **errp) > >> >   > +{ > >> >   > +  const NetdevVmnetSharedOptions *options = > >> >   &(netdev->u.vmnet_shared); > >> >   > + > >> >   > +#if !defined(MAC_OS_VERSION_11_0) || \ > >> >   > +  
MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 > >> >   > +  if (options->has_isolated) { > >> >   > +    error_setg(errp, > >> >   > +          "vmnet-shared.isolated feature is " > >> >   > +          "unavailable: outdated > vmnet.framework API"); > >> >   > +    return false; > >> >   > +  } > >> >   > +#endif > >> >   > + > >> >   > +  if ((options->has_start_address || > >> >   > +     options->has_end_address || > >> >   > +     options->has_subnet_mask) && > >> >   > +    !(options->has_start_address && > >> >   > +     options->has_end_address && > >> >   > +     options->has_subnet_mask)) { > >> >   > +    error_setg(errp, > >> >   > +          "'start-address', 'end-address', > 'subnet-mask' " > >> >   > +          "should be provided together" > >> >   > +    ); > >> >   > +    return false; > >> >   > +  } > >> >   > + > >> >   > +  return true; > >> >   > +} > >> >   > + > >> >   > +static xpc_object_t build_if_desc(const Netdev *netdev) > >> >   > +{ > >> >   > +  const NetdevVmnetSharedOptions *options = > >> >   &(netdev->u.vmnet_shared); > >> >   > +  xpc_object_t if_desc = xpc_dictionary_create(NULL, > NULL, 0); > >> >   > + > >> >   > +  xpc_dictionary_set_uint64( > >> >   > +    if_desc, > >> >   > +    vmnet_operation_mode_key, > >> >   > +    VMNET_SHARED_MODE > >> >   > +  ); > >> >   > + > >> >   > +  if (options->has_nat66_prefix) { > >> >   > +    xpc_dictionary_set_string(if_desc, > >> >   > +                 vmnet_nat66_prefix_key, > >> >   > +                 options->nat66_prefix); > >> >   > +  } > >> >   > + > >> >   > +  if (options->has_start_address) { > >> >   > +    xpc_dictionary_set_string(if_desc, > >> >   > +                 vmnet_start_address_key, > >> >   > +                 options->start_address); > >> >   > +    xpc_dictionary_set_string(if_desc, > >> >   > +                 vmnet_end_address_key, > >> >   > +                 options->end_address); > >> >   > +    xpc_dictionary_set_string(if_desc, > >> >   > +   
              vmnet_subnet_mask_key, > >> >   > +                 options->subnet_mask); > >> >   > +  } > >> >   > + > >> >   > +#if defined(MAC_OS_VERSION_11_0) && \ > >> >   > +  MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 > >> >   > +  xpc_dictionary_set_bool( > >> >   > +    if_desc, > >> >   > +    vmnet_enable_isolation_key, > >> >   > +    options->isolated > >> >   > +  ); > >> >   > +#endif > >> >   > + > >> >   > +  return if_desc; > >> >   > +} > >> >   > + > >> >   > +static NetClientInfo net_vmnet_shared_info = { > >> >   > +  .type = NET_CLIENT_DRIVER_VMNET_SHARED, > >> >   > +  .size = sizeof(VmnetSharedState), > >> >   > +  .receive = vmnet_receive_common, > >> >   > +  .cleanup = vmnet_cleanup_common, > >> >   > +}; > >> >   > + > >> >   >  int net_init_vmnet_shared(const Netdev *netdev, const > char *name, > >> >   >               NetClientState *peer, Error > **errp) > >> >   >  { > >> >   > - error_setg(errp, "vmnet-shared is not implemented yet"); > >> >   > - return -1; > >> >   > +  NetClientState *nc = > qemu_new_net_client(&net_vmnet_shared_info, > >> >   > +                       peer, > >> >   "vmnet-shared", name); > >> >   > +  if (!validate_options(netdev, errp)) { > >> >   > +    g_assert_not_reached(); > >> > > >> >   g_assert_not_reached is for debugging purpose and may be > dropped > >> >   depending on the build option. 
> >> > > >> >   > +  } > >> >   > +  return vmnet_if_create(nc, build_if_desc(netdev), > errp); > >> >   >  } > >> >   > diff --git a/net/vmnet_int.h b/net/vmnet_int.h > >> >   > index aac4d5af64..acfe3a88c0 100644 > >> >   > --- a/net/vmnet_int.h > >> >   > +++ b/net/vmnet_int.h > >> >   > @@ -15,11 +15,48 @@ > >> >   >  #include "clients.h" > >> >   > > >> >   >  #include <vmnet/vmnet.h> > >> >   > +#include <dispatch/dispatch.h> > >> >   > + > >> >   > +/** > >> >   > + * From vmnet.framework documentation > >> >   > + * > >> >   > + * Each read/write call allows up to 200 packets to be > >> >   > + * read or written for a maximum of 256KB. > >> >   > + * > >> >   > + * Each packet written should be a complete > >> >   > + * ethernet frame. > >> >   > + * > >> >   > + * https://developer.apple.com/documentation/vmnet > <https://developer.apple.com/documentation/vmnet> > >> >   <https://developer.apple.com/documentation/vmnet > <https://developer.apple.com/documentation/vmnet>> > >> >   > + */ > >> >   > +#define VMNET_PACKETS_LIMIT 200 > >> >   > > >> >   >  typedef struct VmnetCommonState { > >> >   > - NetClientState nc; > >> >   > +  NetClientState nc; > >> >   > +  interface_ref vmnet_if; > >> >   > + > >> >   > +  bool send_scheduled; > >> >   > > >> >   > +  uint64_t mtu; > >> >   > +  uint64_t max_packet_size; > >> >   > + > >> >   > +  struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT]; > >> >   > +  struct iovec iov_buf[VMNET_PACKETS_LIMIT]; > >> >   > + > >> >   > +  dispatch_queue_t if_queue; > >> >   > + > >> >   > +  QEMUBH *send_bh; > >> >   >  } VmnetCommonState; > >> >   > > >> >   > +const char *vmnet_status_map_str(vmnet_return_t status); > >> >   > + > >> >   > +int vmnet_if_create(NetClientState *nc, > >> >   > +          xpc_object_t if_desc, > >> >   > +          Error **errp); > >> >   > + > >> >   > +ssize_t vmnet_receive_common(NetClientState *nc, > >> >   > +               const uint8_t *buf, > >> >   > +               size_t size); > >> 
>   > + > >> >   > +void vmnet_cleanup_common(NetClientState *nc); > >> >   > > >> >   >  #endif /* VMNET_INT_H */ > >> > > >> > > >> > Other issues will be fixed and submitted later, > >> > thank you! > >> > > >> > Regards, > >> > Vladislav Yaroshchuk > >> > > > > Regards, > > Vladislav Yaroshchuk > > > Best Regards, > > Vladislav Yaroshchuk
On Tue, Mar 1, 2022 at 8:52 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > On 2022/02/28 20:59, Vladislav Yaroshchuk wrote: > > > > > > On Sat, Feb 26, 2022 at 3:27 PM Akihiko Odaki <akihiko.odaki@gmail.com > > <mailto:akihiko.odaki@gmail.com>> wrote: > > > > On Sat, Feb 26, 2022 at 8:33 PM Vladislav Yaroshchuk > > <vladislav.yaroshchuk@jetbrains.com > > <mailto:vladislav.yaroshchuk@jetbrains.com>> wrote: > > > > > > > > > > > > On Sat, Feb 26, 2022 at 12:16 PM Akihiko Odaki > > <akihiko.odaki@gmail.com <mailto:akihiko.odaki@gmail.com>> wrote: > > >> > > >> On 2022/02/26 17:37, Vladislav Yaroshchuk wrote: > > >> > > > >> > Hi Akihiko, > > >> > > > >> > On Fri, Feb 25, 2022 at 8:46 PM Akihiko Odaki > > <akihiko.odaki@gmail.com <mailto:akihiko.odaki@gmail.com> > > >> > <mailto:akihiko.odaki@gmail.com > > <mailto:akihiko.odaki@gmail.com>>> wrote: > > >> > > > >> > On 2022/02/26 2:13, Vladislav Yaroshchuk wrote: > > >> > > Interaction with vmnet.framework in different modes > > >> > > differs only on configuration stage, so we can create > > >> > > common `send`, `receive`, etc. procedures and reuse > them. > > >> > > > > >> > > vmnet.framework supports iov, but writing more than > > >> > > one iov into vmnet interface fails with > > >> > > 'VMNET_INVALID_ARGUMENT'. Collecting provided iovs into > > >> > > one and passing it to vmnet works fine. That's the > > >> > > reason why receive_iov() left unimplemented. But it > still > > >> > > works with good enough performance having .receive() > > >> > > net/vmnet: implement shared mode (vmnet-shared) > > >> > > > > >> > > Interaction with vmnet.framework in different modes > > >> > > differs only on configuration stage, so we can create > > >> > > common `send`, `receive`, etc. procedures and reuse > them. > > >> > > > > >> > > vmnet.framework supports iov, but writing more than > > >> > > one iov into vmnet interface fails with > > >> > > 'VMNET_INVALID_ARGUMENT'. 
Collecting provided iovs into > > >> > > one and passing it to vmnet works fine. That's the > > >> > > reason why receive_iov() left unimplemented. But it > still > > >> > > works with good enough performance having .receive() > > >> > > implemented only. > > >> > > > > >> > > Also, there is no way to unsubscribe from vmnet packages > > >> > > receiving except registering and unregistering event > > >> > > callback or simply drop packages just ignoring and > > >> > > not processing them when related flag is set. Here we do > > >> > > using the second way. > > >> > > > > >> > > Signed-off-by: Phillip Tennen <phillip@axleos.com > > <mailto:phillip@axleos.com> > > >> > <mailto:phillip@axleos.com <mailto:phillip@axleos.com>>> > > >> > > Signed-off-by: Vladislav Yaroshchuk > > >> > <Vladislav.Yaroshchuk@jetbrains.com > > <mailto:Vladislav.Yaroshchuk@jetbrains.com> > > >> > <mailto:Vladislav.Yaroshchuk@jetbrains.com > > <mailto:Vladislav.Yaroshchuk@jetbrains.com>>> > > >> > > > >> > Thank you for persistently working on this. 
> > >> > > > >> > > --- > > >> > > net/vmnet-common.m | 302 > > >> > +++++++++++++++++++++++++++++++++++++++++++++ > > >> > > net/vmnet-shared.c | 94 +++++++++++++- > > >> > > net/vmnet_int.h | 39 +++++- > > >> > > 3 files changed, 430 insertions(+), 5 deletions(-) > > >> > > > > >> > > diff --git a/net/vmnet-common.m b/net/vmnet-common.m > > >> > > index 56612c72ce..2f70921cae 100644 > > >> > > --- a/net/vmnet-common.m > > >> > > +++ b/net/vmnet-common.m > > >> > > @@ -10,6 +10,8 @@ > > >> > > */ > > >> > > > > >> > > #include "qemu/osdep.h" > > >> > > +#include "qemu/main-loop.h" > > >> > > +#include "qemu/log.h" > > >> > > #include "qapi/qapi-types-net.h" > > >> > > #include "vmnet_int.h" > > >> > > #include "clients.h" > > >> > > @@ -17,4 +19,304 @@ > > >> > > #include "qapi/error.h" > > >> > > > > >> > > #include <vmnet/vmnet.h> > > >> > > +#include <dispatch/dispatch.h> > > >> > > > > >> > > + > > >> > > +static inline void > > vmnet_set_send_bh_scheduled(VmnetCommonState *s, > > >> > > + bool > > enable) > > >> > > +{ > > >> > > + qatomic_set(&s->send_scheduled, enable); > > >> > > +} > > >> > > + > > >> > > + > > >> > > +static inline bool > > vmnet_is_send_bh_scheduled(VmnetCommonState *s) > > >> > > +{ > > >> > > + return qatomic_load_acquire(&s->send_scheduled); > > >> > > +} > > >> > > + > > >> > > + > > >> > > +static inline void > > vmnet_set_send_enabled(VmnetCommonState *s, > > >> > > + bool enable) > > >> > > +{ > > >> > > + if (enable) { > > >> > > + vmnet_interface_set_event_callback( > > >> > > + s->vmnet_if, > > >> > > + VMNET_INTERFACE_PACKETS_AVAILABLE, > > >> > > + s->if_queue, > > >> > > + ^(interface_event_t event_id, xpc_object_t > > event) { > > >> > > + assert(event_id == > > >> > VMNET_INTERFACE_PACKETS_AVAILABLE); > > >> > > + /* > > >> > > + * This function is being called from > > a non qemu > > >> > thread, so > > >> > > + * we only schedule a BH, and do the > > rest of the > > >> > io completion > > >> > > + * handling from 
vmnet_send_bh() which > > runs in a > > >> > qemu context. > > >> > > + * > > >> > > + * Avoid scheduling multiple bottom > halves > > >> > > + */ > > >> > > + if (!vmnet_is_send_bh_scheduled(s)) { > > >> > > + vmnet_set_send_bh_scheduled(s, > true); > > >> > > > >> > It can be interrupted between vmnet_is_send_bh_scheduled > and > > >> > vmnet_set_send_bh_scheduled, which leads to data race. > > >> > > > >> > > > >> > Sorry, I did not clearly understand what you meant. Since this > > >> > callback (block) is submitted on DISPATCH_QUEUE_SERIAL, > > >> > only one instance of the callback will be executed at any > > >> > moment of time. > > >> > > > > https://developer.apple.com/documentation/dispatch/dispatch_queue_serial > > < > https://developer.apple.com/documentation/dispatch/dispatch_queue_serial> > > >> > > > < > https://developer.apple.com/documentation/dispatch/dispatch_queue_serial > > < > https://developer.apple.com/documentation/dispatch/dispatch_queue_serial>> > > >> > > > >> > Also this is the only place where we schedule a bottom half. > > >> > > > >> > After we set the 'send_scheduled' flag, all the other > > >> > callback blocks will do nothing (skip the if block) until > > >> > the bottom half is executed and reset 'send_scheduled'. > > >> > I don't see any races here :( > > >> > > > >> > Correct me if I'm wrong please. > > >> > > >> My explanation that the interruption between > > vmnet_is_send_bh_scheduled > > >> and vmnet_set_send_bh_scheduled is problematic was actually > > misleading. > > >> > > >> The problem is that vmnet_send_bh resets 'send_scheduled' after > > calling > > >> vmnet_read. If we got another packet after vmnet_read, it would > be > > >> silently ignored. > > >> > > >> By the way, I forgot to mention another problem: > > vmnet_send_completed > > >> calls vmnet_set_send_enabled, but the other packets in the > > buffer may > > >> not be sent yet. 
Also, unregistering callbacks in > > vmnet_set_send_enabled > > >> is probably not enough to stop the callback being fired since > some > > >> dispatch blocks are already in the dispatch queue. > > >> > > > > > > Now I understand what you mean, thanks. > > > What do you think about the workaround? For me > > > it is quite difficult question how to synchronize qemu with > > > vmnet thread, especially with completion callback. > > > > You may add a new field to represent the number of packets being sent > > in the buffer. The field must be maintained by vmnet_send_bh and > > vmnet_send_completed, which are on the iothread. vmnet_send_bh should > > do nothing if the field is greater than 0 at the beginning of the > > function. vmnet_send_completed should call > > qemu_bh_schedule(s->send_bh). > > > > > > Thank you for the idea! Sounds meaningful to > > schedule a bottom half in vmnet thread and do the > > rest of things in iothread with no concurrency. > > > > Not sure that only one field is enough, cause > > we may have two states on bh execution start: > > 1. There are packets in vmnet buffer s->packets_buf > > that were rejected by qemu_send_async and waiting > > to be sent. If this happens, we should complete sending > > these waiting packets with qemu_send_async firstly, > > and after that we should call vmnet_read to get > > new ones and send them to QEMU; > > 2. There are no packets in s->packets_buf to be sent to > > qemu, we only need to get new packets from vmnet > > with vmnet_read and send them to QEMU > > In case 1, you should just keep calling qemu_send_packet_async. Actually > qemu_send_packet_async adds the packet to its internal queue and calls > the callback when it is consumed. > > I'm not sure we can keep calling qemu_send_packet_async, because as docs from net/queue.c says: /* [...] 
* If a sent callback is provided to send(), the caller must handle a * zero return from the delivery handler by not sending any more packets * until we have invoked the callback. Only in that case will we queue * the packet. * * If a sent callback isn't provided, we just drop the packet to avoid * unbounded queueing. */ So after we have called vmnet_read and read N packets into the temporary s->packets_buf, we begin calling qemu_send_packet_async. If it returns 0, it means "no more packets until sent_cb is called, please". At this moment we have N packets in s->packets_buf and have already queued K < N of them. But packets K..N are not queued and keep waiting for sent_cb before being sent with qemu_send_packet_async. Thus, when sent_cb is called, we should finish our transfer of packets K..N from s->packets_buf to qemu by calling qemu_send_packet_async. That is what I meant. But it is also possible that, while waiting for sent_cb, some new packet(s) have been received and are ready to be read with vmnet_read. To handle this, when sent_cb is called and we're done with packets K..N in s->packets_buf, we should also query vmnet_read to check whether something new is present. 
Best Regards, Vladislav Yaroshchuk > > > It can be done kinda this way: > > ``` > > struct s: > > send_bh: bh > > packets_buf: array[packet] > > ## Three new fields > > send_enabled: bool > > packets_send_pos: int > > packets_batch_size: int > > > > fun bh_send(s): > > ## Send disabled - do nothing > > if not s->send_enabled: > > return > > ## If some packets are waiting to be sent in > > ## s->packets_buf - send them > > if s->packets_send_pos < s->packets_batch_size: > > if not qemu_send_wrapper(s): > > return > > > > ## Try to read new packets from vmnet > > s->packets_send_pos = 0 > > s->packets_batch_size = 0 > > s->packets_buf = vmnet_read(&s->packets_batch_size) > > if s->packets_batch_size > 0: > > # If something received - process them > > qemu_send_wrapper(s) > > fun qemu_send_wrapper(s): > > for i in s->packets_send_pos to s->packets_batch_size: > > size = qemu_send_async(s->packets_buf[i], > > vmnet_send_completed) > > if size == 0: > > ## Disable packets processing until vmnet_send_completed > > ## Save the state: packets to be sent now in s->packets_buf > > ## in range s->packets_send_pos..s->packets_batch_size > > s->send_enabled = false > > s->packets_send_pos = i + 1 > > break > > if size < 0: > > ## On error just drop the packets > > s->packets_send_pos = 0 > > s->packets_batch_size = 0 > > break > > > > return s->send_enabled > > > > > > fun vmnet_send_completed(s): > > ## Complete sending packets from s->packets_buf > > s->send_enabled = true > > qemu_bh_schedule(s->send_bh) > > > > ## From callback we only scheduling the bh > > vmnet.set_callback(callback = s: qemu_bh_schedule(s->send_bh)) > > ``` > > > > I think a simpler implementation may exist, but currently > > I see only this approach with the send_enabled flag and > > two more fields to save packets sending state. > > > > vmnet_set_send_enabled and send_scheduled can be simply removed. > > qemu_bh_schedule ensures there is no duplicate scheduling. > > > > Yep, my mistake. 
Previously I used schedule_oneshot which > > creates a new bh for each call which causes duplicate scheduling. > > > > > > Regards, > > Akihiko Odaki > > > > > > > > > > >> > > >> Regards, > > >> Akihiko Odaki > > >> > > >> > > > >> > > + qemu_bh_schedule(s->send_bh); > > >> > > + } > > >> > > + }); > > >> > > + } else { > > >> > > + vmnet_interface_set_event_callback( > > >> > > + s->vmnet_if, > > >> > > + VMNET_INTERFACE_PACKETS_AVAILABLE, > > >> > > + NULL, > > >> > > + NULL); > > >> > > + } > > >> > > +} > > >> > > + > > >> > > + > > >> > > +static void vmnet_send_completed(NetClientState *nc, > > ssize_t len) > > >> > > +{ > > >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, > > nc, nc); > > >> > > + vmnet_set_send_enabled(s, true); > > >> > > +} > > >> > > + > > >> > > + > > >> > > +static void vmnet_send_bh(void *opaque) > > >> > > +{ > > >> > > + NetClientState *nc = (NetClientState *) opaque; > > >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, > > nc, nc); > > >> > > + > > >> > > + struct iovec *iov = s->iov_buf; > > >> > > + struct vmpktdesc *packets = s->packets_buf; > > >> > > + int pkt_cnt; > > >> > > + int i; > > >> > > + > > >> > > + vmnet_return_t status; > > >> > > + ssize_t size; > > >> > > + > > >> > > + /* read as many packets as present */ > > >> > > + pkt_cnt = VMNET_PACKETS_LIMIT; > > >> > > > >> > There could be more than VMNET_PACKETS_LIMIT. You may call > > >> > vmnet_read in > > >> > a loop. 
> > >> > > > >> > > + for (i = 0; i < pkt_cnt; ++i) { > > >> > > + packets[i].vm_pkt_size = s->max_packet_size; > > >> > > + packets[i].vm_pkt_iovcnt = 1; > > >> > > + packets[i].vm_flags = 0; > > >> > > + } > > >> > > + > > >> > > + status = vmnet_read(s->vmnet_if, packets, > &pkt_cnt); > > >> > > + if (status != VMNET_SUCCESS) { > > >> > > + error_printf("vmnet: read failed: %s\n", > > >> > > + vmnet_status_map_str(status)); > > >> > > + goto done; > > >> > > + } > > >> > > + > > >> > > + for (i = 0; i < pkt_cnt; ++i) { > > >> > > + size = qemu_send_packet_async(nc, > > >> > > + iov[i].iov_base, > > >> > > + > > packets[i].vm_pkt_size, > > >> > > + > > vmnet_send_completed); > > >> > > + if (size == 0) { > > >> > > + vmnet_set_send_enabled(s, false); > > >> > > + goto done; > > >> > > + } else if (size < 0) { > > >> > > + break; > > >> > > + } > > >> > > > >> > goto is not needed here. "break" when size <= 0. > > >> > > > >> > > + } > > >> > > + > > >> > > +done: > > >> > > + vmnet_set_send_bh_scheduled(s, false); > > >> > > +} > > >> > > + > > >> > > + > > >> > > +static void vmnet_bufs_init(VmnetCommonState *s) > > >> > > +{ > > >> > > + struct vmpktdesc *packets = s->packets_buf; > > >> > > + struct iovec *iov = s->iov_buf; > > >> > > + int i; > > >> > > + > > >> > > + for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > > >> > > + iov[i].iov_len = s->max_packet_size; > > >> > > + iov[i].iov_base = g_malloc0(iov[i].iov_len); > > >> > > + packets[i].vm_pkt_iov = iov + i; > > >> > > + } > > >> > > +} > > >> > > + > > >> > > + > > >> > > +const char *vmnet_status_map_str(vmnet_return_t status) > > >> > > +{ > > >> > > + switch (status) { > > >> > > + case VMNET_SUCCESS: > > >> > > + return "success"; > > >> > > + case VMNET_FAILURE: > > >> > > + return "general failure (possibly not enough > > privileges)"; > > >> > > + case VMNET_MEM_FAILURE: > > >> > > + return "memory allocation failure"; > > >> > > + case VMNET_INVALID_ARGUMENT: > > >> > > + return "invalid argument 
specified"; > > >> > > + case VMNET_SETUP_INCOMPLETE: > > >> > > + return "interface setup is not complete"; > > >> > > + case VMNET_INVALID_ACCESS: > > >> > > + return "invalid access, permission denied"; > > >> > > + case VMNET_PACKET_TOO_BIG: > > >> > > + return "packet size is larger than MTU"; > > >> > > + case VMNET_BUFFER_EXHAUSTED: > > >> > > + return "buffers exhausted in kernel"; > > >> > > + case VMNET_TOO_MANY_PACKETS: > > >> > > + return "packet count exceeds limit"; > > >> > > +#if defined(MAC_OS_VERSION_11_0) && \ > > >> > > + MAC_OS_X_VERSION_MIN_REQUIRED >= > MAC_OS_VERSION_11_0 > > >> > > + case VMNET_SHARING_SERVICE_BUSY: > > >> > > + return "conflict, sharing service is in use"; > > >> > > +#endif > > >> > > + default: > > >> > > + return "unknown vmnet error"; > > >> > > + } > > >> > > +} > > >> > > + > > >> > > + > > >> > > +int vmnet_if_create(NetClientState *nc, > > >> > > + xpc_object_t if_desc, > > >> > > + Error **errp) > > >> > > +{ > > >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, > > nc, nc);; > > >> > > > >> > Duplicate semicolons. > > >> > > > >> > > + dispatch_semaphore_t if_created_sem = > > >> > dispatch_semaphore_create(0); > > >> > > > >> > if_created_sem leaks. 
> > >> > > > >> > > + __block vmnet_return_t if_status; > > >> > > + > > >> > > + s->if_queue = dispatch_queue_create( > > >> > > + "org.qemu.vmnet.if_queue", > > >> > > + DISPATCH_QUEUE_SERIAL > > >> > > + ); > > >> > > + > > >> > > + xpc_dictionary_set_bool( > > >> > > + if_desc, > > >> > > + vmnet_allocate_mac_address_key, > > >> > > + false > > >> > > + ); > > >> > > +#ifdef DEBUG > > >> > > + qemu_log("vmnet.start.interface_desc:\n"); > > >> > > + xpc_dictionary_apply(if_desc, > > >> > > + ^bool(const char *k, > > xpc_object_t v) { > > >> > > + char *desc = > > xpc_copy_description(v); > > >> > > + qemu_log(" %s=%s\n", k, > > desc); > > >> > > + free(desc); > > >> > > + return true; > > >> > > + }); > > >> > > +#endif /* DEBUG */ > > >> > > + > > >> > > + s->vmnet_if = vmnet_start_interface( > > >> > > + if_desc, > > >> > > + s->if_queue, > > >> > > + ^(vmnet_return_t status, xpc_object_t > > interface_param) { > > >> > > + if_status = status; > > >> > > + if (status != VMNET_SUCCESS || > > !interface_param) { > > >> > > + > dispatch_semaphore_signal(if_created_sem); > > >> > > + return; > > >> > > + } > > >> > > + > > >> > > +#ifdef DEBUG > > >> > > + qemu_log("vmnet.start.interface_param:\n"); > > >> > > + xpc_dictionary_apply(interface_param, > > >> > > + ^bool(const char *k, > > >> > xpc_object_t v) { > > >> > > + char *desc = > > >> > xpc_copy_description(v); > > >> > > + qemu_log(" > > %s=%s\n", k, desc); > > >> > > + free(desc); > > >> > > + return true; > > >> > > + }); > > >> > > +#endif /* DEBUG */ > > >> > > + > > >> > > + s->mtu = xpc_dictionary_get_uint64( > > >> > > + interface_param, > > >> > > + vmnet_mtu_key); > > >> > > + s->max_packet_size = > > xpc_dictionary_get_uint64( > > >> > > + interface_param, > > >> > > + vmnet_max_packet_size_key); > > >> > > + > > >> > > + dispatch_semaphore_signal(if_created_sem); > > >> > > + }); > > >> > > + > > >> > > + if (s->vmnet_if == NULL) { > > >> > > + error_setg(errp, > > >> > > + "unable to create 
interface " > > >> > > + "with requested params"); > > >> > > > >> > You don't need line breaks here. Breaking a string into a > > few would > > >> > also > > >> > makes it a bit hard to grep. > > >> > > > >> > > + return -1; > > >> > > > >> > s->if_queue leaks. > > >> > > > >> > > + } > > >> > > + > > >> > > + dispatch_semaphore_wait(if_created_sem, > > DISPATCH_TIME_FOREVER); > > >> > > + > > >> > > + if (if_status != VMNET_SUCCESS) { > > >> > > + error_setg(errp, > > >> > > + "cannot create vmnet interface: %s", > > >> > > + vmnet_status_map_str(if_status)); > > >> > > + return -1; > > >> > > + } > > >> > > + > > >> > > + s->send_bh = aio_bh_new(qemu_get_aio_context(), > > >> > vmnet_send_bh, nc); > > >> > > + vmnet_bufs_init(s); > > >> > > + vmnet_set_send_bh_scheduled(s, false); > > >> > > + vmnet_set_send_enabled(s, true); > > >> > > + return 0; > > >> > > +} > > >> > > + > > >> > > + > > >> > > +ssize_t vmnet_receive_common(NetClientState *nc, > > >> > > + const uint8_t *buf, > > >> > > + size_t size) > > >> > > +{ > > >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, > > nc, nc); > > >> > > + struct vmpktdesc packet; > > >> > > + struct iovec iov; > > >> > > + int pkt_cnt; > > >> > > + vmnet_return_t if_status; > > >> > > + > > >> > > + if (size > s->max_packet_size) { > > >> > > + warn_report("vmnet: packet is too big, %zu > > > %llu\n", > > >> > > > >> > Use PRIu64. 
> > >> > > > >> > > + packet.vm_pkt_size, > > >> > > + s->max_packet_size); > > >> > > + return -1; > > >> > > + } > > >> > > + > > >> > > + iov.iov_base = (char *) buf; > > >> > > + iov.iov_len = size; > > >> > > + > > >> > > + packet.vm_pkt_iovcnt = 1; > > >> > > + packet.vm_flags = 0; > > >> > > + packet.vm_pkt_size = size; > > >> > > + packet.vm_pkt_iov = &iov; > > >> > > + pkt_cnt = 1; > > >> > > + > > >> > > + if_status = vmnet_write(s->vmnet_if, &packet, > > &pkt_cnt); > > >> > > + if (if_status != VMNET_SUCCESS) { > > >> > > + error_report("vmnet: write error: %s\n", > > >> > > + vmnet_status_map_str(if_status)); > > >> > > > >> > Why don't return -1? > > >> > > > >> > > + } > > >> > > + > > >> > > + if (if_status == VMNET_SUCCESS && pkt_cnt) { > > >> > > + return size; > > >> > > + } > > >> > > + return 0; > > >> > > +} > > >> > > + > > >> > > + > > >> > > +void vmnet_cleanup_common(NetClientState *nc) > > >> > > +{ > > >> > > + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, > > nc, nc);; > > >> > > > >> > Duplicate semicolons. 
> > >> > > > >> > > + dispatch_semaphore_t if_created_sem; > > >> > > + > > >> > > + qemu_purge_queued_packets(nc); > + > > >> > vmnet_set_send_bh_scheduled(s, true); > > >> > > + vmnet_set_send_enabled(s, false); > > >> > > + > > >> > > + if (s->vmnet_if == NULL) { > > >> > > + return; > > >> > > + } > > >> > > + > > >> > > + if_created_sem = dispatch_semaphore_create(0); > > >> > > + vmnet_stop_interface( > > >> > > + s->vmnet_if, > > >> > > + s->if_queue, > > >> > > + ^(vmnet_return_t status) { > > >> > > + assert(status == VMNET_SUCCESS); > > >> > > + dispatch_semaphore_signal(if_created_sem); > > >> > > + }); > > >> > > + dispatch_semaphore_wait(if_created_sem, > > DISPATCH_TIME_FOREVER); > > >> > > + > > >> > > + qemu_bh_delete(s->send_bh); > > >> > > + dispatch_release(if_created_sem); > > >> > > + dispatch_release(s->if_queue); > > >> > > + > > >> > > + for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) { > > >> > > + g_free(s->iov_buf[i].iov_base); > > >> > > + } > > >> > > +} > > >> > > diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c > > >> > > index f07afaaf21..66f66c034b 100644 > > >> > > --- a/net/vmnet-shared.c > > >> > > +++ b/net/vmnet-shared.c > > >> > > @@ -10,16 +10,102 @@ > > >> > > > > >> > > #include "qemu/osdep.h" > > >> > > #include "qapi/qapi-types-net.h" > > >> > > +#include "qapi/error.h" > > >> > > #include "vmnet_int.h" > > >> > > #include "clients.h" > > >> > > -#include "qemu/error-report.h" > > >> > > -#include "qapi/error.h" > > >> > > > > >> > > #include <vmnet/vmnet.h> > > >> > > > > >> > > +typedef struct VmnetSharedState { > > >> > > + VmnetCommonState cs; > > >> > > +} VmnetSharedState; > > >> > > + > > >> > > + > > >> > > +static bool validate_options(const Netdev *netdev, > > Error **errp) > > >> > > +{ > > >> > > + const NetdevVmnetSharedOptions *options = > > >> > &(netdev->u.vmnet_shared); > > >> > > + > > >> > > +#if !defined(MAC_OS_VERSION_11_0) || \ > > >> > > + MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 > > >> 
> > + if (options->has_isolated) { > > >> > > + error_setg(errp, > > >> > > + "vmnet-shared.isolated feature is " > > >> > > + "unavailable: outdated > > vmnet.framework API"); > > >> > > + return false; > > >> > > + } > > >> > > +#endif > > >> > > + > > >> > > + if ((options->has_start_address || > > >> > > + options->has_end_address || > > >> > > + options->has_subnet_mask) && > > >> > > + !(options->has_start_address && > > >> > > + options->has_end_address && > > >> > > + options->has_subnet_mask)) { > > >> > > + error_setg(errp, > > >> > > + "'start-address', 'end-address', > > 'subnet-mask' " > > >> > > + "should be provided together" > > >> > > + ); > > >> > > + return false; > > >> > > + } > > >> > > + > > >> > > + return true; > > >> > > +} > > >> > > + > > >> > > +static xpc_object_t build_if_desc(const Netdev *netdev) > > >> > > +{ > > >> > > + const NetdevVmnetSharedOptions *options = > > >> > &(netdev->u.vmnet_shared); > > >> > > + xpc_object_t if_desc = xpc_dictionary_create(NULL, > > NULL, 0); > > >> > > + > > >> > > + xpc_dictionary_set_uint64( > > >> > > + if_desc, > > >> > > + vmnet_operation_mode_key, > > >> > > + VMNET_SHARED_MODE > > >> > > + ); > > >> > > + > > >> > > + if (options->has_nat66_prefix) { > > >> > > + xpc_dictionary_set_string(if_desc, > > >> > > + > vmnet_nat66_prefix_key, > > >> > > + > options->nat66_prefix); > > >> > > + } > > >> > > + > > >> > > + if (options->has_start_address) { > > >> > > + xpc_dictionary_set_string(if_desc, > > >> > > + > vmnet_start_address_key, > > >> > > + > options->start_address); > > >> > > + xpc_dictionary_set_string(if_desc, > > >> > > + > vmnet_end_address_key, > > >> > > + > options->end_address); > > >> > > + xpc_dictionary_set_string(if_desc, > > >> > > + > vmnet_subnet_mask_key, > > >> > > + > options->subnet_mask); > > >> > > + } > > >> > > + > > >> > > +#if defined(MAC_OS_VERSION_11_0) && \ > > >> > > + MAC_OS_X_VERSION_MIN_REQUIRED >= > MAC_OS_VERSION_11_0 > > >> > > + 
xpc_dictionary_set_bool( > > >> > > + if_desc, > > >> > > + vmnet_enable_isolation_key, > > >> > > + options->isolated > > >> > > + ); > > >> > > +#endif > > >> > > + > > >> > > + return if_desc; > > >> > > +} > > >> > > + > > >> > > +static NetClientInfo net_vmnet_shared_info = { > > >> > > + .type = NET_CLIENT_DRIVER_VMNET_SHARED, > > >> > > + .size = sizeof(VmnetSharedState), > > >> > > + .receive = vmnet_receive_common, > > >> > > + .cleanup = vmnet_cleanup_common, > > >> > > +}; > > >> > > + > > >> > > int net_init_vmnet_shared(const Netdev *netdev, const > > char *name, > > >> > > NetClientState *peer, Error > > **errp) > > >> > > { > > >> > > - error_setg(errp, "vmnet-shared is not implemented > yet"); > > >> > > - return -1; > > >> > > + NetClientState *nc = > > qemu_new_net_client(&net_vmnet_shared_info, > > >> > > + peer, > > >> > "vmnet-shared", name); > > >> > > + if (!validate_options(netdev, errp)) { > > >> > > + g_assert_not_reached(); > > >> > > > >> > g_assert_not_reached is for debugging purpose and may be > > dropped > > >> > depending on the build option. > > >> > > > >> > > + } > > >> > > + return vmnet_if_create(nc, build_if_desc(netdev), > > errp); > > >> > > } > > >> > > diff --git a/net/vmnet_int.h b/net/vmnet_int.h > > >> > > index aac4d5af64..acfe3a88c0 100644 > > >> > > --- a/net/vmnet_int.h > > >> > > +++ b/net/vmnet_int.h > > >> > > @@ -15,11 +15,48 @@ > > >> > > #include "clients.h" > > >> > > > > >> > > #include <vmnet/vmnet.h> > > >> > > +#include <dispatch/dispatch.h> > > >> > > + > > >> > > +/** > > >> > > + * From vmnet.framework documentation > > >> > > + * > > >> > > + * Each read/write call allows up to 200 packets to be > > >> > > + * read or written for a maximum of 256KB. > > >> > > + * > > >> > > + * Each packet written should be a complete > > >> > > + * ethernet frame. 
> > >> > > + * > > >> > > + * https://developer.apple.com/documentation/vmnet > > <https://developer.apple.com/documentation/vmnet> > > >> > <https://developer.apple.com/documentation/vmnet > > <https://developer.apple.com/documentation/vmnet>> > > >> > > + */ > > >> > > +#define VMNET_PACKETS_LIMIT 200 > > >> > > > > >> > > typedef struct VmnetCommonState { > > >> > > - NetClientState nc; > > >> > > + NetClientState nc; > > >> > > + interface_ref vmnet_if; > > >> > > + > > >> > > + bool send_scheduled; > > >> > > > > >> > > + uint64_t mtu; > > >> > > + uint64_t max_packet_size; > > >> > > + > > >> > > + struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT]; > > >> > > + struct iovec iov_buf[VMNET_PACKETS_LIMIT]; > > >> > > + > > >> > > + dispatch_queue_t if_queue; > > >> > > + > > >> > > + QEMUBH *send_bh; > > >> > > } VmnetCommonState; > > >> > > > > >> > > +const char *vmnet_status_map_str(vmnet_return_t > status); > > >> > > + > > >> > > +int vmnet_if_create(NetClientState *nc, > > >> > > + xpc_object_t if_desc, > > >> > > + Error **errp); > > >> > > + > > >> > > +ssize_t vmnet_receive_common(NetClientState *nc, > > >> > > + const uint8_t *buf, > > >> > > + size_t size); > > >> > > + > > >> > > +void vmnet_cleanup_common(NetClientState *nc); > > >> > > > > >> > > #endif /* VMNET_INT_H */ > > >> > > > >> > > > >> > Other issues will be fixed and submitted later, > > >> > thank you! > > >> > > > >> > Regards, > > >> > Vladislav Yaroshchuk > > >> > > > > > > Regards, > > > Vladislav Yaroshchuk > > > > > > Best Regards, > > > > Vladislav Yaroshchuk > >
On 2022/03/01 17:09, Vladislav Yaroshchuk wrote: > > Not sure that only one field is enough, cause > > we may have two states on bh execution start: > > 1. There are packets in vmnet buffer s->packets_buf > >   that were rejected by qemu_send_async and waiting > >   to be sent. If this happens, we should complete sending > >   these waiting packets with qemu_send_async firstly, > >   and after that we should call vmnet_read to get > >   new ones and send them to QEMU; > > 2. There are no packets in s->packets_buf to be sent to > >   qemu, we only need to get new packets from vmnet > >   with vmnet_read and send them to QEMU > > In case 1, you should just keep calling qemu_send_packet_async. > Actually > qemu_send_packet_async adds the packet to its internal queue and calls > the callback when it is consumed. > > > I'm not sure we can keep calling qemu_send_packet_async, > because as docs from net/queue.c says: > > /* [...] >  * If a sent callback is provided to send(), the caller must handle a >  * zero return from the delivery handler by not sending any more packets >  * until we have invoked the callback. Only in that case will we queue >  * the packet. >  * >  * If a sent callback isn't provided, we just drop the packet to avoid >  * unbounded queueing. >  */ > > So after we did vmnet_read and read N packets > into temporary s->packets_buf, we begin calling > qemu_send_packet_async. If it returns 0 - it says > "no more packets until sent_cb called please". > At this moment we have N packets in s->packets_buf > and already queued K < N of them. But, packets K..N > are not queued and keep waiting for sent_cb to be sent > with qemu_send_packet_async. > Thus when sent_cb called, we should finish > our transfer of packets K..N from s->packets_buf > to qemu calling qemu_send_packet_async. > I meant this. I missed the comment. 
The description contradicts the actual code; qemu_net_queue_send_iov appends the packet to the queue whenever it cannot send one immediately. Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if calling qemu_send_packet_async is allowed after it returns 0? Regards, Akihiko Odaki
On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > On 2022/03/01 17:09, Vladislav Yaroshchuk wrote: > > > Not sure that only one field is enough, cause > > > we may have two states on bh execution start: > > > 1. There are packets in vmnet buffer s->packets_buf > > > that were rejected by qemu_send_async and waiting > > > to be sent. If this happens, we should complete sending > > > these waiting packets with qemu_send_async firstly, > > > and after that we should call vmnet_read to get > > > new ones and send them to QEMU; > > > 2. There are no packets in s->packets_buf to be sent to > > > qemu, we only need to get new packets from vmnet > > > with vmnet_read and send them to QEMU > > > > In case 1, you should just keep calling qemu_send_packet_async. > > Actually > > qemu_send_packet_async adds the packet to its internal queue and > calls > > the callback when it is consumed. > > > > > > I'm not sure we can keep calling qemu_send_packet_async, > > because as docs from net/queue.c says: > > > > /* [...] > > * If a sent callback is provided to send(), the caller must handle a > > * zero return from the delivery handler by not sending any more packets > > * until we have invoked the callback. Only in that case will we queue > > * the packet. > > * > > * If a sent callback isn't provided, we just drop the packet to avoid > > * unbounded queueing. > > */ > > > > So after we did vmnet_read and read N packets > > into temporary s->packets_buf, we begin calling > > qemu_send_packet_async. If it returns 0 - it says > > "no more packets until sent_cb called please". > > At this moment we have N packets in s->packets_buf > > and already queued K < N of them. But, packets K..N > > are not queued and keep waiting for sent_cb to be sent > > with qemu_send_packet_async. > > Thus when sent_cb called, we should finish > > our transfer of packets K..N from s->packets_buf > > to qemu calling qemu_send_packet_async. > > I meant this. 
> > I missed the comment. The description is contradicting with the actual > code; qemu_net_queue_send_iov appends the packet to the queue whenever > it cannot send one immediately. > > Yes, it appends, but (net/queue.c): * qemu_net_queue_send tries to deliver the packet immediately. If the packet cannot be delivered, the qemu_net_queue_append is called and 0 is returned to tell the caller "the receiver is not ready, hold on"; * qemu_net_queue_append does a probe before adding the packet to the queue: if (queue->nq_count >= queue->nq_maxlen && !sent_cb) { return; /* drop if queue full and no callback */ } The queue is not infinite, so we have three cases: 1. The queue is not full -> append the packet, no problems here 2. The queue is full, no callback -> we cannot notify a caller when we're ready, so just drop the packet if we can't append it. 3. The queue is full, callback present -> we can notify a caller when we are ready, so "let's queue this packet, but expect no more (!) packets are sent until I call sent_cb when the queue is ready" Therefore if we provide a callback and keep sending packets if 0 is returned, this may cause unlimited(!) queue growth. To prevent this, we should stop sending packets and wait for the notification callback to continue. I don't see any contradiction with that comment. Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if > calling qemu_send_packet_async is allowed after it returns 0? > > It may be wrong, but I think it's not allowed to send packets after qemu_send_packet_async returns 0. Jason Wang, can you confirm please? Best Regards, Vladislav Yaroshchuk > Regards, > Akihiko Odaki >
On Fri, Mar 4, 2022 at 12:43 AM Vladislav Yaroshchuk <vladislav.yaroshchuk@jetbrains.com> wrote: > > > > On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: >> >> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote: >> > > Not sure that only one field is enough, cause >> > > we may have two states on bh execution start: >> > > 1. There are packets in vmnet buffer s->packets_buf >> > > that were rejected by qemu_send_async and waiting >> > > to be sent. If this happens, we should complete sending >> > > these waiting packets with qemu_send_async firstly, >> > > and after that we should call vmnet_read to get >> > > new ones and send them to QEMU; >> > > 2. There are no packets in s->packets_buf to be sent to >> > > qemu, we only need to get new packets from vmnet >> > > with vmnet_read and send them to QEMU >> > >> > In case 1, you should just keep calling qemu_send_packet_async. >> > Actually >> > qemu_send_packet_async adds the packet to its internal queue and calls >> > the callback when it is consumed. >> > >> > >> > I'm not sure we can keep calling qemu_send_packet_async, >> > because as docs from net/queue.c says: >> > >> > /* [...] >> > * If a sent callback is provided to send(), the caller must handle a >> > * zero return from the delivery handler by not sending any more packets >> > * until we have invoked the callback. Only in that case will we queue >> > * the packet. >> > * >> > * If a sent callback isn't provided, we just drop the packet to avoid >> > * unbounded queueing. >> > */ >> > >> > So after we did vmnet_read and read N packets >> > into temporary s->packets_buf, we begin calling >> > qemu_send_packet_async. If it returns 0 - it says >> > "no more packets until sent_cb called please". >> > At this moment we have N packets in s->packets_buf >> > and already queued K < N of them. But, packets K..N >> > are not queued and keep waiting for sent_cb to be sent >> > with qemu_send_packet_async. 
>> > Thus when sent_cb called, we should finish >> > our transfer of packets K..N from s->packets_buf >> > to qemu calling qemu_send_packet_async. >> > I meant this. >> >> I missed the comment. The description is contradicting with the actual >> code; qemu_net_queue_send_iov appends the packet to the queue whenever >> it cannot send one immediately. >> > > Yes, it appends, but (net/queue.c): > * qemu_net_queue_send tries to deliver the packet > immediately. If the packet cannot be delivered, the > qemu_net_queue_append is called and 0 is returned > to say the caller "the receiver is not ready, hold on"; > * qemu_net_queue_append does a probe before adding > the packet to the queue: > if (queue->nq_count >= queue->nq_maxlen && !sent_cb) { > return; /* drop if queue full and no callback */ > } > > The queue is not infinite, so we have three cases: > 1. The queue is not full -> append the packet, no > problems here > 2. The queue is full, no callback -> we cannot notify > a caller when we're ready, so just drop the packet > if we can't append it. > 3. The queue is full, callback present -> we can notify > a caller when we are ready, so "let's queue this packet, > but expect no more (!) packets is sent until I call > sent_cb when the queue is ready" > > Therefore if we provide a callback and keep sending > packets if 0 is returned, this may cause unlimited(!) > queue growth. To prevent this, we should stop sending > packets and wait for notification callback to continue. > > I don't see any contradiction with that comment. > >> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if >> calling qemu_send_packet_async is allowed after it returns 0? >> > > It may be wrong, but I think it's not allowed to send > packets after qemu_send_packet_async returns 0. > > Jason Wang, can you confirm please? 
> > Best Regards, > > Vladislav Yaroshchuk > >> >> Regards, >> Akihiko Odaki > > > The unlimited queue growth would not happen if you stop calling vmnet_read after qemu_send_packet_async returns 0. So I think the comment should be amended to say something like: "Once qemu_send_packet_async returns 0, the client should stop reading more packets from the underlying NIC to prevent infinite growth of the queue until the last callback gets called." The unique feature of vmnet is that it can read multiple packets at once, and I guess it is the reason why the comment in net/queue.c missed the case. But this is all my guess so I need confirmation from the maintainer. Regards, Akihiko Odaki
On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk <vladislav.yaroshchuk@jetbrains.com> wrote: > > > > On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: >> >> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote: >> > > Not sure that only one field is enough, cause >> > > we may have two states on bh execution start: >> > > 1. There are packets in vmnet buffer s->packets_buf >> > > that were rejected by qemu_send_async and waiting >> > > to be sent. If this happens, we should complete sending >> > > these waiting packets with qemu_send_async firstly, >> > > and after that we should call vmnet_read to get >> > > new ones and send them to QEMU; >> > > 2. There are no packets in s->packets_buf to be sent to >> > > qemu, we only need to get new packets from vmnet >> > > with vmnet_read and send them to QEMU >> > >> > In case 1, you should just keep calling qemu_send_packet_async. >> > Actually >> > qemu_send_packet_async adds the packet to its internal queue and calls >> > the callback when it is consumed. >> > >> > >> > I'm not sure we can keep calling qemu_send_packet_async, >> > because as docs from net/queue.c says: >> > >> > /* [...] >> > * If a sent callback is provided to send(), the caller must handle a >> > * zero return from the delivery handler by not sending any more packets >> > * until we have invoked the callback. Only in that case will we queue >> > * the packet. >> > * >> > * If a sent callback isn't provided, we just drop the packet to avoid >> > * unbounded queueing. >> > */ >> > >> > So after we did vmnet_read and read N packets >> > into temporary s->packets_buf, we begin calling >> > qemu_send_packet_async. If it returns 0 - it says >> > "no more packets until sent_cb called please". >> > At this moment we have N packets in s->packets_buf >> > and already queued K < N of them. But, packets K..N >> > are not queued and keep waiting for sent_cb to be sent >> > with qemu_send_packet_async. 
>> > Thus when sent_cb called, we should finish >> > our transfer of packets K..N from s->packets_buf >> > to qemu calling qemu_send_packet_async. >> > I meant this. >> >> I missed the comment. The description is contradicting with the actual >> code; qemu_net_queue_send_iov appends the packet to the queue whenever >> it cannot send one immediately. >> > > Yes, it appends, but (net/queue.c): > * qemu_net_queue_send tries to deliver the packet > immediately. If the packet cannot be delivered, the > qemu_net_queue_append is called and 0 is returned > to say the caller "the receiver is not ready, hold on"; > * qemu_net_queue_append does a probe before adding > the packet to the queue: > if (queue->nq_count >= queue->nq_maxlen && !sent_cb) { > return; /* drop if queue full and no callback */ > } > > The queue is not infinite, so we have three cases: > 1. The queue is not full -> append the packet, no > problems here > 2. The queue is full, no callback -> we cannot notify > a caller when we're ready, so just drop the packet > if we can't append it. > 3. The queue is full, callback present -> we can notify > a caller when we are ready, so "let's queue this packet, > but expect no more (!) packets is sent until I call > sent_cb when the queue is ready" > > Therefore if we provide a callback and keep sending > packets if 0 is returned, this may cause unlimited(!) > queue growth. To prevent this, we should stop sending > packets and wait for notification callback to continue. Right. > > I don't see any contradiction with that comment. > >> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if >> calling qemu_send_packet_async is allowed after it returns 0? >> > > It may be wrong, but I think it's not allowed to send > packets after qemu_send_packet_async returns 0. > > Jason Wang, can you confirm please? With a cb, we can't do this. All users with cb will disable the source polling and depend on the cb to re-read the polling. (tap/l2tpv3/socket). 
Without a cb, we can. As analyzed above, qemu_net_queue_append() can limit the number of packets queued in this case. Thanks > > Best Regards, > > Vladislav Yaroshchuk > >> >> Regards, >> Akihiko Odaki > > >
On 2022/03/04 10:37, Jason Wang wrote: > On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk > <vladislav.yaroshchuk@jetbrains.com> wrote: >> >> >> >> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: >>> >>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote: >>>> > Not sure that only one field is enough, cause >>>> > we may have two states on bh execution start: >>>> > 1. There are packets in vmnet buffer s->packets_buf >>>> > that were rejected by qemu_send_async and waiting >>>> > to be sent. If this happens, we should complete sending >>>> > these waiting packets with qemu_send_async firstly, >>>> > and after that we should call vmnet_read to get >>>> > new ones and send them to QEMU; >>>> > 2. There are no packets in s->packets_buf to be sent to >>>> > qemu, we only need to get new packets from vmnet >>>> > with vmnet_read and send them to QEMU >>>> >>>> In case 1, you should just keep calling qemu_send_packet_async. >>>> Actually >>>> qemu_send_packet_async adds the packet to its internal queue and calls >>>> the callback when it is consumed. >>>> >>>> >>>> I'm not sure we can keep calling qemu_send_packet_async, >>>> because as docs from net/queue.c says: >>>> >>>> /* [...] >>>> * If a sent callback is provided to send(), the caller must handle a >>>> * zero return from the delivery handler by not sending any more packets >>>> * until we have invoked the callback. Only in that case will we queue >>>> * the packet. >>>> * >>>> * If a sent callback isn't provided, we just drop the packet to avoid >>>> * unbounded queueing. >>>> */ >>>> >>>> So after we did vmnet_read and read N packets >>>> into temporary s->packets_buf, we begin calling >>>> qemu_send_packet_async. If it returns 0 - it says >>>> "no more packets until sent_cb called please". >>>> At this moment we have N packets in s->packets_buf >>>> and already queued K < N of them. 
But, packets K..N >>>> are not queued and keep waiting for sent_cb to be sent >>>> with qemu_send_packet_async. >>>> Thus when sent_cb called, we should finish >>>> our transfer of packets K..N from s->packets_buf >>>> to qemu calling qemu_send_packet_async. >>>> I meant this. >>> >>> I missed the comment. The description is contradicting with the actual >>> code; qemu_net_queue_send_iov appends the packet to the queue whenever >>> it cannot send one immediately. >>> >> >> Yes, it appends, but (net/queue.c): >> * qemu_net_queue_send tries to deliver the packet >> immediately. If the packet cannot be delivered, the >> qemu_net_queue_append is called and 0 is returned >> to say the caller "the receiver is not ready, hold on"; >> * qemu_net_queue_append does a probe before adding >> the packet to the queue: >> if (queue->nq_count >= queue->nq_maxlen && !sent_cb) { >> return; /* drop if queue full and no callback */ >> } >> >> The queue is not infinite, so we have three cases: >> 1. The queue is not full -> append the packet, no >> problems here >> 2. The queue is full, no callback -> we cannot notify >> a caller when we're ready, so just drop the packet >> if we can't append it. >> 3. The queue is full, callback present -> we can notify >> a caller when we are ready, so "let's queue this packet, >> but expect no more (!) packets is sent until I call >> sent_cb when the queue is ready" >> >> Therefore if we provide a callback and keep sending >> packets if 0 is returned, this may cause unlimited(!) >> queue growth. To prevent this, we should stop sending >> packets and wait for notification callback to continue. > > Right. > >> >> I don't see any contradiction with that comment. >> >>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if >>> calling qemu_send_packet_async is allowed after it returns 0? >>> >> >> It may be wrong, but I think it's not allowed to send >> packets after qemu_send_packet_async returns 0. 
>> >> Jason Wang, can you confirm please? > > With a cb, we can't do this. All users with cb will disable the source > polling and depend on the cb to re-read the polling. > (tap/l2tpv3/socket). > > Without a cb, we can. As analyzed above, qemu_net_queue_append() can > limit the number of packets queued in this case. vmnet can read multiple packets at once. What about such a case? Isn't calling qemu_send_packet_async for an already-read packet and stopping reading more fine? Regards, Akihiko Odaki > > Thanks > >> >> Best Regards, >> >> Vladislav Yaroshchuk >> >>> >>> Regards, >>> Akihiko Odaki >> >> >> >
On 2022/03/04 3:34, Akihiko Odaki wrote: > On Fri, Mar 4, 2022 at 12:43 AM Vladislav Yaroshchuk > <vladislav.yaroshchuk@jetbrains.com> wrote: >> >> >> >> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: >>> >>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote: >>>> > Not sure that only one field is enough, cause >>>> > we may have two states on bh execution start: >>>> > 1. There are packets in vmnet buffer s->packets_buf >>>> > that were rejected by qemu_send_async and waiting >>>> > to be sent. If this happens, we should complete sending >>>> > these waiting packets with qemu_send_async firstly, >>>> > and after that we should call vmnet_read to get >>>> > new ones and send them to QEMU; >>>> > 2. There are no packets in s->packets_buf to be sent to >>>> > qemu, we only need to get new packets from vmnet >>>> > with vmnet_read and send them to QEMU >>>> >>>> In case 1, you should just keep calling qemu_send_packet_async. >>>> Actually >>>> qemu_send_packet_async adds the packet to its internal queue and calls >>>> the callback when it is consumed. >>>> >>>> >>>> I'm not sure we can keep calling qemu_send_packet_async, >>>> because as docs from net/queue.c says: >>>> >>>> /* [...] >>>> * If a sent callback is provided to send(), the caller must handle a >>>> * zero return from the delivery handler by not sending any more packets >>>> * until we have invoked the callback. Only in that case will we queue >>>> * the packet. >>>> * >>>> * If a sent callback isn't provided, we just drop the packet to avoid >>>> * unbounded queueing. >>>> */ >>>> >>>> So after we did vmnet_read and read N packets >>>> into temporary s->packets_buf, we begin calling >>>> qemu_send_packet_async. If it returns 0 - it says >>>> "no more packets until sent_cb called please". >>>> At this moment we have N packets in s->packets_buf >>>> and already queued K < N of them. 
But, packets K..N >>>> are not queued and keep waiting for sent_cb to be sent >>>> with qemu_send_packet_async. >>>> Thus when sent_cb called, we should finish >>>> our transfer of packets K..N from s->packets_buf >>>> to qemu calling qemu_send_packet_async. >>>> I meant this. >>> >>> I missed the comment. The description is contradicting with the actual >>> code; qemu_net_queue_send_iov appends the packet to the queue whenever >>> it cannot send one immediately. >>> >> >> Yes, it appends, but (net/queue.c): >> * qemu_net_queue_send tries to deliver the packet >> immediately. If the packet cannot be delivered, the >> qemu_net_queue_append is called and 0 is returned >> to say the caller "the receiver is not ready, hold on"; >> * qemu_net_queue_append does a probe before adding >> the packet to the queue: >> if (queue->nq_count >= queue->nq_maxlen && !sent_cb) { >> return; /* drop if queue full and no callback */ >> } >> >> The queue is not infinite, so we have three cases: >> 1. The queue is not full -> append the packet, no >> problems here >> 2. The queue is full, no callback -> we cannot notify >> a caller when we're ready, so just drop the packet >> if we can't append it. >> 3. The queue is full, callback present -> we can notify >> a caller when we are ready, so "let's queue this packet, >> but expect no more (!) packets is sent until I call >> sent_cb when the queue is ready" >> >> Therefore if we provide a callback and keep sending >> packets if 0 is returned, this may cause unlimited(!) >> queue growth. To prevent this, we should stop sending >> packets and wait for notification callback to continue. >> >> I don't see any contradiction with that comment. >> >>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if >>> calling qemu_send_packet_async is allowed after it returns 0? >>> >> >> It may be wrong, but I think it's not allowed to send >> packets after qemu_send_packet_async returns 0. >> >> Jason Wang, can you confirm please? 
>> >> Best Regards, >> >> Vladislav Yaroshchuk >> >>> >>> Regards, >>> Akihiko Odaki >> >> >> > > The unlimited queue growth would not happen if you stop calling > vmnet_read after qemu_send_packet_async returns 0. So I think the > comment should be amended to say something like: > "Once qemu_send_packet_async returns 0, the client should stop reading > more packets from the underlying NIC to prevent infinite growth of the > queue until the last callback gets called." > > The unique feature of vmnet is that it can read multiple packets at > once, and I guess it is the reason why the comment in net/queue.c > missed the case. But this is all my guess so I need confirmation from > the maintainer. > > Regards, > Akihiko Odaki I forgot to include Jason Wang in the To field of this email.
On Fri, Mar 4, 2022 at 12:37 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > > On 2022/03/04 10:37, Jason Wang wrote: > > On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk > > <vladislav.yaroshchuk@jetbrains.com> wrote: > >> > >> > >> > >> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > >>> > >>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote: > >>>> > Not sure that only one field is enough, cause > >>>> > we may have two states on bh execution start: > >>>> > 1. There are packets in vmnet buffer s->packets_buf > >>>> > that were rejected by qemu_send_async and waiting > >>>> > to be sent. If this happens, we should complete sending > >>>> > these waiting packets with qemu_send_async firstly, > >>>> > and after that we should call vmnet_read to get > >>>> > new ones and send them to QEMU; > >>>> > 2. There are no packets in s->packets_buf to be sent to > >>>> > qemu, we only need to get new packets from vmnet > >>>> > with vmnet_read and send them to QEMU > >>>> > >>>> In case 1, you should just keep calling qemu_send_packet_async. > >>>> Actually > >>>> qemu_send_packet_async adds the packet to its internal queue and calls > >>>> the callback when it is consumed. > >>>> > >>>> > >>>> I'm not sure we can keep calling qemu_send_packet_async, > >>>> because as docs from net/queue.c says: > >>>> > >>>> /* [...] > >>>> * If a sent callback is provided to send(), the caller must handle a > >>>> * zero return from the delivery handler by not sending any more packets > >>>> * until we have invoked the callback. Only in that case will we queue > >>>> * the packet. > >>>> * > >>>> * If a sent callback isn't provided, we just drop the packet to avoid > >>>> * unbounded queueing. > >>>> */ > >>>> > >>>> So after we did vmnet_read and read N packets > >>>> into temporary s->packets_buf, we begin calling > >>>> qemu_send_packet_async. If it returns 0 - it says > >>>> "no more packets until sent_cb called please". 
> >>>> At this moment we have N packets in s->packets_buf > >>>> and already queued K < N of them. But, packets K..N > >>>> are not queued and keep waiting for sent_cb to be sent > >>>> with qemu_send_packet_async. > >>>> Thus when sent_cb called, we should finish > >>>> our transfer of packets K..N from s->packets_buf > >>>> to qemu calling qemu_send_packet_async. > >>>> I meant this. > >>> > >>> I missed the comment. The description is contradicting with the actual > >>> code; qemu_net_queue_send_iov appends the packet to the queue whenever > >>> it cannot send one immediately. > >>> > >> > >> Yes, it appends, but (net/queue.c): > >> * qemu_net_queue_send tries to deliver the packet > >> immediately. If the packet cannot be delivered, the > >> qemu_net_queue_append is called and 0 is returned > >> to say the caller "the receiver is not ready, hold on"; > >> * qemu_net_queue_append does a probe before adding > >> the packet to the queue: > >> if (queue->nq_count >= queue->nq_maxlen && !sent_cb) { > >> return; /* drop if queue full and no callback */ > >> } > >> > >> The queue is not infinite, so we have three cases: > >> 1. The queue is not full -> append the packet, no > >> problems here > >> 2. The queue is full, no callback -> we cannot notify > >> a caller when we're ready, so just drop the packet > >> if we can't append it. > >> 3. The queue is full, callback present -> we can notify > >> a caller when we are ready, so "let's queue this packet, > >> but expect no more (!) packets is sent until I call > >> sent_cb when the queue is ready" > >> > >> Therefore if we provide a callback and keep sending > >> packets if 0 is returned, this may cause unlimited(!) > >> queue growth. To prevent this, we should stop sending > >> packets and wait for notification callback to continue. > > > > Right. > > > >> > >> I don't see any contradiction with that comment. > >> > >>> Jason Wang, I saw you are in the MAINTAINERS for net/. 
Can you tell if > >>> calling qemu_send_packet_async is allowed after it returns 0? > >>> > >> > >> It may be wrong, but I think it's not allowed to send > >> packets after qemu_send_packet_async returns 0. > >> > >> Jason Wang, can you confirm please? > > > > With a cb, we can't do this. All users with cb will disable the source > > polling and depend on the cb to re-read the polling. > > (tap/l2tpv3/socket). > > > > Without a cb, we can. As analyzed above, qemu_net_queue_append() can > > limit the number of packets queued in this case. > > vmnet can read multiple packets at once. What about such a case? Isn't > calling qemu_send_packet_async for already read packet and stopping > reading more fine? It should be fine. I remember I asked whether or not the source could be disabled, but I got the answer that it can't. Thanks > > Regards, > Akihiko Odaki > > > > > Thanks > > > >> > > >> Best Regards, > > >> > > >> Vladislav Yaroshchuk > > >> > > >>> > > >>> Regards, > > >>> Akihiko Odaki > > >> > > >> > > >> > > >
On Mon, Mar 7, 2022 at 12:59 PM Jason Wang <jasowang@redhat.com> wrote: > > On Fri, Mar 4, 2022 at 12:37 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > > > > On 2022/03/04 10:37, Jason Wang wrote: > > > On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk > > > <vladislav.yaroshchuk@jetbrains.com> wrote: > > >> > > >> > > >> > > >> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > > >>> > > >>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote: > > >>>> > Not sure that only one field is enough, cause > > >>>> > we may have two states on bh execution start: > > >>>> > 1. There are packets in vmnet buffer s->packets_buf > > >>>> > that were rejected by qemu_send_async and waiting > > >>>> > to be sent. If this happens, we should complete sending > > >>>> > these waiting packets with qemu_send_async firstly, > > >>>> > and after that we should call vmnet_read to get > > >>>> > new ones and send them to QEMU; > > >>>> > 2. There are no packets in s->packets_buf to be sent to > > >>>> > qemu, we only need to get new packets from vmnet > > >>>> > with vmnet_read and send them to QEMU > > >>>> > > >>>> In case 1, you should just keep calling qemu_send_packet_async. > > >>>> Actually > > >>>> qemu_send_packet_async adds the packet to its internal queue and calls > > >>>> the callback when it is consumed. > > >>>> > > >>>> > > >>>> I'm not sure we can keep calling qemu_send_packet_async, > > >>>> because as docs from net/queue.c says: > > >>>> > > >>>> /* [...] > > >>>> * If a sent callback is provided to send(), the caller must handle a > > >>>> * zero return from the delivery handler by not sending any more packets > > >>>> * until we have invoked the callback. Only in that case will we queue > > >>>> * the packet. > > >>>> * > > >>>> * If a sent callback isn't provided, we just drop the packet to avoid > > >>>> * unbounded queueing. 
> > >>>> */ > > >>>> > > >>>> So after we did vmnet_read and read N packets > > >>>> into temporary s->packets_buf, we begin calling > > >>>> qemu_send_packet_async. If it returns 0 - it says > > >>>> "no more packets until sent_cb called please". > > >>>> At this moment we have N packets in s->packets_buf > > >>>> and already queued K < N of them. But, packets K..N > > >>>> are not queued and keep waiting for sent_cb to be sent > > >>>> with qemu_send_packet_async. > > >>>> Thus when sent_cb called, we should finish > > >>>> our transfer of packets K..N from s->packets_buf > > >>>> to qemu calling qemu_send_packet_async. > > >>>> I meant this. > > >>> > > >>> I missed the comment. The description is contradicting with the actual > > >>> code; qemu_net_queue_send_iov appends the packet to the queue whenever > > >>> it cannot send one immediately. > > >>> > > >> > > >> Yes, it appends, but (net/queue.c): > > >> * qemu_net_queue_send tries to deliver the packet > > >> immediately. If the packet cannot be delivered, the > > >> qemu_net_queue_append is called and 0 is returned > > >> to say the caller "the receiver is not ready, hold on"; > > >> * qemu_net_queue_append does a probe before adding > > >> the packet to the queue: > > >> if (queue->nq_count >= queue->nq_maxlen && !sent_cb) { > > >> return; /* drop if queue full and no callback */ > > >> } > > >> > > >> The queue is not infinite, so we have three cases: > > >> 1. The queue is not full -> append the packet, no > > >> problems here > > >> 2. The queue is full, no callback -> we cannot notify > > >> a caller when we're ready, so just drop the packet > > >> if we can't append it. > > >> 3. The queue is full, callback present -> we can notify > > >> a caller when we are ready, so "let's queue this packet, > > >> but expect no more (!) 
packets is sent until I call > > >> sent_cb when the queue is ready" > > >> > > >> Therefore if we provide a callback and keep sending > > >> packets if 0 is returned, this may cause unlimited(!) > > >> queue growth. To prevent this, we should stop sending > > >> packets and wait for notification callback to continue. > > > > > > Right. > > > > > >> > > >> I don't see any contradiction with that comment. > > >> > > >>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if > > >>> calling qemu_send_packet_async is allowed after it returns 0? > > >>> > > >> > > >> It may be wrong, but I think it's not allowed to send > > >> packets after qemu_send_packet_async returns 0. > > >> > > >> Jason Wang, can you confirm please? > > > > > > With a cb, we can't do this. All users with cb will disable the source > > > polling and depend on the cb to re-read the polling. > > > (tap/l2tpv3/socket). > > > > > > Without a cb, we can. As analyzed above, qemu_net_queue_append() can > > > limit the number of packets queued in this case. > > > > vmnet can read multiple packets at once. What about such a case? Isn't > > calling qemu_send_packet_async for already read packet and stopping > > reading more fine? > > It should be fine, I remember I've asked whether or not the source > could be disabled but I get the answer that it can't. > > Thanks It can, but there could be more packets before disabling the source because vmnet reads multiple packets with one call. The procedure is: call qemu_send_packet_async with a callback, and if it returns 0, disable the source and keep calling qemu_send_packet_async with a callback until all the packets already read are queued. Is this kind of procedure allowed? Regards, Akihiko Odaki > > > > > Regards, > > Akihiko Odaki > > > > > > > > Thanks > > > > > >> > > >> Best Regards, > > >> > > >> Vladislav Yaroshchuk > > >> > > >>> > > >>> Regards, > > >>> Akihiko Odaki > > >> > > >> > > >> > > > > > >
On Mon, Mar 7, 2022 at 12:25 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > > On Mon, Mar 7, 2022 at 12:59 PM Jason Wang <jasowang@redhat.com> wrote: > > > > On Fri, Mar 4, 2022 at 12:37 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > > > > > > On 2022/03/04 10:37, Jason Wang wrote: > > > > On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk > > > > <vladislav.yaroshchuk@jetbrains.com> wrote: > > > >> > > > >> > > > >> > > > >> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > > > >>> > > > >>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote: > > > >>>> > Not sure that only one field is enough, cause > > > >>>> > we may have two states on bh execution start: > > > >>>> > 1. There are packets in vmnet buffer s->packets_buf > > > >>>> > that were rejected by qemu_send_async and waiting > > > >>>> > to be sent. If this happens, we should complete sending > > > >>>> > these waiting packets with qemu_send_async firstly, > > > >>>> > and after that we should call vmnet_read to get > > > >>>> > new ones and send them to QEMU; > > > >>>> > 2. There are no packets in s->packets_buf to be sent to > > > >>>> > qemu, we only need to get new packets from vmnet > > > >>>> > with vmnet_read and send them to QEMU > > > >>>> > > > >>>> In case 1, you should just keep calling qemu_send_packet_async. > > > >>>> Actually > > > >>>> qemu_send_packet_async adds the packet to its internal queue and calls > > > >>>> the callback when it is consumed. > > > >>>> > > > >>>> > > > >>>> I'm not sure we can keep calling qemu_send_packet_async, > > > >>>> because as docs from net/queue.c says: > > > >>>> > > > >>>> /* [...] > > > >>>> * If a sent callback is provided to send(), the caller must handle a > > > >>>> * zero return from the delivery handler by not sending any more packets > > > >>>> * until we have invoked the callback. Only in that case will we queue > > > >>>> * the packet. 
> > > >>>> * > > > >>>> * If a sent callback isn't provided, we just drop the packet to avoid > > > >>>> * unbounded queueing. > > > >>>> */ > > > >>>> > > > >>>> So after we did vmnet_read and read N packets > > > >>>> into temporary s->packets_buf, we begin calling > > > >>>> qemu_send_packet_async. If it returns 0 - it says > > > >>>> "no more packets until sent_cb called please". > > > >>>> At this moment we have N packets in s->packets_buf > > > >>>> and already queued K < N of them. But, packets K..N > > > >>>> are not queued and keep waiting for sent_cb to be sent > > > >>>> with qemu_send_packet_async. > > > >>>> Thus when sent_cb called, we should finish > > > >>>> our transfer of packets K..N from s->packets_buf > > > >>>> to qemu calling qemu_send_packet_async. > > > >>>> I meant this. > > > >>> > > > >>> I missed the comment. The description is contradicting with the actual > > > >>> code; qemu_net_queue_send_iov appends the packet to the queue whenever > > > >>> it cannot send one immediately. > > > >>> > > > >> > > > >> Yes, it appends, but (net/queue.c): > > > >> * qemu_net_queue_send tries to deliver the packet > > > >> immediately. If the packet cannot be delivered, the > > > >> qemu_net_queue_append is called and 0 is returned > > > >> to say the caller "the receiver is not ready, hold on"; > > > >> * qemu_net_queue_append does a probe before adding > > > >> the packet to the queue: > > > >> if (queue->nq_count >= queue->nq_maxlen && !sent_cb) { > > > >> return; /* drop if queue full and no callback */ > > > >> } > > > >> > > > >> The queue is not infinite, so we have three cases: > > > >> 1. The queue is not full -> append the packet, no > > > >> problems here > > > >> 2. The queue is full, no callback -> we cannot notify > > > >> a caller when we're ready, so just drop the packet > > > >> if we can't append it. > > > >> 3. 
The queue is full, callback present -> we can notify > > > >> a caller when we are ready, so "let's queue this packet, > > > >> but expect no more (!) packets is sent until I call > > > >> sent_cb when the queue is ready" > > > >> > > > >> Therefore if we provide a callback and keep sending > > > >> packets if 0 is returned, this may cause unlimited(!) > > > >> queue growth. To prevent this, we should stop sending > > > >> packets and wait for notification callback to continue. > > > > > > > > Right. > > > > > > > >> > > > >> I don't see any contradiction with that comment. > > > >> > > > >>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if > > > >>> calling qemu_send_packet_async is allowed after it returns 0? > > > >>> > > > >> > > > >> It may be wrong, but I think it's not allowed to send > > > >> packets after qemu_send_packet_async returns 0. > > > >> > > > >> Jason Wang, can you confirm please? > > > > > > > > With a cb, we can't do this. All users with cb will disable the source > > > > polling and depend on the cb to re-read the polling. > > > > (tap/l2tpv3/socket). > > > > > > > > Without a cb, we can. As analyzed above, qemu_net_queue_append() can > > > > limit the number of packets queued in this case. > > > > > > vmnet can read multiple packets at once. What about such a case? Isn't > > > calling qemu_send_packet_async for already read packet and stopping > > > reading more fine? > > > > It should be fine, I remember I've asked whether or not the source > > could be disabled but I get the answer that it can't. > > > > Thanks > > It can, but there could be more packets before disabling the source > because vmnet reads multiple packets with one call. The procedure is: > call qemu_send_packet_async with a callback, and if it returns 0, > disable the source and keep calling qemu_send_packet_async with a > callback until all the packets already read are queued. Is this kind > of procedure allowed? 
So it might have several issues if we do this: 1) there's no guarantee that the following call to qemu_send_packet_async() will always return 0; this could happen if the device has some rx buffer after the first call to qemu_send_packet_async(). Then we may end up with out-of-order delivery. 2) calling with a cb may suppress the queue limit check; a buggy driver may cause an unlimited number of packets to be queued under heavy traffic. It looks to me that we'd better store those pending buffers in the vmnet, and when sent_cb is triggered, start from those buffers first. (Or simply drop the packets) Thanks > > Regards, > Akihiko Odaki > > > > > > > > > Regards, > > > Akihiko Odaki > > > > > > > > > > > Thanks > > > > > > > >> > > > >> Best Regards, > > > >> > > > >> Vladislav Yaroshchuk > > > >> > > > >>> > > > >>> Regards, > > > >>> Akihiko Odaki > > > >> > > > >> > > > >> > > > > > > > > > >
On Mon, Mar 7, 2022 at 1:52 PM Jason Wang <jasowang@redhat.com> wrote: > > On Mon, Mar 7, 2022 at 12:25 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > > > > On Mon, Mar 7, 2022 at 12:59 PM Jason Wang <jasowang@redhat.com> wrote: > > > > > > On Fri, Mar 4, 2022 at 12:37 PM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > > > > > > > > On 2022/03/04 10:37, Jason Wang wrote: > > > > > On Thu, Mar 3, 2022 at 11:43 PM Vladislav Yaroshchuk > > > > > <vladislav.yaroshchuk@jetbrains.com> wrote: > > > > >> > > > > >> > > > > >> > > > > >> On Tue, Mar 1, 2022 at 11:21 AM Akihiko Odaki <akihiko.odaki@gmail.com> wrote: > > > > >>> > > > > >>> On 2022/03/01 17:09, Vladislav Yaroshchuk wrote: > > > > >>>> > Not sure that only one field is enough, cause > > > > >>>> > we may have two states on bh execution start: > > > > >>>> > 1. There are packets in vmnet buffer s->packets_buf > > > > >>>> > that were rejected by qemu_send_async and waiting > > > > >>>> > to be sent. If this happens, we should complete sending > > > > >>>> > these waiting packets with qemu_send_async firstly, > > > > >>>> > and after that we should call vmnet_read to get > > > > >>>> > new ones and send them to QEMU; > > > > >>>> > 2. There are no packets in s->packets_buf to be sent to > > > > >>>> > qemu, we only need to get new packets from vmnet > > > > >>>> > with vmnet_read and send them to QEMU > > > > >>>> > > > > >>>> In case 1, you should just keep calling qemu_send_packet_async. > > > > >>>> Actually > > > > >>>> qemu_send_packet_async adds the packet to its internal queue and calls > > > > >>>> the callback when it is consumed. > > > > >>>> > > > > >>>> > > > > >>>> I'm not sure we can keep calling qemu_send_packet_async, > > > > >>>> because as docs from net/queue.c says: > > > > >>>> > > > > >>>> /* [...] 
> > > > >>>> * If a sent callback is provided to send(), the caller must handle a > > > > >>>> * zero return from the delivery handler by not sending any more packets > > > > >>>> * until we have invoked the callback. Only in that case will we queue > > > > >>>> * the packet. > > > > >>>> * > > > > >>>> * If a sent callback isn't provided, we just drop the packet to avoid > > > > >>>> * unbounded queueing. > > > > >>>> */ > > > > >>>> > > > > >>>> So after we did vmnet_read and read N packets > > > > >>>> into temporary s->packets_buf, we begin calling > > > > >>>> qemu_send_packet_async. If it returns 0 - it says > > > > >>>> "no more packets until sent_cb called please". > > > > >>>> At this moment we have N packets in s->packets_buf > > > > >>>> and already queued K < N of them. But, packets K..N > > > > >>>> are not queued and keep waiting for sent_cb to be sent > > > > >>>> with qemu_send_packet_async. > > > > >>>> Thus when sent_cb called, we should finish > > > > >>>> our transfer of packets K..N from s->packets_buf > > > > >>>> to qemu calling qemu_send_packet_async. > > > > >>>> I meant this. > > > > >>> > > > > >>> I missed the comment. The description is contradicting with the actual > > > > >>> code; qemu_net_queue_send_iov appends the packet to the queue whenever > > > > >>> it cannot send one immediately. > > > > >>> > > > > >> > > > > >> Yes, it appends, but (net/queue.c): > > > > >> * qemu_net_queue_send tries to deliver the packet > > > > >> immediately. If the packet cannot be delivered, the > > > > >> qemu_net_queue_append is called and 0 is returned > > > > >> to say the caller "the receiver is not ready, hold on"; > > > > >> * qemu_net_queue_append does a probe before adding > > > > >> the packet to the queue: > > > > >> if (queue->nq_count >= queue->nq_maxlen && !sent_cb) { > > > > >> return; /* drop if queue full and no callback */ > > > > >> } > > > > >> > > > > >> The queue is not infinite, so we have three cases: > > > > >> 1. 
The queue is not full -> append the packet, no > > > > >> problems here > > > > >> 2. The queue is full, no callback -> we cannot notify > > > > >> a caller when we're ready, so just drop the packet > > > > >> if we can't append it. > > > > >> 3. The queue is full, callback present -> we can notify > > > > >> a caller when we are ready, so "let's queue this packet, > > > > >> but expect no more (!) packets is sent until I call > > > > >> sent_cb when the queue is ready" > > > > >> > > > > >> Therefore if we provide a callback and keep sending > > > > >> packets if 0 is returned, this may cause unlimited(!) > > > > >> queue growth. To prevent this, we should stop sending > > > > >> packets and wait for notification callback to continue. > > > > > > > > > > Right. > > > > > > > > > >> > > > > >> I don't see any contradiction with that comment. > > > > >> > > > > >>> Jason Wang, I saw you are in the MAINTAINERS for net/. Can you tell if > > > > >>> calling qemu_send_packet_async is allowed after it returns 0? > > > > >>> > > > > >> > > > > >> It may be wrong, but I think it's not allowed to send > > > > >> packets after qemu_send_packet_async returns 0. > > > > >> > > > > >> Jason Wang, can you confirm please? > > > > > > > > > > With a cb, we can't do this. All users with cb will disable the source > > > > > polling and depend on the cb to re-read the polling. > > > > > (tap/l2tpv3/socket). > > > > > > > > > > Without a cb, we can. As analyzed above, qemu_net_queue_append() can > > > > > limit the number of packets queued in this case. > > > > > > > > vmnet can read multiple packets at once. What about such a case? Isn't > > > > calling qemu_send_packet_async for already read packet and stopping > > > > reading more fine? > > > > > > It should be fine, I remember I've asked whether or not the source > > > could be disabled but I get the answer that it can't. 
> > > > > > Thanks > > > > It can, but there could be more packets before disabling the source > > because vmnet reads multiple packets with one call. The procedure is: > > call qemu_send_packet_async with a callback, and if it returns 0, > > disable the source and keep calling qemu_send_packet_async with a > > callback until all the packets already read are queued. Is this kind > > of procedure allowed? > > So it might have several issues if we do this: > > 1) there's no guarantee that the following call to > qemu_send_packet_async() will always return 0, this could happen if > the device has some rx buffer after the first call to > qemu_send_packet_asnyc(). Then we may end up with out-of order > delivery. > 2) calling with a cb may suppress the queue limit check, buggy driver > may cause unlimited number of packets to be queued under heavy traffic > > It looks to me we'd better store those pending buffers in the vmnet, > and when sent_cb is triggered, start from those buffers first. (Or > simply drop the packets) Thanks for the explanation. Vladislav had an idea of such an implementation so it would be the option we would choose. Let me review the design again. On 2022/02/28 20:59, Vladislav Yaroshchuk wrote: > It can be done kinda this way: > ``` > struct s: > send_bh: bh > packets_buf: array[packet] > ## Three new fields > send_enabled: bool send_enabled can be eliminated. When it is enabled, packets_send_pos and packets_batch_size must be equal. They must not be equal otherwise. packets_send_pos must represent the position of the packet which is not sent yet, possibly in the process of sending. vmnet_send_completed must call qemu_send_wrapper before scheduling send_bh. bh_send should do nothing if s->packets_send_pos < s->packets_batch_size. > packets_send_pos: int > packets_batch_size: int The name packets_batch_size lacks the context that it is used for sending. The relation between the two members is not obvious either. 
Perhaps they could be named packets_send_current_pos and packets_send_end_pos. > > fun bh_send(s): > ## Send disabled - do nothing > if not s->send_enabled: > return > ## If some packets are waiting to be sent in > ## s->packets_buf - send them > if s->packets_send_pos < s->packets_batch_size: > if not qemu_send_wrapper(s): > return > > ## Try to read new packets from vmnet > s->packets_send_pos = 0 > s->packets_batch_size = 0 > s->packets_buf = vmnet_read(&s->packets_batch_size) > if s->packets_batch_size > 0: > # If something received - process them > qemu_send_wrapper(s) > fun qemu_send_wrapper(s): > for i in s->packets_send_pos to s->packets_batch_size: > size = qemu_send_async(s->packets_buf[i], > vmnet_send_completed) > if size == 0: > ## Disable packets processing until vmnet_send_completed > ## Save the state: packets to be sent now in s->packets_buf > ## in range s->packets_send_pos..s->packets_batch_size > s->send_enabled = false > s->packets_send_pos = i + 1 > break > if size < 0: > ## On error just drop the packets > s->packets_send_pos = 0 > s->packets_batch_size = 0 We could ignore just one packet. Although it is unlikely, something specific to the packet may be wrong and other packets may be fine. > break > > return s->send_enabled > > > fun vmnet_send_completed(s): > ## Complete sending packets from s->packets_buf > s->send_enabled = true > qemu_bh_schedule(s->send_bh) > > ## From callback we only scheduling the bh > vmnet.set_callback(callback = s: qemu_bh_schedule(s->send_bh)) > ``` > > I think a simpler implementation may exist, but currently > I see only this approach with the send_enabled flag and > two more fields to save packets sending state. Aside from the send_enabled flag, I agree it is the simplest. Regards, Akihiko Odaki
diff --git a/net/vmnet-common.m b/net/vmnet-common.m index 56612c72ce..2f70921cae 100644 --- a/net/vmnet-common.m +++ b/net/vmnet-common.m @@ -10,6 +10,8 @@ */ #include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "qemu/log.h" #include "qapi/qapi-types-net.h" #include "vmnet_int.h" #include "clients.h" @@ -17,4 +19,304 @@ #include "qapi/error.h" #include <vmnet/vmnet.h> +#include <dispatch/dispatch.h> + +static inline void vmnet_set_send_bh_scheduled(VmnetCommonState *s, + bool enable) +{ + qatomic_set(&s->send_scheduled, enable); +} + + +static inline bool vmnet_is_send_bh_scheduled(VmnetCommonState *s) +{ + return qatomic_load_acquire(&s->send_scheduled); +} + + +static inline void vmnet_set_send_enabled(VmnetCommonState *s, + bool enable) +{ + if (enable) { + vmnet_interface_set_event_callback( + s->vmnet_if, + VMNET_INTERFACE_PACKETS_AVAILABLE, + s->if_queue, + ^(interface_event_t event_id, xpc_object_t event) { + assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE); + /* + * This function is being called from a non qemu thread, so + * we only schedule a BH, and do the rest of the io completion + * handling from vmnet_send_bh() which runs in a qemu context. 
+ * + * Avoid scheduling multiple bottom halves + */ + if (!vmnet_is_send_bh_scheduled(s)) { + vmnet_set_send_bh_scheduled(s, true); + qemu_bh_schedule(s->send_bh); + } + }); + } else { + vmnet_interface_set_event_callback( + s->vmnet_if, + VMNET_INTERFACE_PACKETS_AVAILABLE, + NULL, + NULL); + } +} + + +static void vmnet_send_completed(NetClientState *nc, ssize_t len) +{ + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); + vmnet_set_send_enabled(s, true); +} + + +static void vmnet_send_bh(void *opaque) +{ + NetClientState *nc = (NetClientState *) opaque; + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); + + struct iovec *iov = s->iov_buf; + struct vmpktdesc *packets = s->packets_buf; + int pkt_cnt; + int i; + + vmnet_return_t status; + ssize_t size; + + /* read as many packets as present */ + pkt_cnt = VMNET_PACKETS_LIMIT; + for (i = 0; i < pkt_cnt; ++i) { + packets[i].vm_pkt_size = s->max_packet_size; + packets[i].vm_pkt_iovcnt = 1; + packets[i].vm_flags = 0; + } + + status = vmnet_read(s->vmnet_if, packets, &pkt_cnt); + if (status != VMNET_SUCCESS) { + error_printf("vmnet: read failed: %s\n", + vmnet_status_map_str(status)); + goto done; + } + + for (i = 0; i < pkt_cnt; ++i) { + size = qemu_send_packet_async(nc, + iov[i].iov_base, + packets[i].vm_pkt_size, + vmnet_send_completed); + if (size == 0) { + vmnet_set_send_enabled(s, false); + goto done; + } else if (size < 0) { + break; + } + } + +done: + vmnet_set_send_bh_scheduled(s, false); +} + + +static void vmnet_bufs_init(VmnetCommonState *s) +{ + struct vmpktdesc *packets = s->packets_buf; + struct iovec *iov = s->iov_buf; + int i; + + for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) { + iov[i].iov_len = s->max_packet_size; + iov[i].iov_base = g_malloc0(iov[i].iov_len); + packets[i].vm_pkt_iov = iov + i; + } +} + + +const char *vmnet_status_map_str(vmnet_return_t status) +{ + switch (status) { + case VMNET_SUCCESS: + return "success"; + case VMNET_FAILURE: + return "general failure (possibly not 
enough privileges)"; + case VMNET_MEM_FAILURE: + return "memory allocation failure"; + case VMNET_INVALID_ARGUMENT: + return "invalid argument specified"; + case VMNET_SETUP_INCOMPLETE: + return "interface setup is not complete"; + case VMNET_INVALID_ACCESS: + return "invalid access, permission denied"; + case VMNET_PACKET_TOO_BIG: + return "packet size is larger than MTU"; + case VMNET_BUFFER_EXHAUSTED: + return "buffers exhausted in kernel"; + case VMNET_TOO_MANY_PACKETS: + return "packet count exceeds limit"; +#if defined(MAC_OS_VERSION_11_0) && \ + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 + case VMNET_SHARING_SERVICE_BUSY: + return "conflict, sharing service is in use"; +#endif + default: + return "unknown vmnet error"; + } +} + + +int vmnet_if_create(NetClientState *nc, + xpc_object_t if_desc, + Error **errp) +{ + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);; + dispatch_semaphore_t if_created_sem = dispatch_semaphore_create(0); + __block vmnet_return_t if_status; + + s->if_queue = dispatch_queue_create( + "org.qemu.vmnet.if_queue", + DISPATCH_QUEUE_SERIAL + ); + + xpc_dictionary_set_bool( + if_desc, + vmnet_allocate_mac_address_key, + false + ); +#ifdef DEBUG + qemu_log("vmnet.start.interface_desc:\n"); + xpc_dictionary_apply(if_desc, + ^bool(const char *k, xpc_object_t v) { + char *desc = xpc_copy_description(v); + qemu_log(" %s=%s\n", k, desc); + free(desc); + return true; + }); +#endif /* DEBUG */ + + s->vmnet_if = vmnet_start_interface( + if_desc, + s->if_queue, + ^(vmnet_return_t status, xpc_object_t interface_param) { + if_status = status; + if (status != VMNET_SUCCESS || !interface_param) { + dispatch_semaphore_signal(if_created_sem); + return; + } + +#ifdef DEBUG + qemu_log("vmnet.start.interface_param:\n"); + xpc_dictionary_apply(interface_param, + ^bool(const char *k, xpc_object_t v) { + char *desc = xpc_copy_description(v); + qemu_log(" %s=%s\n", k, desc); + free(desc); + return true; + }); +#endif /* DEBUG */ + + s->mtu = 
xpc_dictionary_get_uint64( + interface_param, + vmnet_mtu_key); + s->max_packet_size = xpc_dictionary_get_uint64( + interface_param, + vmnet_max_packet_size_key); + + dispatch_semaphore_signal(if_created_sem); + }); + + if (s->vmnet_if == NULL) { + error_setg(errp, + "unable to create interface " + "with requested params"); + return -1; + } + + dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER); + + if (if_status != VMNET_SUCCESS) { + error_setg(errp, + "cannot create vmnet interface: %s", + vmnet_status_map_str(if_status)); + return -1; + } + + s->send_bh = aio_bh_new(qemu_get_aio_context(), vmnet_send_bh, nc); + vmnet_bufs_init(s); + vmnet_set_send_bh_scheduled(s, false); + vmnet_set_send_enabled(s, true); + return 0; +} + + +ssize_t vmnet_receive_common(NetClientState *nc, + const uint8_t *buf, + size_t size) +{ + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc); + struct vmpktdesc packet; + struct iovec iov; + int pkt_cnt; + vmnet_return_t if_status; + + if (size > s->max_packet_size) { + warn_report("vmnet: packet is too big, %zu > %llu\n", + packet.vm_pkt_size, + s->max_packet_size); + return -1; + } + + iov.iov_base = (char *) buf; + iov.iov_len = size; + + packet.vm_pkt_iovcnt = 1; + packet.vm_flags = 0; + packet.vm_pkt_size = size; + packet.vm_pkt_iov = &iov; + pkt_cnt = 1; + + if_status = vmnet_write(s->vmnet_if, &packet, &pkt_cnt); + if (if_status != VMNET_SUCCESS) { + error_report("vmnet: write error: %s\n", + vmnet_status_map_str(if_status)); + } + + if (if_status == VMNET_SUCCESS && pkt_cnt) { + return size; + } + return 0; +} + + +void vmnet_cleanup_common(NetClientState *nc) +{ + VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);; + dispatch_semaphore_t if_created_sem; + + qemu_purge_queued_packets(nc); + vmnet_set_send_bh_scheduled(s, true); + vmnet_set_send_enabled(s, false); + + if (s->vmnet_if == NULL) { + return; + } + + if_created_sem = dispatch_semaphore_create(0); + vmnet_stop_interface( + s->vmnet_if, + 
s->if_queue, + ^(vmnet_return_t status) { + assert(status == VMNET_SUCCESS); + dispatch_semaphore_signal(if_created_sem); + }); + dispatch_semaphore_wait(if_created_sem, DISPATCH_TIME_FOREVER); + + qemu_bh_delete(s->send_bh); + dispatch_release(if_created_sem); + dispatch_release(s->if_queue); + + for (int i = 0; i < VMNET_PACKETS_LIMIT; ++i) { + g_free(s->iov_buf[i].iov_base); + } +} diff --git a/net/vmnet-shared.c b/net/vmnet-shared.c index f07afaaf21..66f66c034b 100644 --- a/net/vmnet-shared.c +++ b/net/vmnet-shared.c @@ -10,16 +10,102 @@ #include "qemu/osdep.h" #include "qapi/qapi-types-net.h" +#include "qapi/error.h" #include "vmnet_int.h" #include "clients.h" -#include "qemu/error-report.h" -#include "qapi/error.h" #include <vmnet/vmnet.h> +typedef struct VmnetSharedState { + VmnetCommonState cs; +} VmnetSharedState; + + +static bool validate_options(const Netdev *netdev, Error **errp) +{ + const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared); + +#if !defined(MAC_OS_VERSION_11_0) || \ + MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 + if (options->has_isolated) { + error_setg(errp, + "vmnet-shared.isolated feature is " + "unavailable: outdated vmnet.framework API"); + return false; + } +#endif + + if ((options->has_start_address || + options->has_end_address || + options->has_subnet_mask) && + !(options->has_start_address && + options->has_end_address && + options->has_subnet_mask)) { + error_setg(errp, + "'start-address', 'end-address', 'subnet-mask' " + "should be provided together" + ); + return false; + } + + return true; +} + +static xpc_object_t build_if_desc(const Netdev *netdev) +{ + const NetdevVmnetSharedOptions *options = &(netdev->u.vmnet_shared); + xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0); + + xpc_dictionary_set_uint64( + if_desc, + vmnet_operation_mode_key, + VMNET_SHARED_MODE + ); + + if (options->has_nat66_prefix) { + xpc_dictionary_set_string(if_desc, + vmnet_nat66_prefix_key, + options->nat66_prefix); + 
} + + if (options->has_start_address) { + xpc_dictionary_set_string(if_desc, + vmnet_start_address_key, + options->start_address); + xpc_dictionary_set_string(if_desc, + vmnet_end_address_key, + options->end_address); + xpc_dictionary_set_string(if_desc, + vmnet_subnet_mask_key, + options->subnet_mask); + } + +#if defined(MAC_OS_VERSION_11_0) && \ + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 + xpc_dictionary_set_bool( + if_desc, + vmnet_enable_isolation_key, + options->isolated + ); +#endif + + return if_desc; +} + +static NetClientInfo net_vmnet_shared_info = { + .type = NET_CLIENT_DRIVER_VMNET_SHARED, + .size = sizeof(VmnetSharedState), + .receive = vmnet_receive_common, + .cleanup = vmnet_cleanup_common, +}; + int net_init_vmnet_shared(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { - error_setg(errp, "vmnet-shared is not implemented yet"); - return -1; + NetClientState *nc = qemu_new_net_client(&net_vmnet_shared_info, + peer, "vmnet-shared", name); + if (!validate_options(netdev, errp)) { + g_assert_not_reached(); + } + return vmnet_if_create(nc, build_if_desc(netdev), errp); } diff --git a/net/vmnet_int.h b/net/vmnet_int.h index aac4d5af64..acfe3a88c0 100644 --- a/net/vmnet_int.h +++ b/net/vmnet_int.h @@ -15,11 +15,48 @@ #include "clients.h" #include <vmnet/vmnet.h> +#include <dispatch/dispatch.h> + +/** + * From vmnet.framework documentation + * + * Each read/write call allows up to 200 packets to be + * read or written for a maximum of 256KB. + * + * Each packet written should be a complete + * ethernet frame. 
+ * + * https://developer.apple.com/documentation/vmnet + */ +#define VMNET_PACKETS_LIMIT 200 typedef struct VmnetCommonState { - NetClientState nc; + NetClientState nc; + interface_ref vmnet_if; + + bool send_scheduled; + uint64_t mtu; + uint64_t max_packet_size; + + struct vmpktdesc packets_buf[VMNET_PACKETS_LIMIT]; + struct iovec iov_buf[VMNET_PACKETS_LIMIT]; + + dispatch_queue_t if_queue; + + QEMUBH *send_bh; } VmnetCommonState; +const char *vmnet_status_map_str(vmnet_return_t status); + +int vmnet_if_create(NetClientState *nc, + xpc_object_t if_desc, + Error **errp); + +ssize_t vmnet_receive_common(NetClientState *nc, + const uint8_t *buf, + size_t size); + +void vmnet_cleanup_common(NetClientState *nc); #endif /* VMNET_INT_H */