| Message ID | 20170227104956.24729-1-marcandre.lureau@redhat.com |
|---|---|
| State | New |
| Headers | show |
On 27/02/2017 11:49, Marc-André Lureau wrote: > Since commit b0a335e351103bf92f3f9d0bd5759311be8156ac, a socket write > may trigger a disconnect events, calling vhost_user_stop() and clearing > all the vhost_dev strutures holding data that vhost.c functions expect > to remain valid. Delay the cleanup to keep the vhost_dev structure > valid during the vhost.c functions. > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com> > --- > v3: > - use aio_bh_schedule_oneshot(), as suggest by Paolo > v2: > - fix reconnect race > > net/vhost-user.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 46 insertions(+), 7 deletions(-) > > diff --git a/net/vhost-user.c b/net/vhost-user.c > index 77b8110f8c..e7e63408a1 100644 > --- a/net/vhost-user.c > +++ b/net/vhost-user.c > @@ -190,7 +190,35 @@ static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond, > > qemu_chr_fe_disconnect(&s->chr); > > - return FALSE; > + return TRUE; > +} > + > +static void net_vhost_user_event(void *opaque, int event); > + > +static void chr_closed_bh(void *opaque) > +{ > + const char *name = opaque; > + NetClientState *ncs[MAX_QUEUE_NUM]; > + VhostUserState *s; > + Error *err = NULL; > + int queues; > + > + queues = qemu_find_net_clients_except(name, ncs, > + NET_CLIENT_DRIVER_NIC, > + MAX_QUEUE_NUM); > + assert(queues < MAX_QUEUE_NUM); > + > + s = DO_UPCAST(VhostUserState, nc, ncs[0]); > + > + qmp_set_link(name, false, &err); > + vhost_user_stop(queues, ncs); > + > + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, > + opaque, NULL, true); > + > + if (err) { > + error_report_err(err); > + } > } > > static void net_vhost_user_event(void *opaque, int event) > @@ -212,20 +240,31 @@ static void net_vhost_user_event(void *opaque, int event) > trace_vhost_user_event(chr->label, event); > switch (event) { > case CHR_EVENT_OPENED: > - s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, > - net_vhost_user_watch, s); > if 
(vhost_user_start(queues, ncs, &s->chr) < 0) { > qemu_chr_fe_disconnect(&s->chr); > return; > } > + s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, > + net_vhost_user_watch, s); > qmp_set_link(name, true, &err); > s->started = true; > break; > case CHR_EVENT_CLOSED: > - qmp_set_link(name, false, &err); > - vhost_user_stop(queues, ncs); > - g_source_remove(s->watch); > - s->watch = 0; > + /* a close event may happen during a read/write, but vhost > + * code assumes the vhost_dev remains setup, so delay the > + * stop & clear to idle. > + * FIXME: better handle failure in vhost code, remove bh > + */ > + if (s->watch) { > + AioContext *ctx = qemu_get_current_aio_context(); > + > + g_source_remove(s->watch); > + s->watch = 0; Removing the watch here makes sense too! Thanks, Paolo > + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, > + NULL, NULL, false); > + > + aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque); > + } > break; > } > >
On 27 February 2017 at 10:49, Marc-André Lureau <marcandre.lureau@redhat.com> wrote: > Since commit b0a335e351103bf92f3f9d0bd5759311be8156ac, a socket write > may trigger a disconnect events, calling vhost_user_stop() and clearing > all the vhost_dev strutures holding data that vhost.c functions expect > to remain valid. Delay the cleanup to keep the vhost_dev structure > valid during the vhost.c functions. > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com> > --- > v3: > - use aio_bh_schedule_oneshot(), as suggest by Paolo > v2: > - fix reconnect race Applied to master as a buildfix (I haven't seen the test failures but others on IRC were complaining). thanks -- PMM
Hi Marc-André, [very old patch...] On 27/2/17 11:49, Marc-André Lureau wrote: > Since commit b0a335e351103bf92f3f9d0bd5759311be8156ac, a socket write > may trigger a disconnect events, calling vhost_user_stop() and clearing > all the vhost_dev strutures holding data that vhost.c functions expect > to remain valid. Delay the cleanup to keep the vhost_dev structure > valid during the vhost.c functions. > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com> > --- > v3: > - use aio_bh_schedule_oneshot(), as suggest by Paolo > v2: > - fix reconnect race > > net/vhost-user.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 46 insertions(+), 7 deletions(-) > > diff --git a/net/vhost-user.c b/net/vhost-user.c > index 77b8110f8c..e7e63408a1 100644 > --- a/net/vhost-user.c > +++ b/net/vhost-user.c > @@ -190,7 +190,35 @@ static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond, > > qemu_chr_fe_disconnect(&s->chr); > > - return FALSE; > + return TRUE; Do you mind explaining this change again, it is not clear from the commit description. We listen to G_IO_HUP, got a SIGHUP so we disconnect the chardev but keep listening for future HUP? In which case can that happen? How can we get a chardev connected and initialized here without calling net_init_vhost_user() again? 
> +} > + > +static void net_vhost_user_event(void *opaque, int event); > + > +static void chr_closed_bh(void *opaque) > +{ > + const char *name = opaque; > + NetClientState *ncs[MAX_QUEUE_NUM]; > + VhostUserState *s; > + Error *err = NULL; > + int queues; > + > + queues = qemu_find_net_clients_except(name, ncs, > + NET_CLIENT_DRIVER_NIC, > + MAX_QUEUE_NUM); > + assert(queues < MAX_QUEUE_NUM); > + > + s = DO_UPCAST(VhostUserState, nc, ncs[0]); > + > + qmp_set_link(name, false, &err); > + vhost_user_stop(queues, ncs); > + > + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, > + opaque, NULL, true); > + > + if (err) { > + error_report_err(err); > + } > } > > static void net_vhost_user_event(void *opaque, int event) > @@ -212,20 +240,31 @@ static void net_vhost_user_event(void *opaque, int event) > trace_vhost_user_event(chr->label, event); > switch (event) { > case CHR_EVENT_OPENED: > - s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, > - net_vhost_user_watch, s); > if (vhost_user_start(queues, ncs, &s->chr) < 0) { > qemu_chr_fe_disconnect(&s->chr); > return; > } > + s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, > + net_vhost_user_watch, s); > qmp_set_link(name, true, &err); > s->started = true; > break; > case CHR_EVENT_CLOSED: > - qmp_set_link(name, false, &err); > - vhost_user_stop(queues, ncs); > - g_source_remove(s->watch); > - s->watch = 0; > + /* a close event may happen during a read/write, but vhost > + * code assumes the vhost_dev remains setup, so delay the > + * stop & clear to idle. > + * FIXME: better handle failure in vhost code, remove bh > + */ > + if (s->watch) { > + AioContext *ctx = qemu_get_current_aio_context(); > + > + g_source_remove(s->watch); > + s->watch = 0; > + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, > + NULL, NULL, false); > + > + aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque); > + } > break; > } >
Hi On Wed, Jul 5, 2023 at 2:02 PM Philippe Mathieu-Daudé <philmd@linaro.org> wrote: > Hi Marc-André, > > [very old patch...] > > On 27/2/17 11:49, Marc-André Lureau wrote: > > Since commit b0a335e351103bf92f3f9d0bd5759311be8156ac, a socket write > > may trigger a disconnect events, calling vhost_user_stop() and clearing > > all the vhost_dev strutures holding data that vhost.c functions expect > > to remain valid. Delay the cleanup to keep the vhost_dev structure > > valid during the vhost.c functions. > > > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com> > > --- > > v3: > > - use aio_bh_schedule_oneshot(), as suggest by Paolo > > v2: > > - fix reconnect race > > > > net/vhost-user.c | 53 > ++++++++++++++++++++++++++++++++++++++++++++++------- > > 1 file changed, 46 insertions(+), 7 deletions(-) > > > > diff --git a/net/vhost-user.c b/net/vhost-user.c > > index 77b8110f8c..e7e63408a1 100644 > > --- a/net/vhost-user.c > > +++ b/net/vhost-user.c > > @@ -190,7 +190,35 @@ static gboolean net_vhost_user_watch(GIOChannel > *chan, GIOCondition cond, > > > > qemu_chr_fe_disconnect(&s->chr); > > > > - return FALSE; > > + return TRUE; > > Do you mind explaining this change again, it is not clear from > the commit description. We listen to G_IO_HUP, got a SIGHUP so > we disconnect the chardev but keep listening for future HUP? > In which case can that happen? How can we get a chardev connected > and initialized here without calling net_init_vhost_user() again? > I think the point was simply to keep the source ID valid, until the cleanup happens and calls g_source_remove(). Is there any issue with that? we can probably set s->watch = 0 in the source callback instead and check the watch before removing it. 
> > +} > > + > > +static void net_vhost_user_event(void *opaque, int event); > > + > > +static void chr_closed_bh(void *opaque) > > +{ > > + const char *name = opaque; > > + NetClientState *ncs[MAX_QUEUE_NUM]; > > + VhostUserState *s; > > + Error *err = NULL; > > + int queues; > > + > > + queues = qemu_find_net_clients_except(name, ncs, > > + NET_CLIENT_DRIVER_NIC, > > + MAX_QUEUE_NUM); > > + assert(queues < MAX_QUEUE_NUM); > > + > > + s = DO_UPCAST(VhostUserState, nc, ncs[0]); > > + > > + qmp_set_link(name, false, &err); > > + vhost_user_stop(queues, ncs); > > + > > + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, > > + opaque, NULL, true); > > + > > + if (err) { > > + error_report_err(err); > > + } > > } > > > > static void net_vhost_user_event(void *opaque, int event) > > @@ -212,20 +240,31 @@ static void net_vhost_user_event(void *opaque, int > event) > > trace_vhost_user_event(chr->label, event); > > switch (event) { > > case CHR_EVENT_OPENED: > > - s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, > > - net_vhost_user_watch, s); > > if (vhost_user_start(queues, ncs, &s->chr) < 0) { > > qemu_chr_fe_disconnect(&s->chr); > > return; > > } > > + s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, > > + net_vhost_user_watch, s); > > qmp_set_link(name, true, &err); > > s->started = true; > > break; > > case CHR_EVENT_CLOSED: > > - qmp_set_link(name, false, &err); > > - vhost_user_stop(queues, ncs); > > - g_source_remove(s->watch); > > - s->watch = 0; > > + /* a close event may happen during a read/write, but vhost > > + * code assumes the vhost_dev remains setup, so delay the > > + * stop & clear to idle. 
> > + * FIXME: better handle failure in vhost code, remove bh > > + */ > > + if (s->watch) { > > + AioContext *ctx = qemu_get_current_aio_context(); > > + > > + g_source_remove(s->watch); > > + s->watch = 0; > > + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, > > + NULL, NULL, false); > > + > > + aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque); > > + } > > break; > > } > > > >
diff --git a/net/vhost-user.c b/net/vhost-user.c index 77b8110f8c..e7e63408a1 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -190,7 +190,35 @@ static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond, qemu_chr_fe_disconnect(&s->chr); - return FALSE; + return TRUE; +} + +static void net_vhost_user_event(void *opaque, int event); + +static void chr_closed_bh(void *opaque) +{ + const char *name = opaque; + NetClientState *ncs[MAX_QUEUE_NUM]; + VhostUserState *s; + Error *err = NULL; + int queues; + + queues = qemu_find_net_clients_except(name, ncs, + NET_CLIENT_DRIVER_NIC, + MAX_QUEUE_NUM); + assert(queues < MAX_QUEUE_NUM); + + s = DO_UPCAST(VhostUserState, nc, ncs[0]); + + qmp_set_link(name, false, &err); + vhost_user_stop(queues, ncs); + + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, + opaque, NULL, true); + + if (err) { + error_report_err(err); + } } static void net_vhost_user_event(void *opaque, int event) @@ -212,20 +240,31 @@ static void net_vhost_user_event(void *opaque, int event) trace_vhost_user_event(chr->label, event); switch (event) { case CHR_EVENT_OPENED: - s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, - net_vhost_user_watch, s); if (vhost_user_start(queues, ncs, &s->chr) < 0) { qemu_chr_fe_disconnect(&s->chr); return; } + s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, + net_vhost_user_watch, s); qmp_set_link(name, true, &err); s->started = true; break; case CHR_EVENT_CLOSED: - qmp_set_link(name, false, &err); - vhost_user_stop(queues, ncs); - g_source_remove(s->watch); - s->watch = 0; + /* a close event may happen during a read/write, but vhost + * code assumes the vhost_dev remains setup, so delay the + * stop & clear to idle. 
+ * FIXME: better handle failure in vhost code, remove bh + */ + if (s->watch) { + AioContext *ctx = qemu_get_current_aio_context(); + + g_source_remove(s->watch); + s->watch = 0; + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, + NULL, NULL, false); + + aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque); + } break; }
Since commit b0a335e351103bf92f3f9d0bd5759311be8156ac, a socket write may trigger a disconnect event, calling vhost_user_stop() and clearing all the vhost_dev structures holding data that vhost.c functions expect to remain valid. Delay the cleanup to keep the vhost_dev structure valid during the vhost.c functions. Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com> --- v3: - use aio_bh_schedule_oneshot(), as suggested by Paolo v2: - fix reconnect race net/vhost-user.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 7 deletions(-)