diff mbox series

[3/4] char-socket: avoid double call tcp_chr_free_connection

Message ID 20200415032826.16701-4-fengli@smartx.com
State New
Headers show
Series fix crashes when inject errors to vhost-user-blk chardev | expand

Commit Message

Li Feng April 15, 2020, 3:28 a.m. UTC
double call tcp_chr_free_connection generates a crash.

Signed-off-by: Li Feng <fengli@smartx.com>
---
 chardev/char-socket.c | 5 +++++
 1 file changed, 5 insertions(+)

Comments

Marc-André Lureau April 15, 2020, 10:35 a.m. UTC | #1
Hi

On Wed, Apr 15, 2020 at 5:31 AM Li Feng <fengli@smartx.com> wrote:
>
> double call tcp_chr_free_connection generates a crash.
>
> Signed-off-by: Li Feng <fengli@smartx.com>

Could you describe how you reach the "double call".

Even better would be to write a test for it in tests/test-char.c

thanks

> ---
>  chardev/char-socket.c | 5 +++++
>  1 file changed, 5 insertions(+)
>
> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> index 185fe38dda..43aab8f24b 100644
> --- a/chardev/char-socket.c
> +++ b/chardev/char-socket.c
> @@ -476,6 +476,11 @@ static void tcp_chr_disconnect_locked(Chardev *chr)
>      SocketChardev *s = SOCKET_CHARDEV(chr);
>      bool emit_close = s->state == TCP_CHARDEV_STATE_CONNECTED;
>
> +    /* avoid re-enter when socket read/write error and disconnect event. */
> +    if (s->state == TCP_CHARDEV_STATE_DISCONNECTED) {
> +        return;
> +    }
> +
>      tcp_chr_free_connection(chr);
>
>      if (s->listener) {
> --
> 2.11.0
>
>
> --
> The SmartX email address is only for business purpose. Any sent message
> that is not related to the business is not authorized or permitted by
> SmartX.
> 本邮箱为北京志凌海纳科技有限公司(SmartX)工作邮箱. 如本邮箱发出的邮件与工作无关,该邮件未得到本公司任何的明示或默示的授权.
>
>
>
Li Feng April 20, 2020, 3:10 a.m. UTC | #2
Hi Lureau,

Thanks for your comment.
I have spent some time to reproduce this crash, so that delay my
reply, apologies.

This is the description of this bug using gdb:
1. Set break points using gdb;
b chardev/char-socket.c:410 if s->state == TCP_CHARDEV_STATE_DISCONNECTED
b break vhost_user_write

2. hotplug a vhost-blk device:
echo "chardev-add
socket,id=spdk_vhost_blk2,path=/vhost-blk.0,reconnect=1 "| nc -U
/tmp/a.socket
echo "device_add
vhost-user-blk-pci,chardev=spdk_vhost_blk2,id=spdk_vhost_blk2,num-queues=4"
| nc -U /tmp/a.socket

3. Gdb will stop at vhost_user_write, and CTRL-C the vhost target.
4. Put 'c' to let gdb continue.
You will see a crash:

192 login: qemu-system-x86_64: chardev/char-socket.c:125:
qemu_chr_socket_restart_timer: Assertion `!s->reconnect_timer' failed.

The related code:
394 static void tcp_chr_free_connection(Chardev *chr)
 395 {
 396     SocketChardev *s = SOCKET_CHARDEV(chr);
 397     int i;
 398
 399     if (s->read_msgfds_num) {
 400         for (i = 0; i < s->read_msgfds_num; i++) {
 401             close(s->read_msgfds[i]);
 402         }
 403         g_free(s->read_msgfds);
 404         s->read_msgfds = NULL;
 405         s->read_msgfds_num = 0;
 406     }
 407
 408     remove_hup_source(s);
 409
 410     tcp_set_msgfds(chr, NULL, 0);
 411     remove_fd_in_watch(chr);
 412     object_unref(OBJECT(s->sioc));
 413     s->sioc = NULL;
 414     object_unref(OBJECT(s->ioc));
 415     s->ioc = NULL;
 416     g_free(chr->filename);
 417     chr->filename = NULL;
 418     tcp_chr_change_state(s, TCP_CHARDEV_STATE_DISCONNECTED);
 419 }

#0  0x0000555555c1aae8 in tcp_chr_free_connection
(chr=chr@entry=0x55555751ee00) at chardev/char-socket.c:410
#1  0x0000555555c1aec8 in tcp_chr_disconnect_locked
(chr=0x55555751ee00) at chardev/char-socket.c:479
#2  0x0000555555c1af5d in tcp_chr_disconnect (chr=0x55555751ee00) at
chardev/char-socket.c:497
#3  0x0000555555c16715 in qemu_chr_fe_set_handlers
(b=b@entry=0x5555575f3b48, fd_can_read=fd_can_read@entry=0x0,
fd_read=fd_
read@entry=0x0, fd_event=fd_event@entry=0x55555588e740
<vhost_user_blk_event>, be_change=be_change@entry=0x0, opaque=opaque@
entry=0x5555575f3990, context=0x0, set_open=true) at chardev/char-fe.c:304
#4  0x000055555588e5c0 in vhost_user_blk_device_realize
(dev=0x5555575f3990, errp=<optimized out>)
    at /root/qemu-master/hw/block/vhost-user-blk.c:447
#5  0x00005555558ca478 in virtio_device_realize (dev=0x5555575f3990,
errp=0x7fffffffbb90)
    at /root/qemu-master/hw/virtio/virtio.c:3615
#6  0x00005555559dc842 in device_set_realized (obj=<optimized out>,
value=<optimized out>, errp=0x7fffffffbd20)
    at hw/core/qdev.c:891
#7  0x0000555555b78e07 in property_set_bool (obj=0x5555575f3990,
v=<optimized out>, name=<optimized out>, opaque=0x555556453
040, errp=0x7fffffffbd20) at qom/object.c:2238
#8  0x0000555555b7db3f in object_property_set_qobject
(obj=obj@entry=0x5555575f3990, value=value@entry=0x555556ab89c0, name=
name@entry=0x555555d15308 "realized", errp=errp@entry=0x7fffffffbd20)
at qom/qom-qobject.c:26
#9  0x0000555555b7b2c5 in object_property_set_bool
(obj=0x5555575f3990, value=<optimized out>, name=0x555555d15308
"realized
", errp=0x7fffffffbd20) at qom/object.c:1390
#10 0x0000555555af13b2 in virtio_pci_realize (pci_dev=0x5555575eb800,
errp=0x7fffffffbd20) at hw/virtio/virtio-pci.c:1807
#11 0x0000555555a7a14b in pci_qdev_realize (qdev=<optimized out>,
errp=<optimized out>) at hw/pci/pci.c:2098
#12 0x00005555559dc842 in device_set_realized (obj=<optimized out>,
value=<optimized out>, errp=0x7fffffffbef8)
    at hw/core/qdev.c:891

(gdb) p s
$23 = (SocketChardev *) 0x55555751ee00
(gdb) p s->state
$24 = TCP_CHARDEV_STATE_DISCONNECTED

At 410 line of char-socket.c, the state has been changed to
TCP_CHARDEV_STATE_DISCONNECTED before tcp_chr_change_state(s,
TCP_CHARDEV_STATE_DISCONNECTED);
It means the tcp_chr_disconnect_locked is called by more than one
times, and the tcp socket has been freed before.

The crash reason is s->reconnect_timer is set in the previous call of
tcp_chr_disconnect_locked.
The another approach fix maybe like this:

diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 185fe38dda..94957de367 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -486,7 +486,7 @@ static void tcp_chr_disconnect_locked(Chardev *chr)
if (emit_close) {
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
}
- if (s->reconnect_time) {
+ if (s->reconnect_time && !s->reconnect_timer) {
qemu_chr_socket_restart_timer(chr);
}
}

The base code is here:
474 static void tcp_chr_disconnect_locked(Chardev *chr)
475 {
476 SocketChardev *s = SOCKET_CHARDEV(chr);
477 bool emit_close = s->state == TCP_CHARDEV_STATE_CONNECTED;
478
479 tcp_chr_free_connection(chr);
480
481 if (s->listener) {
482 qio_net_listener_set_client_func_full(s->listener, tcp_chr_accept,
483 chr, NULL, chr->gcontext);
484 }
485 update_disconnected_filename(s);
486 if (emit_close) {
487 qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
488 }
489 if (s->reconnect_time) {
490 qemu_chr_socket_restart_timer(chr);
491 }
492 }

Which one is better?

I don't know how to inject an error at socket write when writing tests
code(tests/test-char.c ).
Any tips?

Thanks,
Feng Li

Marc-André Lureau <marcandre.lureau@gmail.com> 于2020年4月15日周三 下午6:35写道:

>
> Hi
>
> On Wed, Apr 15, 2020 at 5:31 AM Li Feng <fengli@smartx.com> wrote:
> >
> > double call tcp_chr_free_connection generates a crash.
> >
> > Signed-off-by: Li Feng <fengli@smartx.com>
>
> Could you describe how you reach the "double call".
>
> Even better would be to write a test for it in tests/test-char.c
>
> thanks
>
> > ---
> >  chardev/char-socket.c | 5 +++++
> >  1 file changed, 5 insertions(+)
> >
> > diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> > index 185fe38dda..43aab8f24b 100644
> > --- a/chardev/char-socket.c
> > +++ b/chardev/char-socket.c
> > @@ -476,6 +476,11 @@ static void tcp_chr_disconnect_locked(Chardev *chr)
> >      SocketChardev *s = SOCKET_CHARDEV(chr);
> >      bool emit_close = s->state == TCP_CHARDEV_STATE_CONNECTED;
> >
> > +    /* avoid re-enter when socket read/write error and disconnect event. */
> > +    if (s->state == TCP_CHARDEV_STATE_DISCONNECTED) {
> > +        return;
> > +    }
> > +
> >      tcp_chr_free_connection(chr);
> >
> >      if (s->listener) {
> > --
> > 2.11.0
> >
> >
> > --
> > The SmartX email address is only for business purpose. Any sent message
> > that is not related to the business is not authorized or permitted by
> > SmartX.
> > 本邮箱为北京志凌海纳科技有限公司(SmartX)工作邮箱. 如本邮箱发出的邮件与工作无关,该邮件未得到本公司任何的明示或默示的授权.
> >
> >
> >
>
>
> --
> Marc-André Lureau
Marc-André Lureau April 20, 2020, 2:28 p.m. UTC | #3
Hi

On Mon, Apr 20, 2020 at 5:10 AM Li Feng <fengli@smartx.com> wrote:
>
> Hi Lureau,
>
> Thanks for your comment.
> I have spent some time to reproduce this crash, so that delay my
> reply, apologies.
>
> This is the description of this bug using gdb:
> 1. Set break points using gdb;
> b chardev/char-socket.c:410 if s->state == TCP_CHARDEV_STATE_DISCONNECTED
> b break vhost_user_write
>
> 2. hotplug a vhost-blk device:
> echo "chardev-add
> socket,id=spdk_vhost_blk2,path=/vhost-blk.0,reconnect=1 "| nc -U
> /tmp/a.socket
> echo "device_add
> vhost-user-blk-pci,chardev=spdk_vhost_blk2,id=spdk_vhost_blk2,num-queues=4"
> | nc -U /tmp/a.socket
>
> 3. Gdb will stop at vhost_user_write, and CTRL-C the vhost target.
> 4. Put 'c' to let gdb continue.
> You will see a crash:
>
> 192 login: qemu-system-x86_64: chardev/char-socket.c:125:
> qemu_chr_socket_restart_timer: Assertion `!s->reconnect_timer' failed.
>
> The related code:
> 394 static void tcp_chr_free_connection(Chardev *chr)
>  395 {
>  396     SocketChardev *s = SOCKET_CHARDEV(chr);
>  397     int i;
>  398
>  399     if (s->read_msgfds_num) {
>  400         for (i = 0; i < s->read_msgfds_num; i++) {
>  401             close(s->read_msgfds[i]);
>  402         }
>  403         g_free(s->read_msgfds);
>  404         s->read_msgfds = NULL;
>  405         s->read_msgfds_num = 0;
>  406     }
>  407
>  408     remove_hup_source(s);
>  409
>  410     tcp_set_msgfds(chr, NULL, 0);
>  411     remove_fd_in_watch(chr);
>  412     object_unref(OBJECT(s->sioc));
>  413     s->sioc = NULL;
>  414     object_unref(OBJECT(s->ioc));
>  415     s->ioc = NULL;
>  416     g_free(chr->filename);
>  417     chr->filename = NULL;
>  418     tcp_chr_change_state(s, TCP_CHARDEV_STATE_DISCONNECTED);
>  419 }
>
> #0  0x0000555555c1aae8 in tcp_chr_free_connection
> (chr=chr@entry=0x55555751ee00) at chardev/char-socket.c:410
> #1  0x0000555555c1aec8 in tcp_chr_disconnect_locked
> (chr=0x55555751ee00) at chardev/char-socket.c:479
> #2  0x0000555555c1af5d in tcp_chr_disconnect (chr=0x55555751ee00) at
> chardev/char-socket.c:497
> #3  0x0000555555c16715 in qemu_chr_fe_set_handlers
> (b=b@entry=0x5555575f3b48, fd_can_read=fd_can_read@entry=0x0,
> fd_read=fd_
> read@entry=0x0, fd_event=fd_event@entry=0x55555588e740
> <vhost_user_blk_event>, be_change=be_change@entry=0x0, opaque=opaque@
> entry=0x5555575f3990, context=0x0, set_open=true) at chardev/char-fe.c:304
> #4  0x000055555588e5c0 in vhost_user_blk_device_realize
> (dev=0x5555575f3990, errp=<optimized out>)
>     at /root/qemu-master/hw/block/vhost-user-blk.c:447
> #5  0x00005555558ca478 in virtio_device_realize (dev=0x5555575f3990,
> errp=0x7fffffffbb90)
>     at /root/qemu-master/hw/virtio/virtio.c:3615
> #6  0x00005555559dc842 in device_set_realized (obj=<optimized out>,
> value=<optimized out>, errp=0x7fffffffbd20)
>     at hw/core/qdev.c:891
> #7  0x0000555555b78e07 in property_set_bool (obj=0x5555575f3990,
> v=<optimized out>, name=<optimized out>, opaque=0x555556453
> 040, errp=0x7fffffffbd20) at qom/object.c:2238
> #8  0x0000555555b7db3f in object_property_set_qobject
> (obj=obj@entry=0x5555575f3990, value=value@entry=0x555556ab89c0, name=
> name@entry=0x555555d15308 "realized", errp=errp@entry=0x7fffffffbd20)
> at qom/qom-qobject.c:26
> #9  0x0000555555b7b2c5 in object_property_set_bool
> (obj=0x5555575f3990, value=<optimized out>, name=0x555555d15308
> "realized
> ", errp=0x7fffffffbd20) at qom/object.c:1390
> #10 0x0000555555af13b2 in virtio_pci_realize (pci_dev=0x5555575eb800,
> errp=0x7fffffffbd20) at hw/virtio/virtio-pci.c:1807
> #11 0x0000555555a7a14b in pci_qdev_realize (qdev=<optimized out>,
> errp=<optimized out>) at hw/pci/pci.c:2098
> #12 0x00005555559dc842 in device_set_realized (obj=<optimized out>,
> value=<optimized out>, errp=0x7fffffffbef8)
>     at hw/core/qdev.c:891
>
> (gdb) p s
> $23 = (SocketChardev *) 0x55555751ee00
> (gdb) p s->state
> $24 = TCP_CHARDEV_STATE_DISCONNECTED
>
> At 410 line of char-socket.c, the state has been changed to
> TCP_CHARDEV_STATE_DISCONNECTED before tcp_chr_change_state(s,
> TCP_CHARDEV_STATE_DISCONNECTED);
> It means the tcp_chr_disconnect_locked is called by more than one
> times, and the tcp socket has been freed before.
>
> The crash reason is s->reconnect_timer is set in the previous call of
> tcp_chr_disconnect_locked.
> The another approach fix maybe like this:
>
> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> index 185fe38dda..94957de367 100644
> --- a/chardev/char-socket.c
> +++ b/chardev/char-socket.c
> @@ -486,7 +486,7 @@ static void tcp_chr_disconnect_locked(Chardev *chr)
> if (emit_close) {
> qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
> }
> - if (s->reconnect_time) {
> + if (s->reconnect_time && !s->reconnect_timer) {
> qemu_chr_socket_restart_timer(chr);
> }
> }
>
> The base code is here:
> 474 static void tcp_chr_disconnect_locked(Chardev *chr)
> 475 {
> 476 SocketChardev *s = SOCKET_CHARDEV(chr);
> 477 bool emit_close = s->state == TCP_CHARDEV_STATE_CONNECTED;
> 478
> 479 tcp_chr_free_connection(chr);
> 480
> 481 if (s->listener) {
> 482 qio_net_listener_set_client_func_full(s->listener, tcp_chr_accept,
> 483 chr, NULL, chr->gcontext);
> 484 }
> 485 update_disconnected_filename(s);
> 486 if (emit_close) {
> 487 qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
> 488 }
> 489 if (s->reconnect_time) {
> 490 qemu_chr_socket_restart_timer(chr);
> 491 }
> 492 }
>
> Which one is better?

+ if (s->reconnect_time && !s->reconnect_timer)

Looks like a good solution to me.

>
> I don't know how to inject an error at socket write when writing tests
> code(tests/test-char.c ).
> Any tips?

You could check the patch I just sent fixing a related issue:
https://patchew.org/QEMU/20200420112012.567284-1-marcandre.lureau@redhat.com/

In your case, it might be as easy as calling qemu_chr_fe_disconnect()
2 times on a reconnect socket, I'll let you figure out.

Thanks!

>
> Thanks,
> Feng Li
>
> Marc-André Lureau <marcandre.lureau@gmail.com> 于2020年4月15日周三 下午6:35写道:
>
> >
> > Hi
> >
> > On Wed, Apr 15, 2020 at 5:31 AM Li Feng <fengli@smartx.com> wrote:
> > >
> > > double call tcp_chr_free_connection generates a crash.
> > >
> > > Signed-off-by: Li Feng <fengli@smartx.com>
> >
> > Could you describe how you reach the "double call".
> >
> > Even better would be to write a test for it in tests/test-char.c
> >
> > thanks
> >
> > > ---
> > >  chardev/char-socket.c | 5 +++++
> > >  1 file changed, 5 insertions(+)
> > >
> > > diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> > > index 185fe38dda..43aab8f24b 100644
> > > --- a/chardev/char-socket.c
> > > +++ b/chardev/char-socket.c
> > > @@ -476,6 +476,11 @@ static void tcp_chr_disconnect_locked(Chardev *chr)
> > >      SocketChardev *s = SOCKET_CHARDEV(chr);
> > >      bool emit_close = s->state == TCP_CHARDEV_STATE_CONNECTED;
> > >
> > > +    /* avoid re-enter when socket read/write error and disconnect event. */
> > > +    if (s->state == TCP_CHARDEV_STATE_DISCONNECTED) {
> > > +        return;
> > > +    }
> > > +
> > >      tcp_chr_free_connection(chr);
> > >
> > >      if (s->listener) {
> > > --
> > > 2.11.0
> > >
> > >
> > > --
> > > The SmartX email address is only for business purpose. Any sent message
> > > that is not related to the business is not authorized or permitted by
> > > SmartX.
> > > 本邮箱为北京志凌海纳科技有限公司(SmartX)工作邮箱. 如本邮箱发出的邮件与工作无关,该邮件未得到本公司任何的明示或默示的授权.
> > >
> > >
> > >
> >
> >
> > --
> > Marc-André Lureau
>
> --
> The SmartX email address is only for business purpose. Any sent message
> that is not related to the business is not authorized or permitted by
> SmartX.
> 本邮箱为北京志凌海纳科技有限公司(SmartX)工作邮箱. 如本邮箱发出的邮件与工作无关,该邮件未得到本公司任何的明示或默示的授权.
>
>
Li Feng April 28, 2020, 8:59 a.m. UTC | #4
Sorry for sending the weird same mail out.
Ignore the second one.

Hi Lureau,

I found another issue when running my new test:  tests/test-char -p
/char/socket/client/reconnect-error/unix
The backtrace like this:
#0  0x00007ffff5ac3277 in raise () from /lib64/libc.so.6
#1  0x00007ffff5ac4968 in abort () from /lib64/libc.so.6
#2  0x00005555555aaa34 in error_handle_fatal (errp=<optimized out>,
err=0x7fffec0012d0) at util/error.c:40
#3  0x00005555555aab0d in error_setv (errp=0x555555802a08
<error_abort>, src=0x5555555c4220 "io/channel.c", line=148,
func=0x5555555c4520 <__func__.17450> "qio_channel_readv_all",
    err_class=ERROR_CLASS_GENERIC_ERROR, fmt=<optimized out>,
ap=0x7ffff423bb10, suffix=0x0) at util/error.c:73
#4  0x00005555555aac90 in error_setg_internal
(errp=errp@entry=0x555555802a08 <error_abort>,
src=src@entry=0x5555555c4220 "io/channel.c", line=line@entry=148,
    func=func@entry=0x5555555c4520 <__func__.17450>
"qio_channel_readv_all", fmt=fmt@entry=0x5555555c4340 "Unexpected
end-of-file before all bytes were read") at util/error.c:97
#5  0x000055555556c1fc in qio_channel_readv_all (ioc=<optimized out>,
iov=<optimized out>, niov=<optimized out>, errp=0x555555802a08
<error_abort>) at io/channel.c:147
#6  0x000055555556c23a in qio_channel_read_all (ioc=<optimized out>,
buf=<optimized out>, buflen=<optimized out>, errp=<optimized out>) at
io/channel.c:247
#7  0x000055555556ad0e in char_socket_ping_pong (ioc=0x7fffec0008c0)
at tests/test-char.c:727
#8  0x000055555556adcf in char_socket_client_server_thread
(data=data@entry=0x55555582e350) at tests/test-char.c:881
#9  0x00005555555a9556 in qemu_thread_start (args=<optimized out>) at
util/qemu-thread-posix.c:519
#10 0x00007ffff5e61e25 in start_thread () from /lib64/libpthread.so.0
#11 0x00007ffff5b8bbad in clone () from /lib64/libc.so.6

I think this is a new issue of qemu, not my test issue.
How do you think?

Thanks,

Feng Li

Li Feng <fengli@smartx.com> 于2020年4月28日周二 下午4:53写道:

>
> When the disconnect event is triggered in the connecting stage,
> the tcp_chr_disconnect_locked may be called twice.
>
> The first call:
>     #0  qemu_chr_socket_restart_timer (chr=0x55555582ee90) at chardev/char-socket.c:120
>     #1  0x000055555558e38c in tcp_chr_disconnect_locked (chr=<optimized out>) at chardev/char-socket.c:490
>     #2  0x000055555558e3cd in tcp_chr_disconnect (chr=0x55555582ee90) at chardev/char-socket.c:497
>     #3  0x000055555558ea32 in tcp_chr_new_client (chr=chr@entry=0x55555582ee90, sioc=sioc@entry=0x55555582f0b0) at chardev/char-socket.c:892
>     #4  0x000055555558eeb8 in qemu_chr_socket_connected (task=0x55555582f300, opaque=<optimized out>) at chardev/char-socket.c:1090
>     #5  0x0000555555574352 in qio_task_complete (task=task@entry=0x55555582f300) at io/task.c:196
>     #6  0x00005555555745f4 in qio_task_thread_result (opaque=0x55555582f300) at io/task.c:111
>     #7  qio_task_wait_thread (task=0x55555582f300) at io/task.c:190
>     #8  0x000055555558f17e in tcp_chr_wait_connected (chr=0x55555582ee90, errp=0x555555802a08 <error_abort>) at chardev/char-socket.c:1013
>     #9  0x0000555555567cbd in char_socket_client_reconnect_test (opaque=0x5555557fe020 <client8unix>) at tests/test-char.c:1152
> The second call:
>     #0  0x00007ffff5ac3277 in raise () from /lib64/libc.so.6
>     #1  0x00007ffff5ac4968 in abort () from /lib64/libc.so.6
>     #2  0x00007ffff5abc096 in __assert_fail_base () from /lib64/libc.so.6
>     #3  0x00007ffff5abc142 in __assert_fail () from /lib64/libc.so.6
>     #4  0x000055555558d10a in qemu_chr_socket_restart_timer (chr=0x55555582ee90) at chardev/char-socket.c:125
>     #5  0x000055555558df0c in tcp_chr_disconnect_locked (chr=<optimized out>) at chardev/char-socket.c:490
>     #6  0x000055555558df4d in tcp_chr_disconnect (chr=0x55555582ee90) at chardev/char-socket.c:497
>     #7  0x000055555558e5b2 in tcp_chr_new_client (chr=chr@entry=0x55555582ee90, sioc=sioc@entry=0x55555582f0b0) at chardev/char-socket.c:892
>     #8  0x000055555558e93a in tcp_chr_connect_client_sync (chr=chr@entry=0x55555582ee90, errp=errp@entry=0x7fffffffd178) at chardev/char-socket.c:944
>     #9  0x000055555558ec78 in tcp_chr_wait_connected (chr=0x55555582ee90, errp=0x555555802a08 <error_abort>) at chardev/char-socket.c:1035
>     #10 0x000055555556804b in char_socket_client_test (opaque=0x5555557fe020 <client8unix>) at tests/test-char.c:1023
>
> Run test/test-char to reproduce this issue.
>
> test-char: chardev/char-socket.c:125: qemu_chr_socket_restart_timer: Assertion `!s->reconnect_timer' failed.
>
> Signed-off-by: Li Feng <fengli@smartx.com>
> ---
> v2:
> - Rewrite the solution.
> - Add test to reproduce this issue.
>
>  chardev/char-socket.c |  2 +-
>  tests/test-char.c     | 48 ++++++++++++++++++++++++++++++++++++++----------
>  2 files changed, 39 insertions(+), 11 deletions(-)
>
> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> index 1f14c2c7c8..d84330b3c9 100644
> --- a/chardev/char-socket.c
> +++ b/chardev/char-socket.c
> @@ -486,7 +486,7 @@ static void tcp_chr_disconnect_locked(Chardev *chr)
>      if (emit_close) {
>          qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
>      }
> -    if (s->reconnect_time) {
> +    if (s->reconnect_time && !s->reconnect_timer) {
>          qemu_chr_socket_restart_timer(chr);
>      }
>  }
> diff --git a/tests/test-char.c b/tests/test-char.c
> index 8d39bdc9fa..13dbbfe2a3 100644
> --- a/tests/test-char.c
> +++ b/tests/test-char.c
> @@ -625,12 +625,14 @@ static void char_udp_test(void)
>  typedef struct {
>      int event;
>      bool got_pong;
> +    CharBackend *be;
>  } CharSocketTestData;
>
>
>  #define SOCKET_PING "Hello"
>  #define SOCKET_PONG "World"
>
> +typedef void (*char_socket_cb)(void *opaque, QEMUChrEvent event);
>
>  static void
>  char_socket_event(void *opaque, QEMUChrEvent event)
> @@ -639,6 +641,23 @@ char_socket_event(void *opaque, QEMUChrEvent event)
>      data->event = event;
>  }
>
> +static void
> +char_socket_event_with_error(void *opaque, QEMUChrEvent event)
> +{
> +    CharSocketTestData *data = opaque;
> +    CharBackend *be = data->be;
> +    data->event = event;
> +    switch (event) {
> +    case CHR_EVENT_OPENED:
> +        qemu_chr_fe_disconnect(be);
> +        return;
> +    case CHR_EVENT_CLOSED:
> +        return;
> +    default:
> +        return;
> +    }
> +}
> +
>
>  static void
>  char_socket_read(void *opaque, const uint8_t *buf, int size)
> @@ -783,6 +802,7 @@ static void char_socket_server_test(gconstpointer opaque)
>
>   reconnect:
>      data.event = -1;
> +    data.be = &be;
>      qemu_chr_fe_set_handlers(&be, NULL, NULL,
>                               char_socket_event, NULL,
>                               &data, NULL, true);
> @@ -869,6 +889,7 @@ typedef struct {
>      const char *reconnect;
>      bool wait_connected;
>      bool fd_pass;
> +    char_socket_cb event_cb;
>  } CharSocketClientTestConfig;
>
>  static void char_socket_client_dupid_test(gconstpointer opaque)
> @@ -920,6 +941,7 @@ static void char_socket_client_dupid_test(gconstpointer opaque)
>  static void char_socket_client_test(gconstpointer opaque)
>  {
>      const CharSocketClientTestConfig *config = opaque;
> +    const char_socket_cb event_cb = config->event_cb;
>      QIOChannelSocket *ioc;
>      char *optstr;
>      Chardev *chr;
> @@ -983,8 +1005,9 @@ static void char_socket_client_test(gconstpointer opaque)
>
>   reconnect:
>      data.event = -1;
> +    data.be = &be;
>      qemu_chr_fe_set_handlers(&be, NULL, NULL,
> -                             char_socket_event, NULL,
> +                             event_cb, NULL,
>                               &data, NULL, true);
>      if (config->reconnect) {
>          g_assert(data.event == -1);
> @@ -1022,7 +1045,7 @@ static void char_socket_client_test(gconstpointer opaque)
>      /* Setup a callback to receive the reply to our greeting */
>      qemu_chr_fe_set_handlers(&be, char_socket_can_read,
>                               char_socket_read,
> -                             char_socket_event, NULL,
> +                             event_cb, NULL,
>                               &data, NULL, true);
>      g_assert(data.event == CHR_EVENT_OPENED);
>      data.event = -1;
> @@ -1467,19 +1490,22 @@ int main(int argc, char **argv)
>
>  #define SOCKET_CLIENT_TEST(name, addr)                                  \
>      static CharSocketClientTestConfig client1 ## name =                 \
> -        { addr, NULL, false, false };                                   \
> +        { addr, NULL, false, false, char_socket_event};                 \
>      static CharSocketClientTestConfig client2 ## name =                 \
> -        { addr, NULL, true, false };                                    \
> +        { addr, NULL, true, false, char_socket_event };                 \
>      static CharSocketClientTestConfig client3 ## name =                 \
> -        { addr, ",reconnect=1", false };                                \
> +        { addr, ",reconnect=1", false, false, char_socket_event };      \
>      static CharSocketClientTestConfig client4 ## name =                 \
> -        { addr, ",reconnect=1", true };                                 \
> +        { addr, ",reconnect=1", true, false, char_socket_event };       \
>      static CharSocketClientTestConfig client5 ## name =                 \
> -        { addr, NULL, false, true };                                    \
> +        { addr, NULL, false, true, char_socket_event };                 \
>      static CharSocketClientTestConfig client6 ## name =                 \
> -        { addr, NULL, true, true };                                     \
> +        { addr, NULL, true, true, char_socket_event };                  \
>      static CharSocketClientTestConfig client7 ## name =                 \
> -        { addr, ",reconnect=1", false, false };                         \
> +        { addr, ",reconnect=1", false, false, char_socket_event };      \
> +    static CharSocketClientTestConfig client8 ## name =                 \
> +        { addr, ",reconnect=1", true, false,                            \
> +            char_socket_event_with_error };                             \
>      g_test_add_data_func("/char/socket/client/mainloop/" # name,        \
>                           &client1 ##name, char_socket_client_test);     \
>      g_test_add_data_func("/char/socket/client/wait-conn/" # name,       \
> @@ -1493,7 +1519,9 @@ int main(int argc, char **argv)
>      g_test_add_data_func("/char/socket/client/wait-conn-fdpass/" # name, \
>                           &client6 ##name, char_socket_client_test);     \
>      g_test_add_data_func("/char/socket/client/dupid-reconnect/" # name, \
> -                         &client7 ##name, char_socket_client_dupid_test)
> +                         &client7 ##name, char_socket_client_dupid_test);\
> +    g_test_add_data_func("/char/socket/client/reconnect-error/" # name, \
> +                         &client8 ##name, char_socket_client_test)
>
>      if (has_ipv4) {
>          SOCKET_SERVER_TEST(tcp, &tcpaddr);
> --
> 2.11.0
>
Marc-André Lureau April 28, 2020, 11:05 a.m. UTC | #5
Hi

On Tue, Apr 28, 2020 at 10:59 AM Li Feng <fengli@smartx.com> wrote:
>
> Sorry for sending the weird same mail out.
> Ignore the second one.
>
> Hi Lureau,
>
> I found another issue when running my new test:  tests/test-char -p
> /char/socket/client/reconnect-error/unix
> The backtrace like this:
> #0  0x00007ffff5ac3277 in raise () from /lib64/libc.so.6
> #1  0x00007ffff5ac4968 in abort () from /lib64/libc.so.6
> #2  0x00005555555aaa34 in error_handle_fatal (errp=<optimized out>,
> err=0x7fffec0012d0) at util/error.c:40
> #3  0x00005555555aab0d in error_setv (errp=0x555555802a08
> <error_abort>, src=0x5555555c4220 "io/channel.c", line=148,
> func=0x5555555c4520 <__func__.17450> "qio_channel_readv_all",
>     err_class=ERROR_CLASS_GENERIC_ERROR, fmt=<optimized out>,
> ap=0x7ffff423bb10, suffix=0x0) at util/error.c:73
> #4  0x00005555555aac90 in error_setg_internal
> (errp=errp@entry=0x555555802a08 <error_abort>,
> src=src@entry=0x5555555c4220 "io/channel.c", line=line@entry=148,
>     func=func@entry=0x5555555c4520 <__func__.17450>
> "qio_channel_readv_all", fmt=fmt@entry=0x5555555c4340 "Unexpected
> end-of-file before all bytes were read") at util/error.c:97
> #5  0x000055555556c1fc in qio_channel_readv_all (ioc=<optimized out>,
> iov=<optimized out>, niov=<optimized out>, errp=0x555555802a08
> <error_abort>) at io/channel.c:147
> #6  0x000055555556c23a in qio_channel_read_all (ioc=<optimized out>,
> buf=<optimized out>, buflen=<optimized out>, errp=<optimized out>) at
> io/channel.c:247
> #7  0x000055555556ad0e in char_socket_ping_pong (ioc=0x7fffec0008c0)
> at tests/test-char.c:727
> #8  0x000055555556adcf in char_socket_client_server_thread
> (data=data@entry=0x55555582e350) at tests/test-char.c:881
> #9  0x00005555555a9556 in qemu_thread_start (args=<optimized out>) at
> util/qemu-thread-posix.c:519
> #10 0x00007ffff5e61e25 in start_thread () from /lib64/libpthread.so.0
> #11 0x00007ffff5b8bbad in clone () from /lib64/libc.so.6
>
> I think this is a new issue of qemu, not my test issue.
> How do you think?


No idea, it could be a bug in the test itself. How did you produce it?

>
> Thanks,
>
> Feng Li
>
> Li Feng <fengli@smartx.com> 于2020年4月28日周二 下午4:53写道:
>
> >
> > When the disconnect event is triggered in the connecting stage,
> > the tcp_chr_disconnect_locked may be called twice.
> >
> > The first call:
> >     #0  qemu_chr_socket_restart_timer (chr=0x55555582ee90) at chardev/char-socket.c:120
> >     #1  0x000055555558e38c in tcp_chr_disconnect_locked (chr=<optimized out>) at chardev/char-socket.c:490
> >     #2  0x000055555558e3cd in tcp_chr_disconnect (chr=0x55555582ee90) at chardev/char-socket.c:497
> >     #3  0x000055555558ea32 in tcp_chr_new_client (chr=chr@entry=0x55555582ee90, sioc=sioc@entry=0x55555582f0b0) at chardev/char-socket.c:892
> >     #4  0x000055555558eeb8 in qemu_chr_socket_connected (task=0x55555582f300, opaque=<optimized out>) at chardev/char-socket.c:1090
> >     #5  0x0000555555574352 in qio_task_complete (task=task@entry=0x55555582f300) at io/task.c:196
> >     #6  0x00005555555745f4 in qio_task_thread_result (opaque=0x55555582f300) at io/task.c:111
> >     #7  qio_task_wait_thread (task=0x55555582f300) at io/task.c:190
> >     #8  0x000055555558f17e in tcp_chr_wait_connected (chr=0x55555582ee90, errp=0x555555802a08 <error_abort>) at chardev/char-socket.c:1013
> >     #9  0x0000555555567cbd in char_socket_client_reconnect_test (opaque=0x5555557fe020 <client8unix>) at tests/test-char.c:1152
> > The second call:
> >     #0  0x00007ffff5ac3277 in raise () from /lib64/libc.so.6
> >     #1  0x00007ffff5ac4968 in abort () from /lib64/libc.so.6
> >     #2  0x00007ffff5abc096 in __assert_fail_base () from /lib64/libc.so.6
> >     #3  0x00007ffff5abc142 in __assert_fail () from /lib64/libc.so.6
> >     #4  0x000055555558d10a in qemu_chr_socket_restart_timer (chr=0x55555582ee90) at chardev/char-socket.c:125
> >     #5  0x000055555558df0c in tcp_chr_disconnect_locked (chr=<optimized out>) at chardev/char-socket.c:490
> >     #6  0x000055555558df4d in tcp_chr_disconnect (chr=0x55555582ee90) at chardev/char-socket.c:497
> >     #7  0x000055555558e5b2 in tcp_chr_new_client (chr=chr@entry=0x55555582ee90, sioc=sioc@entry=0x55555582f0b0) at chardev/char-socket.c:892
> >     #8  0x000055555558e93a in tcp_chr_connect_client_sync (chr=chr@entry=0x55555582ee90, errp=errp@entry=0x7fffffffd178) at chardev/char-socket.c:944
> >     #9  0x000055555558ec78 in tcp_chr_wait_connected (chr=0x55555582ee90, errp=0x555555802a08 <error_abort>) at chardev/char-socket.c:1035
> >     #10 0x000055555556804b in char_socket_client_test (opaque=0x5555557fe020 <client8unix>) at tests/test-char.c:1023
> >
> > Run test/test-char to reproduce this issue.
> >
> > test-char: chardev/char-socket.c:125: qemu_chr_socket_restart_timer: Assertion `!s->reconnect_timer' failed.
> >
> > Signed-off-by: Li Feng <fengli@smartx.com>
> > ---
> > v2:
> > - Rewrite the solution.
> > - Add test to reproduce this issue.
> >
> >  chardev/char-socket.c |  2 +-
> >  tests/test-char.c     | 48 ++++++++++++++++++++++++++++++++++++++----------
> >  2 files changed, 39 insertions(+), 11 deletions(-)
> >
> > diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> > index 1f14c2c7c8..d84330b3c9 100644
> > --- a/chardev/char-socket.c
> > +++ b/chardev/char-socket.c
> > @@ -486,7 +486,7 @@ static void tcp_chr_disconnect_locked(Chardev *chr)
> >      if (emit_close) {
> >          qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
> >      }
> > -    if (s->reconnect_time) {
> > +    if (s->reconnect_time && !s->reconnect_timer) {
> >          qemu_chr_socket_restart_timer(chr);
> >      }
> >  }
> > diff --git a/tests/test-char.c b/tests/test-char.c
> > index 8d39bdc9fa..13dbbfe2a3 100644
> > --- a/tests/test-char.c
> > +++ b/tests/test-char.c
> > @@ -625,12 +625,14 @@ static void char_udp_test(void)
> >  typedef struct {
> >      int event;
> >      bool got_pong;
> > +    CharBackend *be;
> >  } CharSocketTestData;
> >
> >
> >  #define SOCKET_PING "Hello"
> >  #define SOCKET_PONG "World"
> >
> > +typedef void (*char_socket_cb)(void *opaque, QEMUChrEvent event);
> >
> >  static void
> >  char_socket_event(void *opaque, QEMUChrEvent event)
> > @@ -639,6 +641,23 @@ char_socket_event(void *opaque, QEMUChrEvent event)
> >      data->event = event;
> >  }
> >
> > +static void
> > +char_socket_event_with_error(void *opaque, QEMUChrEvent event)
> > +{
> > +    CharSocketTestData *data = opaque;
> > +    CharBackend *be = data->be;
> > +    data->event = event;
> > +    switch (event) {
> > +    case CHR_EVENT_OPENED:
> > +        qemu_chr_fe_disconnect(be);
> > +        return;
> > +    case CHR_EVENT_CLOSED:
> > +        return;
> > +    default:
> > +        return;
> > +    }
> > +}
> > +
> >
> >  static void
> >  char_socket_read(void *opaque, const uint8_t *buf, int size)
> > @@ -783,6 +802,7 @@ static void char_socket_server_test(gconstpointer opaque)
> >
> >   reconnect:
> >      data.event = -1;
> > +    data.be = &be;
> >      qemu_chr_fe_set_handlers(&be, NULL, NULL,
> >                               char_socket_event, NULL,
> >                               &data, NULL, true);
> > @@ -869,6 +889,7 @@ typedef struct {
> >      const char *reconnect;
> >      bool wait_connected;
> >      bool fd_pass;
> > +    char_socket_cb event_cb;
> >  } CharSocketClientTestConfig;
> >
> >  static void char_socket_client_dupid_test(gconstpointer opaque)
> > @@ -920,6 +941,7 @@ static void char_socket_client_dupid_test(gconstpointer opaque)
> >  static void char_socket_client_test(gconstpointer opaque)
> >  {
> >      const CharSocketClientTestConfig *config = opaque;
> > +    const char_socket_cb event_cb = config->event_cb;
> >      QIOChannelSocket *ioc;
> >      char *optstr;
> >      Chardev *chr;
> > @@ -983,8 +1005,9 @@ static void char_socket_client_test(gconstpointer opaque)
> >
> >   reconnect:
> >      data.event = -1;
> > +    data.be = &be;
> >      qemu_chr_fe_set_handlers(&be, NULL, NULL,
> > -                             char_socket_event, NULL,
> > +                             event_cb, NULL,
> >                               &data, NULL, true);
> >      if (config->reconnect) {
> >          g_assert(data.event == -1);
> > @@ -1022,7 +1045,7 @@ static void char_socket_client_test(gconstpointer opaque)
> >      /* Setup a callback to receive the reply to our greeting */
> >      qemu_chr_fe_set_handlers(&be, char_socket_can_read,
> >                               char_socket_read,
> > -                             char_socket_event, NULL,
> > +                             event_cb, NULL,
> >                               &data, NULL, true);
> >      g_assert(data.event == CHR_EVENT_OPENED);
> >      data.event = -1;
> > @@ -1467,19 +1490,22 @@ int main(int argc, char **argv)
> >
> >  #define SOCKET_CLIENT_TEST(name, addr)                                  \
> >      static CharSocketClientTestConfig client1 ## name =                 \
> > -        { addr, NULL, false, false };                                   \
> > +        { addr, NULL, false, false, char_socket_event};                 \
> >      static CharSocketClientTestConfig client2 ## name =                 \
> > -        { addr, NULL, true, false };                                    \
> > +        { addr, NULL, true, false, char_socket_event };                 \
> >      static CharSocketClientTestConfig client3 ## name =                 \
> > -        { addr, ",reconnect=1", false };                                \
> > +        { addr, ",reconnect=1", false, false, char_socket_event };      \
> >      static CharSocketClientTestConfig client4 ## name =                 \
> > -        { addr, ",reconnect=1", true };                                 \
> > +        { addr, ",reconnect=1", true, false, char_socket_event };       \
> >      static CharSocketClientTestConfig client5 ## name =                 \
> > -        { addr, NULL, false, true };                                    \
> > +        { addr, NULL, false, true, char_socket_event };                 \
> >      static CharSocketClientTestConfig client6 ## name =                 \
> > -        { addr, NULL, true, true };                                     \
> > +        { addr, NULL, true, true, char_socket_event };                  \
> >      static CharSocketClientTestConfig client7 ## name =                 \
> > -        { addr, ",reconnect=1", false, false };                         \
> > +        { addr, ",reconnect=1", false, false, char_socket_event };      \
> > +    static CharSocketClientTestConfig client8 ## name =                 \
> > +        { addr, ",reconnect=1", true, false,                            \
> > +            char_socket_event_with_error };                             \
> >      g_test_add_data_func("/char/socket/client/mainloop/" # name,        \
> >                           &client1 ##name, char_socket_client_test);     \
> >      g_test_add_data_func("/char/socket/client/wait-conn/" # name,       \
> > @@ -1493,7 +1519,9 @@ int main(int argc, char **argv)
> >      g_test_add_data_func("/char/socket/client/wait-conn-fdpass/" # name, \
> >                           &client6 ##name, char_socket_client_test);     \
> >      g_test_add_data_func("/char/socket/client/dupid-reconnect/" # name, \
> > -                         &client7 ##name, char_socket_client_dupid_test)
> > +                         &client7 ##name, char_socket_client_dupid_test);\
> > +    g_test_add_data_func("/char/socket/client/reconnect-error/" # name, \
> > +                         &client8 ##name, char_socket_client_test)
> >
> >      if (has_ipv4) {
> >          SOCKET_SERVER_TEST(tcp, &tcpaddr);
> > --
> > 2.11.0
> >
>
Feng Li April 28, 2020, 2:09 p.m. UTC | #6
Apply the above patch, then `make check-unit`
or
`tests/test-char -p /char/socket/client/reconnect-error/unix`

Marc-André Lureau <marcandre.lureau@redhat.com> 于2020年4月28日周二 下午7:06写道:
>
> Hi
>
> On Tue, Apr 28, 2020 at 10:59 AM Li Feng <fengli@smartx.com> wrote:
> >
> > Sorry for sending the weird same mail out.
> > Ignore the second one.
> >
> > Hi Lureau,
> >
> > I found another issue when running my new test:  tests/test-char -p
> > /char/socket/client/reconnect-error/unix
> > The backtrace like this:
> > #0  0x00007ffff5ac3277 in raise () from /lib64/libc.so.6
> > #1  0x00007ffff5ac4968 in abort () from /lib64/libc.so.6
> > #2  0x00005555555aaa34 in error_handle_fatal (errp=<optimized out>,
> > err=0x7fffec0012d0) at util/error.c:40
> > #3  0x00005555555aab0d in error_setv (errp=0x555555802a08
> > <error_abort>, src=0x5555555c4220 "io/channel.c", line=148,
> > func=0x5555555c4520 <__func__.17450> "qio_channel_readv_all",
> >     err_class=ERROR_CLASS_GENERIC_ERROR, fmt=<optimized out>,
> > ap=0x7ffff423bb10, suffix=0x0) at util/error.c:73
> > #4  0x00005555555aac90 in error_setg_internal
> > (errp=errp@entry=0x555555802a08 <error_abort>,
> > src=src@entry=0x5555555c4220 "io/channel.c", line=line@entry=148,
> >     func=func@entry=0x5555555c4520 <__func__.17450>
> > "qio_channel_readv_all", fmt=fmt@entry=0x5555555c4340 "Unexpected
> > end-of-file before all bytes were read") at util/error.c:97
> > #5  0x000055555556c1fc in qio_channel_readv_all (ioc=<optimized out>,
> > iov=<optimized out>, niov=<optimized out>, errp=0x555555802a08
> > <error_abort>) at io/channel.c:147
> > #6  0x000055555556c23a in qio_channel_read_all (ioc=<optimized out>,
> > buf=<optimized out>, buflen=<optimized out>, errp=<optimized out>) at
> > io/channel.c:247
> > #7  0x000055555556ad0e in char_socket_ping_pong (ioc=0x7fffec0008c0)
> > at tests/test-char.c:727
> > #8  0x000055555556adcf in char_socket_client_server_thread
> > (data=data@entry=0x55555582e350) at tests/test-char.c:881
> > #9  0x00005555555a9556 in qemu_thread_start (args=<optimized out>) at
> > util/qemu-thread-posix.c:519
> > #10 0x00007ffff5e61e25 in start_thread () from /lib64/libpthread.so.0
> > #11 0x00007ffff5b8bbad in clone () from /lib64/libc.so.6
> >
> > I think this is a new issue of qemu, not my test issue.
> > How do you think?
>
>
> No idea, it could be a bug in the test itself. How did you produce it?
>
> >
> > Thanks,
> >
> > Feng Li
> >
> > Li Feng <fengli@smartx.com> 于2020年4月28日周二 下午4:53写道:
> >
> > >
> > > When the disconnect event is triggered in the connecting stage,
> > > the tcp_chr_disconnect_locked may be called twice.
> > >
> > > The first call:
> > >     #0  qemu_chr_socket_restart_timer (chr=0x55555582ee90) at chardev/char-socket.c:120
> > >     #1  0x000055555558e38c in tcp_chr_disconnect_locked (chr=<optimized out>) at chardev/char-socket.c:490
> > >     #2  0x000055555558e3cd in tcp_chr_disconnect (chr=0x55555582ee90) at chardev/char-socket.c:497
> > >     #3  0x000055555558ea32 in tcp_chr_new_client (chr=chr@entry=0x55555582ee90, sioc=sioc@entry=0x55555582f0b0) at chardev/char-socket.c:892
> > >     #4  0x000055555558eeb8 in qemu_chr_socket_connected (task=0x55555582f300, opaque=<optimized out>) at chardev/char-socket.c:1090
> > >     #5  0x0000555555574352 in qio_task_complete (task=task@entry=0x55555582f300) at io/task.c:196
> > >     #6  0x00005555555745f4 in qio_task_thread_result (opaque=0x55555582f300) at io/task.c:111
> > >     #7  qio_task_wait_thread (task=0x55555582f300) at io/task.c:190
> > >     #8  0x000055555558f17e in tcp_chr_wait_connected (chr=0x55555582ee90, errp=0x555555802a08 <error_abort>) at chardev/char-socket.c:1013
> > >     #9  0x0000555555567cbd in char_socket_client_reconnect_test (opaque=0x5555557fe020 <client8unix>) at tests/test-char.c:1152
> > > The second call:
> > >     #0  0x00007ffff5ac3277 in raise () from /lib64/libc.so.6
> > >     #1  0x00007ffff5ac4968 in abort () from /lib64/libc.so.6
> > >     #2  0x00007ffff5abc096 in __assert_fail_base () from /lib64/libc.so.6
> > >     #3  0x00007ffff5abc142 in __assert_fail () from /lib64/libc.so.6
> > >     #4  0x000055555558d10a in qemu_chr_socket_restart_timer (chr=0x55555582ee90) at chardev/char-socket.c:125
> > >     #5  0x000055555558df0c in tcp_chr_disconnect_locked (chr=<optimized out>) at chardev/char-socket.c:490
> > >     #6  0x000055555558df4d in tcp_chr_disconnect (chr=0x55555582ee90) at chardev/char-socket.c:497
> > >     #7  0x000055555558e5b2 in tcp_chr_new_client (chr=chr@entry=0x55555582ee90, sioc=sioc@entry=0x55555582f0b0) at chardev/char-socket.c:892
> > >     #8  0x000055555558e93a in tcp_chr_connect_client_sync (chr=chr@entry=0x55555582ee90, errp=errp@entry=0x7fffffffd178) at chardev/char-socket.c:944
> > >     #9  0x000055555558ec78 in tcp_chr_wait_connected (chr=0x55555582ee90, errp=0x555555802a08 <error_abort>) at chardev/char-socket.c:1035
> > >     #10 0x000055555556804b in char_socket_client_test (opaque=0x5555557fe020 <client8unix>) at tests/test-char.c:1023
> > >
> > > Run test/test-char to reproduce this issue.
> > >
> > > test-char: chardev/char-socket.c:125: qemu_chr_socket_restart_timer: Assertion `!s->reconnect_timer' failed.
> > >
> > > Signed-off-by: Li Feng <fengli@smartx.com>
> > > ---
> > > v2:
> > > - Rewrite the solution.
> > > - Add test to reproduce this issue.
> > >
> > >  chardev/char-socket.c |  2 +-
> > >  tests/test-char.c     | 48 ++++++++++++++++++++++++++++++++++++++----------
> > >  2 files changed, 39 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> > > index 1f14c2c7c8..d84330b3c9 100644
> > > --- a/chardev/char-socket.c
> > > +++ b/chardev/char-socket.c
> > > @@ -486,7 +486,7 @@ static void tcp_chr_disconnect_locked(Chardev *chr)
> > >      if (emit_close) {
> > >          qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
> > >      }
> > > -    if (s->reconnect_time) {
> > > +    if (s->reconnect_time && !s->reconnect_timer) {
> > >          qemu_chr_socket_restart_timer(chr);
> > >      }
> > >  }
> > > diff --git a/tests/test-char.c b/tests/test-char.c
> > > index 8d39bdc9fa..13dbbfe2a3 100644
> > > --- a/tests/test-char.c
> > > +++ b/tests/test-char.c
> > > @@ -625,12 +625,14 @@ static void char_udp_test(void)
> > >  typedef struct {
> > >      int event;
> > >      bool got_pong;
> > > +    CharBackend *be;
> > >  } CharSocketTestData;
> > >
> > >
> > >  #define SOCKET_PING "Hello"
> > >  #define SOCKET_PONG "World"
> > >
> > > +typedef void (*char_socket_cb)(void *opaque, QEMUChrEvent event);
> > >
> > >  static void
> > >  char_socket_event(void *opaque, QEMUChrEvent event)
> > > @@ -639,6 +641,23 @@ char_socket_event(void *opaque, QEMUChrEvent event)
> > >      data->event = event;
> > >  }
> > >
> > > +static void
> > > +char_socket_event_with_error(void *opaque, QEMUChrEvent event)
> > > +{
> > > +    CharSocketTestData *data = opaque;
> > > +    CharBackend *be = data->be;
> > > +    data->event = event;
> > > +    switch (event) {
> > > +    case CHR_EVENT_OPENED:
> > > +        qemu_chr_fe_disconnect(be);
> > > +        return;
> > > +    case CHR_EVENT_CLOSED:
> > > +        return;
> > > +    default:
> > > +        return;
> > > +    }
> > > +}
> > > +
> > >
> > >  static void
> > >  char_socket_read(void *opaque, const uint8_t *buf, int size)
> > > @@ -783,6 +802,7 @@ static void char_socket_server_test(gconstpointer opaque)
> > >
> > >   reconnect:
> > >      data.event = -1;
> > > +    data.be = &be;
> > >      qemu_chr_fe_set_handlers(&be, NULL, NULL,
> > >                               char_socket_event, NULL,
> > >                               &data, NULL, true);
> > > @@ -869,6 +889,7 @@ typedef struct {
> > >      const char *reconnect;
> > >      bool wait_connected;
> > >      bool fd_pass;
> > > +    char_socket_cb event_cb;
> > >  } CharSocketClientTestConfig;
> > >
> > >  static void char_socket_client_dupid_test(gconstpointer opaque)
> > > @@ -920,6 +941,7 @@ static void char_socket_client_dupid_test(gconstpointer opaque)
> > >  static void char_socket_client_test(gconstpointer opaque)
> > >  {
> > >      const CharSocketClientTestConfig *config = opaque;
> > > +    const char_socket_cb event_cb = config->event_cb;
> > >      QIOChannelSocket *ioc;
> > >      char *optstr;
> > >      Chardev *chr;
> > > @@ -983,8 +1005,9 @@ static void char_socket_client_test(gconstpointer opaque)
> > >
> > >   reconnect:
> > >      data.event = -1;
> > > +    data.be = &be;
> > >      qemu_chr_fe_set_handlers(&be, NULL, NULL,
> > > -                             char_socket_event, NULL,
> > > +                             event_cb, NULL,
> > >                               &data, NULL, true);
> > >      if (config->reconnect) {
> > >          g_assert(data.event == -1);
> > > @@ -1022,7 +1045,7 @@ static void char_socket_client_test(gconstpointer opaque)
> > >      /* Setup a callback to receive the reply to our greeting */
> > >      qemu_chr_fe_set_handlers(&be, char_socket_can_read,
> > >                               char_socket_read,
> > > -                             char_socket_event, NULL,
> > > +                             event_cb, NULL,
> > >                               &data, NULL, true);
> > >      g_assert(data.event == CHR_EVENT_OPENED);
> > >      data.event = -1;
> > > @@ -1467,19 +1490,22 @@ int main(int argc, char **argv)
> > >
> > >  #define SOCKET_CLIENT_TEST(name, addr)                                  \
> > >      static CharSocketClientTestConfig client1 ## name =                 \
> > > -        { addr, NULL, false, false };                                   \
> > > +        { addr, NULL, false, false, char_socket_event};                 \
> > >      static CharSocketClientTestConfig client2 ## name =                 \
> > > -        { addr, NULL, true, false };                                    \
> > > +        { addr, NULL, true, false, char_socket_event };                 \
> > >      static CharSocketClientTestConfig client3 ## name =                 \
> > > -        { addr, ",reconnect=1", false };                                \
> > > +        { addr, ",reconnect=1", false, false, char_socket_event };      \
> > >      static CharSocketClientTestConfig client4 ## name =                 \
> > > -        { addr, ",reconnect=1", true };                                 \
> > > +        { addr, ",reconnect=1", true, false, char_socket_event };       \
> > >      static CharSocketClientTestConfig client5 ## name =                 \
> > > -        { addr, NULL, false, true };                                    \
> > > +        { addr, NULL, false, true, char_socket_event };                 \
> > >      static CharSocketClientTestConfig client6 ## name =                 \
> > > -        { addr, NULL, true, true };                                     \
> > > +        { addr, NULL, true, true, char_socket_event };                  \
> > >      static CharSocketClientTestConfig client7 ## name =                 \
> > > -        { addr, ",reconnect=1", false, false };                         \
> > > +        { addr, ",reconnect=1", false, false, char_socket_event };      \
> > > +    static CharSocketClientTestConfig client8 ## name =                 \
> > > +        { addr, ",reconnect=1", true, false,                            \
> > > +            char_socket_event_with_error };                             \
> > >      g_test_add_data_func("/char/socket/client/mainloop/" # name,        \
> > >                           &client1 ##name, char_socket_client_test);     \
> > >      g_test_add_data_func("/char/socket/client/wait-conn/" # name,       \
> > > @@ -1493,7 +1519,9 @@ int main(int argc, char **argv)
> > >      g_test_add_data_func("/char/socket/client/wait-conn-fdpass/" # name, \
> > >                           &client6 ##name, char_socket_client_test);     \
> > >      g_test_add_data_func("/char/socket/client/dupid-reconnect/" # name, \
> > > -                         &client7 ##name, char_socket_client_dupid_test)
> > > +                         &client7 ##name, char_socket_client_dupid_test);\
> > > +    g_test_add_data_func("/char/socket/client/reconnect-error/" # name, \
> > > +                         &client8 ##name, char_socket_client_test)
> > >
> > >      if (has_ipv4) {
> > >          SOCKET_SERVER_TEST(tcp, &tcpaddr);
> > > --
> > > 2.11.0
> > >
> >
>
diff mbox series

Patch

diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 185fe38dda..43aab8f24b 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -476,6 +476,11 @@  static void tcp_chr_disconnect_locked(Chardev *chr)
     SocketChardev *s = SOCKET_CHARDEV(chr);
     bool emit_close = s->state == TCP_CHARDEV_STATE_CONNECTED;
 
+    /* avoid re-enter when socket read/write error and disconnect event. */
+    if (s->state == TCP_CHARDEV_STATE_DISCONNECTED) {
+        return;
+    }
+
     tcp_chr_free_connection(chr);
 
     if (s->listener) {