diff mbox series

[v5] chardev/char-socket: add POLLHUP handler

Message ID 20180125135129.9305-1-klim.kireev@virtuozzo.com
State New
Headers show
Series [v5] chardev/char-socket: add POLLHUP handler | expand

Commit Message

Klim Kireev Jan. 25, 2018, 1:51 p.m. UTC
The following behavior was observed for QEMU configured by libvirt
to use guest agent as usual for the guests without virtio-serial
driver (Windows or the guest remaining in BIOS stage).

In QEMU on first connect to listen character device socket
the listen socket is removed from poll just after the accept().
virtio_serial_guest_ready() returns 0 and the descriptor
of the connected Unix socket is removed from poll and it will
not be present in poll() until the guest will initialize the driver
and change the state of the serial to "guest connected".

In libvirt connect() to guest agent is performed on restart and
is run under VM state lock. Connect() is blocking and can
wait forever.
In this case libvirt can not perform ANY operation on that VM.

The bug can be easily reproduced this way:

Terminal 1:
qemu-system-x86_64 -m 512 -device pci-serial,chardev=serial1 -chardev socket,id=serial1,path=/tmp/console.sock,server,nowait
(virtio-serial and isa-serial also fit)

Terminal 2:
minicom -D unix\#/tmp/console.sock
(type something and press enter)
C-a x (to exit)

Do 3 times:
minicom -D unix\#/tmp/console.sock
C-a x

It needs 4 connections, because the first one is accepted by QEMU, then two are queued by
the kernel, and the 4th blocks.

The problem is that QEMU doesn't add a read watcher after succesful read
until the guest device wants to acquire recieved data, so
I propose to install a separate pullhup watcher regardless of
whether the device waits for data or not.

Signed-off-by: Klim Kireev <klim.kireev@virtuozzo.com>
---
Changelog:
v2: Remove timer as a redundant feature

v3: Remove read call and return G_SOURCE_REMOVE

v4: Move to GSource API

v5: Fix git typos 

 chardev/char-socket.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

Comments

Paolo Bonzini Jan. 30, 2018, 7:49 p.m. UTC | #1
On 25/01/2018 08:51, Klim Kireev wrote:
> The following behavior was observed for QEMU configured by libvirt
> to use guest agent as usual for the guests without virtio-serial
> driver (Windows or the guest remaining in BIOS stage).
> 
> In QEMU on first connect to listen character device socket
> the listen socket is removed from poll just after the accept().
> virtio_serial_guest_ready() returns 0 and the descriptor
> of the connected Unix socket is removed from poll and it will
> not be present in poll() until the guest will initialize the driver
> and change the state of the serial to "guest connected".
> 
> In libvirt connect() to guest agent is performed on restart and
> is run under VM state lock. Connect() is blocking and can
> wait forever.
> In this case libvirt can not perform ANY operation on that VM.
> 
> The bug can be easily reproduced this way:
> 
> Terminal 1:
> qemu-system-x86_64 -m 512 -device pci-serial,chardev=serial1 -chardev socket,id=serial1,path=/tmp/console.sock,server,nowait
> (virtio-serial and isa-serial also fit)
> 
> Terminal 2:
> minicom -D unix\#/tmp/console.sock
> (type something and press enter)
> C-a x (to exit)
> 
> Do 3 times:
> minicom -D unix\#/tmp/console.sock
> C-a x
> 
> It needs 4 connections, because the first one is accepted by QEMU, then two are queued by
> the kernel, and the 4th blocks.
> 
> The problem is that QEMU doesn't add a read watcher after succesful read
> until the guest device wants to acquire recieved data, so
> I propose to install a separate pullhup watcher regardless of
> whether the device waits for data or not.
> 
> Signed-off-by: Klim Kireev <klim.kireev@virtuozzo.com>
> ---
> Changelog:
> v2: Remove timer as a redundant feature
> 
> v3: Remove read call and return G_SOURCE_REMOVE
> 
> v4: Move to GSource API
> 
> v5: Fix git typos 
> 
>  chardev/char-socket.c | 22 ++++++++++++++++++++++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> index 77cdf487eb..a340af6cd3 100644
> --- a/chardev/char-socket.c
> +++ b/chardev/char-socket.c
> @@ -42,6 +42,7 @@ typedef struct {
>      QIOChannel *ioc; /* Client I/O channel */
>      QIOChannelSocket *sioc; /* Client master channel */
>      QIONetListener *listener;
> +    GSource *hup_source;
>      QCryptoTLSCreds *tls_creds;
>      int connected;
>      int max_size;
> @@ -352,6 +353,12 @@ static void tcp_chr_free_connection(Chardev *chr)
>          s->read_msgfds_num = 0;
>      }
>  
> +    if (s->hup_source != NULL) {
> +        g_source_destroy(s->hup_source);
> +        g_source_unref(s->hup_source);
> +        s->hup_source = NULL;
> +    }
> +
>      tcp_set_msgfds(chr, NULL, 0);
>      remove_fd_in_watch(chr);
>      object_unref(OBJECT(s->sioc));
> @@ -455,6 +462,15 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
>      return TRUE;
>  }
>  
> +static gboolean tcp_chr_hup(QIOChannel *channel,
> +                               GIOCondition cond,
> +                               void *opaque)
> +{
> +    Chardev *chr = CHARDEV(opaque);
> +    tcp_chr_disconnect(chr);
> +    return G_SOURCE_REMOVE;
> +}
> +
>  static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
>  {
>      SocketChardev *s = SOCKET_CHARDEV(chr);
> @@ -528,6 +544,12 @@ static void tcp_chr_connect(void *opaque)
>                                             tcp_chr_read,
>                                             chr, chr->gcontext);
>      }
> +
> +    s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP);
> +    g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup,
> +                          chr, NULL);
> +    g_source_attach(s->hup_source, chr->gcontext);
> +
>      qemu_chr_be_event(chr, CHR_EVENT_OPENED);
>  }
>  
> 

Queued, thanks.

Paolo
Thomas Huth Feb. 8, 2018, 7:13 p.m. UTC | #2
On 30.01.2018 20:49, Paolo Bonzini wrote:
> On 25/01/2018 08:51, Klim Kireev wrote:
>> The following behavior was observed for QEMU configured by libvirt
>> to use guest agent as usual for the guests without virtio-serial
>> driver (Windows or the guest remaining in BIOS stage).
>>
>> In QEMU on first connect to listen character device socket
>> the listen socket is removed from poll just after the accept().
>> virtio_serial_guest_ready() returns 0 and the descriptor
>> of the connected Unix socket is removed from poll and it will
>> not be present in poll() until the guest will initialize the driver
>> and change the state of the serial to "guest connected".
>>
>> In libvirt connect() to guest agent is performed on restart and
>> is run under VM state lock. Connect() is blocking and can
>> wait forever.
>> In this case libvirt can not perform ANY operation on that VM.
>>
>> The bug can be easily reproduced this way:
>>
>> Terminal 1:
>> qemu-system-x86_64 -m 512 -device pci-serial,chardev=serial1 -chardev socket,id=serial1,path=/tmp/console.sock,server,nowait
>> (virtio-serial and isa-serial also fit)
>>
>> Terminal 2:
>> minicom -D unix\#/tmp/console.sock
>> (type something and press enter)
>> C-a x (to exit)
>>
>> Do 3 times:
>> minicom -D unix\#/tmp/console.sock
>> C-a x
>>
>> It needs 4 connections, because the first one is accepted by QEMU, then two are queued by
>> the kernel, and the 4th blocks.
>>
>> The problem is that QEMU doesn't add a read watcher after succesful read
>> until the guest device wants to acquire recieved data, so
>> I propose to install a separate pullhup watcher regardless of
>> whether the device waits for data or not.
>>
>> Signed-off-by: Klim Kireev <klim.kireev@virtuozzo.com>
>> ---
>> Changelog:
>> v2: Remove timer as a redundant feature
>>
>> v3: Remove read call and return G_SOURCE_REMOVE
>>
>> v4: Move to GSource API
>>
>> v5: Fix git typos 
>>
>>  chardev/char-socket.c | 22 ++++++++++++++++++++++
>>  1 file changed, 22 insertions(+)
>>
>> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
>> index 77cdf487eb..a340af6cd3 100644
>> --- a/chardev/char-socket.c
>> +++ b/chardev/char-socket.c
>> @@ -42,6 +42,7 @@ typedef struct {
>>      QIOChannel *ioc; /* Client I/O channel */
>>      QIOChannelSocket *sioc; /* Client master channel */
>>      QIONetListener *listener;
>> +    GSource *hup_source;
>>      QCryptoTLSCreds *tls_creds;
>>      int connected;
>>      int max_size;
>> @@ -352,6 +353,12 @@ static void tcp_chr_free_connection(Chardev *chr)
>>          s->read_msgfds_num = 0;
>>      }
>>  
>> +    if (s->hup_source != NULL) {
>> +        g_source_destroy(s->hup_source);
>> +        g_source_unref(s->hup_source);
>> +        s->hup_source = NULL;
>> +    }
>> +
>>      tcp_set_msgfds(chr, NULL, 0);
>>      remove_fd_in_watch(chr);
>>      object_unref(OBJECT(s->sioc));
>> @@ -455,6 +462,15 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
>>      return TRUE;
>>  }
>>  
>> +static gboolean tcp_chr_hup(QIOChannel *channel,
>> +                               GIOCondition cond,
>> +                               void *opaque)
>> +{
>> +    Chardev *chr = CHARDEV(opaque);
>> +    tcp_chr_disconnect(chr);
>> +    return G_SOURCE_REMOVE;
>> +}
>> +
>>  static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
>>  {
>>      SocketChardev *s = SOCKET_CHARDEV(chr);
>> @@ -528,6 +544,12 @@ static void tcp_chr_connect(void *opaque)
>>                                             tcp_chr_read,
>>                                             chr, chr->gcontext);
>>      }
>> +
>> +    s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP);
>> +    g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup,
>> +                          chr, NULL);
>> +    g_source_attach(s->hup_source, chr->gcontext);
>> +
>>      qemu_chr_be_event(chr, CHR_EVENT_OPENED);
>>  }
>>  
>>
> 
> Queued, thanks.

 Hi!

I'm currently facing some issues with "make check -j4" (i.e. running the
tests in parallel). Git bisect blames this commit - though I'm not sure
whether this is really the right one ... Starting with this commit, I
saw hangs in test-filter-redirector, but git master rather seems to hang
with vhost-user-test instead.
Stefan also had issues with "make check -j4" today, so it's apparently
not only me ... can somebody else reproduce the problem with the git
master branch?

 Thomas
Peter Maydell Feb. 8, 2018, 9:12 p.m. UTC | #3
On 8 February 2018 at 19:13, Thomas Huth <thuth@redhat.com> wrote:
> I'm currently facing some issues with "make check -j4" (i.e. running the
> tests in parallel). Git bisect blames this commit - though I'm not sure
> whether this is really the right one ... Starting with this commit, I
> saw hangs in test-filter-redirector, but git master rather seems to hang
> with vhost-user-test instead.
> Stefan also had issues with "make check -j4" today, so it's apparently
> not only me ... can somebody else reproduce the problem with the git
> master branch?

Yeah, I was seeing odd make check hangs with parallelization enabled
too, on vhost-user-test... (my builds for merge tests are almost
all non-parallelized I think).

thanks
-- PMM
Alistair Francis Feb. 8, 2018, 9:54 p.m. UTC | #4
On Thu, Feb 8, 2018 at 1:12 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 8 February 2018 at 19:13, Thomas Huth <thuth@redhat.com> wrote:
>> I'm currently facing some issues with "make check -j4" (i.e. running the
>> tests in parallel). Git bisect blames this commit - though I'm not sure
>> whether this is really the right one ... Starting with this commit, I
>> saw hangs in test-filter-redirector, but git master rather seems to hang
>> with vhost-user-test instead.
>> Stefan also had issues with "make check -j4" today, so it's apparently
>> not only me ... can somebody else reproduce the problem with the git
>> master branch?
>
> Yeah, I was seeing odd make check hangs with parallelization enabled
> too, on vhost-user-test... (my builds for merge tests are almost
> all non-parallelized I think).

I don't only see hangs I also am seeing this:

qemu-system-i386: -chardev
socket,id=chr-test,path=/tmp/vhost-test-wxuOIX/test.sock: Failed to
connect socket /tmp/vhost-test-wxuOIX/test.sock: No such file or
directory
socket_accept failed: Resource temporarily unavailable
**
ERROR:tests/libqtest.c:218:qtest_init_without_qmp_handshake: assertion
failed: (s->fd >= 0 && s->qmp_fd >= 0)
GTester: last random seed: R02Sa67ba85090274c01deb9c2d15b044c10

It isn't 100% reproducible though.

Alistair

>
> thanks
> -- PMM
>
Paolo Bonzini Feb. 9, 2018, 11:55 a.m. UTC | #5
On 08/02/2018 22:54, Alistair Francis wrote:
> On Thu, Feb 8, 2018 at 1:12 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
>> On 8 February 2018 at 19:13, Thomas Huth <thuth@redhat.com> wrote:
>>> I'm currently facing some issues with "make check -j4" (i.e. running the
>>> tests in parallel). Git bisect blames this commit - though I'm not sure
>>> whether this is really the right one ... Starting with this commit, I
>>> saw hangs in test-filter-redirector, but git master rather seems to hang
>>> with vhost-user-test instead.
>>> Stefan also had issues with "make check -j4" today, so it's apparently
>>> not only me ... can somebody else reproduce the problem with the git
>>> master branch?
>>
>> Yeah, I was seeing odd make check hangs with parallelization enabled
>> too, on vhost-user-test... (my builds for merge tests are almost
>> all non-parallelized I think).
> 
> I don't only see hangs I also am seeing this:
> 
> qemu-system-i386: -chardev
> socket,id=chr-test,path=/tmp/vhost-test-wxuOIX/test.sock: Failed to
> connect socket /tmp/vhost-test-wxuOIX/test.sock: No such file or
> directory
> socket_accept failed: Resource temporarily unavailable
> **
> ERROR:tests/libqtest.c:218:qtest_init_without_qmp_handshake: assertion
> failed: (s->fd >= 0 && s->qmp_fd >= 0)
> GTester: last random seed: R02Sa67ba85090274c01deb9c2d15b044c10
> 
> It isn't 100% reproducible though.

It may be something like commit 8f6d701044bc87197aac8aa2a627d70ce8471726.

In the meanwhile we should revert it.

Paolo
Klim Kireev Feb. 9, 2018, 12:47 p.m. UTC | #6
On 02/08/2018 10:13 PM, Thomas Huth wrote:
> On 30.01.2018 20:49, Paolo Bonzini wrote:
>> On 25/01/2018 08:51, Klim Kireev wrote:
>>> The following behavior was observed for QEMU configured by libvirt
>>> to use guest agent as usual for the guests without virtio-serial
>>> driver (Windows or the guest remaining in BIOS stage).
>>>
>>> In QEMU on first connect to listen character device socket
>>> the listen socket is removed from poll just after the accept().
>>> virtio_serial_guest_ready() returns 0 and the descriptor
>>> of the connected Unix socket is removed from poll and it will
>>> not be present in poll() until the guest will initialize the driver
>>> and change the state of the serial to "guest connected".
>>>
>>> In libvirt connect() to guest agent is performed on restart and
>>> is run under VM state lock. Connect() is blocking and can
>>> wait forever.
>>> In this case libvirt can not perform ANY operation on that VM.
>>>
>>> The bug can be easily reproduced this way:
>>>
>>> Terminal 1:
>>> qemu-system-x86_64 -m 512 -device pci-serial,chardev=serial1 -chardev socket,id=serial1,path=/tmp/console.sock,server,nowait
>>> (virtio-serial and isa-serial also fit)
>>>
>>> Terminal 2:
>>> minicom -D unix\#/tmp/console.sock
>>> (type something and press enter)
>>> C-a x (to exit)
>>>
>>> Do 3 times:
>>> minicom -D unix\#/tmp/console.sock
>>> C-a x
>>>
>>> It needs 4 connections, because the first one is accepted by QEMU, then two are queued by
>>> the kernel, and the 4th blocks.
>>>
>>> The problem is that QEMU doesn't add a read watcher after succesful read
>>> until the guest device wants to acquire recieved data, so
>>> I propose to install a separate pullhup watcher regardless of
>>> whether the device waits for data or not.
>>>
>>> Signed-off-by: Klim Kireev <klim.kireev@virtuozzo.com>
>>> ---
>>> Changelog:
>>> v2: Remove timer as a redundant feature
>>>
>>> v3: Remove read call and return G_SOURCE_REMOVE
>>>
>>> v4: Move to GSource API
>>>
>>> v5: Fix git typos 
>>>
>>>  chardev/char-socket.c | 22 ++++++++++++++++++++++
>>>  1 file changed, 22 insertions(+)
>>>
>>> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
>>> index 77cdf487eb..a340af6cd3 100644
>>> --- a/chardev/char-socket.c
>>> +++ b/chardev/char-socket.c
>>> @@ -42,6 +42,7 @@ typedef struct {
>>>      QIOChannel *ioc; /* Client I/O channel */
>>>      QIOChannelSocket *sioc; /* Client master channel */
>>>      QIONetListener *listener;
>>> +    GSource *hup_source;
>>>      QCryptoTLSCreds *tls_creds;
>>>      int connected;
>>>      int max_size;
>>> @@ -352,6 +353,12 @@ static void tcp_chr_free_connection(Chardev *chr)
>>>          s->read_msgfds_num = 0;
>>>      }
>>>  
>>> +    if (s->hup_source != NULL) {
>>> +        g_source_destroy(s->hup_source);
>>> +        g_source_unref(s->hup_source);
>>> +        s->hup_source = NULL;
>>> +    }
>>> +
>>>      tcp_set_msgfds(chr, NULL, 0);
>>>      remove_fd_in_watch(chr);
>>>      object_unref(OBJECT(s->sioc));
>>> @@ -455,6 +462,15 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
>>>      return TRUE;
>>>  }
>>>  
>>> +static gboolean tcp_chr_hup(QIOChannel *channel,
>>> +                               GIOCondition cond,
>>> +                               void *opaque)
>>> +{
>>> +    Chardev *chr = CHARDEV(opaque);
>>> +    tcp_chr_disconnect(chr);
>>> +    return G_SOURCE_REMOVE;
>>> +}
>>> +
>>>  static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
>>>  {
>>>      SocketChardev *s = SOCKET_CHARDEV(chr);
>>> @@ -528,6 +544,12 @@ static void tcp_chr_connect(void *opaque)
>>>                                             tcp_chr_read,
>>>                                             chr, chr->gcontext);
>>>      }
>>> +
>>> +    s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP);
>>> +    g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup,
>>> +                          chr, NULL);
>>> +    g_source_attach(s->hup_source, chr->gcontext);
>>> +
>>>      qemu_chr_be_event(chr, CHR_EVENT_OPENED);
>>>  }
>>>  
>>>
>> Queued, thanks.
>  Hi!
>
> I'm currently facing some issues with "make check -j4" (i.e. running the
> tests in parallel). Git bisect blames this commit - though I'm not sure
> whether this is really the right one ... Starting with this commit, I
> saw hangs in test-filter-redirector, 

Commit 8f6d701044bc87197aac8aa2a627d70ce8471726 fixes it
Have you tried to apply it?

> but git master rather seems to hang
> with vhost-user-test instead.
> Stefan also had issues with "make check -j4" today, so it's apparently
> not only me ... can somebody else reproduce the problem with the git
> master branch?
>
>  Thomas
Thomas Huth Feb. 9, 2018, 4:03 p.m. UTC | #7
On 09.02.2018 13:47, Klim Kireev wrote:
> 
> On 02/08/2018 10:13 PM, Thomas Huth wrote:
>> On 30.01.2018 20:49, Paolo Bonzini wrote:
>>> On 25/01/2018 08:51, Klim Kireev wrote:
>>>> The following behavior was observed for QEMU configured by libvirt
>>>> to use guest agent as usual for the guests without virtio-serial
>>>> driver (Windows or the guest remaining in BIOS stage).
>>>>
>>>> In QEMU on first connect to listen character device socket
>>>> the listen socket is removed from poll just after the accept().
>>>> virtio_serial_guest_ready() returns 0 and the descriptor
>>>> of the connected Unix socket is removed from poll and it will
>>>> not be present in poll() until the guest will initialize the driver
>>>> and change the state of the serial to "guest connected".
>>>>
>>>> In libvirt connect() to guest agent is performed on restart and
>>>> is run under VM state lock. Connect() is blocking and can
>>>> wait forever.
>>>> In this case libvirt can not perform ANY operation on that VM.
>>>>
>>>> The bug can be easily reproduced this way:
>>>>
>>>> Terminal 1:
>>>> qemu-system-x86_64 -m 512 -device pci-serial,chardev=serial1 -chardev socket,id=serial1,path=/tmp/console.sock,server,nowait
>>>> (virtio-serial and isa-serial also fit)
>>>>
>>>> Terminal 2:
>>>> minicom -D unix\#/tmp/console.sock
>>>> (type something and press enter)
>>>> C-a x (to exit)
>>>>
>>>> Do 3 times:
>>>> minicom -D unix\#/tmp/console.sock
>>>> C-a x
>>>>
>>>> It needs 4 connections, because the first one is accepted by QEMU, then two are queued by
>>>> the kernel, and the 4th blocks.
>>>>
>>>> The problem is that QEMU doesn't add a read watcher after succesful read
>>>> until the guest device wants to acquire recieved data, so
>>>> I propose to install a separate pullhup watcher regardless of
>>>> whether the device waits for data or not.
>>>>
>>>> Signed-off-by: Klim Kireev <klim.kireev@virtuozzo.com>
>>>> ---
>>>> Changelog:
>>>> v2: Remove timer as a redundant feature
>>>>
>>>> v3: Remove read call and return G_SOURCE_REMOVE
>>>>
>>>> v4: Move to GSource API
>>>>
>>>> v5: Fix git typos 
>>>>
>>>>  chardev/char-socket.c | 22 ++++++++++++++++++++++
>>>>  1 file changed, 22 insertions(+)
>>>>
>>>> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
>>>> index 77cdf487eb..a340af6cd3 100644
>>>> --- a/chardev/char-socket.c
>>>> +++ b/chardev/char-socket.c
>>>> @@ -42,6 +42,7 @@ typedef struct {
>>>>      QIOChannel *ioc; /* Client I/O channel */
>>>>      QIOChannelSocket *sioc; /* Client master channel */
>>>>      QIONetListener *listener;
>>>> +    GSource *hup_source;
>>>>      QCryptoTLSCreds *tls_creds;
>>>>      int connected;
>>>>      int max_size;
>>>> @@ -352,6 +353,12 @@ static void tcp_chr_free_connection(Chardev *chr)
>>>>          s->read_msgfds_num = 0;
>>>>      }
>>>>  
>>>> +    if (s->hup_source != NULL) {
>>>> +        g_source_destroy(s->hup_source);
>>>> +        g_source_unref(s->hup_source);
>>>> +        s->hup_source = NULL;
>>>> +    }
>>>> +
>>>>      tcp_set_msgfds(chr, NULL, 0);
>>>>      remove_fd_in_watch(chr);
>>>>      object_unref(OBJECT(s->sioc));
>>>> @@ -455,6 +462,15 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
>>>>      return TRUE;
>>>>  }
>>>>  
>>>> +static gboolean tcp_chr_hup(QIOChannel *channel,
>>>> +                               GIOCondition cond,
>>>> +                               void *opaque)
>>>> +{
>>>> +    Chardev *chr = CHARDEV(opaque);
>>>> +    tcp_chr_disconnect(chr);
>>>> +    return G_SOURCE_REMOVE;
>>>> +}
>>>> +
>>>>  static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
>>>>  {
>>>>      SocketChardev *s = SOCKET_CHARDEV(chr);
>>>> @@ -528,6 +544,12 @@ static void tcp_chr_connect(void *opaque)
>>>>                                             tcp_chr_read,
>>>>                                             chr, chr->gcontext);
>>>>      }
>>>> +
>>>> +    s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP);
>>>> +    g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup,
>>>> +                          chr, NULL);
>>>> +    g_source_attach(s->hup_source, chr->gcontext);
>>>> +
>>>>      qemu_chr_be_event(chr, CHR_EVENT_OPENED);
>>>>  }
>>>>  
>>>>
>>> Queued, thanks.
>>  Hi!
>>
>> I'm currently facing some issues with "make check -j4" (i.e. running the
>> tests in parallel). Git bisect blames this commit - though I'm not sure
>> whether this is really the right one ... Starting with this commit, I
>> saw hangs in test-filter-redirector, 
> 
> Commit 8f6d701044bc87197aac8aa2a627d70ce8471726 fixes it
> Have you tried to apply it?

That seems to be the one that fixes test-filter-redirector, but after
that, it still fails with vhost-user-test instead.

 Thomas
Klim Kireev Feb. 12, 2018, 2:42 p.m. UTC | #8
On 02/09/2018 02:55 PM, Paolo Bonzini wrote:
> On 08/02/2018 22:54, Alistair Francis wrote:
>> On Thu, Feb 8, 2018 at 1:12 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
>>> On 8 February 2018 at 19:13, Thomas Huth <thuth@redhat.com> wrote:
>>>> I'm currently facing some issues with "make check -j4" (i.e. running the
>>>> tests in parallel). Git bisect blames this commit - though I'm not sure
>>>> whether this is really the right one ... Starting with this commit, I
>>>> saw hangs in test-filter-redirector, but git master rather seems to hang
>>>> with vhost-user-test instead.
>>>> Stefan also had issues with "make check -j4" today, so it's apparently
>>>> not only me ... can somebody else reproduce the problem with the git
>>>> master branch?
>>> Yeah, I was seeing odd make check hangs with parallelization enabled
>>> too, on vhost-user-test... (my builds for merge tests are almost
>>> all non-parallelized I think).
>> I don't only see hangs I also am seeing this:
>>
>> qemu-system-i386: -chardev
>> socket,id=chr-test,path=/tmp/vhost-test-wxuOIX/test.sock: Failed to
>> connect socket /tmp/vhost-test-wxuOIX/test.sock: No such file or
>> directory
>> socket_accept failed: Resource temporarily unavailable
>> **
>> ERROR:tests/libqtest.c:218:qtest_init_without_qmp_handshake: assertion
>> failed: (s->fd >= 0 && s->qmp_fd >= 0)
>> GTester: last random seed: R02Sa67ba85090274c01deb9c2d15b044c10
>>
>> It isn't 100% reproducible though.
> It may be something like commit 8f6d701044bc87197aac8aa2a627d70ce8471726.
>
> In the meanwhile we should revert it.
>
> Paolo

I just have reverted my 2 commits and

after that make check -j32 hangs

with

GTester: last random seed: R02Sb95a3bf6ab4c05540cec188081a7cc2a

in vhost-user-test

so it is not my fault
Thomas Huth Feb. 12, 2018, 7:30 p.m. UTC | #9
On 12.02.2018 15:42, klim wrote:
[...]
> I just have reverted my 2 commits and
> 
> after that make check -j32 hangs
> 
> with
> 
> GTester: last random seed: R02Sb95a3bf6ab4c05540cec188081a7cc2a
> 
> in vhost-user-test
> 
> so it is not my fault

You're right. I've bisected the issue again, and this time I've applied
8f6d701044b ("tests/test-filter-redirector: move close()") on top of
each step if necessary, and indeed, the "vhost-user-test" problem has
nothing to do with your patches, Klim, but with the one from Marc-André.
According to git bisect, this is the commit that introduced the problem:

	commit 7e49f5e8e508ed020c96798b3f7083e24e0e425b
	tests: use memfd in vhost-user-test

Peter, since this is quite annoying if "make check" is not working
reliably, could you please revert that commit until a proper solution is
found?

 Thanks,
  Thomas
Paolo Bonzini Feb. 12, 2018, 10:14 p.m. UTC | #10
On 12/02/2018 20:30, Thomas Huth wrote:
> On 12.02.2018 15:42, klim wrote:
> [...]
>> I just have reverted my 2 commits and
>>
>> after that make check -j32 hangs
>>
>> with
>>
>> GTester: last random seed: R02Sb95a3bf6ab4c05540cec188081a7cc2a
>>
>> in vhost-user-test
>>
>> so it is not my fault
> 
> You're right. I've bisected the issue again, and this time I've applied
> 8f6d701044b ("tests/test-filter-redirector: move close()") on top of
> each step if necessary, and indeed, the "vhost-user-test" problem has
> nothing to do with your patches, Klim, but with the one from Marc-André.
> According to git bisect, this is the commit that introduced the problem:
> 
> 	commit 7e49f5e8e508ed020c96798b3f7083e24e0e425b
> 	tests: use memfd in vhost-user-test
> 
> Peter, since this is quite annoying if "make check" is not working
> reliably, could you please revert that commit until a proper solution is
> found?

I was about to send one when Klim answered.  I'll send it tomorrow, with
Marc-André's patch reverted.

Paolo
Peter Maydell Feb. 13, 2018, 11:21 a.m. UTC | #11
On 12 February 2018 at 19:30, Thomas Huth <thuth@redhat.com> wrote:
> On 12.02.2018 15:42, klim wrote:
> [...]
>> I just have reverted my 2 commits and
>>
>> after that make check -j32 hangs
>>
>> with
>>
>> GTester: last random seed: R02Sb95a3bf6ab4c05540cec188081a7cc2a
>>
>> in vhost-user-test
>>
>> so it is not my fault
>
> You're right. I've bisected the issue again, and this time I've applied
> 8f6d701044b ("tests/test-filter-redirector: move close()") on top of
> each step if necessary, and indeed, the "vhost-user-test" problem has
> nothing to do with your patches, Klim, but with the one from Marc-André.
> According to git bisect, this is the commit that introduced the problem:
>
>         commit 7e49f5e8e508ed020c96798b3f7083e24e0e425b
>         tests: use memfd in vhost-user-test
>
> Peter, since this is quite annoying if "make check" is not working
> reliably, could you please revert that commit until a proper solution is
> found?

Reverted in master -- thanks for the bisect.

-- PMM
diff mbox series

Patch

diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 77cdf487eb..a340af6cd3 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -42,6 +42,7 @@  typedef struct {
     QIOChannel *ioc; /* Client I/O channel */
     QIOChannelSocket *sioc; /* Client master channel */
     QIONetListener *listener;
+    GSource *hup_source;
     QCryptoTLSCreds *tls_creds;
     int connected;
     int max_size;
@@ -352,6 +353,12 @@  static void tcp_chr_free_connection(Chardev *chr)
         s->read_msgfds_num = 0;
     }
 
+    if (s->hup_source != NULL) {
+        g_source_destroy(s->hup_source);
+        g_source_unref(s->hup_source);
+        s->hup_source = NULL;
+    }
+
     tcp_set_msgfds(chr, NULL, 0);
     remove_fd_in_watch(chr);
     object_unref(OBJECT(s->sioc));
@@ -455,6 +462,15 @@  static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
     return TRUE;
 }
 
+static gboolean tcp_chr_hup(QIOChannel *channel,
+                               GIOCondition cond,
+                               void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    tcp_chr_disconnect(chr);
+    return G_SOURCE_REMOVE;
+}
+
 static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
 {
     SocketChardev *s = SOCKET_CHARDEV(chr);
@@ -528,6 +544,12 @@  static void tcp_chr_connect(void *opaque)
                                            tcp_chr_read,
                                            chr, chr->gcontext);
     }
+
+    s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP);
+    g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup,
+                          chr, NULL);
+    g_source_attach(s->hup_source, chr->gcontext);
+
     qemu_chr_be_event(chr, CHR_EVENT_OPENED);
 }