diff mbox

[PULL,v2,for-2.7,02/15] qapi: change QmpInputVisitor to QSLIST

Message ID 1468953718-27661-3-git-send-email-armbru@redhat.com
State New
Headers show

Commit Message

Markus Armbruster July 19, 2016, 6:41 p.m. UTC
From: Paolo Bonzini <pbonzini@redhat.com>

This saves a lot of memory compared to a statically-sized array,
or at least 24kb could be considered a lot on an Atari ST.
It also makes the code more similar to QmpOutputVisitor.

This removes the limit on the depth of a QObject that can be processed
into a QAPI tree.  This is not a problem because QObjects can be
considered trusted; the text received on the QMP wire is untrusted
input, but the JSON parser already takes pains to limit the QObject tree
it creates.  We don't need the QMP input visitor to limit it again.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1467906798-5312-3-git-send-email-pbonzini@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
[Commit message typo fixed]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 qapi/qmp-input-visitor.c | 59 ++++++++++++++++++++++++------------------------
 1 file changed, 29 insertions(+), 30 deletions(-)

Comments

Laszlo Ersek July 27, 2016, 9:37 p.m. UTC | #1
On 07/19/16 20:41, Markus Armbruster wrote:
> From: Paolo Bonzini <pbonzini@redhat.com>
>
> This saves a lot of memory compared to a statically-sized array,
> or at least 24kb could be considered a lot on an Atari ST.
> It also makes the code more similar to QmpOutputVisitor.
>
> This removes the limit on the depth of a QObject that can be processed
> into a QAPI tree.  This is not a problem because QObjects can be
> considered trusted; the text received on the QMP wire is untrusted
> input, but the JSON parser already takes pains to limit the QObject tree
> it creates.  We don't need the QMP input visitor to limit it again.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Message-Id: <1467906798-5312-3-git-send-email-pbonzini@redhat.com>
> Reviewed-by: Markus Armbruster <armbru@redhat.com>
> Reviewed-by: Eric Blake <eblake@redhat.com>
> [Commit message typo fixed]
> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> ---
>  qapi/qmp-input-visitor.c | 59 ++++++++++++++++++++++++------------------------
>  1 file changed, 29 insertions(+), 30 deletions(-)

This patch breaks VM shutdown for me, with libvirt. The symptom is that
I issue the "poweroff" command in the Fedora guest, the shutdown
proceeds normally, and at the very end, the QEMU process hangs, and
virt-manager seems to hang too, with the VM state stuck in "Shutting
Down".

NB: VM shutdown is not generally broken for me by this patch. Most of my
other VMs don't care, but this one specific VM, it is very consistent.

* First, some version numbers:
- host kernel: 3.10.0-464.el7.x86_64
- libvirt: 2.0.0-2.el7.x86_64
- virt manager: 1.4.0-1.el7.noarch

* configure command line:

  ./configure \
    --audio-drv-list=alsa \
    --target-list=x86_64-softmmu,aarch64-softmmu,i386-softmmu,arm-softmmu \
    --disable-vde \
    --enable-spice \
    --prefix=/opt/qemu-installed \
    --disable-gtk \
    --enable-debug \
    --enable-trace-backends=log

* QEMU command line:

  /opt/qemu-installed/bin/qemu-system-x86_64 \
    -name guest=ovmf.fedora.q35,debug-threads=on \
    -S \
    -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-31-ovmf.fedora.q35/master-key.aes \
    -machine pc-q35-2.5,accel=kvm,usb=off \
    -cpu Haswell-noTSX,+vmx \
    -drive file=/home/virt-images/OVMF_CODE.3264.fd,if=pflash,format=raw,unit=0,readonly=on \
    -drive file=/var/lib/libvirt/qemu/nvram/ovmf.fedora.q35_VARS.fd,if=pflash,format=raw,unit=1 \
    -m 4096 \
    -realtime mlock=off \
    -smp 4,sockets=1,cores=2,threads=2 \
    -uuid a51c0e4c-93b1-4485-811e-ea9727eb748c \
    -no-user-config \
    -nodefaults \
    -chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-31-ovmf.fedora.q35/monitor.sock,server,nowait \
    -mon chardev=charmonitor,id=monitor,mode=control \
    -rtc base=utc \
    -no-shutdown \
    -global ICH9-LPC.disable_s3=0 \
    -global ICH9-LPC.disable_s4=1 \
    -boot menu=on,splash-time=1000,strict=on \
    -device i82801b11-bridge,id=pci.1,bus=pcie.0,addr=0x1e \
    -device pci-bridge,chassis_nr=2,id=pci.2,bus=pci.1,addr=0x1 \
    -device ich9-usb-ehci1,id=usb,bus=pci.2,addr=0x2.0x7 \
    -device ich9-usb-uhci1,masterbus=usb.0,firstport=0,bus=pci.2,multifunction=on,addr=0x2 \
    -device ich9-usb-uhci2,masterbus=usb.0,firstport=2,bus=pci.2,addr=0x2.0x1 \
    -device ich9-usb-uhci3,masterbus=usb.0,firstport=4,bus=pci.2,addr=0x2.0x2 \
    -device virtio-scsi-pci,id=scsi0,bus=pci.2,addr=0x5 \
    -device virtio-serial-pci,id=virtio-serial0,bus=pci.2,addr=0x6 \
    -drive file=/mnt/data/virt-images-big/ovmf.fedora.q35.zimg,format=qcow2,if=none,id=drive-virtio-disk0,cache=writeback \
    -device virtio-blk-pci,scsi=off,bus=pci.2,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=2 \
    -drive file=/mnt/data/isos/iso-bits/bits-2073.iso,format=raw,if=none,media=cdrom,id=drive-sata0-0-0,readonly=on,cache=writeback \
    -device ide-cd,bus=ide.0,drive=drive-sata0-0-0,id=sata0-0-0 \
    -drive file=/usr/share/OVMF/UefiShell.iso,format=raw,if=none,id=drive-scsi0-0-0-1,readonly=on,cache=writeback \
    -device scsi-cd,bus=scsi0.0,channel=0,scsi-id=0,lun=1,drive=drive-scsi0-0-0-1,id=scsi0-0-0-1,bootindex=1 \
    -netdev tap,fd=26,id=hostnet0,vhost=on,vhostfd=28 \
    -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:29:80:ae,bus=pci.2,addr=0x1,rombar=1,romfile=/usr/share/ipxe/1af41000.rom \
    -chardev pty,id=charserial0 \
    -device isa-serial,chardev=charserial0,id=serial0 \
    -chardev socket,id=charchannel0,path=/var/lib/libvirt/qemu/channel/target/domain-31-ovmf.fedora.q35/org.qemu.guest_agent.0,server,nowait \
    -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,id=channel0,name=org.qemu.guest_agent.0 \
    -chardev spicevmc,id=charchannel1,name=vdagent \
    -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,id=channel1,name=com.redhat.spice.0 \
    -device usb-tablet,id=input0 \
    -spice port=5900,addr=127.0.0.1,disable-ticketing,streaming-video=off,seamless-migration=on \
    -device qxl-vga,id=video0,ram_size=67108864,vram_size=67108864,vram64_size_mb=0,vgamem_mb=16,bus=pcie.0,addr=0x1 \
    -device intel-hda,id=sound0,bus=pci.2,addr=0x7 \
    -device hda-duplex,id=sound0-codec0,bus=sound0.0,cad=0 \
    -device virtio-balloon-pci,id=balloon0,bus=pci.2,addr=0x4 \
    -global isa-debugcon.iobase=0x402 \
    -debugcon file:/tmp/ovmf.fedora.q35.log \
    -global driver=cfi.pflash01,property=secure,value=on \
    -fw_cfg name=opt/ovmf/PcdResizeXterm,string=y \
    -msg timestamp=on

* Bisection log:

> git bisect start
> # bad: [51313fe4f4afea65970a20b32ed2d35b44fbf993] Merge remote-tracking branch 'remotes/stefanha/tags/CVE-2016-5403-virtio-unbounded-allocation-pull-request' into staging
> git bisect bad 51313fe4f4afea65970a20b32ed2d35b44fbf993
> # bad: [e0ce97f896ac752b734c22a6d0d1b3e1739d1158] Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
> git bisect bad e0ce97f896ac752b734c22a6d0d1b3e1739d1158
> # good: [6b3532b20b787cbd697a68b383232f5c3b39bd1e] Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20160603-1' into staging
> git bisect good 6b3532b20b787cbd697a68b383232f5c3b39bd1e
> # skip: [3638439d541835f20fb76346f14549800046af76] MC146818 RTC: add GPIO access to output IRQ
> git bisect skip 3638439d541835f20fb76346f14549800046af76
> # skip: [52d4c8ee93cc599fecf817d403f125f4c7c036c6] linux-user: Update preprocessor constants for Mips-specific e_flags bits
> git bisect skip 52d4c8ee93cc599fecf817d403f125f4c7c036c6
> # good: [e7ed11f083015bf34a121cfff31540cf9c2bae23] crypto: remove temp files on completion of secrets test
> git bisect good e7ed11f083015bf34a121cfff31540cf9c2bae23
> # good: [dde35bc966ef8c1afb4f4e0f3c0e99fc0f27ca04] spapr: fix write-past-end-of-array error in cpu core device init code
> git bisect good dde35bc966ef8c1afb4f4e0f3c0e99fc0f27ca04
> # skip: [f1ef55786691a1bf79db0b74ba1e5347a0d38c1b] Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20160711' into staging
> git bisect skip f1ef55786691a1bf79db0b74ba1e5347a0d38c1b
> # good: [720ff280e73ebdc6751a9c5ae741fdcdd092a5a0] block: Convert bdrv_pwrite_zeroes() to BdrvChild
> git bisect good 720ff280e73ebdc6751a9c5ae741fdcdd092a5a0
> # good: [0b8b8753e4d94901627b3e86431230f2319215c4] coroutine: move entry argument to qemu_coroutine_create
> git bisect good 0b8b8753e4d94901627b3e86431230f2319215c4
> # skip: [4815185902971c41fcdd700fa1fc3e1d9299900f] trace: Add per-vCPU tracing states for events with the 'vcpu' property
> git bisect skip 4815185902971c41fcdd700fa1fc3e1d9299900f
> # good: [4fb8320a2efb2216c7ddcc929ad0362f4e285681] avx2 configure: Use primitives in test
> git bisect good 4fb8320a2efb2216c7ddcc929ad0362f4e285681
> # good: [338404d061144956b76f9893ca3434d057dff2d4] Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20160719' into staging
> git bisect good 338404d061144956b76f9893ca3434d057dff2d4
> # bad: [0e55c381f69f302d09ab1e18f3c1156cca56f4a6] net: Use correct type for bool flag
> git bisect bad 0e55c381f69f302d09ab1e18f3c1156cca56f4a6
> # bad: [b6167706829c6e0d3572daa2b6769594ced276f7] qapi: Add type.is_empty() helper
> git bisect bad b6167706829c6e0d3572daa2b6769594ced276f7
> # bad: [cebea510579ed43724156cc596a8ff14ba208740] net: use Netdev instead of NetClientOptions in client init
> git bisect bad cebea510579ed43724156cc596a8ff14ba208740
> # bad: [3d344c2aabb7bc9b414321e3c52872901edebdda] qapi: change QmpInputVisitor to QSLIST
> git bisect bad 3d344c2aabb7bc9b414321e3c52872901edebdda
> # good: [fc76ae8b38783e82c109834573ba5d6f080440b5] qapi: change QmpOutputVisitor to QSLIST
> git bisect good fc76ae8b38783e82c109834573ba5d6f080440b5
> # first bad commit: [3d344c2aabb7bc9b414321e3c52872901edebdda] qapi: change QmpInputVisitor to QSLIST

I repeatedly tested this, and the parent commit of 3d344c2aabb7 works.

The bug reproduces at current master (v2.7.0-rc0-21-g51313fe4f4af) --
that's where I noticed it (see it at the top of the bisection log). If I
revert 3d344c2aabb7 on top of master, then the issue is *not* fixed; it
persists.

This tells me that 3d344c2aabb7 may not be the real culprit, it just
exposes a bug elsewhere, perhaps.

* Nonetheless, because 3d344c2aabb7 is QAPI related, I looked at my
libvirt log. (I constantly capture all QMP and QGA traffic, as a
principle.) Here's the snippet that is logged by libvirtd right after I
enter the "poweroff" command in the guest. First, the working case
(i.e., when QEMU is built at the parent of 3d344c2aabb7):

> 2016-07-27 20:19:28.188+0000: 1576: debug : qemuMonitorJSONIOProcessLine:191 : Line [{"timestamp": {"seconds": 1469650768, "microseconds": 188028}, "event": "VSERPORT_CHANGE", "data": {"open": false, "id": "channel0"}}]
> 2016-07-27 20:19:28.188+0000: 1576: info : qemuMonitorJSONIOProcessLine:206 : QEMU_MONITOR_RECV_EVENT: mon=0x7fc17c2b9b50 event={"timestamp": {"seconds": 1469650768, "microseconds": 188028}, "event": "VSERPORT_CHANGE", "data": {"open": false, "id": "channel0"}}
> 2016-07-27 20:19:28.188+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:152 : mon=0x7fc17c2b9b50 obj=0x7fc19b7364e0
> 2016-07-27 20:19:28.188+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:177 : handle VSERPORT_CHANGE handler=0x7fc186260a80 data=0x7fc19b740200
> 2016-07-27 20:19:28.188+0000: 1576: debug : qemuMonitorJSONIOProcess:260 : Total used 135 bytes out of 135 available in buffer
> 2016-07-27 20:19:28.188+0000: 1588: debug : qemuAgentNotifyClose:817 : mon=0x7fc158000c50
> 2016-07-27 20:19:28.188+0000: 1588: debug : qemuAgentClose:830 : mon=0x7fc158000c50
> 2016-07-27 20:19:28.188+0000: 1576: debug : qemuAgentDispose:163 : mon=0x7fc158000c50
> 2016-07-27 20:19:28.922+0000: 1576: debug : qemuMonitorJSONIOProcessLine:191 : Line [{"timestamp": {"seconds": 1469650768, "microseconds": 922231}, "event": "SHUTDOWN"}]
> 2016-07-27 20:19:28.922+0000: 1576: info : qemuMonitorJSONIOProcessLine:206 : QEMU_MONITOR_RECV_EVENT: mon=0x7fc17c2b9b50 event={"timestamp": {"seconds": 1469650768, "microseconds": 922231}, "event": "SHUTDOWN"}
> 2016-07-27 20:19:28.922+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:152 : mon=0x7fc17c2b9b50 obj=0x7fc19b736ad0
> 2016-07-27 20:19:28.922+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:177 : handle SHUTDOWN handler=0x7fc186260bc0 data=(nil)
> 2016-07-27 20:19:28.922+0000: 1576: debug : qemuMonitorJSONIOProcess:260 : Total used 85 bytes out of 85 available in buffer
> 2016-07-27 20:19:28.926+0000: 1576: debug : qemuMonitorJSONIOProcessLine:191 : Line [{"timestamp": {"seconds": 1469650768, "microseconds": 926025}, "event": "STOP"}]
> 2016-07-27 20:19:28.926+0000: 1576: info : qemuMonitorJSONIOProcessLine:206 : QEMU_MONITOR_RECV_EVENT: mon=0x7fc17c2b9b50 event={"timestamp": {"seconds": 1469650768, "microseconds": 926025}, "event": "STOP"}
> 2016-07-27 20:19:28.926+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:152 : mon=0x7fc17c2b9b50 obj=0x7fc19b761b90
> 2016-07-27 20:19:28.926+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:177 : handle STOP handler=0x7fc186260ba0 data=(nil)
> 2016-07-27 20:19:28.926+0000: 1576: debug : qemuMonitorJSONIOProcess:260 : Total used 81 bytes out of 81 available in buffer
> 2016-07-27 20:19:28.927+0000: 1576: debug : qemuMonitorJSONIOProcessLine:191 : Line [{"timestamp": {"seconds": 1469650768, "microseconds": 927492}, "event": "SHUTDOWN"}]
> 2016-07-27 20:19:28.927+0000: 1576: info : qemuMonitorJSONIOProcessLine:206 : QEMU_MONITOR_RECV_EVENT: mon=0x7fc17c2b9b50 event={"timestamp": {"seconds": 1469650768, "microseconds": 927492}, "event": "SHUTDOWN"}
> 2016-07-27 20:19:28.927+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:152 : mon=0x7fc17c2b9b50 obj=0x7fc19b736ef0
> 2016-07-27 20:19:28.927+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:177 : handle SHUTDOWN handler=0x7fc186260bc0 data=(nil)
> 2016-07-27 20:19:28.927+0000: 1576: debug : qemuMonitorJSONIOProcess:260 : Total used 85 bytes out of 85 available in buffer
> 2016-07-27 20:19:28.965+0000: 1576: error : qemuMonitorIO:692 : internal error: End of file from monitor
> 2016-07-27 20:19:29.170+0000: 1588: warning : virSecuritySELinuxRestoreFileLabel:1053 : cannot lookup default selinux label for /mnt/data/virt-images-big/ovmf.fedora.q35.zimg

Second, the same snippet for the broken case (i.e., when QEMU is built
at 3d344c2aabb7):

> 2016-07-27 20:20:30.390+0000: 1576: debug : qemuMonitorJSONIOProcessLine:191 : Line [{"timestamp": {"seconds": 1469650830, "microseconds": 390652}, "event": "VSERPORT_CHANGE", "data": {"open": false, "id": "channel0"}}]
> 2016-07-27 20:20:30.390+0000: 1576: info : qemuMonitorJSONIOProcessLine:206 : QEMU_MONITOR_RECV_EVENT: mon=0x7fc160012210 event={"timestamp": {"seconds": 1469650830, "microseconds": 390652}, "event": "VSERPORT_CHANGE", "data": {"open": false, "id": "channel0"}}
> 2016-07-27 20:20:30.390+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:152 : mon=0x7fc160012210 obj=0x7fc19b7609e0
> 2016-07-27 20:20:30.390+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:177 : handle VSERPORT_CHANGE handler=0x7fc186260a80 data=0x7fc19b739a60
> 2016-07-27 20:20:30.390+0000: 1576: debug : qemuMonitorJSONIOProcess:260 : Total used 135 bytes out of 135 available in buffer
> 2016-07-27 20:20:30.390+0000: 1588: debug : qemuAgentNotifyClose:817 : mon=0x7fc158000c50
> 2016-07-27 20:20:30.391+0000: 1588: debug : qemuAgentClose:830 : mon=0x7fc158000c50
> 2016-07-27 20:20:30.391+0000: 1576: debug : qemuAgentDispose:163 : mon=0x7fc158000c50
> 2016-07-27 20:20:31.169+0000: 1576: debug : qemuMonitorJSONIOProcessLine:191 : Line [{"timestamp": {"seconds": 1469650831, "microseconds": 169050}, "event": "SHUTDOWN"}]
> 2016-07-27 20:20:31.169+0000: 1576: info : qemuMonitorJSONIOProcessLine:206 : QEMU_MONITOR_RECV_EVENT: mon=0x7fc160012210 event={"timestamp": {"seconds": 1469650831, "microseconds": 169050}, "event": "SHUTDOWN"}
> 2016-07-27 20:20:31.169+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:152 : mon=0x7fc160012210 obj=0x7fc19b734f90
> 2016-07-27 20:20:31.169+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:177 : handle SHUTDOWN handler=0x7fc186260bc0 data=(nil)
> 2016-07-27 20:20:31.169+0000: 1576: debug : qemuMonitorJSONIOProcess:260 : Total used 85 bytes out of 85 available in buffer
> 2016-07-27 20:20:31.172+0000: 1576: debug : qemuMonitorJSONIOProcessLine:191 : Line [{"timestamp": {"seconds": 1469650831, "microseconds": 172041}, "event": "STOP"}]
> 2016-07-27 20:20:31.172+0000: 1576: info : qemuMonitorJSONIOProcessLine:206 : QEMU_MONITOR_RECV_EVENT: mon=0x7fc160012210 event={"timestamp": {"seconds": 1469650831, "microseconds": 172041}, "event": "STOP"}
> 2016-07-27 20:20:31.172+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:152 : mon=0x7fc160012210 obj=0x7fc19b761b90
> 2016-07-27 20:20:31.172+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:177 : handle STOP handler=0x7fc186260ba0 data=(nil)
> 2016-07-27 20:20:31.172+0000: 1576: debug : qemuMonitorJSONIOProcess:260 : Total used 81 bytes out of 81 available in buffer
> 2016-07-27 20:20:31.174+0000: 1576: debug : qemuMonitorJSONIOProcessLine:191 : Line [{"timestamp": {"seconds": 1469650831, "microseconds": 174169}, "event": "SHUTDOWN"}]
> 2016-07-27 20:20:31.174+0000: 1576: info : qemuMonitorJSONIOProcessLine:206 : QEMU_MONITOR_RECV_EVENT: mon=0x7fc160012210 event={"timestamp": {"seconds": 1469650831, "microseconds": 174169}, "event": "SHUTDOWN"}
> 2016-07-27 20:20:31.174+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:152 : mon=0x7fc160012210 obj=0x7fc19b73f5a0
> 2016-07-27 20:20:31.174+0000: 1576: debug : qemuMonitorJSONIOProcessEvent:177 : handle SHUTDOWN handler=0x7fc186260bc0 data=(nil)
> 2016-07-27 20:20:31.174+0000: 1576: debug : qemuMonitorJSONIOProcess:260 : Total used 85 bytes out of 85 available in buffer

This is where things hang. When I send a SIGKILL to the QEMU process, it
dies (of course) and then libvirtd logs:

> 2016-07-27 20:20:46.005+0000: 1576: error : qemuMonitorIO:692 : internal error: End of file from monitor

* Here's a stack dump from QEMU when it is hung:

> (gdb) thread apply all bt full
>
> Thread 7 (Thread 0x7f935812f700 (LWP 20802)):
> #0  0x00007f9360423f4d in __lll_lock_wait () from /lib64/libpthread.so.0
> No symbol table info available.
> #1  0x00007f936041fd02 in _L_lock_791 () from /lib64/libpthread.so.0
> No symbol table info available.
> #2  0x00007f936041fc08 in pthread_mutex_lock () from /lib64/libpthread.so.0
> No symbol table info available.
> #3  0x00007f9363b9c2cd in qemu_mutex_lock (mutex=0x7f93641d9c00 <qemu_global_mutex>) at util/qemu-thread-posix.c:64
>         err = 0
>         __func__ = "qemu_mutex_lock"
> #4  0x00007f936377a6a3 in qemu_mutex_lock_iothread () at /home/lacos/src/upstream/qemu/cpus.c:1271
> No locals.
> #5  0x00007f9363bb0fc3 in call_rcu_thread (opaque=0x0) at util/rcu.c:258
>         tries = 1
>         n = 52
>         node = 0x7f933c4e8dc0
> #6  0x00007f936041ddc5 in start_thread () from /lib64/libpthread.so.0
> No symbol table info available.
> #7  0x00007f935d9c7ced in clone () from /lib64/libc.so.6
> No symbol table info available.
>
> Thread 6 (Thread 0x7f9355518700 (LWP 20809)):
> #0  0x00007f9360423f4d in __lll_lock_wait () from /lib64/libpthread.so.0
> No symbol table info available.
> #1  0x00007f93604264bc in _L_cond_lock_792 () from /lib64/libpthread.so.0
> No symbol table info available.
> #2  0x00007f9360426398 in __pthread_mutex_cond_lock () from /lib64/libpthread.so.0
> No symbol table info available.
> #3  0x00007f9360421765 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
> No symbol table info available.
> #4  0x00007f9363b9c42f in qemu_cond_wait (cond=0x7f9365f8bfa0, mutex=0x7f93641d9c00 <qemu_global_mutex>) at util/qemu-thread-posix.c:123
>         err = 32659
>         __func__ = "qemu_cond_wait"
> #5  0x00007f9363779fed in qemu_kvm_wait_io_event (cpu=0x7f9365f65c70) at /home/lacos/src/upstream/qemu/cpus.c:1044
> No locals.
> #6  0x00007f936377a123 in qemu_kvm_cpu_thread_fn (arg=0x7f9365f65c70) at /home/lacos/src/upstream/qemu/cpus.c:1083
>         cpu = 0x7f9365f65c70
>         r = 65536
> #7  0x00007f936041ddc5 in start_thread () from /lib64/libpthread.so.0
> No symbol table info available.
> #8  0x00007f935d9c7ced in clone () from /lib64/libc.so.6
> No symbol table info available.
>
> Thread 5 (Thread 0x7f9354d17700 (LWP 20810)):
> #0  0x00007f9360423f4d in __lll_lock_wait () from /lib64/libpthread.so.0
> No symbol table info available.
> #1  0x00007f93604264bc in _L_cond_lock_792 () from /lib64/libpthread.so.0
> No symbol table info available.
> #2  0x00007f9360426398 in __pthread_mutex_cond_lock () from /lib64/libpthread.so.0
> No symbol table info available.
> #3  0x00007f9360421765 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
> No symbol table info available.
> #4  0x00007f9363b9c42f in qemu_cond_wait (cond=0x7f9365fe7a70, mutex=0x7f93641d9c00 <qemu_global_mutex>) at util/qemu-thread-posix.c:123
>         err = 32659
>         __func__ = "qemu_cond_wait"
> #5  0x00007f9363779fed in qemu_kvm_wait_io_event (cpu=0x7f9365fcb400) at /home/lacos/src/upstream/qemu/cpus.c:1044
> No locals.
> #6  0x00007f936377a123 in qemu_kvm_cpu_thread_fn (arg=0x7f9365fcb400) at /home/lacos/src/upstream/qemu/cpus.c:1083
>         cpu = 0x7f9365fcb400
>         r = 65536
> #7  0x00007f936041ddc5 in start_thread () from /lib64/libpthread.so.0
> No symbol table info available.
> #8  0x00007f935d9c7ced in clone () from /lib64/libc.so.6
> No symbol table info available.
>
> Thread 4 (Thread 0x7f934ffff700 (LWP 20811)):
> #0  0x00007f9360423f4d in __lll_lock_wait () from /lib64/libpthread.so.0
> No symbol table info available.
> #1  0x00007f93604264bc in _L_cond_lock_792 () from /lib64/libpthread.so.0
> No symbol table info available.
> #2  0x00007f9360426398 in __pthread_mutex_cond_lock () from /lib64/libpthread.so.0
> No symbol table info available.
> #3  0x00007f9360421765 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
> No symbol table info available.
> #4  0x00007f9363b9c42f in qemu_cond_wait (cond=0x7f93660051d0, mutex=0x7f93641d9c00 <qemu_global_mutex>) at util/qemu-thread-posix.c:123
>         err = 32659
>         __func__ = "qemu_cond_wait"
> #5  0x00007f9363779fed in qemu_kvm_wait_io_event (cpu=0x7f9365fe8dc0) at /home/lacos/src/upstream/qemu/cpus.c:1044
> No locals.
> #6  0x00007f936377a123 in qemu_kvm_cpu_thread_fn (arg=0x7f9365fe8dc0) at /home/lacos/src/upstream/qemu/cpus.c:1083
>         cpu = 0x7f9365fe8dc0
>         r = 65536
> #7  0x00007f936041ddc5 in start_thread () from /lib64/libpthread.so.0
> No symbol table info available.
> #8  0x00007f935d9c7ced in clone () from /lib64/libc.so.6
> No symbol table info available.
>
> Thread 3 (Thread 0x7f934f7fe700 (LWP 20812)):
> #0  0x00007f9360423f4d in __lll_lock_wait () from /lib64/libpthread.so.0
> No symbol table info available.
> #1  0x00007f93604264bc in _L_cond_lock_792 () from /lib64/libpthread.so.0
> No symbol table info available.
> #2  0x00007f9360426398 in __pthread_mutex_cond_lock () from /lib64/libpthread.so.0
> No symbol table info available.
> #3  0x00007f9360421765 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
> No symbol table info available.
> #4  0x00007f9363b9c42f in qemu_cond_wait (cond=0x7f93660230e0, mutex=0x7f93641d9c00 <qemu_global_mutex>) at util/qemu-thread-posix.c:123
>         err = 32659
>         __func__ = "qemu_cond_wait"
> #5  0x00007f9363779fed in qemu_kvm_wait_io_event (cpu=0x7f9366006520) at /home/lacos/src/upstream/qemu/cpus.c:1044
> No locals.
> #6  0x00007f936377a123 in qemu_kvm_cpu_thread_fn (arg=0x7f9366006520) at /home/lacos/src/upstream/qemu/cpus.c:1083
>         cpu = 0x7f9366006520
>         r = 65536
> #7  0x00007f936041ddc5 in start_thread () from /lib64/libpthread.so.0
> No symbol table info available.
> #8  0x00007f935d9c7ced in clone () from /lib64/libc.so.6
> No symbol table info available.
>
> Thread 2 (Thread 0x7f934e5ff700 (LWP 20826)):
> #0  0x00007f935d9bd69d in poll () from /lib64/libc.so.6
> No symbol table info available.
> #1  0x00007f935f2671c7 in red_worker_main () from /lib64/libspice-server.so.1
> No symbol table info available.
> #2  0x00007f936041ddc5 in start_thread () from /lib64/libpthread.so.0
> No symbol table info available.
> #3  0x00007f935d9c7ced in clone () from /lib64/libc.so.6
> No symbol table info available.
>
> Thread 1 (Thread 0x7f93634f0c00 (LWP 20784)):
> #0  0x00007f9360423f4d in __lll_lock_wait () from /lib64/libpthread.so.0
> No symbol table info available.
> #1  0x00007f936041fd02 in _L_lock_791 () from /lib64/libpthread.so.0
> No symbol table info available.
> #2  0x00007f936041fc08 in pthread_mutex_lock () from /lib64/libpthread.so.0
> No symbol table info available.
> #3  0x00007f9363b9c2cd in qemu_mutex_lock (mutex=0x7f9365ed1a50) at util/qemu-thread-posix.c:64
>         err = 32659
>         __func__ = "qemu_mutex_lock"
> #4  0x00007f936389324d in qemu_chr_fe_write (s=0x7f9365ed1a50, buf=0x7f9365f50530 "{\"timestamp\": {\"seconds\": 1469650920, \"microseconds\": 469371}, \"event\": \"SPICE_DISCONNECTED\", \"data\": {\"server\": {\"port\": \"\", \"family\": \"unix\", \"host\": \"localhost\"}, \"client\": {\"port\": \"\", \"family\": \""..., len=231) at qemu-char.c:280
>         ret = 32659
>         __PRETTY_FUNCTION__ = "qemu_chr_fe_write"
> #5  0x00007f936377bf52 in monitor_flush_locked (mon=0x7f9365f50690) at /home/lacos/src/upstream/qemu/monitor.c:311
>         rc = 32659
>         len = 231
>         buf = 0x7f9365f50530 "{\"timestamp\": {\"seconds\": 1469650920, \"microseconds\": 469371}, \"event\": \"SPICE_DISCONNECTED\", \"data\": {\"server\": {\"port\": \"\", \"family\": \"unix\", \"host\": \"localhost\"}, \"client\": {\"port\": \"\", \"family\": \""...
> #6  0x00007f936377c0e4 in monitor_puts (mon=0x7f9365f50690, str=0x7f9365ed28b6 "") at /home/lacos/src/upstream/qemu/monitor.c:353
>         c = 10 '\n'
> #7  0x00007f936377c347 in monitor_json_emitter (mon=0x7f9365f50690, data=0x7f93666e2c00) at /home/lacos/src/upstream/qemu/monitor.c:401
>         json = 0x7f9365f8d4b0
>         __PRETTY_FUNCTION__ = "monitor_json_emitter"
> #8  0x00007f936377c513 in monitor_qapi_event_emit (event=QAPI_EVENT_SPICE_DISCONNECTED, qdict=0x7f93666e2c00) at /home/lacos/src/upstream/qemu/monitor.c:472
>         mon = 0x7f9365f50690
> #9  0x00007f936377c5c0 in monitor_qapi_event_queue (event=QAPI_EVENT_SPICE_DISCONNECTED, qdict=0x7f93666e2c00, errp=0x7ffe5234beb0) at /home/lacos/src/upstream/qemu/monitor.c:497
>         evconf = 0x7f9364072a88 <monitor_qapi_event_conf+200>
>         evstate = 0x7f93666e2c00
>         __PRETTY_FUNCTION__ = "monitor_qapi_event_queue"
> #10 0x00007f9363b8c28e in qapi_event_send_spice_disconnected (server=0x7f93660c64c0, client=0x7f93660c7050, errp=0x7f93646105e8 <error_abort>) at qapi-event.c:1038
>         qmp = 0x7f93666e2c00
>         err = 0x0
>         emit = 0x7f936377c52b <monitor_qapi_event_queue>
>         obj = 0x7f9365f29420
>         v = 0x7f9366112330
>         param = {server = 0x7f93660c64c0, client = 0x7f93660c7050}
> #11 0x00007f9363aac5b0 in channel_event (event=3, info=0x7f93666e3c20) at ui/spice-core.c:248
>         server = 0x7f93660c64c0
>         client = 0x7f93660c7050
>         need_lock = false
>         __func__ = "channel_event"
> #12 0x00007f935f269ed3 in reds_handle_channel_event () from /lib64/libspice-server.so.1
> No symbol table info available.
> #13 0x00007f935f24607a in main_dispatcher_channel_event () from /lib64/libspice-server.so.1
> No symbol table info available.
> #14 0x00007f935f27093e in reds_stream_free () from /lib64/libspice-server.so.1
> No symbol table info available.
> #15 0x00007f935f27a4f3 in snd_disconnect_channel () from /lib64/libspice-server.so.1
> No symbol table info available.
> #16 0x00007f935f27ac3e in snd_detach_common () from /lib64/libspice-server.so.1
> No symbol table info available.
> #17 0x00007f935f27d0bd in snd_detach_playback () from /lib64/libspice-server.so.1
> No symbol table info available.
> #18 0x00007f935f26d648 in spice_server_remove_interface () from /lib64/libspice-server.so.1
> No symbol table info available.
> #19 0x00007f93638c821d in line_out_fini (hw=0x7f93677d9220) at audio/spiceaudio.c:152
>         out = 0x7f93677d9220
> #20 0x00007f93638c3b8c in audio_atexit () at audio/audio.c:1754
>         sc = 0x0
>         s = 0x7f93641e1420 <glob_audio_state>
>         hwo = 0x7f93677d9220
>         hwi = 0x0
> #21 0x00007f935d909e69 in __run_exit_handlers () from /lib64/libc.so.6
> No symbol table info available.
> #22 0x00007f935d909eb5 in exit () from /lib64/libc.so.6
> No symbol table info available.
> #23 0x00007f935d8f2b1c in __libc_start_main () from /lib64/libc.so.6
> No symbol table info available.
> #24 0x00007f9363745619 in _start ()
> No symbol table info available.

It seems to me that QEMU deadlocks when it tries to emit the
SPICE_DISCONNECTED event.

(Note that I can't find "handle SPICE_DISCONNECTED" in the libvirtd log
even in the successful case (i.e., when QEMU is built at the parent of
3d344c2aabb7).)

Apparently audio_atexit() is triggered when QEMU is returning from
main() -- or calling exit() --, which somehow results in QEMU trying to
send a SPICE_DISCONNECTED event through the monitor? I guess the monitor
has been long dead by then.

Hmmm, this gives me an idea... What happens if I remove the following
fragment from my domain XML?

    <sound model='ich6'>
      <address type='pci' domain='0x0000' bus='0x02' slot='0x07' function='0x0'/>
    </sound>

Yeah, the hang disappears, shutdown works just fine. It's a spice audio
bug after all, apparently. Sorry for reporting it in this thread! :) I'm
adding Gerd to the address list.

To reiterate: this patch (commit 3d344c2aabb7) does *not* cause the
symptom, it only exposes an independent bug that causes the symptom.
And, I can work around that for now, by removing sound devices.

Thanks,
Laszlo
diff mbox

Patch

diff --git a/qapi/qmp-input-visitor.c b/qapi/qmp-input-visitor.c
index 21edb39..64dd392 100644
--- a/qapi/qmp-input-visitor.c
+++ b/qapi/qmp-input-visitor.c
@@ -30,6 +30,8 @@  typedef struct StackObject
 
     GHashTable *h;           /* If obj is dict: unvisited keys */
     const QListEntry *entry; /* If obj is list: unvisited tail */
+
+    QSLIST_ENTRY(StackObject) node;
 } StackObject;
 
 struct QmpInputVisitor
@@ -41,8 +43,7 @@  struct QmpInputVisitor
 
     /* Stack of objects being visited (all entries will be either
      * QDict or QList). */
-    StackObject stack[QIV_STACK_SIZE];
-    int nb_stack;
+    QSLIST_HEAD(, StackObject) stack;
 
     /* True to reject parse in visit_end_struct() if unvisited keys remain. */
     bool strict;
@@ -61,13 +62,13 @@  static QObject *qmp_input_get_object(QmpInputVisitor *qiv,
     QObject *qobj;
     QObject *ret;
 
-    if (!qiv->nb_stack) {
+    if (QSLIST_EMPTY(&qiv->stack)) {
         /* Starting at root, name is ignored. */
         return qiv->root;
     }
 
     /* We are in a container; find the next element. */
-    tos = &qiv->stack[qiv->nb_stack - 1];
+    tos = QSLIST_FIRST(&qiv->stack);
     qobj = tos->obj;
     assert(qobj);
 
@@ -100,18 +101,11 @@  static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
                                         void *qapi, Error **errp)
 {
     GHashTable *h;
-    StackObject *tos = &qiv->stack[qiv->nb_stack];
+    StackObject *tos = g_new0(StackObject, 1);
 
     assert(obj);
-    if (qiv->nb_stack >= QIV_STACK_SIZE) {
-        error_setg(errp, "An internal buffer overran");
-        return NULL;
-    }
-
     tos->obj = obj;
     tos->qapi = qapi;
-    assert(!tos->h);
-    assert(!tos->entry);
 
     if (qiv->strict && qobject_type(obj) == QTYPE_QDICT) {
         h = g_hash_table_new(g_str_hash, g_str_equal);
@@ -121,7 +115,7 @@  static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
         tos->entry = qlist_first(qobject_to_qlist(obj));
     }
 
-    qiv->nb_stack++;
+    QSLIST_INSERT_HEAD(&qiv->stack, tos, node);
     return tos->entry;
 }
 
@@ -129,10 +123,9 @@  static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
 static void qmp_input_check_struct(Visitor *v, Error **errp)
 {
     QmpInputVisitor *qiv = to_qiv(v);
-    StackObject *tos = &qiv->stack[qiv->nb_stack - 1];
-
-    assert(qiv->nb_stack > 0);
+    StackObject *tos = QSLIST_FIRST(&qiv->stack);
 
+    assert(tos && !tos->entry);
     if (qiv->strict) {
         GHashTable *const top_ht = tos->h;
         if (top_ht) {
@@ -147,23 +140,23 @@  static void qmp_input_check_struct(Visitor *v, Error **errp)
     }
 }
 
+static void qmp_input_stack_object_free(StackObject *tos)
+{
+    if (tos->h) {
+        g_hash_table_unref(tos->h);
+    }
+
+    g_free(tos);
+}
+
 static void qmp_input_pop(Visitor *v, void **obj)
 {
     QmpInputVisitor *qiv = to_qiv(v);
-    StackObject *tos = &qiv->stack[qiv->nb_stack - 1];
+    StackObject *tos = QSLIST_FIRST(&qiv->stack);
 
-    assert(qiv->nb_stack > 0);
-    assert(tos->qapi == obj);
-
-    if (qiv->strict) {
-        GHashTable * const top_ht = qiv->stack[qiv->nb_stack - 1].h;
-        if (top_ht) {
-            g_hash_table_unref(top_ht);
-        }
-        tos->h = NULL;
-    }
-
-    qiv->nb_stack--;
+    assert(tos && tos->qapi == obj);
+    QSLIST_REMOVE_HEAD(&qiv->stack, node);
+    qmp_input_stack_object_free(tos);
 }
 
 static void qmp_input_start_struct(Visitor *v, const char *name, void **obj,
@@ -224,7 +217,7 @@  static GenericList *qmp_input_next_list(Visitor *v, GenericList *tail,
                                         size_t size)
 {
     QmpInputVisitor *qiv = to_qiv(v);
-    StackObject *so = &qiv->stack[qiv->nb_stack - 1];
+    StackObject *so = QSLIST_FIRST(&qiv->stack);
 
     if (!so->entry) {
         return NULL;
@@ -376,6 +369,12 @@  static void qmp_input_optional(Visitor *v, const char *name, bool *present)
 static void qmp_input_free(Visitor *v)
 {
     QmpInputVisitor *qiv = to_qiv(v);
+    while (!QSLIST_EMPTY(&qiv->stack)) {
+        StackObject *tos = QSLIST_FIRST(&qiv->stack);
+
+        QSLIST_REMOVE_HEAD(&qiv->stack, node);
+        qmp_input_stack_object_free(tos);
+    }
 
     qobject_decref(qiv->root);
     g_free(qiv);