diff mbox series

[2/2] scsi-disk: Fix crash for VM configured with USB CDROM after live migration

Message ID 878c8f093f3fc2f584b5c31cb2490d9f6a12131a.1716531409.git.yong.huang@smartx.com
State New
Headers show
Series migrate inflight emulated SCSI request for the scsi disk device | expand

Commit Message

Yong Huang May 24, 2024, 6:29 a.m. UTC
For VMs configured with the USB CDROM device:

-drive file=/path/to/local/file,id=drive-usb-disk0,media=cdrom,readonly=on...
-device usb-storage,drive=drive-usb-disk0,id=usb-disk0...

QEMU process may crash after live migration, to reproduce the issue,
configure VM (Guest OS ubuntu 20.04 or 21.10) with the following XML:

<disk type='file' device='cdrom'>
  <driver name='qemu' type='raw'/>
  <source file='/path/to/share_fs/cdrom.iso'/>
  <target dev='sda' bus='usb'/>
  <readonly/>
  <address type='usb' bus='0' port='2'/>
</disk>
<controller type='usb' index='0' model='piix3-uhci'/>

Do the live migration repeatedly, crash may happen after live migratoin,
trace log at the source before live migration is as follows:

324808@1711972823.521945:usb_uhci_frame_start nr 319
324808@1711972823.521978:usb_uhci_qh_load qh 0x35cb5400
324808@1711972823.521989:usb_uhci_qh_load qh 0x35cb5480
324808@1711972823.521997:usb_uhci_td_load qh 0x35cb5480, td 0x35cbe000, ctrl 0x0, token 0xffe07f69
324808@1711972823.522010:usb_uhci_td_nextqh qh 0x35cb5480, td 0x35cbe000
324808@1711972823.522022:usb_uhci_qh_load qh 0x35cb5680
324808@1711972823.522030:usb_uhci_td_load qh 0x35cb5680, td 0x75ac5180, ctrl 0x19800000, token 0x3c903e1
324808@1711972823.522045:usb_uhci_packet_add token 0x103e1, td 0x75ac5180
324808@1711972823.522056:usb_packet_state_change bus 0, port 2, ep 2, packet 0x559f9ba14b00, state undef -> setup
324808@1711972823.522079:usb_msd_cmd_submit lun 0, tag 0x472, flags 0x00000080, len 10, data-len 8
324808@1711972823.522107:scsi_req_parsed target 0 lun 0 tag 1138 command 74 dir 1 length 8
324808@1711972823.522124:scsi_req_parsed_lba target 0 lun 0 tag 1138 command 74 lba 4096
324808@1711972823.522139:scsi_req_alloc target 0 lun 0 tag 1138
324808@1711972823.522169:scsi_req_continue target 0 lun 0 tag 1138
324808@1711972823.522181:scsi_req_data target 0 lun 0 tag 1138 len 8
324808@1711972823.522194:usb_packet_state_change bus 0, port 2, ep 2, packet 0x559f9ba14b00, state setup -> complete
324808@1711972823.522209:usb_uhci_packet_complete_success token 0x103e1, td 0x75ac5180
324808@1711972823.522219:usb_uhci_packet_del token 0x103e1, td 0x75ac5180
324808@1711972823.522232:usb_uhci_td_complete qh 0x35cb5680, td 0x75ac5180

trace log at the destination after live migration is as follows:

3286206@1711972823.951646:usb_uhci_frame_start nr 320
3286206@1711972823.951663:usb_uhci_qh_load qh 0x35cb5100
3286206@1711972823.951671:usb_uhci_qh_load qh 0x35cb5480
3286206@1711972823.951680:usb_uhci_td_load qh 0x35cb5480, td 0x35cbe000, ctrl 0x1000000, token 0xffe07f69
3286206@1711972823.951693:usb_uhci_td_nextqh qh 0x35cb5480, td 0x35cbe000
3286206@1711972823.951702:usb_uhci_qh_load qh 0x35cb5700
3286206@1711972823.951709:usb_uhci_td_load qh 0x35cb5700, td 0x75ac5240, ctrl 0x39800000, token 0xe08369
3286206@1711972823.951727:usb_uhci_queue_add token 0x8369
3286206@1711972823.951735:usb_uhci_packet_add token 0x8369, td 0x75ac5240
3286206@1711972823.951746:usb_packet_state_change bus 0, port 2, ep 1, packet 0x56066b2fb5a0, state undef -> setup
3286206@1711972823.951766:usb_msd_data_in 8/8 (scsi 8)
2024-04-01 12:00:24.665+0000: shutting down, reason=crashed

The backtrace reveals the following:

Program terminated with signal SIGSEGV, Segmentation fault.
0  __memmove_sse2_unaligned_erms () at ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:312
312        movq    -8(%rsi,%rdx), %rcx
[Current thread is 1 (Thread 0x7f0a9025fc00 (LWP 3286206))]
(gdb) bt
0  __memmove_sse2_unaligned_erms () at ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:312
1  memcpy (__len=8, __src=<optimized out>, __dest=<optimized out>) at /usr/include/bits/string_fortified.h:34
2  iov_from_buf_full (iov=<optimized out>, iov_cnt=<optimized out>, offset=<optimized out>, buf=0x0, bytes=bytes@entry=8) at ../util/iov.c:33
3  iov_from_buf (bytes=8, buf=<optimized out>, offset=<optimized out>, iov_cnt=<optimized out>, iov=<optimized out>)
   at /usr/src/debug/qemu-6-6.2.0-75.7.oe1.smartx.git.40.x86_64/include/qemu/iov.h:49
4  usb_packet_copy (p=p@entry=0x56066b2fb5a0, ptr=<optimized out>, bytes=bytes@entry=8) at ../hw/usb/core.c:636
5  usb_msd_copy_data (s=s@entry=0x56066c62c770, p=p@entry=0x56066b2fb5a0) at ../hw/usb/dev-storage.c:186
6  usb_msd_handle_data (dev=0x56066c62c770, p=0x56066b2fb5a0) at ../hw/usb/dev-storage.c:496
7  usb_handle_packet (dev=0x56066c62c770, p=p@entry=0x56066b2fb5a0) at ../hw/usb/core.c:455
8  uhci_handle_td (s=s@entry=0x56066bd5f210, q=0x56066bb7fbd0, q@entry=0x0, qh_addr=qh_addr@entry=902518530, td=td@entry=0x7fffe6e788f0, td_addr=<optimized out>,
   int_mask=int_mask@entry=0x7fffe6e788e4) at ../hw/usb/hcd-uhci.c:885
9  uhci_process_frame (s=s@entry=0x56066bd5f210) at ../hw/usb/hcd-uhci.c:1061
10 uhci_frame_timer (opaque=opaque@entry=0x56066bd5f210) at ../hw/usb/hcd-uhci.c:1159
11 timerlist_run_timers (timer_list=0x56066af26bd0) at ../util/qemu-timer.c:642
12 qemu_clock_run_timers (type=QEMU_CLOCK_VIRTUAL) at ../util/qemu-timer.c:656
13 qemu_clock_run_all_timers () at ../util/qemu-timer.c:738
14 main_loop_wait (nonblocking=nonblocking@entry=0) at ../util/main-loop.c:542
15 qemu_main_loop () at ../softmmu/runstate.c:739
16 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at ../softmmu/main.c:52
(gdb) frame 5
(gdb) p ((SCSIDiskReq *)s->req)->iov
$1 = {iov_base = 0x0, iov_len = 0}
(gdb) p/x s->req->tag
$2 = 0x472

When designing the USB mass storage device model, QEMU places SCSI disk
device as the backend of USB mass storage device. In addition, USB mass
device driver in Guest OS conforms to the "Universal Serial Bus Mass
Storage Class Bulk-Only Transport" specification in order to simulate
the transform behavior between a USB controller and a USB mass device.
The following shows the protocol hierarchy:

                      +----------------+
 CDROM driver         |  scsi command  |        CDROM
                      +----------------+

                   +-----------------------+
 USB mass          | USB Mass Storage Class|    USB mass
 storage driver    | Bulk-Only Transport   |    storage device
                   +-----------------------+

                      +----------------+
 USB Controller       |  USB Protocol  |        USB device
                      +----------------+

In the USB protocol layer, between the USB controller and USB device, at
least two USB packets will be transformed when guest OS send a
read operation to USB mass storage device:

1. The CBW packet, which will be delivered to the USB device's Bulk-Out
endpoint. In order to simulate a read operation, the USB mass storage
device parses the CBW and converts it to a SCSI command, which would be
executed by CDROM(represented as SCSI disk in QEMU internally), and store
the result data of the SCSI command in a buffer.

2. The DATA-IN packet, which will be delivered from the USB device's
Bulk-In endpoint(fetched directly from the preceding buffer) to the USB
controller.

We consider UHCI to be the controller. The two packets mentioned above may
have been processed by UHCI in two separate frame entries of the Frame List
, and also described by two different TDs. Unlike the physical environment,
a virtualized environment requires the QEMU to make sure that the result
data of CBW is not lost and is delivered to the UHCI controller.

Currently, these types of SCSI requests are not migrated, so QEMU cannot
ensure the result data of the IO operation is not lost if there are
inflight emulated SCSI requests during the live migration.

Assume for the moment that the USB mass storage device is processing the
CBW and storing the result data of the read operation to a buffre, live
migration happens and moves the VM to the destination while not migrating
the result data of the read operation.

After migration, when UHCI at the destination issues a DATA-IN request to
the USB mass storage device, a crash happens because USB mass storage device
fetches the result data and get nothing.

The scenario this patch addresses is this one.

Theoretically, any device that uses the SCSI disk as a back-end would be
affected by this issue. In this case, it is the USB CDROM.

To fix it, inflight emulated SCSI request be migrated during live migration,
similar to the DMA SCSI request.

Signed-off-by: Hyman Huang <yong.huang@smartx.com>
---
 hw/scsi/scsi-disk.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

Comments

Prasad Pandit May 24, 2024, 10:01 a.m. UTC | #1
Hello Hyman,

* Is this the same patch series as sent before..?
  -> https://lists.nongnu.org/archive/html/qemu-devel/2024-04/msg00816.html

On Fri, 24 May 2024 at 12:02, Hyman Huang <yong.huang@smartx.com> wrote:
> For VMs configured with the USB CDROM device:
>
> -drive file=/path/to/local/file,id=drive-usb-disk0,media=cdrom,readonly=on...
> -device usb-storage,drive=drive-usb-disk0,id=usb-disk0...
>
> QEMU process may crash after live migration,
> Do the live migration repeatedly, crash may happen after live migratoin,

* Does live migration work many times before QEMU crashes on the
destination side? OR QEMU crashes at the very first migration?

>    at /usr/src/debug/qemu-6-6.2.0-75.7.oe1.smartx.git.40.x86_64/include/qemu/iov.h:49

* This qemu version looks quite old. Is the issue reproducible with
the latest QEMU version 9.0?

> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> +static void scsi_disk_emulate_save_request(QEMUFile *f, SCSIRequest *req)
> +{
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    if (s->migrate_emulate_scsi_request) {
> +        scsi_disk_save_request(f, req);
> +    }
> +}
> +
>  static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
>  {
>      SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> @@ -183,6 +193,16 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
>      qemu_iovec_init_external(&r->qiov, &r->iov, 1);
>  }
>
> +static void scsi_disk_emulate_load_request(QEMUFile *f, SCSIRequest *req)
> +{
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    if (s->migrate_emulate_scsi_request) {
> +        scsi_disk_load_request(f, req);
> +    }
> +}
> +
>  /*
>   * scsi_handle_rw_error has two return values.  False means that the error
>   * must be ignored, true means that the error has been processed and the
> @@ -2593,6 +2613,8 @@ static const SCSIReqOps scsi_disk_emulate_reqops = {
>      .read_data    = scsi_disk_emulate_read_data,
>      .write_data   = scsi_disk_emulate_write_data,
>      .get_buf      = scsi_get_buf,
> +    .load_request = scsi_disk_emulate_load_request,
> +    .save_request = scsi_disk_emulate_save_request,
>  };
>
>  static const SCSIReqOps scsi_disk_dma_reqops = {
> @@ -3137,7 +3159,7 @@ static Property scsi_hd_properties[] = {
>  static int scsi_disk_pre_save(void *opaque)
>  {
>      SCSIDiskState *dev = opaque;
> -    dev->migrate_emulate_scsi_request = false;
> +    dev->migrate_emulate_scsi_request = true;
>

* This patch seems to add support for migrating SCSI requests. While
it looks okay, not sure if it is required, how likely is someone to
configure a VM to use CDROM?

*  Should the CDROM device be reset on the destination if no requests
are found? ie. if (scsi_req_get_buf -> scsi_get_buf() returns NULL)?

Thank you.
---
  - Prasad
Yong Huang May 24, 2024, 10:53 a.m. UTC | #2
On Fri, May 24, 2024 at 6:01 PM Prasad Pandit <ppandit@redhat.com> wrote:

> Hello Hyman,
>
> * Is this the same patch series as sent before..?
>   ->
> https://lists.nongnu.org/archive/html/qemu-devel/2024-04/msg00816.html

Yes, exactly the same, I just refine the comment


>
>
> On Fri, 24 May 2024 at 12:02, Hyman Huang <yong.huang@smartx.com> wrote:
> > For VMs configured with the USB CDROM device:
> >
> > -drive
> file=/path/to/local/file,id=drive-usb-disk0,media=cdrom,readonly=on...
> > -device usb-storage,drive=drive-usb-disk0,id=usb-disk0...
> >
> > QEMU process may crash after live migration,
> > Do the live migration repeatedly, crash may happen after live migratoin,
>
> * Does live migration work many times before QEMU crashes on the
> destination side? OR QEMU crashes at the very first migration?
>
> >    at
> /usr/src/debug/qemu-6-6.2.0-75.7.oe1.smartx.git.40.x86_64/include/qemu/iov.h:49
>
> * This qemu version looks quite old. Is the issue reproducible with
> the latest QEMU version 9.0?
>

I'm not testing the latest QEMU version while theoretically it is
reproducible, I'll check it and give a conclusion.


>
> > diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> > +static void scsi_disk_emulate_save_request(QEMUFile *f, SCSIRequest
> *req)
> > +{
> > +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> > +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> > +
> > +    if (s->migrate_emulate_scsi_request) {
> > +        scsi_disk_save_request(f, req);
> > +    }
> > +}
> > +
> >  static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
> >  {
> >      SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> > @@ -183,6 +193,16 @@ static void scsi_disk_load_request(QEMUFile *f,
> SCSIRequest *req)
> >      qemu_iovec_init_external(&r->qiov, &r->iov, 1);
> >  }
> >
> > +static void scsi_disk_emulate_load_request(QEMUFile *f, SCSIRequest
> *req)
> > +{
> > +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> > +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> > +
> > +    if (s->migrate_emulate_scsi_request) {
> > +        scsi_disk_load_request(f, req);
> > +    }
> > +}
> > +
> >  /*
> >   * scsi_handle_rw_error has two return values.  False means that the
> error
> >   * must be ignored, true means that the error has been processed and the
> > @@ -2593,6 +2613,8 @@ static const SCSIReqOps scsi_disk_emulate_reqops =
> {
> >      .read_data    = scsi_disk_emulate_read_data,
> >      .write_data   = scsi_disk_emulate_write_data,
> >      .get_buf      = scsi_get_buf,
> > +    .load_request = scsi_disk_emulate_load_request,
> > +    .save_request = scsi_disk_emulate_save_request,
> >  };
> >
> >  static const SCSIReqOps scsi_disk_dma_reqops = {
> > @@ -3137,7 +3159,7 @@ static Property scsi_hd_properties[] = {
> >  static int scsi_disk_pre_save(void *opaque)
> >  {
> >      SCSIDiskState *dev = opaque;
> > -    dev->migrate_emulate_scsi_request = false;
> > +    dev->migrate_emulate_scsi_request = true;
> >
>
> * This patch seems to add support for migrating SCSI requests. While
> it looks okay, not sure if it is required, how likely is someone to
> configure a VM to use CDROM?
>

I'm not sure this usage is common but in our production environment,
it is used.


>
> *  Should the CDROM device be reset on the destination if no requests
> are found? ie. if (scsi_req_get_buf -> scsi_get_buf() returns NULL)?
>

IMHO, resetting the CDROM device may be a work around because
the request *SHOULD *not be lost. No requests are found may be
caused by other reasons, resetting the CD ROM seems crude.
The path that executes the scsi_get_buf() is in a USB mass storage
device,  and it called by the UHCI controller originally, which just
handles the Frame List blindly, reset solution is kind of complicated
in implementation

Migrating the requests may be a graceful solution.

Thanks for the comments,
Yong


> Thank you.
> ---
>   - Prasad
>
>
Prasad Pandit May 25, 2024, 6:40 a.m. UTC | #3
Hi,

On Fri, 24 May 2024 at 16:23, Yong Huang <yong.huang@smartx.com> wrote:
> I'm not testing the latest QEMU version while theoretically it is
> reproducible, I'll check it and give a conclusion.

* Yes, that'll help. Thank you.

> I'm not sure this usage is common but in our production environment, it is used.

* I see. If it's being used in a production environment and the crash
occurs there, then it's a reasonable change.

 > IMHO, resetting the CDROM device may be a work around because
> the request SHOULD not be lost. No requests are found may be
> caused by other reasons, resetting the CD ROM seems crude.
> The path that executes the scsi_get_buf() is in a USB mass storage
> device,  and it called by the UHCI controller originally, which just
> handles the Frame List blindly, reset solution is kind of complicated
> in implementation
>
> Migrating the requests may be a graceful solution.

* Yes, true. Migration should migrate guest's devices along with their
state and data. Resetting was suggested for the case if CDROM is not
used in production and so the migration was not required. But you are
using it in a production environment so migrating SCSI requests makes
sense.

Thank you.
---
  - Prasad
diff mbox series

Patch

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 0985676f73..d6e9d9e8d4 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -160,6 +160,16 @@  static void scsi_disk_save_request(QEMUFile *f, SCSIRequest *req)
     }
 }
 
+static void scsi_disk_emulate_save_request(QEMUFile *f, SCSIRequest *req)
+{
+    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+    if (s->migrate_emulate_scsi_request) {
+        scsi_disk_save_request(f, req);
+    }
+}
+
 static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
 {
     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
@@ -183,6 +193,16 @@  static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
 }
 
+static void scsi_disk_emulate_load_request(QEMUFile *f, SCSIRequest *req)
+{
+    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+    if (s->migrate_emulate_scsi_request) {
+        scsi_disk_load_request(f, req);
+    }
+}
+
 /*
  * scsi_handle_rw_error has two return values.  False means that the error
  * must be ignored, true means that the error has been processed and the
@@ -2593,6 +2613,8 @@  static const SCSIReqOps scsi_disk_emulate_reqops = {
     .read_data    = scsi_disk_emulate_read_data,
     .write_data   = scsi_disk_emulate_write_data,
     .get_buf      = scsi_get_buf,
+    .load_request = scsi_disk_emulate_load_request,
+    .save_request = scsi_disk_emulate_save_request,
 };
 
 static const SCSIReqOps scsi_disk_dma_reqops = {
@@ -3137,7 +3159,7 @@  static Property scsi_hd_properties[] = {
 static int scsi_disk_pre_save(void *opaque)
 {
     SCSIDiskState *dev = opaque;
-    dev->migrate_emulate_scsi_request = false;
+    dev->migrate_emulate_scsi_request = true;
 
     return 0;
 }