diff mbox

[03/10] scsi-disk: Complete failed requests in scsi_disk_emulate_command

Message ID 1288876539-8300-4-git-send-email-kwolf@redhat.com
State New
Headers show

Commit Message

Kevin Wolf Nov. 4, 2010, 1:15 p.m. UTC
This pulls the request completion for error cases from the caller to
scsi_disk_emulate_command. This should not change semantics, but allows to
reuse scsi_handle_write_error() for flushes in the next patch.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 hw/scsi-disk.c |   21 ++++++++++-----------
 1 files changed, 10 insertions(+), 11 deletions(-)

Comments

Jonathan Nieder May 16, 2011, 11:23 a.m. UTC | #1
Hi,

Kevin Wolf wrote:

> This pulls the request completion for error cases from the caller to
> scsi_disk_emulate_command. This should not change semantics, but allows to
> reuse scsi_handle_write_error() for flushes in the next patch.

Today I tried out qemu-system-arm for the first time.  It's faster
than I expected; very neat.  Unfortunately it segfaults.

Reproducible with "master" (077030d11).  Bisects to v0.14.0-rc0~489
(scsi-disk: Complete failed requests in scsi_disk_emulate_command,
2010-10-25).

Ideas?
Jonathan

Backtrace:

| Program received signal SIGSEGV, Segmentation fault.
| 0x00000000005552b5 in lsi_do_command (s=0x13b84d0) at /home/jrn/src/qemu/hw/lsi53c895a.c:762
| 762             dev->info->read_data(dev, s->current->tag);
| (gdb) bt full
| #0  0x00000000005552b5 in lsi_do_command (s=0x13b84d0) at /home/jrn/src/qemu/hw/lsi53c895a.c:762
|         dev = 0x13baf10
|         buf = "\000\000\000\000\000\000\000\000\251\207Q\000\000\000\000"
|         n = 656877154
| #1  lsi_execute_script (s=0x13b84d0) at /home/jrn/src/qemu/hw/lsi53c895a.c:1067
|         insn = 20688656
|         addr = 97263452
|         addr_high = <value optimized out>
|         opcode = <value optimized out>
|         insn_processed = 18
| #2  0x00000000005566b8 in lsi_reg_writeb (s=0x13b84d0, offset=<value optimized out>, val=32 ' ')
|     at /home/jrn/src/qemu/hw/lsi53c895a.c:1656
| No locals.
| #3  0x000000004059fe4e in ?? ()
| No symbol table info available.
| #4  0x0000000000000040 in ?? ()
| No symbol table info available.
| #5  0x0000000000000000 in ?? ()
| No symbol table info available.
| (gdb) p n
| $1 = 656877154
| (gdb) p dev->info
| $2 = (SCSIDeviceInfo *) 0x8df000
| (gdb) p s->current
| $3 = (lsi_request *) 0x0

That's weird because qemu_mallocz should have checked for NULL.

Program counter:

| Dump of assembler code for function lsi_execute_script:
[...]
|    0x0000000000555250 <+2784>:  callq  0x42a970 <qemu_mallocz>
|    0x0000000000555255 <+2789>:  mov    0x334(%rbx),%edx
|    0x000000000055525b <+2795>:  mov    %rax,0x350(%rbx)
|    0x0000000000555262 <+2802>:  mov    %rbp,%rdi
|    0x0000000000555265 <+2805>:  mov    %edx,(%rax)
|    0x0000000000555267 <+2807>:  mov    0x350(%rbx),%rsi
|    0x000000000055526e <+2814>:  lea    0x30(%rsp),%rdx
|    0x0000000000555273 <+2819>:  mov    0x98(%rbp),%rax
|    0x000000000055527a <+2826>:  mov    0x330(%rbx),%ecx
|    0x0000000000555280 <+2832>:  mov    (%rsi),%esi
|    0x0000000000555282 <+2834>:  callq  *0x78(%rax)
|    0x0000000000555285 <+2837>:  cmp    $0x0,%eax
|    0x0000000000555288 <+2840>:  mov    %eax,%r14d
|    0x000000000055528b <+2843>:  jle    0x5555cc <lsi_execute_script+3676>
|    0x0000000000555291 <+2849>:  movzbl 0x38b(%rbx),%eax
|    0x0000000000555298 <+2856>:  mov    0x350(%rbx),%rdx
|    0x000000000055529f <+2863>:  mov    %rbp,%rdi
|    0x00000000005552a2 <+2866>:  and    $0xfffffffffffffff8,%eax
|    0x00000000005552a5 <+2869>:  or     $0x1,%eax
|    0x00000000005552a8 <+2872>:  mov    %al,0x38b(%rbx)
|    0x00000000005552ae <+2878>:  mov    0x98(%rbp),%rax
| => 0x00000000005552b5 <+2885>:  mov    (%rdx),%esi
|    0x00000000005552b7 <+2887>:  callq  *0x80(%rax)
|    0x00000000005552bd <+2893>:  mov    0x338(%rbx),%ebp

Recipe:

| $ ./configure --prefix=$HOME/opt/qemu --disable-werror
| [...]
| $ make -j2 install STRIP=:
| [...]
| $ PATH=$HOME/opt/qemu/bin:$PATH
| $ qemu-img create arm-install.qemu 10G
| Formatting 'arm-install.qemu', fmt=raw size=10737418240
| $ wget http://d-i.debian.org/daily-images/armel/daily/versatile/netboot/initrd.gz
| [...]
| $ wget http://d-i.debian.org/daily-images/armel/daily/versatile/netboot/vmlinuz-2.6.37-2-versatile
| [...]
| $ sha1sum initrd.gz vmlinuz-2.6.37-2-versatile
| 9822cd356e2e66c0ee2d08f2dfc100f074683b81  initrd.gz
| 81aa8f15f6d0fb3fa971d859787f89eec653d1a3  vmlinuz-2.6.37-2-versatile
| $
| $ qemu-system-arm  -M versatilepb -kernel vmlinuz-2.6.37-2-versatile \
|		-initrd initrd.gz -hda arm-install.qemu
| Segmentation fault (core dumped)

The above transcript does not describe the installation process, since
it happened in another window.

1. choice of keymap, mirror, etc are boring
2. It asks for a root password.  Leave it blank.
3. It asks for a new account.  I chose "sudoer".
4. It wants a password.  Give one.
5. Choose a time zone and switch to vt4 for messages.
6. Messages (copied by hand):

| kernel: [  928.454139] SCSI subsystem initialized
| kernel: [  928.767929] PCI: enabling device 0000:00:0c.0 (0100 -> 0103)
| kernel: [  928.840653] sym0: <895a> rev 0x0 at pci 0000:00:0c.0 irq 27
| kernel: [  928.893943] sym0: No NVRAM, ID 7, Fast-40, LVD, parity checking
| kernel: [  928.902942] sym0: SCSI BUS has been reset.
| kernel: [  928.903283] scsi0 : sym-2.2.3
| kernel: [  931.915071] sym0: unknown interrupt(s) ignored, ISTAT=0x5 DSTAT=0x80 SIST=0x0
| kernel: [  931.922015] scsi 0:0:0:0: Direct-Access     QEMU     QEMU HARDDISK    0.14 PQ: 0 ANSI: 5
| kernel: [  931.922765] scsi target0:0:0: tagged command queuing enabled, command queue depth 16.
| kernel: [  931.923171] scsi target0:0:0: Beginning Domain Validation
| kernel: [  931.928165] scsi target0:0:0: Domain Validation skipping write tests

7. Segfault.  The messages stop.

| $ gcc --version
| gcc (Debian 4.6.0-7) 4.6.1 20110507 (prerelease)
| Copyright (C) 2011 Free Software Foundation, Inc.
| This is free software; see the source for copying conditions.  There is NO
| warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
| $ ld --version
| GNU ld (GNU Binutils for Debian) 2.21.51.20110421
| Copyright 2011 Free Software Foundation, Inc.
| This program is free software; you may redistribute it under the terms of
| the GNU General Public License version 3 or (at your option) a later version.
| This program has absolutely no warranty.
| $ /lib/libc.so.6 | head -1
| GNU C Library (Debian EGLIBC 2.13-4) stable release version 2.13, by Roland McGrath et al.
| $ uname -a
| Linux elie 2.6.39-rc5-amd64 #1 SMP Sat Apr 30 05:48:55 UTC 2011 x86_64 GNU/Linux
Kevin Wolf May 16, 2011, 3:13 p.m. UTC | #2
Hi Jonathan,

Am 16.05.2011 13:23, schrieb Jonathan Nieder:
> Hi,
> 
> Kevin Wolf wrote:
> 
>> This pulls the request completion for error cases from the caller to
>> scsi_disk_emulate_command. This should not change semantics, but allows to
>> reuse scsi_handle_write_error() for flushes in the next patch.
> 
> Today I tried out qemu-system-arm for the first time.  It's faster
> than I expected; very neat.  Unfortunately it segfaults.
> 
> Reproducible with "master" (077030d11).  Bisects to v0.14.0-rc0~489
> (scsi-disk: Complete failed requests in scsi_disk_emulate_command,
> 2010-10-25).

Your instructions seemed clear enough, so I tried to reproduce your
problem. Now I have an ARM VM with a Debian installation that works just
fine and I have no idea what to use it for. ;-)

I also reviewed the patch that you mentioned and I can't find anything
suspicious there. I'm afraid you'll have to bite the bullet and run it
with some debugging code yourself (if it's really related to that patch,
you'll want to enable DPRINTF in hw/scsi-disk.c as a first step)

Kevin
Jonathan Nieder May 16, 2011, 3:30 p.m. UTC | #3
Kevin Wolf wrote:

> I also reviewed the patch that you mentioned and I can't find anything
> suspicious there. I'm afraid you'll have to bite the bullet and run it
> with some debugging code yourself (if it's really related to that patch,
> you'll want to enable DPRINTF in hw/scsi-disk.c as a first step)

I tried reverting a6d96eb7 (scsi: Move sense handling into the driver,
2010-11-24), 78ced65e (scsi-disk: Implement werror for flushes,
2010-10-25), and 8af7a3a (csi-disk: Complete failed requests in
scsi_disk_emulate_command, 2010-10-25), and the segfault is gone.  So
now I also have a nice ARM image to reproduce it more quickly with. :)

Here's what the default DPRINTFs write when it segfaults, for what
it's worth.  I'll try playing with this some more.

scsi-disk: Command: lun=0 tag=0x0 data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x0 status=0 sense=0
scsi-disk: Command: lun=0 tag=0x10001 data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x10001 status=0 sense=0
scsi-disk: Command: lun=0 tag=0x10003 data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x10003 status=0 sense=0
scsi-disk: Command: lun=0 tag=0x10005 data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x10005 status=0 sense=0
scsi-disk: Command: lun=0 tag=0x10007 data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x10007 status=0 sense=0
scsi-disk: Command: lun=0 tag=0x10009 data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x10009 status=0 sense=0
scsi-disk: Command: lun=0 tag=0x1000b data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x1000b status=0 sense=0
scsi-disk: Command: lun=0 tag=0x1000d data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x1000d status=0 sense=0
scsi-disk: Command: lun=0 tag=0x1000f data=0xa0 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x10 0x00 0x00 0x00
scsi-disk: Read buf_len=16
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x1000f status=0 sense=0
scsi-disk: Command: lun=0 tag=0x200 data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x200 status=0 sense=0
scsi-disk: Command: lun=0 tag=0x10201 data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x10201 status=0 sense=0
scsi-disk: Command: lun=0 tag=0x10203 data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x10203 status=0 sense=0
scsi-disk: Command: lun=0 tag=0x10205 data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x10205 status=0 sense=0
scsi-disk: Command: lun=0 tag=0x10207 data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x10207 status=0 sense=0
scsi-disk: Command: lun=0 tag=0x10209 data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x10209 status=0 sense=0
scsi-disk: Command: lun=0 tag=0x1020b data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x1020b status=0 sense=0
scsi-disk: Command: lun=0 tag=0x1020d data=0x12 0x00 0x00 0x00 0x24 0x00
scsi-disk: Read buf_len=36
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x1020d status=0 sense=0
scsi-disk: Command: lun=0 tag=0x1020f data=0xa0 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x10 0x00 0x00 0x00
scsi-disk: Read buf_len=16
scsi-disk: Read sector_count=0
scsi-disk: Command complete tag=0x1020f status=0 sense=0
scsi-disk: Command: lun=0 tag=0x10011 data=0x00 0x00 0x00 0x00 0x00 0x00
scsi-disk: Command complete tag=0x10011 status=0 sense=0
Jonathan Nieder May 16, 2011, 3:43 p.m. UTC | #4
Kevin Wolf wrote:

> Your instructions seemed clear enough, so I tried to reproduce your
> problem. Now I have an ARM VM with a Debian installation that works just
> fine and I have no idea what to use it for. ;-)

So I was puzzled about this for a while, but then I had a flash
of inspiration:

	unset MALLOC_PERTURB_
	reproduction-script;	# no segfault

	MALLOC_PERTURB_=37
	export MALLOC_PERTURB_
	reproduction-script;	# segfaults

Thanks.  Sorry, it's easy to forget.
Kevin Wolf May 16, 2011, 3:58 p.m. UTC | #5
Am 16.05.2011 17:43, schrieb Jonathan Nieder:
> Kevin Wolf wrote:
> 
>> Your instructions seemed clear enough, so I tried to reproduce your
>> problem. Now I have an ARM VM with a Debian installation that works just
>> fine and I have no idea what to use it for. ;-)
> 
> So I was puzzled about this for a while, but then I had a flash
> of inspiration:
> 
> 	unset MALLOC_PERTURB_
> 	reproduction-script;	# no segfault
> 
> 	MALLOC_PERTURB_=37
> 	export MALLOC_PERTURB_
> 	reproduction-script;	# segfaults
> 
> Thanks.  Sorry, it's easy to forget.

Thanks. Still doesn't make much sense to me, the patch shouldn't change
anything with respect to a malloc, but I can reproduce a segfault now. I
think I'll have a closer look tomorrow.

Kevin
diff mbox

Patch

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 43a5b59..96acfe3 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -784,8 +784,9 @@  static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf)
     return toclen;
 }
 
-static int scsi_disk_emulate_command(SCSIRequest *req, uint8_t *outbuf)
+static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf)
 {
+    SCSIRequest *req = &r->req;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
     uint64_t nb_sectors;
     int buflen = 0;
@@ -943,12 +944,12 @@  static int scsi_disk_emulate_command(SCSIRequest *req, uint8_t *outbuf)
     return buflen;
 
 not_ready:
-    scsi_req_set_status(req, CHECK_CONDITION, NOT_READY);
-    return 0;
+    scsi_command_complete(r, CHECK_CONDITION, NOT_READY);
+    return -1;
 
 illegal_request:
-    scsi_req_set_status(req, CHECK_CONDITION, ILLEGAL_REQUEST);
-    return 0;
+    scsi_command_complete(r, CHECK_CONDITION, ILLEGAL_REQUEST);
+    return -1;
 }
 
 /* Execute a scsi command.  Returns the length of the data expected by the
@@ -1056,14 +1057,12 @@  static int32_t scsi_send_command(SCSIDevice *d, uint32_t tag,
     case REPORT_LUNS:
     case VERIFY:
     case REZERO_UNIT:
-        rc = scsi_disk_emulate_command(&r->req, outbuf);
-        if (rc > 0) {
-            r->iov.iov_len = rc;
-        } else {
-            scsi_req_complete(&r->req);
-            scsi_remove_request(r);
+        rc = scsi_disk_emulate_command(r, outbuf);
+        if (rc < 0) {
             return 0;
         }
+
+        r->iov.iov_len = rc;
         break;
     case READ_6:
     case READ_10: