Message ID | 1403107449-6186-2-git-send-email-pbonzini@redhat.com |
---|---|
State | New |
Headers | show |
On 06/19/2014 02:03 AM, Paolo Bonzini wrote: > From: Peter Lieven <pl@kamp.de> > > this patch adds handling of BUSY status reponse from an iSCSI target. > Currently, we fail with -EIO in case of SCSI_STATUS_BUSY while the > obvious reaction would be to retry the operation after some time. > The retry time is randomly choosen from a range with exponential > growth increasing with each retry. > > This patch includes most of the changes by a an upcoming patch > from Stefan Hajnoczi: > > iscsi: implement .bdrv_detach/attach_aio_context() > > because I also need the reference to the aio_context for > the retry timer to work. I included the changes to maintain > better mergeability. > > Signed-off-by: Peter Lieven <pl@kamp.de> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > block/iscsi.c | 55 +++++++++++++++++++++++++++++++++++++++++++------------ > 1 file changed, 43 insertions(+), 12 deletions(-) > > diff --git a/block/iscsi.c b/block/iscsi.c > index 6f87605..e64659f 100644 > --- a/block/iscsi.c > +++ b/block/iscsi.c > @@ -26,6 +26,7 @@ > #include "config-host.h" > > #include <poll.h> > +#include <math.h> > #include <arpa/inet.h> > #include "qemu-common.h" > #include "qemu/config-file.h" > @@ -75,6 +76,7 @@ typedef struct IscsiTask { > Coroutine *co; > QEMUBH *bh; > IscsiLun *iscsilun; > + QEMUTimer retry_timer; > } IscsiTask; > > typedef struct IscsiAIOCB { > @@ -86,7 +88,6 @@ typedef struct IscsiAIOCB { > uint8_t *buf; > int status; > int canceled; > - int retries; > int64_t sector_num; > int nb_sectors; > #ifdef __linux__ > @@ -96,7 +97,8 @@ typedef struct IscsiAIOCB { > > #define NOP_INTERVAL 5000 > #define MAX_NOP_FAILURES 3 > -#define ISCSI_CMD_RETRIES 5 > +#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times) > +static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048}; > > /* this threshhold is a trade-off knob to choose between > * the potential additional overhead of an extra GET_LBA_STATUS request > @@ -146,6 +148,19 @@ static void iscsi_co_generic_bh_cb(void *opaque) > qemu_coroutine_enter(iTask->co, NULL); > } > > +static void iscsi_retry_timer_expired(void *opaque) > +{ > + struct IscsiTask *iTask = opaque; > + if (iTask->co) { > + qemu_coroutine_enter(iTask->co, NULL); > + } > +} > + > +static inline unsigned exp_random(double mean) > +{ > + return -mean * log((double)rand() / RAND_MAX); This new use of log() breaks linkage on Fedora20/ppc64, qemu-nbd fails (but qemu-system-ppc64 still links find). Adding "--extra-cflags=-lm" to ./configure invocation helps. How to fix? Seems like fc20 packages do not have some dependency which qemu-nbd relies on. Thanks. [aik@vpl2 qemu]$ make V=1 cc -DHAS_LIBSSH2_SFTP_FSYNC -m64 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wall -Wundef -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fno-common -Wendif-labels -Wmissing-include-dirs -Wempty-body -Wnested-externs -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wold-style-declaration -Wold-style-definition -Wtype-limits -fstack-protector-strong -I/usr/include/p11-kit-1 -I/usr/include/p11-kit-1 -I/usr/include/libpng16 -I/usr/include/libusb-1.0 -I/usr/include/pixman-1 -I/home/aik/p/qemu/tests -pthread -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -g -Wl,--warn-common -m64 -g -o qemu-nbd qemu-nbd.o async.o thread-pool.o nbd.o block.o blockjob.o main-loop.o iohandler.o qemu-timer.o aio-posix.o qapi-types.o qapi-visit.o qemu-io-cmds.o qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o qemu-coroutine-sleep.o coroutine-ucontext.o block/raw_bsd.o block/cow.o block/qcow.o block/vdi.o block/vmdk.o block/cloop.o block/dmg.o block/bochs.o block/vpc.o block/vvfat.o block/qcow2.o block/qcow2-refcount.o block/qcow2-cluster.o block/qcow2-snapshot.o block/qcow2-cache.o block/qed.o block/qed-gencb.o block/qed-l2-cache.o block/qed-table.o block/qed-cluster.o block/qed-check.o block/vhdx.o block/vhdx-endian.o block/vhdx-log.o block/quorum.o block/parallels.o block/blkdebug.o block/blkverify.o block/snapshot.o block/qapi.o block/raw-posix.o block/linux-aio.o block/nbd.o block/nbd-client.o block/sheepdog.o block/iscsi.o block/curl.o block/rbd.o block/gluster.o block/ssh.o libqemuutil.a libqemustub.a -lz -laio -L/usr/lib64/iscsi -liscsi -lcurl -lrbd -lrados -lgfapi -lglusterfs -lgfrpc -lgfxdr -lssh2 -lgthread-2.0 -pthread -lglib-2.0 -lz -lrt -lz -luuid -lgnutls -lgnutls -lgnutls -lSDL -lpthread -lX11 -lvte2_90 -lgtk-3 -lgdk-3 -lpangocairo-1.0 -lpango-1.0 -latk-1.0 -lcairo-gobject -lgdk_pixbuf-2.0 -lgio-2.0 -lgobject-2.0 -lglib-2.0 -lcairo -lX11 -lXext -lgtk-3 -lgdk-3 -lpangocairo-1.0 -lpango-1.0 -latk-1.0 -lcairo-gobject -lcairo -lgdk_pixbuf-2.0 -lgio-2.0 -lgobject-2.0 -lglib-2.0 -lX11 -lrdmacm -libverbs /usr/bin/ld: block/iscsi.o: undefined reference to symbol 'log@@GLIBC_2.3' /usr/bin/ld: note: 'log@@GLIBC_2.3' is defined in DSO /lib64/libm.so.6 so try adding it to the linker command line /lib64/libm.so.6: could not read symbols: Invalid operation collect2: error: ld returned 1 exit status make: *** [qemu-nbd] Error 1 > +} > + > static void > iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, > void *command_data, void *opaque) > @@ -158,14 +173,30 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, > iTask->do_retry = 0; > iTask->task = task; > > - if (iTask->retries-- > 0 && status == SCSI_STATUS_CHECK_CONDITION > - && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) { > - error_report("iSCSI CheckCondition: %s", iscsi_get_error(iscsi)); > - iTask->do_retry = 1; > - goto out; > - } > - > if (status != SCSI_STATUS_GOOD) { > + if (iTask->retries++ < ISCSI_CMD_RETRIES) { > + if (status == SCSI_STATUS_CHECK_CONDITION > + && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) { > + error_report("iSCSI CheckCondition: %s", > + iscsi_get_error(iscsi)); > + iTask->do_retry = 1; > + goto out; > + } > + if (status == SCSI_STATUS_BUSY) { > + unsigned retry_time = > + exp_random(iscsi_retry_times[iTask->retries - 1]); > + error_report("iSCSI Busy (retry #%u in %u ms): %s", > + iTask->retries, retry_time, > + iscsi_get_error(iscsi)); > + aio_timer_init(iTask->iscsilun->aio_context, > + &iTask->retry_timer, QEMU_CLOCK_REALTIME, > + SCALE_MS, iscsi_retry_timer_expired, iTask); > + timer_mod(&iTask->retry_timer, > + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time); > + iTask->do_retry = 1; > + return; > + } > + } > error_report("iSCSI Failure: %s", iscsi_get_error(iscsi)); > } > > @@ -180,9 +211,9 @@ out: > static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask) > { > *iTask = (struct IscsiTask) { > - .co = qemu_coroutine_self(), > - .retries = ISCSI_CMD_RETRIES, > - .iscsilun = iscsilun, > + .co = qemu_coroutine_self(), > + .retries = ISCSI_CMD_RETRIES, > + .iscsilun = iscsilun, > }; > } > >
diff --git a/block/iscsi.c b/block/iscsi.c index 6f87605..e64659f 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -26,6 +26,7 @@ #include "config-host.h" #include <poll.h> +#include <math.h> #include <arpa/inet.h> #include "qemu-common.h" #include "qemu/config-file.h" @@ -75,6 +76,7 @@ typedef struct IscsiTask { Coroutine *co; QEMUBH *bh; IscsiLun *iscsilun; + QEMUTimer retry_timer; } IscsiTask; typedef struct IscsiAIOCB { @@ -86,7 +88,6 @@ typedef struct IscsiAIOCB { uint8_t *buf; int status; int canceled; - int retries; int64_t sector_num; int nb_sectors; #ifdef __linux__ @@ -96,7 +97,8 @@ typedef struct IscsiAIOCB { #define NOP_INTERVAL 5000 #define MAX_NOP_FAILURES 3 -#define ISCSI_CMD_RETRIES 5 +#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times) +static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048}; /* this threshhold is a trade-off knob to choose between * the potential additional overhead of an extra GET_LBA_STATUS request @@ -146,6 +148,19 @@ static void iscsi_co_generic_bh_cb(void *opaque) qemu_coroutine_enter(iTask->co, NULL); } +static void iscsi_retry_timer_expired(void *opaque) +{ + struct IscsiTask *iTask = opaque; + if (iTask->co) { + qemu_coroutine_enter(iTask->co, NULL); + } +} + +static inline unsigned exp_random(double mean) +{ + return -mean * log((double)rand() / RAND_MAX); +} + static void iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, void *command_data, void *opaque) @@ -158,14 +173,30 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, iTask->do_retry = 0; iTask->task = task; - if (iTask->retries-- > 0 && status == SCSI_STATUS_CHECK_CONDITION - && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) { - error_report("iSCSI CheckCondition: %s", iscsi_get_error(iscsi)); - iTask->do_retry = 1; - goto out; - } - if (status != SCSI_STATUS_GOOD) { + if (iTask->retries++ < ISCSI_CMD_RETRIES) { + if (status == SCSI_STATUS_CHECK_CONDITION + && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) { + error_report("iSCSI CheckCondition: %s", + iscsi_get_error(iscsi)); + iTask->do_retry = 1; + goto out; + } + if (status == SCSI_STATUS_BUSY) { + unsigned retry_time = + exp_random(iscsi_retry_times[iTask->retries - 1]); + error_report("iSCSI Busy (retry #%u in %u ms): %s", + iTask->retries, retry_time, + iscsi_get_error(iscsi)); + aio_timer_init(iTask->iscsilun->aio_context, + &iTask->retry_timer, QEMU_CLOCK_REALTIME, + SCALE_MS, iscsi_retry_timer_expired, iTask); + timer_mod(&iTask->retry_timer, + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time); + iTask->do_retry = 1; + return; + } + } error_report("iSCSI Failure: %s", iscsi_get_error(iscsi)); } @@ -180,9 +211,9 @@ out: static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask) { *iTask = (struct IscsiTask) { - .co = qemu_coroutine_self(), - .retries = ISCSI_CMD_RETRIES, - .iscsilun = iscsilun, + .co = qemu_coroutine_self(), + .retries = ISCSI_CMD_RETRIES, + .iscsilun = iscsilun, }; }