Message ID | 1410758605-29375-3-git-send-email-famz@redhat.com |
---|---|
State | New |
Headers | show |
Il 15/09/2014 07:23, Fam Zheng ha scritto: > SCSIRequest *sreq; > size_t resp_size; > enum SCSIXferMode mode; > - QEMUIOVector resp_iov; > union { > VirtIOSCSICmdResp cmd; > VirtIOSCSICtrlTMFResp tmf; > @@ -68,23 +75,27 @@ static inline SCSIDevice *virtio_scsi_device_find(VirtIOSCSI *s, uint8_t *lun) > static VirtIOSCSIReq *virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq) > { > VirtIOSCSIReq *req; > - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); > - > - req = g_malloc0(sizeof(*req) + vs->cdb_size); > + VirtIOSCSICommon *vs = (VirtIOSCSICommon *)s; > + const size_t zero_skip = offsetof(VirtIOSCSIReq, elem) > + + sizeof(VirtQueueElement); > > + req = g_slice_alloc(sizeof(*req) + vs->cdb_size); Looks good, but why do you need to zero the union? You only need to zero sreq, resp_size and mode, don't you (and at this point, memset becomes superfluous)? Paolo
On Mon, 09/15 12:17, Paolo Bonzini wrote: > Il 15/09/2014 07:23, Fam Zheng ha scritto: > > SCSIRequest *sreq; > > size_t resp_size; > > enum SCSIXferMode mode; > > - QEMUIOVector resp_iov; > > union { > > VirtIOSCSICmdResp cmd; > > VirtIOSCSICtrlTMFResp tmf; > > @@ -68,23 +75,27 @@ static inline SCSIDevice *virtio_scsi_device_find(VirtIOSCSI *s, uint8_t *lun) > > static VirtIOSCSIReq *virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq) > > { > > VirtIOSCSIReq *req; > > - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); > > - > > - req = g_malloc0(sizeof(*req) + vs->cdb_size); > > + VirtIOSCSICommon *vs = (VirtIOSCSICommon *)s; > > + const size_t zero_skip = offsetof(VirtIOSCSIReq, elem) > > + + sizeof(VirtQueueElement); > > > > + req = g_slice_alloc(sizeof(*req) + vs->cdb_size); > > Looks good, but why do you need to zero the union? You only need to > zero sreq, resp_size and mode, don't you (and at this point, memset > becomes superfluous)? > The structures in unions are not zeroed by caller, also leaving them breaks virtio-scsi in my test. FWIW, I will remove the "req->sreq = NULL;" two lines below in v3. At this point tuning these small fields are subtle optimization compared to the arrays, I say let's just simply keep the memset so that adding more fields in the future are also safe. Fam
Il 16/09/2014 09:16, Fam Zheng ha scritto: > On Mon, 09/15 12:17, Paolo Bonzini wrote: >> Il 15/09/2014 07:23, Fam Zheng ha scritto: >>> SCSIRequest *sreq; >>> size_t resp_size; >>> enum SCSIXferMode mode; >>> - QEMUIOVector resp_iov; >>> union { >>> VirtIOSCSICmdResp cmd; >>> VirtIOSCSICtrlTMFResp tmf; >>> @@ -68,23 +75,27 @@ static inline SCSIDevice *virtio_scsi_device_find(VirtIOSCSI *s, uint8_t *lun) >>> static VirtIOSCSIReq *virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq) >>> { >>> VirtIOSCSIReq *req; >>> - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); >>> - >>> - req = g_malloc0(sizeof(*req) + vs->cdb_size); >>> + VirtIOSCSICommon *vs = (VirtIOSCSICommon *)s; >>> + const size_t zero_skip = offsetof(VirtIOSCSIReq, elem) >>> + + sizeof(VirtQueueElement); >>> >>> + req = g_slice_alloc(sizeof(*req) + vs->cdb_size); >> >> Looks good, but why do you need to zero the union? You only need to >> zero sreq, resp_size and mode, don't you (and at this point, memset >> becomes superfluous)? >> > > The structures in unions are not zeroed by caller, also leaving them breaks > virtio-scsi in my test. > > FWIW, I will remove the "req->sreq = NULL;" two lines below in v3. At this > point tuning these small fields are subtle optimization compared to the arrays, > I say let's just simply keep the memset so that adding more fields in the > future are also safe. Perhaps the response fields have to be zeroed? The request shouldn't need it. It can be done separately though---the VirtQueueElement is the big one that we have to fix. Paolo
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 86aba88..7bf03c4 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -24,12 +24,19 @@ typedef struct VirtIOSCSIReq { VirtIOSCSI *dev; VirtQueue *vq; - VirtQueueElement elem; QEMUSGList qsgl; + QEMUIOVector resp_iov; + + /* Note: + * - fields before elem are initialized by virtio_scsi_init_req; + * - elem is uninitialized at the time of allocation. + * - fields after elem are zeroed by virtio_scsi_init_req. + * */ + + VirtQueueElement elem; SCSIRequest *sreq; size_t resp_size; enum SCSIXferMode mode; - QEMUIOVector resp_iov; union { VirtIOSCSICmdResp cmd; VirtIOSCSICtrlTMFResp tmf; @@ -68,23 +75,27 @@ static inline SCSIDevice *virtio_scsi_device_find(VirtIOSCSI *s, uint8_t *lun) static VirtIOSCSIReq *virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq) { VirtIOSCSIReq *req; - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); - - req = g_malloc0(sizeof(*req) + vs->cdb_size); + VirtIOSCSICommon *vs = (VirtIOSCSICommon *)s; + const size_t zero_skip = offsetof(VirtIOSCSIReq, elem) + + sizeof(VirtQueueElement); + req = g_slice_alloc(sizeof(*req) + vs->cdb_size); req->vq = vq; req->dev = s; req->sreq = NULL; qemu_sglist_init(&req->qsgl, DEVICE(s), 8, &address_space_memory); qemu_iovec_init(&req->resp_iov, 1); + memset((uint8_t *)req + zero_skip, 0, sizeof(*req) - zero_skip); return req; } static void virtio_scsi_free_req(VirtIOSCSIReq *req) { + VirtIOSCSICommon *vs = (VirtIOSCSICommon *)req->dev; + qemu_iovec_destroy(&req->resp_iov); qemu_sglist_destroy(&req->qsgl); - g_free(req); + g_slice_free1(sizeof(*req) + vs->cdb_size, req); } static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
The VirtQueueElement is a very big structure (>48k!). Since it will be initialized by virtqueue_pop, we can save the expensive zeroing here. This saves a few microseconds per request in my test: [fio-test] rw bs iodepth jobs bw iops latency -------------------------------------------------------------------------------------------- Before read 4k 1 1 110 28269 34 After read 4k 1 1 131 33745 28 Whereas, virtio-blk read 4k 1 1 217 55673 16 Signed-off-by: Fam Zheng <famz@redhat.com> --- hw/scsi/virtio-scsi.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-)