Message ID | 20190825071541.10389-3-mlevitsk@redhat.com |
---|---|
State | New |
Headers | show |
Series | block/nvme: add support for write zeros and discard | expand |
On 8/25/19 3:15 AM, Maxim Levitsky wrote: > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> > --- > block/nvme.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++ > block/trace-events | 2 ++ > 2 files changed, 85 insertions(+) > > diff --git a/block/nvme.c b/block/nvme.c > index f8bd11e19a..dd041f39c9 100644 > --- a/block/nvme.c > +++ b/block/nvme.c > @@ -112,6 +112,7 @@ typedef struct { > bool plugged; > > bool supports_write_zeros; > + bool supports_discard; > > CoMutex dma_map_lock; > CoQueue dma_flush_queue; > @@ -463,6 +464,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) > > oncs = le16_to_cpu(idctrl->oncs); > s->supports_write_zeros = (oncs & NVME_ONCS_WRITE_ZEROS) != 0; > + s->supports_discard = (oncs & NVME_ONCS_DSM) != 0; Same comment -- checking !!(register & FIELD) is nicer than the negative. (I'm actually not sure even the !! is needed, but it seems to be a QEMU-ism and I've caught myself using it...) Rest looks good to me on a skim, but I'm not very well-versed in NVME. 
> > memset(resp, 0, 4096); > > @@ -1153,6 +1155,86 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs, > } > > > +static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, > + int64_t offset, > + int bytes) > +{ > + BDRVNVMeState *s = bs->opaque; > + NVMeQueuePair *ioq = s->queues[1]; > + NVMeRequest *req; > + NvmeDsmRange *buf; > + QEMUIOVector local_qiov; > + int ret; > + > + NvmeCmd cmd = { > + .opcode = NVME_CMD_DSM, > + .nsid = cpu_to_le32(s->nsid), > + .cdw10 = cpu_to_le32(0), /*number of ranges - 0 based*/ > + .cdw11 = cpu_to_le32(1 << 2), /*deallocate bit*/ > + }; > + > + NVMeCoData data = { > + .ctx = bdrv_get_aio_context(bs), > + .ret = -EINPROGRESS, > + }; > + > + if (!s->supports_discard) { > + return -ENOTSUP; > + } > + > + assert(s->nr_queues > 1); > + > + buf = qemu_try_blockalign0(bs, s->page_size); > + if (!buf) { > + return -ENOMEM; > + } > + > + buf->nlb = cpu_to_le32(bytes >> s->blkshift); > + buf->slba = cpu_to_le64(offset >> s->blkshift); > + buf->cattr = 0; > + > + qemu_iovec_init(&local_qiov, 1); > + qemu_iovec_add(&local_qiov, buf, 4096); > + > + req = nvme_get_free_req(ioq); > + assert(req); > + > + qemu_co_mutex_lock(&s->dma_map_lock); > + ret = nvme_cmd_map_qiov(bs, &cmd, req, &local_qiov); > + qemu_co_mutex_unlock(&s->dma_map_lock); > + > + if (ret) { > + req->busy = false; > + goto out; > + } > + > + trace_nvme_dsm(s, offset, bytes); > + > + nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data); > + > + data.co = qemu_coroutine_self(); > + while (data.ret == -EINPROGRESS) { > + qemu_coroutine_yield(); > + } > + > + qemu_co_mutex_lock(&s->dma_map_lock); > + ret = nvme_cmd_unmap_qiov(bs, &local_qiov); > + qemu_co_mutex_unlock(&s->dma_map_lock); > + > + if (ret) { > + goto out; > + } > + > + ret = data.ret; > + trace_nvme_dsm_done(s, offset, bytes, ret); > +out: > + qemu_iovec_destroy(&local_qiov); > + qemu_vfree(buf); > + return ret; > + > +} > + > + > static int nvme_reopen_prepare(BDRVReopenState 
*reopen_state, > BlockReopenQueue *queue, Error **errp) > { > @@ -1259,6 +1341,7 @@ static BlockDriver bdrv_nvme = { > .bdrv_co_pwritev = nvme_co_pwritev, > > .bdrv_co_pwrite_zeroes = nvme_co_pwrite_zeroes, > + .bdrv_co_pdiscard = nvme_co_pdiscard, > > .bdrv_co_flush_to_disk = nvme_co_flush, > .bdrv_reopen_prepare = nvme_reopen_prepare, > diff --git a/block/trace-events b/block/trace-events > index 8209fbd0c7..7d1d48b502 100644 > --- a/block/trace-events > +++ b/block/trace-events > @@ -153,6 +153,8 @@ nvme_write_zeros(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offs > nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x" > nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset %"PRId64" bytes %"PRId64" niov %d is_write %d" > nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset %"PRId64" bytes %"PRId64" ret %d" > +nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset %"PRId64" bytes %"PRId64"" > +nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset %"PRId64" bytes %"PRId64" ret %d" > nvme_dma_map_flush(void *s) "s %p" > nvme_free_req_queue_wait(void *q) "q %p" > nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d" >
On Tue, 2019-08-27 at 18:29 -0400, John Snow wrote: > > On 8/25/19 3:15 AM, Maxim Levitsky wrote: > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> > > --- > > block/nvme.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++ > > block/trace-events | 2 ++ > > 2 files changed, 85 insertions(+) > > > > diff --git a/block/nvme.c b/block/nvme.c > > index f8bd11e19a..dd041f39c9 100644 > > --- a/block/nvme.c > > +++ b/block/nvme.c > > @@ -112,6 +112,7 @@ typedef struct { > > bool plugged; > > > > bool supports_write_zeros; > > + bool supports_discard; > > > > CoMutex dma_map_lock; > > CoQueue dma_flush_queue; > > @@ -463,6 +464,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) > > > > oncs = le16_to_cpu(idctrl->oncs); > > s->supports_write_zeros = (oncs & NVME_ONCS_WRITE_ZEROS) != 0; > > + s->supports_discard = (oncs & NVME_ONCS_DSM) != 0; > > Same comment -- checking !!(register & FIELD) is nicer than the > negative. (I'm actually not sure even the !! is needed, but it seems to > be a QEMU-ism and I've caught myself using it...) All right, no problem to use !! > > Rest looks good to me on a skim, but I'm not very well-versed in NVME. Thanks! 
> > > > > memset(resp, 0, 4096); > > > > @@ -1153,6 +1155,86 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs, > > } > > > > > > +static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, > > + int64_t offset, > > + int bytes) > > +{ > > + BDRVNVMeState *s = bs->opaque; > > + NVMeQueuePair *ioq = s->queues[1]; > > + NVMeRequest *req; > > + NvmeDsmRange *buf; > > + QEMUIOVector local_qiov; > > + int ret; > > + > > + NvmeCmd cmd = { > > + .opcode = NVME_CMD_DSM, > > + .nsid = cpu_to_le32(s->nsid), > > + .cdw10 = cpu_to_le32(0), /*number of ranges - 0 based*/ > > + .cdw11 = cpu_to_le32(1 << 2), /*deallocate bit*/ > > + }; > > + > > + NVMeCoData data = { > > + .ctx = bdrv_get_aio_context(bs), > > + .ret = -EINPROGRESS, > > + }; > > + > > + if (!s->supports_discard) { > > + return -ENOTSUP; > > + } > > + > > + assert(s->nr_queues > 1); > > + > > + buf = qemu_try_blockalign0(bs, s->page_size); > > + if (!buf) { > > + return -ENOMEM; > > + } > > + > > + buf->nlb = cpu_to_le32(bytes >> s->blkshift); > > + buf->slba = cpu_to_le64(offset >> s->blkshift); > > + buf->cattr = 0; > > + > > + qemu_iovec_init(&local_qiov, 1); > > + qemu_iovec_add(&local_qiov, buf, 4096); > > + > > + req = nvme_get_free_req(ioq); > > + assert(req); > > + > > + qemu_co_mutex_lock(&s->dma_map_lock); > > + ret = nvme_cmd_map_qiov(bs, &cmd, req, &local_qiov); > > + qemu_co_mutex_unlock(&s->dma_map_lock); > > + > > + if (ret) { > > + req->busy = false; > > + goto out; > > + } > > + > > + trace_nvme_dsm(s, offset, bytes); > > + > > + nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data); > > + > > + data.co = qemu_coroutine_self(); > > + while (data.ret == -EINPROGRESS) { > > + qemu_coroutine_yield(); > > + } > > + > > + qemu_co_mutex_lock(&s->dma_map_lock); > > + ret = nvme_cmd_unmap_qiov(bs, &local_qiov); > > + qemu_co_mutex_unlock(&s->dma_map_lock); > > + > > + if (ret) { > > + goto out; > > + } > > + > > + ret = data.ret; > > + trace_nvme_dsm_done(s, offset, bytes, ret); 
> > +out: > > + qemu_iovec_destroy(&local_qiov); > > + qemu_vfree(buf); > > + return ret; > > + > > +} > > + > > + > > static int nvme_reopen_prepare(BDRVReopenState *reopen_state, > > BlockReopenQueue *queue, Error **errp) > > { > > @@ -1259,6 +1341,7 @@ static BlockDriver bdrv_nvme = { > > .bdrv_co_pwritev = nvme_co_pwritev, > > > > .bdrv_co_pwrite_zeroes = nvme_co_pwrite_zeroes, > > + .bdrv_co_pdiscard = nvme_co_pdiscard, > > > > .bdrv_co_flush_to_disk = nvme_co_flush, > > .bdrv_reopen_prepare = nvme_reopen_prepare, > > diff --git a/block/trace-events b/block/trace-events > > index 8209fbd0c7..7d1d48b502 100644 > > --- a/block/trace-events > > +++ b/block/trace-events > > @@ -153,6 +153,8 @@ nvme_write_zeros(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offs > > nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x" > > nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset %"PRId64" bytes %"PRId64" niov %d is_write %d" > > nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset %"PRId64" bytes %"PRId64" ret %d" > > +nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset %"PRId64" bytes %"PRId64"" > > +nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset %"PRId64" bytes %"PRId64" ret %d" > > nvme_dma_map_flush(void *s) "s %p" > > nvme_free_req_queue_wait(void *q) "q %p" > > nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d" > > Best regards, Maxim Levitsky
On Wed, 2019-08-28 at 12:03 +0300, Maxim Levitsky wrote: > On Tue, 2019-08-27 at 18:29 -0400, John Snow wrote: > > > > On 8/25/19 3:15 AM, Maxim Levitsky wrote: > > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> > > > --- > > > block/nvme.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++ > > > block/trace-events | 2 ++ > > > 2 files changed, 85 insertions(+) > > > > > > diff --git a/block/nvme.c b/block/nvme.c > > > index f8bd11e19a..dd041f39c9 100644 > > > --- a/block/nvme.c > > > +++ b/block/nvme.c > > > @@ -112,6 +112,7 @@ typedef struct { > > > bool plugged; > > > > > > bool supports_write_zeros; > > > + bool supports_discard; > > > > > > CoMutex dma_map_lock; > > > CoQueue dma_flush_queue; > > > @@ -463,6 +464,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) > > > > > > oncs = le16_to_cpu(idctrl->oncs); > > > s->supports_write_zeros = (oncs & NVME_ONCS_WRITE_ZEROS) != 0; > > > + s->supports_discard = (oncs & NVME_ONCS_DSM) != 0; > > > > Same comment -- checking !!(register & FIELD) is nicer than the > > negative. (I'm actually not sure even the !! is needed, but it seems to > > be a QEMU-ism and I've caught myself using it...) > > All right, no problem to use !! > > > > > Rest looks good to me on a skim, but I'm not very well-versed in NVME. > > Thanks! > Kind ping about this patch series. Apart from using !!, do you think that this patch series can be merged, or should I do anything else? Which tree do you think this should be committed to? I kind of want to see that merged before the freeze starts, if there are no objections, to reduce the amount of pending stuff in my queue. [...] Best regards, Maxim Levitsky
On 9/5/19 9:24 AM, Maxim Levitsky wrote: > On Wed, 2019-08-28 at 12:03 +0300, Maxim Levitsky wrote: >> On Tue, 2019-08-27 at 18:29 -0400, John Snow wrote: >>> >>> On 8/25/19 3:15 AM, Maxim Levitsky wrote: >>>> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> >>>> --- >>>> block/nvme.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++ >>>> block/trace-events | 2 ++ >>>> 2 files changed, 85 insertions(+) >>>> >>>> diff --git a/block/nvme.c b/block/nvme.c >>>> index f8bd11e19a..dd041f39c9 100644 >>>> --- a/block/nvme.c >>>> +++ b/block/nvme.c >>>> @@ -112,6 +112,7 @@ typedef struct { >>>> bool plugged; >>>> >>>> bool supports_write_zeros; >>>> + bool supports_discard; >>>> >>>> CoMutex dma_map_lock; >>>> CoQueue dma_flush_queue; >>>> @@ -463,6 +464,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) >>>> >>>> oncs = le16_to_cpu(idctrl->oncs); >>>> s->supports_write_zeros = (oncs & NVME_ONCS_WRITE_ZEROS) != 0; >>>> + s->supports_discard = (oncs & NVME_ONCS_DSM) != 0; >>> >>> Same comment -- checking !!(register & FIELD) is nicer than the >>> negative. (I'm actually not sure even the !! is needed, but it seems to >>> be a QEMU-ism and I've caught myself using it...) >> >> All right, no problem to use !! >> >>> >>> Rest looks good to me on a skim, but I'm not very well-versed in NVME. >> >> Thanks! >> > > Kind ping about this patch series. > > Apart from using !!, do you think that this patch series > can be merged, or should I do anything else? > Which tree do you think this should be committed to? > > I kind of want to see that merged before the freeze > starts, if there are no objections, > to reduce the amount of pending stuff in my queue. > Didn't I ask a few other things? like not using "res30" because you've moved the fields around, and trying to be consistent about "zeros" vs "zeroes". Removing "+#define NVME_ID_NS_DLFEAT_GUARD_CRC(dlfeat) ((dlfeat) & 0x10)" because it's unused. 
You also probably require review (or at least an ACK) from Keith Busch who maintains this file. --js
On Thu, 2019-09-05 at 13:27 -0400, John Snow wrote: > > On 9/5/19 9:24 AM, Maxim Levitsky wrote: > > On Wed, 2019-08-28 at 12:03 +0300, Maxim Levitsky wrote: > > > On Tue, 2019-08-27 at 18:29 -0400, John Snow wrote: > > > > > > > > On 8/25/19 3:15 AM, Maxim Levitsky wrote: > > > > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> > > > > > --- > > > > > block/nvme.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++ > > > > > block/trace-events | 2 ++ > > > > > 2 files changed, 85 insertions(+) > > > > > > > > > > diff --git a/block/nvme.c b/block/nvme.c > > > > > index f8bd11e19a..dd041f39c9 100644 > > > > > --- a/block/nvme.c > > > > > +++ b/block/nvme.c > > > > > @@ -112,6 +112,7 @@ typedef struct { > > > > > bool plugged; > > > > > > > > > > bool supports_write_zeros; > > > > > + bool supports_discard; > > > > > > > > > > CoMutex dma_map_lock; > > > > > CoQueue dma_flush_queue; > > > > > @@ -463,6 +464,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) > > > > > > > > > > oncs = le16_to_cpu(idctrl->oncs); > > > > > s->supports_write_zeros = (oncs & NVME_ONCS_WRITE_ZEROS) != 0; > > > > > + s->supports_discard = (oncs & NVME_ONCS_DSM) != 0; > > > > > > > > Same comment -- checking !!(register & FIELD) is nicer than the > > > > negative. (I'm actually not sure even the !! is needed, but it seems to > > > > be a QEMU-ism and I've caught myself using it...) > > > > > > All right, no problem to use !! > > > > > > > > > > > Rest looks good to me on a skim, but I'm not very well-versed in NVME. > > > > > > Thanks! > > > > > > > Kind ping about this patch series. > > > > Apart from using !!, do you think that this patch series > > can be merged, or should I do anything else? > > Which tree do you think this should be committed to? > > > > I kind of want to see that merged before the freeze > > starts, if there are no objections, > > to reduce the amount of pending stuff in my queue. > > > > Didn't I ask a few other things? 
> > like not using "res30" because you've moved the fields around, and > trying to be consistent about "zeros" vs "zeroes". > > Removing "+#define NVME_ID_NS_DLFEAT_GUARD_CRC(dlfeat) ((dlfeat) & > 0x10)" because it's unused. All right I forgot about it, that's one of the joys of duplication of a kernel driver in the userspace... > > You also probably require review (or at least an ACK) from Keith Busch > who maintains this file. > > --js All right, Best regards, Maxim Levitsky
On 05.09.19 19:27, John Snow wrote: [...] > You also probably require review (or at least an ACK) from Keith Busch > who maintains this file. Keith actually maintains the NVMe guest device; technically, Fam is the NVMe block driver maintainer. Max
On 9/9/19 5:25 AM, Max Reitz wrote: > On 05.09.19 19:27, John Snow wrote: > > [...] > >> You also probably require review (or at least an ACK) from Keith Busch >> who maintains this file. > > Keith actually maintains the NVMe guest device; technically, Fam is the > NVMe block driver maintainer. W h o o p s. Thanks for correcting me. Well, if it's Fam -- he seems a little busier lately -- it's probably not so crucial to gate on his approval. I thought it'd be nice to at least get an ACK from someone who has used this module before, because I haven't -- I was just giving some style review to help push it along. (On that note, if you felt like my style review was wrong or isn't worth doing -- it is always perfectly fair to just say so, along with some reason as to why you won't -- that way patches won't rot on the list when people may have gotten the impression that a V2 is warranted.) --js
On 09/09/19 19:03, John Snow wrote: > > > On 9/9/19 5:25 AM, Max Reitz wrote: >> On 05.09.19 19:27, John Snow wrote: >> >> [...] >> >>> You also probably require review (or at least an ACK) from Keith Busch >>> who maintains this file. >> >> Keith actually maintains the NVMe guest device; technically, Fam is the >> NVMe block driver maintainer. > > W h o o p s. Thanks for correcting me. > > Well, if it's Fam -- he seems a little busier lately -- it's probably > not so crucial to gate on his approval. I thought it'd be nice to at > least get an ACK from someone who has used this module before, because I > haven't -- I was just giving some style review to help push it along. > > (On that note, if you felt like my style review was wrong or isn't worth > doing -- it is always perfectly fair to just say so, along with some > reason as to why you won't -- that way patches won't rot on the list > when people may have gotten the impression that a V2 is warranted.) Looks good to me with the changes you pointed out (especially res30; leaving out the unused macros is not so important). Paolo
On Tue, 2019-09-10 at 16:49 +0200, Paolo Bonzini wrote: > On 09/09/19 19:03, John Snow wrote: > > > > > > On 9/9/19 5:25 AM, Max Reitz wrote: > > > On 05.09.19 19:27, John Snow wrote: > > > > > > [...] > > > > > > > You also probably require review (or at least an ACK) from Keith Busch > > > > who maintains this file. > > > > > > Keith actually maintains the NVMe guest device; technically, Fam is the > > > NVMe block driver maintainer. > > > > W h o o p s. Thanks for correcting me. > > > > Well, if it's Fam -- he seems a little busier lately -- it's probably > > not so crucial to gate on his approval. I thought it'd be nice to at > > least get an ACK from someone who has used this module before, because I > > haven't -- I was just giving some style review to help push it along. > > > > (On that note, if you felt like my style review was wrong or isn't worth > > doing -- it is always perfectly fair to just say so, along with some > > reason as to why you won't -- that way patches won't rot on the list > > when people may have gotten the impression that a V2 is warranted.) Absolutely not, your review was fine! I just was/is a bit lazy to send next version of the patches before I get some kind of indication if anything else is needed for this to be merged, since the module doesn't have currently an active maintainer. > > Looks good to me with the changes you pointed out (especially res30; > leaving out the unused macros is not so important). All right, I'll send an updated version of those two patches soon. Best regards, Maxim Levitsky
diff --git a/block/nvme.c b/block/nvme.c index f8bd11e19a..dd041f39c9 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -112,6 +112,7 @@ typedef struct { bool plugged; bool supports_write_zeros; + bool supports_discard; CoMutex dma_map_lock; CoQueue dma_flush_queue; @@ -463,6 +464,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) oncs = le16_to_cpu(idctrl->oncs); s->supports_write_zeros = (oncs & NVME_ONCS_WRITE_ZEROS) != 0; + s->supports_discard = (oncs & NVME_ONCS_DSM) != 0; memset(resp, 0, 4096); @@ -1153,6 +1155,86 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs, } +static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, + int64_t offset, + int bytes) +{ + BDRVNVMeState *s = bs->opaque; + NVMeQueuePair *ioq = s->queues[1]; + NVMeRequest *req; + NvmeDsmRange *buf; + QEMUIOVector local_qiov; + int ret; + + NvmeCmd cmd = { + .opcode = NVME_CMD_DSM, + .nsid = cpu_to_le32(s->nsid), + .cdw10 = cpu_to_le32(0), /*number of ranges - 0 based*/ + .cdw11 = cpu_to_le32(1 << 2), /*deallocate bit*/ + }; + + NVMeCoData data = { + .ctx = bdrv_get_aio_context(bs), + .ret = -EINPROGRESS, + }; + + if (!s->supports_discard) { + return -ENOTSUP; + } + + assert(s->nr_queues > 1); + + buf = qemu_try_blockalign0(bs, s->page_size); + if (!buf) { + return -ENOMEM; + } + + buf->nlb = cpu_to_le32(bytes >> s->blkshift); + buf->slba = cpu_to_le64(offset >> s->blkshift); + buf->cattr = 0; + + qemu_iovec_init(&local_qiov, 1); + qemu_iovec_add(&local_qiov, buf, 4096); + + req = nvme_get_free_req(ioq); + assert(req); + + qemu_co_mutex_lock(&s->dma_map_lock); + ret = nvme_cmd_map_qiov(bs, &cmd, req, &local_qiov); + qemu_co_mutex_unlock(&s->dma_map_lock); + + if (ret) { + req->busy = false; + goto out; + } + + trace_nvme_dsm(s, offset, bytes); + + nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data); + + data.co = qemu_coroutine_self(); + while (data.ret == -EINPROGRESS) { + qemu_coroutine_yield(); + } + + 
qemu_co_mutex_lock(&s->dma_map_lock); + ret = nvme_cmd_unmap_qiov(bs, &local_qiov); + qemu_co_mutex_unlock(&s->dma_map_lock); + + if (ret) { + goto out; + } + + ret = data.ret; + trace_nvme_dsm_done(s, offset, bytes, ret); +out: + qemu_iovec_destroy(&local_qiov); + qemu_vfree(buf); + return ret; + +} + + static int nvme_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp) { @@ -1259,6 +1341,7 @@ static BlockDriver bdrv_nvme = { .bdrv_co_pwritev = nvme_co_pwritev, .bdrv_co_pwrite_zeroes = nvme_co_pwrite_zeroes, + .bdrv_co_pdiscard = nvme_co_pdiscard, .bdrv_co_flush_to_disk = nvme_co_flush, .bdrv_reopen_prepare = nvme_reopen_prepare, diff --git a/block/trace-events b/block/trace-events index 8209fbd0c7..7d1d48b502 100644 --- a/block/trace-events +++ b/block/trace-events @@ -153,6 +153,8 @@ nvme_write_zeros(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offs nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x" nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset %"PRId64" bytes %"PRId64" niov %d is_write %d" nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset %"PRId64" bytes %"PRId64" ret %d" +nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset %"PRId64" bytes %"PRId64"" +nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset %"PRId64" bytes %"PRId64" ret %d" nvme_dma_map_flush(void *s) "s %p" nvme_free_req_queue_wait(void *q) "q %p" nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> --- block/nvme.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++ block/trace-events | 2 ++ 2 files changed, 85 insertions(+)