diff mbox

block: document semanatics of bdrv_co_preadv|pwritev

Message ID 20170804105059.11941-1-berrange@redhat.com
State New
Headers show

Commit Message

Daniel P. Berrangé Aug. 4, 2017, 10:50 a.m. UTC
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
---
 include/block/block_int.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

Comments

Kevin Wolf Aug. 4, 2017, 2:02 p.m. UTC | #1
Am 04.08.2017 um 12:50 hat Daniel P. Berrange geschrieben:
> Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
> ---
>  include/block/block_int.h | 29 +++++++++++++++++++++++++++++
>  1 file changed, 29 insertions(+)
> 
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index d4f4ea7584..deb81a58bd 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -147,12 +147,41 @@ struct BlockDriver {
>  
>      int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
>          int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
> +
> +    /**
> +     * @offset: position in bytes to read at
> +     * @bytes: number of bytes to read
> +     * @qiov: the buffers to fill with read data
> +     *
> +     * @offset and @bytes will be a multiple of 'request_alignment',
> +     * but the length of individual @qiov elements does not have to
> +     * be a multiple.
> +     *
> +     * @bytes may be less than the total sizeof @iov, and will be
> +     * no larger than 'max_transfer'.

Really? We are asserting that they match in bdrv_aligned_preadv():

    assert(!qiov || bytes == qiov->size);

Also, s/sizeof @iov/size of @qiov/

> +     *
> +     * The buffer in @qiov may point directly to guest memory.
> +     */
>      int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
>          uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
>      int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
>          int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
>      int coroutine_fn (*bdrv_co_writev_flags)(BlockDriverState *bs,
>          int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
> +    /**
> +     * @offset: position in bytes to write at
> +     * @bytes: number of bytes to write
> +     * @qiov: the buffers containing data to write
> +     *
> +     * @offset and @bytes will be a multiple of 'request_alignment',
> +     * but the length of individual @qiov elements does not have to
> +     * be a multiple.
> +     *
> +     * @bytes may be less than the total sizeof @iov, and will be
> +     * no larger than 'max_transfer'.

The same assertion exists in bdrv_aligned_pwritev() (and the same typo
in your comment).

> +     * The buffer in @qiov may point directly to guest memory.
> +     */
>      int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
>          uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);

Kevin
Daniel P. Berrangé Aug. 4, 2017, 2:06 p.m. UTC | #2
On Fri, Aug 04, 2017 at 04:02:10PM +0200, Kevin Wolf wrote:
> Am 04.08.2017 um 12:50 hat Daniel P. Berrange geschrieben:
> > Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
> > ---
> >  include/block/block_int.h | 29 +++++++++++++++++++++++++++++
> >  1 file changed, 29 insertions(+)
> > 
> > diff --git a/include/block/block_int.h b/include/block/block_int.h
> > index d4f4ea7584..deb81a58bd 100644
> > --- a/include/block/block_int.h
> > +++ b/include/block/block_int.h
> > @@ -147,12 +147,41 @@ struct BlockDriver {
> >  
> >      int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
> >          int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
> > +
> > +    /**
> > +     * @offset: position in bytes to read at
> > +     * @bytes: number of bytes to read
> > +     * @qiov: the buffers to fill with read data
> > +     *
> > +     * @offset and @bytes will be a multiple of 'request_alignment',
> > +     * but the length of individual @qiov elements does not have to
> > +     * be a multiple.
> > +     *
> > +     * @bytes may be less than the total sizeof @iov, and will be
> > +     * no larger than 'max_transfer'.
> 
> Really? We are asserting that they match in bdrv_aligned_preadv():
> 
>     assert(!qiov || bytes == qiov->size);

Hmm, why do we pass @bytes at all then ? If they're always the same,
how about deleting it and just letting everyone read qiov->size
directly.

> Also, s/sizeof @iov/size of @qiov/



> 
> > +     *
> > +     * The buffer in @qiov may point directly to guest memory.
> > +     */
> >      int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
> >          uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
> >      int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
> >          int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
> >      int coroutine_fn (*bdrv_co_writev_flags)(BlockDriverState *bs,
> >          int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
> > +    /**
> > +     * @offset: position in bytes to write at
> > +     * @bytes: number of bytes to write
> > +     * @qiov: the buffers containing data to write
> > +     *
> > +     * @offset and @bytes will be a multiple of 'request_alignment',
> > +     * but the length of individual @qiov elements does not have to
> > +     * be a multiple.
> > +     *
> > +     * @bytes may be less than the total sizeof @iov, and will be
> > +     * no larger than 'max_transfer'.
> 
> The same assertion exists in bdrv_aligned_pwritev() (and the same typo
> in your comment).
> 
> > +     * The buffer in @qiov may point directly to guest memory.
> > +     */
> >      int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
> >          uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
> 
> Kevin

Regards,
Daniel
Eric Blake Aug. 4, 2017, 3:41 p.m. UTC | #3
On 08/04/2017 09:06 AM, Daniel P. Berrange wrote:
> On Fri, Aug 04, 2017 at 04:02:10PM +0200, Kevin Wolf wrote:
>> Am 04.08.2017 um 12:50 hat Daniel P. Berrange geschrieben:
>>> Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
>>> ---

>> Really? We are asserting that they match in bdrv_aligned_preadv():
>>
>>     assert(!qiov || bytes == qiov->size);
> 
> Hmm, why do we pass @bytes at all then ? If they're always the same,
> how about deleting it and just letting everyone read qiov->size
> directly.

Read the assertion again: qiov can be NULL (generally, when writing
zeroes).  So we can't rely on qiov->size in that scenario.
Daniel P. Berrangé Aug. 4, 2017, 3:49 p.m. UTC | #4
On Fri, Aug 04, 2017 at 10:41:54AM -0500, Eric Blake wrote:
> On 08/04/2017 09:06 AM, Daniel P. Berrange wrote:
> > On Fri, Aug 04, 2017 at 04:02:10PM +0200, Kevin Wolf wrote:
> >> Am 04.08.2017 um 12:50 hat Daniel P. Berrange geschrieben:
> >>> Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
> >>> ---
> 
> >> Really? We are asserting that they match in bdrv_aligned_preadv():
> >>
> >>     assert(!qiov || bytes == qiov->size);
> > 
> > Hmm, why do we pass @bytes at all then ? If they're always the same,
> > how about deleting it and just letting everyone read qiov->size
> > directly.
> 
> Read the assertion again: qiov can be NULL (generally, when writing
> zeroes).  So we can't rely on qiov->size in that scenario.

This is odd.  In the bdrv_aligned_readv() it looks very much like
we'll reference qiov->niov, if bytes != 0, so if qiov was NULL we
would crash.

In bdrv_aligned_writev(), qiov->niov is also refernced if bytes != 0,
*unless*  flags contains BDRV_REQ_ZERO_WRITE, in which case we'll
invoke bdrv_co_do_pwrite_zeroes() instead.

So unless I'm missing something, bdrv_co_preadv|writev cannot be
called with a NULL  qiov, and bdrv_aligned_writev|readv might
need their assertions tightened up.

Regards,
Daniel
Fam Zheng Aug. 8, 2017, 2:39 a.m. UTC | #5
On Fri, 08/04 16:49, Daniel P. Berrange wrote:
> This is odd.  In the bdrv_aligned_readv() it looks very much like
> we'll reference qiov->niov, if bytes != 0, so if qiov was NULL we
> would crash.

It doesn't make sense if read doesn't have an iov, where should the data be
placed? :)

> 
> In bdrv_aligned_writev(), qiov->niov is also refernced if bytes != 0,
> *unless*  flags contains BDRV_REQ_ZERO_WRITE, in which case we'll
> invoke bdrv_co_do_pwrite_zeroes() instead.

This is intended. Zero-write doesn't need qiov, hence the BDRV_REQ_ZERO_WRITE
branch. Otherwise, we can assert qiov != NULL.

> 
> So unless I'm missing something, bdrv_co_preadv|writev cannot be
> called with a NULL  qiov, and bdrv_aligned_writev|readv might
> need their assertions tightened up.

bdrv_co_pwritev _is_ called with a NULL qiov from blk_aio_pwrite_zeroes. Your
other reasonings are right.

So for write we cannot remove the bytes parameter.

Fam
Daniel P. Berrangé Aug. 8, 2017, 9:13 a.m. UTC | #6
On Tue, Aug 08, 2017 at 10:39:29AM +0800, Fam Zheng wrote:
> On Fri, 08/04 16:49, Daniel P. Berrange wrote:
> > This is odd.  In the bdrv_aligned_readv() it looks very much like
> > we'll reference qiov->niov, if bytes != 0, so if qiov was NULL we
> > would crash.
> 
> It doesn't make sense if read doesn't have an iov, where should the data be
> placed? :)
> 
> > 
> > In bdrv_aligned_writev(), qiov->niov is also refernced if bytes != 0,
> > *unless*  flags contains BDRV_REQ_ZERO_WRITE, in which case we'll
> > invoke bdrv_co_do_pwrite_zeroes() instead.
> 
> This is intended. Zero-write doesn't need qiov, hence the BDRV_REQ_ZERO_WRITE
> branch. Otherwise, we can assert qiov != NULL.
> 
> > 
> > So unless I'm missing something, bdrv_co_preadv|writev cannot be
> > called with a NULL  qiov, and bdrv_aligned_writev|readv might
> > need their assertions tightened up.
> 
> bdrv_co_pwritev _is_ called with a NULL qiov from blk_aio_pwrite_zeroes. Your
> other reasonings are right.

That will have BDRV_REQ_ZERO_WRITE flag set though, so we don't end up
calling the bdrv_co_pwritev() callback function registered by the block
driver.

> So for write we cannot remove the bytes parameter.

We can't remove it from the bdrv_co_pwritev() function, but we can remove
it from bdrv_co_pwritev block driver callback AFAICT.

Regards,
Daniel
Eric Blake Aug. 8, 2017, 3:03 p.m. UTC | #7
On 08/08/2017 04:13 AM, Daniel P. Berrange wrote:
> On Tue, Aug 08, 2017 at 10:39:29AM +0800, Fam Zheng wrote:
>> On Fri, 08/04 16:49, Daniel P. Berrange wrote:
>>> This is odd.  In the bdrv_aligned_readv() it looks very much like
>>> we'll reference qiov->niov, if bytes != 0, so if qiov was NULL we
>>> would crash.
>>
>> It doesn't make sense if read doesn't have an iov, where should the data be
>> placed? :)
>>

> We can't remove it from the bdrv_co_pwritev() function, but we can remove
> it from bdrv_co_pwritev block driver callback AFAICT.

Sounds like a separate cleanup series to remove the length parameter for
both read and write (since we have write_zeroes), for the 2.11
timeframe.  You're right that the block layer does not have to have
quite the same interface as the driver callbacks.
diff mbox

Patch

diff --git a/include/block/block_int.h b/include/block/block_int.h
index d4f4ea7584..deb81a58bd 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -147,12 +147,41 @@  struct BlockDriver {
 
     int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+
+    /**
+     * @offset: position in bytes to read at
+     * @bytes: number of bytes to read
+     * @qiov: the buffers to fill with read data
+     *
+     * @offset and @bytes will be a multiple of 'request_alignment',
+     * but the length of individual @qiov elements does not have to
+     * be a multiple.
+     *
+     * @bytes may be less than the total sizeof @iov, and will be
+     * no larger than 'max_transfer'.
+     *
+     * The buffer in @qiov may point directly to guest memory.
+     */
     int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
         uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
     int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
     int coroutine_fn (*bdrv_co_writev_flags)(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
+    /**
+     * @offset: position in bytes to write at
+     * @bytes: number of bytes to write
+     * @qiov: the buffers containing data to write
+     *
+     * @offset and @bytes will be a multiple of 'request_alignment',
+     * but the length of individual @qiov elements does not have to
+     * be a multiple.
+     *
+     * @bytes may be less than the total sizeof @iov, and will be
+     * no larger than 'max_transfer'.
+     *
+     * The buffer in @qiov may point directly to guest memory.
+     */
     int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
         uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);