diff mbox

[1/2] iscsi: add support for bdrv_co_is_allocated()

Message ID 1371752409-16313-2-git-send-email-pl@kamp.de
State New
Headers show

Commit Message

Peter Lieven June 20, 2013, 6:20 p.m. UTC
Signed-off-by: Peter Lieven <pl@kamp.de>
---
 block/iscsi.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

Comments

Kevin Wolf June 21, 2013, 9:18 a.m. UTC | #1
Am 20.06.2013 um 20:20 hat Peter Lieven geschrieben:
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>  block/iscsi.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 57 insertions(+)
> 
> diff --git a/block/iscsi.c b/block/iscsi.c
> index 0bbf0b1..e6b966d 100644
> --- a/block/iscsi.c
> +++ b/block/iscsi.c
> @@ -49,6 +49,7 @@ typedef struct IscsiLun {
>      uint64_t num_blocks;
>      int events;
>      QEMUTimer *nop_timer;
> +    uint8_t lbpme;
>  } IscsiLun;
>  
>  typedef struct IscsiAIOCB {
> @@ -800,6 +801,60 @@ iscsi_getlength(BlockDriverState *bs)
>      return len;
>  }
>  
> +static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
> +                                              int64_t sector_num,
> +                                              int nb_sectors, int *pnum)
> +{
> +    IscsiLun *iscsilun = bs->opaque;
> +    struct scsi_task *task = NULL;
> +    struct scsi_get_lba_status *lbas = NULL;
> +    struct scsi_lba_status_descriptor *lbasd = NULL;
> +    int ret;
> +
> +    *pnum = nb_sectors;
> +
> +    if (iscsilun->lbpme == 0) {
> +        return 1;
> +    }
> +
> +    /* in-flight requests could invalidate the lba status result */
> +    while (iscsi_process_flush(iscsilun)) {
> +        qemu_aio_wait();
> +    }

Note that you're blocking here. The preferred way would be something
involving a yield from the coroutine and a reenter as soon as all
requests are done. Maybe a CoRwLock does what you need?

> +
> +    task = iscsi_get_lba_status_sync(iscsilun->iscsi, iscsilun->lun,
> +                                     sector_qemu2lun(sector_num, iscsilun),
> +                                     8+16);

Spacing around operators (8 + 16)

> +
> +    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
> +        scsi_free_scsi_task(task);
> +        return 1;

Error cases should set *pnum = 0 and return 0.

Kevin
Peter Lieven June 21, 2013, 9:45 a.m. UTC | #2
Am 21.06.2013 11:18, schrieb Kevin Wolf:
> Am 20.06.2013 um 20:20 hat Peter Lieven geschrieben:
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>  block/iscsi.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 57 insertions(+)
>>
>> diff --git a/block/iscsi.c b/block/iscsi.c
>> index 0bbf0b1..e6b966d 100644
>> --- a/block/iscsi.c
>> +++ b/block/iscsi.c
>> @@ -49,6 +49,7 @@ typedef struct IscsiLun {
>>      uint64_t num_blocks;
>>      int events;
>>      QEMUTimer *nop_timer;
>> +    uint8_t lbpme;
>>  } IscsiLun;
>>  
>>  typedef struct IscsiAIOCB {
>> @@ -800,6 +801,60 @@ iscsi_getlength(BlockDriverState *bs)
>>      return len;
>>  }
>>  
>> +static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
>> +                                              int64_t sector_num,
>> +                                              int nb_sectors, int *pnum)
>> +{
>> +    IscsiLun *iscsilun = bs->opaque;
>> +    struct scsi_task *task = NULL;
>> +    struct scsi_get_lba_status *lbas = NULL;
>> +    struct scsi_lba_status_descriptor *lbasd = NULL;
>> +    int ret;
>> +
>> +    *pnum = nb_sectors;
>> +
>> +    if (iscsilun->lbpme == 0) {
>> +        return 1;
>> +    }
>> +
>> +    /* in-flight requests could invalidate the lba status result */
>> +    while (iscsi_process_flush(iscsilun)) {
>> +        qemu_aio_wait();
>> +    }
> Note that you're blocking here. The preferred way would be something
> involving a yield from the coroutine and a reenter as soon as all
> requests are done. Maybe a CoRwLock does what you need?
Is there a document how to use it? Or can you help here?
>
>> +
>> +    task = iscsi_get_lba_status_sync(iscsilun->iscsi, iscsilun->lun,
>> +                                     sector_qemu2lun(sector_num, iscsilun),
>> +                                     8+16);
> Spacing around operators (8 + 16)
Thanks, the checkpatch script didn't catch this.
>
>> +
>> +    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
>> +        scsi_free_scsi_task(task);
>> +        return 1;
> Error cases should set *pnum = 0 and return 0.
In this special case, the target might not implement get_lba_status
or it might return SCSI_STATUS_BUSY. We shouldn't generate an
error or should we?

Peter
Kevin Wolf June 21, 2013, 11:07 a.m. UTC | #3
Am 21.06.2013 um 11:45 hat Peter Lieven geschrieben:
> Am 21.06.2013 11:18, schrieb Kevin Wolf:
> > Am 20.06.2013 um 20:20 hat Peter Lieven geschrieben:
> >> Signed-off-by: Peter Lieven <pl@kamp.de>
> >> ---
> >>  block/iscsi.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 57 insertions(+)
> >>
> >> diff --git a/block/iscsi.c b/block/iscsi.c
> >> index 0bbf0b1..e6b966d 100644
> >> --- a/block/iscsi.c
> >> +++ b/block/iscsi.c
> >> @@ -49,6 +49,7 @@ typedef struct IscsiLun {
> >>      uint64_t num_blocks;
> >>      int events;
> >>      QEMUTimer *nop_timer;
> >> +    uint8_t lbpme;
> >>  } IscsiLun;
> >>  
> >>  typedef struct IscsiAIOCB {
> >> @@ -800,6 +801,60 @@ iscsi_getlength(BlockDriverState *bs)
> >>      return len;
> >>  }
> >>  
> >> +static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
> >> +                                              int64_t sector_num,
> >> +                                              int nb_sectors, int *pnum)
> >> +{
> >> +    IscsiLun *iscsilun = bs->opaque;
> >> +    struct scsi_task *task = NULL;
> >> +    struct scsi_get_lba_status *lbas = NULL;
> >> +    struct scsi_lba_status_descriptor *lbasd = NULL;
> >> +    int ret;
> >> +
> >> +    *pnum = nb_sectors;
> >> +
> >> +    if (iscsilun->lbpme == 0) {
> >> +        return 1;
> >> +    }
> >> +
> >> +    /* in-flight requests could invalidate the lba status result */
> >> +    while (iscsi_process_flush(iscsilun)) {
> >> +        qemu_aio_wait();
> >> +    }
> > Note that you're blocking here. The preferred way would be something
> > involving a yield from the coroutine and a reenter as soon as all
> > requests are done. Maybe a CoRwLock does what you need?
> Is there a document how to use it? Or can you help here?

The idea would be to take a read lock while any request is in flight
(i.e. qemu_co_rwlock_rdlock() before it's started and
qemu_co_rwlock_unlock() when it completes), and to take a write lock
(qemu_co_rwlock_wrlock) for the part of iscsi_co_is_allocated() that
requires that no other request runs in parallel.

> >> +    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
> >> +        scsi_free_scsi_task(task);
> >> +        return 1;
> > Error cases should set *pnum = 0 and return 0.
> In this special case, the target might not implement get_lba_status
> or it might return SCSI_STATUS_BUSY. We shouldn't generate an
> error or should we?

If you have reasons to not return an error, do so. Maybe adding a
comment wouldn't hurt.

I just saw more than one of these return 1; lines which looked like
error handling to me. If you don't want any of them to result in an
error, that's okay with me, I just wanted to mention how you would
correctly signal an error.

Kevin
Peter Lieven June 21, 2013, 11:42 a.m. UTC | #4
Am 21.06.2013 13:07, schrieb Kevin Wolf:
> Am 21.06.2013 um 11:45 hat Peter Lieven geschrieben:
>> Am 21.06.2013 11:18, schrieb Kevin Wolf:
>>> Am 20.06.2013 um 20:20 hat Peter Lieven geschrieben:
>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>> ---
>>>>  block/iscsi.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  1 file changed, 57 insertions(+)
>>>>
>>>> diff --git a/block/iscsi.c b/block/iscsi.c
>>>> index 0bbf0b1..e6b966d 100644
>>>> --- a/block/iscsi.c
>>>> +++ b/block/iscsi.c
>>>> @@ -49,6 +49,7 @@ typedef struct IscsiLun {
>>>>      uint64_t num_blocks;
>>>>      int events;
>>>>      QEMUTimer *nop_timer;
>>>> +    uint8_t lbpme;
>>>>  } IscsiLun;
>>>>  
>>>>  typedef struct IscsiAIOCB {
>>>> @@ -800,6 +801,60 @@ iscsi_getlength(BlockDriverState *bs)
>>>>      return len;
>>>>  }
>>>>  
>>>> +static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
>>>> +                                              int64_t sector_num,
>>>> +                                              int nb_sectors, int *pnum)
>>>> +{
>>>> +    IscsiLun *iscsilun = bs->opaque;
>>>> +    struct scsi_task *task = NULL;
>>>> +    struct scsi_get_lba_status *lbas = NULL;
>>>> +    struct scsi_lba_status_descriptor *lbasd = NULL;
>>>> +    int ret;
>>>> +
>>>> +    *pnum = nb_sectors;
>>>> +
>>>> +    if (iscsilun->lbpme == 0) {
>>>> +        return 1;
>>>> +    }
>>>> +
>>>> +    /* in-flight requests could invalidate the lba status result */
>>>> +    while (iscsi_process_flush(iscsilun)) {
>>>> +        qemu_aio_wait();
>>>> +    }
>>> Note that you're blocking here. The preferred way would be something
>>> involving a yield from the coroutine and a reenter as soon as all
>>> requests are done. Maybe a CoRwLock does what you need?
>> Is there a document how to use it? Or can you help here?
> The idea would be to take a read lock while any request is in flight
> (i.e. qemu_co_rwlock_rdlock() before it's started and
> qemu_co_rwlock_unlock() when it completes), and to take a write lock
> (qemu_co_rwlock_wrlock) for the part of iscsi_co_is_allocated() that
> requires that no other request runs in parallel.
Wouldn't this require that all the other operations in iscsi.c would
also take these lock? Currently there are only aio requests
implemented as it seems.

Would it help here to add sth like this?

while (iscsi_process_flush(iscsilun)) {
    if (qemu_in_coroutine()) {
        qemu_coroutine_yield();
    } else {
        qemu_aio_wait();
    }       
}
>>>> +    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
>>>> +        scsi_free_scsi_task(task);
>>>> +        return 1;
>>> Error cases should set *pnum = 0 and return 0.
>> In this special case, the target might not implement get_lba_status
>> or it might return SCSI_STATUS_BUSY. We shouldn't generate an
>> error or should we?
> If you have reasons to not return an error, do so. Maybe adding a
> comment wouldn't hurt.
Will do ;-)
>
> I just saw more than one of these return 1; lines which looked like
> error handling to me. If you don't want any of them to result in an
> error, that's okay with me, I just wanted to mention how you would
> correctly signal an error.
Ah okay. Thank you.

Peter
>
> Kevin
Kevin Wolf June 21, 2013, 1:14 p.m. UTC | #5
Am 21.06.2013 um 13:42 hat Peter Lieven geschrieben:
> Am 21.06.2013 13:07, schrieb Kevin Wolf:
> > Am 21.06.2013 um 11:45 hat Peter Lieven geschrieben:
> >> Am 21.06.2013 11:18, schrieb Kevin Wolf:
> >>> Am 20.06.2013 um 20:20 hat Peter Lieven geschrieben:
> >>>> Signed-off-by: Peter Lieven <pl@kamp.de>
> >>>> ---
> >>>>  block/iscsi.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>>  1 file changed, 57 insertions(+)
> >>>>
> >>>> diff --git a/block/iscsi.c b/block/iscsi.c
> >>>> index 0bbf0b1..e6b966d 100644
> >>>> --- a/block/iscsi.c
> >>>> +++ b/block/iscsi.c
> >>>> @@ -49,6 +49,7 @@ typedef struct IscsiLun {
> >>>>      uint64_t num_blocks;
> >>>>      int events;
> >>>>      QEMUTimer *nop_timer;
> >>>> +    uint8_t lbpme;
> >>>>  } IscsiLun;
> >>>>  
> >>>>  typedef struct IscsiAIOCB {
> >>>> @@ -800,6 +801,60 @@ iscsi_getlength(BlockDriverState *bs)
> >>>>      return len;
> >>>>  }
> >>>>  
> >>>> +static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
> >>>> +                                              int64_t sector_num,
> >>>> +                                              int nb_sectors, int *pnum)
> >>>> +{
> >>>> +    IscsiLun *iscsilun = bs->opaque;
> >>>> +    struct scsi_task *task = NULL;
> >>>> +    struct scsi_get_lba_status *lbas = NULL;
> >>>> +    struct scsi_lba_status_descriptor *lbasd = NULL;
> >>>> +    int ret;
> >>>> +
> >>>> +    *pnum = nb_sectors;
> >>>> +
> >>>> +    if (iscsilun->lbpme == 0) {
> >>>> +        return 1;
> >>>> +    }
> >>>> +
> >>>> +    /* in-flight requests could invalidate the lba status result */
> >>>> +    while (iscsi_process_flush(iscsilun)) {
> >>>> +        qemu_aio_wait();
> >>>> +    }
> >>> Note that you're blocking here. The preferred way would be something
> >>> involving a yield from the coroutine and a reenter as soon as all
> >>> requests are done. Maybe a CoRwLock does what you need?
> >> Is there a document how to use it? Or can you help here?
> > The idea would be to take a read lock while any request is in flight
> > (i.e. qemu_co_rwlock_rdlock() before it's started and
> > qemu_co_rwlock_unlock() when it completes), and to take a write lock
> > (qemu_co_rwlock_wrlock) for the part of iscsi_co_is_allocated() that
> > requires that no other request runs in parallel.
> Wouldn't this require that all the other operations in iscsi.c would
> also take these lock? Currently there are only aio requests
> implemented as it seems.

Hm, okay, that makes it a bit harder because AIO callbacks aren't called
in coroutine context. So taking the lock would be easy, but releasing
them could turn out somewhat tricky.

> Would it help here to add sth like this?
> 
> while (iscsi_process_flush(iscsilun)) {
>     if (qemu_in_coroutine()) {
>         qemu_coroutine_yield();
>     } else {
>         qemu_aio_wait();
>     }       
> }

You're always in a coroutine here.

The problem is that if you yield, you need to reenter the coroutine at
some point or the is_allocated request would never complete. That's
basically what the coroutine locks do for you: They yield and only
reenter when the lock can be taken.

Kevin
Peter Lieven June 21, 2013, 1:23 p.m. UTC | #6
Am 21.06.2013 15:14, schrieb Kevin Wolf:
> Am 21.06.2013 um 13:42 hat Peter Lieven geschrieben:
>> Am 21.06.2013 13:07, schrieb Kevin Wolf:
>>> Am 21.06.2013 um 11:45 hat Peter Lieven geschrieben:
>>>> Am 21.06.2013 11:18, schrieb Kevin Wolf:
>>>>> Am 20.06.2013 um 20:20 hat Peter Lieven geschrieben:
>>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>>> ---
>>>>>>  block/iscsi.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>  1 file changed, 57 insertions(+)
>>>>>>
>>>>>> diff --git a/block/iscsi.c b/block/iscsi.c
>>>>>> index 0bbf0b1..e6b966d 100644
>>>>>> --- a/block/iscsi.c
>>>>>> +++ b/block/iscsi.c
>>>>>> @@ -49,6 +49,7 @@ typedef struct IscsiLun {
>>>>>>      uint64_t num_blocks;
>>>>>>      int events;
>>>>>>      QEMUTimer *nop_timer;
>>>>>> +    uint8_t lbpme;
>>>>>>  } IscsiLun;
>>>>>>  
>>>>>>  typedef struct IscsiAIOCB {
>>>>>> @@ -800,6 +801,60 @@ iscsi_getlength(BlockDriverState *bs)
>>>>>>      return len;
>>>>>>  }
>>>>>>  
>>>>>> +static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
>>>>>> +                                              int64_t sector_num,
>>>>>> +                                              int nb_sectors, int *pnum)
>>>>>> +{
>>>>>> +    IscsiLun *iscsilun = bs->opaque;
>>>>>> +    struct scsi_task *task = NULL;
>>>>>> +    struct scsi_get_lba_status *lbas = NULL;
>>>>>> +    struct scsi_lba_status_descriptor *lbasd = NULL;
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    *pnum = nb_sectors;
>>>>>> +
>>>>>> +    if (iscsilun->lbpme == 0) {
>>>>>> +        return 1;
>>>>>> +    }
>>>>>> +
>>>>>> +    /* in-flight requests could invalidate the lba status result */
>>>>>> +    while (iscsi_process_flush(iscsilun)) {
>>>>>> +        qemu_aio_wait();
>>>>>> +    }
>>>>> Note that you're blocking here. The preferred way would be something
>>>>> involving a yield from the coroutine and a reenter as soon as all
>>>>> requests are done. Maybe a CoRwLock does what you need?
>>>> Is there a document how to use it? Or can you help here?
>>> The idea would be to take a read lock while any request is in flight
>>> (i.e. qemu_co_rwlock_rdlock() before it's started and
>>> qemu_co_rwlock_unlock() when it completes), and to take a write lock
>>> (qemu_co_rwlock_wrlock) for the part of iscsi_co_is_allocated() that
>>> requires that no other request runs in parallel.
>> Wouldn't this require that all the other operations in iscsi.c would
>> also take these lock? Currently there are only aio requests
>> implemented as it seems.
> Hm, okay, that makes it a bit harder because AIO callbacks aren't called
> in coroutine context. So taking the lock would be easy, but releasing
> them could turn out somewhat tricky.
>
>> Would it help here to add sth like this?
>>
>> while (iscsi_process_flush(iscsilun)) {
>>     if (qemu_in_coroutine()) {
>>         qemu_coroutine_yield();
>>     } else {
>>         qemu_aio_wait();
>>     }       
>> }
> You're always in a coroutine here.
>
> The problem is that if you yield, you need to reenter the coroutine at
> some point or the is_allocated request would never complete. That's
> basically what the coroutine locks do for you: They yield and only
> reenter when the lock can be taken.
would it ok for you to stay with qemu_aio_wait() and add a remark
that it blocks and its a TODO. i need the is_allocated support in
iscsi only to improve bdrv_has_zero_init().

Peter
ronnie sahlberg June 21, 2013, 4:06 p.m. UTC | #7
Should we really mix co-routines and AIO in the same backend?

Would it not be better to instead add a new bdrb_aio_is_allocaed and
use non-blocking async calls to libiscsi ?


On Fri, Jun 21, 2013 at 2:18 AM, Kevin Wolf <kwolf@redhat.com> wrote:
> Am 20.06.2013 um 20:20 hat Peter Lieven geschrieben:
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>  block/iscsi.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 57 insertions(+)
>>
>> diff --git a/block/iscsi.c b/block/iscsi.c
>> index 0bbf0b1..e6b966d 100644
>> --- a/block/iscsi.c
>> +++ b/block/iscsi.c
>> @@ -49,6 +49,7 @@ typedef struct IscsiLun {
>>      uint64_t num_blocks;
>>      int events;
>>      QEMUTimer *nop_timer;
>> +    uint8_t lbpme;
>>  } IscsiLun;
>>
>>  typedef struct IscsiAIOCB {
>> @@ -800,6 +801,60 @@ iscsi_getlength(BlockDriverState *bs)
>>      return len;
>>  }
>>
>> +static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
>> +                                              int64_t sector_num,
>> +                                              int nb_sectors, int *pnum)
>> +{
>> +    IscsiLun *iscsilun = bs->opaque;
>> +    struct scsi_task *task = NULL;
>> +    struct scsi_get_lba_status *lbas = NULL;
>> +    struct scsi_lba_status_descriptor *lbasd = NULL;
>> +    int ret;
>> +
>> +    *pnum = nb_sectors;
>> +
>> +    if (iscsilun->lbpme == 0) {
>> +        return 1;
>> +    }
>> +
>> +    /* in-flight requests could invalidate the lba status result */
>> +    while (iscsi_process_flush(iscsilun)) {
>> +        qemu_aio_wait();
>> +    }
>
> Note that you're blocking here. The preferred way would be something
> involving a yield from the coroutine and a reenter as soon as all
> requests are done. Maybe a CoRwLock does what you need?
>
>> +
>> +    task = iscsi_get_lba_status_sync(iscsilun->iscsi, iscsilun->lun,
>> +                                     sector_qemu2lun(sector_num, iscsilun),
>> +                                     8+16);
>
> Spacing around operators (8 + 16)
>
>> +
>> +    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
>> +        scsi_free_scsi_task(task);
>> +        return 1;
>
> Error cases should set *pnum = 0 and return 0.
>
> Kevin
Paolo Bonzini June 21, 2013, 4:31 p.m. UTC | #8
Il 21/06/2013 13:07, Kevin Wolf ha scritto:
>>> > > Note that you're blocking here. The preferred way would be something
>>> > > involving a yield from the coroutine and a reenter as soon as all
>>> > > requests are done. Maybe a CoRwLock does what you need?
>> > Is there a document how to use it? Or can you help here?
> The idea would be to take a read lock while any request is in flight
> (i.e. qemu_co_rwlock_rdlock() before it's started and
> qemu_co_rwlock_unlock() when it completes), and to take a write lock
> (qemu_co_rwlock_wrlock) for the part of iscsi_co_is_allocated() that
> requires that no other request runs in parallel.
> 

You can just send the SCSI command asynchronously and wait for the
result.  There is an example in block/qed.c, the same would apply for iscsi.

Paolo
Peter Lieven June 21, 2013, 5:06 p.m. UTC | #9
Am 21.06.2013 18:31, schrieb Paolo Bonzini:
> Il 21/06/2013 13:07, Kevin Wolf ha scritto:
>>>>>> Note that you're blocking here. The preferred way would be something
>>>>>> involving a yield from the coroutine and a reenter as soon as all
>>>>>> requests are done. Maybe a CoRwLock does what you need?
>>>> Is there a document how to use it? Or can you help here?
>> The idea would be to take a read lock while any request is in flight
>> (i.e. qemu_co_rwlock_rdlock() before it's started and
>> qemu_co_rwlock_unlock() when it completes), and to take a write lock
>> (qemu_co_rwlock_wrlock) for the part of iscsi_co_is_allocated() that
>> requires that no other request runs in parallel.
>>
> You can just send the SCSI command asynchronously and wait for the
> result.  There is an example in block/qed.c, the same would apply for iscsi.

thanks for the pointer paolo, this was i was looking for. this here seems to work:

static void
iscsi_co_is_allocated_cb(struct iscsi_context *iscsi, int status,
                        void *command_data, void *opaque)
{
    struct IscsiTask *iTask = opaque;
    struct scsi_task *task = command_data;
    struct scsi_get_lba_status *lbas = NULL;

    iTask->complete = 1;

    if (status != 0) {
        error_report("iSCSI: Failed to get_lba_status on iSCSI lun. %s",
                     iscsi_get_error(iscsi));
        iTask->status   = 1;
        goto out;
    }
   
    lbas = scsi_datain_unmarshall(task);
    if (lbas == NULL) {
        iTask->status   = 1;
        goto out;
    }

    memcpy(&iTask->lbasd, &lbas->descriptors[0],
           sizeof(struct scsi_lba_status_descriptor));

    iTask->status   = 0;

out:
    scsi_free_scsi_task(task);
   
    if (iTask->co) {
        qemu_coroutine_enter(iTask->co, NULL);
    }
}

static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
                                              int64_t sector_num,
                                              int nb_sectors, int *pnum)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    int ret;

    *pnum = nb_sectors;

    if (iscsilun->lbpme == 0) {
        return 1;
    }

    iTask.iscsilun = iscsilun;
    iTask.status = 0;
    iTask.complete = 0;
    iTask.bs = bs;

    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
                                  sector_qemu2lun(sector_num, iscsilun),
                                  8 + 16, iscsi_co_is_allocated_cb,
                                  &iTask) == NULL) {
        *pnum = 0;
        return 0;
    }

    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        iTask.co = qemu_coroutine_self();
        qemu_coroutine_yield();
    }
   
    if (iTask.status != 0) {
        /* in case the get_lba_status_callout fails (i.e.
         * because the device is busy or the cmd is not
         * supported) we pretend all blocks are allocated
         * for backwards compatiblity */
        return 1;
    }

    if (sector_qemu2lun(sector_num, iscsilun) != iTask.lbasd.lba) {
        *pnum = 0;
        return 0;
    }

    *pnum = iTask.lbasd.num_blocks * (iscsilun->block_size / BDRV_SECTOR_SIZE);
    if (*pnum > nb_sectors) {
        *pnum = nb_sectors;
    }

    return (iTask.lbasd.provisioning == SCSI_PROVISIONING_TYPE_MAPPED) ? 1 : 0;

    return ret;
}
ronnie sahlberg June 21, 2013, 5:13 p.m. UTC | #10
On Fri, Jun 21, 2013 at 10:06 AM, Peter Lieven <pl@kamp.de> wrote:
> Am 21.06.2013 18:31, schrieb Paolo Bonzini:
>> Il 21/06/2013 13:07, Kevin Wolf ha scritto:
>>>>>>> Note that you're blocking here. The preferred way would be something
>>>>>>> involving a yield from the coroutine and a reenter as soon as all
>>>>>>> requests are done. Maybe a CoRwLock does what you need?
>>>>> Is there a document how to use it? Or can you help here?
>>> The idea would be to take a read lock while any request is in flight
>>> (i.e. qemu_co_rwlock_rdlock() before it's started and
>>> qemu_co_rwlock_unlock() when it completes), and to take a write lock
>>> (qemu_co_rwlock_wrlock) for the part of iscsi_co_is_allocated() that
>>> requires that no other request runs in parallel.
>>>
>> You can just send the SCSI command asynchronously and wait for the
>> result.  There is an example in block/qed.c, the same would apply for iscsi.
>
> thanks for the pointer paolo, this was i was looking for. this here seems to work:
>
> static void
> iscsi_co_is_allocated_cb(struct iscsi_context *iscsi, int status,
>                         void *command_data, void *opaque)
> {
>     struct IscsiTask *iTask = opaque;
>     struct scsi_task *task = command_data;
>     struct scsi_get_lba_status *lbas = NULL;
>
>     iTask->complete = 1;
>
>     if (status != 0) {
>         error_report("iSCSI: Failed to get_lba_status on iSCSI lun. %s",
>                      iscsi_get_error(iscsi));
>         iTask->status   = 1;
>         goto out;
>     }
>
>     lbas = scsi_datain_unmarshall(task);
>     if (lbas == NULL) {
>         iTask->status   = 1;
>         goto out;
>     }
>
>     memcpy(&iTask->lbasd, &lbas->descriptors[0],
>            sizeof(struct scsi_lba_status_descriptor));

Only the first descriptor?
sector_num -> sector_num+nb_sectors  could be partially allocated in
which case you get multiple descriptors


>
>     iTask->status   = 0;
>
> out:
>     scsi_free_scsi_task(task);
>
>     if (iTask->co) {
>         qemu_coroutine_enter(iTask->co, NULL);
>     }
> }
>
> static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
>                                               int64_t sector_num,
>                                               int nb_sectors, int *pnum)
> {
>     IscsiLun *iscsilun = bs->opaque;
>     struct IscsiTask iTask;
>     int ret;
>
>     *pnum = nb_sectors;
>
>     if (iscsilun->lbpme == 0) {
>         return 1;
>     }
>
>     iTask.iscsilun = iscsilun;
>     iTask.status = 0;
>     iTask.complete = 0;
>     iTask.bs = bs;
>
>     if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
>                                   sector_qemu2lun(sector_num, iscsilun),
>                                   8 + 16, iscsi_co_is_allocated_cb,
>                                   &iTask) == NULL) {
>         *pnum = 0;
>         return 0;
>     }
>
>     while (!iTask.complete) {
>         iscsi_set_events(iscsilun);
>         iTask.co = qemu_coroutine_self();
>         qemu_coroutine_yield();
>     }
>
>     if (iTask.status != 0) {
>         /* in case the get_lba_status_callout fails (i.e.
>          * because the device is busy or the cmd is not
>          * supported) we pretend all blocks are allocated
>          * for backwards compatiblity */
>         return 1;
>     }
>
>     if (sector_qemu2lun(sector_num, iscsilun) != iTask.lbasd.lba) {
>         *pnum = 0;
>         return 0;
>     }
>
>     *pnum = iTask.lbasd.num_blocks * (iscsilun->block_size / BDRV_SECTOR_SIZE);
>     if (*pnum > nb_sectors) {
>         *pnum = nb_sectors;
>     }
>
>     return (iTask.lbasd.provisioning == SCSI_PROVISIONING_TYPE_MAPPED) ? 1 : 0;
>
>     return ret;
> }
Peter Lieven June 21, 2013, 5:18 p.m. UTC | #11
Am 21.06.2013 19:13, schrieb ronnie sahlberg:
> On Fri, Jun 21, 2013 at 10:06 AM, Peter Lieven <pl@kamp.de> wrote:
>> Am 21.06.2013 18:31, schrieb Paolo Bonzini:
>>> Il 21/06/2013 13:07, Kevin Wolf ha scritto:
>>>>>>>> Note that you're blocking here. The preferred way would be something
>>>>>>>> involving a yield from the coroutine and a reenter as soon as all
>>>>>>>> requests are done. Maybe a CoRwLock does what you need?
>>>>>> Is there a document how to use it? Or can you help here?
>>>> The idea would be to take a read lock while any request is in flight
>>>> (i.e. qemu_co_rwlock_rdlock() before it's started and
>>>> qemu_co_rwlock_unlock() when it completes), and to take a write lock
>>>> (qemu_co_rwlock_wrlock) for the part of iscsi_co_is_allocated() that
>>>> requires that no other request runs in parallel.
>>>>
>>> You can just send the SCSI command asynchronously and wait for the
>>> result.  There is an example in block/qed.c, the same would apply for iscsi.
>> thanks for the pointer paolo, this was i was looking for. this here seems to work:
>>
>> static void
>> iscsi_co_is_allocated_cb(struct iscsi_context *iscsi, int status,
>>                         void *command_data, void *opaque)
>> {
>>     struct IscsiTask *iTask = opaque;
>>     struct scsi_task *task = command_data;
>>     struct scsi_get_lba_status *lbas = NULL;
>>
>>     iTask->complete = 1;
>>
>>     if (status != 0) {
>>         error_report("iSCSI: Failed to get_lba_status on iSCSI lun. %s",
>>                      iscsi_get_error(iscsi));
>>         iTask->status   = 1;
>>         goto out;
>>     }
>>
>>     lbas = scsi_datain_unmarshall(task);
>>     if (lbas == NULL) {
>>         iTask->status   = 1;
>>         goto out;
>>     }
>>
>>     memcpy(&iTask->lbasd, &lbas->descriptors[0],
>>            sizeof(struct scsi_lba_status_descriptor));
> Only the first descriptor?
> sector_num -> sector_num+nb_sectors  could be partially allocated in
> which case you get multiple descriptors.
The number of sectors for which the returned provisioning state
holds true is set in *pnum. If it is only partly allocated the
return value of iscsi_co_is_allocated will be 1 or 0 and pnum
returns the number of sectors for which this is true.
>>     iTask->status   = 0;
>>
>> out:
>>     scsi_free_scsi_task(task);
>>
>>     if (iTask->co) {
>>         qemu_coroutine_enter(iTask->co, NULL);
>>     }
>> }
>>
>> static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
>>                                               int64_t sector_num,
>>                                               int nb_sectors, int *pnum)
>> {
>>     IscsiLun *iscsilun = bs->opaque;
>>     struct IscsiTask iTask;
>>     int ret;
>>
>>     *pnum = nb_sectors;
>>
>>     if (iscsilun->lbpme == 0) {
>>         return 1;
>>     }
>>
>>     iTask.iscsilun = iscsilun;
>>     iTask.status = 0;
>>     iTask.complete = 0;
>>     iTask.bs = bs;
>>
>>     if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
>>                                   sector_qemu2lun(sector_num, iscsilun),
>>                                   8 + 16, iscsi_co_is_allocated_cb,
>>                                   &iTask) == NULL) {
>>         *pnum = 0;
>>         return 0;
>>     }
>>
>>     while (!iTask.complete) {
>>         iscsi_set_events(iscsilun);
>>         iTask.co = qemu_coroutine_self();
>>         qemu_coroutine_yield();
>>     }
>>
>>     if (iTask.status != 0) {
>>         /* in case the get_lba_status_callout fails (i.e.
>>          * because the device is busy or the cmd is not
>>          * supported) we pretend all blocks are allocated
>>          * for backwards compatiblity */
>>         return 1;
>>     }
>>
>>     if (sector_qemu2lun(sector_num, iscsilun) != iTask.lbasd.lba) {
>>         *pnum = 0;
>>         return 0;
>>     }
>>
>>     *pnum = iTask.lbasd.num_blocks * (iscsilun->block_size / BDRV_SECTOR_SIZE);
>>     if (*pnum > nb_sectors) {
>>         *pnum = nb_sectors;
>>     }
>>
>>     return (iTask.lbasd.provisioning == SCSI_PROVISIONING_TYPE_MAPPED) ? 1 : 0;
>>
>>     return ret;
>> }
Paolo Bonzini June 21, 2013, 8:01 p.m. UTC | #12
Il 21/06/2013 18:06, ronnie sahlberg ha scritto:
> Should we really mix co-routines and AIO in the same backend?
> 
> Would it not be better to instead add a new bdrb_aio_is_allocaed and
> use non-blocking async calls to libiscsi ?

Certainly, but is_allocated's code is not the tidiest.

I'm going to replace the is_allocated callback with a more generic one
(Peter knows, and Kevin might have suspected it too), and I will do the
cleanups at the time I do that.

Paolo

> 
> On Fri, Jun 21, 2013 at 2:18 AM, Kevin Wolf <kwolf@redhat.com> wrote:
>> Am 20.06.2013 um 20:20 hat Peter Lieven geschrieben:
>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>> ---
>>>  block/iscsi.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 57 insertions(+)
>>>
>>> diff --git a/block/iscsi.c b/block/iscsi.c
>>> index 0bbf0b1..e6b966d 100644
>>> --- a/block/iscsi.c
>>> +++ b/block/iscsi.c
>>> @@ -49,6 +49,7 @@ typedef struct IscsiLun {
>>>      uint64_t num_blocks;
>>>      int events;
>>>      QEMUTimer *nop_timer;
>>> +    uint8_t lbpme;
>>>  } IscsiLun;
>>>
>>>  typedef struct IscsiAIOCB {
>>> @@ -800,6 +801,60 @@ iscsi_getlength(BlockDriverState *bs)
>>>      return len;
>>>  }
>>>
>>> +static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
>>> +                                              int64_t sector_num,
>>> +                                              int nb_sectors, int *pnum)
>>> +{
>>> +    IscsiLun *iscsilun = bs->opaque;
>>> +    struct scsi_task *task = NULL;
>>> +    struct scsi_get_lba_status *lbas = NULL;
>>> +    struct scsi_lba_status_descriptor *lbasd = NULL;
>>> +    int ret;
>>> +
>>> +    *pnum = nb_sectors;
>>> +
>>> +    if (iscsilun->lbpme == 0) {
>>> +        return 1;
>>> +    }
>>> +
>>> +    /* in-flight requests could invalidate the lba status result */
>>> +    while (iscsi_process_flush(iscsilun)) {
>>> +        qemu_aio_wait();
>>> +    }
>>
>> Note that you're blocking here. The preferred way would be something
>> involving a yield from the coroutine and a reenter as soon as all
>> requests are done. Maybe a CoRwLock does what you need?
>>
>>> +
>>> +    task = iscsi_get_lba_status_sync(iscsilun->iscsi, iscsilun->lun,
>>> +                                     sector_qemu2lun(sector_num, iscsilun),
>>> +                                     8+16);
>>
>> Spacing around operators (8 + 16)
>>
>>> +
>>> +    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
>>> +        scsi_free_scsi_task(task);
>>> +        return 1;
>>
>> Error cases should set *pnum = 0 and return 0.
>>
>> Kevin
> 
>
Stefan Hajnoczi June 24, 2013, 8:13 a.m. UTC | #13
On Fri, Jun 21, 2013 at 11:18:42AM +0200, Kevin Wolf wrote:
> Am 20.06.2013 um 20:20 hat Peter Lieven geschrieben:
> > @@ -800,6 +801,60 @@ iscsi_getlength(BlockDriverState *bs)
> >      return len;
> >  }
> >  
> > +static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
> > +                                              int64_t sector_num,
> > +                                              int nb_sectors, int *pnum)
> > +{
> > +    IscsiLun *iscsilun = bs->opaque;
> > +    struct scsi_task *task = NULL;
> > +    struct scsi_get_lba_status *lbas = NULL;
> > +    struct scsi_lba_status_descriptor *lbasd = NULL;
> > +    int ret;
> > +
> > +    *pnum = nb_sectors;
> > +
> > +    if (iscsilun->lbpme == 0) {
> > +        return 1;
> > +    }
> > +
> > +    /* in-flight requests could invalidate the lba status result */
> > +    while (iscsi_process_flush(iscsilun)) {
> > +        qemu_aio_wait();
> > +    }
> 
> Note that you're blocking here. The preferred way would be something
> involving a yield from the coroutine and a reenter as soon as all
> requests are done. Maybe a CoRwLock does what you need?

The other option is to avoid synchronization here and instead process
bs->tracked_requests so that any in-flight writes count as allocated.

Stefan
Paolo Bonzini June 24, 2013, 1:49 p.m. UTC | #14
Il 24/06/2013 10:13, Stefan Hajnoczi ha scritto:
> On Fri, Jun 21, 2013 at 11:18:42AM +0200, Kevin Wolf wrote:
>> Am 20.06.2013 um 20:20 hat Peter Lieven geschrieben:
>>> @@ -800,6 +801,60 @@ iscsi_getlength(BlockDriverState *bs)
>>>      return len;
>>>  }
>>>  
>>> +static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
>>> +                                              int64_t sector_num,
>>> +                                              int nb_sectors, int *pnum)
>>> +{
>>> +    IscsiLun *iscsilun = bs->opaque;
>>> +    struct scsi_task *task = NULL;
>>> +    struct scsi_get_lba_status *lbas = NULL;
>>> +    struct scsi_lba_status_descriptor *lbasd = NULL;
>>> +    int ret;
>>> +
>>> +    *pnum = nb_sectors;
>>> +
>>> +    if (iscsilun->lbpme == 0) {
>>> +        return 1;
>>> +    }
>>> +
>>> +    /* in-flight requests could invalidate the lba status result */
>>> +    while (iscsi_process_flush(iscsilun)) {
>>> +        qemu_aio_wait();
>>> +    }
>>
>> Note that you're blocking here. The preferred way would be something
>> involving a yield from the coroutine and a reenter as soon as all
>> requests are done. Maybe a CoRwLock does what you need?
> 
> The other option is to avoid synchronization here and instead process
> bs->tracked_requests so that any in-flight writes count as allocated.

I think it's a bug if the caller doesn't take into account in-flight
requests.  For example mirroring expects writes to mark sectors as
dirty, which will pick up everything that is_allocated fails to pick up.

If all else fails, you can always add a bdrv_drain_all before the query.

Hence, this check is not needed.  In fact, raw-posix does not perform it.

Paolo
Peter Lieven June 24, 2013, 4:11 p.m. UTC | #15
Am 24.06.2013 15:49, schrieb Paolo Bonzini:
> Il 24/06/2013 10:13, Stefan Hajnoczi ha scritto:
>> On Fri, Jun 21, 2013 at 11:18:42AM +0200, Kevin Wolf wrote:
>>> Am 20.06.2013 um 20:20 hat Peter Lieven geschrieben:
>>>> @@ -800,6 +801,60 @@ iscsi_getlength(BlockDriverState *bs)
>>>>      return len;
>>>>  }
>>>>  
>>>> +static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
>>>> +                                              int64_t sector_num,
>>>> +                                              int nb_sectors, int *pnum)
>>>> +{
>>>> +    IscsiLun *iscsilun = bs->opaque;
>>>> +    struct scsi_task *task = NULL;
>>>> +    struct scsi_get_lba_status *lbas = NULL;
>>>> +    struct scsi_lba_status_descriptor *lbasd = NULL;
>>>> +    int ret;
>>>> +
>>>> +    *pnum = nb_sectors;
>>>> +
>>>> +    if (iscsilun->lbpme == 0) {
>>>> +        return 1;
>>>> +    }
>>>> +
>>>> +    /* in-flight requests could invalidate the lba status result */
>>>> +    while (iscsi_process_flush(iscsilun)) {
>>>> +        qemu_aio_wait();
>>>> +    }
>>> Note that you're blocking here. The preferred way would be something
>>> involving a yield from the coroutine and a reenter as soon as all
>>> requests are done. Maybe a CoRwLock does what you need?
>> The other option is to avoid synchronization here and instead process
>> bs->tracked_requests so that any in-flight writes count as allocated.
> I think it's a bug if the caller doesn't take into account in-flight
> requests.  For example mirroring expects writes to mark sectors as
> dirty, which will pick up everything that is_allocated fails to pick up.
>
> If all else fails, you can always add a bdrv_drain_all before the query.
>
> Hence, this check is not needed.  In fact, raw-posix does not perform it.
>
> Paolo
Please ignore this patch and look at the async version in the series
which also contained the write zero optimizations.

thanks,
Peter
diff mbox

Patch

diff --git a/block/iscsi.c b/block/iscsi.c
index 0bbf0b1..e6b966d 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -49,6 +49,7 @@  typedef struct IscsiLun {
     uint64_t num_blocks;
     int events;
     QEMUTimer *nop_timer;
+    uint8_t lbpme;
 } IscsiLun;
 
 typedef struct IscsiAIOCB {
@@ -800,6 +801,60 @@  iscsi_getlength(BlockDriverState *bs)
     return len;
 }
 
+static int coroutine_fn iscsi_co_is_allocated(BlockDriverState *bs,
+                                              int64_t sector_num,
+                                              int nb_sectors, int *pnum)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct scsi_task *task = NULL;
+    struct scsi_get_lba_status *lbas = NULL;
+    struct scsi_lba_status_descriptor *lbasd = NULL;
+    int ret;
+
+    *pnum = nb_sectors;
+
+    if (iscsilun->lbpme == 0) {
+        return 1;
+    }
+
+    /* in-flight requests could invalidate the lba status result */
+    while (iscsi_process_flush(iscsilun)) {
+        qemu_aio_wait();
+    }
+
+    task = iscsi_get_lba_status_sync(iscsilun->iscsi, iscsilun->lun,
+                                     sector_qemu2lun(sector_num, iscsilun),
+                                     8+16);
+
+    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
+        scsi_free_scsi_task(task);
+        return 1;
+    }
+
+    lbas = scsi_datain_unmarshall(task);
+    if (lbas == NULL) {
+        scsi_free_scsi_task(task);
+        return 1;
+    }
+
+    lbasd = &lbas->descriptors[0];
+
+    if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
+        return 1;
+    }
+
+    *pnum = lbasd->num_blocks * (iscsilun->block_size / BDRV_SECTOR_SIZE);
+    if (*pnum > nb_sectors) {
+        *pnum = nb_sectors;
+    }
+
+    ret = (lbasd->provisioning == SCSI_PROVISIONING_TYPE_MAPPED) ? 1 : 0;
+
+    scsi_free_scsi_task(task);
+
+    return ret;
+}
+
 static int parse_chap(struct iscsi_context *iscsi, const char *target)
 {
     QemuOptsList *list;
@@ -948,6 +1003,7 @@  static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
                 } else {
                     iscsilun->block_size = rc16->block_length;
                     iscsilun->num_blocks = rc16->returned_lba + 1;
+                    iscsilun->lbpme = rc16->lbpme;
                 }
             }
             break;
@@ -1274,6 +1330,7 @@  static BlockDriver bdrv_iscsi = {
 
     .bdrv_aio_discard = iscsi_aio_discard,
     .bdrv_has_zero_init = iscsi_has_zero_init,
+    .bdrv_co_is_allocated = iscsi_co_is_allocated,
 
 #ifdef __linux__
     .bdrv_ioctl       = iscsi_ioctl,