diff mbox

[1/5] ide/atapi: make PIO read requests async

Message ID 56138A76.1030804@kamp.de
State New
Headers show

Commit Message

Peter Lieven Oct. 6, 2015, 8:46 a.m. UTC
Am 05.10.2015 um 23:15 schrieb John Snow:
>
> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>> PIO read requests on the ATAPI interface used to be sync blk requests.
>> This has to siginificant drawbacks. First the main loop hangs util an
>> I/O request is completed and secondly if the I/O request does not
>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>   hw/ide/atapi.c | 69 ++++++++++++++++++++++++++++++++++++----------------------
>>   1 file changed, 43 insertions(+), 26 deletions(-)
>>
>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>> index 747f466..9257e1c 100644
>> --- a/hw/ide/atapi.c
>> +++ b/hw/ide/atapi.c
>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>       memset(buf, 0, 288);
>>   }
>>   
>> -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
>> +static void cd_read_sector_cb(void *opaque, int ret)
>>   {
>> -    int ret;
>> +    IDEState *s = opaque;
>>   
>> -    switch(sector_size) {
>> -    case 2048:
>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>> -        break;
>> -    case 2352:
>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>> -        if (ret < 0)
>> -            return ret;
>> -        cd_data_to_raw(buf, lba);
>> -        break;
>> -    default:
>> -        ret = -EIO;
>> -        break;
>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>> +
>> +    if (ret < 0) {
>> +        ide_atapi_io_error(s, ret);
>> +        return;
>> +    }
>> +
>> +    if (s->cd_sector_size == 2352) {
>> +        cd_data_to_raw(s->io_buffer, s->lba);
>>       }
>> -    return ret;
>> +
>> +    s->lba++;
>> +    s->io_buffer_index = 0;
>> +    s->status &= ~BUSY_STAT;
>> +
>> +    ide_atapi_cmd_reply_end(s);
>> +}
>> +
>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
>> +{
>> +    if (sector_size != 2048 && sector_size != 2352) {
>> +        return -EINVAL;
>> +    }
>> +
>> +    s->iov.iov_base = buf;
>> +    if (sector_size == 2352) {
>> +        buf += 4;
>> +    }
>> +
>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>> +
>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>> +                      cd_read_sector_cb, s) == NULL) {
>> +        return -EIO;
>> +    }
>> +
>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>> +    s->status |= BUSY_STAT;
>> +    return 0;
>>   }
>>   
> We discussed this off-list a bit, but for upstream synchronization:
>
> Unfortunately, I believe making cd_read_sector here non-blocking makes
> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
> s->end_transfer_func() nonblocking, which functions like ide_data_readw
> are not prepared to cope with.
>
> My suggestion is to buffer an entire DRQ block of data at once
> (byte_count_limit) to avoid the problem.

Hi John,

first of all thank you for the detailed analysis.

Is the following what you have in mind? For PIO reads > 1 sector
it is a great improvement for the NFS backend:



Did you also have a look at the other patches?

Thanks,
Peter

Comments

Peter Lieven Oct. 6, 2015, 12:08 p.m. UTC | #1
Am 06.10.2015 um 10:46 schrieb Peter Lieven:
> Am 05.10.2015 um 23:15 schrieb John Snow:
>>
>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>> This has to siginificant drawbacks. First the main loop hangs util an
>>> I/O request is completed and secondly if the I/O request does not
>>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>>
>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>> ---
>>>   hw/ide/atapi.c | 69 ++++++++++++++++++++++++++++++++++++----------------------
>>>   1 file changed, 43 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>> index 747f466..9257e1c 100644
>>> --- a/hw/ide/atapi.c
>>> +++ b/hw/ide/atapi.c
>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>       memset(buf, 0, 288);
>>>   }
>>>   -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>   {
>>> -    int ret;
>>> +    IDEState *s = opaque;
>>>   -    switch(sector_size) {
>>> -    case 2048:
>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -        break;
>>> -    case 2352:
>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -        if (ret < 0)
>>> -            return ret;
>>> -        cd_data_to_raw(buf, lba);
>>> -        break;
>>> -    default:
>>> -        ret = -EIO;
>>> -        break;
>>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> +
>>> +    if (ret < 0) {
>>> +        ide_atapi_io_error(s, ret);
>>> +        return;
>>> +    }
>>> +
>>> +    if (s->cd_sector_size == 2352) {
>>> +        cd_data_to_raw(s->io_buffer, s->lba);
>>>       }
>>> -    return ret;
>>> +
>>> +    s->lba++;
>>> +    s->io_buffer_index = 0;
>>> +    s->status &= ~BUSY_STAT;
>>> +
>>> +    ide_atapi_cmd_reply_end(s);
>>> +}
>>> +
>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
>>> +{
>>> +    if (sector_size != 2048 && sector_size != 2352) {
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    s->iov.iov_base = buf;
>>> +    if (sector_size == 2352) {
>>> +        buf += 4;
>>> +    }
>>> +
>>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>> +
>>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>> +                      cd_read_sector_cb, s) == NULL) {
>>> +        return -EIO;
>>> +    }
>>> +
>>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> +    s->status |= BUSY_STAT;
>>> +    return 0;
>>>   }
>> We discussed this off-list a bit, but for upstream synchronization:
>>
>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>> are not prepared to cope with.
>>
>> My suggestion is to buffer an entire DRQ block of data at once
>> (byte_count_limit) to avoid the problem.
>
> Hi John,
>
> first of all thank you for the detailed analysis.
>
> Is the following what you have i mind. For PIO reads > 1 sector
> it is a great improvement for the NFS backend:
>
> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> index ab45495..ec2ba89 100644
> --- a/hw/ide/atapi.c
> +++ b/hw/ide/atapi.c
> @@ -117,37 +117,40 @@ static void cd_read_sector_cb(void *opaque, int ret)
>      }
>
>      if (s->cd_sector_size == 2352) {
> -        cd_data_to_raw(s->io_buffer, s->lba);
> +        int nb_sectors = s->packet_transfer_size / 2352;
> +        while (nb_sectors--) {
> +            memmove(s->io_buffer + nb_sectors * 2352 + 4,
> +                    s->io_buffer + nb_sectors * 2048, 2048);
> +            cd_data_to_raw(s->io_buffer + nb_sectors * 2352,
> +                           s->lba + nb_sectors);
> +        }
>      }
>
> -    s->lba++;
> +    s->lba = -1;
>      s->io_buffer_index = 0;
>      s->status &= ~BUSY_STAT;
>
>      ide_atapi_cmd_reply_end(s);
>  }
>
> -static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
> +static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size, int nb_sectors)
>  {
>      if (sector_size != 2048 && sector_size != 2352) {
>          return -EINVAL;
>      }
>
>      s->iov.iov_base = buf;
> -    if (sector_size == 2352) {
> -        buf += 4;
> -    }
> -
> -    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> +    s->iov.iov_len = nb_sectors * 2048;
>      qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>
> -    if (ide_readv_cancelable(s, (int64_t)lba << 2, &s->qiov, 4,
> -                      cd_read_sector_cb, s) == NULL) {
> +    if (ide_readv_cancelable(s, (int64_t)lba << 2,
> +                             &s->qiov, nb_sectors * 4,
> +                             cd_read_sector_cb, s) == NULL) {
>          return -EIO;
>      }
>
>      block_acct_start(blk_get_stats(s->blk), &s->acct,
> -                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> +                     nb_sectors * 2048, BLOCK_ACCT_READ);
>      s->status |= BUSY_STAT;
>      return 0;
>  }
> @@ -190,7 +193,7 @@ void ide_atapi_io_error(IDEState *s, int ret)
>  /* The whole ATAPI transfer logic is handled in this function */
>  void ide_atapi_cmd_reply_end(IDEState *s)
>  {
> -    int byte_count_limit, size, ret;
> +    int byte_count_limit, size;
>  #ifdef DEBUG_IDE_ATAPI
>      printf("reply: tx_size=%d elem_tx_size=%d index=%d\n",
>             s->packet_transfer_size,
> @@ -205,14 +208,6 @@ void ide_atapi_cmd_reply_end(IDEState *s)
>          printf("status=0x%x\n", s->status);
>  #endif
>      } else {
> -        /* see if a new sector must be read */
> -        if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) {
> -            ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
> -            if (ret < 0) {
> -                ide_atapi_io_error(s, ret);
> -            }
> -            return;
> -        }
>          if (s->elementary_transfer_size > 0) {
>              /* there are some data left to transmit in this elementary
>                 transfer */
> @@ -287,13 +282,18 @@ static void ide_atapi_cmd_reply(IDEState *s, int size, int max_size)
>  static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
>                                     int sector_size)
>  {
> +    int ret;
>      s->lba = lba;
>      s->packet_transfer_size = nb_sectors * sector_size;
> +    assert(s->packet_transfer_size <=
> +           IDE_DMA_BUF_SECTORS * BDRV_SECTOR_SIZE + 4);
>      s->elementary_transfer_size = 0;
> -    s->io_buffer_index = sector_size;
>      s->cd_sector_size = sector_size;
> -
> -    ide_atapi_cmd_reply_end(s);
> +    ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size,
> +                         nb_sectors);
> +    if (ret < 0) {
> +        ide_atapi_io_error(s, ret);
> +    }
>  }
>
>  static void ide_atapi_cmd_check_status(IDEState *s)
>
>

Forgot to mention with this patch I pass tests/ide-test now.

Peter
John Snow Oct. 7, 2015, 4:42 p.m. UTC | #2
On 10/06/2015 04:46 AM, Peter Lieven wrote:
> Am 05.10.2015 um 23:15 schrieb John Snow:
>>
>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>> This has to siginificant drawbacks. First the main loop hangs util an
>>> I/O request is completed and secondly if the I/O request does not
>>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>>
>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>> ---
>>>   hw/ide/atapi.c | 69
>>> ++++++++++++++++++++++++++++++++++++----------------------
>>>   1 file changed, 43 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>> index 747f466..9257e1c 100644
>>> --- a/hw/ide/atapi.c
>>> +++ b/hw/ide/atapi.c
>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>       memset(buf, 0, 288);
>>>   }
>>>   -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
>>> sector_size)
>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>   {
>>> -    int ret;
>>> +    IDEState *s = opaque;
>>>   -    switch(sector_size) {
>>> -    case 2048:
>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -        break;
>>> -    case 2352:
>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -        if (ret < 0)
>>> -            return ret;
>>> -        cd_data_to_raw(buf, lba);
>>> -        break;
>>> -    default:
>>> -        ret = -EIO;
>>> -        break;
>>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> +
>>> +    if (ret < 0) {
>>> +        ide_atapi_io_error(s, ret);
>>> +        return;
>>> +    }
>>> +
>>> +    if (s->cd_sector_size == 2352) {
>>> +        cd_data_to_raw(s->io_buffer, s->lba);
>>>       }
>>> -    return ret;
>>> +
>>> +    s->lba++;
>>> +    s->io_buffer_index = 0;
>>> +    s->status &= ~BUSY_STAT;
>>> +
>>> +    ide_atapi_cmd_reply_end(s);
>>> +}
>>> +
>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
>>> sector_size)
>>> +{
>>> +    if (sector_size != 2048 && sector_size != 2352) {
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    s->iov.iov_base = buf;
>>> +    if (sector_size == 2352) {
>>> +        buf += 4;
>>> +    }
>>> +
>>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>> +
>>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>> +                      cd_read_sector_cb, s) == NULL) {
>>> +        return -EIO;
>>> +    }
>>> +
>>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> +    s->status |= BUSY_STAT;
>>> +    return 0;
>>>   }
>>>   
>> We discussed this off-list a bit, but for upstream synchronization:
>>
>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>> are not prepared to cope with.
>>
>> My suggestion is to buffer an entire DRQ block of data at once
>> (byte_count_limit) to avoid the problem.
> 
> Hi John,
> 
> first of all thank you for the detailed analysis.
> 
> Is the following what you have i mind. For PIO reads > 1 sector
> it is a great improvement for the NFS backend:
> 
> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> index ab45495..ec2ba89 100644
> --- a/hw/ide/atapi.c
> +++ b/hw/ide/atapi.c
> @@ -117,37 +117,40 @@ static void cd_read_sector_cb(void *opaque, int ret)
>      }
> 
>      if (s->cd_sector_size == 2352) {
> -        cd_data_to_raw(s->io_buffer, s->lba);
> +        int nb_sectors = s->packet_transfer_size / 2352;
> +        while (nb_sectors--) {
> +            memmove(s->io_buffer + nb_sectors * 2352 + 4,
> +                    s->io_buffer + nb_sectors * 2048, 2048);
> +            cd_data_to_raw(s->io_buffer + nb_sectors * 2352,
> +                           s->lba + nb_sectors);
> +        }
>      }

Is this going to be correct in cases where the number of sectors we are
copying is less than the total request size? We might need to bookmark
how many sectors/bytes we're copying this go-around. Perhaps by looking
at lcyl/hcyl.

> 
> -    s->lba++;
> +    s->lba = -1;
>      s->io_buffer_index = 0;
>      s->status &= ~BUSY_STAT;
> 
>      ide_atapi_cmd_reply_end(s);
>  }
> 

Well, I might not name it cd_read_sector and cd_read_sector_cb anymore.
Perhaps cd_read_sectors[_cb].

> -static int cd_read_sector(IDEState *s, int lba, void *buf, int
> sector_size)
> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
> sector_size, int nb_sectors)
>  {
>      if (sector_size != 2048 && sector_size != 2352) {
>          return -EINVAL;
>      }
> 
>      s->iov.iov_base = buf;
> -    if (sector_size == 2352) {
> -        buf += 4;
> -    }
> -
> -    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> +    s->iov.iov_len = nb_sectors * 2048;
>      qemu_iovec_init_external(&s->qiov, &s->iov, 1);
> 
> -    if (ide_readv_cancelable(s, (int64_t)lba << 2, &s->qiov, 4,
> -                      cd_read_sector_cb, s) == NULL) {
> +    if (ide_readv_cancelable(s, (int64_t)lba << 2,
> +                             &s->qiov, nb_sectors * 4,
> +                             cd_read_sector_cb, s) == NULL) {
>          return -EIO;
>      }
> 
>      block_acct_start(blk_get_stats(s->blk), &s->acct,
> -                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> +                     nb_sectors * 2048, BLOCK_ACCT_READ);
>      s->status |= BUSY_STAT;
>      return 0;
>  }
> @@ -190,7 +193,7 @@ void ide_atapi_io_error(IDEState *s, int ret)
>  /* The whole ATAPI transfer logic is handled in this function */
>  void ide_atapi_cmd_reply_end(IDEState *s)
>  {
> -    int byte_count_limit, size, ret;
> +    int byte_count_limit, size;
>  #ifdef DEBUG_IDE_ATAPI
>      printf("reply: tx_size=%d elem_tx_size=%d index=%d\n",
>             s->packet_transfer_size,
> @@ -205,14 +208,6 @@ void ide_atapi_cmd_reply_end(IDEState *s)
>          printf("status=0x%x\n", s->status);
>  #endif
>      } else {
> -        /* see if a new sector must be read */
> -        if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) {
> -            ret = cd_read_sector(s, s->lba, s->io_buffer,
> s->cd_sector_size);
> -            if (ret < 0) {
> -                ide_atapi_io_error(s, ret);
> -            }
> -            return;
> -        }
>          if (s->elementary_transfer_size > 0) {
>              /* there are some data left to transmit in this elementary
>                 transfer */
> @@ -287,13 +282,18 @@ static void ide_atapi_cmd_reply(IDEState *s, int
> size, int max_size)
>  static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
>                                     int sector_size)
>  {
> +    int ret;
>      s->lba = lba;
>      s->packet_transfer_size = nb_sectors * sector_size;
> +    assert(s->packet_transfer_size <=
> +           IDE_DMA_BUF_SECTORS * BDRV_SECTOR_SIZE + 4);
>      s->elementary_transfer_size = 0;
> -    s->io_buffer_index = sector_size;
>      s->cd_sector_size = sector_size;
> -
> -    ide_atapi_cmd_reply_end(s);
> +    ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size,
> +                         nb_sectors);
> +    if (ret < 0) {
> +        ide_atapi_io_error(s, ret);
> +    }
>  }
> 
>  static void ide_atapi_cmd_check_status(IDEState *s)
> 
> 
> Did you also have a look at the other patches?
> 
> Thanks,
> Peter

On my queue; hopefully Stefan can take a peek too, but I'll try to
review the IDE-specific bits. I imagine you want to wait to respin until
we've looked at all the patches, that's fine -- I'll try not to keep you
waiting for much longer.

--js
Peter Lieven Oct. 7, 2015, 6:53 p.m. UTC | #3
Am 07.10.2015 um 18:42 schrieb John Snow:
>
> On 10/06/2015 04:46 AM, Peter Lieven wrote:
>> Am 05.10.2015 um 23:15 schrieb John Snow:
>>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>>> This has to siginificant drawbacks. First the main loop hangs util an
>>>> I/O request is completed and secondly if the I/O request does not
>>>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>>>
>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>> ---
>>>>   hw/ide/atapi.c | 69
>>>> ++++++++++++++++++++++++++++++++++++----------------------
>>>>   1 file changed, 43 insertions(+), 26 deletions(-)
>>>>
>>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>>> index 747f466..9257e1c 100644
>>>> --- a/hw/ide/atapi.c
>>>> +++ b/hw/ide/atapi.c
>>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>>       memset(buf, 0, 288);
>>>>   }
>>>>   -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
>>>> sector_size)
>>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>>   {
>>>> -    int ret;
>>>> +    IDEState *s = opaque;
>>>>   -    switch(sector_size) {
>>>> -    case 2048:
>>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>> -        break;
>>>> -    case 2352:
>>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>> -        if (ret < 0)
>>>> -            return ret;
>>>> -        cd_data_to_raw(buf, lba);
>>>> -        break;
>>>> -    default:
>>>> -        ret = -EIO;
>>>> -        break;
>>>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>>>> +
>>>> +    if (ret < 0) {
>>>> +        ide_atapi_io_error(s, ret);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    if (s->cd_sector_size == 2352) {
>>>> +        cd_data_to_raw(s->io_buffer, s->lba);
>>>>       }
>>>> -    return ret;
>>>> +
>>>> +    s->lba++;
>>>> +    s->io_buffer_index = 0;
>>>> +    s->status &= ~BUSY_STAT;
>>>> +
>>>> +    ide_atapi_cmd_reply_end(s);
>>>> +}
>>>> +
>>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
>>>> sector_size)
>>>> +{
>>>> +    if (sector_size != 2048 && sector_size != 2352) {
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>> +    s->iov.iov_base = buf;
>>>> +    if (sector_size == 2352) {
>>>> +        buf += 4;
>>>> +    }
>>>> +
>>>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>>> +
>>>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>>> +                      cd_read_sector_cb, s) == NULL) {
>>>> +        return -EIO;
>>>> +    }
>>>> +
>>>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>>>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>>> +    s->status |= BUSY_STAT;
>>>> +    return 0;
>>>>   }
>>>>   
>>> We discussed this off-list a bit, but for upstream synchronization:
>>>
>>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>>> are not prepared to cope with.
>>>
>>> My suggestion is to buffer an entire DRQ block of data at once
>>> (byte_count_limit) to avoid the problem.
>> Hi John,
>>
>> first of all thank you for the detailed analysis.
>>
>> Is the following what you have i mind. For PIO reads > 1 sector
>> it is a great improvement for the NFS backend:
>>
>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>> index ab45495..ec2ba89 100644
>> --- a/hw/ide/atapi.c
>> +++ b/hw/ide/atapi.c
>> @@ -117,37 +117,40 @@ static void cd_read_sector_cb(void *opaque, int ret)
>>      }
>>
>>      if (s->cd_sector_size == 2352) {
>> -        cd_data_to_raw(s->io_buffer, s->lba);
>> +        int nb_sectors = s->packet_transfer_size / 2352;
>> +        while (nb_sectors--) {
>> +            memmove(s->io_buffer + nb_sectors * 2352 + 4,
>> +                    s->io_buffer + nb_sectors * 2048, 2048);
>> +            cd_data_to_raw(s->io_buffer + nb_sectors * 2352,
>> +                           s->lba + nb_sectors);
>> +        }
>>      }
> Is this going to be correct in cases where the number of sectors we are
> copying is less than the total request size? We might need to bookmark
> how many sectors/bytes we're copying this go-around. Perhaps by looking
> at lcyl/hcyl.

It needs some adjustments, like the whole copying logic. We need
a read and a write offset.

And, of course, it should read +16 and not +4 in the memmove
line as Kevin pointed out.

My current plan is to rebuffer if not all data has been read from
the block layer and we have less than 0xfffe unsent bytes in the buffer.

I would then move the unsent bytes to the front of the io_buffer and
append more data.

In any case it would be good to have a test for such a big transfer
in ide_test. With byte limits that divide the sector size and also not.

>
>> -    s->lba++;
>> +    s->lba = -1;
>>      s->io_buffer_index = 0;
>>      s->status &= ~BUSY_STAT;
>>
>>      ide_atapi_cmd_reply_end(s);
>>  }
>>
> Well, I might not name it cd_read_sector and cd_read_sector_cb anymore.
> Perhaps cd_read_sectors[_cb].

Sure, we could also add a pio_ prefix. Since DMA uses its
own functions.

>
>> -static int cd_read_sector(IDEState *s, int lba, void *buf, int
>> sector_size)
>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
>> sector_size, int nb_sectors)
>>  {
>>      if (sector_size != 2048 && sector_size != 2352) {
>>          return -EINVAL;
>>      }
>>
>>      s->iov.iov_base = buf;
>> -    if (sector_size == 2352) {
>> -        buf += 4;
>> -    }
>> -
>> -    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>> +    s->iov.iov_len = nb_sectors * 2048;
>>      qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>
>> -    if (ide_readv_cancelable(s, (int64_t)lba << 2, &s->qiov, 4,
>> -                      cd_read_sector_cb, s) == NULL) {
>> +    if (ide_readv_cancelable(s, (int64_t)lba << 2,
>> +                             &s->qiov, nb_sectors * 4,
>> +                             cd_read_sector_cb, s) == NULL) {
>>          return -EIO;
>>      }
>>
>>      block_acct_start(blk_get_stats(s->blk), &s->acct,
>> -                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>> +                     nb_sectors * 2048, BLOCK_ACCT_READ);
>>      s->status |= BUSY_STAT;
>>      return 0;
>>  }
>> @@ -190,7 +193,7 @@ void ide_atapi_io_error(IDEState *s, int ret)
>>  /* The whole ATAPI transfer logic is handled in this function */
>>  void ide_atapi_cmd_reply_end(IDEState *s)
>>  {
>> -    int byte_count_limit, size, ret;
>> +    int byte_count_limit, size;
>>  #ifdef DEBUG_IDE_ATAPI
>>      printf("reply: tx_size=%d elem_tx_size=%d index=%d\n",
>>             s->packet_transfer_size,
>> @@ -205,14 +208,6 @@ void ide_atapi_cmd_reply_end(IDEState *s)
>>          printf("status=0x%x\n", s->status);
>>  #endif
>>      } else {
>> -        /* see if a new sector must be read */
>> -        if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) {
>> -            ret = cd_read_sector(s, s->lba, s->io_buffer,
>> s->cd_sector_size);
>> -            if (ret < 0) {
>> -                ide_atapi_io_error(s, ret);
>> -            }
>> -            return;
>> -        }
>>          if (s->elementary_transfer_size > 0) {
>>              /* there are some data left to transmit in this elementary
>>                 transfer */
>> @@ -287,13 +282,18 @@ static void ide_atapi_cmd_reply(IDEState *s, int
>> size, int max_size)
>>  static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
>>                                     int sector_size)
>>  {
>> +    int ret;
>>      s->lba = lba;
>>      s->packet_transfer_size = nb_sectors * sector_size;
>> +    assert(s->packet_transfer_size <=
>> +           IDE_DMA_BUF_SECTORS * BDRV_SECTOR_SIZE + 4);
>>      s->elementary_transfer_size = 0;
>> -    s->io_buffer_index = sector_size;
>>      s->cd_sector_size = sector_size;
>> -
>> -    ide_atapi_cmd_reply_end(s);
>> +    ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size,
>> +                         nb_sectors);
>> +    if (ret < 0) {
>> +        ide_atapi_io_error(s, ret);
>> +    }
>>  }
>>
>>  static void ide_atapi_cmd_check_status(IDEState *s)
>>
>>
>> Did you also have a look at the other patches?
>>
>> Thanks,
>> Peter
> On my queue; hopefully Stefan can take a peek too, but I'll try to
> review the IDE-specific bits. I imagine you want to wait to respin until
> we've looked at all the patches, that's fine -- I'll try not to keep you
> waiting for much longer.

Thanks,
Peter
Peter Lieven Oct. 8, 2015, 12:06 p.m. UTC | #4
Hi all,

short summary from my side. The whole thing seems to get complicated, let me explain why:

1) During review I found that the code in ide_atapi_cmd_reply_end can't work correctly if the
byte_count_limit is not a divisor or a multiple of cd_sector_size. The reason is that as soon
as we load the next sector we start at io_buffer offset 0 overwriting whatever is left in there
for transfer. We also reset the io_buffer_index to 0 which means if we continue with the
elementary transfer we always transfer a whole sector (of corrupt data) regardless if we
are allowed to transfer that much data. Before we consider fixing this I wonder if it
is legal at all to have an unaligned byte_count_limit. It obviously has never caused trouble in
practice, so maybe it's not happening in real life.

2) I found that whatever cool optimization I put in to buffer multiple sectors at once I end
up with code that breaks migration because older versions would either not fill the io_buffer
as expected or we introduce variables that older versions do not understand. This will
lead to problems if we migrate in the middle of a transfer.

3) My current plan to get this patch to a useful state would be to use my initial patch and just
change the code to use a sync request if we need to buffer additional sectors in an elementary
transfer. I found that in real world operating systems the byte_count_limit seems to be equal to
the cd_sector_size. After all, it's just a PIO transfer; an operating system will likely switch to DMA
as soon as the kernel is loaded.

Thanks,
Peter
John Snow Oct. 8, 2015, 4:44 p.m. UTC | #5
On 10/08/2015 08:06 AM, Peter Lieven wrote:
> Hi all,
> 
> short summary from my side. The whole thing seems to get complicated,
> let me explain why:
> 
> 1) During review I found that the code in ide_atapi_cmd_reply_end can't
> work correctly if the
> byte_count_limit is not a divider or a multiple of cd_sector_size. The
> reason is that as soon
> as we load the next sector we start at io_buffer offset 0 overwriting
> whatever is left in there
> for transfer. We also reset the io_buffer_index to 0 which means if we
> continue with the
> elementary transfer we always transfer a whole sector (of corrupt data)
> regardless if we
> are allowed to transfer that much data. Before we consider fixing this I
> wonder if it
> is legal at all to have an unaligned byte_count_limit. It obviously has
> never caused trouble in
> practice so maybe its not happening in real life.
> 

I had overlooked that part. Good catch. I do suspect that in practice
nobody will be asking for bizarre values.

There's no rule against an unaligned byte_count_limit as far as I have
read, but I suspect nobody would have a reason to use it in practice.

> 2) I found that whatever cool optimization I put in to buffer multiple
> sectors at once I end
> up with code that breaks migration because older versions would either
> not fill the io_buffer
> as expected or we introduce variables that older versions do not
> understand. This will
> lead to problems if we migrate in the middle of a transfer.
> 

Ech. This sounds like a bit of a problem. I'll need to think about this
one...

> 3) My current plan to get this patch to a useful state would be to use
> my initial patch and just
> change the code to use a sync request if we need to buffer additional
> sectors in an elementary
> transfer. I found that in real world operating systems the
> byte_count_limit seems to be equal to
> the cd_sector_size. After all its just a PIO transfer an operating
> system will likely switch to DMA
> as soon as the kernel ist loaded.
> 
> Thanks,
> Peter
> 

It sounds like that might be "good enough" for now, and won't make
behavior *worse* than it currently is. You can adjust the test I had
checked in to not use a "tricky" value and we can amend support for this
later if desired.
Kevin Wolf Oct. 9, 2015, 8:21 a.m. UTC | #6
Am 08.10.2015 um 18:44 hat John Snow geschrieben:
> On 10/08/2015 08:06 AM, Peter Lieven wrote:
> > Hi all,
> > 
> > short summary from my side. The whole thing seems to get complicated,
> > let me explain why:
> > 
> > 1) During review I found that the code in ide_atapi_cmd_reply_end can't
> > work correctly if the
> > byte_count_limit is not a divider or a multiple of cd_sector_size. The
> > reason is that as soon
> > as we load the next sector we start at io_buffer offset 0 overwriting
> > whatever is left in there
> > for transfer. We also reset the io_buffer_index to 0 which means if we
> > continue with the
> > elementary transfer we always transfer a whole sector (of corrupt data)
> > regardless if we
> > are allowed to transfer that much data. Before we consider fixing this I
> > wonder if it
> > is legal at all to have an unaligned byte_count_limit. It obviously has
> > never caused trouble in
> > practice so maybe its not happening in real life.
> > 
> 
> I had overlooked that part. Good catch. I do suspect that in practice
> nobody will be asking for bizarre values.
> 
> There's no rule against an unaligned byte_count_limit as far as I have
> read, but suspect nobody would have a reason to use it in practice.

If we know that our code is technically wrong, "nobody uses it" is not a
good reason not to fix it. Because sooner or later someone will use it.

> > 2) I found that whatever cool optimization I put in to buffer multiple
> > sectors at once I end
> > up with code that breaks migration because older versions would either
> > not fill the io_buffer
> > as expected or we introduce variables that older versions do not
> > understand. This will
> > lead to problems if we migrate in the middle of a transfer.
> > 
> 
> Ech. This sounds like a bit of a problem. I'll need to think about this
> one...

Sounds like a textbook example for subsections to me?

Kevin
Peter Lieven Oct. 9, 2015, 11:18 a.m. UTC | #7
Am 09.10.2015 um 10:21 schrieb Kevin Wolf:
> Am 08.10.2015 um 18:44 hat John Snow geschrieben:
>> On 10/08/2015 08:06 AM, Peter Lieven wrote:
>>> Hi all,
>>>
>>> short summary from my side. The whole thing seems to get complicated,
>>> let me explain why:
>>>
>>> 1) During review I found that the code in ide_atapi_cmd_reply_end can't
>>> work correctly if the
>>> byte_count_limit is not a divider or a multiple of cd_sector_size. The
>>> reason is that as soon
>>> as we load the next sector we start at io_buffer offset 0 overwriting
>>> whatever is left in there
>>> for transfer. We also reset the io_buffer_index to 0 which means if we
>>> continue with the
>>> elementary transfer we always transfer a whole sector (of corrupt data)
>>> regardless if we
>>> are allowed to transfer that much data. Before we consider fixing this I
>>> wonder if it
>>> is legal at all to have an unaligned byte_count_limit. It obviously has
>>> never caused trouble in
>>> practice so maybe its not happening in real life.
>>>
>> I had overlooked that part. Good catch. I do suspect that in practice
>> nobody will be asking for bizarre values.
>>
>> There's no rule against an unaligned byte_count_limit as far as I have
>> read, but suspect nobody would have a reason to use it in practice.
> If we know that our code is technically wrong, "nobody uses it" is not a
> good reason not to fix it. Because sooner or later someone will use it.

I found that I just misinterpreted the code. I think it's correct although
it's not nice.

>
>>> 2) I found that whatever cool optimization I put in to buffer multiple
>>> sectors at once I end
>>> up with code that breaks migration because older versions would either
>>> not fill the io_buffer
>>> as expected or we introduce variables that older versions do not
>>> understand. This will
>>> lead to problems if we migrate in the middle of a transfer.
>>>
>> Ech. This sounds like a bit of a problem. I'll need to think about this
>> one...
> Sounds like a textbook example for subsections to me?

I wonder if we need this at all. It's just PIO. However, Windows and Linux
fall back to PIO if I disconnect the NFS Server for some time. With the
latest version of my patch series this works fine now and even works
fine after restoring NFS connectivity. This was not properly working
because of the glitch that John found.

I have an optimization in mind that might work and will disable the need
for the sync requests while keeping migration possible.
I hope to be able to complete this by Monday and send out a V2 then.

If someone wants to have a look at what I currently have (especially
the cancelable requests part) its here:
https://github.com/plieven/qemu/commits/atapi_async_V2

Thanks,
Peter
John Snow Oct. 9, 2015, 4:32 p.m. UTC | #8
On 10/09/2015 04:21 AM, Kevin Wolf wrote:
> Am 08.10.2015 um 18:44 hat John Snow geschrieben:
>> On 10/08/2015 08:06 AM, Peter Lieven wrote:
>>> Hi all,
>>>
>>> short summary from my side. The whole thing seems to get complicated,
>>> let me explain why:
>>>
>>> 1) During review I found that the code in ide_atapi_cmd_reply_end can't
>>> work correctly if the
>>> byte_count_limit is not a divider or a multiple of cd_sector_size. The
>>> reason is that as soon
>>> as we load the next sector we start at io_buffer offset 0 overwriting
>>> whatever is left in there
>>> for transfer. We also reset the io_buffer_index to 0 which means if we
>>> continue with the
>>> elementary transfer we always transfer a whole sector (of corrupt data)
>>> regardless if we
>>> are allowed to transfer that much data. Before we consider fixing this I
>>> wonder if it
>>> is legal at all to have an unaligned byte_count_limit. It obviously has
>>> never caused trouble in
>>> practice so maybe its not happening in real life.
>>>
>>
>> I had overlooked that part. Good catch. I do suspect that in practice
>> nobody will be asking for bizarre values.
>>
>> There's no rule against an unaligned byte_count_limit as far as I have
>> read, but suspect nobody would have a reason to use it in practice.
> 
> If we know that our code is technically wrong, "nobody uses it" is not a
> good reason not to fix it. Because sooner or later someone will use it.
> 

Not arguing against fixing it, just speaking to priorities and not
making it worse.

If it were up to me I'd spend the next three months obsessively making
the AHCI emulation spec-perfect, but suspect I'd fix close to zero bugs
actually being observed in the real world.

You can always trust that I want to fix every bug no matter how trivial
or untriggerable in the field. :)

>>> 2) I found that whatever cool optimization I put in to buffer multiple
>>> sectors at once I end
>>> up with code that breaks migration because older versions would either
>>> not fill the io_buffer
>>> as expected or we introduce variables that older versions do not
>>> understand. This will
>>> lead to problems if we migrate in the middle of a transfer.
>>>
>>
>> Ech. This sounds like a bit of a problem. I'll need to think about this
>> one...
> 
> Sounds like a textbook example for subsections to me?
> 
> Kevin
> 

Haven't used them before, personally -- I'll take a look.
Peter Lieven Oct. 14, 2015, 6:19 p.m. UTC | #9
Am 08.10.2015 um 18:44 schrieb John Snow:
>
> On 10/08/2015 08:06 AM, Peter Lieven wrote:
>> Hi all,
>>
>> short summary from my side. The whole thing seems to get complicated,
>> let me explain why:
>>
>> 1) During review I found that the code in ide_atapi_cmd_reply_end can't
>> work correctly if the
>> byte_count_limit is not a divider or a multiple of cd_sector_size. The
>> reason is that as soon
>> as we load the next sector we start at io_buffer offset 0 overwriting
>> whatever is left in there
>> for transfer. We also reset the io_buffer_index to 0 which means if we
>> continue with the
>> elementary transfer we always transfer a whole sector (of corrupt data)
>> regardless if we
>> are allowed to transfer that much data. Before we consider fixing this I
>> wonder if it
>> is legal at all to have an unaligned byte_count_limit. It obviously has
>> never caused trouble in
>> practice so maybe its not happening in real life.
>>
> I had overlooked that part. Good catch. I do suspect that in practice
> nobody will be asking for bizarre values.
>
> There's no rule against an unaligned byte_count_limit as far as I have
> read, but suspect nobody would have a reason to use it in practice.
>
>> 2) I found that whatever cool optimization I put in to buffer multiple
>> sectors at once I end
>> up with code that breaks migration because older versions would either
>> not fill the io_buffer
>> as expected or we introduce variables that older versions do not
>> understand. This will
>> lead to problems if we migrate in the middle of a transfer.
>>
> Ech. This sounds like a bit of a problem. I'll need to think about this
> one...
>
>> 3) My current plan to get this patch to a useful state would be to use
>> my initial patch and just
>> change the code to use a sync request if we need to buffer additional
>> sectors in an elementary
>> transfer. I found that in real world operating systems the
>> byte_count_limit seems to be equal to
>> the cd_sector_size. After all, it's just a PIO transfer; an operating
>> system will likely switch to DMA
>> as soon as the kernel is loaded.
>>
>> Thanks,
>> Peter
>>
> It sounds like that might be "good enough" for now, and won't make
> behavior *worse* than it currently is. You can adjust the test I had
> checked in to not use a "tricky" value and we can amend support for this
> later if desired.

Have you had a chance to look at the series with the "good enough" fix?

Thanks,
Peter
John Snow Oct. 14, 2015, 6:21 p.m. UTC | #10
On 10/14/2015 02:19 PM, Peter Lieven wrote:
> Am 08.10.2015 um 18:44 schrieb John Snow:
>>
>> On 10/08/2015 08:06 AM, Peter Lieven wrote:
>>> Hi all,
>>>
>>> short summary from my side. The whole thing seems to get complicated,
>>> let me explain why:
>>>
>>> 1) During review I found that the code in ide_atapi_cmd_reply_end can't
>>> work correctly if the
>>> byte_count_limit is not a divider or a multiple of cd_sector_size. The
>>> reason is that as soon
>>> as we load the next sector we start at io_buffer offset 0 overwriting
>>> whatever is left in there
>>> for transfer. We also reset the io_buffer_index to 0 which means if we
>>> continue with the
>>> elementary transfer we always transfer a whole sector (of corrupt data)
>>> regardless if we
>>> are allowed to transfer that much data. Before we consider fixing this I
>>> wonder if it
>>> is legal at all to have an unaligned byte_count_limit. It obviously has
>>> never caused trouble in
>>> practice so maybe its not happening in real life.
>>>
>> I had overlooked that part. Good catch. I do suspect that in practice
>> nobody will be asking for bizarre values.
>>
>> There's no rule against an unaligned byte_count_limit as far as I have
>> read, but suspect nobody would have a reason to use it in practice.
>>
>>> 2) I found that whatever cool optimization I put in to buffer multiple
>>> sectors at once I end
>>> up with code that breaks migration because older versions would either
>>> not fill the io_buffer
>>> as expected or we introduce variables that older versions do not
>>> understand. This will
>>> lead to problems if we migrate in the middle of a transfer.
>>>
>> Ech. This sounds like a bit of a problem. I'll need to think about this
>> one...
>>
>>> 3) My current plan to get this patch to a useful state would be to use
>>> my initial patch and just
>>> change the code to use a sync request if we need to buffer additional
>>> sectors in an elementary
>>> transfer. I found that in real world operating systems the
>>> byte_count_limit seems to be equal to
>>> the cd_sector_size. After all its just a PIO transfer an operating
>>> system will likely switch to DMA
>>> as soon as the kernel ist loaded.
>>>
>>> Thanks,
>>> Peter
>>>
>> It sounds like that might be "good enough" for now, and won't make
>> behavior *worse* than it currently is. You can adjust the test I had
>> checked in to not use a "tricky" value and we can amend support for this
>> later if desired.
> 
> Have you had a chance to look at the series with the "good enough" fix?
> 
> Thanks,
> Peter
> 

Will do so Friday, thanks!

--js
Peter Lieven Oct. 16, 2015, 10:56 a.m. UTC | #11
Am 14.10.2015 um 20:21 schrieb John Snow:
>
> On 10/14/2015 02:19 PM, Peter Lieven wrote:
>> Am 08.10.2015 um 18:44 schrieb John Snow:
>>> On 10/08/2015 08:06 AM, Peter Lieven wrote:
>>>> Hi all,
>>>>
>>>> short summary from my side. The whole thing seems to get complicated,
>>>> let me explain why:
>>>>
>>>> 1) During review I found that the code in ide_atapi_cmd_reply_end can't
>>>> work correctly if the
>>>> byte_count_limit is not a divider or a multiple of cd_sector_size. The
>>>> reason is that as soon
>>>> as we load the next sector we start at io_buffer offset 0 overwriting
>>>> whatever is left in there
>>>> for transfer. We also reset the io_buffer_index to 0 which means if we
>>>> continue with the
>>>> elementary transfer we always transfer a whole sector (of corrupt data)
>>>> regardless if we
>>>> are allowed to transfer that much data. Before we consider fixing this I
>>>> wonder if it
>>>> is legal at all to have an unaligned byte_count_limit. It obviously has
>>>> never caused trouble in
>>>> practice so maybe its not happening in real life.
>>>>
>>> I had overlooked that part. Good catch. I do suspect that in practice
>>> nobody will be asking for bizarre values.
>>>
>>> There's no rule against an unaligned byte_count_limit as far as I have
>>> read, but suspect nobody would have a reason to use it in practice.
>>>
>>>> 2) I found that whatever cool optimization I put in to buffer multiple
>>>> sectors at once I end
>>>> up with code that breaks migration because older versions would either
>>>> not fill the io_buffer
>>>> as expected or we introduce variables that older versions do not
>>>> understand. This will
>>>> lead to problems if we migrate in the middle of a transfer.
>>>>
>>> Ech. This sounds like a bit of a problem. I'll need to think about this
>>> one...
>>>
>>>> 3) My current plan to get this patch to a useful state would be to use
>>>> my initial patch and just
>>>> change the code to use a sync request if we need to buffer additional
>>>> sectors in an elementary
>>>> transfer. I found that in real world operating systems the
>>>> byte_count_limit seems to be equal to
>>>> the cd_sector_size. After all, it's just a PIO transfer; an operating
>>>> system will likely switch to DMA
>>>> as soon as the kernel is loaded.
>>>>
>>>> Thanks,
>>>> Peter
>>>>
>>> It sounds like that might be "good enough" for now, and won't make
>>> behavior *worse* than it currently is. You can adjust the test I had
>>> checked in to not use a "tricky" value and we can amend support for this
>>> later if desired.
>> Have you had a chance to look at the series with the "good enough" fix?
>>
>> Thanks,
>> Peter
>>
> Will do so Friday, thanks!

Thank you,
Peter
diff mbox

Patch

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index ab45495..ec2ba89 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -117,37 +117,40 @@  static void cd_read_sector_cb(void *opaque, int ret)
      }

      if (s->cd_sector_size == 2352) {
-        cd_data_to_raw(s->io_buffer, s->lba);
+        int nb_sectors = s->packet_transfer_size / 2352;
+        while (nb_sectors--) {
+            memmove(s->io_buffer + nb_sectors * 2352 + 4,
+                    s->io_buffer + nb_sectors * 2048, 2048);
+            cd_data_to_raw(s->io_buffer + nb_sectors * 2352,
+                           s->lba + nb_sectors);
+        }
      }

-    s->lba++;
+    s->lba = -1;
      s->io_buffer_index = 0;
      s->status &= ~BUSY_STAT;

      ide_atapi_cmd_reply_end(s);
  }

-static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size)
+static int cd_read_sector(IDEState *s, int lba, void *buf, int sector_size, int nb_sectors)
  {
      if (sector_size != 2048 && sector_size != 2352) {
          return -EINVAL;
      }

      s->iov.iov_base = buf;
-    if (sector_size == 2352) {
-        buf += 4;
-    }
-
-    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
+    s->iov.iov_len = nb_sectors * 2048;
      qemu_iovec_init_external(&s->qiov, &s->iov, 1);

-    if (ide_readv_cancelable(s, (int64_t)lba << 2, &s->qiov, 4,
-                      cd_read_sector_cb, s) == NULL) {
+    if (ide_readv_cancelable(s, (int64_t)lba << 2,
+                             &s->qiov, nb_sectors * 4,
+                             cd_read_sector_cb, s) == NULL) {
          return -EIO;
      }

      block_acct_start(blk_get_stats(s->blk), &s->acct,
-                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
+                     nb_sectors * 2048, BLOCK_ACCT_READ);
      s->status |= BUSY_STAT;
      return 0;
  }
@@ -190,7 +193,7 @@  void ide_atapi_io_error(IDEState *s, int ret)
  /* The whole ATAPI transfer logic is handled in this function */
  void ide_atapi_cmd_reply_end(IDEState *s)
  {
-    int byte_count_limit, size, ret;
+    int byte_count_limit, size;
  #ifdef DEBUG_IDE_ATAPI
      printf("reply: tx_size=%d elem_tx_size=%d index=%d\n",
             s->packet_transfer_size,
@@ -205,14 +208,6 @@  void ide_atapi_cmd_reply_end(IDEState *s)
          printf("status=0x%x\n", s->status);
  #endif
      } else {
-        /* see if a new sector must be read */
-        if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) {
-            ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
-            if (ret < 0) {
-                ide_atapi_io_error(s, ret);
-            }
-            return;
-        }
          if (s->elementary_transfer_size > 0) {
              /* there are some data left to transmit in this elementary
                 transfer */
@@ -287,13 +282,18 @@  static void ide_atapi_cmd_reply(IDEState *s, int size, int max_size)
  static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
                                     int sector_size)
  {
+    int ret;
      s->lba = lba;
      s->packet_transfer_size = nb_sectors * sector_size;
+    assert(s->packet_transfer_size <=
+           IDE_DMA_BUF_SECTORS * BDRV_SECTOR_SIZE + 4);
      s->elementary_transfer_size = 0;
-    s->io_buffer_index = sector_size;
      s->cd_sector_size = sector_size;
-
-    ide_atapi_cmd_reply_end(s);
+    ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size,
+                         nb_sectors);
+    if (ret < 0) {
+        ide_atapi_io_error(s, ret);
+    }
  }

  static void ide_atapi_cmd_check_status(IDEState *s)