diff mbox

[RFC] Block device size rounding

Message ID 1444446115-3796-1-git-send-email-crosthwaite.peter@gmail.com
State New
Headers show

Commit Message

Peter Crosthwaite Oct. 10, 2015, 3:01 a.m. UTC
I have an interesting problem with SD cards, where if you pass a block
device that is not multiple-of-512k size the last bit gets chopped off.
The problem is the card can only report a 512kX size to the guest, so
a significant rounding is needed one way or the other. The current
round-down policy causes crashing boots because parts of my guest
file-system are missing.

The below patch works around it, by changing to round-up and simply
ignoring reads and writes past the end of the block device file.

What is the correct action here though? If the file is writeable should
we just allow the device to extend its size? Is that possible already?
Just zero-pad read-only?

The same could be applied to pflash, where the device init barfs if the
backing file is too small (the devices are inited of a constant size,
not based on the block device size).

Requiring the user to pad files in a device dependent way is a little
user-unfriendly.

Regards,
Peter
---
 hw/sd/sd.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

Comments

John Snow Oct. 12, 2015, 3:56 p.m. UTC | #1
On 10/09/2015 11:01 PM, Peter Crosthwaite wrote:
> I have in interesting problem with SD cards, where if you pass a block
> device that is not multiple-of-512k size the last bit gets chopped off.
> The problem is the card can only report a 512kX size to the guest, so
> a significant rounding is needed one way or the other. The current
> round-down policy causes crashing boots because parts of my guest
> file-system are missing.
> 
> The below patch works around it, by changing to round-up and simply
> ignoring reads and writes past the end of the block device file.
> 
> What is the correct action here though? If the file is writeable should
> we just allow the device to extend its size? Is that possible already?
> Just zero-pad read-only?
> 

Read-only seems like an easy case of append zeroes.

Read-write ... well, we can't write-protect just half of a 512k block.
Forcibly extending the size might be the only viable solution.

I would almost suggest doing a sparse allocation for the remainder of
the block and don't extend the physical size until the first write to
that block, but then we have the strange situation where:

- QEMU may extend your image according to the device model, sometimes
- Or sometimes not.

People don't seem to like unpredictability much, so maybe just outright
extending the image is the best thing to do, because then it can be
documented.

I also suppose an interactive warning prompt wouldn't work either ("Hey,
this file is a pinch too small for this device, may I extend it y/n?"),
since there's no existing protocol for negotiating that sort of thing
and it might cause management layers to explode...

Just forcibly increasing the size on RW or refusing to use the
file altogether are probably the sane deterministic things we want.


> The same could be applied to pflash, where the device init barfs if the
> backing file is too small (the devices are inited of a constant size,
> not based on the block device size).
> 
> Requiring the user to pad files in a device dependent way is a little
> user-unfriendly.
> 
> Regards,
> Peter
> ---
>  hw/sd/sd.c | 19 +++++++++++++------
>  1 file changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/sd/sd.c b/hw/sd/sd.c
> index 3e2a451..539bb72 100644
> --- a/hw/sd/sd.c
> +++ b/hw/sd/sd.c
> @@ -248,13 +248,18 @@ static const uint8_t sd_csd_rw_mask[16] = {
>      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0xfe,
>  };
>  
> -static void sd_set_csd(SDState *sd, uint64_t size)
> +static uint64_t sd_set_csd(SDState *sd, uint64_t size)
>  {
> -    uint32_t csize = (size >> (CMULT_SHIFT + HWBLOCK_SHIFT)) - 1;
> -    uint32_t sectsize = (1 << (SECTOR_SHIFT + 1)) - 1;
> -    uint32_t wpsize = (1 << (WPGROUP_SHIFT + 1)) - 1;
> +    uint64_t actual_size;
>  
>      if (size <= 0x40000000) {	/* Standard Capacity SD */
> +        uint32_t sectsize = (1 << (SECTOR_SHIFT + 1)) - 1;
> +        uint32_t wpsize = (1 << (WPGROUP_SHIFT + 1)) - 1;
> +        uint32_t csize;
> +
> +        actual_size = ROUND_UP(size, 1 << (CMULT_SHIFT + HWBLOCK_SHIFT));
> +        csize = (actual_size >> (CMULT_SHIFT + HWBLOCK_SHIFT)) - 1;
> +
>          sd->csd[0] = 0x00;	/* CSD structure */
>          sd->csd[1] = 0x26;	/* Data read access-time-1 */
>          sd->csd[2] = 0x00;	/* Data read access-time-2 */
> @@ -281,7 +286,8 @@ static void sd_set_csd(SDState *sd, uint64_t size)
>          sd->csd[14] = 0x00;	/* File format group */
>          sd->csd[15] = (sd_crc7(sd->csd, 15) << 1) | 1;
>      } else {			/* SDHC */
> -        size /= 512 * 1024;
> +        actual_size = ROUND_UP(size, 512 * 1024);
> +        size = actual_size / (512 * 1024);
>          size -= 1;
>          sd->csd[0] = 0x40;
>          sd->csd[1] = 0x0e;
> @@ -301,6 +307,7 @@ static void sd_set_csd(SDState *sd, uint64_t size)
>          sd->csd[15] = 0x00;
>          sd->ocr |= 1 << 30;     /* High Capacity SD Memory Card */
>      }
> +    return actual_size;
>  }
>  
>  static void sd_set_rca(SDState *sd)
> @@ -408,7 +415,7 @@ static void sd_reset(SDState *sd)
>      sd_set_ocr(sd);
>      sd_set_scr(sd);
>      sd_set_cid(sd);
> -    sd_set_csd(sd, size);
> +    size = sd_set_csd(sd, size);
>      sd_set_cardstatus(sd);
>      sd_set_sdstatus(sd);
>  
>
Eric Blake Oct. 12, 2015, 4:26 p.m. UTC | #2
On 10/12/2015 09:56 AM, John Snow wrote:

>> What is the correct action here though? If the file is writeable should
>> we just allow the device to extend its size? Is that possible already?
>> Just zero-pad read-only?
>>
> 
> Read-only seems like an easy case of append zeroes.

Yes, allowing read-only with append-zero behavior seems sane.

> 
> Read-write ... well, we can't write-protect just half of a 512k block.

> Probably just forcibly increasing the size on RW or refusing to use the
> file altogether are probably the sane deterministic things we want.

I'd lean towards outright rejection if the file size isn't up to snuff
for use as read-write.  Forcibly increasing the size (done
unconditionally) still feels like magic, and may not be possible if the
size is due to something backed by a block device rather than a file.
Peter Crosthwaite Oct. 12, 2015, 6:09 p.m. UTC | #3
On Mon, Oct 12, 2015 at 9:26 AM, Eric Blake <eblake@redhat.com> wrote:
> On 10/12/2015 09:56 AM, John Snow wrote:
>
>>> What is the correct action here though? If the file is writeable should
>>> we just allow the device to extend its size? Is that possible already?
>>> Just zero-pad read-only?
>>>
>>
>> Read-only seems like an easy case of append zeroes.
>
> Yes, allowing read-only with append-zero behavior seems sane.
>
>>
>> Read-write ... well, we can't write-protect just half of a 512k block.
>
>> Probably just forcibly increasing the size on RW or refusing to use the
>> file altogether are probably the sane deterministic things we want.
>
> I'd lean towards outright rejection if the file size isn't up to snuff
> for use as read-write.  Forcibly increasing the size (done
> unconditionally) still feels like magic, and may not be possible if the
> size is due to something backed by a block device rather than a file.
>

Inability to extend is easily detectable and can become a failure mode
in its own right. If we can't extend the file perhaps we can just
LOG_UNIMP the data writes? Having to include in your user instructions
"dd your already-on-SATA file system to this container just so it can
work for SD" is a pain.

Regards,
Peter

> --
> Eric Blake   eblake redhat com    +1-919-301-3266
> Libvirt virtualization library http://libvirt.org
>
John Snow Oct. 12, 2015, 6:26 p.m. UTC | #4
On 10/12/2015 02:09 PM, Peter Crosthwaite wrote:
> On Mon, Oct 12, 2015 at 9:26 AM, Eric Blake <eblake@redhat.com> wrote:
>> On 10/12/2015 09:56 AM, John Snow wrote:
>>
>>>> What is the correct action here though? If the file is writeable should
>>>> we just allow the device to extend its size? Is that possible already?
>>>> Just zero-pad read-only?
>>>>
>>>
>>> Read-only seems like an easy case of append zeroes.
>>
>> Yes, allowing read-only with append-zero behavior seems sane.
>>
>>>
>>> Read-write ... well, we can't write-protect just half of a 512k block.
>>
>>> Probably just forcibly increasing the size on RW or refusing to use the
>>> file altogether are probably the sane deterministic things we want.
>>
>> I'd lean towards outright rejection if the file size isn't up to snuff
>> for use as read-write.  Forcibly increasing the size (done
>> unconditionally) still feels like magic, and may not be possible if the
>> size is due to something backed by a block device rather than a file.
>>
> 
> Inability to extend is easily detectable and can become a failure mode
> in it's own right. If we cant extend the file perhaps we can just
> LOG_UNIMP the data writes? Having to include in your user instructions
> "dd your already-on-SATA file system to this container just so it can
> work for SD" is a pain.
> 
> Regards,
> Peter
> 

Fits within my "Always extend the size" answer. Failing to do so is a
good cause to fail.

I'm not sure if this is the sort of thing that might require an extra
flag or option for compatibility reasons or not, though. If there is no
precedent for QEMU resizing a block device to make it compatible with a
particular device model, it's probably reasonable that no management
tool is expecting this to happen automatically either.

Then again, it's still annoying that the current default is definitely
broken.

I think this is going to boil down into an interface-and-expectations
argument. I am otherwise in favor of just forcing the resize whenever
possible and failing when it isn't.

>> --
>> Eric Blake   eblake redhat com    +1-919-301-3266
>> Libvirt virtualization library http://libvirt.org
>>
Markus Armbruster Oct. 13, 2015, 7:16 a.m. UTC | #5
John Snow <jsnow@redhat.com> writes:

> On 10/12/2015 02:09 PM, Peter Crosthwaite wrote:
>> On Mon, Oct 12, 2015 at 9:26 AM, Eric Blake <eblake@redhat.com> wrote:
>>> On 10/12/2015 09:56 AM, John Snow wrote:
>>>
>>>>> What is the correct action here though? If the file is writeable should
>>>>> we just allow the device to extend its size? Is that possible already?
>>>>> Just zero-pad read-only?
>>>>>
>>>>
>>>> Read-only seems like an easy case of append zeroes.
>>>
>>> Yes, allowing read-only with append-zero behavior seems sane.

This is tolerable.  Do we want to warn?

A reopen can bring in the read/write case.

>>>> Read-write ... well, we can't write-protect just half of a 512k block.
>>>
>>>> Probably just forcibly increasing the size on RW or refusing to use the
>>>> file altogether are probably the sane deterministic things we want.
>>>
>>> I'd lean towards outright rejection if the file size isn't up to snuff
>>> for use as read-write.  Forcibly increasing the size (done
>>> unconditionally) still feels like magic, and may not be possible if the
>>> size is due to something backed by a block device rather than a file.

Concur.

>> Inability to extend is easily detectable and can become a failure mode
>> in it's own right. If we cant extend the file perhaps we can just
>> LOG_UNIMP the data writes? Having to include in your user instructions
>> "dd your already-on-SATA file system to this container just so it can
>> work for SD" is a pain.

Whenever QEMU proper can extend to the right size, qemu-img should be
able to do so as well, shouldn't it?  QEMU's error message could even
explain how.

> Fits within my "Always extend the size" answer. Failing to do so is a
> good cause to fail.
>
> I'm not sure if this is the sort of thing that might require an extra
> flag or option for compatibility reasons or not, though. If there is no
> precedent for QEMU resizing a block device to make it compatible with a
> particular device model, it's probably reasonable that no management
> tool is expecting this to happen automatically either.
>
> Then again, it's still annoying that the current default is definitely
> broken.
>
> I think this is going to boil down into an interface-and-expectations
> argument. I am otherwise in favor of just forcing the resize whenever
> possible and failing when it isn't.

I agree it's about expectations.

When I give QEMU read/write access to an image, I expect it to modify
the image, but I don't expect it to resize it on its own.  Perhaps my
expectation is wrong.  Do we have precedent?
Kevin Wolf Oct. 13, 2015, 9:14 a.m. UTC | #6
Am 12.10.2015 um 20:26 hat John Snow geschrieben:
> 
> 
> On 10/12/2015 02:09 PM, Peter Crosthwaite wrote:
> > On Mon, Oct 12, 2015 at 9:26 AM, Eric Blake <eblake@redhat.com> wrote:
> >> On 10/12/2015 09:56 AM, John Snow wrote:
> >>
> >>>> What is the correct action here though? If the file is writeable should
> >>>> we just allow the device to extend its size? Is that possible already?
> >>>> Just zero-pad read-only?
> >>>>
> >>>
> >>> Read-only seems like an easy case of append zeroes.
> >>
> >> Yes, allowing read-only with append-zero behavior seems sane.
> >>
> >>>
> >>> Read-write ... well, we can't write-protect just half of a 512k block.
> >>
> >>> Probably just forcibly increasing the size on RW or refusing to use the
> >>> file altogether are probably the sane deterministic things we want.
> >>
> >> I'd lean towards outright rejection if the file size isn't up to snuff
> >> for use as read-write.  Forcibly increasing the size (done
> >> unconditionally) still feels like magic, and may not be possible if the
> >> size is due to something backed by a block device rather than a file.

Agreed, let's just reject the image for r/w. Image resize should always
be an explicit action invoked by the user, not a side effect of using
the image with a specific device.

> > Inability to extend is easily detectable and can become a failure mode
> > in it's own right. If we cant extend the file perhaps we can just
> > LOG_UNIMP the data writes? Having to include in your user instructions
> > "dd your already-on-SATA file system to this container just so it can
> > work for SD" is a pain.
> > 
> > Regards,
> > Peter
> > 
> 
> Fits within my "Always extend the size" answer. Failing to do so is a
> good cause to fail.
> 
> I'm not sure if this is the sort of thing that might require an extra
> flag or option for compatibility reasons or not, though. If there is no
> precedent for QEMU resizing a block device to make it compatible with a
> particular device model, it's probably reasonable that no management
> tool is expecting this to happen automatically either.
> 
> Then again, it's still annoying that the current default is definitely
> broken.

That's not so clear to me. Strictly speaking, this is really a user
error because the user passed an image that isn't suitable for the
device. All we're discussing is how to handle this user error in a friendlier way.

Maybe we should take a step back: What's the specific use case here,
i.e. where does the misaligned image come from and what is it used for?
I assume this is not an image created with qemu-img, because then the
obvious options would already result in an aligned size.

> I think this is going to boil down into an interface-and-expectations
> argument. I am otherwise in favor of just forcing the resize whenever
> possible and failing when it isn't.

I'm strongly objecting to any automagic resizing of images.

Kevin
Peter Crosthwaite Oct. 13, 2015, 3:30 p.m. UTC | #7
On Tue, Oct 13, 2015 at 2:14 AM, Kevin Wolf <kwolf@redhat.com> wrote:
> Am 12.10.2015 um 20:26 hat John Snow geschrieben:
>>
>>
>> On 10/12/2015 02:09 PM, Peter Crosthwaite wrote:
>> > On Mon, Oct 12, 2015 at 9:26 AM, Eric Blake <eblake@redhat.com> wrote:
>> >> On 10/12/2015 09:56 AM, John Snow wrote:
>> >>
>> >>>> What is the correct action here though? If the file is writeable should
>> >>>> we just allow the device to extend its size? Is that possible already?
>> >>>> Just zero-pad read-only?
>> >>>>
>> >>>
>> >>> Read-only seems like an easy case of append zeroes.
>> >>
>> >> Yes, allowing read-only with append-zero behavior seems sane.
>> >>
>> >>>
>> >>> Read-write ... well, we can't write-protect just half of a 512k block.
>> >>
>> >>> Probably just forcibly increasing the size on RW or refusing to use the
>> >>> file altogether are probably the sane deterministic things we want.
>> >>
>> >> I'd lean towards outright rejection if the file size isn't up to snuff
>> >> for use as read-write.  Forcibly increasing the size (done
>> >> unconditionally) still feels like magic, and may not be possible if the
>> >> size is due to something backed by a block device rather than a file.
>
> Agreed, let's just reject the image for r/w. Image resize should always
> been an explicit action invoked by the user, not a side effect of using
> the image with a specific device.
>
>> > Inability to extend is easily detectable and can become a failure mode
>> > in it's own right. If we cant extend the file perhaps we can just
>> > LOG_UNIMP the data writes? Having to include in your user instructions
>> > "dd your already-on-SATA file system to this container just so it can
>> > work for SD" is a pain.
>> >
>> > Regards,
>> > Peter
>> >
>>
>> Fits within my "Always extend the size" answer. Failing to do so is a
>> good cause to fail.
>>
>> I'm not sure if this is the sort of thing that might require an extra
>> flag or option for compatibility reasons or not, though. If there is no
>> precedent for QEMU resizing a block device to make it compatible with a
>> particular device model, it's probably reasonable that no management
>> tool is expecting this to happen automatically either.
>>
>> Then again, it's still annoying that the current default is definitely
>> broken.
>
> That's not so clear to me. Strictly speaking, this is really a user
> error because the user passed an image that isn't suitable for the
> device. All we're discussing is handling this user error friendlier.
>
> Maybe we should take a step back: What's the specific use case here,
> i.e. where does the misaligned image come from and what is it used for?

An ext filesystem image built by the Yocto build system. It is passed
straight to QEMU as a raw image. The user does not create disk images,
they are done by the build system. Note that the build system is not
QEMU specific, it is designed to target either QEMU or be used for
some form of real-hardware deployment so padding there is
inappropriate.

> I assume this is not an image created with qemu-img, because then the

I am not using qemu-img at all.

> obvious options would already result in an aligned size.
>

Maybe. What is the alignment of qemu-img? Note this requires 512K
alignment, which is kinda huge.

>> I think this is going to boil down into an interface-and-expectations
>> argument. I am otherwise in favor of just forcing the resize whenever
>> possible and failing when it isn't.
>
> I'm strongly objecting to any automagic resizing of images.
>

Can we LOG_UNIMP writes to the missing sectors? Then the user can RW to
the in-band sectors which should contain the limit of a pre-existing
filesystem.

Regards,
Peter

> Kevin
John Snow Oct. 13, 2015, 3:51 p.m. UTC | #8
On 10/13/2015 11:30 AM, Peter Crosthwaite wrote:
> On Tue, Oct 13, 2015 at 2:14 AM, Kevin Wolf <kwolf@redhat.com> wrote:
>> Am 12.10.2015 um 20:26 hat John Snow geschrieben:
>>>
>>>
>>> On 10/12/2015 02:09 PM, Peter Crosthwaite wrote:
>>>> On Mon, Oct 12, 2015 at 9:26 AM, Eric Blake <eblake@redhat.com> wrote:
>>>>> On 10/12/2015 09:56 AM, John Snow wrote:
>>>>>
>>>>>>> What is the correct action here though? If the file is writeable should
>>>>>>> we just allow the device to extend its size? Is that possible already?
>>>>>>> Just zero-pad read-only?
>>>>>>>
>>>>>>
>>>>>> Read-only seems like an easy case of append zeroes.
>>>>>
>>>>> Yes, allowing read-only with append-zero behavior seems sane.
>>>>>
>>>>>>
>>>>>> Read-write ... well, we can't write-protect just half of a 512k block.
>>>>>
>>>>>> Probably just forcibly increasing the size on RW or refusing to use the
>>>>>> file altogether are probably the sane deterministic things we want.
>>>>>
>>>>> I'd lean towards outright rejection if the file size isn't up to snuff
>>>>> for use as read-write.  Forcibly increasing the size (done
>>>>> unconditionally) still feels like magic, and may not be possible if the
>>>>> size is due to something backed by a block device rather than a file.
>>
>> Agreed, let's just reject the image for r/w. Image resize should always
>> been an explicit action invoked by the user, not a side effect of using
>> the image with a specific device.
>>
>>>> Inability to extend is easily detectable and can become a failure mode
>>>> in it's own right. If we cant extend the file perhaps we can just
>>>> LOG_UNIMP the data writes? Having to include in your user instructions
>>>> "dd your already-on-SATA file system to this container just so it can
>>>> work for SD" is a pain.
>>>>
>>>> Regards,
>>>> Peter
>>>>
>>>
>>> Fits within my "Always extend the size" answer. Failing to do so is a
>>> good cause to fail.
>>>
>>> I'm not sure if this is the sort of thing that might require an extra
>>> flag or option for compatibility reasons or not, though. If there is no
>>> precedent for QEMU resizing a block device to make it compatible with a
>>> particular device model, it's probably reasonable that no management
>>> tool is expecting this to happen automatically either.
>>>
>>> Then again, it's still annoying that the current default is definitely
>>> broken.
>>
>> That's not so clear to me. Strictly speaking, this is really a user
>> error because the user passed an image that isn't suitable for the
>> device. All we're discussing is handling this user error friendlier.
>>
>> Maybe we should take a step back: What's the specific use case here,
>> i.e. where does the misaligned image come from and what is it used for?
> 
> An ext filesystem image built by the Yocto build system. It is passed
> straight to QEMU as a raw image. The user does not create disk images,
> they are done by the build system. Note that the build system is not
> QEMU specific, it is designed to target either QEMU or be used for
> some form of real-hardware deployment so padding there is
> inappropriate.
> 
>> I assume this is not an image created with qemu-img, because then the
> 
> I am not using qemu-img at all.
> 
>> obvious options would already result in an aligned size.
>>
> 
> Maybe. What is the alignment of qemu-img? Note this requires 512K
> alignment, which is kinda huge.
> 
>>> I think this is going to boil down into an interface-and-expectations
>>> argument. I am otherwise in favor of just forcing the resize whenever
>>> possible and failing when it isn't.
>>
>> I'm strongly objecting to any automagic resizing of images.
>>
> 
> Can we LOG_UNIMP writes to the missing sectors? The the user can RW to
> the in-band sectors which should contain the limit of a pre-existing
> filesystem.
> 

This sounds potentially dangerous. Do we know for sure any data written
here is unimportant?

If it's all zeroes, we can probably guess it's unimportant. As soon as
any non-zero data lands up in this extension range... how do we assert
that this is garbage?

I don't think we can...

> Regards,
> Peter
> 
>> Kevin
Kevin Wolf Oct. 14, 2015, 8:36 a.m. UTC | #9
Am 13.10.2015 um 17:51 hat John Snow geschrieben:
> 
> 
> On 10/13/2015 11:30 AM, Peter Crosthwaite wrote:
> > On Tue, Oct 13, 2015 at 2:14 AM, Kevin Wolf <kwolf@redhat.com> wrote:
> >> Am 12.10.2015 um 20:26 hat John Snow geschrieben:
> >>>
> >>>
> >>> On 10/12/2015 02:09 PM, Peter Crosthwaite wrote:
> >>>> On Mon, Oct 12, 2015 at 9:26 AM, Eric Blake <eblake@redhat.com> wrote:
> >>>>> On 10/12/2015 09:56 AM, John Snow wrote:
> >>>>>
> >>>>>>> What is the correct action here though? If the file is writeable should
> >>>>>>> we just allow the device to extend its size? Is that possible already?
> >>>>>>> Just zero-pad read-only?
> >>>>>>>
> >>>>>>
> >>>>>> Read-only seems like an easy case of append zeroes.
> >>>>>
> >>>>> Yes, allowing read-only with append-zero behavior seems sane.
> >>>>>
> >>>>>>
> >>>>>> Read-write ... well, we can't write-protect just half of a 512k block.
> >>>>>
> >>>>>> Probably just forcibly increasing the size on RW or refusing to use the
> >>>>>> file altogether are probably the sane deterministic things we want.
> >>>>>
> >>>>> I'd lean towards outright rejection if the file size isn't up to snuff
> >>>>> for use as read-write.  Forcibly increasing the size (done
> >>>>> unconditionally) still feels like magic, and may not be possible if the
> >>>>> size is due to something backed by a block device rather than a file.
> >>
> >> Agreed, let's just reject the image for r/w. Image resize should always
> >> been an explicit action invoked by the user, not a side effect of using
> >> the image with a specific device.
> >>
> >>>> Inability to extend is easily detectable and can become a failure mode
> >>>> in it's own right. If we cant extend the file perhaps we can just
> >>>> LOG_UNIMP the data writes? Having to include in your user instructions
> >>>> "dd your already-on-SATA file system to this container just so it can
> >>>> work for SD" is a pain.
> >>>>
> >>>> Regards,
> >>>> Peter
> >>>>
> >>>
> >>> Fits within my "Always extend the size" answer. Failing to do so is a
> >>> good cause to fail.
> >>>
> >>> I'm not sure if this is the sort of thing that might require an extra
> >>> flag or option for compatibility reasons or not, though. If there is no
> >>> precedent for QEMU resizing a block device to make it compatible with a
> >>> particular device model, it's probably reasonable that no management
> >>> tool is expecting this to happen automatically either.
> >>>
> >>> Then again, it's still annoying that the current default is definitely
> >>> broken.
> >>
> >> That's not so clear to me. Strictly speaking, this is really a user
> >> error because the user passed an image that isn't suitable for the
> >> device. All we're discussing is handling this user error friendlier.
> >>
> >> Maybe we should take a step back: What's the specific use case here,
> >> i.e. where does the misaligned image come from and what is it used for?
> > 
> > An ext filesystem image built by the Yocto build system. It is passed
> > straight to QEMU as a raw image. The user does not create disk images,
> > they are done by the build system. Note that the build system is not
> > QEMU specific, it is designed to target either QEMU or be used for
> > some form of real-hardware deployment so padding there is
> > inappropriate.
> > 
> >> I assume this is not an image created with qemu-img, because then the
> > 
> > I am not using qemu-img at all.
> > 
> >> obvious options would already result in an aligned size.
> >>
> > 
> > Maybe. What is the alignment of qemu-img? Note this requires 512K
> > alignment, which is kinda huge.
> > 
> >>> I think this is going to boil down into an interface-and-expectations
> >>> argument. I am otherwise in favor of just forcing the resize whenever
> >>> possible and failing when it isn't.
> >>
> >> I'm strongly objecting to any automagic resizing of images.
> >>
> > 
> > Can we LOG_UNIMP writes to the missing sectors? The the user can RW to
> > the in-band sectors which should contain the limit of a pre-existing
> > filesystem.
> > 
> 
> This sounds potentially dangerous. Do we know for sure any data written
> here is unimportant?
> 
> If it's all zeroes, we can probably guess it's unimportant. As soon as
> any non-zero data lands up in this extension range... how do we assert
> that this is garbage?
> 
> I don't think we can...

Can we return write errors to the guest? If so, and we know that
normally the guest shouldn't even try to access the area after the
filesystem, it might be reasonable enough to just return write errors in
the area that isn't covered by the image. Probably makes the guest
unhappy, but it's a bad guest anyway if it tries to write there.

In that case, the device model should just round up the size, and the
block layer will automatically fail anything touching areas beyond the
image size.

Kevin
John Snow Oct. 16, 2015, 5:04 p.m. UTC | #10
On 10/14/2015 04:36 AM, Kevin Wolf wrote:
> Am 13.10.2015 um 17:51 hat John Snow geschrieben:
>>
>>
>> On 10/13/2015 11:30 AM, Peter Crosthwaite wrote:
>>> On Tue, Oct 13, 2015 at 2:14 AM, Kevin Wolf <kwolf@redhat.com> wrote:
>>>> Am 12.10.2015 um 20:26 hat John Snow geschrieben:
>>>>>
>>>>>
>>>>> On 10/12/2015 02:09 PM, Peter Crosthwaite wrote:
>>>>>> On Mon, Oct 12, 2015 at 9:26 AM, Eric Blake <eblake@redhat.com> wrote:
>>>>>>> On 10/12/2015 09:56 AM, John Snow wrote:
>>>>>>>
>>>>>>>>> What is the correct action here though? If the file is writeable should
>>>>>>>>> we just allow the device to extend its size? Is that possible already?
>>>>>>>>> Just zero-pad read-only?
>>>>>>>>>
>>>>>>>>
>>>>>>>> Read-only seems like an easy case of append zeroes.
>>>>>>>
>>>>>>> Yes, allowing read-only with append-zero behavior seems sane.
>>>>>>>
>>>>>>>>
>>>>>>>> Read-write ... well, we can't write-protect just half of a 512k block.
>>>>>>>
>>>>>>>> Probably just forcibly increasing the size on RW or refusing to use the
>>>>>>>> file altogether are probably the sane deterministic things we want.
>>>>>>>
>>>>>>> I'd lean towards outright rejection if the file size isn't up to snuff
>>>>>>> for use as read-write.  Forcibly increasing the size (done
>>>>>>> unconditionally) still feels like magic, and may not be possible if the
>>>>>>> size is due to something backed by a block device rather than a file.
>>>>
>>>> Agreed, let's just reject the image for r/w. Image resize should always
>>>> been an explicit action invoked by the user, not a side effect of using
>>>> the image with a specific device.
>>>>
>>>>>> Inability to extend is easily detectable and can become a failure mode
>>>>>> in it's own right. If we cant extend the file perhaps we can just
>>>>>> LOG_UNIMP the data writes? Having to include in your user instructions
>>>>>> "dd your already-on-SATA file system to this container just so it can
>>>>>> work for SD" is a pain.
>>>>>>
>>>>>> Regards,
>>>>>> Peter
>>>>>>
>>>>>
>>>>> Fits within my "Always extend the size" answer. Failing to do so is a
>>>>> good cause to fail.
>>>>>
>>>>> I'm not sure if this is the sort of thing that might require an extra
>>>>> flag or option for compatibility reasons or not, though. If there is no
>>>>> precedent for QEMU resizing a block device to make it compatible with a
>>>>> particular device model, it's probably reasonable that no management
>>>>> tool is expecting this to happen automatically either.
>>>>>
>>>>> Then again, it's still annoying that the current default is definitely
>>>>> broken.
>>>>
>>>> That's not so clear to me. Strictly speaking, this is really a user
>>>> error because the user passed an image that isn't suitable for the
>>>> device. All we're discussing is handling this user error friendlier.
>>>>
>>>> Maybe we should take a step back: What's the specific use case here,
>>>> i.e. where does the misaligned image come from and what is it used for?
>>>
>>> An ext filesystem image built by the Yocto build system. It is passed
>>> straight to QEMU as a raw image. The user does not create disk images,
>>> they are done by the build system. Note that the build system is not
>>> QEMU specific, it is designed to target either QEMU or be used for
>>> some form of real-hardware deployment so padding there is
>>> inappropriate.
>>>
>>>> I assume this is not an image created with qemu-img, because then the
>>>
>>> I am not using qemu-img at all.
>>>
>>>> obvious options would already result in an aligned size.
>>>>
>>>
>>> Maybe. What is the alignment of qemu-img? Note this requires 512K
>>> alignment, which is kinda huge.
>>>
>>>>> I think this is going to boil down into an interface-and-expectations
>>>>> argument. I am otherwise in favor of just forcing the resize whenever
>>>>> possible and failing when it isn't.
>>>>
>>>> I'm strongly objecting to any automagic resizing of images.
>>>>
>>>
>>> Can we LOG_UNIMP writes to the missing sectors? Then the user can RW to
>>> the in-band sectors which should contain the limit of a pre-existing
>>> filesystem.
>>>
>>
>> This sounds potentially dangerous. Do we know for sure any data written
>> here is unimportant?
>>
>> If it's all zeroes, we can probably guess it's unimportant. As soon as
>> any non-zero data lands up in this extension range... how do we assert
>> that this is garbage?
>>
>> I don't think we can...
> 
> Can we return write errors to the guest? If so, and we know that
> normally the guest shouldn't even try to access the area after the
> filesystem, it might be reasonable enough to just return write errors in
> the area that isn't covered by the image. Probably makes the guest
> unhappy, but it's a bad guest anyway if it tries to write there.
> 
> In that case, the device model should just round up the size, and the
> block layer will automatically fail anything touching areas beyond the
> image size.
> 
> Kevin
> 

Maybe as an option?

This would break re-formatting, right? It might still be nice as a
low-hassle option, though.
Peter Crosthwaite Oct. 16, 2015, 6:10 p.m. UTC | #11
On Fri, Oct 16, 2015 at 10:04 AM, John Snow <jsnow@redhat.com> wrote:
>
>
> On 10/14/2015 04:36 AM, Kevin Wolf wrote:
>> Am 13.10.2015 um 17:51 hat John Snow geschrieben:
>>>
>>>
>>> On 10/13/2015 11:30 AM, Peter Crosthwaite wrote:
>>>> On Tue, Oct 13, 2015 at 2:14 AM, Kevin Wolf <kwolf@redhat.com> wrote:
>>>>> Am 12.10.2015 um 20:26 hat John Snow geschrieben:
>>>>>>
>>>>>>
>>>>>> On 10/12/2015 02:09 PM, Peter Crosthwaite wrote:
>>>>>>> On Mon, Oct 12, 2015 at 9:26 AM, Eric Blake <eblake@redhat.com> wrote:
>>>>>>>> On 10/12/2015 09:56 AM, John Snow wrote:
>>>>>>>>
>>>>>>>>>> What is the correct action here though? If the file is writeable should
>>>>>>>>>> we just allow the device to extend its size? Is that possible already?
>>>>>>>>>> Just zero-pad read-only?
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Read-only seems like an easy case of append zeroes.
>>>>>>>>
>>>>>>>> Yes, allowing read-only with append-zero behavior seems sane.
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Read-write ... well, we can't write-protect just half of a 512k block.
>>>>>>>>
>>>>>>>>> Probably just forcibly increasing the size on RW or refusing to use the
>>>>>>>>> file altogether are probably the sane deterministic things we want.
>>>>>>>>
>>>>>>>> I'd lean towards outright rejection if the file size isn't up to snuff
>>>>>>>> for use as read-write.  Forcibly increasing the size (done
>>>>>>>> unconditionally) still feels like magic, and may not be possible if the
>>>>>>>> size is due to something backed by a block device rather than a file.
>>>>>
>>>>> Agreed, let's just reject the image for r/w. Image resize should always
>>>>> be an explicit action invoked by the user, not a side effect of using
>>>>> the image with a specific device.
>>>>>
>>>>>>> Inability to extend is easily detectable and can become a failure mode
>>>>>>> in its own right. If we can't extend the file perhaps we can just
>>>>>>> LOG_UNIMP the data writes? Having to include in your user instructions
>>>>>>> "dd your already-on-SATA file system to this container just so it can
>>>>>>> work for SD" is a pain.
>>>>>>>
>>>>>>> Regards,
>>>>>>> Peter
>>>>>>>
>>>>>>
>>>>>> Fits within my "Always extend the size" answer. Failing to do so is a
>>>>>> good cause to fail.
>>>>>>
>>>>>> I'm not sure if this is the sort of thing that might require an extra
>>>>>> flag or option for compatibility reasons or not, though. If there is no
>>>>>> precedent for QEMU resizing a block device to make it compatible with a
>>>>>> particular device model, it's probably reasonable that no management
>>>>>> tool is expecting this to happen automatically either.
>>>>>>
>>>>>> Then again, it's still annoying that the current default is definitely
>>>>>> broken.
>>>>>
>>>>> That's not so clear to me. Strictly speaking, this is really a user
>>>>> error because the user passed an image that isn't suitable for the
>>>>> device. All we're discussing is handling this user error friendlier.
>>>>>
>>>>> Maybe we should take a step back: What's the specific use case here,
>>>>> i.e. where does the misaligned image come from and what is it used for?
>>>>
>>>> An ext filesystem image built by the Yocto build system. It is passed
>>>> straight to QEMU as a raw image. The user does not create disk images,
>>>> they are done by the build system. Note that the build system is not
>>>> QEMU specific, it is designed to target either QEMU or be used for
>>>> some form of real-hardware deployment so padding there is
>>>> inappropriate.
>>>>
>>>>> I assume this is not an image created with qemu-img, because then the
>>>>
>>>> I am not using qemu-img at all.
>>>>
>>>>> obvious options would already result in an aligned size.
>>>>>
>>>>
>>>> Maybe. What is the alignment of qemu-img? Note this requires 512K
>>>> alignment, which is kinda huge.
>>>>
>>>>>> I think this is going to boil down into an interface-and-expectations
>>>>>> argument. I am otherwise in favor of just forcing the resize whenever
>>>>>> possible and failing when it isn't.
>>>>>
>>>>> I'm strongly objecting to any automagic resizing of images.
>>>>>
>>>>
>>>> Can we LOG_UNIMP writes to the missing sectors? Then the user can RW to
>>>> the in-band sectors which should contain the limit of a pre-existing
>>>> filesystem.
>>>>
>>>
>>> This sounds potentially dangerous. Do we know for sure any data written
>>> here is unimportant?
>>>
>>> If it's all zeroes, we can probably guess it's unimportant. As soon as
>>> any non-zero data lands up in this extension range... how do we assert
>>> that this is garbage?
>>>
>>> I don't think we can...
>>
>> Can we return write errors to the guest? If so, and we know that

It is going to vary from device to device. Basically we need something
in the device spec with the semantics of "Your write failed for an
unknown reason and don't bother retrying".

That said, many devices and drivers need to support the notion of bad
blocks and sectors, so there's a good bet the guest can just handle
corruption. For example, in NAND flash the layout of a bad block is
well defined as a specific data pattern, so for that one we could just
mark these read-only-0 extended sectors as bad and everything is then
the guest's problem (as it is now completely valid for the device to
corrupt write data). I wonder if similar mechanisms exist for
everything else?

>> normally the guest shouldn't even try to access the area after the
>> filesystem, it might be reasonable enough to just return write errors in
>> the area that isn't covered by the image. Probably makes the guest
>> unhappy, but it's a bad guest anyway if it tries to write there.

Worthy of a LOG_UNIMP.

>>
>> In that case, the device model should just round up the size, and the
>> block layer will automatically fail anything touching areas beyond the
>> image size.
>>
>> Kevin
>>
>
> Maybe as an option?
>
> This would break re-formatting, right? It might still be nice as a
> low-hassle option, though.

Not if the format process is bad-block tolerant.

Regards,
Peter
diff mbox

Patch

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 3e2a451..539bb72 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -248,13 +248,18 @@  static const uint8_t sd_csd_rw_mask[16] = {
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0xfe,
 };
 
-static void sd_set_csd(SDState *sd, uint64_t size)
+static uint64_t sd_set_csd(SDState *sd, uint64_t size)
 {
-    uint32_t csize = (size >> (CMULT_SHIFT + HWBLOCK_SHIFT)) - 1;
-    uint32_t sectsize = (1 << (SECTOR_SHIFT + 1)) - 1;
-    uint32_t wpsize = (1 << (WPGROUP_SHIFT + 1)) - 1;
+    uint64_t actual_size;
 
     if (size <= 0x40000000) {	/* Standard Capacity SD */
+        uint32_t sectsize = (1 << (SECTOR_SHIFT + 1)) - 1;
+        uint32_t wpsize = (1 << (WPGROUP_SHIFT + 1)) - 1;
+        uint32_t csize;
+
+        actual_size = ROUND_UP(size, 1 << (CMULT_SHIFT + HWBLOCK_SHIFT));
+        csize = (actual_size >> (CMULT_SHIFT + HWBLOCK_SHIFT)) - 1;
+
         sd->csd[0] = 0x00;	/* CSD structure */
         sd->csd[1] = 0x26;	/* Data read access-time-1 */
         sd->csd[2] = 0x00;	/* Data read access-time-2 */
@@ -281,7 +286,8 @@  static void sd_set_csd(SDState *sd, uint64_t size)
         sd->csd[14] = 0x00;	/* File format group */
         sd->csd[15] = (sd_crc7(sd->csd, 15) << 1) | 1;
     } else {			/* SDHC */
-        size /= 512 * 1024;
+        actual_size = ROUND_UP(size, 512 * 1024);
+        size = actual_size / (512 * 1024);
         size -= 1;
         sd->csd[0] = 0x40;
         sd->csd[1] = 0x0e;
@@ -301,6 +307,7 @@  static void sd_set_csd(SDState *sd, uint64_t size)
         sd->csd[15] = 0x00;
         sd->ocr |= 1 << 30;     /* High Capacity SD Memory Card */
     }
+    return actual_size;
 }
 
 static void sd_set_rca(SDState *sd)
@@ -408,7 +415,7 @@  static void sd_reset(SDState *sd)
     sd_set_ocr(sd);
     sd_set_scr(sd);
     sd_set_cid(sd);
-    sd_set_csd(sd, size);
+    size = sd_set_csd(sd, size);
     sd_set_cardstatus(sd);
     sd_set_sdstatus(sd);