Patchwork block: add logical_block_size property

login
register
mail settings
Submitter Christoph Hellwig
Date March 4, 2010, 1:20 p.m.
Message ID <20100304132017.GA2200@lst.de>
Download mbox | patch
Permalink /patch/46905/
State New
Headers show

Comments

Christoph Hellwig - March 4, 2010, 1:20 p.m.
Add a logical block size attribute as various guest side tools only
increase the filesystem sector size based on it, not the advisory
physical block size.

For scsi we already have support for a different logical block size
in place for CDROMs that we can built upon.  Only my recent block
device characteristics VPD page needs some fixups.  Note that we
leave the logial block size for CDROMs hardcoded as the 2k value
is expected for it in general.

For virtio-blk we already have a feature flag claiming to support
a variable logical block size that was added for the s390 kuli
hypervisor.  Interestingly it does not actually change the units
in which the protocol works, which is still fixed at 512 bytes,
but only communicates a different minimum I/O granularity.  So
all we need to do in virtio is to add a trap for unaligned I/O
and round down the device size to the next multiple of the logical
block size.

IDE does not support any other logical block size than 512 bytes.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Christian Borntraeger - March 4, 2010, 2:38 p.m.
Am Donnerstag 04 März 2010 14:20:17 schrieb Christoph Hellwig:
> 
> Add a logical block size attribute as various guest side tools only
> increase the filesystem sector size based on it, not the advisory
> physical block size.
> 
> For scsi we already have support for a different logical block size
> in place for CDROMs that we can built upon.  Only my recent block
> device characteristics VPD page needs some fixups.  Note that we
> leave the logial block size for CDROMs hardcoded as the 2k value
> is expected for it in general.
> 
> For virtio-blk we already have a feature flag claiming to support
> a variable logical block size that was added for the s390 kuli
> hypervisor.  Interestingly it does not actually change the units
> in which the protocol works, which is still fixed at 512 bytes,
> but only communicates a different minimum I/O granularity.  So
> all we need to do in virtio is to add a trap for unaligned I/O
> and round down the device size to the next multiple of the logical
> block size.
> 
> IDE does not support any other logical block size than 512 bytes.

We did that because we wanted to use DASD devices, which have a 4k
block/sector size as backing for virtio. 512 byte request did work
(thanks to host page cache) but a single 512 byte write might have
triggered a 4k read and O_DIRECT accesses (avoiding host caching)
was also not working.

Testing and review is still in my todo list.

Christian
Kevin Wolf - March 5, 2010, 9:26 a.m.
Am 04.03.2010 14:20, schrieb Christoph Hellwig:
> 
> Add a logical block size attribute as various guest side tools only
> increase the filesystem sector size based on it, not the advisory
> physical block size.
> 
> For scsi we already have support for a different logical block size
> in place for CDROMs that we can built upon.  Only my recent block
> device characteristics VPD page needs some fixups.  Note that we
> leave the logial block size for CDROMs hardcoded as the 2k value
> is expected for it in general.
> 
> For virtio-blk we already have a feature flag claiming to support
> a variable logical block size that was added for the s390 kuli
> hypervisor.  Interestingly it does not actually change the units
> in which the protocol works, which is still fixed at 512 bytes,
> but only communicates a different minimum I/O granularity.  So
> all we need to do in virtio is to add a trap for unaligned I/O
> and round down the device size to the next multiple of the logical
> block size.
> 
> IDE does not support any other logical block size than 512 bytes.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Index: qemu/block_int.h
> ===================================================================
> --- qemu.orig/block_int.h	2010-03-03 19:16:13.408253228 +0100
> +++ qemu/block_int.h	2010-03-03 19:16:43.030003751 +0100
> @@ -209,6 +209,7 @@ struct DriveInfo;
>  typedef struct BlockConf {
>      struct DriveInfo *dinfo;
>      uint16_t physical_block_size;
> +    uint16_t logical_block_size;
>      uint16_t min_io_size;
>      uint32_t opt_io_size;
>  } BlockConf;
> @@ -226,6 +227,8 @@ static inline unsigned int get_physical_
>  
>  #define DEFINE_BLOCK_PROPERTIES(_state, _conf)                          \
>      DEFINE_PROP_DRIVE("drive", _state, _conf.dinfo),                    \
> +    DEFINE_PROP_UINT16("logical_block_size", _state,                    \
> +                       _conf.logical_block_size, 512),                  \
>      DEFINE_PROP_UINT16("physical_block_size", _state,                   \
>                         _conf.physical_block_size, 512),                 \
>      DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 512),  \
> Index: qemu/hw/scsi-disk.c
> ===================================================================
> --- qemu.orig/hw/scsi-disk.c	2010-03-03 19:16:13.419254346 +0100
> +++ qemu/hw/scsi-disk.c	2010-03-03 19:16:43.031004240 +0100
> @@ -397,8 +397,10 @@ static int scsi_disk_emulate_inquiry(SCS
>          }
>          case 0xb0: /* block device characteristics */
>          {
> -            unsigned int min_io_size = s->qdev.conf.min_io_size >> 9;
> -            unsigned int opt_io_size = s->qdev.conf.opt_io_size >> 9;
> +            unsigned int min_io_size =
> +                    s->qdev.conf.min_io_size / s->qdev.blocksize;
> +            unsigned int opt_io_size =
> +                    s->qdev.conf.opt_io_size / s->qdev.blocksize;
>  
>              /* required VPD size with unmap support */
>              outbuf[3] = buflen = 0x3c;
> @@ -1028,11 +1030,12 @@ static int scsi_disk_initfn(SCSIDevice *
>      s->bs = s->qdev.conf.dinfo->bdrv;
>  
>      if (bdrv_get_type_hint(s->bs) == BDRV_TYPE_CDROM) {
> -        s->cluster_size = 4;
> +        s->qdev.blocksize = 2048;
>      } else {
> -        s->cluster_size = 1;
> +        s->qdev.blocksize = s->qdev.conf.logical_block_size;
>      }
> -    s->qdev.blocksize = 512 * s->cluster_size;
> +    s->cluster_size = s->qdev.blocksize / 512;
> +
>      s->qdev.type = TYPE_DISK;
>      bdrv_get_geometry(s->bs, &nb_sectors);
>      nb_sectors /= s->cluster_size;
> Index: qemu/hw/virtio-blk.c
> ===================================================================
> --- qemu.orig/hw/virtio-blk.c	2010-03-03 19:16:13.426273971 +0100
> +++ qemu/hw/virtio-blk.c	2010-03-03 19:35:16.636028605 +0100
> @@ -27,6 +27,7 @@ typedef struct VirtIOBlock
>      void *rq;
>      QEMUBH *bh;
>      BlockConf *conf;
> +    unsigned short sector_mask;
>  } VirtIOBlock;
>  
>  static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
> @@ -250,6 +251,11 @@ static void virtio_blk_handle_flush(Virt
>  static void virtio_blk_handle_write(BlockRequest *blkreq, int *num_writes,
>      VirtIOBlockReq *req, BlockDriverState **old_bs)
>  {
> +    if (req->out->sector & req->dev->sector_mask) {
> +        virtio_blk_rw_complete(req, -EIO);
> +        return;
> +    }
> +
>      if (req->dev->bs != *old_bs || *num_writes == 32) {
>          if (*old_bs != NULL) {
>              do_multiwrite(*old_bs, blkreq, *num_writes);
> @@ -272,6 +278,11 @@ static void virtio_blk_handle_read(VirtI
>  {
>      BlockDriverAIOCB *acb;
>  
> +    if (req->out->sector & req->dev->sector_mask) {
> +        virtio_blk_rw_complete(req, -EIO);
> +        return;
> +    }
> +
>      acb = bdrv_aio_readv(req->dev->bs, req->out->sector, &req->qiov,
>                           req->qiov.size / 512, virtio_blk_rw_complete, req);
>      if (!acb) {
> @@ -404,12 +415,13 @@ static void virtio_blk_update_config(Vir
>      stl_raw(&blkcfg.seg_max, 128 - 2);
>      stw_raw(&blkcfg.cylinders, cylinders);
>      blkcfg.heads = heads;
> -    blkcfg.sectors = secs;
> +    blkcfg.sectors = secs & ~s->sector_mask;
> +    blkcfg.blk_size = s->conf->logical_block_size;
>      blkcfg.size_max = 0;
>      blkcfg.physical_block_exp = get_physical_block_exp(s->conf);
>      blkcfg.alignment_offset = 0;
> -    blkcfg.min_io_size = s->conf->min_io_size / 512;
> -    blkcfg.opt_io_size = s->conf->opt_io_size / 512;
> +    blkcfg.min_io_size = s->conf->min_io_size / blkcfg.blk_size;
> +    blkcfg.opt_io_size = s->conf->opt_io_size / blkcfg.blk_size;
>      memcpy(config, &blkcfg, sizeof(struct virtio_blk_config));
>  }
>  
> @@ -420,6 +432,7 @@ static uint32_t virtio_blk_get_features(
>      features |= (1 << VIRTIO_BLK_F_SEG_MAX);
>      features |= (1 << VIRTIO_BLK_F_GEOMETRY);
>      features |= (1 << VIRTIO_BLK_F_TOPOLOGY);
> +    features |= (1 << VIRTIO_BLK_F_BLK_SIZE);
>  
>      if (bdrv_enable_write_cache(s->bs))
>          features |= (1 << VIRTIO_BLK_F_WCACHE);
> @@ -479,6 +492,7 @@ VirtIODevice *virtio_blk_init(DeviceStat
>      s->bs = conf->dinfo->bdrv;
>      s->conf = conf;
>      s->rq = NULL;
> +    s->sector_mask = (s->conf->logical_block_size / 512) - 1;

Is there a check anywhere that the user didn't give us an odd block
size? We could get an interesting bit mask otherwise.

Kevin
Christoph Hellwig - March 5, 2010, 9:32 a.m.
On Fri, Mar 05, 2010 at 10:26:21AM +0100, Kevin Wolf wrote:
> Is there a check anywhere that the user didn't give us an odd block
> size? We could get an interesting bit mask otherwise.

Not yet.  I used to have such a check in the first incarnation of
the block topology patches, but I have no idea how to do it in a
centralized way using the qdev attributs.

Does anyone know if there's a better way to do this than just
duplicating the check in every driver?
Kevin Wolf - March 5, 2010, 9:55 a.m.
Am 05.03.2010 10:32, schrieb Christoph Hellwig:
> On Fri, Mar 05, 2010 at 10:26:21AM +0100, Kevin Wolf wrote:
>> Is there a check anywhere that the user didn't give us an odd block
>> size? We could get an interesting bit mask otherwise.
> 
> Not yet.  I used to have such a check in the first incarnation of
> the block topology patches, but I have no idea how to do it in a
> centralized way using the qdev attributs.
> 
> Does anyone know if there's a better way to do this than just
> duplicating the check in every driver?

We could probably introduce a new qdev property type that says "unsigned
16 bit integer, but only powers of two", and doing this looks quite
easy. On the other hand, having a new type for each possible restriction
sounds a bit silly...

Kevin
Christoph Hellwig - March 16, 2010, 9:23 a.m.
ping?

On Thu, Mar 04, 2010 at 02:20:17PM +0100, Christoph Hellwig wrote:
> 
> Add a logical block size attribute as various guest side tools only
> increase the filesystem sector size based on it, not the advisory
> physical block size.
> 
> For scsi we already have support for a different logical block size
> in place for CDROMs that we can built upon.  Only my recent block
> device characteristics VPD page needs some fixups.  Note that we
> leave the logial block size for CDROMs hardcoded as the 2k value
> is expected for it in general.
> 
> For virtio-blk we already have a feature flag claiming to support
> a variable logical block size that was added for the s390 kuli
> hypervisor.  Interestingly it does not actually change the units
> in which the protocol works, which is still fixed at 512 bytes,
> but only communicates a different minimum I/O granularity.  So
> all we need to do in virtio is to add a trap for unaligned I/O
> and round down the device size to the next multiple of the logical
> block size.
> 
> IDE does not support any other logical block size than 512 bytes.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Index: qemu/block_int.h
> ===================================================================
> --- qemu.orig/block_int.h	2010-03-03 19:16:13.408253228 +0100
> +++ qemu/block_int.h	2010-03-03 19:16:43.030003751 +0100
> @@ -209,6 +209,7 @@ struct DriveInfo;
>  typedef struct BlockConf {
>      struct DriveInfo *dinfo;
>      uint16_t physical_block_size;
> +    uint16_t logical_block_size;
>      uint16_t min_io_size;
>      uint32_t opt_io_size;
>  } BlockConf;
> @@ -226,6 +227,8 @@ static inline unsigned int get_physical_
>  
>  #define DEFINE_BLOCK_PROPERTIES(_state, _conf)                          \
>      DEFINE_PROP_DRIVE("drive", _state, _conf.dinfo),                    \
> +    DEFINE_PROP_UINT16("logical_block_size", _state,                    \
> +                       _conf.logical_block_size, 512),                  \
>      DEFINE_PROP_UINT16("physical_block_size", _state,                   \
>                         _conf.physical_block_size, 512),                 \
>      DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 512),  \
> Index: qemu/hw/scsi-disk.c
> ===================================================================
> --- qemu.orig/hw/scsi-disk.c	2010-03-03 19:16:13.419254346 +0100
> +++ qemu/hw/scsi-disk.c	2010-03-03 19:16:43.031004240 +0100
> @@ -397,8 +397,10 @@ static int scsi_disk_emulate_inquiry(SCS
>          }
>          case 0xb0: /* block device characteristics */
>          {
> -            unsigned int min_io_size = s->qdev.conf.min_io_size >> 9;
> -            unsigned int opt_io_size = s->qdev.conf.opt_io_size >> 9;
> +            unsigned int min_io_size =
> +                    s->qdev.conf.min_io_size / s->qdev.blocksize;
> +            unsigned int opt_io_size =
> +                    s->qdev.conf.opt_io_size / s->qdev.blocksize;
>  
>              /* required VPD size with unmap support */
>              outbuf[3] = buflen = 0x3c;
> @@ -1028,11 +1030,12 @@ static int scsi_disk_initfn(SCSIDevice *
>      s->bs = s->qdev.conf.dinfo->bdrv;
>  
>      if (bdrv_get_type_hint(s->bs) == BDRV_TYPE_CDROM) {
> -        s->cluster_size = 4;
> +        s->qdev.blocksize = 2048;
>      } else {
> -        s->cluster_size = 1;
> +        s->qdev.blocksize = s->qdev.conf.logical_block_size;
>      }
> -    s->qdev.blocksize = 512 * s->cluster_size;
> +    s->cluster_size = s->qdev.blocksize / 512;
> +
>      s->qdev.type = TYPE_DISK;
>      bdrv_get_geometry(s->bs, &nb_sectors);
>      nb_sectors /= s->cluster_size;
> Index: qemu/hw/virtio-blk.c
> ===================================================================
> --- qemu.orig/hw/virtio-blk.c	2010-03-03 19:16:13.426273971 +0100
> +++ qemu/hw/virtio-blk.c	2010-03-03 19:35:16.636028605 +0100
> @@ -27,6 +27,7 @@ typedef struct VirtIOBlock
>      void *rq;
>      QEMUBH *bh;
>      BlockConf *conf;
> +    unsigned short sector_mask;
>  } VirtIOBlock;
>  
>  static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
> @@ -250,6 +251,11 @@ static void virtio_blk_handle_flush(Virt
>  static void virtio_blk_handle_write(BlockRequest *blkreq, int *num_writes,
>      VirtIOBlockReq *req, BlockDriverState **old_bs)
>  {
> +    if (req->out->sector & req->dev->sector_mask) {
> +        virtio_blk_rw_complete(req, -EIO);
> +        return;
> +    }
> +
>      if (req->dev->bs != *old_bs || *num_writes == 32) {
>          if (*old_bs != NULL) {
>              do_multiwrite(*old_bs, blkreq, *num_writes);
> @@ -272,6 +278,11 @@ static void virtio_blk_handle_read(VirtI
>  {
>      BlockDriverAIOCB *acb;
>  
> +    if (req->out->sector & req->dev->sector_mask) {
> +        virtio_blk_rw_complete(req, -EIO);
> +        return;
> +    }
> +
>      acb = bdrv_aio_readv(req->dev->bs, req->out->sector, &req->qiov,
>                           req->qiov.size / 512, virtio_blk_rw_complete, req);
>      if (!acb) {
> @@ -404,12 +415,13 @@ static void virtio_blk_update_config(Vir
>      stl_raw(&blkcfg.seg_max, 128 - 2);
>      stw_raw(&blkcfg.cylinders, cylinders);
>      blkcfg.heads = heads;
> -    blkcfg.sectors = secs;
> +    blkcfg.sectors = secs & ~s->sector_mask;
> +    blkcfg.blk_size = s->conf->logical_block_size;
>      blkcfg.size_max = 0;
>      blkcfg.physical_block_exp = get_physical_block_exp(s->conf);
>      blkcfg.alignment_offset = 0;
> -    blkcfg.min_io_size = s->conf->min_io_size / 512;
> -    blkcfg.opt_io_size = s->conf->opt_io_size / 512;
> +    blkcfg.min_io_size = s->conf->min_io_size / blkcfg.blk_size;
> +    blkcfg.opt_io_size = s->conf->opt_io_size / blkcfg.blk_size;
>      memcpy(config, &blkcfg, sizeof(struct virtio_blk_config));
>  }
>  
> @@ -420,6 +432,7 @@ static uint32_t virtio_blk_get_features(
>      features |= (1 << VIRTIO_BLK_F_SEG_MAX);
>      features |= (1 << VIRTIO_BLK_F_GEOMETRY);
>      features |= (1 << VIRTIO_BLK_F_TOPOLOGY);
> +    features |= (1 << VIRTIO_BLK_F_BLK_SIZE);
>  
>      if (bdrv_enable_write_cache(s->bs))
>          features |= (1 << VIRTIO_BLK_F_WCACHE);
> @@ -479,6 +492,7 @@ VirtIODevice *virtio_blk_init(DeviceStat
>      s->bs = conf->dinfo->bdrv;
>      s->conf = conf;
>      s->rq = NULL;
> +    s->sector_mask = (s->conf->logical_block_size / 512) - 1;
>      bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs);
>      bdrv_set_geometry_hint(s->bs, cylinders, heads, secs);
>  
> Index: qemu/hw/virtio-blk.h
> ===================================================================
> --- qemu.orig/hw/virtio-blk.h	2010-03-03 19:16:13.434257349 +0100
> +++ qemu/hw/virtio-blk.h	2010-03-03 19:16:43.036004100 +0100
> @@ -42,7 +42,7 @@ struct virtio_blk_config
>      uint16_t cylinders;
>      uint8_t heads;
>      uint8_t sectors;
> -    uint32_t _blk_size;    /* structure pad, currently unused */
> +    uint32_t blk_size;
>      uint8_t physical_block_exp;
>      uint8_t alignment_offset;
>      uint16_t min_io_size;
> 
---end quoted text---
Christian Borntraeger - March 17, 2010, 9:59 a.m.
Am Donnerstag 04 März 2010 14:20:17 schrieb Christoph Hellwig:
> 
> Add a logical block size attribute as various guest side tools only
> increase the filesystem sector size based on it, not the advisory
> physical block size.
> 
> For scsi we already have support for a different logical block size
> in place for CDROMs that we can built upon.  Only my recent block
> device characteristics VPD page needs some fixups.  Note that we
> leave the logial block size for CDROMs hardcoded as the 2k value
> is expected for it in general.
> 
> For virtio-blk we already have a feature flag claiming to support
> a variable logical block size that was added for the s390 kuli
> hypervisor.  Interestingly it does not actually change the units
> in which the protocol works, which is still fixed at 512 bytes,
> but only communicates a different minimum I/O granularity.  So
> all we need to do in virtio is to add a trap for unaligned I/O
> and round down the device size to the next multiple of the logical
> block size.
> 
> IDE does not support any other logical block size than 512 bytes.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Patch looks sane.
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Anthony Liguori - March 17, 2010, 3:59 p.m.
On 03/04/2010 07:20 AM, Christoph Hellwig wrote:
> Add a logical block size attribute as various guest side tools only
> increase the filesystem sector size based on it, not the advisory
> physical block size.
>
> For scsi we already have support for a different logical block size
> in place for CDROMs that we can built upon.  Only my recent block
> device characteristics VPD page needs some fixups.  Note that we
> leave the logial block size for CDROMs hardcoded as the 2k value
> is expected for it in general.
>
> For virtio-blk we already have a feature flag claiming to support
> a variable logical block size that was added for the s390 kuli
> hypervisor.  Interestingly it does not actually change the units
> in which the protocol works, which is still fixed at 512 bytes,
> but only communicates a different minimum I/O granularity.  So
> all we need to do in virtio is to add a trap for unaligned I/O
> and round down the device size to the next multiple of the logical
> block size.
>
> IDE does not support any other logical block size than 512 bytes.
>
> Signed-off-by: Christoph Hellwig<hch@lst.de>
>    

Applied.  Thanks.

Regards,

Anthony Liguori

> Index: qemu/block_int.h
> ===================================================================
> --- qemu.orig/block_int.h	2010-03-03 19:16:13.408253228 +0100
> +++ qemu/block_int.h	2010-03-03 19:16:43.030003751 +0100
> @@ -209,6 +209,7 @@ struct DriveInfo;
>   typedef struct BlockConf {
>       struct DriveInfo *dinfo;
>       uint16_t physical_block_size;
> +    uint16_t logical_block_size;
>       uint16_t min_io_size;
>       uint32_t opt_io_size;
>   } BlockConf;
> @@ -226,6 +227,8 @@ static inline unsigned int get_physical_
>
>   #define DEFINE_BLOCK_PROPERTIES(_state, _conf)                          \
>       DEFINE_PROP_DRIVE("drive", _state, _conf.dinfo),                    \
> +    DEFINE_PROP_UINT16("logical_block_size", _state,                    \
> +                       _conf.logical_block_size, 512),                  \
>       DEFINE_PROP_UINT16("physical_block_size", _state,                   \
>                          _conf.physical_block_size, 512),                 \
>       DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 512),  \
> Index: qemu/hw/scsi-disk.c
> ===================================================================
> --- qemu.orig/hw/scsi-disk.c	2010-03-03 19:16:13.419254346 +0100
> +++ qemu/hw/scsi-disk.c	2010-03-03 19:16:43.031004240 +0100
> @@ -397,8 +397,10 @@ static int scsi_disk_emulate_inquiry(SCS
>           }
>           case 0xb0: /* block device characteristics */
>           {
> -            unsigned int min_io_size = s->qdev.conf.min_io_size>>  9;
> -            unsigned int opt_io_size = s->qdev.conf.opt_io_size>>  9;
> +            unsigned int min_io_size =
> +                    s->qdev.conf.min_io_size / s->qdev.blocksize;
> +            unsigned int opt_io_size =
> +                    s->qdev.conf.opt_io_size / s->qdev.blocksize;
>
>               /* required VPD size with unmap support */
>               outbuf[3] = buflen = 0x3c;
> @@ -1028,11 +1030,12 @@ static int scsi_disk_initfn(SCSIDevice *
>       s->bs = s->qdev.conf.dinfo->bdrv;
>
>       if (bdrv_get_type_hint(s->bs) == BDRV_TYPE_CDROM) {
> -        s->cluster_size = 4;
> +        s->qdev.blocksize = 2048;
>       } else {
> -        s->cluster_size = 1;
> +        s->qdev.blocksize = s->qdev.conf.logical_block_size;
>       }
> -    s->qdev.blocksize = 512 * s->cluster_size;
> +    s->cluster_size = s->qdev.blocksize / 512;
> +
>       s->qdev.type = TYPE_DISK;
>       bdrv_get_geometry(s->bs,&nb_sectors);
>       nb_sectors /= s->cluster_size;
> Index: qemu/hw/virtio-blk.c
> ===================================================================
> --- qemu.orig/hw/virtio-blk.c	2010-03-03 19:16:13.426273971 +0100
> +++ qemu/hw/virtio-blk.c	2010-03-03 19:35:16.636028605 +0100
> @@ -27,6 +27,7 @@ typedef struct VirtIOBlock
>       void *rq;
>       QEMUBH *bh;
>       BlockConf *conf;
> +    unsigned short sector_mask;
>   } VirtIOBlock;
>
>   static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
> @@ -250,6 +251,11 @@ static void virtio_blk_handle_flush(Virt
>   static void virtio_blk_handle_write(BlockRequest *blkreq, int *num_writes,
>       VirtIOBlockReq *req, BlockDriverState **old_bs)
>   {
> +    if (req->out->sector&  req->dev->sector_mask) {
> +        virtio_blk_rw_complete(req, -EIO);
> +        return;
> +    }
> +
>       if (req->dev->bs != *old_bs || *num_writes == 32) {
>           if (*old_bs != NULL) {
>               do_multiwrite(*old_bs, blkreq, *num_writes);
> @@ -272,6 +278,11 @@ static void virtio_blk_handle_read(VirtI
>   {
>       BlockDriverAIOCB *acb;
>
> +    if (req->out->sector&  req->dev->sector_mask) {
> +        virtio_blk_rw_complete(req, -EIO);
> +        return;
> +    }
> +
>       acb = bdrv_aio_readv(req->dev->bs, req->out->sector,&req->qiov,
>                            req->qiov.size / 512, virtio_blk_rw_complete, req);
>       if (!acb) {
> @@ -404,12 +415,13 @@ static void virtio_blk_update_config(Vir
>       stl_raw(&blkcfg.seg_max, 128 - 2);
>       stw_raw(&blkcfg.cylinders, cylinders);
>       blkcfg.heads = heads;
> -    blkcfg.sectors = secs;
> +    blkcfg.sectors = secs&  ~s->sector_mask;
> +    blkcfg.blk_size = s->conf->logical_block_size;
>       blkcfg.size_max = 0;
>       blkcfg.physical_block_exp = get_physical_block_exp(s->conf);
>       blkcfg.alignment_offset = 0;
> -    blkcfg.min_io_size = s->conf->min_io_size / 512;
> -    blkcfg.opt_io_size = s->conf->opt_io_size / 512;
> +    blkcfg.min_io_size = s->conf->min_io_size / blkcfg.blk_size;
> +    blkcfg.opt_io_size = s->conf->opt_io_size / blkcfg.blk_size;
>       memcpy(config,&blkcfg, sizeof(struct virtio_blk_config));
>   }
>
> @@ -420,6 +432,7 @@ static uint32_t virtio_blk_get_features(
>       features |= (1<<  VIRTIO_BLK_F_SEG_MAX);
>       features |= (1<<  VIRTIO_BLK_F_GEOMETRY);
>       features |= (1<<  VIRTIO_BLK_F_TOPOLOGY);
> +    features |= (1<<  VIRTIO_BLK_F_BLK_SIZE);
>
>       if (bdrv_enable_write_cache(s->bs))
>           features |= (1<<  VIRTIO_BLK_F_WCACHE);
> @@ -479,6 +492,7 @@ VirtIODevice *virtio_blk_init(DeviceStat
>       s->bs = conf->dinfo->bdrv;
>       s->conf = conf;
>       s->rq = NULL;
> +    s->sector_mask = (s->conf->logical_block_size / 512) - 1;
>       bdrv_guess_geometry(s->bs,&cylinders,&heads,&secs);
>       bdrv_set_geometry_hint(s->bs, cylinders, heads, secs);
>
> Index: qemu/hw/virtio-blk.h
> ===================================================================
> --- qemu.orig/hw/virtio-blk.h	2010-03-03 19:16:13.434257349 +0100
> +++ qemu/hw/virtio-blk.h	2010-03-03 19:16:43.036004100 +0100
> @@ -42,7 +42,7 @@ struct virtio_blk_config
>       uint16_t cylinders;
>       uint8_t heads;
>       uint8_t sectors;
> -    uint32_t _blk_size;    /* structure pad, currently unused */
> +    uint32_t blk_size;
>       uint8_t physical_block_exp;
>       uint8_t alignment_offset;
>       uint16_t min_io_size;
>
>
>
>
Artyom Tarasenko - May 3, 2010, 5:29 p.m.
2010/3/4 Christoph Hellwig <hch@lst.de>:
>
> Add a logical block size attribute as various guest side tools only
> increase the filesystem sector size based on it, not the advisory
> physical block size.
>
> For scsi we already have support for a different logical block size
> in place for CDROMs that we can built upon.  Only my recent block
> device characteristics VPD page needs some fixups.  Note that we
> leave the logial block size for CDROMs hardcoded as the 2k value
> is expected for it in general.

By "general" do you mean qemu, or the guests?

Solaris 2.0 - 2.5.1 and SunOS 4.x expect CDROM block size of 512 bytes.
Would have been nice to have the block size for CDROM configurable too.

Currently the CDROMs mentioned above have to be plugged as hard disks
in order to function.

Patch

Index: qemu/block_int.h
===================================================================
--- qemu.orig/block_int.h	2010-03-03 19:16:13.408253228 +0100
+++ qemu/block_int.h	2010-03-03 19:16:43.030003751 +0100
@@ -209,6 +209,7 @@  struct DriveInfo;
 typedef struct BlockConf {
     struct DriveInfo *dinfo;
     uint16_t physical_block_size;
+    uint16_t logical_block_size;
     uint16_t min_io_size;
     uint32_t opt_io_size;
 } BlockConf;
@@ -226,6 +227,8 @@  static inline unsigned int get_physical_
 
 #define DEFINE_BLOCK_PROPERTIES(_state, _conf)                          \
     DEFINE_PROP_DRIVE("drive", _state, _conf.dinfo),                    \
+    DEFINE_PROP_UINT16("logical_block_size", _state,                    \
+                       _conf.logical_block_size, 512),                  \
     DEFINE_PROP_UINT16("physical_block_size", _state,                   \
                        _conf.physical_block_size, 512),                 \
     DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 512),  \
Index: qemu/hw/scsi-disk.c
===================================================================
--- qemu.orig/hw/scsi-disk.c	2010-03-03 19:16:13.419254346 +0100
+++ qemu/hw/scsi-disk.c	2010-03-03 19:16:43.031004240 +0100
@@ -397,8 +397,10 @@  static int scsi_disk_emulate_inquiry(SCS
         }
         case 0xb0: /* block device characteristics */
         {
-            unsigned int min_io_size = s->qdev.conf.min_io_size >> 9;
-            unsigned int opt_io_size = s->qdev.conf.opt_io_size >> 9;
+            unsigned int min_io_size =
+                    s->qdev.conf.min_io_size / s->qdev.blocksize;
+            unsigned int opt_io_size =
+                    s->qdev.conf.opt_io_size / s->qdev.blocksize;
 
             /* required VPD size with unmap support */
             outbuf[3] = buflen = 0x3c;
@@ -1028,11 +1030,12 @@  static int scsi_disk_initfn(SCSIDevice *
     s->bs = s->qdev.conf.dinfo->bdrv;
 
     if (bdrv_get_type_hint(s->bs) == BDRV_TYPE_CDROM) {
-        s->cluster_size = 4;
+        s->qdev.blocksize = 2048;
     } else {
-        s->cluster_size = 1;
+        s->qdev.blocksize = s->qdev.conf.logical_block_size;
     }
-    s->qdev.blocksize = 512 * s->cluster_size;
+    s->cluster_size = s->qdev.blocksize / 512;
+
     s->qdev.type = TYPE_DISK;
     bdrv_get_geometry(s->bs, &nb_sectors);
     nb_sectors /= s->cluster_size;
Index: qemu/hw/virtio-blk.c
===================================================================
--- qemu.orig/hw/virtio-blk.c	2010-03-03 19:16:13.426273971 +0100
+++ qemu/hw/virtio-blk.c	2010-03-03 19:35:16.636028605 +0100
@@ -27,6 +27,7 @@  typedef struct VirtIOBlock
     void *rq;
     QEMUBH *bh;
     BlockConf *conf;
+    unsigned short sector_mask;
 } VirtIOBlock;
 
 static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
@@ -250,6 +251,11 @@  static void virtio_blk_handle_flush(Virt
 static void virtio_blk_handle_write(BlockRequest *blkreq, int *num_writes,
     VirtIOBlockReq *req, BlockDriverState **old_bs)
 {
+    if (req->out->sector & req->dev->sector_mask) {
+        virtio_blk_rw_complete(req, -EIO);
+        return;
+    }
+
     if (req->dev->bs != *old_bs || *num_writes == 32) {
         if (*old_bs != NULL) {
             do_multiwrite(*old_bs, blkreq, *num_writes);
@@ -272,6 +278,11 @@  static void virtio_blk_handle_read(VirtI
 {
     BlockDriverAIOCB *acb;
 
+    if (req->out->sector & req->dev->sector_mask) {
+        virtio_blk_rw_complete(req, -EIO);
+        return;
+    }
+
     acb = bdrv_aio_readv(req->dev->bs, req->out->sector, &req->qiov,
                          req->qiov.size / 512, virtio_blk_rw_complete, req);
     if (!acb) {
@@ -404,12 +415,13 @@  static void virtio_blk_update_config(Vir
     stl_raw(&blkcfg.seg_max, 128 - 2);
     stw_raw(&blkcfg.cylinders, cylinders);
     blkcfg.heads = heads;
-    blkcfg.sectors = secs;
+    blkcfg.sectors = secs & ~s->sector_mask;
+    blkcfg.blk_size = s->conf->logical_block_size;
     blkcfg.size_max = 0;
     blkcfg.physical_block_exp = get_physical_block_exp(s->conf);
     blkcfg.alignment_offset = 0;
-    blkcfg.min_io_size = s->conf->min_io_size / 512;
-    blkcfg.opt_io_size = s->conf->opt_io_size / 512;
+    blkcfg.min_io_size = s->conf->min_io_size / blkcfg.blk_size;
+    blkcfg.opt_io_size = s->conf->opt_io_size / blkcfg.blk_size;
     memcpy(config, &blkcfg, sizeof(struct virtio_blk_config));
 }
 
@@ -420,6 +432,7 @@  static uint32_t virtio_blk_get_features(
     features |= (1 << VIRTIO_BLK_F_SEG_MAX);
     features |= (1 << VIRTIO_BLK_F_GEOMETRY);
     features |= (1 << VIRTIO_BLK_F_TOPOLOGY);
+    features |= (1 << VIRTIO_BLK_F_BLK_SIZE);
 
     if (bdrv_enable_write_cache(s->bs))
         features |= (1 << VIRTIO_BLK_F_WCACHE);
@@ -479,6 +492,7 @@  VirtIODevice *virtio_blk_init(DeviceStat
     s->bs = conf->dinfo->bdrv;
     s->conf = conf;
     s->rq = NULL;
+    s->sector_mask = (s->conf->logical_block_size / 512) - 1;
     bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs);
     bdrv_set_geometry_hint(s->bs, cylinders, heads, secs);
 
Index: qemu/hw/virtio-blk.h
===================================================================
--- qemu.orig/hw/virtio-blk.h	2010-03-03 19:16:13.434257349 +0100
+++ qemu/hw/virtio-blk.h	2010-03-03 19:16:43.036004100 +0100
@@ -42,7 +42,7 @@  struct virtio_blk_config
     uint16_t cylinders;
     uint8_t heads;
     uint8_t sectors;
-    uint32_t _blk_size;    /* structure pad, currently unused */
+    uint32_t blk_size;
     uint8_t physical_block_exp;
     uint8_t alignment_offset;
     uint16_t min_io_size;