diff mbox

Fix accesses at LBA28 boundary (old bug, but nasty)

Message ID 4BBBB975.7000203@teksavvy.com
State Not Applicable
Delegated to: David Miller
Headers show

Commit Message

Mark Lord April 6, 2010, 10:45 p.m. UTC
Most drives from Seagate, Hitachi, and possibly other brands,
do not allow LBA28 access to sector number 0x0fffffff (2^28 - 1).
So instead use LBA48 for such accesses.

This bug could bite a lot of systems, especially when the user has
taken care to align partitions to 4KB boundaries.  On misaligned systems,
it is less likely to be encountered, since a 4KB read would end at 0x10000000
rather than at 0x0fffffff.

Signed-off-by: Mark Lord <mlord@pobox.com>

--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Mark Lord April 6, 2010, 10:47 p.m. UTC | #1
Here is a test program, to see if a drive
On 06/04/10 06:45 PM, Mark Lord wrote:
> Most drives from Seagate, Hitachi, and possibly other brands,
> do not allow LBA28 access to sector number 0x0fffffff (2^28 - 1).
> So instead use LBA48 for such accesses.
...

Here is a test program, to see if a particular drive has this problem or not.
It works only on drives larger than 128GB.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <linux/fs.h>
#include <linux/hdreg.h>
#include <scsi/scsi.h>
#include <scsi/sg.h>

typedef unsigned long long u64;

enum {
	ATA_CMD_DMA_READ		= 0xc8,
	ATA_CMD_DMA_READ_EXT		= 0x25,
	ATA_SECT_SIZE			= 512,
	ATA_16				= 0x85,
	ATA_16_LEN			= 16,
	ATA_DEV_REG_LBA			= (1 << 6),
	ATA_LBA48			= 1,
	ATA_PROTO_DMA			= ( 6 << 1),
};

static int sg_read (int fd, u64 lba, unsigned int nsects, void *buf, int force_lba28)
{
	unsigned char cdb[ATA_16_LEN] = { ATA_16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	struct sg_io_hdr hdr;
	unsigned char sense[32];

	cdb[ 1] = ATA_PROTO_DMA;
	cdb[ 6] = nsects;
	cdb[ 8] = lba;
	cdb[10] = lba >>  8;
	cdb[12] = lba >> 16;
	cdb[13] = ATA_DEV_REG_LBA;

	if (force_lba28 || (nsects <= 256 && (lba + nsects) < (1ULL << 28))) {
		cdb[13] |= (lba >> 24) & 0x0f;
		cdb[14]  = ATA_CMD_DMA_READ;
	} else {
		cdb[ 1] |= ATA_LBA48;
		cdb[ 5]  = nsects >> 8;
		cdb[ 7]  = lba >> 24;
		cdb[ 9]  = lba >> 32;
		cdb[11]  = lba >> 40;
		cdb[14]  = ATA_CMD_DMA_READ_EXT;
	}

	memset(&hdr, 0, sizeof(struct sg_io_hdr));
	hdr.interface_id	= 'S';
	hdr.cmd_len		= ATA_16_LEN;
	hdr.mx_sb_len		= sizeof(sense);
	hdr.dxfer_direction	= SG_DXFER_FROM_DEV;
	hdr.dxfer_len		= nsects * ATA_SECT_SIZE;
	hdr.dxferp		= buf;
	hdr.cmdp		= cdb;
	hdr.sbp			= sense;
	hdr.pack_id		= lba;
	hdr.timeout		= 5000; /* milliseconds */

	memset(sense, 0, sizeof(sense));
	if (ioctl(fd, SG_IO, &hdr) < 0) {
		perror("ioctl(SG_IO)");
		return (-1);
	}
	if (hdr.status == 0 && hdr.host_status == 0 && hdr.driver_status == 0)
		return 0; /* success */

	if (hdr.status > 0) {
		unsigned char *s = sense + 8;
		/* SCSI status is non-zero, let's go for the error LBA */
		lba = ((u64)s[10] << 40) | ((u64)s[8] << 32) | (s[6] << 24) |
			(s[11] << 16) | (s[9] << 8) | s[7];
		if (0) fprintf(stderr, "SG_IO error: SCSI sense=0x%x/%02x/%02x, ATA=0x%02x/%02x, LBA=%llu (0x%llx)\n",
			sense[1] & 0xf, sense[2], sense[3], s[13], s[3], lba, lba);
		return -1;
	}

	/* some other error we don't know about yet */
	fprintf(stderr, "SG_IO returned: SCSI status=0x%x, host_status=0x%x, driver_status=0x%x",
		hdr.status, hdr.host_status, hdr.driver_status);
	return -1;
}

static void print_drive_model (const char *devpath)
{
	char cmd[256];

	snprintf(cmd, sizeof(cmd), "hdparm -I %s | grep 'Model Number' | sed '-es/^[ 	]*//'", devpath);
	system(cmd);
	snprintf(cmd, sizeof(cmd), "hdparm -I %s | grep 'Firmware Revision' | sed '-es/^[ 	]*//'", devpath);
	system(cmd);
}

int main (int argc, char *argv[])
{
	unsigned char buf[8 * ATA_SECT_SIZE];
	const char *devpath;
	int rc, fd, nsects;
	u64 lba;

	if (argc != 2) {
		fprintf(stderr, "%s: bad/missing parms: expected <devpath>\n", argv[0]);
		exit(1);
	}
	devpath = argv[1];

	fd = open(devpath, O_RDONLY);
	if (fd == -1) {
		perror(devpath);
		exit(1);
	}

	print_drive_model(devpath);

	nsects = 8;
	lba = 0x0ffffff8;
	printf("Reading %u sectors starting at LBA=%llu (0x%llx): ", nsects, lba, lba);
	fflush(stdout);
	rc = sg_read(fd, lba, nsects, buf, 1);
	printf("%s\n", rc ? "FAILED" : "succeeded");

	nsects = 1;
	lba = 0x0fffffff;
	printf("Reading %u sectors starting at LBA=%llu (0x%llx): ", nsects, lba, lba);
	fflush(stdout);
	rc = sg_read(fd, lba, nsects, buf, 1);
	printf("%s\n", rc ? "FAILED" : "succeeded");

	exit(0);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tejun Heo April 7, 2010, 1:29 a.m. UTC | #2
Hello, Mark.

On 04/07/2010 07:45 AM, Mark Lord wrote:
> Most drives from Seagate, Hitachi, and possibly other brands,
> do not allow LBA28 access to sector number 0x0fffffff (2^28 - 1).
> So instead use LBA48 for such accesses.
> 
> This bug could bite a lot of systems, especially when the user has
> taken care to align partitions to 4KB boundaries.  On misaligned systems,
> it is less likely to be encountered, since a 4KB read would end at
> 0x10000000 rather than at 0x0fffffff.

Oh, nice catch.  Kinda scary.

> Signed-off-by: Mark Lord <mlord@pobox.com>
>
> --- linux-2.6.34-rc3/include/linux/ata.h    2010-03-30
> 12:24:39.000000000 -0400
> +++ linux/include/linux/ata.h    2010-04-06 18:39:41.167702612 -0400
> @@ -1025,8 +1025,8 @@
>  
>  static inline int lba_28_ok(u64 block, u32 n_block)
>  {
> -    /* check the ending block number */
> -    return ((block + n_block) < ((u64)1 << 28)) && (n_block <= 256);
> +    /* check the ending block number: must be LESS THAN 0x0fffffff */
> +    return ((block + n_block) < (u64)((1 << 28) - 1)) && (n_block <= 256);

But why move the type casting?  The cast isn't required to begin with
but starting with u64 constant means the whole compile time
calculation will be in u64 (the intention of that cast I guess).

Thanks.
Mark Lord April 7, 2010, 3:30 a.m. UTC | #3
On 06/04/10 09:29 PM, Tejun Heo wrote:
..
>> -    /* check the ending block number */
>> -    return ((block + n_block)<  ((u64)1<<  28))&&  (n_block<= 256);
>> +    /* check the ending block number: must be LESS THAN 0x0fffffff */
>> +    return ((block + n_block)<  (u64)((1<<  28) - 1))&&  (n_block<= 256);
>
> But why move the type casting?  The cast isn't required to begin with
> but starting with u64 constant means the whole compile time
> calculation will be in u64 (the intention of that cast I guess).
..

The cast was there already, so I left it there.
Just moved it to cover the entire compile-time expression as a unit,
rather than just the (u64)1  as the existing code did it.

If you prefer a different format for that line, then chirp up!  :)
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tejun Heo April 7, 2010, 5:11 a.m. UTC | #4
Hello,

On 04/07/2010 12:30 PM, Mark Lord wrote:
> The cast was there already, so I left it there.
> Just moved it to cover the entire compile-time expression as a unit,
> rather than just the (u64)1  as the existing code did it.

Oh, but there's functional difference between the two.  In the
original place, the initial casting ripples through the whole
expression making each step of the calculation u64.  It's meant to
prevent int overflow during calculation (which isn't necessary in the
current expression BTW) but if you cast the end result, it doesn't
really mean anything.  If you want to kill the casting, kill it but
moving the casting outside doesn't make much sense.  Can you repost
without moving the cast?

Thanks.
Mark Lord April 7, 2010, 5:52 p.m. UTC | #5
On 07/04/10 01:11 AM, Tejun Heo wrote:
> Hello,
>
> On 04/07/2010 12:30 PM, Mark Lord wrote:
>> The cast was there already, so I left it there.
>> Just moved it to cover the entire compile-time expression as a unit,
>> rather than just the (u64)1  as the existing code did it.
>
> Oh, but there's functional difference between the two.  In the
> original place, the initial casting ripples through the whole
> expression making each step of the calculation u64.  It's meant to
> prevent int overflow during calculation (which isn't necessary in the
> current expression BTW) but if you cast the end result, it doesn't
> really mean anything.  If you want to kill the casting, kill it but
> moving the casting outside doesn't make much sense.  Can you repost
> without moving the cast?
..

No.  But I can/did repost with it completely gone.
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

--- linux-2.6.34-rc3/include/linux/ata.h	2010-03-30 12:24:39.000000000 -0400
+++ linux/include/linux/ata.h	2010-04-06 18:39:41.167702612 -0400
@@ -1025,8 +1025,8 @@ 
  
  static inline int lba_28_ok(u64 block, u32 n_block)
  {
-	/* check the ending block number */
-	return ((block + n_block) < ((u64)1 << 28)) && (n_block <= 256);
+	/* check the ending block number: must be LESS THAN 0x0fffffff */
+	return ((block + n_block) < (u64)((1 << 28) - 1)) && (n_block <= 256);
  }
  
  static inline int lba_48_ok(u64 block, u32 n_block)