Patchwork [v5,06/22] Treat XIP like O_DIRECT

login
register
mail settings
Submitter Ross Zwisler
Date Feb. 12, 2014, 11:53 p.m.
Message ID <alpine.OSX.2.00.1402121640310.60058@scrumpy>
Download mbox | patch
Permalink /patch/319846/
State New
Headers show

Comments

Ross Zwisler - Feb. 12, 2014, 11:53 p.m.
On Wed, 15 Jan 2014, Matthew Wilcox wrote:
> Instead of separate read and write methods, use the generic AIO
> infrastructure.  In addition to giving us support for AIO, this adds
> the locking between read() and truncate() that was missing.
> 
> Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>

...

> +static ssize_t xip_io(int rw, struct inode *inode, const struct iovec
> *iov,
> +			loff_t start, loff_t end, unsigned nr_segs,
> +			get_block_t get_block, struct buffer_head *bh)
> +{
> +	ssize_t retval = 0;
> +	unsigned seg = 0;
> +	unsigned len;
> +	unsigned copied = 0;
> +	loff_t offset = start;
> +	loff_t max = start;
> +	void *addr;
> +	bool hole = false;
> +
> +	while (offset < end) {
> +		void __user *buf = iov[seg].iov_base + copied;
> +
> +		if (max == offset) {
> +			sector_t block = offset >> inode->i_blkbits;
> +			long size;
> +			memset(bh, 0, sizeof(*bh));
> +			bh->b_size = ALIGN(end - offset, PAGE_SIZE);
> +			retval = get_block(inode, block, bh, rw == WRITE);
> +			if (retval)
> +				break;
> +			if (buffer_mapped(bh)) {
> +				retval = xip_get_addr(inode, bh, &addr);
> +				if (retval < 0)
> +					break;
> +				addr += offset - (block << inode->i_blkbits);
> +				hole = false;
> +				size = retval;
> +			} else {
> +				if (rw == WRITE) {
> +					retval = -EIO;
> +					break;
> +				}
> +				addr = NULL;
> +				hole = true;
> +				size = bh->b_size;
> +			}
> +			max = offset + size;
> +		}
> +
> +		len = min_t(unsigned, iov[seg].iov_len - copied, max - offset);
> +
> +		if (rw == WRITE)
> +			len -= __copy_from_user_nocache(addr, buf, len);
> +		else if (!hole)
> +			len -= __copy_to_user(buf, addr, len);
> +		else
> +			len -= __clear_user(buf, len);
> +
> +		if (!len)
> +			break;
> +
> +		offset += len;
> +		copied += len;
> +		if (copied == iov[seg].iov_len) {
> +			seg++;
> +			copied = 0;
> +		}
> +	}
> +
> +	return (offset == start) ? retval : offset - start;
> +}

xip_io() as it is currently written has an issue where reads can go beyond
inode->i_size.  A quick test to show this issue is:

	create a new file
	write to the file for 1/2 a block
	seek back to 0
	read for a full block

The read in this case will return 4096, the length of the full block that was
requested.  It should return 2048, reading just the data that was written.

The issue is that we do have a full block allocated in ext4, we do have it
available via XIP via xip_get_addr(), and the only extra check that we
currently have is a check against iov_len.  iov_len in this case is 4096, so
no one stops us from doing a full block read.

Here is a quick patch that fixes this issue:

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/fs/xip.c b/fs/xip.c
index e902593..1608f29 100644
--- a/fs/xip.c
+++ b/fs/xip.c
@@ -91,13 +91,16 @@  static ssize_t xip_io(int rw, struct inode *inode, const struct
 {
        ssize_t retval = 0;
        unsigned seg = 0;
-       unsigned len;
+       unsigned len, total_len;
        unsigned copied = 0;
        loff_t offset = start;
        loff_t max = start;
        void *addr;
        bool hole = false;
 
+       end = min(end, inode->i_size);
+       total_len = end - start;
+
        while (offset < end) {
                void __user *buf = iov[seg].iov_base + copied;
 
@@ -136,6 +139,7 @@  static ssize_t xip_io(int rw, struct inode *inode, const struct
                }
 
                len = min_t(unsigned, iov[seg].iov_len - copied, max - offset);
+               len = min(len, total_len);
 
                if (rw == WRITE)
                        len -= __copy_from_user_nocache(addr, buf, len);
@@ -149,6 +153,7 @@  static ssize_t xip_io(int rw, struct inode *inode, const struct
 
                offset += len;
                copied += len;
+               total_len -= len;
                if (copied == iov[seg].iov_len) {
                        seg++;
                        copied = 0;