Message ID | 1468852709-12360-4-git-send-email-tim.gardner@canonical.com |
---|---|
State | New |
Headers | show |
Fails to build in xenial: CC fs/ext4/file.o fs/ext4/file.c: In function ‘ext4_seek_data’: fs/ext4/file.c:597:4: error: implicit declaration of function ‘inode_unlock’ [-Werror=implicit-function-declaration] inode_unlock(inode); ^ (That doesn't get introduced until v4.5 by "5955102 wrappers for ->i_mutex access") -Kamal On Mon, Jul 18, 2016 at 08:38:29AM -0600, Tim Gardner wrote: > From: Jan Kara <jack@suse.cz> > > BugLink: http://bugs.launchpad.net/bugs/1602524 > > Using SEEK_DATA in a huge sparse file can easily lead to sotflockups as > ext4_seek_data() iterates hole block-by-block. Fix the problem by using > returned hole size from ext4_map_blocks() and thus skip the hole in one > go. > > Update also SEEK_HOLE implementation to follow the same pattern as > SEEK_DATA to make future maintenance easier. > > Furthermore we add cond_resched() to both ext4_seek_data() and > ext4_seek_hole() to avoid softlockups in case evil user creates huge > fragmented file and we have to go through lots of extents. > > Signed-off-by: Jan Kara <jack@suse.cz> > Signed-off-by: Theodore Ts'o <tytso@mit.edu> > (back ported from commit 2d90c160e5f1d784e180f1e1458d56eee4d7f4f4) > Signed-off-by: Tim Gardner <tim.gardner@canonical.com> > > Conflicts: > fs/ext4/ext4.h > --- > fs/ext4/ext4.h | 5 +++ > fs/ext4/file.c | 97 +++++++++++++++++++++------------------------------------ > fs/ext4/inode.c | 67 +++++++++++++++++++++++++++++++++++++++ > 3 files changed, 108 insertions(+), 61 deletions(-) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index b7e921d..6148573 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -2520,6 +2520,11 @@ extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf); > extern qsize_t *ext4_get_reserved_space(struct inode *inode); > extern void ext4_da_update_reserve_space(struct inode *inode, > int used, int quota_claim); > +extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, > + ext4_fsblk_t pblk, ext4_lblk_t len); > +extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, > + unsigned int map_len, > + struct extent_status *result); > > /* indirect.c */ > extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, > diff --git a/fs/ext4/file.c b/fs/ext4/file.c > index 0d24ebc..2753bed 100644 > --- a/fs/ext4/file.c > +++ b/fs/ext4/file.c > @@ -435,7 +435,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) > */ > static int ext4_find_unwritten_pgoff(struct inode *inode, > int whence, > - struct ext4_map_blocks *map, > + ext4_lblk_t end_blk, > loff_t *offset) > { > struct pagevec pvec; > @@ -450,7 +450,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, > blkbits = inode->i_sb->s_blocksize_bits; > startoff = *offset; > lastoff = startoff; > - endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; > + endoff = (loff_t)end_blk << blkbits; > > index = startoff >> PAGE_CACHE_SHIFT; > end = endoff >> PAGE_CACHE_SHIFT; > @@ -568,12 +568,11 @@ out: > static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) > { > struct inode *inode = file->f_mapping->host; > - struct ext4_map_blocks map; > struct extent_status es; > ext4_lblk_t start, last, end; > loff_t dataoff, isize; > int blkbits; > - int ret = 0; > + int ret; > > mutex_lock(&inode->i_mutex); > > @@ -590,41 +589,32 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) > dataoff = offset; > > do { > - map.m_lblk = last; > - map.m_len = end - last + 1; > - ret = ext4_map_blocks(NULL, inode, &map, 0); > - if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { > - if (last != start) > - dataoff = (loff_t)last << blkbits; > - break; > + ret = ext4_get_next_extent(inode, last, end - last + 1, &es); > + if (ret <= 0) { > + /* No extent found -> no data */ > + if (ret == 0) > + ret = -ENXIO; > + inode_unlock(inode); > + return ret; > } > > - /* > - * If there is a delay extent at this offset, > - * it will be as a data. > - */ > - ext4_es_find_delayed_extent_range(inode, last, last, &es); > - if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { > - if (last != start) > - dataoff = (loff_t)last << blkbits; > + last = es.es_lblk; > + if (last != start) > + dataoff = (loff_t)last << blkbits; > + if (!ext4_es_is_unwritten(&es)) > break; > - } > > /* > * If there is a unwritten extent at this offset, > * it will be as a data or a hole according to page > * cache that has data or not. > */ > - if (map.m_flags & EXT4_MAP_UNWRITTEN) { > - int unwritten; > - unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, > - &map, &dataoff); > - if (unwritten) > - break; > - } > - > - last++; > + if (ext4_find_unwritten_pgoff(inode, SEEK_DATA, > + es.es_lblk + es.es_len, &dataoff)) > + break; > + last += es.es_len; > dataoff = (loff_t)last << blkbits; > + cond_resched(); > } while (last <= end); > > mutex_unlock(&inode->i_mutex); > @@ -641,12 +631,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) > static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) > { > struct inode *inode = file->f_mapping->host; > - struct ext4_map_blocks map; > struct extent_status es; > ext4_lblk_t start, last, end; > loff_t holeoff, isize; > int blkbits; > - int ret = 0; > + int ret; > > mutex_lock(&inode->i_mutex); > > @@ -663,44 +652,30 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) > holeoff = offset; > > do { > - map.m_lblk = last; > - map.m_len = end - last + 1; > - ret = ext4_map_blocks(NULL, inode, &map, 0); > - if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { > - last += ret; > - holeoff = (loff_t)last << blkbits; > - continue; > + ret = ext4_get_next_extent(inode, last, end - last + 1, &es); > + if (ret < 0) { > + inode_unlock(inode); > + return ret; > } > - > - /* > - * If there is a delay extent at this offset, > - * we will skip this extent. > - */ > - ext4_es_find_delayed_extent_range(inode, last, last, &es); > - if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { > - last = es.es_lblk + es.es_len; > - holeoff = (loff_t)last << blkbits; > - continue; > + /* Found a hole? */ > + if (ret == 0 || es.es_lblk > last) { > + if (last != start) > + holeoff = (loff_t)last << blkbits; > + break; > } > - > /* > * If there is a unwritten extent at this offset, > * it will be as a data or a hole according to page > * cache that has data or not. > */ > - if (map.m_flags & EXT4_MAP_UNWRITTEN) { > - int unwritten; > - unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, > - &map, &holeoff); > - if (!unwritten) { > - last += ret; > - holeoff = (loff_t)last << blkbits; > - continue; > - } > - } > + if (ext4_es_is_unwritten(&es) && > + ext4_find_unwritten_pgoff(inode, SEEK_HOLE, > + last + es.es_len, &holeoff)) > + break; > > - /* find a hole */ > - break; > + last += es.es_len; > + holeoff = (loff_t)last << blkbits; > + cond_resched(); > } while (last <= end); > > mutex_unlock(&inode->i_mutex); > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index f2736bc..42658bc 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -5434,3 +5434,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) > > return err; > } > + > +/* > + * Find the first extent at or after @lblk in an inode that is not a hole. > + * Search for @map_len blocks at most. The extent is returned in @result. > + * > + * The function returns 1 if we found an extent. The function returns 0 in > + * case there is no extent at or after @lblk and in that case also sets > + * @result->es_len to 0. In case of error, the error code is returned. > + */ > +int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, > + unsigned int map_len, struct extent_status *result) > +{ > + struct ext4_map_blocks map; > + struct extent_status es = {}; > + int ret; > + > + map.m_lblk = lblk; > + map.m_len = map_len; > + > + /* > + * For non-extent based files this loop may iterate several times since > + * we do not determine full hole size. > + */ > + while (map.m_len > 0) { > + ret = ext4_map_blocks(NULL, inode, &map, 0); > + if (ret < 0) > + return ret; > + /* There's extent covering m_lblk? Just return it. */ > + if (ret > 0) { > + int status; > + > + ext4_es_store_pblock(result, map.m_pblk); > + result->es_lblk = map.m_lblk; > + result->es_len = map.m_len; > + if (map.m_flags & EXT4_MAP_UNWRITTEN) > + status = EXTENT_STATUS_UNWRITTEN; > + else > + status = EXTENT_STATUS_WRITTEN; > + ext4_es_store_status(result, status); > + return 1; > + } > + ext4_es_find_delayed_extent_range(inode, map.m_lblk, > + map.m_lblk + map.m_len - 1, > + &es); > + /* Is delalloc data before next block in extent tree? */ > + if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) { > + ext4_lblk_t offset = 0; > + > + if (es.es_lblk < lblk) > + offset = lblk - es.es_lblk; > + result->es_lblk = es.es_lblk + offset; > + ext4_es_store_pblock(result, > + ext4_es_pblock(&es) + offset); > + result->es_len = es.es_len - offset; > + ext4_es_store_status(result, ext4_es_status(&es)); > + > + return 1; > + } > + /* There's a hole at m_lblk, advance us after it */ > + map.m_lblk += map.m_len; > + map_len -= map.m_len; > + map.m_len = map_len; > + cond_resched(); > + } > + result->es_len = 0; > + return 0; > +} > -- > 1.9.1 > > > -- > kernel-team mailing list > kernel-team@lists.ubuntu.com > https://lists.ubuntu.com/mailman/listinfo/kernel-team
Patch 0003-ext4-more-efficient-SEEK_DATA-implementation.patch as attached to the bug did not apply cleanly, so I did my own backport. Clearly I got it wrong. This patch will actually apply with a bit of fuzz, so that is V2. Test kernel at http://people.canonical.com/~rtg/ext4-lp1602524/ No changes to patches 1 or 2. rtg [PATCH 1/3 Xenial SRU V2] ext4: factor out determining of hole size [PATCH 2/3 Xenial SRU V2] ext4: return hole from ext4_map_blocks() [PATCH 3/3 Xenial SRU V2] ext4: more efficient SEEK_DATA implementation
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b7e921d..6148573 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2520,6 +2520,11 @@ extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); +extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, + ext4_fsblk_t pblk, ext4_lblk_t len); +extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, + unsigned int map_len, + struct extent_status *result); /* indirect.c */ extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 0d24ebc..2753bed 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -435,7 +435,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) */ static int ext4_find_unwritten_pgoff(struct inode *inode, int whence, - struct ext4_map_blocks *map, + ext4_lblk_t end_blk, loff_t *offset) { struct pagevec pvec; @@ -450,7 +450,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, blkbits = inode->i_sb->s_blocksize_bits; startoff = *offset; lastoff = startoff; - endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; + endoff = (loff_t)end_blk << blkbits; index = startoff >> PAGE_CACHE_SHIFT; end = endoff >> PAGE_CACHE_SHIFT; @@ -568,12 +568,11 @@ out: static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) { struct inode *inode = file->f_mapping->host; - struct ext4_map_blocks map; struct extent_status es; ext4_lblk_t start, last, end; loff_t dataoff, isize; int blkbits; - int ret = 0; + int ret; mutex_lock(&inode->i_mutex); @@ -590,41 +589,32 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) dataoff = offset; do { - map.m_lblk = last; - map.m_len = end - last + 1; - ret = ext4_map_blocks(NULL, inode, &map, 0); - if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { - if (last != start) - dataoff = (loff_t)last << blkbits; - break; + ret = ext4_get_next_extent(inode, last, end - last + 1, &es); + if (ret <= 0) { + /* No extent found -> no data */ + if (ret == 0) + ret = -ENXIO; + inode_unlock(inode); + return ret; } - /* - * If there is a delay extent at this offset, - * it will be as a data. - */ - ext4_es_find_delayed_extent_range(inode, last, last, &es); - if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { - if (last != start) - dataoff = (loff_t)last << blkbits; + last = es.es_lblk; + if (last != start) + dataoff = (loff_t)last << blkbits; + if (!ext4_es_is_unwritten(&es)) break; - } /* * If there is a unwritten extent at this offset, * it will be as a data or a hole according to page * cache that has data or not. */ - if (map.m_flags & EXT4_MAP_UNWRITTEN) { - int unwritten; - unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, - &map, &dataoff); - if (unwritten) - break; - } - - last++; + if (ext4_find_unwritten_pgoff(inode, SEEK_DATA, + es.es_lblk + es.es_len, &dataoff)) + break; + last += es.es_len; dataoff = (loff_t)last << blkbits; + cond_resched(); } while (last <= end); mutex_unlock(&inode->i_mutex); @@ -641,12 +631,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) { struct inode *inode = file->f_mapping->host; - struct ext4_map_blocks map; struct extent_status es; ext4_lblk_t start, last, end; loff_t holeoff, isize; int blkbits; - int ret = 0; + int ret; mutex_lock(&inode->i_mutex); @@ -663,44 +652,30 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) holeoff = offset; do { - map.m_lblk = last; - map.m_len = end - last + 1; - ret = ext4_map_blocks(NULL, inode, &map, 0); - if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { - last += ret; - holeoff = (loff_t)last << blkbits; - continue; + ret = ext4_get_next_extent(inode, last, end - last + 1, &es); + if (ret < 0) { + inode_unlock(inode); + return ret; } - - /* - * If there is a delay extent at this offset, - * we will skip this extent. - */ - ext4_es_find_delayed_extent_range(inode, last, last, &es); - if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { - last = es.es_lblk + es.es_len; - holeoff = (loff_t)last << blkbits; - continue; + /* Found a hole? */ + if (ret == 0 || es.es_lblk > last) { + if (last != start) + holeoff = (loff_t)last << blkbits; + break; } - /* * If there is a unwritten extent at this offset, * it will be as a data or a hole according to page * cache that has data or not. */ - if (map.m_flags & EXT4_MAP_UNWRITTEN) { - int unwritten; - unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, - &map, &holeoff); - if (!unwritten) { - last += ret; - holeoff = (loff_t)last << blkbits; - continue; - } - } + if (ext4_es_is_unwritten(&es) && + ext4_find_unwritten_pgoff(inode, SEEK_HOLE, + last + es.es_len, &holeoff)) + break; - /* find a hole */ - break; + last += es.es_len; + holeoff = (loff_t)last << blkbits; + cond_resched(); } while (last <= end); mutex_unlock(&inode->i_mutex); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f2736bc..42658bc 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5434,3 +5434,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return err; } + +/* + * Find the first extent at or after @lblk in an inode that is not a hole. + * Search for @map_len blocks at most. The extent is returned in @result. + * + * The function returns 1 if we found an extent. The function returns 0 in + * case there is no extent at or after @lblk and in that case also sets + * @result->es_len to 0. In case of error, the error code is returned. + */ +int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, + unsigned int map_len, struct extent_status *result) +{ + struct ext4_map_blocks map; + struct extent_status es = {}; + int ret; + + map.m_lblk = lblk; + map.m_len = map_len; + + /* + * For non-extent based files this loop may iterate several times since + * we do not determine full hole size. + */ + while (map.m_len > 0) { + ret = ext4_map_blocks(NULL, inode, &map, 0); + if (ret < 0) + return ret; + /* There's extent covering m_lblk? Just return it. */ + if (ret > 0) { + int status; + + ext4_es_store_pblock(result, map.m_pblk); + result->es_lblk = map.m_lblk; + result->es_len = map.m_len; + if (map.m_flags & EXT4_MAP_UNWRITTEN) + status = EXTENT_STATUS_UNWRITTEN; + else + status = EXTENT_STATUS_WRITTEN; + ext4_es_store_status(result, status); + return 1; + } + ext4_es_find_delayed_extent_range(inode, map.m_lblk, + map.m_lblk + map.m_len - 1, + &es); + /* Is delalloc data before next block in extent tree? */ + if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) { + ext4_lblk_t offset = 0; + + if (es.es_lblk < lblk) + offset = lblk - es.es_lblk; + result->es_lblk = es.es_lblk + offset; + ext4_es_store_pblock(result, + ext4_es_pblock(&es) + offset); + result->es_len = es.es_len - offset; + ext4_es_store_status(result, ext4_es_status(&es)); + + return 1; + } + /* There's a hole at m_lblk, advance us after it */ + map.m_lblk += map.m_len; + map_len -= map.m_len; + map.m_len = map_len; + cond_resched(); + } + result->es_len = 0; + return 0; +}