Message ID | 1397322161-32148-6-git-send-email-tytso@mit.edu |
---|---|
State | Accepted, archived |
Headers | show |
On Sat 12-04-14 13:02:41, Ted Tso wrote: > Al Viro pointed out that we need to make sure we only allow one > O_APPEND write to proceed at a time so that the the s_bitmap_maxbytes > check can be properly checked. But this introduces lock inversion between aio_mutex and i_mutex, doesn't it? Honza > Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> > --- > fs/ext4/file.c | 46 +++++++++++++++++++++++++++++----------------- > 1 file changed, 29 insertions(+), 17 deletions(-) > > diff --git a/fs/ext4/file.c b/fs/ext4/file.c > index 7c8f483..c3824dd 100644 > --- a/fs/ext4/file.c > +++ b/fs/ext4/file.c > @@ -98,24 +98,32 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, > struct file *file = iocb->ki_filp; > struct inode *inode = file_inode(iocb->ki_filp); > struct blk_plug plug; > + struct mutex *aio_mutex = NULL; > int o_direct = file->f_flags & O_DIRECT; > - int overwrite = 0; > + int overwrite = 0, i_mutex_grabbed = 0; > size_t length = iov_length(iov, nr_segs); > ssize_t ret; > > BUG_ON(iocb->ki_pos != pos); > > + if (file->f_flags & O_APPEND) { > + mutex_lock(&inode->i_mutex); > + i_mutex_grabbed = 1; > + iocb->ki_pos = pos = i_size_read(inode); > + } > + > /* > * If we have encountered a bitmap-format file, the size limit > * is smaller than s_maxbytes, which is for extent-mapped files. > */ > - > if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { > struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); > > if ((pos > sbi->s_bitmap_maxbytes || > - (pos == sbi->s_bitmap_maxbytes && length > 0))) > - return -EFBIG; > + (pos == sbi->s_bitmap_maxbytes && length > 0))) { > + ret = -EFBIG; > + goto errout; > + } > > if (pos + length > sbi->s_bitmap_maxbytes) { > nr_segs = iov_shorten((struct iovec *)iov, nr_segs, > @@ -123,19 +131,20 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, > } > } > > - if (o_direct) { > - struct mutex *aio_mutex = NULL; > - > - /* Unaligned direct AIO must be serialized; see comment above */ > - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && > - !is_sync_kiocb(iocb) && > - ext4_unaligned_aio(inode, iov, nr_segs, pos)) { > - aio_mutex = ext4_aio_mutex(inode); > - mutex_lock(aio_mutex); > - ext4_unwritten_wait(inode); > - } > + /* Unaligned direct AIO must be serialized; see comment above */ > + if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && > + !is_sync_kiocb(iocb) && > + ext4_unaligned_aio(inode, iov, nr_segs, pos)) { > + aio_mutex = ext4_aio_mutex(inode); > + mutex_lock(aio_mutex); > + ext4_unwritten_wait(inode); > + } > > + if (!i_mutex_grabbed) > mutex_lock(&inode->i_mutex); > + i_mutex_grabbed = 1; > + > + if (o_direct) { > if (aio_mutex) > mutex_unlock(aio_mutex); > blk_start_plug(&plug); > @@ -170,11 +179,11 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, > if (err == len && (map.m_flags & EXT4_MAP_MAPPED)) > overwrite = 1; > } > - } else > - mutex_lock(&inode->i_mutex); > + } > > ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); > mutex_unlock(&inode->i_mutex); > + i_mutex_grabbed = 0; > > if (ret > 0) { > ssize_t err; > @@ -186,6 +195,9 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, > if (o_direct) > blk_finish_plug(&plug); > > +errout: > + if (i_mutex_grabbed) > + mutex_unlock(&inode->i_mutex); > return ret; > } > > -- > 1.9.0 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Apr 15, 2014 at 07:05:40PM +0200, Jan Kara wrote: > On Sat 12-04-14 13:02:41, Ted Tso wrote: > > Al Viro pointed out that we need to make sure we only allow one > > O_APPEND write to proceed at a time so that the the s_bitmap_maxbytes > > check can be properly checked. > But this introduces lock inversion between aio_mutex and i_mutex, doesn't > it? Doh! Thanks for pointing that out. Fortunately, we don't need to care about optimizing the AIO/DIO O_APPEND write case, so probably the best thing to do is to unconditionally take aio_mutex and call ext4_unwritten_wait() early, before we grab i_mutex. So in effect we'll treat all O_APPEND writes as being unaligned in and in need of serialization. I'll send out a revised version for this last patch. - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 7c8f483..c3824dd 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -98,24 +98,32 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, struct file *file = iocb->ki_filp; struct inode *inode = file_inode(iocb->ki_filp); struct blk_plug plug; + struct mutex *aio_mutex = NULL; int o_direct = file->f_flags & O_DIRECT; - int overwrite = 0; + int overwrite = 0, i_mutex_grabbed = 0; size_t length = iov_length(iov, nr_segs); ssize_t ret; BUG_ON(iocb->ki_pos != pos); + if (file->f_flags & O_APPEND) { + mutex_lock(&inode->i_mutex); + i_mutex_grabbed = 1; + iocb->ki_pos = pos = i_size_read(inode); + } + /* * If we have encountered a bitmap-format file, the size limit * is smaller than s_maxbytes, which is for extent-mapped files. */ - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); if ((pos > sbi->s_bitmap_maxbytes || - (pos == sbi->s_bitmap_maxbytes && length > 0))) - return -EFBIG; + (pos == sbi->s_bitmap_maxbytes && length > 0))) { + ret = -EFBIG; + goto errout; + } if (pos + length > sbi->s_bitmap_maxbytes) { nr_segs = iov_shorten((struct iovec *)iov, nr_segs, @@ -123,19 +131,20 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, } } - if (o_direct) { - struct mutex *aio_mutex = NULL; - - /* Unaligned direct AIO must be serialized; see comment above */ - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && - !is_sync_kiocb(iocb) && - ext4_unaligned_aio(inode, iov, nr_segs, pos)) { - aio_mutex = ext4_aio_mutex(inode); - mutex_lock(aio_mutex); - ext4_unwritten_wait(inode); - } + /* Unaligned direct AIO must be serialized; see comment above */ + if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && + !is_sync_kiocb(iocb) && + ext4_unaligned_aio(inode, iov, nr_segs, pos)) { + aio_mutex = ext4_aio_mutex(inode); + mutex_lock(aio_mutex); + ext4_unwritten_wait(inode); + } + if (!i_mutex_grabbed) mutex_lock(&inode->i_mutex); + i_mutex_grabbed = 1; + + if (o_direct) { if (aio_mutex) mutex_unlock(aio_mutex); blk_start_plug(&plug); @@ -170,11 +179,11 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, if (err == len && (map.m_flags & EXT4_MAP_MAPPED)) overwrite = 1; } - } else - mutex_lock(&inode->i_mutex); + } ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); + i_mutex_grabbed = 0; if (ret > 0) { ssize_t err; @@ -186,6 +195,9 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, if (o_direct) blk_finish_plug(&plug); +errout: + if (i_mutex_grabbed) + mutex_unlock(&inode->i_mutex); return ret; }
Al Viro pointed out that we need to make sure we only allow one O_APPEND write to proceed at a time so that the the s_bitmap_maxbytes check can be properly checked. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> --- fs/ext4/file.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-)