@@ -158,6 +158,48 @@ xfs_setfilesize(
}
/*
+ * For synchronous AIO O_DIRECT writes (O_SYNC/O_DSYNC), we need to flush
+ * the disk write cache after the I/O completes.
+ */
+STATIC bool
+xfs_ioend_needs_cache_flush(
+ struct xfs_ioend *ioend)
+{
+ if (!ioend->io_isasync)
+ return false;
+
+ return (IS_SYNC(ioend->io_inode) ||
+ (ioend->io_iocb->ki_filp->f_flags & O_DSYNC));
+}
+
+STATIC void
+xfs_end_io_flush(
+ struct bio *bio,
+ int error)
+{
+ struct xfs_ioend *ioend = bio->bi_private;
+
+ if (error && ioend->io_result > 0)
+ ioend->io_result = error;
+
+ xfs_destroy_ioend(ioend);
+ bio_put(bio);
+}
+
+STATIC void
+xfs_ioend_flush_cache(
+ struct xfs_ioend *ioend)
+{
+ struct bio *bio;
+
+ bio = bio_alloc(GFP_KERNEL, 0);
+ bio->bi_end_io = xfs_end_io_flush;
+ bio->bi_bdev = xfs_find_bdev_for_inode(ioend->io_inode);
+ bio->bi_private = ioend;
+ submit_bio(WRITE_FLUSH, bio);
+}
+
+/*
* Schedule IO completion handling on the final put of an ioend.
*
* If there is no work to do we might as well call it a day and free the
@@ -172,6 +214,8 @@ xfs_finish_ioend(
queue_work(xfsconvertd_workqueue, &ioend->io_work);
else if (xfs_ioend_is_append(ioend))
queue_work(xfsdatad_workqueue, &ioend->io_work);
+ else if (xfs_ioend_needs_cache_flush(ioend))
+ queue_work(xfsflushd_workqueue, &ioend->io_work);
else
xfs_destroy_ioend(ioend);
}
@@ -226,9 +270,30 @@ done:
xfs_finish_ioend(ioend);
/* ensure we don't spin on blocked ioends */
delay(1);
- } else {
+ } else if (xfs_ioend_needs_cache_flush(ioend)) {
+ struct xfs_inode *ip = XFS_I(ioend->io_inode);
+ struct xfs_mount *mp = ip->i_mount;
+ int err;
+ int log_flushed = 0;
+
+ /*
+ * O_SYNC (and IS_SYNC) requires a log force in addition to the
+ * cache flush; if only data needs syncing (O_DSYNC), we can skip
+ * the log flush and go straight to the disk cache flush below.
+ */
+ if (IS_SYNC(ioend->io_inode) ||
+ (ioend->io_iocb->ki_filp->f_flags & __O_SYNC)) {
+ err = _xfs_log_force(mp, XFS_LOG_SYNC, &log_flushed);
+ if (err && ioend->io_result > 0)
+ ioend->io_result = err;
+ if (err || log_flushed) {
+ xfs_destroy_ioend(ioend);
+ return;
+ }
+ }
+ /* log not flushed or data sync only, flush the disk cache */
+ xfs_ioend_flush_cache(ioend);
+ } else
xfs_destroy_ioend(ioend);
- }
}
/*
@@ -20,6 +20,7 @@
extern struct workqueue_struct *xfsdatad_workqueue;
extern struct workqueue_struct *xfsconvertd_workqueue;
+extern struct workqueue_struct *xfsflushd_workqueue;
extern mempool_t *xfs_ioend_pool;
/*
@@ -47,6 +47,7 @@ STATIC int xfsbufd(void *);
static struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
struct workqueue_struct *xfsconvertd_workqueue;
+struct workqueue_struct *xfsflushd_workqueue;
#ifdef XFS_BUF_LOCK_TRACKING
# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
@@ -1802,8 +1803,15 @@ xfs_buf_init(void)
if (!xfsconvertd_workqueue)
goto out_destroy_xfsdatad_workqueue;
+ xfsflushd_workqueue = alloc_workqueue("xfsflushd",
+ WQ_MEM_RECLAIM, 1);
+ if (!xfsflushd_workqueue)
+ goto out_destroy_xfsconvertd_workqueue;
+
return 0;
+ out_destroy_xfsconvertd_workqueue:
+ destroy_workqueue(xfsconvertd_workqueue);
out_destroy_xfsdatad_workqueue:
destroy_workqueue(xfsdatad_workqueue);
out_destroy_xfslogd_workqueue:
@@ -1817,6 +1825,7 @@ xfs_buf_init(void)
void
xfs_buf_terminate(void)
{
+ destroy_workqueue(xfsflushd_workqueue);
destroy_workqueue(xfsconvertd_workqueue);
destroy_workqueue(xfsdatad_workqueue);
destroy_workqueue(xfslogd_workqueue);
Hi, If a file is opened with O_SYNC|O_DIRECT, the drive cache does not get flushed after write completion. Instead, it's flushed *before* the I/O is sent to the disk (in __generic_file_aio_write). This patch fixes that problem by marking an I/O as requiring a cache flush in endio processing. I'll send a follow-on patch to the generic write code to get rid of the bogus generic_write_sync call when EIOCBQUEUED is returned. Signed-off-by: Jeff Moyer <jmoyer@redhat.com> --- fs/xfs/xfs_aops.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_aops.h | 1 + fs/xfs/xfs_buf.c | 9 +++++++ 3 files changed, 77 insertions(+), 2 deletions(-)