Message ID | 93ab8386c4620395c5e674a7930506895fc758ef.1681365596.git.ritesh.list@gmail.com |
---|---|
State | Not Applicable |
Headers | show |
Series | ext2: DIO to use iomap | expand |
On Thu, Apr 13, 2023 at 02:10:32PM +0530, Ritesh Harjani (IBM) wrote: > This patch adds trace point events for iomap DIO path. > > <e.g. iomap dio trace> > xfs_io-8815 [000] 526.790418: iomap_dio_rw_begin: dev 7:7 ino 0xc isize 0x0 pos 0x0 count 4096 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 0 > xfs_io-8815 [000] 526.790978: iomap_dio_complete: dev 7:7 ino 0xc isize 0x1000 pos 0x1000 flags DIRECT aio 0 error 0 ret 4096 > xfs_io-8815 [000] 526.790988: iomap_dio_rw_end: dev 7:7 ino 0xc isize 0x1000 pos 0x1000 count 0 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 4096 > fsx-8827 [005] 526.939345: iomap_dio_rw_begin: dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 61440 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags done_before 0 aio 1 ret 0 > fsx-8827 [005] 526.939459: iomap_dio_rw_end: dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 0 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags done_before 0 aio 1 ret -529 > ksoftirqd/5-41 [005] 526.939564: iomap_dio_complete: dev 7:7 ino 0xc isize 0x922f8 pos 0x5e000 flags NOWAIT|DIRECT|ALLOC_CACHE aio 1 error 0 ret 61440 > > Tested-by: Disha Goel <disgoel@linux.ibm.com> > Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com> > --- > fs/iomap/direct-io.c | 3 ++ > fs/iomap/trace.c | 1 + > fs/iomap/trace.h | 90 ++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 94 insertions(+) > > diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c > index 5871956ee880..bb7a6dfbc8b3 100644 > --- a/fs/iomap/direct-io.c > +++ b/fs/iomap/direct-io.c > @@ -130,6 +130,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) > if (ret > 0) > ret += dio->done_before; > > + trace_iomap_dio_complete(iocb, dio->error, ret); > kfree(dio); > > return ret; > @@ -681,6 +682,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, > struct iomap_dio *dio; > ssize_t ret = 0; > > + trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret); > dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private, 
> done_before); > if (IS_ERR_OR_NULL(dio)) { > @@ -689,6 +691,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, > } > ret = iomap_dio_complete(dio); > out: > + trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret); > return ret; > } > EXPORT_SYMBOL_GPL(iomap_dio_rw); > diff --git a/fs/iomap/trace.c b/fs/iomap/trace.c > index da217246b1a9..728d5443daf5 100644 > --- a/fs/iomap/trace.c > +++ b/fs/iomap/trace.c > @@ -3,6 +3,7 @@ > * Copyright (c) 2019 Christoph Hellwig > */ > #include <linux/iomap.h> > +#include <linux/uio.h> > > /* > * We include this last to have the helpers above available for the trace > diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h > index f6ea9540d082..dcb4dd4db5fb 100644 > --- a/fs/iomap/trace.h > +++ b/fs/iomap/trace.h > @@ -183,6 +183,96 @@ TRACE_EVENT(iomap_iter, > (void *)__entry->caller) > ); > > +#define TRACE_IOMAP_DIO_STRINGS \ > + {IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \ > + {IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \ > + {IOMAP_DIO_PARTIAL, "DIO_PARTIAL" } Can you make the strings line up too, please? > + > +DECLARE_EVENT_CLASS(iomap_dio_class, > + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, > + unsigned int dio_flags, u64 done_before, int ret), We're passing in ssize_t values for @ret, shouldn't the types match? > + TP_ARGS(iocb, iter, dio_flags, done_before, ret), > + TP_STRUCT__entry( > + __field(dev_t, dev) > + __field(ino_t, ino) > + __field(loff_t, isize) > + __field(loff_t, pos) > + __field(u64, count) What's the difference between "length" as used in the other tracepoints and "count" here? 
> + __field(u64, done_before) > + __field(int, ki_flags) > + __field(unsigned int, dio_flags) > + __field(bool, aio) > + __field(int, ret) > + ), > + TP_fast_assign( > + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; > + __entry->ino = file_inode(iocb->ki_filp)->i_ino; > + __entry->isize = file_inode(iocb->ki_filp)->i_size; > + __entry->pos = iocb->ki_pos; > + __entry->count = iov_iter_count(iter); > + __entry->done_before = done_before; > + __entry->dio_flags = dio_flags; > + __entry->ki_flags = iocb->ki_flags; > + __entry->aio = !is_sync_kiocb(iocb); > + __entry->ret = ret; > + ), > + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx count %llu " count and done_before are lengths of file operations, in bytes, right? Everywhere else we use 0x%llx for that. > + "flags %s dio_flags %s done_before %llu aio %d ret %d", > + MAJOR(__entry->dev), MINOR(__entry->dev), > + __entry->ino, > + __entry->isize, > + __entry->pos, > + __entry->count, > + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), > + __print_flags(__entry->dio_flags, "|", TRACE_IOMAP_DIO_STRINGS), > + __entry->done_before, > + __entry->aio, > + __entry->ret) > +) > + > +#define DEFINE_DIO_RW_EVENT(name) \ > +DEFINE_EVENT(iomap_dio_class, name, \ > + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, \ > + unsigned int dio_flags, u64 done_before, \ > + int ret), \ > + TP_ARGS(iocb, iter, dio_flags, done_before, ret)) > +DEFINE_DIO_RW_EVENT(iomap_dio_rw_begin); > +DEFINE_DIO_RW_EVENT(iomap_dio_rw_end); > + > +TRACE_EVENT(iomap_dio_complete, > + TP_PROTO(struct kiocb *iocb, int error, int ret), > + TP_ARGS(iocb, error, ret), > + TP_STRUCT__entry( > + __field(dev_t, dev) > + __field(ino_t, ino) > + __field(loff_t, isize) > + __field(loff_t, pos) > + __field(int, ki_flags) > + __field(bool, aio) > + __field(int, error) > + __field(int, ret) Same comment about @ret and ssize_t here. 
--D > + ), > + TP_fast_assign( > + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; > + __entry->ino = file_inode(iocb->ki_filp)->i_ino; > + __entry->isize = file_inode(iocb->ki_filp)->i_size; > + __entry->pos = iocb->ki_pos; > + __entry->ki_flags = iocb->ki_flags; > + __entry->aio = !is_sync_kiocb(iocb); > + __entry->error = error; > + __entry->ret = ret; > + ), > + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx flags %s aio %d error %d ret %d", > + MAJOR(__entry->dev), MINOR(__entry->dev), > + __entry->ino, > + __entry->isize, > + __entry->pos, > + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), > + __entry->aio, > + __entry->error, > + __entry->ret) > +); > + > #endif /* _IOMAP_TRACE_H */ > > #undef TRACE_INCLUDE_PATH > -- > 2.39.2 >
"Darrick J. Wong" <djwong@kernel.org> writes: > On Thu, Apr 13, 2023 at 02:10:32PM +0530, Ritesh Harjani (IBM) wrote: >> This patch adds trace point events for iomap DIO path. >> >> <e.g. iomap dio trace> >> xfs_io-8815 [000] 526.790418: iomap_dio_rw_begin: dev 7:7 ino 0xc isize 0x0 pos 0x0 count 4096 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 0 >> xfs_io-8815 [000] 526.790978: iomap_dio_complete: dev 7:7 ino 0xc isize 0x1000 pos 0x1000 flags DIRECT aio 0 error 0 ret 4096 >> xfs_io-8815 [000] 526.790988: iomap_dio_rw_end: dev 7:7 ino 0xc isize 0x1000 pos 0x1000 count 0 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 4096 >> fsx-8827 [005] 526.939345: iomap_dio_rw_begin: dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 61440 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags done_before 0 aio 1 ret 0 >> fsx-8827 [005] 526.939459: iomap_dio_rw_end: dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 0 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags done_before 0 aio 1 ret -529 >> ksoftirqd/5-41 [005] 526.939564: iomap_dio_complete: dev 7:7 ino 0xc isize 0x922f8 pos 0x5e000 flags NOWAIT|DIRECT|ALLOC_CACHE aio 1 error 0 ret 61440 >> >> Tested-by: Disha Goel <disgoel@linux.ibm.com> >> Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com> >> --- >> fs/iomap/direct-io.c | 3 ++ >> fs/iomap/trace.c | 1 + >> fs/iomap/trace.h | 90 ++++++++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 94 insertions(+) >> >> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c >> index 5871956ee880..bb7a6dfbc8b3 100644 >> --- a/fs/iomap/direct-io.c >> +++ b/fs/iomap/direct-io.c >> @@ -130,6 +130,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) >> if (ret > 0) >> ret += dio->done_before; >> >> + trace_iomap_dio_complete(iocb, dio->error, ret); >> kfree(dio); >> >> return ret; >> @@ -681,6 +682,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, >> struct iomap_dio *dio; >> ssize_t ret = 0; >> >> + trace_iomap_dio_rw_begin(iocb, iter, dio_flags, 
done_before, ret); >> dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private, >> done_before); >> if (IS_ERR_OR_NULL(dio)) { >> @@ -689,6 +691,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, >> } >> ret = iomap_dio_complete(dio); >> out: >> + trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret); >> return ret; >> } >> EXPORT_SYMBOL_GPL(iomap_dio_rw); >> diff --git a/fs/iomap/trace.c b/fs/iomap/trace.c >> index da217246b1a9..728d5443daf5 100644 >> --- a/fs/iomap/trace.c >> +++ b/fs/iomap/trace.c >> @@ -3,6 +3,7 @@ >> * Copyright (c) 2019 Christoph Hellwig >> */ >> #include <linux/iomap.h> >> +#include <linux/uio.h> >> >> /* >> * We include this last to have the helpers above available for the trace >> diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h >> index f6ea9540d082..dcb4dd4db5fb 100644 >> --- a/fs/iomap/trace.h >> +++ b/fs/iomap/trace.h >> @@ -183,6 +183,96 @@ TRACE_EVENT(iomap_iter, >> (void *)__entry->caller) >> ); >> >> +#define TRACE_IOMAP_DIO_STRINGS \ >> + {IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \ >> + {IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \ >> + {IOMAP_DIO_PARTIAL, "DIO_PARTIAL" } > > Can you make the strings line up too, please? > Ok near other _STRINGS macro. Sure, will do that. >> + >> +DECLARE_EVENT_CLASS(iomap_dio_class, >> + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, >> + unsigned int dio_flags, u64 done_before, int ret), > > We're passing in ssize_t values for @ret, shouldn't the types match? > Yes, I missed to correct that. Will make it loff_t. This should be fixed in ext2 trace point macro too. (ssize_t can vary based on 32 bit v/s 64 bit, so while printing it as %llx it gives warning on 32bit. 
Hence will use loff_t for ret) >> + TP_ARGS(iocb, iter, dio_flags, done_before, ret), >> + TP_STRUCT__entry( >> + __field(dev_t, dev) >> + __field(ino_t, ino) >> + __field(loff_t, isize) >> + __field(loff_t, pos) >> + __field(u64, count) > > What's the difference between "length" as used in the other tracepoints > and "count" here? > Yup let me make it length which will be a more consistent naming. I chose count just because of (iov_iter_count(iter)). >> + __field(u64, done_before) >> + __field(int, ki_flags) >> + __field(unsigned int, dio_flags) >> + __field(bool, aio) >> + __field(int, ret) >> + ), >> + TP_fast_assign( >> + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; >> + __entry->ino = file_inode(iocb->ki_filp)->i_ino; >> + __entry->isize = file_inode(iocb->ki_filp)->i_size; >> + __entry->pos = iocb->ki_pos; >> + __entry->count = iov_iter_count(iter); >> + __entry->done_before = done_before; >> + __entry->dio_flags = dio_flags; >> + __entry->ki_flags = iocb->ki_flags; >> + __entry->aio = !is_sync_kiocb(iocb); >> + __entry->ret = ret; >> + ), >> + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx count %llu " > > count and done_before are lengths of file operations, in bytes, right? Yes, that's right. > > Everywhere else we use 0x%llx for that. > Yup I had noticed that, but I guess I missed it. Thanks for catching it. I will fix it. 
>> + "flags %s dio_flags %s done_before %llu aio %d ret %d", >> + MAJOR(__entry->dev), MINOR(__entry->dev), >> + __entry->ino, >> + __entry->isize, >> + __entry->pos, >> + __entry->count, >> + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), >> + __print_flags(__entry->dio_flags, "|", TRACE_IOMAP_DIO_STRINGS), >> + __entry->done_before, >> + __entry->aio, >> + __entry->ret) >> +) >> + >> +#define DEFINE_DIO_RW_EVENT(name) \ >> +DEFINE_EVENT(iomap_dio_class, name, \ >> + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, \ >> + unsigned int dio_flags, u64 done_before, \ >> + int ret), \ >> + TP_ARGS(iocb, iter, dio_flags, done_before, ret)) >> +DEFINE_DIO_RW_EVENT(iomap_dio_rw_begin); >> +DEFINE_DIO_RW_EVENT(iomap_dio_rw_end); >> + >> +TRACE_EVENT(iomap_dio_complete, >> + TP_PROTO(struct kiocb *iocb, int error, int ret), >> + TP_ARGS(iocb, error, ret), >> + TP_STRUCT__entry( >> + __field(dev_t, dev) >> + __field(ino_t, ino) >> + __field(loff_t, isize) >> + __field(loff_t, pos) >> + __field(int, ki_flags) >> + __field(bool, aio) >> + __field(int, error) >> + __field(int, ret) > > Same comment about @ret and ssize_t here. Got it. Thanks for the review! -ritesh > > --D > >> + ), >> + TP_fast_assign( >> + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; >> + __entry->ino = file_inode(iocb->ki_filp)->i_ino; >> + __entry->isize = file_inode(iocb->ki_filp)->i_size; >> + __entry->pos = iocb->ki_pos; >> + __entry->ki_flags = iocb->ki_flags; >> + __entry->aio = !is_sync_kiocb(iocb); >> + __entry->error = error; >> + __entry->ret = ret; >> + ), >> + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx flags %s aio %d error %d ret %d", >> + MAJOR(__entry->dev), MINOR(__entry->dev), >> + __entry->ino, >> + __entry->isize, >> + __entry->pos, >> + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), >> + __entry->aio, >> + __entry->error, >> + __entry->ret) >> +); >> + >> #endif /* _IOMAP_TRACE_H */ >> >> #undef TRACE_INCLUDE_PATH >> -- >> 2.39.2 >>
On Fri, Apr 14, 2023 at 01:48:49AM +0530, Ritesh Harjani wrote: > "Darrick J. Wong" <djwong@kernel.org> writes: > > > On Thu, Apr 13, 2023 at 02:10:32PM +0530, Ritesh Harjani (IBM) wrote: > >> This patch adds trace point events for iomap DIO path. > >> > >> <e.g. iomap dio trace> > >> xfs_io-8815 [000] 526.790418: iomap_dio_rw_begin: dev 7:7 ino 0xc isize 0x0 pos 0x0 count 4096 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 0 > >> xfs_io-8815 [000] 526.790978: iomap_dio_complete: dev 7:7 ino 0xc isize 0x1000 pos 0x1000 flags DIRECT aio 0 error 0 ret 4096 > >> xfs_io-8815 [000] 526.790988: iomap_dio_rw_end: dev 7:7 ino 0xc isize 0x1000 pos 0x1000 count 0 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 4096 > >> fsx-8827 [005] 526.939345: iomap_dio_rw_begin: dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 61440 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags done_before 0 aio 1 ret 0 > >> fsx-8827 [005] 526.939459: iomap_dio_rw_end: dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 0 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags done_before 0 aio 1 ret -529 > >> ksoftirqd/5-41 [005] 526.939564: iomap_dio_complete: dev 7:7 ino 0xc isize 0x922f8 pos 0x5e000 flags NOWAIT|DIRECT|ALLOC_CACHE aio 1 error 0 ret 61440 > >> > >> Tested-by: Disha Goel <disgoel@linux.ibm.com> > >> Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com> > >> --- > >> fs/iomap/direct-io.c | 3 ++ > >> fs/iomap/trace.c | 1 + > >> fs/iomap/trace.h | 90 ++++++++++++++++++++++++++++++++++++++++++++ > >> 3 files changed, 94 insertions(+) > >> > >> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c > >> index 5871956ee880..bb7a6dfbc8b3 100644 > >> --- a/fs/iomap/direct-io.c > >> +++ b/fs/iomap/direct-io.c > >> @@ -130,6 +130,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) > >> if (ret > 0) > >> ret += dio->done_before; > >> > >> + trace_iomap_dio_complete(iocb, dio->error, ret); > >> kfree(dio); > >> > >> return ret; > >> @@ -681,6 +682,7 @@ iomap_dio_rw(struct kiocb 
*iocb, struct iov_iter *iter, > >> struct iomap_dio *dio; > >> ssize_t ret = 0; > >> > >> + trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret); > >> dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private, > >> done_before); > >> if (IS_ERR_OR_NULL(dio)) { > >> @@ -689,6 +691,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, > >> } > >> ret = iomap_dio_complete(dio); > >> out: > >> + trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret); > >> return ret; > >> } > >> EXPORT_SYMBOL_GPL(iomap_dio_rw); > >> diff --git a/fs/iomap/trace.c b/fs/iomap/trace.c > >> index da217246b1a9..728d5443daf5 100644 > >> --- a/fs/iomap/trace.c > >> +++ b/fs/iomap/trace.c > >> @@ -3,6 +3,7 @@ > >> * Copyright (c) 2019 Christoph Hellwig > >> */ > >> #include <linux/iomap.h> > >> +#include <linux/uio.h> > >> > >> /* > >> * We include this last to have the helpers above available for the trace > >> diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h > >> index f6ea9540d082..dcb4dd4db5fb 100644 > >> --- a/fs/iomap/trace.h > >> +++ b/fs/iomap/trace.h > >> @@ -183,6 +183,96 @@ TRACE_EVENT(iomap_iter, > >> (void *)__entry->caller) > >> ); > >> > >> +#define TRACE_IOMAP_DIO_STRINGS \ > >> + {IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \ > >> + {IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \ > >> + {IOMAP_DIO_PARTIAL, "DIO_PARTIAL" } > > > > Can you make the strings line up too, please? > > > > Ok near other _STRINGS macro. Sure, will do that. > > > >> + > >> +DECLARE_EVENT_CLASS(iomap_dio_class, > >> + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, > >> + unsigned int dio_flags, u64 done_before, int ret), > > > > We're passing in ssize_t values for @ret, shouldn't the types match? > > > > Yes, I missed to correct that. Will make it loff_t. > This should be fixed in ext2 trace point macro too. > > (ssize_t can vary based on 32 bit v/s 64 bit, so while printing it as > %llx it gives warning on 32bit. Hence will use loff_t for ret) How about %zd? 
--D > > >> + TP_ARGS(iocb, iter, dio_flags, done_before, ret), > >> + TP_STRUCT__entry( > >> + __field(dev_t, dev) > >> + __field(ino_t, ino) > >> + __field(loff_t, isize) > >> + __field(loff_t, pos) > >> + __field(u64, count) > > > > What's the difference between "length" as used in the other tracepoints > > and "count" here? > > > > Yup let me make it length which will be a more consistent naming. > I chose count just because of (iov_iter_count(iter)). > > >> + __field(u64, done_before) > >> + __field(int, ki_flags) > >> + __field(unsigned int, dio_flags) > >> + __field(bool, aio) > >> + __field(int, ret) > >> + ), > >> + TP_fast_assign( > >> + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; > >> + __entry->ino = file_inode(iocb->ki_filp)->i_ino; > >> + __entry->isize = file_inode(iocb->ki_filp)->i_size; > >> + __entry->pos = iocb->ki_pos; > >> + __entry->count = iov_iter_count(iter); > >> + __entry->done_before = done_before; > >> + __entry->dio_flags = dio_flags; > >> + __entry->ki_flags = iocb->ki_flags; > >> + __entry->aio = !is_sync_kiocb(iocb); > >> + __entry->ret = ret; > >> + ), > >> + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx count %llu " > > > > count and done_before are lengths of file operations, in bytes, right? > > Yes, that's right. > > > > > Everywhere else we use 0x%llx for that. > > > > Yup I had noticed that, but I guess I missed it. > Thanks for catching it. I will fix it. 
> > >> + "flags %s dio_flags %s done_before %llu aio %d ret %d", > >> + MAJOR(__entry->dev), MINOR(__entry->dev), > >> + __entry->ino, > >> + __entry->isize, > >> + __entry->pos, > >> + __entry->count, > >> + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), > >> + __print_flags(__entry->dio_flags, "|", TRACE_IOMAP_DIO_STRINGS), > >> + __entry->done_before, > >> + __entry->aio, > >> + __entry->ret) > >> +) > >> + > >> +#define DEFINE_DIO_RW_EVENT(name) \ > >> +DEFINE_EVENT(iomap_dio_class, name, \ > >> + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, \ > >> + unsigned int dio_flags, u64 done_before, \ > >> + int ret), \ > >> + TP_ARGS(iocb, iter, dio_flags, done_before, ret)) > >> +DEFINE_DIO_RW_EVENT(iomap_dio_rw_begin); > >> +DEFINE_DIO_RW_EVENT(iomap_dio_rw_end); > >> + > >> +TRACE_EVENT(iomap_dio_complete, > >> + TP_PROTO(struct kiocb *iocb, int error, int ret), > >> + TP_ARGS(iocb, error, ret), > >> + TP_STRUCT__entry( > >> + __field(dev_t, dev) > >> + __field(ino_t, ino) > >> + __field(loff_t, isize) > >> + __field(loff_t, pos) > >> + __field(int, ki_flags) > >> + __field(bool, aio) > >> + __field(int, error) > >> + __field(int, ret) > > > > Same comment about @ret and ssize_t here. > > Got it. > > Thanks for the review! 
> -ritesh > > > > > > --D > > > >> + ), > >> + TP_fast_assign( > >> + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; > >> + __entry->ino = file_inode(iocb->ki_filp)->i_ino; > >> + __entry->isize = file_inode(iocb->ki_filp)->i_size; > >> + __entry->pos = iocb->ki_pos; > >> + __entry->ki_flags = iocb->ki_flags; > >> + __entry->aio = !is_sync_kiocb(iocb); > >> + __entry->error = error; > >> + __entry->ret = ret; > >> + ), > >> + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx flags %s aio %d error %d ret %d", > >> + MAJOR(__entry->dev), MINOR(__entry->dev), > >> + __entry->ino, > >> + __entry->isize, > >> + __entry->pos, > >> + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), > >> + __entry->aio, > >> + __entry->error, > >> + __entry->ret) > >> +); > >> + > >> #endif /* _IOMAP_TRACE_H */ > >> > >> #undef TRACE_INCLUDE_PATH > >> -- > >> 2.39.2 > >>
"Darrick J. Wong" <djwong@kernel.org> writes: > On Fri, Apr 14, 2023 at 01:48:49AM +0530, Ritesh Harjani wrote: >> "Darrick J. Wong" <djwong@kernel.org> writes: >> >> > On Thu, Apr 13, 2023 at 02:10:32PM +0530, Ritesh Harjani (IBM) wrote: >> >> This patch adds trace point events for iomap DIO path. >> >> >> >> <e.g. iomap dio trace> >> >> xfs_io-8815 [000] 526.790418: iomap_dio_rw_begin: dev 7:7 ino 0xc isize 0x0 pos 0x0 count 4096 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 0 >> >> xfs_io-8815 [000] 526.790978: iomap_dio_complete: dev 7:7 ino 0xc isize 0x1000 pos 0x1000 flags DIRECT aio 0 error 0 ret 4096 >> >> xfs_io-8815 [000] 526.790988: iomap_dio_rw_end: dev 7:7 ino 0xc isize 0x1000 pos 0x1000 count 0 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 4096 >> >> fsx-8827 [005] 526.939345: iomap_dio_rw_begin: dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 61440 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags done_before 0 aio 1 ret 0 >> >> fsx-8827 [005] 526.939459: iomap_dio_rw_end: dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 0 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags done_before 0 aio 1 ret -529 >> >> ksoftirqd/5-41 [005] 526.939564: iomap_dio_complete: dev 7:7 ino 0xc isize 0x922f8 pos 0x5e000 flags NOWAIT|DIRECT|ALLOC_CACHE aio 1 error 0 ret 61440 >> >> >> >> Tested-by: Disha Goel <disgoel@linux.ibm.com> >> >> Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com> >> >> --- >> >> fs/iomap/direct-io.c | 3 ++ >> >> fs/iomap/trace.c | 1 + >> >> fs/iomap/trace.h | 90 ++++++++++++++++++++++++++++++++++++++++++++ >> >> 3 files changed, 94 insertions(+) >> >> >> >> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c >> >> index 5871956ee880..bb7a6dfbc8b3 100644 >> >> --- a/fs/iomap/direct-io.c >> >> +++ b/fs/iomap/direct-io.c >> >> @@ -130,6 +130,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) >> >> if (ret > 0) >> >> ret += dio->done_before; >> >> >> >> + trace_iomap_dio_complete(iocb, dio->error, ret); >> >> 
kfree(dio); >> >> >> >> return ret; >> >> @@ -681,6 +682,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, >> >> struct iomap_dio *dio; >> >> ssize_t ret = 0; >> >> >> >> + trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret); >> >> dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private, >> >> done_before); >> >> if (IS_ERR_OR_NULL(dio)) { >> >> @@ -689,6 +691,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, >> >> } >> >> ret = iomap_dio_complete(dio); >> >> out: >> >> + trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret); >> >> return ret; >> >> } >> >> EXPORT_SYMBOL_GPL(iomap_dio_rw); >> >> diff --git a/fs/iomap/trace.c b/fs/iomap/trace.c >> >> index da217246b1a9..728d5443daf5 100644 >> >> --- a/fs/iomap/trace.c >> >> +++ b/fs/iomap/trace.c >> >> @@ -3,6 +3,7 @@ >> >> * Copyright (c) 2019 Christoph Hellwig >> >> */ >> >> #include <linux/iomap.h> >> >> +#include <linux/uio.h> >> >> >> >> /* >> >> * We include this last to have the helpers above available for the trace >> >> diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h >> >> index f6ea9540d082..dcb4dd4db5fb 100644 >> >> --- a/fs/iomap/trace.h >> >> +++ b/fs/iomap/trace.h >> >> @@ -183,6 +183,96 @@ TRACE_EVENT(iomap_iter, >> >> (void *)__entry->caller) >> >> ); >> >> >> >> +#define TRACE_IOMAP_DIO_STRINGS \ >> >> + {IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \ >> >> + {IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \ >> >> + {IOMAP_DIO_PARTIAL, "DIO_PARTIAL" } >> > >> > Can you make the strings line up too, please? >> > >> >> Ok near other _STRINGS macro. Sure, will do that. >> >> >> >> + >> >> +DECLARE_EVENT_CLASS(iomap_dio_class, >> >> + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, >> >> + unsigned int dio_flags, u64 done_before, int ret), >> > >> > We're passing in ssize_t values for @ret, shouldn't the types match? >> > >> >> Yes, I missed to correct that. Will make it loff_t. >> This should be fixed in ext2 trace point macro too. 
>> >> (ssize_t can vary based on 32 bit v/s 64 bit, so while printing it as >> %llx it gives warning on 32bit. Hence will use loff_t for ret) > > How about %zd? Aah yes. My bad, I wanted to look into print-format specifiers, but missed it. Documentation/core-api/printk-formats.rst size_t %zu or %zx ssize_t %zd or %zx Will send the next revision soon with the comments addressed then. Thanks! -ritesh
> + trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret); > dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private, > done_before); > if (IS_ERR_OR_NULL(dio)) { > @@ -689,6 +691,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, > } > ret = iomap_dio_complete(dio); > out: > + trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret); The trace_iomap_dio_rw_end tracepoint here seems a bit weird, and we'll miss it for file systems using __iomap_dio_rw directly. I'd instead add a trace_iomap_dio_rw_queued for the case where __iomap_dio_rw returns ERR_PTR(-EIOCBQUEUED), as otherwise we're nicely covered by the complete trace points. > + __print_flags(__entry->dio_flags, "|", TRACE_IOMAP_DIO_STRINGS), Please avoid the overly long line here.
Christoph Hellwig <hch@infradead.org> writes: >> + trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret); >> dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private, >> done_before); >> if (IS_ERR_OR_NULL(dio)) { >> @@ -689,6 +691,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, >> } >> ret = iomap_dio_complete(dio); >> out: >> + trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret); > > The trace_iomap_dio_rw_end tracepoint here seems a bit weird, > and we'll miss it for file systems using __iomap_dio_rw directly. Sorry, yes you are right. > > I'd instead add a trace_iomap_dio_rw_queued for the case where > __iomap_dio_rw returns ERR_PTR(-EIOCBQUEUED), as otherwise we're > nicely covered by the complete trace points. > How about this below change? Does this look good to you? It should cover all error types and both entry and exit. And should I fold this entire change in 1 patch or should I split the refactoring of common out routine into a separate one?
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 5871956ee880..e412fdc4ee86 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -130,6 +130,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) if (ret > 0) ret += dio->done_before; + trace_iomap_dio_complete(iocb, dio->error, ret); kfree(dio); return ret; @@ -493,12 +494,15 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct blk_plug plug; struct iomap_dio *dio; + trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret); if (!iomi.len) - return NULL; + goto out; dio = kmalloc(sizeof(*dio), GFP_KERNEL); - if (!dio) - return ERR_PTR(-ENOMEM); + if (!dio) { + ret = -ENOMEM; + goto out; + } dio->iocb = iocb; atomic_set(&dio->ref, 1); @@ -650,8 +654,11 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, */ dio->wait_for_completion = wait_for_completion; if (!atomic_dec_and_test(&dio->ref)) { - if (!wait_for_completion) - return ERR_PTR(-EIOCBQUEUED); + if (!wait_for_completion) { + ret = -EIOCBQUEUED; + goto out; + } + for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); @@ -663,10 +670,13 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, __set_current_state(TASK_RUNNING); } + trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret); return dio; out_free_dio: kfree(dio); +out: + trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret); if (ret) return ERR_PTR(ret); return NULL; >> + __print_flags(__entry->dio_flags, "|", TRACE_IOMAP_DIO_STRINGS), > > Please avoid the overly lone line here. Somehow my checkpatch never gave a warning about it. I will check why was that. But yes, I have anyways made the name to IOMAP_DIO_STRINGS similar to other namings used in fs/iomap/trace.h -ritesh
On Fri, Apr 14, 2023 at 01:26:38PM +0530, Ritesh Harjani wrote: > How about this below change? Does this look good to you? > It should cover all error types and both entry and exit. I don't think it is very useful. The complete tracepoint is the end of the I/O. Having a separate end one doesn't make sense. That's why I suggested a queued one for the asynchronous case.
Christoph Hellwig <hch@infradead.org> writes: > On Fri, Apr 14, 2023 at 01:26:38PM +0530, Ritesh Harjani wrote: >> How about this below change? Does this look good to you? >> It should cover all error types and both entry and exit. > > I don't think it is very useful. The complete tracepoint is the > end of the I/O. Having a separate end one doesn't make sense. > That's why I suggested a queued one for the asynchronous case. Ok, does this look good then? diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 36ab1152dbea..859efb5de1bf 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -130,6 +130,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) if (ret > 0) ret += dio->done_before; + trace_iomap_dio_complete(iocb, dio->error, ret); kfree(dio); return ret; @@ -650,8 +651,12 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, */ dio->wait_for_completion = wait_for_completion; if (!atomic_dec_and_test(&dio->ref)) { - if (!wait_for_completion) - return ERR_PTR(-EIOCBQUEUED); + if (!wait_for_completion) { + ret = -EIOCBQUEUED; + trace_iomap_dio_rw_queued(iocb, iter, dio_flags, + done_before, ret); + return ERR_PTR(ret); + } for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); -ritesh
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 5871956ee880..bb7a6dfbc8b3 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -130,6 +130,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) if (ret > 0) ret += dio->done_before; + trace_iomap_dio_complete(iocb, dio->error, ret); kfree(dio); return ret; @@ -681,6 +682,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_dio *dio; ssize_t ret = 0; + trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret); dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private, done_before); if (IS_ERR_OR_NULL(dio)) { @@ -689,6 +691,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, } ret = iomap_dio_complete(dio); out: + trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret); return ret; } EXPORT_SYMBOL_GPL(iomap_dio_rw); diff --git a/fs/iomap/trace.c b/fs/iomap/trace.c index da217246b1a9..728d5443daf5 100644 --- a/fs/iomap/trace.c +++ b/fs/iomap/trace.c @@ -3,6 +3,7 @@ * Copyright (c) 2019 Christoph Hellwig */ #include <linux/iomap.h> +#include <linux/uio.h> /* * We include this last to have the helpers above available for the trace diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index f6ea9540d082..dcb4dd4db5fb 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -183,6 +183,96 @@ TRACE_EVENT(iomap_iter, (void *)__entry->caller) ); +#define TRACE_IOMAP_DIO_STRINGS \ + {IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \ + {IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \ + {IOMAP_DIO_PARTIAL, "DIO_PARTIAL" } + +DECLARE_EVENT_CLASS(iomap_dio_class, + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, + unsigned int dio_flags, u64 done_before, int ret), + TP_ARGS(iocb, iter, dio_flags, done_before, ret), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, isize) + __field(loff_t, pos) + __field(u64, count) + __field(u64, done_before) + __field(int, ki_flags) + __field(unsigned int, dio_flags) + __field(bool, aio) + 
__field(int, ret) + ), + TP_fast_assign( + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; + __entry->ino = file_inode(iocb->ki_filp)->i_ino; + __entry->isize = file_inode(iocb->ki_filp)->i_size; + __entry->pos = iocb->ki_pos; + __entry->count = iov_iter_count(iter); + __entry->done_before = done_before; + __entry->dio_flags = dio_flags; + __entry->ki_flags = iocb->ki_flags; + __entry->aio = !is_sync_kiocb(iocb); + __entry->ret = ret; + ), + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx count %llu " + "flags %s dio_flags %s done_before %llu aio %d ret %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->isize, + __entry->pos, + __entry->count, + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), + __print_flags(__entry->dio_flags, "|", TRACE_IOMAP_DIO_STRINGS), + __entry->done_before, + __entry->aio, + __entry->ret) +) + +#define DEFINE_DIO_RW_EVENT(name) \ +DEFINE_EVENT(iomap_dio_class, name, \ + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, \ + unsigned int dio_flags, u64 done_before, \ + int ret), \ + TP_ARGS(iocb, iter, dio_flags, done_before, ret)) +DEFINE_DIO_RW_EVENT(iomap_dio_rw_begin); +DEFINE_DIO_RW_EVENT(iomap_dio_rw_end); + +TRACE_EVENT(iomap_dio_complete, + TP_PROTO(struct kiocb *iocb, int error, int ret), + TP_ARGS(iocb, error, ret), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, isize) + __field(loff_t, pos) + __field(int, ki_flags) + __field(bool, aio) + __field(int, error) + __field(int, ret) + ), + TP_fast_assign( + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; + __entry->ino = file_inode(iocb->ki_filp)->i_ino; + __entry->isize = file_inode(iocb->ki_filp)->i_size; + __entry->pos = iocb->ki_pos; + __entry->ki_flags = iocb->ki_flags; + __entry->aio = !is_sync_kiocb(iocb); + __entry->error = error; + __entry->ret = ret; + ), + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx flags %s aio %d error %d ret %d", + MAJOR(__entry->dev), 
MINOR(__entry->dev), + __entry->ino, + __entry->isize, + __entry->pos, + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), + __entry->aio, + __entry->error, + __entry->ret) +); + #endif /* _IOMAP_TRACE_H */ #undef TRACE_INCLUDE_PATH