diff mbox series

[RFCv3,10/10] iomap: Add trace points for DIO path

Message ID 93ab8386c4620395c5e674a7930506895fc758ef.1681365596.git.ritesh.list@gmail.com
State Not Applicable
Headers show
Series ext2: DIO to use iomap | expand

Commit Message

Ritesh Harjani (IBM) April 13, 2023, 8:40 a.m. UTC
This patch adds trace point events for iomap DIO path.

<e.g. iomap dio trace>
     xfs_io-8815  [000]   526.790418: iomap_dio_rw_begin:   dev 7:7 ino 0xc isize 0x0 pos 0x0 count 4096 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 0
     xfs_io-8815  [000]   526.790978: iomap_dio_complete:   dev 7:7 ino 0xc isize 0x1000 pos 0x1000 flags DIRECT aio 0 error 0 ret 4096
     xfs_io-8815  [000]   526.790988: iomap_dio_rw_end:     dev 7:7 ino 0xc isize 0x1000 pos 0x1000 count 0 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 4096
        fsx-8827  [005]   526.939345: iomap_dio_rw_begin:   dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 61440 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags  done_before 0 aio 1 ret 0
        fsx-8827  [005]   526.939459: iomap_dio_rw_end:     dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 0 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags  done_before 0 aio 1 ret -529
ksoftirqd/5-41    [005]   526.939564: iomap_dio_complete:   dev 7:7 ino 0xc isize 0x922f8 pos 0x5e000 flags NOWAIT|DIRECT|ALLOC_CACHE aio 1 error 0 ret 61440

Tested-by: Disha Goel <disgoel@linux.ibm.com>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
---
 fs/iomap/direct-io.c |  3 ++
 fs/iomap/trace.c     |  1 +
 fs/iomap/trace.h     | 90 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 94 insertions(+)

Comments

Darrick J. Wong April 13, 2023, 2:42 p.m. UTC | #1
On Thu, Apr 13, 2023 at 02:10:32PM +0530, Ritesh Harjani (IBM) wrote:
> This patch adds trace point events for iomap DIO path.
> 
> <e.g. iomap dio trace>
>      xfs_io-8815  [000]   526.790418: iomap_dio_rw_begin:   dev 7:7 ino 0xc isize 0x0 pos 0x0 count 4096 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 0
>      xfs_io-8815  [000]   526.790978: iomap_dio_complete:   dev 7:7 ino 0xc isize 0x1000 pos 0x1000 flags DIRECT aio 0 error 0 ret 4096
>      xfs_io-8815  [000]   526.790988: iomap_dio_rw_end:     dev 7:7 ino 0xc isize 0x1000 pos 0x1000 count 0 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 4096
>         fsx-8827  [005]   526.939345: iomap_dio_rw_begin:   dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 61440 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags  done_before 0 aio 1 ret 0
>         fsx-8827  [005]   526.939459: iomap_dio_rw_end:     dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 0 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags  done_before 0 aio 1 ret -529
> ksoftirqd/5-41    [005]   526.939564: iomap_dio_complete:   dev 7:7 ino 0xc isize 0x922f8 pos 0x5e000 flags NOWAIT|DIRECT|ALLOC_CACHE aio 1 error 0 ret 61440
> 
> Tested-by: Disha Goel <disgoel@linux.ibm.com>
> Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> ---
>  fs/iomap/direct-io.c |  3 ++
>  fs/iomap/trace.c     |  1 +
>  fs/iomap/trace.h     | 90 ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 94 insertions(+)
> 
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 5871956ee880..bb7a6dfbc8b3 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -130,6 +130,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
>  	if (ret > 0)
>  		ret += dio->done_before;
>  
> +	trace_iomap_dio_complete(iocb, dio->error, ret);
>  	kfree(dio);
>  
>  	return ret;
> @@ -681,6 +682,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  	struct iomap_dio *dio;
>  	ssize_t ret = 0;
>  
> +	trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret);
>  	dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private,
>  			     done_before);
>  	if (IS_ERR_OR_NULL(dio)) {
> @@ -689,6 +691,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  	}
>  	ret = iomap_dio_complete(dio);
>  out:
> +	trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret);
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(iomap_dio_rw);
> diff --git a/fs/iomap/trace.c b/fs/iomap/trace.c
> index da217246b1a9..728d5443daf5 100644
> --- a/fs/iomap/trace.c
> +++ b/fs/iomap/trace.c
> @@ -3,6 +3,7 @@
>   * Copyright (c) 2019 Christoph Hellwig
>   */
>  #include <linux/iomap.h>
> +#include <linux/uio.h>
>  
>  /*
>   * We include this last to have the helpers above available for the trace
> diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
> index f6ea9540d082..dcb4dd4db5fb 100644
> --- a/fs/iomap/trace.h
> +++ b/fs/iomap/trace.h
> @@ -183,6 +183,96 @@ TRACE_EVENT(iomap_iter,
>  		   (void *)__entry->caller)
>  );
>  
> +#define TRACE_IOMAP_DIO_STRINGS \
> +	{IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \
> +	{IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \
> +	{IOMAP_DIO_PARTIAL, "DIO_PARTIAL" }

Can you make the strings line up too, please?

> +
> +DECLARE_EVENT_CLASS(iomap_dio_class,
> +	TP_PROTO(struct kiocb *iocb, struct iov_iter *iter,
> +		 unsigned int dio_flags, u64 done_before, int ret),

We're passing in ssize_t values for @ret, shouldn't the types match?

> +	TP_ARGS(iocb, iter, dio_flags, done_before, ret),
> +	TP_STRUCT__entry(
> +		__field(dev_t,	dev)
> +		__field(ino_t,	ino)
> +		__field(loff_t, isize)
> +		__field(loff_t, pos)
> +		__field(u64,	count)

What's the difference between "length" as used in the other tracepoints
and "count" here?

> +		__field(u64,	done_before)
> +		__field(int,	ki_flags)
> +		__field(unsigned int,	dio_flags)
> +		__field(bool,	aio)
> +		__field(int, ret)
> +	),
> +	TP_fast_assign(
> +		__entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev;
> +		__entry->ino = file_inode(iocb->ki_filp)->i_ino;
> +		__entry->isize = file_inode(iocb->ki_filp)->i_size;
> +		__entry->pos = iocb->ki_pos;
> +		__entry->count = iov_iter_count(iter);
> +		__entry->done_before = done_before;
> +		__entry->dio_flags = dio_flags;
> +		__entry->ki_flags = iocb->ki_flags;
> +		__entry->aio = !is_sync_kiocb(iocb);
> +		__entry->ret = ret;
> +	),
> +	TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx count %llu "

count and done_before are lengths of file operations, in bytes, right?

Everywhere else we use 0x%llx for that.

> +		  "flags %s dio_flags %s done_before %llu aio %d ret %d",
> +		  MAJOR(__entry->dev), MINOR(__entry->dev),
> +		  __entry->ino,
> +		  __entry->isize,
> +		  __entry->pos,
> +		  __entry->count,
> +		  __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS),
> +		  __print_flags(__entry->dio_flags, "|", TRACE_IOMAP_DIO_STRINGS),
> +		  __entry->done_before,
> +		  __entry->aio,
> +		  __entry->ret)
> +)
> +
> +#define DEFINE_DIO_RW_EVENT(name)					\
> +DEFINE_EVENT(iomap_dio_class, name,					\
> +	TP_PROTO(struct kiocb *iocb, struct iov_iter *iter,		\
> +		 unsigned int dio_flags, u64 done_before,		\
> +		 int ret),						\
> +	TP_ARGS(iocb, iter, dio_flags, done_before, ret))
> +DEFINE_DIO_RW_EVENT(iomap_dio_rw_begin);
> +DEFINE_DIO_RW_EVENT(iomap_dio_rw_end);
> +
> +TRACE_EVENT(iomap_dio_complete,
> +	TP_PROTO(struct kiocb *iocb, int error, int ret),
> +	TP_ARGS(iocb, error, ret),
> +	TP_STRUCT__entry(
> +		__field(dev_t,	dev)
> +		__field(ino_t,	ino)
> +		__field(loff_t, isize)
> +		__field(loff_t, pos)
> +		__field(int,	ki_flags)
> +		__field(bool,	aio)
> +		__field(int,	error)
> +		__field(int,	ret)

Same comment about @ret and ssize_t here.

--D

> +	),
> +	TP_fast_assign(
> +		__entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev;
> +		__entry->ino = file_inode(iocb->ki_filp)->i_ino;
> +		__entry->isize = file_inode(iocb->ki_filp)->i_size;
> +		__entry->pos = iocb->ki_pos;
> +		__entry->ki_flags = iocb->ki_flags;
> +		__entry->aio = !is_sync_kiocb(iocb);
> +		__entry->error = error;
> +		__entry->ret = ret;
> +	),
> +	TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx flags %s aio %d error %d ret %d",
> +		  MAJOR(__entry->dev), MINOR(__entry->dev),
> +		  __entry->ino,
> +		  __entry->isize,
> +		  __entry->pos,
> +		  __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS),
> +		  __entry->aio,
> +		  __entry->error,
> +		  __entry->ret)
> +);
> +
>  #endif /* _IOMAP_TRACE_H */
>  
>  #undef TRACE_INCLUDE_PATH
> -- 
> 2.39.2
>
Ritesh Harjani (IBM) April 13, 2023, 8:18 p.m. UTC | #2
"Darrick J. Wong" <djwong@kernel.org> writes:

> On Thu, Apr 13, 2023 at 02:10:32PM +0530, Ritesh Harjani (IBM) wrote:
>> This patch adds trace point events for iomap DIO path.
>>
>> <e.g. iomap dio trace>
>>      xfs_io-8815  [000]   526.790418: iomap_dio_rw_begin:   dev 7:7 ino 0xc isize 0x0 pos 0x0 count 4096 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 0
>>      xfs_io-8815  [000]   526.790978: iomap_dio_complete:   dev 7:7 ino 0xc isize 0x1000 pos 0x1000 flags DIRECT aio 0 error 0 ret 4096
>>      xfs_io-8815  [000]   526.790988: iomap_dio_rw_end:     dev 7:7 ino 0xc isize 0x1000 pos 0x1000 count 0 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 4096
>>         fsx-8827  [005]   526.939345: iomap_dio_rw_begin:   dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 61440 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags  done_before 0 aio 1 ret 0
>>         fsx-8827  [005]   526.939459: iomap_dio_rw_end:     dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 0 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags  done_before 0 aio 1 ret -529
>> ksoftirqd/5-41    [005]   526.939564: iomap_dio_complete:   dev 7:7 ino 0xc isize 0x922f8 pos 0x5e000 flags NOWAIT|DIRECT|ALLOC_CACHE aio 1 error 0 ret 61440
>>
>> Tested-by: Disha Goel <disgoel@linux.ibm.com>
>> Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
>> ---
>>  fs/iomap/direct-io.c |  3 ++
>>  fs/iomap/trace.c     |  1 +
>>  fs/iomap/trace.h     | 90 ++++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 94 insertions(+)
>>
>> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
>> index 5871956ee880..bb7a6dfbc8b3 100644
>> --- a/fs/iomap/direct-io.c
>> +++ b/fs/iomap/direct-io.c
>> @@ -130,6 +130,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
>>  	if (ret > 0)
>>  		ret += dio->done_before;
>>
>> +	trace_iomap_dio_complete(iocb, dio->error, ret);
>>  	kfree(dio);
>>
>>  	return ret;
>> @@ -681,6 +682,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>>  	struct iomap_dio *dio;
>>  	ssize_t ret = 0;
>>
>> +	trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret);
>>  	dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private,
>>  			     done_before);
>>  	if (IS_ERR_OR_NULL(dio)) {
>> @@ -689,6 +691,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>>  	}
>>  	ret = iomap_dio_complete(dio);
>>  out:
>> +	trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret);
>>  	return ret;
>>  }
>>  EXPORT_SYMBOL_GPL(iomap_dio_rw);
>> diff --git a/fs/iomap/trace.c b/fs/iomap/trace.c
>> index da217246b1a9..728d5443daf5 100644
>> --- a/fs/iomap/trace.c
>> +++ b/fs/iomap/trace.c
>> @@ -3,6 +3,7 @@
>>   * Copyright (c) 2019 Christoph Hellwig
>>   */
>>  #include <linux/iomap.h>
>> +#include <linux/uio.h>
>>
>>  /*
>>   * We include this last to have the helpers above available for the trace
>> diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
>> index f6ea9540d082..dcb4dd4db5fb 100644
>> --- a/fs/iomap/trace.h
>> +++ b/fs/iomap/trace.h
>> @@ -183,6 +183,96 @@ TRACE_EVENT(iomap_iter,
>>  		   (void *)__entry->caller)
>>  );
>>
>> +#define TRACE_IOMAP_DIO_STRINGS \
>> +	{IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \
>> +	{IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \
>> +	{IOMAP_DIO_PARTIAL, "DIO_PARTIAL" }
>
> Can you make the strings line up too, please?
>

Ok near other _STRINGS macro. Sure, will do that.


>> +
>> +DECLARE_EVENT_CLASS(iomap_dio_class,
>> +	TP_PROTO(struct kiocb *iocb, struct iov_iter *iter,
>> +		 unsigned int dio_flags, u64 done_before, int ret),
>
> We're passing in ssize_t values for @ret, shouldn't the types match?
>

Yes, I missed correcting that. Will make it loff_t.
This should be fixed in the ext2 trace point macro too.

(The size of ssize_t differs between 32-bit and 64-bit, so printing it
with %llx gives a warning on 32-bit. Hence I will use loff_t for ret.)


>> +	TP_ARGS(iocb, iter, dio_flags, done_before, ret),
>> +	TP_STRUCT__entry(
>> +		__field(dev_t,	dev)
>> +		__field(ino_t,	ino)
>> +		__field(loff_t, isize)
>> +		__field(loff_t, pos)
>> +		__field(u64,	count)
>
> What's the difference between "length" as used in the other tracepoints
> and "count" here?
>

Yup let me make it length which will be a more consistent naming.
I chose count just because of (iov_iter_count(iter)).

>> +		__field(u64,	done_before)
>> +		__field(int,	ki_flags)
>> +		__field(unsigned int,	dio_flags)
>> +		__field(bool,	aio)
>> +		__field(int, ret)
>> +	),
>> +	TP_fast_assign(
>> +		__entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev;
>> +		__entry->ino = file_inode(iocb->ki_filp)->i_ino;
>> +		__entry->isize = file_inode(iocb->ki_filp)->i_size;
>> +		__entry->pos = iocb->ki_pos;
>> +		__entry->count = iov_iter_count(iter);
>> +		__entry->done_before = done_before;
>> +		__entry->dio_flags = dio_flags;
>> +		__entry->ki_flags = iocb->ki_flags;
>> +		__entry->aio = !is_sync_kiocb(iocb);
>> +		__entry->ret = ret;
>> +	),
>> +	TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx count %llu "
>
> count and done_before are lengths of file operations, in bytes, right?

Yes, that's right.

>
> Everywhere else we use 0x%llx for that.
>

Yup I had noticed that, but I guess I missed it.
Thanks for catching it. I will fix it.

>> +		  "flags %s dio_flags %s done_before %llu aio %d ret %d",
>> +		  MAJOR(__entry->dev), MINOR(__entry->dev),
>> +		  __entry->ino,
>> +		  __entry->isize,
>> +		  __entry->pos,
>> +		  __entry->count,
>> +		  __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS),
>> +		  __print_flags(__entry->dio_flags, "|", TRACE_IOMAP_DIO_STRINGS),
>> +		  __entry->done_before,
>> +		  __entry->aio,
>> +		  __entry->ret)
>> +)
>> +
>> +#define DEFINE_DIO_RW_EVENT(name)					\
>> +DEFINE_EVENT(iomap_dio_class, name,					\
>> +	TP_PROTO(struct kiocb *iocb, struct iov_iter *iter,		\
>> +		 unsigned int dio_flags, u64 done_before,		\
>> +		 int ret),						\
>> +	TP_ARGS(iocb, iter, dio_flags, done_before, ret))
>> +DEFINE_DIO_RW_EVENT(iomap_dio_rw_begin);
>> +DEFINE_DIO_RW_EVENT(iomap_dio_rw_end);
>> +
>> +TRACE_EVENT(iomap_dio_complete,
>> +	TP_PROTO(struct kiocb *iocb, int error, int ret),
>> +	TP_ARGS(iocb, error, ret),
>> +	TP_STRUCT__entry(
>> +		__field(dev_t,	dev)
>> +		__field(ino_t,	ino)
>> +		__field(loff_t, isize)
>> +		__field(loff_t, pos)
>> +		__field(int,	ki_flags)
>> +		__field(bool,	aio)
>> +		__field(int,	error)
>> +		__field(int,	ret)
>
> Same comment about @ret and ssize_t here.

Got it.

Thanks for the review!
-ritesh


>
> --D
>
>> +	),
>> +	TP_fast_assign(
>> +		__entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev;
>> +		__entry->ino = file_inode(iocb->ki_filp)->i_ino;
>> +		__entry->isize = file_inode(iocb->ki_filp)->i_size;
>> +		__entry->pos = iocb->ki_pos;
>> +		__entry->ki_flags = iocb->ki_flags;
>> +		__entry->aio = !is_sync_kiocb(iocb);
>> +		__entry->error = error;
>> +		__entry->ret = ret;
>> +	),
>> +	TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx flags %s aio %d error %d ret %d",
>> +		  MAJOR(__entry->dev), MINOR(__entry->dev),
>> +		  __entry->ino,
>> +		  __entry->isize,
>> +		  __entry->pos,
>> +		  __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS),
>> +		  __entry->aio,
>> +		  __entry->error,
>> +		  __entry->ret)
>> +);
>> +
>>  #endif /* _IOMAP_TRACE_H */
>>
>>  #undef TRACE_INCLUDE_PATH
>> --
>> 2.39.2
>>
Darrick J. Wong April 14, 2023, 2:16 a.m. UTC | #3
On Fri, Apr 14, 2023 at 01:48:49AM +0530, Ritesh Harjani wrote:
> "Darrick J. Wong" <djwong@kernel.org> writes:
> 
> > On Thu, Apr 13, 2023 at 02:10:32PM +0530, Ritesh Harjani (IBM) wrote:
> >> This patch adds trace point events for iomap DIO path.
> >>
> >> <e.g. iomap dio trace>
> >>      xfs_io-8815  [000]   526.790418: iomap_dio_rw_begin:   dev 7:7 ino 0xc isize 0x0 pos 0x0 count 4096 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 0
> >>      xfs_io-8815  [000]   526.790978: iomap_dio_complete:   dev 7:7 ino 0xc isize 0x1000 pos 0x1000 flags DIRECT aio 0 error 0 ret 4096
> >>      xfs_io-8815  [000]   526.790988: iomap_dio_rw_end:     dev 7:7 ino 0xc isize 0x1000 pos 0x1000 count 0 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 4096
> >>         fsx-8827  [005]   526.939345: iomap_dio_rw_begin:   dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 61440 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags  done_before 0 aio 1 ret 0
> >>         fsx-8827  [005]   526.939459: iomap_dio_rw_end:     dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 0 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags  done_before 0 aio 1 ret -529
> >> ksoftirqd/5-41    [005]   526.939564: iomap_dio_complete:   dev 7:7 ino 0xc isize 0x922f8 pos 0x5e000 flags NOWAIT|DIRECT|ALLOC_CACHE aio 1 error 0 ret 61440
> >>
> >> Tested-by: Disha Goel <disgoel@linux.ibm.com>
> >> Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> >> ---
> >>  fs/iomap/direct-io.c |  3 ++
> >>  fs/iomap/trace.c     |  1 +
> >>  fs/iomap/trace.h     | 90 ++++++++++++++++++++++++++++++++++++++++++++
> >>  3 files changed, 94 insertions(+)
> >>
> >> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> >> index 5871956ee880..bb7a6dfbc8b3 100644
> >> --- a/fs/iomap/direct-io.c
> >> +++ b/fs/iomap/direct-io.c
> >> @@ -130,6 +130,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
> >>  	if (ret > 0)
> >>  		ret += dio->done_before;
> >>
> >> +	trace_iomap_dio_complete(iocb, dio->error, ret);
> >>  	kfree(dio);
> >>
> >>  	return ret;
> >> @@ -681,6 +682,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
> >>  	struct iomap_dio *dio;
> >>  	ssize_t ret = 0;
> >>
> >> +	trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret);
> >>  	dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private,
> >>  			     done_before);
> >>  	if (IS_ERR_OR_NULL(dio)) {
> >> @@ -689,6 +691,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
> >>  	}
> >>  	ret = iomap_dio_complete(dio);
> >>  out:
> >> +	trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret);
> >>  	return ret;
> >>  }
> >>  EXPORT_SYMBOL_GPL(iomap_dio_rw);
> >> diff --git a/fs/iomap/trace.c b/fs/iomap/trace.c
> >> index da217246b1a9..728d5443daf5 100644
> >> --- a/fs/iomap/trace.c
> >> +++ b/fs/iomap/trace.c
> >> @@ -3,6 +3,7 @@
> >>   * Copyright (c) 2019 Christoph Hellwig
> >>   */
> >>  #include <linux/iomap.h>
> >> +#include <linux/uio.h>
> >>
> >>  /*
> >>   * We include this last to have the helpers above available for the trace
> >> diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
> >> index f6ea9540d082..dcb4dd4db5fb 100644
> >> --- a/fs/iomap/trace.h
> >> +++ b/fs/iomap/trace.h
> >> @@ -183,6 +183,96 @@ TRACE_EVENT(iomap_iter,
> >>  		   (void *)__entry->caller)
> >>  );
> >>
> >> +#define TRACE_IOMAP_DIO_STRINGS \
> >> +	{IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \
> >> +	{IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \
> >> +	{IOMAP_DIO_PARTIAL, "DIO_PARTIAL" }
> >
> > Can you make the strings line up too, please?
> >
> 
> Ok near other _STRINGS macro. Sure, will do that.
> 
> 
> >> +
> >> +DECLARE_EVENT_CLASS(iomap_dio_class,
> >> +	TP_PROTO(struct kiocb *iocb, struct iov_iter *iter,
> >> +		 unsigned int dio_flags, u64 done_before, int ret),
> >
> > We're passing in ssize_t values for @ret, shouldn't the types match?
> >
> 
> Yes, I missed to correct that. Will make it loff_t.
> This should be fixed in ext2 trace point macro too.
> 
> (ssize_t can vary based on 32 bit v/s 64 bit, so while printing it as
> %llx it gives warning on 32bit. Hence will use loff_t for ret)

How about %zd?

--D

> 
> >> +	TP_ARGS(iocb, iter, dio_flags, done_before, ret),
> >> +	TP_STRUCT__entry(
> >> +		__field(dev_t,	dev)
> >> +		__field(ino_t,	ino)
> >> +		__field(loff_t, isize)
> >> +		__field(loff_t, pos)
> >> +		__field(u64,	count)
> >
> > What's the difference between "length" as used in the other tracepoints
> > and "count" here?
> >
> 
> Yup let me make it length which will be a more consistent naming.
> I chose count just because of (iov_iter_count(iter)).
> 
> >> +		__field(u64,	done_before)
> >> +		__field(int,	ki_flags)
> >> +		__field(unsigned int,	dio_flags)
> >> +		__field(bool,	aio)
> >> +		__field(int, ret)
> >> +	),
> >> +	TP_fast_assign(
> >> +		__entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev;
> >> +		__entry->ino = file_inode(iocb->ki_filp)->i_ino;
> >> +		__entry->isize = file_inode(iocb->ki_filp)->i_size;
> >> +		__entry->pos = iocb->ki_pos;
> >> +		__entry->count = iov_iter_count(iter);
> >> +		__entry->done_before = done_before;
> >> +		__entry->dio_flags = dio_flags;
> >> +		__entry->ki_flags = iocb->ki_flags;
> >> +		__entry->aio = !is_sync_kiocb(iocb);
> >> +		__entry->ret = ret;
> >> +	),
> >> +	TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx count %llu "
> >
> > count and done_before are lengths of file operations, in bytes, right?
> 
> Yes, that's right.
> 
> >
> > Everywhere else we use 0x%llx for that.
> >
> 
> Yup I had noticed that, but I guess I missed it.
> Thanks for catching it. I will fix it.
> 
> >> +		  "flags %s dio_flags %s done_before %llu aio %d ret %d",
> >> +		  MAJOR(__entry->dev), MINOR(__entry->dev),
> >> +		  __entry->ino,
> >> +		  __entry->isize,
> >> +		  __entry->pos,
> >> +		  __entry->count,
> >> +		  __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS),
> >> +		  __print_flags(__entry->dio_flags, "|", TRACE_IOMAP_DIO_STRINGS),
> >> +		  __entry->done_before,
> >> +		  __entry->aio,
> >> +		  __entry->ret)
> >> +)
> >> +
> >> +#define DEFINE_DIO_RW_EVENT(name)					\
> >> +DEFINE_EVENT(iomap_dio_class, name,					\
> >> +	TP_PROTO(struct kiocb *iocb, struct iov_iter *iter,		\
> >> +		 unsigned int dio_flags, u64 done_before,		\
> >> +		 int ret),						\
> >> +	TP_ARGS(iocb, iter, dio_flags, done_before, ret))
> >> +DEFINE_DIO_RW_EVENT(iomap_dio_rw_begin);
> >> +DEFINE_DIO_RW_EVENT(iomap_dio_rw_end);
> >> +
> >> +TRACE_EVENT(iomap_dio_complete,
> >> +	TP_PROTO(struct kiocb *iocb, int error, int ret),
> >> +	TP_ARGS(iocb, error, ret),
> >> +	TP_STRUCT__entry(
> >> +		__field(dev_t,	dev)
> >> +		__field(ino_t,	ino)
> >> +		__field(loff_t, isize)
> >> +		__field(loff_t, pos)
> >> +		__field(int,	ki_flags)
> >> +		__field(bool,	aio)
> >> +		__field(int,	error)
> >> +		__field(int,	ret)
> >
> > Same comment about @ret and ssize_t here.
> 
> Got it.
> 
> Thanks for the review!
> -ritesh
> 
> 
> >
> > --D
> >
> >> +	),
> >> +	TP_fast_assign(
> >> +		__entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev;
> >> +		__entry->ino = file_inode(iocb->ki_filp)->i_ino;
> >> +		__entry->isize = file_inode(iocb->ki_filp)->i_size;
> >> +		__entry->pos = iocb->ki_pos;
> >> +		__entry->ki_flags = iocb->ki_flags;
> >> +		__entry->aio = !is_sync_kiocb(iocb);
> >> +		__entry->error = error;
> >> +		__entry->ret = ret;
> >> +	),
> >> +	TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx flags %s aio %d error %d ret %d",
> >> +		  MAJOR(__entry->dev), MINOR(__entry->dev),
> >> +		  __entry->ino,
> >> +		  __entry->isize,
> >> +		  __entry->pos,
> >> +		  __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS),
> >> +		  __entry->aio,
> >> +		  __entry->error,
> >> +		  __entry->ret)
> >> +);
> >> +
> >>  #endif /* _IOMAP_TRACE_H */
> >>
> >>  #undef TRACE_INCLUDE_PATH
> >> --
> >> 2.39.2
> >>
Ritesh Harjani (IBM) April 14, 2023, 5:21 a.m. UTC | #4
"Darrick J. Wong" <djwong@kernel.org> writes:

> On Fri, Apr 14, 2023 at 01:48:49AM +0530, Ritesh Harjani wrote:
>> "Darrick J. Wong" <djwong@kernel.org> writes:
>>
>> > On Thu, Apr 13, 2023 at 02:10:32PM +0530, Ritesh Harjani (IBM) wrote:
>> >> This patch adds trace point events for iomap DIO path.
>> >>
>> >> <e.g. iomap dio trace>
>> >>      xfs_io-8815  [000]   526.790418: iomap_dio_rw_begin:   dev 7:7 ino 0xc isize 0x0 pos 0x0 count 4096 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 0
>> >>      xfs_io-8815  [000]   526.790978: iomap_dio_complete:   dev 7:7 ino 0xc isize 0x1000 pos 0x1000 flags DIRECT aio 0 error 0 ret 4096
>> >>      xfs_io-8815  [000]   526.790988: iomap_dio_rw_end:     dev 7:7 ino 0xc isize 0x1000 pos 0x1000 count 0 flags DIRECT dio_flags DIO_FORCE_WAIT done_before 0 aio 0 ret 4096
>> >>         fsx-8827  [005]   526.939345: iomap_dio_rw_begin:   dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 61440 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags  done_before 0 aio 1 ret 0
>> >>         fsx-8827  [005]   526.939459: iomap_dio_rw_end:     dev 7:7 ino 0xc isize 0x922f8 pos 0x4f000 count 0 flags NOWAIT|DIRECT|ALLOC_CACHE dio_flags  done_before 0 aio 1 ret -529
>> >> ksoftirqd/5-41    [005]   526.939564: iomap_dio_complete:   dev 7:7 ino 0xc isize 0x922f8 pos 0x5e000 flags NOWAIT|DIRECT|ALLOC_CACHE aio 1 error 0 ret 61440
>> >>
>> >> Tested-by: Disha Goel <disgoel@linux.ibm.com>
>> >> Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
>> >> ---
>> >>  fs/iomap/direct-io.c |  3 ++
>> >>  fs/iomap/trace.c     |  1 +
>> >>  fs/iomap/trace.h     | 90 ++++++++++++++++++++++++++++++++++++++++++++
>> >>  3 files changed, 94 insertions(+)
>> >>
>> >> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
>> >> index 5871956ee880..bb7a6dfbc8b3 100644
>> >> --- a/fs/iomap/direct-io.c
>> >> +++ b/fs/iomap/direct-io.c
>> >> @@ -130,6 +130,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
>> >>  	if (ret > 0)
>> >>  		ret += dio->done_before;
>> >>
>> >> +	trace_iomap_dio_complete(iocb, dio->error, ret);
>> >>  	kfree(dio);
>> >>
>> >>  	return ret;
>> >> @@ -681,6 +682,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>> >>  	struct iomap_dio *dio;
>> >>  	ssize_t ret = 0;
>> >>
>> >> +	trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret);
>> >>  	dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private,
>> >>  			     done_before);
>> >>  	if (IS_ERR_OR_NULL(dio)) {
>> >> @@ -689,6 +691,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>> >>  	}
>> >>  	ret = iomap_dio_complete(dio);
>> >>  out:
>> >> +	trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret);
>> >>  	return ret;
>> >>  }
>> >>  EXPORT_SYMBOL_GPL(iomap_dio_rw);
>> >> diff --git a/fs/iomap/trace.c b/fs/iomap/trace.c
>> >> index da217246b1a9..728d5443daf5 100644
>> >> --- a/fs/iomap/trace.c
>> >> +++ b/fs/iomap/trace.c
>> >> @@ -3,6 +3,7 @@
>> >>   * Copyright (c) 2019 Christoph Hellwig
>> >>   */
>> >>  #include <linux/iomap.h>
>> >> +#include <linux/uio.h>
>> >>
>> >>  /*
>> >>   * We include this last to have the helpers above available for the trace
>> >> diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
>> >> index f6ea9540d082..dcb4dd4db5fb 100644
>> >> --- a/fs/iomap/trace.h
>> >> +++ b/fs/iomap/trace.h
>> >> @@ -183,6 +183,96 @@ TRACE_EVENT(iomap_iter,
>> >>  		   (void *)__entry->caller)
>> >>  );
>> >>
>> >> +#define TRACE_IOMAP_DIO_STRINGS \
>> >> +	{IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \
>> >> +	{IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \
>> >> +	{IOMAP_DIO_PARTIAL, "DIO_PARTIAL" }
>> >
>> > Can you make the strings line up too, please?
>> >
>>
>> Ok near other _STRINGS macro. Sure, will do that.
>>
>>
>> >> +
>> >> +DECLARE_EVENT_CLASS(iomap_dio_class,
>> >> +	TP_PROTO(struct kiocb *iocb, struct iov_iter *iter,
>> >> +		 unsigned int dio_flags, u64 done_before, int ret),
>> >
>> > We're passing in ssize_t values for @ret, shouldn't the types match?
>> >
>>
>> Yes, I missed to correct that. Will make it loff_t.
>> This should be fixed in ext2 trace point macro too.
>>
>> (ssize_t can vary based on 32 bit v/s 64 bit, so while printing it as
>> %llx it gives warning on 32bit. Hence will use loff_t for ret)
>
> How about %zd?

Aah yes. My bad, I wanted to look into print-format specifiers, but
missed it.

Documentation/core-api/printk-formats.rst
		size_t			%zu or %zx
		ssize_t			%zd or %zx

Will send the next revision soon with the comments addressed then.

Thanks!
-ritesh
Christoph Hellwig April 14, 2023, 6:04 a.m. UTC | #5
> +	trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret);
>  	dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private,
>  			     done_before);
>  	if (IS_ERR_OR_NULL(dio)) {
> @@ -689,6 +691,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  	}
>  	ret = iomap_dio_complete(dio);
>  out:
> +	trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret);

The trace_iomap_dio_rw_end tracepoint here seems a bit weird,
and we'll miss it for file systems using __iomap_dio_rw directly.

I'd instead add a trace_iomap_dio_rw_queued for the case where
__iomap_dio_rw returns ERR_PTR(-EIOCBQUEUED), as otherwise we're
nicely covered by the complete trace points.

> +		  __print_flags(__entry->dio_flags, "|", TRACE_IOMAP_DIO_STRINGS),

Please avoid the overly long line here.
Ritesh Harjani (IBM) April 14, 2023, 7:56 a.m. UTC | #6
Christoph Hellwig <hch@infradead.org> writes:

>> +	trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret);
>>  	dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private,
>>  			     done_before);
>>  	if (IS_ERR_OR_NULL(dio)) {
>> @@ -689,6 +691,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>>  	}
>>  	ret = iomap_dio_complete(dio);
>>  out:
>> +	trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret);
>
> The trace_iomap_dio_rw_end tracepoint heere seems a bit weird,
> and we'll miss it for file systems using  __iomap_dio_rw directly.

Sorry, yes you are right.

>
> I'd instead add a trace_iomap_dio_rw_queued for the case where
> __iomap_dio_rw returns ERR_PTR(-EIOCBQUEUED), as otherwise we're
> nicely covered by the complete trace points.
>

How about this below change? Does this look good to you?
It should cover all error types and both entry and exit.

And should I fold this entire change into one patch, or should I split the
refactoring of the common out routine into a separate one?


diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 5871956ee880..e412fdc4ee86 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -130,6 +130,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
        if (ret > 0)
                ret += dio->done_before;

+       trace_iomap_dio_complete(iocb, dio->error, ret);
        kfree(dio);

        return ret;
@@ -493,12 +494,15 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        struct blk_plug plug;
        struct iomap_dio *dio;

+       trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret);
        if (!iomi.len)
-               return NULL;
+               goto out;

        dio = kmalloc(sizeof(*dio), GFP_KERNEL);
-       if (!dio)
-               return ERR_PTR(-ENOMEM);
+       if (!dio) {
+               ret = -ENOMEM;
+               goto out;
+       }

        dio->iocb = iocb;
        atomic_set(&dio->ref, 1);
@@ -650,8 +654,11 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
         */
        dio->wait_for_completion = wait_for_completion;
        if (!atomic_dec_and_test(&dio->ref)) {
-               if (!wait_for_completion)
-                       return ERR_PTR(-EIOCBQUEUED);
+               if (!wait_for_completion) {
+                       ret = -EIOCBQUEUED;
+                       goto out;
+               }
+

                for (;;) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
@@ -663,10 +670,13 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                __set_current_state(TASK_RUNNING);
        }

+       trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret);
        return dio;

 out_free_dio:
        kfree(dio);
+out:
+       trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret);
        if (ret)
                return ERR_PTR(ret);
        return NULL;


>> +		  __print_flags(__entry->dio_flags, "|", TRACE_IOMAP_DIO_STRINGS),
>
> Please avoid the overly long line here.

Somehow my checkpatch never gave a warning about it.
I will check why that was. But yes, I have anyway renamed it to
IOMAP_DIO_STRINGS, similar to the other names used in fs/iomap/trace.h

-ritesh
Christoph Hellwig April 14, 2023, 1:06 p.m. UTC | #7
On Fri, Apr 14, 2023 at 01:26:38PM +0530, Ritesh Harjani wrote:
> How about this below change? Does this look good to you?
> It should cover all error types and both entry and exit.

I don't think it is very useful.  The complete tracepoint is the
end of the I/O.  Having a separate end one doesn't make sense.
That's why I suggested a queued one for the asynchronous case.
Ritesh Harjani (IBM) April 14, 2023, 2:38 p.m. UTC | #8
Christoph Hellwig <hch@infradead.org> writes:

> On Fri, Apr 14, 2023 at 01:26:38PM +0530, Ritesh Harjani wrote:
>> How about this below change? Does this look good to you?
>> It should cover all error types and both entry and exit.
>
> I don't think it is very useful.  The complete tracepoint is the
> end of the I/O.  Having a separate end one doesn't make sense.
> That's why I suggested a queued one for the asynchronous case.

Ok, does this look good then?

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 36ab1152dbea..859efb5de1bf 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -130,6 +130,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
        if (ret > 0)
                ret += dio->done_before;

+       trace_iomap_dio_complete(iocb, dio->error, ret);
        kfree(dio);

        return ret;
@@ -650,8 +651,12 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
         */
        dio->wait_for_completion = wait_for_completion;
        if (!atomic_dec_and_test(&dio->ref)) {
-               if (!wait_for_completion)
-                       return ERR_PTR(-EIOCBQUEUED);
+               if (!wait_for_completion) {
+                       ret = -EIOCBQUEUED;
+                       trace_iomap_dio_rw_queued(iocb, iter, dio_flags,
+                                                 done_before, ret);
+                       return ERR_PTR(ret);
+               }

                for (;;) {
                        set_current_state(TASK_UNINTERRUPTIBLE);

-ritesh
diff mbox series

Patch

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 5871956ee880..bb7a6dfbc8b3 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -130,6 +130,7 @@  ssize_t iomap_dio_complete(struct iomap_dio *dio)
 	if (ret > 0)
 		ret += dio->done_before;
 
+	trace_iomap_dio_complete(iocb, dio->error, ret);
 	kfree(dio);
 
 	return ret;
@@ -681,6 +682,7 @@  iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	struct iomap_dio *dio;
 	ssize_t ret = 0;
 
+	trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before, ret);
 	dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private,
 			     done_before);
 	if (IS_ERR_OR_NULL(dio)) {
@@ -689,6 +691,7 @@  iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	}
 	ret = iomap_dio_complete(dio);
 out:
+	trace_iomap_dio_rw_end(iocb, iter, dio_flags, done_before, ret);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iomap_dio_rw);
diff --git a/fs/iomap/trace.c b/fs/iomap/trace.c
index da217246b1a9..728d5443daf5 100644
--- a/fs/iomap/trace.c
+++ b/fs/iomap/trace.c
@@ -3,6 +3,7 @@ 
  * Copyright (c) 2019 Christoph Hellwig
  */
 #include <linux/iomap.h>
+#include <linux/uio.h>
 
 /*
  * We include this last to have the helpers above available for the trace
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index f6ea9540d082..dcb4dd4db5fb 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -183,6 +183,96 @@  TRACE_EVENT(iomap_iter,
 		   (void *)__entry->caller)
 );
 
+#define TRACE_IOMAP_DIO_STRINGS \
+	{IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \
+	{IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \
+	{IOMAP_DIO_PARTIAL, "DIO_PARTIAL" }
+
+DECLARE_EVENT_CLASS(iomap_dio_class,
+	TP_PROTO(struct kiocb *iocb, struct iov_iter *iter,
+		 unsigned int dio_flags, u64 done_before, int ret),
+	TP_ARGS(iocb, iter, dio_flags, done_before, ret),
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(ino_t,	ino)
+		__field(loff_t, isize)
+		__field(loff_t, pos)
+		__field(u64,	count)
+		__field(u64,	done_before)
+		__field(int,	ki_flags)
+		__field(unsigned int,	dio_flags)
+		__field(bool,	aio)
+		__field(int, ret)
+	),
+	TP_fast_assign(
+		__entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev;
+		__entry->ino = file_inode(iocb->ki_filp)->i_ino;
+		__entry->isize = file_inode(iocb->ki_filp)->i_size;
+		__entry->pos = iocb->ki_pos;
+		__entry->count = iov_iter_count(iter);
+		__entry->done_before = done_before;
+		__entry->dio_flags = dio_flags;
+		__entry->ki_flags = iocb->ki_flags;
+		__entry->aio = !is_sync_kiocb(iocb);
+		__entry->ret = ret;
+	),
+	TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx count %llu "
+		  "flags %s dio_flags %s done_before %llu aio %d ret %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->isize,
+		  __entry->pos,
+		  __entry->count,
+		  __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS),
+		  __print_flags(__entry->dio_flags, "|", TRACE_IOMAP_DIO_STRINGS),
+		  __entry->done_before,
+		  __entry->aio,
+		  __entry->ret)
+)
+
+#define DEFINE_DIO_RW_EVENT(name)					\
+DEFINE_EVENT(iomap_dio_class, name,					\
+	TP_PROTO(struct kiocb *iocb, struct iov_iter *iter,		\
+		 unsigned int dio_flags, u64 done_before,		\
+		 int ret),						\
+	TP_ARGS(iocb, iter, dio_flags, done_before, ret))
+DEFINE_DIO_RW_EVENT(iomap_dio_rw_begin);
+DEFINE_DIO_RW_EVENT(iomap_dio_rw_end);
+
+TRACE_EVENT(iomap_dio_complete,
+	TP_PROTO(struct kiocb *iocb, int error, int ret),
+	TP_ARGS(iocb, error, ret),
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(ino_t,	ino)
+		__field(loff_t, isize)
+		__field(loff_t, pos)
+		__field(int,	ki_flags)
+		__field(bool,	aio)
+		__field(int,	error)
+		__field(int,	ret)
+	),
+	TP_fast_assign(
+		__entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev;
+		__entry->ino = file_inode(iocb->ki_filp)->i_ino;
+		__entry->isize = file_inode(iocb->ki_filp)->i_size;
+		__entry->pos = iocb->ki_pos;
+		__entry->ki_flags = iocb->ki_flags;
+		__entry->aio = !is_sync_kiocb(iocb);
+		__entry->error = error;
+		__entry->ret = ret;
+	),
+	TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx flags %s aio %d error %d ret %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->isize,
+		  __entry->pos,
+		  __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS),
+		  __entry->aio,
+		  __entry->error,
+		  __entry->ret)
+);
+
 #endif /* _IOMAP_TRACE_H */
 
 #undef TRACE_INCLUDE_PATH