diff mbox

[1/6] writeback: initial tracing support

Message ID 1274784852-30502-2-git-send-email-david@fromorbit.com
State Not Applicable, archived
Headers show

Commit Message

Dave Chinner May 25, 2010, 10:54 a.m. UTC
From: From: Jens Axboe <jens.axboe@oracle.com>

Trace queue/sched/exec parts of the writeback loop.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/fs-writeback.c                |   40 +++++++--
 include/trace/events/writeback.h |  171 ++++++++++++++++++++++++++++++++++++++
 mm/backing-dev.c                 |    3 +
 3 files changed, 204 insertions(+), 10 deletions(-)
 create mode 100644 include/trace/events/writeback.h

Comments

Christoph Hellwig May 25, 2010, 11:13 a.m. UTC | #1
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/writeback.h>

It might be worth to add a comment why this is in an unusal place.

Otherwise looks good,


Reviewed-by: Christoph Hellwig <hch@lst.de>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrew Morton May 27, 2010, 9:32 p.m. UTC | #2
On Tue, 25 May 2010 20:54:07 +1000
Dave Chinner <david@fromorbit.com> wrote:

> From: From: Jens Axboe <jens.axboe@oracle.com>
> 
> Trace queue/sched/exec parts of the writeback loop.

It would be most useful if this patchset's description provided sample
tracing output, so we can see what the patch is actually providing us.

> -#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
> -
> -/*
> - * We don't actually have pdflush, but this one is exported though /proc...
> - */
> -int nr_pdflush_threads;
> -
>  /*
>   * Passed into wb_writeback(), essentially a subset of writeback_control
>   */
> @@ -63,6 +57,16 @@ struct bdi_work {
>  	unsigned long state;		/* flag bits, see WS_* */
>  };
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/writeback.h>
> +
> +#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)

Could/should be implemented in C.

> +/*
> + * We don't actually have pdflush, but this one is exported though /proc...
> + */
> +int nr_pdflush_threads;

So this is always zero now?

We don't want to keep it forever.  Add a
printk_once("nr_pdflush_threads is deprecated") when someone reads it,
remove it in 2014.

>
> ...
>
> --- /dev/null
> +++ b/include/trace/events/writeback.h
> @@ -0,0 +1,171 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM writeback
> +
> +#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)

Seems wrong.  If you define TRACE_HEADER_MULTI_READ then include this
header twice, things explode.  Which negates the purpose of
_TRACE_WRITEBACK_H.

> +#define _TRACE_WRITEBACK_H
> +
> +#include <linux/backing-dev.h>
> +#include <linux/writeback.h>
> +
> +TRACE_EVENT(writeback_queue,
> +
> +	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_args *args),
> +
> +	TP_ARGS(bdi, args),
> +
> +	TP_STRUCT__entry(
> +		__array(char,		name,		16)
> +		__field(long,		nr_pages)
> +		__field(int,		sb)
> +		__field(int,		sync_mode)
> +		__field(int,		for_kupdate)
> +		__field(int,		range_cyclic)
> +		__field(int,		for_background)
> +	),
> +
> +	TP_fast_assign(
> +		strncpy(__entry->name, dev_name(bdi->dev), 16);
> +		__entry->nr_pages	= args->nr_pages;
> +		__entry->sb		= !!args->sb;
> +		__entry->for_kupdate	= args->for_kupdate;
> +		__entry->range_cyclic	= args->range_cyclic;
> +		__entry->for_background	= args->for_background;
> +	),
> +
> +	TP_printk("%s: pages=%ld, sb=%d, kupdate=%d, range_cyclic=%d "
> +		  "for_background=%d", __entry->name, __entry->nr_pages,
> +			__entry->sb, __entry->for_kupdate,
> +			__entry->range_cyclic, __entry->for_background)
> +);
> +

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner May 28, 2010, 12:44 a.m. UTC | #3
On Thu, May 27, 2010 at 02:32:33PM -0700, Andrew Morton wrote:
> On Tue, 25 May 2010 20:54:07 +1000
> Dave Chinner <david@fromorbit.com> wrote:
> 
> > From: From: Jens Axboe <jens.axboe@oracle.com>
> > 
> > Trace queue/sched/exec parts of the writeback loop.
> 
> It would be most useful if this patchset's description provided sample
> tracing output, so we can see what the patch is actually providing us.

This is just a forward port of Jen's patch. I guess I'll have to
clean it up some more...

> 
> > -#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
> > -
> > -/*
> > - * We don't actually have pdflush, but this one is exported though /proc...
> > - */
> > -int nr_pdflush_threads;
> > -
> >  /*
> >   * Passed into wb_writeback(), essentially a subset of writeback_control
> >   */
> > @@ -63,6 +57,16 @@ struct bdi_work {
> >  	unsigned long state;		/* flag bits, see WS_* */
> >  };
> >  
> > +#define CREATE_TRACE_POINTS
> > +#include <trace/events/writeback.h>
> > +
> > +#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
> 
> Could/should be implemented in C.

OK.

> 
> > +/*
> > + * We don't actually have pdflush, but this one is exported though /proc...
> > + */
> > +int nr_pdflush_threads;
> 
> So this is always zero now?

I guess so. I'd forgotten that this was in the original patch....

> We don't want to keep it forever.  Add a
> printk_once("nr_pdflush_threads is deprecated") when someone reads it,
> remove it in 2014.

OK.

> > --- /dev/null
> > +++ b/include/trace/events/writeback.h
> > @@ -0,0 +1,171 @@
> > +#undef TRACE_SYSTEM
> > +#define TRACE_SYSTEM writeback
> > +
> > +#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)
> 
> Seems wrong.  If you define TRACE_HEADER_MULTI_READ then include this
> header twice, things explode.  Which negates the purpose of
> _TRACE_WRITEBACK_H.

Every other trace event header does this, so if it's wrong then the
same mistake is in at least 30 files now. I don't know enough about
the tracing code to know why this is done, and I'm not keen to
address such a mistake here...

Cheers,

Dave.
Steven Rostedt May 28, 2010, 1:18 a.m. UTC | #4
On Thu, 2010-05-27 at 14:32 -0700, Andrew Morton wrote:

> > --- /dev/null
> > +++ b/include/trace/events/writeback.h
> > @@ -0,0 +1,171 @@
> > +#undef TRACE_SYSTEM
> > +#define TRACE_SYSTEM writeback
> > +
> > +#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)
> 
> Seems wrong.  If you define TRACE_HEADER_MULTI_READ then include this
> header twice, things explode.  Which negates the purpose of
> _TRACE_WRITEBACK_H.

That's intended. It is documented in
samples/trace_events/trace-events-samples.h

The purpose of the TRACE_HEADER_MULTI_READ is to read the trace header
multi times. ;-)


You can also read about it here:

 http://lwn.net/Articles/379903/

here:

  http://lwn.net/Articles/381064/

and here:

   http://lwn.net/Articles/383362/

-- Steve

> 
> > +#define _TRACE_WRITEBACK_H
> > +
> > +#include <linux/backing-dev.h>
> > +#include <linux/writeback.h>
> > +

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steven Rostedt May 28, 2010, 1:20 a.m. UTC | #5
On Fri, 2010-05-28 at 10:44 +1000, Dave Chinner wrote:
> On Thu, May 27, 2010 at 02:32:33PM -0700, Andrew Morton wrote:
> >
> > > --- /dev/null
> > > +++ b/include/trace/events/writeback.h
> > > @@ -0,0 +1,171 @@
> > > +#undef TRACE_SYSTEM
> > > +#define TRACE_SYSTEM writeback
> > > +
> > > +#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)
> > 
> > Seems wrong.  If you define TRACE_HEADER_MULTI_READ then include this
> > header twice, things explode.  Which negates the purpose of
> > _TRACE_WRITEBACK_H.
> 
> Every other trace event header does this, so if it's wrong then the
> same mistake is in at least 30 files now. I don't know enough about
> the tracing code to know why this is done, and I'm not keen to
> address such a mistake here...

It's all part of the CPP voodoo ritual. You don't want to know why, just
do it, otherwise you may find dancing CPP skulls running around your
bathtub.

If you want to be a CPP Voodoo Witch doctor too, read up on LWN, I
describe some of the magic there.

-- Steve


--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig May 28, 2010, 7:45 a.m. UTC | #6
On Thu, May 27, 2010 at 02:32:33PM -0700, Andrew Morton wrote:
> > +#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
> 
> Could/should be implemented in C.
> 
> > +/*
> > + * We don't actually have pdflush, but this one is exported though /proc...
> > + */
> > +int nr_pdflush_threads;
> 
> So this is always zero now?
> 
> We don't want to keep it forever.  Add a
> printk_once("nr_pdflush_threads is deprecated") when someone reads it,
> remove it in 2014.

These two lines are just moved down a bit by the patch, I think any
cleanups should be left to separate patches.

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b1e76ef..b44aba6 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,15 +26,9 @@ 
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
+#include <linux/ftrace.h>
 #include "internal.h"
 
-#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
-
-/*
- * We don't actually have pdflush, but this one is exported though /proc...
- */
-int nr_pdflush_threads;
-
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
@@ -63,6 +57,16 @@  struct bdi_work {
 	unsigned long state;		/* flag bits, see WS_* */
 };
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+
+#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
+
+/*
+ * We don't actually have pdflush, but this one is exported though /proc...
+ */
+int nr_pdflush_threads;
+
 enum {
 	WS_USED_B = 0,
 	WS_ONSTACK_B,
@@ -137,6 +141,8 @@  static void wb_work_complete(struct bdi_work *work)
 
 static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
 {
+	trace_writeback_clear(work);
+
 	/*
 	 * The caller has retrieved the work arguments from this work,
 	 * drop our reference. If this is the last ref, delete and free it
@@ -172,12 +178,16 @@  static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
 	 * If the default thread isn't there, make sure we add it. When
 	 * it gets created and wakes up, we'll run this work.
 	 */
-	if (unlikely(list_empty_careful(&bdi->wb_list)))
+	if (unlikely(list_empty_careful(&bdi->wb_list))) {
+		trace_writeback_sched(bdi, work, "default");
 		wake_up_process(default_backing_dev_info.wb.task);
-	else {
+	} else {
 		struct bdi_writeback *wb = &bdi->wb;
+		struct task_struct *task = wb->task;
 
-		if (wb->task)
+		trace_writeback_sched(bdi, work, task ? "task" : "notask");
+
+		if (task)
 			wake_up_process(wb->task);
 	}
 }
@@ -205,6 +215,7 @@  static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
 	work = kmalloc(sizeof(*work), GFP_ATOMIC);
 	if (work) {
 		bdi_work_init(work, args);
+		trace_writeback_queue(bdi, args);
 		bdi_queue_work(bdi, work);
 		if (wait)
 			bdi_wait_on_work_clear(work);
@@ -245,6 +256,7 @@  static void bdi_sync_writeback(struct backing_dev_info *bdi,
 	bdi_work_init(&work, &args);
 	work.state |= WS_ONSTACK;
 
+	trace_writeback_queue(bdi, &args);
 	bdi_queue_work(bdi, &work);
 	bdi_wait_on_work_clear(&work);
 }
@@ -914,6 +926,8 @@  long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 		struct wb_writeback_args args = work->args;
 		int post_clear;
 
+		trace_writeback_exec(work);
+
 		/*
 		 * Override sync mode, in case we must wait for completion
 		 */
@@ -957,9 +971,13 @@  int bdi_writeback_task(struct bdi_writeback *wb)
 	unsigned long wait_jiffies = -1UL;
 	long pages_written;
 
+	trace_writeback_thread_start(1);
+
 	while (!kthread_should_stop()) {
 		pages_written = wb_do_writeback(wb, 0);
 
+		trace_writeback_pages_written(pages_written);
+
 		if (pages_written)
 			last_active = jiffies;
 		else if (wait_jiffies != -1UL) {
@@ -989,6 +1007,8 @@  int bdi_writeback_task(struct bdi_writeback *wb)
 		try_to_freeze();
 	}
 
+	trace_writeback_thread_start(0);
+
 	return 0;
 }
 
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
new file mode 100644
index 0000000..df76457
--- /dev/null
+++ b/include/trace/events/writeback.h
@@ -0,0 +1,171 @@ 
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM writeback
+
+#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WRITEBACK_H
+
+#include <linux/backing-dev.h>
+#include <linux/writeback.h>
+
+TRACE_EVENT(writeback_queue,
+
+	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_args *args),
+
+	TP_ARGS(bdi, args),
+
+	TP_STRUCT__entry(
+		__array(char,		name,		16)
+		__field(long,		nr_pages)
+		__field(int,		sb)
+		__field(int,		sync_mode)
+		__field(int,		for_kupdate)
+		__field(int,		range_cyclic)
+		__field(int,		for_background)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name, dev_name(bdi->dev), 16);
+		__entry->nr_pages	= args->nr_pages;
+		__entry->sb		= !!args->sb;
+		__entry->for_kupdate	= args->for_kupdate;
+		__entry->range_cyclic	= args->range_cyclic;
+		__entry->for_background	= args->for_background;
+	),
+
+	TP_printk("%s: pages=%ld, sb=%d, kupdate=%d, range_cyclic=%d "
+		  "for_background=%d", __entry->name, __entry->nr_pages,
+			__entry->sb, __entry->for_kupdate,
+			__entry->range_cyclic, __entry->for_background)
+);
+
+TRACE_EVENT(writeback_sched,
+
+	TP_PROTO(struct backing_dev_info *bdi, struct bdi_work *work,
+		 const char *msg),
+
+	TP_ARGS(bdi, work, msg),
+
+	TP_STRUCT__entry(
+		__array(char,			name,		16)
+		__field(unsigned int,		work)
+		__array(char,			task,		8)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name, dev_name(bdi->dev), 16);
+		__entry->work = (unsigned long) work & 0xffff;
+		snprintf(__entry->task, 8, "%s", msg);
+	),
+
+	TP_printk("work=%x, task=%s", __entry->work, __entry->task)
+);
+
+TRACE_EVENT(writeback_exec,
+
+	TP_PROTO(struct bdi_work *work),
+
+	TP_ARGS(work),
+
+	TP_STRUCT__entry(
+		__field(unsigned int,	work)
+		__field(long,		nr_pages)
+		__field(int,		sb)
+		__field(int,		sync_mode)
+		__field(int,		for_kupdate)
+		__field(int,		range_cyclic)
+		__field(int,		for_background)
+	),
+
+	TP_fast_assign(
+		__entry->work		= (unsigned long) work & 0xffff;
+		__entry->nr_pages	= work->args.nr_pages;
+		__entry->sb		= !!work->args.sb;
+		__entry->for_kupdate	= work->args.for_kupdate;
+		__entry->range_cyclic	= work->args.range_cyclic;
+		__entry->for_background	= work->args.for_background;
+
+	),
+
+	TP_printk("work=%x pages=%ld, sb=%d, kupdate=%d, range_cyclic=%d"
+		  " for_background=%d", __entry->work,
+			__entry->nr_pages, __entry->sb, __entry->for_kupdate,
+			__entry->range_cyclic, __entry->for_background)
+);
+
+TRACE_EVENT(writeback_clear,
+
+	TP_PROTO(struct bdi_work *work),
+
+	TP_ARGS(work),
+
+	TP_STRUCT__entry(
+		__field(struct bdi_work *,	work)
+		__field(int,			refs)
+	),
+
+	TP_fast_assign(
+		__entry->work		= work;
+		__entry->refs		= atomic_read(&work->pending);
+	),
+
+	TP_printk("work=%p, refs=%d", __entry->work, __entry->refs)
+);
+
+TRACE_EVENT(writeback_pages_written,
+
+	TP_PROTO(long pages_written),
+
+	TP_ARGS(pages_written),
+
+	TP_STRUCT__entry(
+		__field(long,		pages)
+	),
+
+	TP_fast_assign(
+		__entry->pages		= pages_written;
+	),
+
+	TP_printk("%ld", __entry->pages)
+);
+
+
+TRACE_EVENT(writeback_thread_start,
+
+	TP_PROTO(int start),
+
+	TP_ARGS(start),
+
+	TP_STRUCT__entry(
+		__field(int,	start)
+	),
+
+	TP_fast_assign(
+		__entry->start = start;
+	),
+
+	TP_printk("%s", __entry->start ? "started" : "exited")
+);
+
+TRACE_EVENT(writeback_bdi_register,
+
+	TP_PROTO(const char *name, int start),
+
+	TP_ARGS(name, start),
+
+	TP_STRUCT__entry(
+		__array(char,	name,		16)
+		__field(int,	start)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name, name, 16);
+		__entry->start = start;
+	),
+
+	TP_printk("%s: %s", __entry->name,
+			__entry->start ? "registered" : "unregistered")
+);
+#endif /* _TRACE_WRITEBACK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 660a87a..1f7723b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@ 
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 
@@ -585,6 +586,7 @@  int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 
 	bdi_debug_register(bdi, dev_name(dev));
 	set_bit(BDI_registered, &bdi->state);
+	trace_writeback_bdi_register(dev_name(dev), 1);
 exit:
 	return ret;
 }
@@ -647,6 +649,7 @@  static void bdi_prune_sb(struct backing_dev_info *bdi)
 void bdi_unregister(struct backing_dev_info *bdi)
 {
 	if (bdi->dev) {
+		trace_writeback_bdi_register(dev_name(bdi->dev), 0);
 		bdi_prune_sb(bdi);
 
 		if (!bdi_cap_flush_forker(bdi))