diff mbox

[RFC] sputrace: use the generic event tracer

Message ID 20090506102918.GA23278@lst.de
State New
Headers show

Commit Message

Christoph Hellwig May 6, 2009, 10:29 a.m. UTC
I wrote sputrace before generic tracing infrastrucure was available.
Now that we have the generic event tracer we can convert it over and
remove a lot of code:

  8 files changed, 45 insertions(+), 285 deletions(-)

To use it make sure CONFIG_EVENT_TRACING is enabled and then enable
the spufs trace channel by

  echo 1 > /sys/kernel/debug/tracing/events/spufs/spufs_context

and then read the trace records using e.g.

  cat /sys/kernel/debug/tracing/trace


Note that the patch is ontop of the current tracing tree

  git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git#tracing/ftrace

as there have been some TRACE_EVENT changes in there that aren't in
mainline yet.

I don't have any cell hardware anymore so this is only compile tested.


Signed-off-by: Christoph Hellwig <hch@lst.de>

Comments

Ingo Molnar May 6, 2009, 10:57 a.m. UTC | #1
(Note: please Cc all tracing related patches to lkml.)

* Christoph Hellwig <hch@lst.de> wrote:

> I wrote sputrace before generic tracing infrastrucure was 
> available. Now that we have the generic event tracer we can 
> convert it over and remove a lot of code:
> 
>   8 files changed, 45 insertions(+), 285 deletions(-)

Nice! Needs also an Ack from PowerPC folks before we can do this. 
The cross section to other powerpc code seems to be rather low.

A few comments:

> To use it make sure CONFIG_EVENT_TRACING is enabled and then enable
> the spufs trace channel by
> 
>   echo 1 > /sys/kernel/debug/tracing/events/spufs/spufs_context
> 
> and then read the trace records using e.g.
> 
>   cat /sys/kernel/debug/tracing/trace
> 
> 
> Note that the patch is ontop of the current tracing tree
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git#tracing/ftrace
> 
> as there have been some TRACE_EVENT changes in there that aren't in
> mainline yet.
> 
> I don't have any cell hardware anymore so this is only compile tested.
> 
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Index: linux-2.6-tip/arch/powerpc/platforms/cell/spufs/sputrace.h
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6-tip/arch/powerpc/platforms/cell/spufs/sputrace.h	2009-05-06 10:17:20.000000000 +0000
> @@ -0,0 +1,39 @@
> +#if !defined(_TRACE_SPUFS_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_SPUFS_H
> +
> +#include <linux/tracepoint.h>
> +
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM spufs
> +
> +TRACE_EVENT(spufs_context,
> +	TP_PROTO(struct spu_context *ctx, struct spu *spu, const char *name),
> +	TP_ARGS(ctx, spu, name),
> +
> +	TP_STRUCT__entry(
> +		__field(int, owner_tid)
> +		__field(const char *, name)
> +		__field(int, number)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->owner_tid = ctx->tid;
> +		__entry->name = name;
> +		__entry->number = spu ? spu->number : -1;
> +	),
> +
> +	TP_printk("%s (ctxthread = %d, spu = %d)",
> +		__entry->name, __entry->owner_tid, __entry->number)

Please keep the natural field ordering that is visible in the output 
format:

		__field(const char *, name)
		__field(int, owner_tid)
		__field(int, number)

This also has the advantage of (potentially) compressing the trace 
entry some more, as two int's are now following each other. The 
original sputrace record format had this order:

struct sputrace {
	ktime_t tstamp;
	int owner_tid; /* owner */
	int curr_tid;
	const char *name;
	int number;
};

There 'name' and 'curr_tid' was probably flipped around because 
'curr_tid' was there too. In the event tracer the PID is at the head 
of the record.

> +);
> +
> +#define spu_context_trace(name, ctx, spu) \
> +	trace_spufs_context(ctx, spu, __stringify(name))
> +#define spu_context_nospu_trace(name, ctx) \
> +	trace_spufs_context(ctx, NULL, __stringify(name))
> +
> +#endif /* _TRACE_SPUFS_H */
> +
> +#undef TRACE_INCLUDE_PATH
> +#define TRACE_INCLUDE_PATH .
> +#define TRACE_INCLUDE_FILE sputrace
> +#include <trace/define_trace.h>
> Index: linux-2.6-tip/arch/powerpc/platforms/cell/spufs/spufs.h
> ===================================================================
> --- linux-2.6-tip.orig/arch/powerpc/platforms/cell/spufs/spufs.h	2009-05-06 10:14:40.000000000 +0000
> +++ linux-2.6-tip/arch/powerpc/platforms/cell/spufs/spufs.h	2009-05-06 10:20:14.000000000 +0000
> @@ -373,9 +373,4 @@
>  extern void spuctx_switch_state(struct spu_context *ctx,
>  		enum spu_utilization_state new_state);
>  
> -#define spu_context_trace(name, ctx, spu) \
> -	trace_mark(name, "ctx %p spu %p", ctx, spu);
> -#define spu_context_nospu_trace(name, ctx) \
> -	trace_mark(name, "ctx %p", ctx);
> -
>  #endif
> Index: linux-2.6-tip/arch/powerpc/platforms/cell/Kconfig
> ===================================================================
> --- linux-2.6-tip.orig/arch/powerpc/platforms/cell/Kconfig	2009-05-06 10:17:46.000000000 +0000
> +++ linux-2.6-tip/arch/powerpc/platforms/cell/Kconfig	2009-05-06 10:17:50.000000000 +0000
> @@ -77,13 +77,6 @@
>  	  uses 4K pages. This can improve performances of applications
>  	  using multiple SPEs by lowering the TLB pressure on them.
>  
> -config SPU_TRACE
> -	tristate "SPU event tracing support"
> -	depends on SPU_FS && MARKERS
> -	help
> -	  This option allows reading a trace of spu-related events through
> -	  the sputrace file in procfs.

I think we should keep this option around.

> -
>  config SPU_BASE
>  	bool
>  	default n
> Index: linux-2.6-tip/arch/powerpc/platforms/cell/spufs/Makefile
> ===================================================================
> --- linux-2.6-tip.orig/arch/powerpc/platforms/cell/spufs/Makefile	2009-05-06 10:17:56.000000000 +0000
> +++ linux-2.6-tip/arch/powerpc/platforms/cell/spufs/Makefile	2009-05-06 10:22:23.000000000 +0000
> @@ -4,7 +4,8 @@
>  spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o
>  spufs-y += switch.o fault.o lscsa_alloc.o
>  
> -obj-$(CONFIG_SPU_TRACE)	+= sputrace.o
> +# magic for the trace events
> +CFLAGS_sched.o := -I$(src)

Steve, i'm wondering whether this type of Makefile hackery (caused 
by modular tracepoints) could be eliminated ...

Anyway, nice patch!

	Ingo
Ingo Molnar May 6, 2009, 10:58 a.m. UTC | #2
* Christoph Hellwig <hch@lst.de> wrote:

> Index: linux-2.6-tip/arch/powerpc/platforms/cell/spufs/sputrace.c
> ===================================================================
> --- linux-2.6-tip.orig/arch/powerpc/platforms/cell/spufs/sputrace.c	2009-05-06 10:18:03.000000000 +0000
> +++ /dev/null	1970-01-01 00:00:00.000000000 +0000
> @@ -1,272 +0,0 @@
> -/*
> - * Copyright (C) 2007 IBM Deutschland Entwicklung GmbH
> - *	Released under GPL v2.
> - *
> - * Partially based on net/ipv4/tcp_probe.c.

btw., net/ipv4/tcp_probe.c would be another potential candidate to 
convert to tracepoints.

	Ingo
Christoph Hellwig May 6, 2009, 11:02 a.m. UTC | #3
On Wed, May 06, 2009 at 12:57:48PM +0200, Ingo Molnar wrote:
> Nice! Needs also an Ack from PowerPC folks before we can do this. 
> The cross section to other powerpc code seems to be rather low.

cbe-oss-dev is the Cell list.

> > -config SPU_TRACE
> > -	tristate "SPU event tracing support"
> > -	depends on SPU_FS && MARKERS
> > -	help
> > -	  This option allows reading a trace of spu-related events through
> > -	  the sputrace file in procfs.
> 
> I think we should keep this option around.

Why?  trace_events that aren't enabled are extremly low overhead.
And most in the current tree are non-optional.

> > +# magic for the trace events
> > +CFLAGS_sched.o := -I$(src)
> 
> Steve, i'm wondering whether this type of Makefile hackery (caused 
> by modular tracepoints) could be eliminated ...

We would just have to include the header file with "" instead of <>.
But I remember Steve not liking this when we talked about it.
Ingo Molnar May 6, 2009, 11:23 a.m. UTC | #4
* Christoph Hellwig <hch@lst.de> wrote:

> On Wed, May 06, 2009 at 12:57:48PM +0200, Ingo Molnar wrote:
> > Nice! Needs also an Ack from PowerPC folks before we can do this. 
> > The cross section to other powerpc code seems to be rather low.
> 
> cbe-oss-dev is the Cell list.

( ... and linuxppc-dev. And since it's all a sub-architecture of 
  PowerPC and Cell changes go upstream via the PowerPC tree, it's 
  nice to have the attention (and acks) of generic-arch maintainers 
  as well (if they are interested). And even if it's OK, Ben and 
  Arnd obviously calls the shots when it comes to workflow details: 
  in which tree and how to queue it up. )

> > > -config SPU_TRACE
> > > -	tristate "SPU event tracing support"
> > > -	depends on SPU_FS && MARKERS
> > > -	help
> > > -	  This option allows reading a trace of spu-related events through
> > > -	  the sputrace file in procfs.
> > 
> > I think we should keep this option around.
> 
> Why?  trace_events that aren't enabled are extremly low overhead.
> And most in the current tree are non-optional.

It is general curtesy to maintain the old Kconfig structure and ease 
migration to a new facility.

Part of the "being nice" excercise ;-)

> > > +# magic for the trace events
> > > +CFLAGS_sched.o := -I$(src)
> > 
> > Steve, i'm wondering whether this type of Makefile hackery (caused 
> > by modular tracepoints) could be eliminated ...
> 
> We would just have to include the header file with "" instead of 
> <>. But I remember Steve not liking this when we talked about it.

Yeah. But changing Makefiles isnt particularly clean either ...

And adding -I$(src) can have side-effects: we often have a local 
foo.h while an include/linux/foo.h as well.

	Ingo
Steven Rostedt May 6, 2009, 1:54 p.m. UTC | #5
On Wed, 2009-05-06 at 13:23 +0200, Ingo Molnar wrote:

> > > > +# magic for the trace events
> > > > +CFLAGS_sched.o := -I$(src)
> > > 
> > > Steve, i'm wondering whether this type of Makefile hackery (caused 
> > > by modular tracepoints) could be eliminated ...
> > 
> > We would just have to include the header file with "" instead of 
> > <>. But I remember Steve not liking this when we talked about it.
> 
> Yeah. But changing Makefiles isnt particularly clean either ...
> 
> And adding -I$(src) can have side-effects: we often have a local 
> foo.h while an include/linux/foo.h as well.

That still would not conflict, because

#include "foo.h"

will not include "linux/foo.h" and

#include <linux/foo.h>

will not include a local foo.h, unless there's also a local "linux"
directory with a foo.h in it.


The Makefile hack has to do with being able to have the "foo.h" file
with the TRACE_EVENTs someplace other than include/trace.

If the "foo.h" is in include/trace.h we do not need to include this
hack. But because the include/trace/define_trace.h needs to include the
"foo.h" file recursively, it must be able to find it. If we do not add a
search path, include/trace/define_trace.h will not look in the other
locations.

Note, as Christoph did, we only need to add the include path to the file
that defines "CREATE_TRACE_POINTS". Which is only one file.

CFLAGS_sched.o := -I$(src)

Only touches the sched.c file in that directory (Note, for those reading
this thread out of context, this is not the same file as kernel/sched.c)

-- Steve
Frédéric Weisbecker May 6, 2009, 4:53 p.m. UTC | #6
On Wed, May 06, 2009 at 12:57:48PM +0200, Ingo Molnar wrote:
> 
> (Note: please Cc all tracing related patches to lkml.)
> 
> * Christoph Hellwig <hch@lst.de> wrote:
> 
> > I wrote sputrace before generic tracing infrastrucure was 
> > available. Now that we have the generic event tracer we can 
> > convert it over and remove a lot of code:
> > 
> >   8 files changed, 45 insertions(+), 285 deletions(-)
> 
> Nice! Needs also an Ack from PowerPC folks before we can do this. 
> The cross section to other powerpc code seems to be rather low.
> 
> A few comments:
> 
> > To use it make sure CONFIG_EVENT_TRACING is enabled and then enable
> > the spufs trace channel by
> > 
> >   echo 1 > /sys/kernel/debug/tracing/events/spufs/spufs_context
> > 
> > and then read the trace records using e.g.
> > 
> >   cat /sys/kernel/debug/tracing/trace
> > 
> > 
> > Note that the patch is ontop of the current tracing tree
> > 
> >   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git#tracing/ftrace
> > 
> > as there have been some TRACE_EVENT changes in there that aren't in
> > mainline yet.
> > 
> > I don't have any cell hardware anymore so this is only compile tested.
> > 
> > 
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > 
> > Index: linux-2.6-tip/arch/powerpc/platforms/cell/spufs/sputrace.h
> > ===================================================================
> > --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> > +++ linux-2.6-tip/arch/powerpc/platforms/cell/spufs/sputrace.h	2009-05-06 10:17:20.000000000 +0000
> > @@ -0,0 +1,39 @@
> > +#if !defined(_TRACE_SPUFS_H) || defined(TRACE_HEADER_MULTI_READ)
> > +#define _TRACE_SPUFS_H
> > +
> > +#include <linux/tracepoint.h>
> > +
> > +#undef TRACE_SYSTEM
> > +#define TRACE_SYSTEM spufs
> > +
> > +TRACE_EVENT(spufs_context,
> > +	TP_PROTO(struct spu_context *ctx, struct spu *spu, const char *name),
> > +	TP_ARGS(ctx, spu, name),
> > +
> > +	TP_STRUCT__entry(
> > +		__field(int, owner_tid)
> > +		__field(const char *, name)


Christoph,

I don't know much the code you are tracing. But it is rare that
a const char * is safe on tracing. Still it could be, you just have to
ensure the string cannot be freed in any way because this pointer
will be stored in the ring buffer and it can be read and dereferenced later
in a random time, could be several years :-)

So if this pointer references built-in data, no problem with that.
But if it can freed (comes from a module, __initdata, ...), then
you should use the __string() field which does an strcpy on the
ring buffer.

If you think this is safe, then it's the best choice because
storing a pointer is of course less costly than an strcpy.
If so I will add the support for char * in the filters (trivial).

Thanks,

Frederic.
Christoph Hellwig May 6, 2009, 5:13 p.m. UTC | #7
On Wed, May 06, 2009 at 06:53:37PM +0200, Frederic Weisbecker wrote:
> I don't know much the code you are tracing. But it is rare that
> a const char * is safe on tracing. Still it could be, you just have to
> ensure the string cannot be freed in any way because this pointer
> will be stored in the ring buffer and it can be read and dereferenced later
> in a random time, could be several years :-)
> 
> So if this pointer references built-in data, no problem with that.
> But if it can freed (comes from a module, __initdata, ...), then
> you should use the __string() field which does an strcpy on the
> ring buffer.
> 
> If you think this is safe, then it's the best choice because
> storing a pointer is of course less costly than an strcpy.
> If so I will add the support for char * in the filters (trivial).

The pointer here only ever references string constants, it's always
a string literal in the callers.
Steven Rostedt May 6, 2009, 7:05 p.m. UTC | #8
On Wed, 2009-05-06 at 19:13 +0200, Christoph Hellwig wrote:
> On Wed, May 06, 2009 at 06:53:37PM +0200, Frederic Weisbecker wrote:
> > I don't know much the code you are tracing. But it is rare that
> > a const char * is safe on tracing. Still it could be, you just have to
> > ensure the string cannot be freed in any way because this pointer
> > will be stored in the ring buffer and it can be read and dereferenced later
> > in a random time, could be several years :-)
> > 
> > So if this pointer references built-in data, no problem with that.
> > But if it can freed (comes from a module, __initdata, ...), then
> > you should use the __string() field which does an strcpy on the
> > ring buffer.
> > 
> > If you think this is safe, then it's the best choice because
> > storing a pointer is of course less costly than an strcpy.
> > If so I will add the support for char * in the filters (trivial).
> 
> The pointer here only ever references string constants, it's always
> a string literal in the callers.

The worry is if this is used by modules. A constant string may not be
around when the buffer is read.

This should not be a problem because the formatting of the string is not
around either, and we just output 'unknown type'.  But I may be adding
code when a module is unloaded to reset the ring buffer if the module
registered any events. That's because we have other races to worry
about.

-- Steve
Jeremy Kerr May 7, 2009, 1:34 a.m. UTC | #9
Hi Christoph,

Nice work!

We'd just need to keep in sync with the performance folks who use the 
sputrace data. I don't imagine that the changes would be complex on 
their end though.

I'm happy to merge into the spufs tree, pending the ftrace dependencies. 
Or would this be better going in with those?

Cheers,


Jeremy
Ingo Molnar May 7, 2009, 8:56 a.m. UTC | #10
* Jeremy Kerr <jk@ozlabs.org> wrote:

> Hi Christoph,
> 
> Nice work!
> 
> We'd just need to keep in sync with the performance folks who use 
> the sputrace data. I don't imagine that the changes would be 
> complex on their end though.
> 
> I'm happy to merge into the spufs tree, pending the ftrace 
> dependencies. Or would this be better going in with those?

If you think there's not going to be big ongoing conflicts in this 
specific code in the next month leading up to the .31 merge window, 
and if you ack all the changes, i'd be glad to merge this patch into 
the tracing tree.

I do daily builds of powerpc defconfig, which has these spufs 
settings:

 #
 # Cell Broadband Engine options
 #
 CONFIG_SPU_FS=m
 CONFIG_SPU_FS_64K_LS=y
 # CONFIG_SPU_TRACE is not set

so spufs is build-tested, but spu-trace is not - but with 
Christoph's patch the tracing facility would be available 
unconditionally, so that would get (build) tested as well,
and propagated to linux-next.

	Ingo
Christoph Hellwig May 10, 2009, 7:07 p.m. UTC | #11
On Wed, May 06, 2009 at 03:05:39PM -0400, Steven Rostedt wrote:
> The worry is if this is used by modules. A constant string may not be
> around when the buffer is read.
> 
> This should not be a problem because the formatting of the string is not
> around either, and we just output 'unknown type'.  But I may be adding
> code when a module is unloaded to reset the ring buffer if the module
> registered any events. That's because we have other races to worry
> about.

I think having some constant string description for trace events is
pretty common and used in just above any ad-hoc tracer I've seen,
and just storing the pointers makes this a lot more efficient.

So either we try to make sure this works, or we need some big waivers
in the documentation that people need to use the slower __string
variant.
Christoph Hellwig May 10, 2009, 7:11 p.m. UTC | #12
On Thu, May 07, 2009 at 11:34:10AM +1000, Jeremy Kerr wrote:
> Hi Christoph,
> 
> Nice work!
> 
> We'd just need to keep in sync with the performance folks who use the 
> sputrace data. I don't imagine that the changes would be complex on 
> their end though.
> 
> I'm happy to merge into the spufs tree, pending the ftrace dependencies. 
> Or would this be better going in with those?

For me life would be simpler if goes in via the ftrace tree, so if you
don't have any changes conflicting with it pending (and I kept the
wrapper macros for the tracepoints to keep that chance low) I'd go down
the route.

Btw, do you want to keep the config option or are you fine with removing
it?
Christoph Hellwig June 4, 2009, 12:51 p.m. UTC | #13
On Thu, May 07, 2009 at 11:34:10AM +1000, Jeremy Kerr wrote:
> Hi Christoph,
> 
> Nice work!
> 
> We'd just need to keep in sync with the performance folks who use the 
> sputrace data. I don't imagine that the changes would be complex on 
> their end though.
> 
> I'm happy to merge into the spufs tree, pending the ftrace dependencies. 
> Or would this be better going in with those?

As we're getting to the end of the subsystem merge window for 2.6.31
what's the feedback from the performance folks?

If everyone is fine with the patch I'll respin it with the field
re-ordering suggested by Ingo and then we can put it in the tracing
tree.
Ingo Molnar June 7, 2009, 10:30 a.m. UTC | #14
* Christoph Hellwig <hch@lst.de> wrote:

> On Thu, May 07, 2009 at 11:34:10AM +1000, Jeremy Kerr wrote:
> > Hi Christoph,
> > 
> > Nice work!
> > 
> > We'd just need to keep in sync with the performance folks who use the 
> > sputrace data. I don't imagine that the changes would be complex on 
> > their end though.
> > 
> > I'm happy to merge into the spufs tree, pending the ftrace dependencies. 
> > Or would this be better going in with those?
> 
> As we're getting to the end of the subsystem merge window for 
> 2.6.31 what's the feedback from the performance folks?
> 
> If everyone is fine with the patch I'll respin it with the field 
> re-ordering suggested by Ingo and then we can put it in the 
> tracing tree.

either solution is fine with me. (It could also be pushed out of 
order in the merge window, after the powerpc and the tracing tree.)

	Ingo
Jeremy Kerr June 11, 2009, 12:18 a.m. UTC | #15
Hi Christoph,

> As we're getting to the end of the subsystem merge window for 2.6.31
> what's the feedback from the performance folks?

Still no answer, I say merge it.

Cheers,


Jeremy
Christoph Hellwig June 16, 2009, 2:56 p.m. UTC | #16
On Thu, Jun 11, 2009 at 10:18:06AM +1000, Jeremy Kerr wrote:
> Hi Christoph,
> 
> > As we're getting to the end of the subsystem merge window for 2.6.31
> > what's the feedback from the performance folks?
> 
> Still no answer, I say merge it.

Now that all pre-requisites are in mainline can you just pull in through
the spufs tree?  Would be great to still get it into 2.6.31 so we can
kill markers entirely this merge window.
diff mbox

Patch

Index: linux-2.6-tip/arch/powerpc/platforms/cell/spufs/sputrace.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-tip/arch/powerpc/platforms/cell/spufs/sputrace.h	2009-05-06 10:17:20.000000000 +0000
@@ -0,0 +1,39 @@ 
+#if !defined(_TRACE_SPUFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SPUFS_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM spufs
+
+TRACE_EVENT(spufs_context,
+	TP_PROTO(struct spu_context *ctx, struct spu *spu, const char *name),
+	TP_ARGS(ctx, spu, name),
+
+	TP_STRUCT__entry(
+		__field(int, owner_tid)
+		__field(const char *, name)
+		__field(int, number)
+	),
+
+	TP_fast_assign(
+		__entry->owner_tid = ctx->tid;
+		__entry->name = name;
+		__entry->number = spu ? spu->number : -1;
+	),
+
+	TP_printk("%s (ctxthread = %d, spu = %d)",
+		__entry->name, __entry->owner_tid, __entry->number)
+);
+
+#define spu_context_trace(name, ctx, spu) \
+	trace_spufs_context(ctx, spu, __stringify(name))
+#define spu_context_nospu_trace(name, ctx) \
+	trace_spufs_context(ctx, NULL, __stringify(name))
+
+#endif /* _TRACE_SPUFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE sputrace
+#include <trace/define_trace.h>
Index: linux-2.6-tip/arch/powerpc/platforms/cell/spufs/spufs.h
===================================================================
--- linux-2.6-tip.orig/arch/powerpc/platforms/cell/spufs/spufs.h	2009-05-06 10:14:40.000000000 +0000
+++ linux-2.6-tip/arch/powerpc/platforms/cell/spufs/spufs.h	2009-05-06 10:20:14.000000000 +0000
@@ -373,9 +373,4 @@ 
 extern void spuctx_switch_state(struct spu_context *ctx,
 		enum spu_utilization_state new_state);
 
-#define spu_context_trace(name, ctx, spu) \
-	trace_mark(name, "ctx %p spu %p", ctx, spu);
-#define spu_context_nospu_trace(name, ctx) \
-	trace_mark(name, "ctx %p", ctx);
-
 #endif
Index: linux-2.6-tip/arch/powerpc/platforms/cell/Kconfig
===================================================================
--- linux-2.6-tip.orig/arch/powerpc/platforms/cell/Kconfig	2009-05-06 10:17:46.000000000 +0000
+++ linux-2.6-tip/arch/powerpc/platforms/cell/Kconfig	2009-05-06 10:17:50.000000000 +0000
@@ -77,13 +77,6 @@ 
 	  uses 4K pages. This can improve performances of applications
 	  using multiple SPEs by lowering the TLB pressure on them.
 
-config SPU_TRACE
-	tristate "SPU event tracing support"
-	depends on SPU_FS && MARKERS
-	help
-	  This option allows reading a trace of spu-related events through
-	  the sputrace file in procfs.
-
 config SPU_BASE
 	bool
 	default n
Index: linux-2.6-tip/arch/powerpc/platforms/cell/spufs/Makefile
===================================================================
--- linux-2.6-tip.orig/arch/powerpc/platforms/cell/spufs/Makefile	2009-05-06 10:17:56.000000000 +0000
+++ linux-2.6-tip/arch/powerpc/platforms/cell/spufs/Makefile	2009-05-06 10:22:23.000000000 +0000
@@ -4,7 +4,8 @@ 
 spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o
 spufs-y += switch.o fault.o lscsa_alloc.o
 
-obj-$(CONFIG_SPU_TRACE)	+= sputrace.o
+# magic for the trace events
+CFLAGS_sched.o := -I$(src)
 
 # Rules to build switch.o with the help of SPU tool chain
 SPU_CROSS	:= spu-
Index: linux-2.6-tip/arch/powerpc/platforms/cell/spufs/sputrace.c
===================================================================
--- linux-2.6-tip.orig/arch/powerpc/platforms/cell/spufs/sputrace.c	2009-05-06 10:18:03.000000000 +0000
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,272 +0,0 @@ 
-/*
- * Copyright (C) 2007 IBM Deutschland Entwicklung GmbH
- *	Released under GPL v2.
- *
- * Partially based on net/ipv4/tcp_probe.c.
- *
- * Simple tracing facility for spu contexts.
- */
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/marker.h>
-#include <linux/proc_fs.h>
-#include <linux/wait.h>
-#include <asm/atomic.h>
-#include <asm/uaccess.h>
-#include "spufs.h"
-
-struct spu_probe {
-	const char *name;
-	const char *format;
-	marker_probe_func *probe_func;
-};
-
-struct sputrace {
-	ktime_t tstamp;
-	int owner_tid; /* owner */
-	int curr_tid;
-	const char *name;
-	int number;
-};
-
-static int bufsize __read_mostly = 16384;
-MODULE_PARM_DESC(bufsize, "Log buffer size (number of records)");
-module_param(bufsize, int, 0);
-
-
-static DEFINE_SPINLOCK(sputrace_lock);
-static DECLARE_WAIT_QUEUE_HEAD(sputrace_wait);
-static ktime_t sputrace_start;
-static unsigned long sputrace_head, sputrace_tail;
-static struct sputrace *sputrace_log;
-static int sputrace_logging;
-
-static int sputrace_used(void)
-{
-	return (sputrace_head - sputrace_tail) % bufsize;
-}
-
-static inline int sputrace_avail(void)
-{
-	return bufsize - sputrace_used();
-}
-
-static int sputrace_sprint(char *tbuf, int n)
-{
-	const struct sputrace *t = sputrace_log + sputrace_tail % bufsize;
-	struct timespec tv =
-		ktime_to_timespec(ktime_sub(t->tstamp, sputrace_start));
-
-	return snprintf(tbuf, n,
-		"[%lu.%09lu] %d: %s (ctxthread = %d, spu = %d)\n",
-		(unsigned long) tv.tv_sec,
-		(unsigned long) tv.tv_nsec,
-		t->curr_tid,
-		t->name,
-		t->owner_tid,
-		t->number);
-}
-
-static ssize_t sputrace_read(struct file *file, char __user *buf,
-		size_t len, loff_t *ppos)
-{
-	int error = 0, cnt = 0;
-
-	if (!buf || len < 0)
-		return -EINVAL;
-
-	while (cnt < len) {
-		char tbuf[128];
-		int width;
-
-		/* If we have data ready to return, don't block waiting
-		 * for more */
-		if (cnt > 0 && sputrace_used() == 0)
-			break;
-
-		error = wait_event_interruptible(sputrace_wait,
-						 sputrace_used() > 0);
-		if (error)
-			break;
-
-		spin_lock(&sputrace_lock);
-		if (sputrace_head == sputrace_tail) {
-			spin_unlock(&sputrace_lock);
-			continue;
-		}
-
-		width = sputrace_sprint(tbuf, sizeof(tbuf));
-		if (width < len)
-			sputrace_tail = (sputrace_tail + 1) % bufsize;
-		spin_unlock(&sputrace_lock);
-
-		if (width >= len)
-			break;
-
-		error = copy_to_user(buf + cnt, tbuf, width);
-		if (error)
-			break;
-		cnt += width;
-	}
-
-	return cnt == 0 ? error : cnt;
-}
-
-static int sputrace_open(struct inode *inode, struct file *file)
-{
-	int rc;
-
-	spin_lock(&sputrace_lock);
-	if (sputrace_logging) {
-		rc = -EBUSY;
-		goto out;
-	}
-
-	sputrace_logging = 1;
-	sputrace_head = sputrace_tail = 0;
-	sputrace_start = ktime_get();
-	rc = 0;
-
-out:
-	spin_unlock(&sputrace_lock);
-	return rc;
-}
-
-static int sputrace_release(struct inode *inode, struct file *file)
-{
-	spin_lock(&sputrace_lock);
-	sputrace_logging = 0;
-	spin_unlock(&sputrace_lock);
-	return 0;
-}
-
-static const struct file_operations sputrace_fops = {
-	.owner   = THIS_MODULE,
-	.open    = sputrace_open,
-	.read    = sputrace_read,
-	.release = sputrace_release,
-};
-
-static void sputrace_log_item(const char *name, struct spu_context *ctx,
-		struct spu *spu)
-{
-	spin_lock(&sputrace_lock);
-
-	if (!sputrace_logging) {
-		spin_unlock(&sputrace_lock);
-		return;
-	}
-
-	if (sputrace_avail() > 1) {
-		struct sputrace *t = sputrace_log + sputrace_head;
-
-		t->tstamp = ktime_get();
-		t->owner_tid = ctx->tid;
-		t->name = name;
-		t->curr_tid = current->pid;
-		t->number = spu ? spu->number : -1;
-
-		sputrace_head = (sputrace_head + 1) % bufsize;
-	} else {
-		printk(KERN_WARNING
-		       "sputrace: lost samples due to full buffer.\n");
-	}
-	spin_unlock(&sputrace_lock);
-
-	wake_up(&sputrace_wait);
-}
-
-static void spu_context_event(void *probe_private, void *call_data,
-		const char *format, va_list *args)
-{
-	struct spu_probe *p = probe_private;
-	struct spu_context *ctx;
-	struct spu *spu;
-
-	ctx = va_arg(*args, struct spu_context *);
-	spu = va_arg(*args, struct spu *);
-
-	sputrace_log_item(p->name, ctx, spu);
-}
-
-static void spu_context_nospu_event(void *probe_private, void *call_data,
-		const char *format, va_list *args)
-{
-	struct spu_probe *p = probe_private;
-	struct spu_context *ctx;
-
-	ctx = va_arg(*args, struct spu_context *);
-
-	sputrace_log_item(p->name, ctx, NULL);
-}
-
-struct spu_probe spu_probes[] = {
-	{ "spu_bind_context__enter", "ctx %p spu %p", spu_context_event },
-	{ "spu_unbind_context__enter", "ctx %p spu %p", spu_context_event },
-	{ "spu_get_idle__enter", "ctx %p", spu_context_nospu_event },
-	{ "spu_get_idle__found", "ctx %p spu %p", spu_context_event },
-	{ "spu_get_idle__not_found", "ctx %p", spu_context_nospu_event },
-	{ "spu_find_victim__enter", "ctx %p", spu_context_nospu_event },
-	{ "spusched_tick__preempt", "ctx %p spu %p", spu_context_event },
-	{ "spusched_tick__newslice", "ctx %p", spu_context_nospu_event },
-	{ "spu_yield__enter", "ctx %p", spu_context_nospu_event },
-	{ "spu_deactivate__enter", "ctx %p", spu_context_nospu_event },
-	{ "__spu_deactivate__unload", "ctx %p spu %p", spu_context_event },
-	{ "spufs_ps_fault__enter", "ctx %p", spu_context_nospu_event },
-	{ "spufs_ps_fault__sleep", "ctx %p", spu_context_nospu_event },
-	{ "spufs_ps_fault__wake", "ctx %p spu %p", spu_context_event },
-	{ "spufs_ps_fault__insert", "ctx %p spu %p", spu_context_event },
-	{ "spu_acquire_saved__enter", "ctx %p", spu_context_nospu_event },
-	{ "destroy_spu_context__enter", "ctx %p", spu_context_nospu_event },
-	{ "spufs_stop_callback__enter", "ctx %p spu %p", spu_context_event },
-};
-
-static int __init sputrace_init(void)
-{
-	struct proc_dir_entry *entry;
-	int i, error = -ENOMEM;
-
-	sputrace_log = kcalloc(bufsize, sizeof(struct sputrace), GFP_KERNEL);
-	if (!sputrace_log)
-		goto out;
-
-	entry = proc_create("sputrace", S_IRUSR, NULL, &sputrace_fops);
-	if (!entry)
-		goto out_free_log;
-
-	for (i = 0; i < ARRAY_SIZE(spu_probes); i++) {
-		struct spu_probe *p = &spu_probes[i];
-
-		error = marker_probe_register(p->name, p->format,
-					      p->probe_func, p);
-		if (error)
-			printk(KERN_INFO "Unable to register probe %s\n",
-					p->name);
-	}
-
-	return 0;
-
-out_free_log:
-	kfree(sputrace_log);
-out:
-	return -ENOMEM;
-}
-
-static void __exit sputrace_exit(void)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(spu_probes); i++)
-		marker_probe_unregister(spu_probes[i].name,
-			spu_probes[i].probe_func, &spu_probes[i]);
-
-	remove_proc_entry("sputrace", NULL);
-	kfree(sputrace_log);
-	marker_synchronize_unregister();
-}
-
-module_init(sputrace_init);
-module_exit(sputrace_exit);
-
-MODULE_LICENSE("GPL");
Index: linux-2.6-tip/arch/powerpc/platforms/cell/spufs/context.c
===================================================================
--- linux-2.6-tip.orig/arch/powerpc/platforms/cell/spufs/context.c	2009-05-06 10:19:40.000000000 +0000
+++ linux-2.6-tip/arch/powerpc/platforms/cell/spufs/context.c	2009-05-06 10:19:44.000000000 +0000
@@ -28,6 +28,7 @@ 
 #include <asm/spu.h>
 #include <asm/spu_csa.h>
 #include "spufs.h"
+#include "sputrace.h"
 
 
 atomic_t nr_spu_contexts = ATOMIC_INIT(0);
Index: linux-2.6-tip/arch/powerpc/platforms/cell/spufs/file.c
===================================================================
--- linux-2.6-tip.orig/arch/powerpc/platforms/cell/spufs/file.c	2009-05-06 10:19:27.000000000 +0000
+++ linux-2.6-tip/arch/powerpc/platforms/cell/spufs/file.c	2009-05-06 10:19:34.000000000 +0000
@@ -38,6 +38,7 @@ 
 #include <asm/uaccess.h>
 
 #include "spufs.h"
+#include "sputrace.h"
 
 #define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000)
 
Index: linux-2.6-tip/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- linux-2.6-tip.orig/arch/powerpc/platforms/cell/spufs/sched.c	2009-05-06 10:19:52.000000000 +0000
+++ linux-2.6-tip/arch/powerpc/platforms/cell/spufs/sched.c	2009-05-06 10:20:05.000000000 +0000
@@ -47,6 +47,8 @@ 
 #include <asm/spu_csa.h>
 #include <asm/spu_priv1.h>
 #include "spufs.h"
+#define CREATE_TRACE_POINTS
+#include "sputrace.h"
 
 struct spu_prio_array {
 	DECLARE_BITMAP(bitmap, MAX_PRIO);