diff mbox

[1/1] PPC64-HWBKPT: Implement hw-breakpoints for PPC64

Message ID 20100215055914.GA6017@in.ibm.com (mailing list archive)
State Superseded
Delegated to: Benjamin Herrenschmidt
Headers show

Commit Message

K.Prasad Feb. 15, 2010, 5:59 a.m. UTC
Implement perf-events based hw-breakpoint interfaces for PPC64 processors.
These interfaces help arbitrate requests from various users and schedules
them as appropriate.

Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig                     |    1 
 arch/powerpc/include/asm/hw_breakpoint.h |   55 ++++
 arch/powerpc/include/asm/processor.h     |    6 
 arch/powerpc/include/asm/reg.h           |    1 
 arch/powerpc/kernel/Makefile             |    2 
 arch/powerpc/kernel/hw_breakpoint.c      |  363 +++++++++++++++++++++++++++++++
 arch/powerpc/kernel/process.c            |    5 
 arch/powerpc/kernel/ptrace.c             |   77 ++++++
 arch/powerpc/mm/fault.c                  |   14 -
 9 files changed, 515 insertions(+), 9 deletions(-)

Comments

Frédéric Weisbecker Feb. 21, 2010, 1:01 a.m. UTC | #1
On Mon, Feb 15, 2010 at 11:29:14AM +0530, K.Prasad wrote:
> +struct arch_hw_breakpoint {
> +	u8		len; /* length of the target symbol */
> +	int		type;
> +	char		*name; /* Contains name of the symbol to set bkpt */
> +	unsigned long	address;
> +};




I don't think it's a good idea to integrate the name of
the target. This is something that should be done in a higher
level, not in an arch backend.
We don't even need to store it anywhere as we can resolve
back an address easily. Symbol awareness is not something
the hardware breakpoint should care about, neither in the
arch nor the generic level.

Also, do you think addr/len/type is enough to abstract out
any ppc breakpoints?

This looks enough to me to express range breakpoints and
simple breakpoints. But what about value comparison?
(And still, there may be other trickier implementations
I don't know in ppc).


> +
> +#include <linux/kdebug.h>
> +#include <asm/reg.h>
> +#include <asm/system.h>
> +
> +/* Total number of available HW breakpoint registers */
> +#define HBP_NUM 1


Looking at the G2 PowerPc implementation, DABR and DABR2 can either
express two different watchpoints or one range watchpoint.

There are also IABR and IABR2 for instruction breakpoints that
follow the same above scheme. I'm not sure we can abstract that
using a constant max linear number of resources.



> +static inline void hw_breakpoint_disable(void)
> +{
> +	set_dabr(0);
> +}



So, this is only about data breakpoints?



> +	/*
> +	 * As a policy, the callback is invoked in a 'trigger-after-execute'
> +	 * fashion
> +	 */
> +	(bp->overflow_handler)(bp, 0, NULL, regs);



Why are you calling this explicitly instead of using the perf_bp_event()
thing? This looks like it won't work with perf as the event won't
be recorded by perf.


> +void ptrace_triggered(struct perf_event *bp, int nmi,
> +		      struct perf_sample_data *data, struct pt_regs *regs)
> +{
> +	struct perf_event_attr attr;
> +
> +	/*
> +	 * Disable the breakpoint request here since ptrace has defined a
> +	 * one-shot behaviour for breakpoint exceptions in PPC64.
> +	 * The SIGTRAP signal is generated automatically for us in do_dabr().
> +	 * We don't have to do anything about that here
> +	 */



Oh, why does ptrace use a one-shot behaviour in ppc? Breakpoints
only trigger once?



> +	if (bp) {
> +		attr = bp->attr;
> +		attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN;
> +
> +		switch (data & (DABR_DATA_WRITE | DABR_DATA_READ)) {
> +		case DABR_DATA_READ:
> +			attr.bp_type = HW_BREAKPOINT_R;
> +			break;
> +		case DABR_DATA_WRITE:
> +			attr.bp_type = HW_BREAKPOINT_W;
> +			break;
> +		case (DABR_DATA_WRITE | DABR_DATA_READ):
> +			attr.bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
> +			break;
> +		}
> +		ret =  modify_user_hw_breakpoint(bp, &attr);
> +		if (ret)
> +			return ret;
> +		thread->ptrace_bps[0] = bp;
> +		thread->dabr = data;
> +		return 0;
> +	}
> +
> +	/* Create a new breakpoint request if one doesn't exist already */
> +	hw_breakpoint_init(&attr);
> +	attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN;
> +	switch (data & (DABR_DATA_WRITE | DABR_DATA_READ)) {
> +	case DABR_DATA_READ:
> +		attr.bp_type = HW_BREAKPOINT_R;
> +		break;
> +	case DABR_DATA_WRITE:
> +		attr.bp_type = HW_BREAKPOINT_W;
> +		break;
> +	case (DABR_DATA_WRITE | DABR_DATA_READ):
> +		attr.bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
> +		break;
> +	}
> +	thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
> +							ptrace_triggered, task);
> +	if (IS_ERR(bp)) {
> +		thread->ptrace_bps[0] = NULL;
> +		return PTR_ERR(bp);
> +	}
> +
> +#endif /* CONFIG_PPC64 */



This looks fine for basic breakpoints. And this can probably be
improved to integrate ranges.

But I think we did something wrong with the generic breakpoint
interface. We are translating the arch values to generic
attributes. Then this all will be translated back to arch
values.

Having generic attributes is necessary for any perf event
use from userspace. But it looks like a waste for ptrace
that already gives us arch values. And the problem
is the same for x86.

So I think we should implement a register_ptrace_breakpoint()
that doesn't take perf_event_attr but specific arch informations,
so that we don't need to pass through a generic conversion, which:

- is wasteful
- won't be able to express 100% of any arch capabilities. We
  can certainly express most arch breakpoints features through
  the generic interface, but not all of them (given how tricky
  the data value comparison features can be)

I will rework that during the next cycle.

Thanks.
K.Prasad Feb. 22, 2010, 1:17 p.m. UTC | #2
On Sun, Feb 21, 2010 at 02:01:37AM +0100, Frederic Weisbecker wrote:
> On Mon, Feb 15, 2010 at 11:29:14AM +0530, K.Prasad wrote:
> > +struct arch_hw_breakpoint {
> > +	u8		len; /* length of the target symbol */
> > +	int		type;
> > +	char		*name; /* Contains name of the symbol to set bkpt */
> > +	unsigned long	address;
> > +};
> 
> 
> 
> 
> I don't think it's a good idea to integrate the name of
> the target. This is something that should be done in a higher
> level, not in an arch backend.
> We don't even need to store it anywhere as we can resolve
> back an address easily. Symbol awareness is not something
> the hardware breakpoint should care about, neither in the
> arch nor the generic level.
> 

The 'name' field here is actually a legacy inherited from x86 code. It
is part of x86's arch-specific hw-breakpoint structure since:
- inspired by the kprobe implementation which accepts symbol name as
  input.
- kallsyms_lookup_name() was 'unexported' and a module could not resolve
  symbol names externally, so the core-infrastructure had to provide
  such facilities.
- I wasn't sure about the discussions behind 'unexporting' of
  kallsyms_lookup_name(), so did not venture to export them again (which
  you rightfully did :-)

Having said that, I'll be glad to remove this field (along with that in
x86), provided we know that there's a way for the user to resolve symbol
names on its own i.e. routines like kallsyms_lookup_name() will remain
exported.

> Also, do you think addr/len/type is enough to abstract out
> any ppc breakpoints?
> 
> This looks enough to me to express range breakpoints and
> simple breakpoints. But what about value comparison?
> (And still, there may be other trickier implementations
> I don't know in ppc).
> 

The above implementation is for PPC64 architecture that supports only
'simple' breakpoints of fixed length (no range breakpoints, no value
comparison). More on that below.
 
> > +
> > +#include <linux/kdebug.h>
> > +#include <asm/reg.h>
> > +#include <asm/system.h>
> > +
> > +/* Total number of available HW breakpoint registers */
> > +#define HBP_NUM 1
> 
> 
> Looking at the G2 PowerPc implementation, DABR and DABR2 can either
> express two different watchpoints or one range watchpoint.
> 
> There are also IABR and IABR2 for instruction breakpoints that
> follow the same above scheme. I'm not sure we can abstract that
> using a constant max linear number of resources.
> 
> 

As stated above, the patch implements breakpoints for PPC64 processors
only (http://www.power.org/resources/downloads/PowerISA_V2.06_PUBLIC.pdf),
which does not support advanced breakpoint features (which its ppc
Book-E counterpart has).
 
> > +static inline void hw_breakpoint_disable(void)
> > +{
> > +	set_dabr(0);
> > +}
> 
> 
> So, this is only about data breakpoints?
> 
> 

Yes, newer PPC64 processors do not support IABR.

> > +	/*
> > +	 * As a policy, the callback is invoked in a 'trigger-after-execute'
> > +	 * fashion
> > +	 */
> > +	(bp->overflow_handler)(bp, 0, NULL, regs);
> 
> 
> Why are you calling this explicitly instead of using the perf_bp_event()
> thing? This looks like it won't work with perf as the event won't
> be recorded by perf.
> 

Yes, should have invoked perf_bp_event() for perf to work well (on a
side note, it makes me wonder at the amount of 'extra' code that each
breakpoint exception would execute if it were not called through perf
sys-call...well, the costs of integrating with a generic infrastructure!)

> > +void ptrace_triggered(struct perf_event *bp, int nmi,
> > +		      struct perf_sample_data *data, struct pt_regs *regs)
> > +{
> > +	struct perf_event_attr attr;
> > +
> > +	/*
> > +	 * Disable the breakpoint request here since ptrace has defined a
> > +	 * one-shot behaviour for breakpoint exceptions in PPC64.
> > +	 * The SIGTRAP signal is generated automatically for us in do_dabr().
> > +	 * We don't have to do anything about that here
> > +	 */
> 
> 
> Oh, why does ptrace use a one-shot behaviour in ppc? Breakpoints
> only trigger once?
> 

Yes, ptrace breakpoints on PPC64 are designed to trigger once and this
patch retains that behaviour. It is very convenient to use one-shot
behaviour on archs where exceptions are triggered-before-execute.

> > +	if (bp) {
> > +		attr = bp->attr;
> > +		attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN;
> > +
> > +		switch (data & (DABR_DATA_WRITE | DABR_DATA_READ)) {
> > +		case DABR_DATA_READ:
> > +			attr.bp_type = HW_BREAKPOINT_R;
> > +			break;
> > +		case DABR_DATA_WRITE:
> > +			attr.bp_type = HW_BREAKPOINT_W;
> > +			break;
> > +		case (DABR_DATA_WRITE | DABR_DATA_READ):
> > +			attr.bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
> > +			break;
> > +		}
> > +		ret =  modify_user_hw_breakpoint(bp, &attr);
> > +		if (ret)
> > +			return ret;
> > +		thread->ptrace_bps[0] = bp;
> > +		thread->dabr = data;
> > +		return 0;
> > +	}
> > +
> > +	/* Create a new breakpoint request if one doesn't exist already */
> > +	hw_breakpoint_init(&attr);
> > +	attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN;
> > +	switch (data & (DABR_DATA_WRITE | DABR_DATA_READ)) {
> > +	case DABR_DATA_READ:
> > +		attr.bp_type = HW_BREAKPOINT_R;
> > +		break;
> > +	case DABR_DATA_WRITE:
> > +		attr.bp_type = HW_BREAKPOINT_W;
> > +		break;
> > +	case (DABR_DATA_WRITE | DABR_DATA_READ):
> > +		attr.bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
> > +		break;
> > +	}
> > +	thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
> > +							ptrace_triggered, task);
> > +	if (IS_ERR(bp)) {
> > +		thread->ptrace_bps[0] = NULL;
> > +		return PTR_ERR(bp);
> > +	}
> > +
> > +#endif /* CONFIG_PPC64 */
> 
> 
> 
> This looks fine for basic breakpoints. And this can probably be
> improved to integrate ranges.
> 
> But I think we did something wrong with the generic breakpoint
> interface. We are translating the arch values to generic
> attributes. Then this all will be translated back to arch
> values.
> 
> Having generic attributes is necessary for any perf event
> use from userspace. But it looks like a waste for ptrace
> that already gives us arch values. And the problem
> is the same for x86.
> 
> So I think we should implement a register_ptrace_breakpoint()
> that doesn't take perf_event_attr but specific arch informations,
> so that we don't need to pass through a generic conversion, which:
> 

I agree that the layers of conversion from generic to arch-specific
breakpoint constants is wasteful.
Can't the arch_bp_generic_fields() function be moved to
arch/x86/kernel/ptrace.c instead of a new interface?

> - is wasteful
> - won't be able to express 100% of any arch capabilities. We
>   can certainly express most arch breakpoints features through
>   the generic interface, but not all of them (given how tricky
>   the data value comparison features can be)
> 
> I will rework that during the next cycle.
> 
> Thanks.
>

Thank you for the comments. I will re-send a new version of the patch
with the perf_bp_event() substitution.

-- K.Prasad
K.Prasad Feb. 23, 2010, 10:57 a.m. UTC | #3
On Mon, Feb 22, 2010 at 06:47:46PM +0530, K.Prasad wrote:
> On Sun, Feb 21, 2010 at 02:01:37AM +0100, Frederic Weisbecker wrote:
> > On Mon, Feb 15, 2010 at 11:29:14AM +0530, K.Prasad wrote:
[snipped]
> > Also, do you think addr/len/type is enough to abstract out
> > any ppc breakpoints?
> > 
> > This looks enough to me to express range breakpoints and
> > simple breakpoints. But what about value comparison?
> > (And still, there may be other trickier implementations
> > I don't know in ppc).
> > 
> 
> The above implementation is for PPC64 architecture that supports only
> 'simple' breakpoints of fixed length (no range breakpoints, no value
> comparison). More on that below.
>

Looks like I forgot the 'more on that below' part :-)....here are some
thoughts...

Architectures like PPC Book-E have support for a variety of
sophisticated debug features and our generic framework, in its present
form, cannot easily port itself to these processors. In order to extend
the framework for PPC Book-E, I intend the following to begin with:

- Implement support for data breakpoints through DAC registers with all
  the 'bells and whistles'...support for instruction breakpoints through
  IAC can come in later (without precluding its use through ptrace).

- Embed the flags/variables to store DVC, masked address mode, etc. in
  'struct arch_hw_breakpoint', which will be populated by the user of
  register_breakpoint interface.

Apart from the above extensions to the framework, changes in the generic
code would be required as described in an earlier LKML mail (ref:
message-id: 20091127190705.GB18408@in.ibm.com)....relevant contents
pasted below:

"I think the register_<> interfaces can become wrappers around functions
that do the following:

- arch_validate(): Validate request by invoking an arch-dependant
  routine. Proceed if returned valid.
- arch-specific debugreg availability: Do something like
  if (arch_hw_breakpoint_availabile())
        bp = perf_event_create_kernel_counter();

  perf_event_create_kernel_counter()--->arch_install_hw_breakpoint();

This way, all book-keeping related work (no. of pinned/flexible/per-cpu)
will be moved to arch-specific files (will be helpful for PPC Book-E
implementation having two types of debug registers). Every new
architecture that intends to port to the new hw-breakpoint
implementation must define their arch_validate(),
arch_hw_breakpoint_available() and an arch_install_hw_breakpoint(),
while the hw-breakpoint code will be flexible enough to extend itself to
each of these archs."

Let me know what you think of the above.

Thanks,
K.Prasad
Frédéric Weisbecker Feb. 26, 2010, 1:58 a.m. UTC | #4
On Mon, Feb 22, 2010 at 06:47:46PM +0530, K.Prasad wrote:
> The 'name' field here is actually a legacy inherited from x86 code. It
> is part of x86's arch-specific hw-breakpoint structure since:
> - inspired by the kprobe implementation which accepts symbol name as
>   input.
> - kallsyms_lookup_name() was 'unexported' and a module could not resolve
>   symbol names externally, so the core-infrastructure had to provide
>   such facilities.
> - I wasn't sure about the discussions behind 'unexporting' of
>   kallsyms_lookup_name(), so did not venture to export them again (which
>   you rightfully did :-)
> 
> Having said that, I'll be glad to remove this field (along with that in
> x86),



Cool, I'll integrate the x86 name field removal to the .24 series



> provided we know that there's a way for the user to resolve symbol
> names on its own i.e. routines like kallsyms_lookup_name() will remain
> exported.


Yeah, I guess it's fine to keep kallsyms_lookup_name() exported.


 
> > Also, do you think addr/len/type is enough to abstract out
> > any ppc breakpoints?
> > 
> > This looks enough to me to express range breakpoints and
> > simple breakpoints. But what about value comparison?
> > (And still, there may be other trickier implementations
> > I don't know in ppc).
> > 
> 
> The above implementation is for PPC64 architecture that supports only
> 'simple' breakpoints of fixed length (no range breakpoints, no value
> comparison). More on that below.


Ok. I was just a bit confused in the middle of the several PPC breakpoint
implementations :)



> > > +	/*
> > > +	 * As a policy, the callback is invoked in a 'trigger-after-execute'
> > > +	 * fashion
> > > +	 */
> > > +	(bp->overflow_handler)(bp, 0, NULL, regs);
> > 
> > 
> > Why are you calling this explicitly instead of using the perf_bp_event()
> > thing? This looks like it won't work with perf as the event won't
> > be recorded by perf.
> > 
> 
> Yes, should have invoked perf_bp_event() for perf to work well (on a
> side note, it makes me wonder at the amount of 'extra' code that each
> breakpoint exception would execute if it were not called through perf
> sys-call...well, the costs of integrating with a generic infrastructure!)


It has the benefit of not adding extra checks in the breakpoint handler,
like checking the callback. Every breakpoint is treated the same way, which
makes the code more simple.


 
> > > +void ptrace_triggered(struct perf_event *bp, int nmi,
> > > +		      struct perf_sample_data *data, struct pt_regs *regs)
> > > +{
> > > +	struct perf_event_attr attr;
> > > +
> > > +	/*
> > > +	 * Disable the breakpoint request here since ptrace has defined a
> > > +	 * one-shot behaviour for breakpoint exceptions in PPC64.
> > > +	 * The SIGTRAP signal is generated automatically for us in do_dabr().
> > > +	 * We don't have to do anything about that here
> > > +	 */
> > 
> > 
> > Oh, why does ptrace use a one-shot behaviour in ppc? Breakpoints
> > only trigger once?
> > 
> 
> Yes, ptrace breakpoints on PPC64 are designed to trigger once and this
> patch retains that behaviour. It is very convenient to use one-shot
> behaviour on archs where exceptions are triggered-before-execute.


Ah, Why?



> > This looks fine for basic breakpoints. And this can probably be
> > improved to integrate ranges.
> > 
> > But I think we did something wrong with the generic breakpoint
> > interface. We are translating the arch values to generic
> > attributes. Then this all will be translated back to arch
> > values.
> > 
> > Having generic attributes is necessary for any perf event
> > use from userspace. But it looks like a waste for ptrace
> > that already gives us arch values. And the problem
> > is the same for x86.
> > 
> > So I think we should implement a register_ptrace_breakpoint()
> > that doesn't take perf_event_attr but specific arch informations,
> > so that we don't need to pass through a generic conversion, which:
> > 
> 
> I agree that the layers of conversion from generic to arch-specific
> breakpoint constants is wasteful.
> Can't the arch_bp_generic_fields() function be moved to
> arch/x86/kernel/ptrace.c instead of a new interface?


I'll answer in your subsequent mail :)


 
> > - is wasteful
> > - won't be able to express 100% of any arch capabilities. We
> >   can certainly express most arch breakpoints features through
> >   the generic interface, but not all of them (given how tricky
> >   the data value comparison features can be)
> > 
> > I will rework that during the next cycle.
> > 
> > Thanks.
> >
> 
> Thank you for the comments. I will re-send a new version of the patch
> with the perf_bp_event() substitution.


Thanks.
Frédéric Weisbecker Feb. 26, 2010, 5:52 p.m. UTC | #5
On Tue, Feb 23, 2010 at 04:27:15PM +0530, K.Prasad wrote:
> On Mon, Feb 22, 2010 at 06:47:46PM +0530, K.Prasad wrote:
> > On Sun, Feb 21, 2010 at 02:01:37AM +0100, Frederic Weisbecker wrote:
> > > On Mon, Feb 15, 2010 at 11:29:14AM +0530, K.Prasad wrote:
> [snipped]
> > > Also, do you think addr/len/type is enough to abstract out
> > > any ppc breakpoints?
> > > 
> > > This looks enough to me to express range breakpoints and
> > > simple breakpoints. But what about value comparison?
> > > (And still, there may be other trickier implementations
> > > I don't know in ppc).
> > > 
> > 
> > The above implementation is for PPC64 architecture that supports only
> > 'simple' breakpoints of fixed length (no range breakpoints, no value
> > comparison). More on that below.
> >
> 
> Looks like I forgot the 'more on that below' part :-)....here are some
> thoughts...
> 
> Architectures like PPC Book-E have support for a variety of
> sophisticated debug features and our generic framework, in its present
> form, cannot easily port itself to these processors. In order to extend
> the framework for PPC Book-E, I intend the following to begin with:
> 
> - Implement support for data breakpoints through DAC registers with all
>   the 'bells and whistles'...support for instruction breakpoints through
>   IAC can come in later (without precluding its use through ptrace).
> 
> - Embed the flags/variables to store DVC, masked address mode, etc. in
>   'struct arch_hw_breakpoint', which will be populated by the user of
>   register_breakpoint interface.



Agreed.



> 
> Apart from the above extensions to the framework, changes in the generic
> code would be required as described in an earlier LKML mail (ref:
> message-id: 20091127190705.GB18408@in.ibm.com)....relevant contents
> pasted below:
> 
> "I think the register_<> interfaces can become wrappers around functions
> that do the following:
> 
> - arch_validate(): Validate request by invoking an arch-dependant
>   routine. Proceed if returned valid.
> - arch-specific debugreg availability: Do something like
>   if (arch_hw_breakpoint_availabile())
>         bp = perf_event_create_kernel_counter();



This is already what does register_hw_break....(), it fails
if a slot is not available:

perf_event_create_kernel_counter -> perf_bp_init() -> reserve_bp_slot()

Having a:

if (arch_hw_breakpoint_availabile())
         bp = perf_event_create_kernel_counter();

would be racy.



> 
>   perf_event_create_kernel_counter()--->arch_install_hw_breakpoint();
> 
> This way, all book-keeping related work (no. of pinned/flexible/per-cpu)
> will be moved to arch-specific files (will be helpful for PPC Book-E
> implementation having two types of debug registers). Every new
> architecture that intends to port to the new hw-breakpoint
> implementation must define their arch_validate(),
> arch_hw_breakpoint_available() and an arch_install_hw_breakpoint(),
> while the hw-breakpoint code will be flexible enough to extend itself to
> each of these archs."
> 
> Let me know what you think of the above.



We certainly need the slot reservation in arch (a part of it at least).
But we also need a kind of new interface for arch predefined attributes,
instead of generic attributes.

Probably we need a kind of perf_event_create_kernel_counter() that
can accept either a perf_event_attr (for perf syscall or ftrace)
and an arch structure that can be passed to the breakpoint API,
so that we don't need the generic translation.


> 
> Thanks,
> K.Prasad
>
David Gibson March 8, 2010, 11:57 p.m. UTC | #6
On Fri, Feb 26, 2010 at 02:58:12AM +0100, Frederic Weisbecker wrote:
> On Mon, Feb 22, 2010 at 06:47:46PM +0530, K.Prasad wrote:
[snip]
> > > Oh, why does ptrace use a one-shot behaviour in ppc? Breakpoints
> > > only trigger once?
> > > 
> > 
> > Yes, ptrace breakpoints on PPC64 are designed to trigger once and this
> > patch retains that behaviour. It is very convenient to use one-shot
> > behaviour on archs where exceptions are triggered-before-execute.
> 
> Ah, Why?

Because otherwise you have jump through some tricky hoops so that when
gdb (or whatever) resumes after the breakpoint, you don't immediately
retrap in the same place.  I gather x86 has hardware assistance to do
this, but powerpc doesn't.
K.Prasad March 9, 2010, 2:14 a.m. UTC | #7
On Fri, Feb 26, 2010 at 02:58:12AM +0100, Frederic Weisbecker wrote:
> On Mon, Feb 22, 2010 at 06:47:46PM +0530, K.Prasad wrote:
> > The 'name' field here is actually a legacy inherited from x86 code. It
> > is part of x86's arch-specific hw-breakpoint structure since:
> > - inspired by the kprobe implementation which accepts symbol name as
> >   input.
> > - kallsyms_lookup_name() was 'unexported' and a module could not resolve
> >   symbol names externally, so the core-infrastructure had to provide
> >   such facilities.
> > - I wasn't sure about the discussions behind 'unexporting' of
> >   kallsyms_lookup_name(), so did not venture to export them again (which
> >   you rightfully did :-)
> > 
> > Having said that, I'll be glad to remove this field (along with that in
> > x86),
> 
> Cool, I'll integrate the x86 name field removal to the .24 series
> 

I've removed the .name field in the latest version of the patch sent
(linuxppc-dev message-id: 20100308181232.GA3406@in.ibm.com).

> > > > +void ptrace_triggered(struct perf_event *bp, int nmi,
> > > > +		      struct perf_sample_data *data, struct pt_regs *regs)
> > > > +{
> > > > +	struct perf_event_attr attr;
> > > > +
> > > > +	/*
> > > > +	 * Disable the breakpoint request here since ptrace has defined a
> > > > +	 * one-shot behaviour for breakpoint exceptions in PPC64.
> > > > +	 * The SIGTRAP signal is generated automatically for us in do_dabr().
> > > > +	 * We don't have to do anything about that here
> > > > +	 */
> > > 
> > > 
> > > Oh, why does ptrace use a one-shot behaviour in ppc? Breakpoints
> > > only trigger once?
> > > 
> > 
> > Yes, ptrace breakpoints on PPC64 are designed to trigger once and this
> > patch retains that behaviour. It is very convenient to use one-shot
> > behaviour on archs where exceptions are triggered-before-execute.
> 
> Ah, Why?
>

Because we don't have to create the single_step_dabr_instruction()
function :-)

With one-shot behaviour, the hw_breakpoint_handler() doesn't have to
worry about entering into an infinite-loop (since exceptions are
triggered before instruction execution, and if breakpoints are still
active every attempt to execute the causative instruction raises the
breakpoint exception). To circumvent infinite looping, we temporarily
disable the breakpoints, enable single-stepping (to single-step over
the causative instruction) and re-enable them inside single-step 
exception handler. For the one-shot type breakpoint, the pain of
re-registration is left to the end-user.

I've wondered why PowerPC was designed to be 'trigger-before-execute'
when handling the exception can grow complex as this. Perhaps it is
meant to give absolute control over the data value (stored in the target
address) before the instruction gets to see it. For instance,
breakpoints can be used to change the data to a 'desirable' value before
actually being 'seen' by instructions.

Thanks,
K.Prasad
diff mbox

Patch

Index: linux-2.6.ppc64_test/arch/powerpc/include/asm/hw_breakpoint.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.ppc64_test/arch/powerpc/include/asm/hw_breakpoint.h	2010-02-15 02:10:43.000000000 +0530
@@ -0,0 +1,55 @@ 
+#ifndef	_PPC64_HW_BREAKPOINT_H
+#define	_PPC64_HW_BREAKPOINT_H
+
+#ifdef	__KERNEL__
+#define	__ARCH_HW_BREAKPOINT_H
+#ifdef CONFIG_PPC64
+
+struct arch_hw_breakpoint {
+	u8		len; /* length of the target symbol */
+	int		type;
+	char		*name; /* Contains name of the symbol to set bkpt */
+	unsigned long	address;
+};
+
+#include <linux/kdebug.h>
+#include <asm/reg.h>
+#include <asm/system.h>
+
+/* Total number of available HW breakpoint registers */
+#define HBP_NUM 1
+
+struct perf_event;
+struct pmu;
+struct perf_sample_data;
+
+#define HW_BREAKPOINT_ALIGN 0x7
+/* Maximum permissible length of any HW Breakpoint */
+#define HW_BREAKPOINT_LEN 0x8
+
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
+						struct task_struct *tsk);
+extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+						unsigned long val, void *data);
+int arch_install_hw_breakpoint(struct perf_event *bp);
+void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void hw_breakpoint_pmu_read(struct perf_event *bp);
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
+extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
+
+extern struct pmu perf_ops_bp;
+extern void ptrace_triggered(struct perf_event *bp, int nmi,
+			struct perf_sample_data *data, struct pt_regs *regs);
+static inline void hw_breakpoint_disable(void)
+{
+	set_dabr(0);
+}
+
+#else
+static inline void hw_breakpoint_disable(void)
+{
+	/* Function is defined only on PPC64 for now */
+}
+#endif	/* CONFIG_PPC64 */
+#endif	/* __KERNEL__ */
+#endif	/* _PPC64_HW_BREAKPOINT_H */
Index: linux-2.6.ppc64_test/arch/powerpc/kernel/hw_breakpoint.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.ppc64_test/arch/powerpc/kernel/hw_breakpoint.c	2010-02-15 02:10:43.000000000 +0530
@@ -0,0 +1,363 @@ 
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers. Derived from
+ * "arch/x86/kernel/hw_breakpoint.c"
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2009 IBM Corporation
+ * Author: K.Prasad <prasad@linux.vnet.ibm.com>
+ *
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/sstep.h>
+
+/*
+ * Store the 'bp' that caused the hw-breakpoint exception just before we
+ * single-step. Used to distinguish a single-step exception (due to a previous
+ * hw-breakpoint exception) from a normal one
+ */
+static DEFINE_PER_CPU(struct perf_event *, last_hit_bp);
+
+/*
+ * Flag to denote if the kernel was already single-stepping. Used to
+ * conditionally unset the MSR_SE flag in the single-step exception
+ * following the breakpoint exception.
+ */
+static DEFINE_PER_CPU(bool, is_cpu_singlestep);
+
+/*
+ * Stores the breakpoints currently in use on each breakpoint address
+ * register for each cpus
+ */
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
+
+/*
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	struct perf_event **slot = &__get_cpu_var(bp_per_reg);
+
+	if (!*slot)
+		*slot = bp;
+	else {
+		WARN_ONCE(1, "Can't find any breakpoint slot");
+		return -EBUSY;
+	}
+
+	set_dabr(info->address | info->type | DABR_TRANSLATION);
+	return 0;
+}
+
+/*
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+	struct perf_event **slot = &__get_cpu_var(bp_per_reg);
+
+	if (*slot == bp)
+		*slot = NULL;
+	else {
+		WARN_ONCE(1, "Can't find the breakpoint slot");
+		return;
+	}
+	set_dabr(0);
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp,
+						struct task_struct *tsk)
+{
+	int is_kernel, ret = -EINVAL;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	if (!bp)
+		return ret;
+
+	switch (bp->attr.bp_type) {
+	case HW_BREAKPOINT_R:
+		info->type = DABR_DATA_READ;
+		break;
+	case HW_BREAKPOINT_W:
+		info->type = DABR_DATA_WRITE;
+		break;
+	case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+		info->type = (DABR_DATA_READ | DABR_DATA_WRITE);
+		break;
+	default:
+		return ret;
+	}
+	/* TODO: Check for a valid triggered function */
+	/* if (!bp->triggered)
+		return -EINVAL; */
+
+	is_kernel = is_kernel_addr(bp->attr.bp_addr);
+	if ((tsk && is_kernel) || (!tsk && !is_kernel))
+		return -EINVAL;
+
+	info->address = bp->attr.bp_addr;
+	info->len = bp->attr.bp_len;
+
+	/*
+	 * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8)
+	 * and breakpoint addresses are aligned to nearest double-word
+	 * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
+	 * 'symbolsize' should satisfy the check below.
+	 */
+	if (info->len >
+	    (HW_BREAKPOINT_LEN - (info->address & HW_BREAKPOINT_ALIGN)))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+	int rc = NOTIFY_STOP;
+	struct perf_event *bp;
+	struct pt_regs *regs = args->regs;
+	unsigned long dar = regs->dar;
+	int cpu, is_kernel, stepped = 1;
+	struct arch_hw_breakpoint *info;
+
+	/* Disable breakpoints during exception handling */
+	set_dabr(0);
+	cpu = get_cpu();
+	/*
+	 * The counter may be concurrently released but that can only
+	 * occur from a call_rcu() path. We can then safely fetch
+	 * the breakpoint, use its callback, touch its counter
+	 * while we are in an rcu_read_lock() path.
+	 */
+	rcu_read_lock();
+
+	bp = per_cpu(bp_per_reg, cpu);
+	if (!bp)
+		goto out;
+	info = counter_arch_bp(bp);
+	is_kernel = is_kernel_addr(bp->attr.bp_addr);
+
+	/*
+	 * Verify if dar lies within the address range occupied by the symbol
+	 * being watched to filter extraneous exceptions.
+	 */
+	    if (!((bp->attr.bp_addr <= dar) &&
+	    (dar <= (bp->attr.bp_addr + bp->attr.bp_len))))
+		/*
+		 * This exception is triggered not because of a memory access on
+		 * the monitored variable but in the double-word address range
+		 * in which it is contained. We will consume this exception,
+		 * considering it as 'noise'.
+		 */
+		goto out;
+
+	/*
+	 * Return early after invoking user-callback function without restoring
+	 * DABR if the breakpoint is from ptrace which always operates in
+	 * one-shot mode
+	 */
+	if (bp->overflow_handler == ptrace_triggered) {
+		(bp->overflow_handler)(bp, 0, NULL, regs);
+		rc = NOTIFY_DONE;
+		goto out;
+	}
+
+	/*
+	 * Do not emulate user-space instructions from kernel-space,
+	 * instead single-step them.
+	 */
+	if (!is_kernel) {
+		current->thread.last_hit_ubp = bp;
+		goto out;
+	}
+
+	/*
+	 * Emulating single-step over causative instruction if already
+	 * in MSR_SE mode will harm other users of kernel single-stepping
+	 * (like an in-kernel debugger), single-step over them.
+	 */
+
+	if (regs->msr & MSR_SE) {
+		per_cpu(is_cpu_singlestep, cpu) = true;
+		per_cpu(last_hit_bp, cpu) = bp;
+		goto out;
+	}
+
+	stepped = emulate_step(regs, regs->nip);
+	 /* emulate_step() could not execute it, single-step them */
+	if (stepped == 0) {
+		regs->msr |= MSR_SE;
+		if (is_kernel)
+			per_cpu(last_hit_bp, cpu) = bp;
+		else
+			current->thread.last_hit_ubp = bp;
+		per_cpu(last_hit_bp, cpu) = bp;
+		goto out;
+	}
+	/*
+	 * As a policy, the callback is invoked in a 'trigger-after-execute'
+	 * fashion
+	 */
+	(bp->overflow_handler)(bp, 0, NULL, regs);
+	set_dabr(info->address | info->type | DABR_TRANSLATION);
+	per_cpu(is_cpu_singlestep, cpu) = false;
+out:
+	rcu_read_unlock();
+	put_cpu();
+	return rc;
+}
+
+/*
+ * Handle single-step exceptions following a DABR hit.
+ */
+int __kprobes single_step_dabr_instruction(struct die_args *args)
+{
+	struct pt_regs *regs = args->regs;
+	int cpu = get_cpu();
+	int ret = NOTIFY_DONE;
+	siginfo_t info;
+	struct perf_event *bp = NULL, *kernel_bp, *user_bp;
+	struct arch_hw_breakpoint *bp_info;
+
+	/*
+	 * Identify the cause of single-stepping and find the corresponding
+	 * breakpoint structure
+	 */
+	kernel_bp = per_cpu(last_hit_bp, cpu);
+	user_bp = current->thread.last_hit_ubp;
+	if (kernel_bp) {
+		bp = kernel_bp;
+		per_cpu(last_hit_bp, cpu) = NULL;
+	} else if (user_bp) {
+		bp = user_bp;
+		current->thread.last_hit_ubp = NULL;
+	}
+
+	/*
+	 * Check if we are single-stepping as a result of a
+	 * previous HW Breakpoint exception
+	 */
+	if (!bp)
+		goto out;
+
+	bp_info = counter_arch_bp(bp);
+
+	/*
+	 * We shall invoke the user-defined callback function in the single
+	 * stepping handler to confirm to 'trigger-after-execute' semantics
+	 */
+	(bp->overflow_handler)(bp, 0, NULL, regs);
+
+	if (!(per_cpu(is_cpu_singlestep, cpu) ||
+	      test_thread_flag(TIF_SINGLESTEP)))
+		regs->msr &= ~MSR_SE;
+
+	/* Deliver signal to user-space */
+	if (!is_kernel_addr(bp->attr.bp_addr)) {
+		info.si_signo = SIGTRAP;
+		info.si_errno = 0;
+		info.si_code = TRAP_HWBKPT;
+		info.si_addr = (void __user *)bp_info->address;
+		force_sig_info(SIGTRAP, &info, current);
+	}
+
+	set_dabr(bp_info->address | bp_info->type | DABR_TRANSLATION);
+	/*
+	 * The kernel was in single-step mode before hw-breakpoint
+	 * exception, allow them to process the single-step exception further.
+	 */
+	if (per_cpu(is_cpu_singlestep, cpu)) {
+		per_cpu(is_cpu_singlestep, cpu) = false;
+		ret = NOTIFY_DONE;
+	} else
+		ret = NOTIFY_STOP;
+out:
+	put_cpu();
+	return ret;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_exceptions_notify(
+		struct notifier_block *unused, unsigned long val, void *data)
+{
+	int ret = NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_DABR_MATCH:
+		ret = hw_breakpoint_handler(data);
+		break;
+	case DIE_SSTEP:
+		ret = single_step_dabr_instruction(data);
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+	struct thread_struct *t = &tsk->thread;
+
+	unregister_hw_breakpoint(t->ptrace_bps[0]);
+	t->ptrace_bps[0] = NULL;
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+	/* TODO */
+}
+
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
+{
+	/* TODO */
+}
+
+
Index: linux-2.6.ppc64_test/arch/powerpc/Kconfig
===================================================================
--- linux-2.6.ppc64_test.orig/arch/powerpc/Kconfig	2010-02-03 07:37:58.000000000 +0530
+++ linux-2.6.ppc64_test/arch/powerpc/Kconfig	2010-02-15 02:10:43.000000000 +0530
@@ -140,6 +140,7 @@ 
 	select HAVE_SYSCALL_WRAPPERS if PPC64
 	select GENERIC_ATOMIC64 if PPC32
 	select HAVE_PERF_EVENTS
+	select HAVE_HW_BREAKPOINT if PPC64
 
 config EARLY_PRINTK
 	bool
Index: linux-2.6.ppc64_test/arch/powerpc/kernel/Makefile
===================================================================
--- linux-2.6.ppc64_test.orig/arch/powerpc/kernel/Makefile	2010-02-03 07:37:59.000000000 +0530
+++ linux-2.6.ppc64_test/arch/powerpc/kernel/Makefile	2010-02-15 02:10:43.000000000 +0530
@@ -33,7 +33,7 @@ 
 obj-y				+= vdso32/
 obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 				   signal_64.o ptrace32.o \
-				   paca.o nvram_64.o firmware.o
+				   paca.o nvram_64.o firmware.o hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o
Index: linux-2.6.ppc64_test/arch/powerpc/include/asm/reg.h
===================================================================
--- linux-2.6.ppc64_test.orig/arch/powerpc/include/asm/reg.h	2010-02-03 07:37:59.000000000 +0530
+++ linux-2.6.ppc64_test/arch/powerpc/include/asm/reg.h	2010-02-15 02:10:43.000000000 +0530
@@ -180,6 +180,7 @@ 
 #define   CTRL_TE	0x00c00000	/* thread enable */
 #define   CTRL_RUNLATCH	0x1
 #define SPRN_DABR	0x3F5	/* Data Address Breakpoint Register */
+#define   HBP_NUM	1	/* Number of physical HW breakpoint registers */
 #define   DABR_TRANSLATION	(1UL << 2)
 #define   DABR_DATA_WRITE	(1UL << 1)
 #define   DABR_DATA_READ	(1UL << 0)
Index: linux-2.6.ppc64_test/arch/powerpc/mm/fault.c
===================================================================
--- linux-2.6.ppc64_test.orig/arch/powerpc/mm/fault.c	2010-02-03 07:37:59.000000000 +0530
+++ linux-2.6.ppc64_test/arch/powerpc/mm/fault.c	2010-02-15 02:10:43.000000000 +0530
@@ -137,6 +137,12 @@ 
 		error_code &= 0x48200000;
 	else
 		is_write = error_code & DSISR_ISSTORE;
+
+	if (error_code & DSISR_DABRMATCH) {
+		/* DABR match */
+		do_dabr(regs, address, error_code);
+		return 0;
+	}
 #else
 	is_write = error_code & ESR_DST;
 #endif /* CONFIG_4xx || CONFIG_BOOKE */
@@ -151,14 +157,6 @@ 
 	if (!user_mode(regs) && (address >= TASK_SIZE))
 		return SIGSEGV;
 
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-  	if (error_code & DSISR_DABRMATCH) {
-		/* DABR match */
-		do_dabr(regs, address, error_code);
-		return 0;
-	}
-#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
-
 	if (in_atomic() || mm == NULL) {
 		if (!user_mode(regs))
 			return SIGSEGV;
Index: linux-2.6.ppc64_test/arch/powerpc/include/asm/processor.h
===================================================================
--- linux-2.6.ppc64_test.orig/arch/powerpc/include/asm/processor.h	2010-02-03 07:37:59.000000000 +0530
+++ linux-2.6.ppc64_test/arch/powerpc/include/asm/processor.h	2010-02-15 02:10:43.000000000 +0530
@@ -177,6 +177,12 @@ 
 #ifdef CONFIG_PPC64
 	unsigned long	start_tb;	/* Start purr when proc switched in */
 	unsigned long	accum_tb;	/* Total accumilated purr for process */
+	struct perf_event *ptrace_bps[HBP_NUM];
+	/*
+	 * Point to the hw-breakpoint last. Helps safe pre-emption and
+	 * hw-breakpoint re-enablement.
+	 */
+	struct perf_event *last_hit_ubp;
 #endif
 	unsigned long	dabr;		/* Data address breakpoint register */
 #ifdef CONFIG_ALTIVEC
Index: linux-2.6.ppc64_test/arch/powerpc/kernel/ptrace.c
===================================================================
--- linux-2.6.ppc64_test.orig/arch/powerpc/kernel/ptrace.c	2010-02-03 07:37:59.000000000 +0530
+++ linux-2.6.ppc64_test/arch/powerpc/kernel/ptrace.c	2010-02-15 02:11:24.000000000 +0530
@@ -32,6 +32,8 @@ 
 #ifdef CONFIG_PPC32
 #include <linux/module.h>
 #endif
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -755,9 +757,32 @@ 
 	clear_tsk_thread_flag(task, TIF_SINGLESTEP);
 }
 
+void ptrace_triggered(struct perf_event *bp, int nmi,
+		      struct perf_sample_data *data, struct pt_regs *regs)
+{
+	struct perf_event_attr attr;
+
+	/*
+	 * Disable the breakpoint request here since ptrace has defined a
+	 * one-shot behaviour for breakpoint exceptions in PPC64.
+	 * The SIGTRAP signal is generated automatically for us in do_dabr().
+	 * We don't have to do anything about that here
+	 */
+	attr = bp->attr;
+	attr.disabled = true;
+	modify_user_hw_breakpoint(bp, &attr);
+}
+
 int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 			       unsigned long data)
 {
+#ifdef CONFIG_PPC64
+	int ret;
+	struct thread_struct *thread = &(task->thread);
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+#endif /* CONFIG_PPC64 */
+
 	/* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
 	 *  For embedded processors we support one DAC and no IAC's at the
 	 *  moment.
@@ -786,6 +811,60 @@ 
 	/* Ensure breakpoint translation bit is set */
 	if (data && !(data & DABR_TRANSLATION))
 		return -EIO;
+#ifdef CONFIG_PPC64
+	bp = thread->ptrace_bps[0];
+	if (data == 0) {
+		if (bp) {
+			unregister_hw_breakpoint(bp);
+			thread->ptrace_bps[0] = NULL;
+		}
+		return 0;
+	}
+	if (bp) {
+		attr = bp->attr;
+		attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN;
+
+		switch (data & (DABR_DATA_WRITE | DABR_DATA_READ)) {
+		case DABR_DATA_READ:
+			attr.bp_type = HW_BREAKPOINT_R;
+			break;
+		case DABR_DATA_WRITE:
+			attr.bp_type = HW_BREAKPOINT_W;
+			break;
+		case (DABR_DATA_WRITE | DABR_DATA_READ):
+			attr.bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
+			break;
+		}
+		ret =  modify_user_hw_breakpoint(bp, &attr);
+		if (ret)
+			return ret;
+		thread->ptrace_bps[0] = bp;
+		thread->dabr = data;
+		return 0;
+	}
+
+	/* Create a new breakpoint request if one doesn't exist already */
+	hw_breakpoint_init(&attr);
+	attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN;
+	switch (data & (DABR_DATA_WRITE | DABR_DATA_READ)) {
+	case DABR_DATA_READ:
+		attr.bp_type = HW_BREAKPOINT_R;
+		break;
+	case DABR_DATA_WRITE:
+		attr.bp_type = HW_BREAKPOINT_W;
+		break;
+	case (DABR_DATA_WRITE | DABR_DATA_READ):
+		attr.bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
+		break;
+	}
+	thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
+							ptrace_triggered, task);
+	if (IS_ERR(bp)) {
+		thread->ptrace_bps[0] = NULL;
+		return PTR_ERR(bp);
+	}
+
+#endif /* CONFIG_PPC64 */
 
 	/* Move contents to the DABR register */
 	task->thread.dabr = data;
Index: linux-2.6.ppc64_test/arch/powerpc/kernel/process.c
===================================================================
--- linux-2.6.ppc64_test.orig/arch/powerpc/kernel/process.c	2010-02-03 07:37:59.000000000 +0530
+++ linux-2.6.ppc64_test/arch/powerpc/kernel/process.c	2010-02-15 02:10:43.000000000 +0530
@@ -48,6 +48,7 @@ 
 #include <asm/machdep.h>
 #include <asm/time.h>
 #include <asm/syscalls.h>
+#include <asm/hw_breakpoint.h>
 #ifdef CONFIG_PPC64
 #include <asm/firmware.h>
 #endif
@@ -376,8 +377,11 @@ 
 	if (new->thread.dabr)
 		set_dabr(new->thread.dabr);
 #else
+/* For PPC64, we use the hw-breakpoint interfaces that would schedule DABR */
+#ifndef CONFIG_PPC64
 	if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
 		set_dabr(new->thread.dabr);
+#endif /* CONFIG_PPC64 */
 #endif
 
 
@@ -396,6 +400,7 @@ 
 		old_thread->accum_tb += (current_tb - start_tb);
 		new_thread->start_tb = current_tb;
 	}
+	flush_ptrace_hw_breakpoint(current);
 #endif
 
 	local_irq_save(flags);