diff mbox

[2/6] Introduce PPC64 specific Hardware Breakpoint interfaces

Message ID 20090603163511.GC5197@in.ibm.com (mailing list archive)
State Superseded, archived
Delegated to: David Gibson
Headers show

Commit Message

K.Prasad June 3, 2009, 4:35 p.m. UTC
Introduce PPC64 implementation for the generic hardware breakpoint interfaces
defined in kernel/hw_breakpoint.c. Enable the HAVE_HW_BREAKPOINT flag and the
Makefile.

Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig                |    1 
 arch/powerpc/kernel/Makefile        |    2 
 arch/powerpc/kernel/hw_breakpoint.c |  290 ++++++++++++++++++++++++++++++++++++
 3 files changed, 292 insertions(+), 1 deletion(-)

Comments

David Gibson June 5, 2009, 5:11 a.m. UTC | #1
On Wed, Jun 03, 2009 at 10:05:11PM +0530, K.Prasad wrote:
> Introduce PPC64 implementation for the generic hardware breakpoint interfaces
> defined in kernel/hw_breakpoint.c. Enable the HAVE_HW_BREAKPOINT flag and the
> Makefile.


[snip]
> +/*
> + * Install the debug register values for just the kernel, no thread.

This comment does seem to quite match the function below.

> + */
> +void arch_uninstall_thread_hw_breakpoint()
> +{
> +	set_dabr(0);
> +}
> +
> +/*
> + * Store a breakpoint's encoded address, length, and type.
> + */
> +int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk)
> +{
> +	/*
> +	 * User-space requests will always have the address field populated
> +	* Symbol names from user-space are rejected
> +	*/
> +	if (tsk && bp->info.name)
> +		return -EINVAL;
> +	/*
> +	 * User-space requests will always have the address field populated
> +	 * For kernel-addresses, either the address or symbol name can be
> +	 * specified.
> +	 */
> +	if (bp->info.name)
> +		bp->info.address = (unsigned long)
> +					kallsyms_lookup_name(bp->info.name);

Archs don't have to implement this name lookup stuff, but it looks
like most of them would - so it looks like there ought to be a helper
function in generic code that will do the check / name lookup stuff.


> +	if (bp->info.address)
> +		return 0;

Hrm.. you realise there's no theoretical reason a userspace program
couldn't put a breakpoint at address 0...?

> +	return -EINVAL;
> +}
> +
> +/*
> + * Validate the arch-specific HW Breakpoint register settings
> + */
> +int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
> +						struct task_struct *tsk)
> +{
> +	int is_kernel, ret = -EINVAL;
> +
> +	if (!bp)
> +		return ret;
> +
> +	switch (bp->info.type) {
> +	case HW_BREAKPOINT_READ:
> +	case HW_BREAKPOINT_WRITE:
> +	case HW_BREAKPOINT_RW:
> +		break;
> +	default:
> +		return ret;
> +	}
> +
> +	if (bp->triggered)
> +		ret = arch_store_info(bp, tsk);
> +
> +	is_kernel = is_kernel_addr(bp->info.address);
> +	if ((tsk && is_kernel) || (!tsk && !is_kernel))
> +		return -EINVAL;
> +
> +	return ret;
> +}
> +
> +void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk)
> +{
> +	struct thread_struct *thread = &(tsk->thread);
> +	struct hw_breakpoint *bp = thread->hbp[0];
> +
> +	if (bp)
> +		thread->dabr = (bp->info.address & ~HW_BREAKPOINT_ALIGN) |
> +				bp->info.type | DABR_TRANSLATION;
> +	else
> +		thread->dabr = 0;
> +}
> +
> +void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
> +{
> +	struct thread_struct *thread = &(tsk->thread);
> +
> +	thread->dabr = 0;
> +}
> +
> +/*
> + * Handle debug exception notifications.
> + */
> +int __kprobes hw_breakpoint_handler(struct die_args *args)
> +{
> +	int rc = NOTIFY_STOP;
> +	struct hw_breakpoint *bp;
> +	struct pt_regs *regs = args->regs;
> +	unsigned long dar = regs->dar;
> +	int cpu, is_one_shot, stepped = 1;
> +
> +	/* Disable breakpoints during exception handling */
> +	set_dabr(0);
> +
> +	cpu = get_cpu();
> +	/* Determine whether kernel- or user-space address is the trigger */
> +	bp = (hbp_kernel_pos == HBP_NUM) ? current->thread.hbp[0] :
> +					per_cpu(this_hbp_kernel[0], cpu);
> +	/*
> +	 * bp can be NULL due to lazy debug register switching
> +	 * or due to the delay between updates of hbp_kernel_pos
> +	 * and this_hbp_kernel.
> +	 */
> +	if (!bp)
> +		goto out;
> +
> +	if (dar == bp->info.address)
> +		per_cpu(dabr_data, cpu) = (hbp_kernel_pos == HBP_NUM) ?
> +						current->thread.dabr : kdabr;
> +	else {
> +		/*
> +		 * This exception is triggered not because of a memory access on
> +		 * the monitored variable but in the double-word address range
> +		 * in which it is contained. We will consume this exception,
> +		 * considering it as 'noise'.
> +		 */
> +		rc = NOTIFY_STOP;
> +		goto out;
> +	}
> +	is_one_shot = (bp->triggered == ptrace_triggered) ? 1 : 0;

Ouch, explicitly special-casing ptrace_triggered is pretty nasty.
Since the bp_info is already arch specific, maybe it should include a
flag to indicate whether the breakpoint is one-shot or not.

> +	(bp->triggered)(bp, regs);
> +	/*
> +	 * Ptrace expects the HW Breakpoints to be one-shot. We will return
> +	 * NOTIFY_DONE without restoring DABR with the breakpoint address. The
> +	 * downstream code will generate SIGTRAP to the process
> +	 */
> +	if (is_one_shot) {
> +		rc = NOTIFY_DONE;
> +		goto out;

Don't you need to clear dabr_data?  Otherwise if we enter single step
for some other reason (e.g. gdb turns it on), won't we incorrectly hit
the code-path to step over a dabr breakpoint?

> +	}
> +
> +	stepped = emulate_step(regs, regs->nip);
> +	/*
> +	 * Single-step the causative instruction manually if
> +	 * emulate_step() could not execute it
> +	 */
> +	if (stepped == 0) {
> +		regs->msr |= MSR_SE;
> +		goto out;
> +	}
> +	set_dabr(per_cpu(dabr_data, cpu));
> +	per_cpu(dabr_data, cpu) = 0;
> +
> +out:
> +	/* Enable pre-emption only if single-stepping is finished */
> +	if (stepped)
> +		put_cpu_no_resched();
> +	return rc;
> +}
> +
> +/*
> + * Handle single-step exceptions following a DABR hit.
> + */
> +int __kprobes single_step_dabr_instruction(struct die_args *args)
> +{
> +	struct pt_regs *regs = args->regs;
> +	int cpu = get_cpu();
> +	int ret = NOTIFY_DONE;
> +	siginfo_t info;
> +	unsigned long this_dabr_data = per_cpu(dabr_data, cpu);
> +
> +	/*
> +	 * Check if we are single-stepping as a result of a
> +	 * previous HW Breakpoint exception
> +	 */
> +	if (this_dabr_data == 0)
> +		goto out;
> +
> +	regs->msr &= ~MSR_SE;
> +	/* Deliver signal to user-space */
> +	if (this_dabr_data < TASK_SIZE) {
> +		info.si_signo = SIGTRAP;
> +		info.si_errno = 0;
> +		info.si_code = TRAP_HWBKPT;
> +		info.si_addr = (void __user *)(per_cpu(dabr_data, cpu));
> +		force_sig_info(SIGTRAP, &info, current);

Uh.. I recall mentioning in my previous review that in order to match
previous behaviour we need to deliver the userspace signal *before*
stepping over the breakpointed instruction, rather than after (which
I guess is why breakpoints are one-shot in the old scheme).

> +	}
> +
> +	set_dabr(this_dabr_data);
> +	per_cpu(dabr_data, cpu) = 0;
> +	ret = NOTIFY_STOP;
> +	/*
> +	 * If single-stepped after hw_breakpoint_handler(), pre-emption is
> +	 * already disabled.
> +	 */
> +	put_cpu_no_resched();
> +
> +out:
> +	/*
> +	 * A put_cpu_no_resched() call is required to complement the get_cpu()
> +	 * call used initially
> +	 */
> +	put_cpu_no_resched();
> +	return ret;
> +}
> +
> +/*
> + * Handle debug exception notifications.
> + */
> +int __kprobes hw_breakpoint_exceptions_notify(
> +		struct notifier_block *unused, unsigned long val, void *data)
> +{
> +	int ret = NOTIFY_DONE;
> +
> +	switch (val) {
> +	case DIE_DABR_MATCH:
> +		ret = hw_breakpoint_handler(data);
> +		break;
> +	case DIE_SSTEP:
> +		ret = single_step_dabr_instruction(data);
> +		break;
> +	}
> +
> +	return ret;
> +}
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev
>
K.Prasad June 10, 2009, 6:43 a.m. UTC | #2
On Fri, Jun 05, 2009 at 03:11:58PM +1000, David Gibson wrote:
> On Wed, Jun 03, 2009 at 10:05:11PM +0530, K.Prasad wrote:

Hi David,
	Sorry for the delay in response below. In the meanwhile, I
discovered an issue in detecting stray exceptions that affected
user-space handling of breakpoints. I've made some changes to correct
that behaviour which will be included in version VI of the patchset.

> > Introduce PPC64 implementation for the generic hardware breakpoint interfaces
> > defined in kernel/hw_breakpoint.c. Enable the HAVE_HW_BREAKPOINT flag and the
> > Makefile.
> 
> 
> [snip]
> > +/*
> > + * Install the debug register values for just the kernel, no thread.
> 
> This comment does seem to quite match the function below.
> 

Thanks for pointing out. Will change it to read thus:
/*
 * Clear the DABR which contains the thread-specific breakpoint address
 */

> > + */
> > +void arch_uninstall_thread_hw_breakpoint()
> > +{
> > +	set_dabr(0);
> > +}
> > +
> > +/*
> > + * Store a breakpoint's encoded address, length, and type.
> > + */
> > +int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk)
> > +{
> > +	/*
> > +	 * User-space requests will always have the address field populated
> > +	* Symbol names from user-space are rejected
> > +	*/
> > +	if (tsk && bp->info.name)
> > +		return -EINVAL;
> > +	/*
> > +	 * User-space requests will always have the address field populated
> > +	 * For kernel-addresses, either the address or symbol name can be
> > +	 * specified.
> > +	 */
> > +	if (bp->info.name)
> > +		bp->info.address = (unsigned long)
> > +					kallsyms_lookup_name(bp->info.name);
> 
> Archs don't have to implement this name lookup stuff, but it looks
> like most of them would - so it looks like there ought to be a helper
> function in generic code that will do the check / name lookup stuff.
> 
>

It doesn't turn out to be very generic. The IO breakpoints in x86, the
address-range (only) breakpoints in S390 and perhaps 4xx powerpc
processors were what made me think that this should remain in
arch-specific code. In these cases, we might have to deal only with
breakpoint addresses and not names.

> > +	if (bp->info.address)
> > +		return 0;
> 
> Hrm.. you realise there's no theoretical reason a userspace program
> couldn't put a breakpoint at address 0...?
> 

I agree. I think there must be parts of code that works based on this
assumption. Will check and remove them.

> > +	return -EINVAL;
> > +}
> > +
> > +/*
> > + * Validate the arch-specific HW Breakpoint register settings
> > + */
> > +int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
> > +						struct task_struct *tsk)
> > +{
> > +	int is_kernel, ret = -EINVAL;
> > +
> > +	if (!bp)
> > +		return ret;
> > +
> > +	switch (bp->info.type) {
> > +	case HW_BREAKPOINT_READ:
> > +	case HW_BREAKPOINT_WRITE:
> > +	case HW_BREAKPOINT_RW:
> > +		break;
> > +	default:
> > +		return ret;
> > +	}
> > +
> > +	if (bp->triggered)
> > +		ret = arch_store_info(bp, tsk);
> > +
> > +	is_kernel = is_kernel_addr(bp->info.address);
> > +	if ((tsk && is_kernel) || (!tsk && !is_kernel))
> > +		return -EINVAL;
> > +
> > +	return ret;
> > +}
> > +
> > +void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk)
> > +{
> > +	struct thread_struct *thread = &(tsk->thread);
> > +	struct hw_breakpoint *bp = thread->hbp[0];
> > +
> > +	if (bp)
> > +		thread->dabr = (bp->info.address & ~HW_BREAKPOINT_ALIGN) |
> > +				bp->info.type | DABR_TRANSLATION;
> > +	else
> > +		thread->dabr = 0;
> > +}
> > +
> > +void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
> > +{
> > +	struct thread_struct *thread = &(tsk->thread);
> > +
> > +	thread->dabr = 0;
> > +}
> > +
> > +/*
> > + * Handle debug exception notifications.
> > + */
> > +int __kprobes hw_breakpoint_handler(struct die_args *args)
> > +{
> > +	int rc = NOTIFY_STOP;
> > +	struct hw_breakpoint *bp;
> > +	struct pt_regs *regs = args->regs;
> > +	unsigned long dar = regs->dar;
> > +	int cpu, is_one_shot, stepped = 1;
> > +
> > +	/* Disable breakpoints during exception handling */
> > +	set_dabr(0);
> > +
> > +	cpu = get_cpu();
> > +	/* Determine whether kernel- or user-space address is the trigger */
> > +	bp = (hbp_kernel_pos == HBP_NUM) ? current->thread.hbp[0] :
> > +					per_cpu(this_hbp_kernel[0], cpu);
> > +	/*
> > +	 * bp can be NULL due to lazy debug register switching
> > +	 * or due to the delay between updates of hbp_kernel_pos
> > +	 * and this_hbp_kernel.
> > +	 */
> > +	if (!bp)
> > +		goto out;
> > +
> > +	if (dar == bp->info.address)
> > +		per_cpu(dabr_data, cpu) = (hbp_kernel_pos == HBP_NUM) ?
> > +						current->thread.dabr : kdabr;
> > +	else {
> > +		/*
> > +		 * This exception is triggered not because of a memory access on
> > +		 * the monitored variable but in the double-word address range
> > +		 * in which it is contained. We will consume this exception,
> > +		 * considering it as 'noise'.
> > +		 */
> > +		rc = NOTIFY_STOP;
> > +		goto out;
> > +	}
> > +	is_one_shot = (bp->triggered == ptrace_triggered) ? 1 : 0;
> 
> Ouch, explicitly special-casing ptrace_triggered is pretty nasty.
> Since the bp_info is already arch specific, maybe it should include a
> flag to indicate whether the breakpoint is one-shot or not.
> 

The reason to check for ptrace_triggered is to contain the one-shot
behaviour only to ptrace (thus retaining the semantics) and not to extend
them to all user-space requests through register_user_hw_breakpoint().

A one-shot behaviour for all user-space requests would create more work
for the user-space programs (such as re-registration) and will leave open
a small window of opportunity for debug register grabbing by kernel-space
requests.

So, in effect a request through register_user_hw_breakpoint() interface
will behave as under:
- Single-step over the causative instruction that triggered the
  breakpoint exception handler.
- Deliver the SIGTRAP signal to user-space after executing the causative
  instruction.

This behaviour is in consonance with that of kernel-space requests and
those on x86 processors, and helps define a consistent behaviour across
architectures for user-space.

Let me know what you think on the same.

> > +	(bp->triggered)(bp, regs);
> > +	/*
> > +	 * Ptrace expects the HW Breakpoints to be one-shot. We will return
> > +	 * NOTIFY_DONE without restoring DABR with the breakpoint address. The
> > +	 * downstream code will generate SIGTRAP to the process
> > +	 */
> > +	if (is_one_shot) {
> > +		rc = NOTIFY_DONE;
> > +		goto out;
> 
> Don't you need to clear dabr_data?  Otherwise if we enter single step
> for some other reason (e.g. gdb turns it on), won't we incorrectly hit
> the code-path to step over a dabr breakpoint?
> 

Yes, I missed it.

> > +	}
> > +
> > +	stepped = emulate_step(regs, regs->nip);
> > +	/*
> > +	 * Single-step the causative instruction manually if
> > +	 * emulate_step() could not execute it
> > +	 */
> > +	if (stepped == 0) {
> > +		regs->msr |= MSR_SE;
> > +		goto out;
> > +	}
> > +	set_dabr(per_cpu(dabr_data, cpu));
> > +	per_cpu(dabr_data, cpu) = 0;
> > +
> > +out:
> > +	/* Enable pre-emption only if single-stepping is finished */
> > +	if (stepped)
> > +		put_cpu_no_resched();
> > +	return rc;
> > +}
> > +
> > +/*
> > + * Handle single-step exceptions following a DABR hit.
> > + */
> > +int __kprobes single_step_dabr_instruction(struct die_args *args)
> > +{
> > +	struct pt_regs *regs = args->regs;
> > +	int cpu = get_cpu();
> > +	int ret = NOTIFY_DONE;
> > +	siginfo_t info;
> > +	unsigned long this_dabr_data = per_cpu(dabr_data, cpu);
> > +
> > +	/*
> > +	 * Check if we are single-stepping as a result of a
> > +	 * previous HW Breakpoint exception
> > +	 */
> > +	if (this_dabr_data == 0)
> > +		goto out;
> > +
> > +	regs->msr &= ~MSR_SE;
> > +	/* Deliver signal to user-space */
> > +	if (this_dabr_data < TASK_SIZE) {
> > +		info.si_signo = SIGTRAP;
> > +		info.si_errno = 0;
> > +		info.si_code = TRAP_HWBKPT;
> > +		info.si_addr = (void __user *)(per_cpu(dabr_data, cpu));
> > +		force_sig_info(SIGTRAP, &info, current);
> 
> Uh.. I recall mentioning in my previous review that in order to match
> previous behaviour we need to deliver the userspace signal *before*
> stepping over the breakpointed instruction, rather than after (which
> I guess is why breakpoints are one-shot in the old scheme).
> 

This code would implement the behaviour as stated in the comment for
user-space requests above.

> > _______________________________________________
> > Linuxppc-dev mailing list
> > Linuxppc-dev@ozlabs.org
> > https://ozlabs.org/mailman/listinfo/linuxppc-dev
> > 
> 
> -- 
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson

Thanks,
K.Prasad
David Gibson June 15, 2009, 6:40 a.m. UTC | #3
On Wed, Jun 10, 2009 at 12:13:49PM +0530, K.Prasad wrote:
> On Fri, Jun 05, 2009 at 03:11:58PM +1000, David Gibson wrote:
> > On Wed, Jun 03, 2009 at 10:05:11PM +0530, K.Prasad wrote:
> 
> Hi David,
> 	Sorry for the delay in response below. In the meanwhile, I
> discovered an issue in detecting stray exceptions that affected
> user-space handling of breakpoints. I've made some changes to correct
> that behaviour which will be included in version VI of the patchset.

[snip]
> > > +	/*
> > > +	 * User-space requests will always have the address field populated
> > > +	 * For kernel-addresses, either the address or symbol name can be
> > > +	 * specified.
> > > +	 */
> > > +	if (bp->info.name)
> > > +		bp->info.address = (unsigned long)
> > > +					kallsyms_lookup_name(bp->info.name);
> > 
> > Archs don't have to implement this name lookup stuff, but it looks
> > like most of them would - so it looks like there ought to be a helper
> > function in generic code that will do the check / name lookup stuff.
> 
> It doesn't turn out to be very generic. The IO breakpoints in x86, the
> address-range (only) breakpoints in S390 and perhaps 4xx powerpc
> processors were what made me think that this should remain in
> arch-specific code. In these cases, we might have to deal only with
> breakpoint addresses and not names.

Hrm, ok.  I was suggesting a helper function that those archs which
can use it call at need, not moving the address lookup to generic code
in the sense of being used everywhere.  Whatever, it's not that
important at this stage.

> > > +	if (bp->info.address)
> > > +		return 0;
> > 
> > Hrm.. you realise there's no theoretical reason a userspace program
> > couldn't put a breakpoint at address 0...?
> 
> I agree. I think there must be parts of code that works based on this
> assumption. Will check and remove them.

Ok, good.

[snip]
> > > +	else {
> > > +		/*
> > > +		 * This exception is triggered not because of a memory access on
> > > +		 * the monitored variable but in the double-word address range
> > > +		 * in which it is contained. We will consume this exception,
> > > +		 * considering it as 'noise'.
> > > +		 */
> > > +		rc = NOTIFY_STOP;
> > > +		goto out;
> > > +	}
> > > +	is_one_shot = (bp->triggered == ptrace_triggered) ? 1 : 0;
> > 
> > Ouch, explicitly special-casing ptrace_triggered is pretty nasty.
> > Since the bp_info is already arch specific, maybe it should include a
> > flag to indicate whether the breakpoint is one-shot or not.
> > 
> 
> The reason to check for ptrace_triggered is to contain the one-shot
> behaviour only to ptrace (thus retaining the semantics) and not to extend
> them to all user-space requests through
> register_user_hw_breakpoint().

Right, but couldn't you implement that withing ptrace_triggered
itself, without a special test here, by having it cancel the
breakpoint.

> A one-shot behaviour for all user-space requests would create more work
> for the user-space programs (such as re-registration) and will leave open
> a small window of opportunity for debug register grabbing by kernel-space
> requests.
> 
> So, in effect a request through register_user_hw_breakpoint() interface
> will behave as under:
> - Single-step over the causative instruction that triggered the
>   breakpoint exception handler.
> - Deliver the SIGTRAP signal to user-space after executing the causative
>   instruction.
> 
> This behaviour is in consonance with that of kernel-space requests and
> those on x86 processors, and helps define a consistent behaviour across
> architectures for user-space.
> 
> Let me know what you think on the same.

I certainly see the value in consistent semantics across archs.
However, I can also see uses for the powerpc trap-before-execute
behaviour.  That's why I'm suggesting it might be worth having an
arch-specific flag.

[snip]
> > > +int __kprobes single_step_dabr_instruction(struct die_args *args)
> > > +{
> > > +	struct pt_regs *regs = args->regs;
> > > +	int cpu = get_cpu();
> > > +	int ret = NOTIFY_DONE;
> > > +	siginfo_t info;
> > > +	unsigned long this_dabr_data = per_cpu(dabr_data, cpu);
> > > +
> > > +	/*
> > > +	 * Check if we are single-stepping as a result of a
> > > +	 * previous HW Breakpoint exception
> > > +	 */
> > > +	if (this_dabr_data == 0)
> > > +		goto out;
> > > +
> > > +	regs->msr &= ~MSR_SE;
> > > +	/* Deliver signal to user-space */
> > > +	if (this_dabr_data < TASK_SIZE) {
> > > +		info.si_signo = SIGTRAP;
> > > +		info.si_errno = 0;
> > > +		info.si_code = TRAP_HWBKPT;
> > > +		info.si_addr = (void __user *)(per_cpu(dabr_data, cpu));
> > > +		force_sig_info(SIGTRAP, &info, current);
> > 
> > Uh.. I recall mentioning in my previous review that in order to match
> > previous behaviour we need to deliver the userspace signal *before*
> > stepping over the breakpointed instruction, rather than after (which
> > I guess is why breakpoints are one-shot in the old scheme).
> 
> This code would implement the behaviour as stated in the comment for
> user-space requests above.

And you're relying on the old trap-sending code in do_dabr for ptrace
requests?
K.Prasad June 15, 2009, 7:18 a.m. UTC | #4
On Mon, Jun 15, 2009 at 04:40:45PM +1000, David Gibson wrote:
> On Wed, Jun 10, 2009 at 12:13:49PM +0530, K.Prasad wrote:
> > On Fri, Jun 05, 2009 at 03:11:58PM +1000, David Gibson wrote:
> > > On Wed, Jun 03, 2009 at 10:05:11PM +0530, K.Prasad wrote:
> > 
> > > > +	else {
> > > > +		/*
> > > > +		 * This exception is triggered not because of a memory access on
> > > > +		 * the monitored variable but in the double-word address range
> > > > +		 * in which it is contained. We will consume this exception,
> > > > +		 * considering it as 'noise'.
> > > > +		 */
> > > > +		rc = NOTIFY_STOP;
> > > > +		goto out;
> > > > +	}
> > > > +	is_one_shot = (bp->triggered == ptrace_triggered) ? 1 : 0;
> > > 
> > > Ouch, explicitly special-casing ptrace_triggered is pretty nasty.
> > > Since the bp_info is already arch specific, maybe it should include a
> > > flag to indicate whether the breakpoint is one-shot or not.
> > > 
> > 
> > The reason to check for ptrace_triggered is to contain the one-shot
> > behaviour only to ptrace (thus retaining the semantics) and not to extend
> > them to all user-space requests through
> > register_user_hw_breakpoint().
> 
> Right, but couldn't you implement that withing ptrace_triggered
> itself, without a special test here, by having it cancel the
> breakpoint.
> 

A special check (either using the callback routine as above, or using a
special flag) will be required in hw_breakpoint_handler() to enable
early return (without single-stepping). I'm not sure if I got your
suggestion right, and let me know if you think so.

> > A one-shot behaviour for all user-space requests would create more work
> > for the user-space programs (such as re-registration) and will leave open
> > a small window of opportunity for debug register grabbing by kernel-space
> > requests.
> > 
> > So, in effect a request through register_user_hw_breakpoint() interface
> > will behave as under:
> > - Single-step over the causative instruction that triggered the
> >   breakpoint exception handler.
> > - Deliver the SIGTRAP signal to user-space after executing the causative
> >   instruction.
> > 
> > This behaviour is in consonance with that of kernel-space requests and
> > those on x86 processors, and helps define a consistent behaviour across
> > architectures for user-space.
> > 
> > Let me know what you think on the same.
> 
> I certainly see the value in consistent semantics across archs.
> However, I can also see uses for the powerpc trap-before-execute
> behaviour.  That's why I'm suggesting it might be worth having an
> arch-specific flag.
> 
> [snip]

So, you suggest that the 'one-shot' behaviour should be driven by
user-request and not just confined to ptrace? (The default behaviour for
all breakpoints-minus-ptrace will remain 'continuous' though).

It can be implemented through an additional flag in 'struct
arch_hw_breakpoint'. I can send a new version 7 of the patchset with this
change (with the hope that the version 6 of the patchset looks fine in
its present form!). Meanwhile, we'd like to know what uses you see in
addition to the present one if the one-shot behaviour is made
user-defined. Are those uses beyond what can be achieved through the
present ptrace interface?

> > > > +int __kprobes single_step_dabr_instruction(struct die_args *args)
> > > > +{
> > > > +	struct pt_regs *regs = args->regs;
> > > > +	int cpu = get_cpu();
> > > > +	int ret = NOTIFY_DONE;
> > > > +	siginfo_t info;
> > > > +	unsigned long this_dabr_data = per_cpu(dabr_data, cpu);
> > > > +
> > > > +	/*
> > > > +	 * Check if we are single-stepping as a result of a
> > > > +	 * previous HW Breakpoint exception
> > > > +	 */
> > > > +	if (this_dabr_data == 0)
> > > > +		goto out;
> > > > +
> > > > +	regs->msr &= ~MSR_SE;
> > > > +	/* Deliver signal to user-space */
> > > > +	if (this_dabr_data < TASK_SIZE) {
> > > > +		info.si_signo = SIGTRAP;
> > > > +		info.si_errno = 0;
> > > > +		info.si_code = TRAP_HWBKPT;
> > > > +		info.si_addr = (void __user *)(per_cpu(dabr_data, cpu));
> > > > +		force_sig_info(SIGTRAP, &info, current);
> > > 
> > > Uh.. I recall mentioning in my previous review that in order to match
> > > previous behaviour we need to deliver the userspace signal *before*
> > > stepping over the breakpointed instruction, rather than after (which
> > > I guess is why breakpoints are one-shot in the old scheme).
> > 
> > This code would implement the behaviour as stated in the comment for
> > user-space requests above.
> 
> And you're relying on the old trap-sending code in do_dabr for ptrace
> requests?
> 

Yes.

> -- 
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson

Thanks,
K.Prasad
David Gibson June 17, 2009, 4:45 a.m. UTC | #5
On Mon, Jun 15, 2009 at 12:48:28PM +0530, K.Prasad wrote:
> On Mon, Jun 15, 2009 at 04:40:45PM +1000, David Gibson wrote:
> > On Wed, Jun 10, 2009 at 12:13:49PM +0530, K.Prasad wrote:
> > > On Fri, Jun 05, 2009 at 03:11:58PM +1000, David Gibson wrote:
> > > > On Wed, Jun 03, 2009 at 10:05:11PM +0530, K.Prasad wrote:
> > > 
> > > > > +	else {
> > > > > +		/*
> > > > > +		 * This exception is triggered not because of a memory access on
> > > > > +		 * the monitored variable but in the double-word address range
> > > > > +		 * in which it is contained. We will consume this exception,
> > > > > +		 * considering it as 'noise'.
> > > > > +		 */
> > > > > +		rc = NOTIFY_STOP;
> > > > > +		goto out;
> > > > > +	}
> > > > > +	is_one_shot = (bp->triggered == ptrace_triggered) ? 1 : 0;
> > > > 
> > > > Ouch, explicitly special-casing ptrace_triggered is pretty nasty.
> > > > Since the bp_info is already arch specific, maybe it should include a
> > > > flag to indicate whether the breakpoint is one-shot or not.
> > > > 
> > > 
> > > The reason to check for ptrace_triggered is to contain the one-shot
> > > behaviour only to ptrace (thus retaining the semantics) and not to extend
> > > them to all user-space requests through
> > > register_user_hw_breakpoint().
> > 
> > Right, but couldn't you implement that withing ptrace_triggered
> > itself, without a special test here, by having it cancel the
> > breakpoint.
> 
> A special check (either using the callback routine as above, or using a
> special flag) will be required in hw_breakpoint_handler() to enable
> early return (without single-stepping). I'm not sure if I got your
> suggestion right, and let me know if you think so.

Well.. you could also recheck after calling triggered whether the
breakpoint is still in existence.  So cancelling the breakpoint in
triggered would.

Or you could change the signature for the triggered function, so it
returns whether to leave the breakpoint active or not.  That would
have some advantages you could easily implement either one-shot,
continuous or N-shot, or keep firing until some specific event
behaviour from within the triggered function.


But all that's a bit moot, because of the fact that you try to make
the TRAP timing consistent (before or after execution of triggering
instruction) but you don't do so for a general triggered hook.

> > > A one-shot behaviour for all user-space requests would create more work
> > > for the user-space programs (such as re-registration) and will leave open
> > > a small window of opportunity for debug register grabbing by kernel-space
> > > requests.
> > > 
> > > So, in effect a request through register_user_hw_breakpoint() interface
> > > will behave as under:
> > > - Single-step over the causative instruction that triggered the
> > >   breakpoint exception handler.
> > > - Deliver the SIGTRAP signal to user-space after executing the causative
> > >   instruction.
> > > 
> > > This behaviour is in consonance with that of kernel-space requests and
> > > those on x86 processors, and helps define a consistent behaviour across
> > > architectures for user-space.
> > > 
> > > Let me know what you think on the same.
> > 
> > I certainly see the value in consistent semantics across archs.
> > However, I can also see uses for the powerpc trap-before-execute
> > behaviour.  That's why I'm suggesting it might be worth having an
> > arch-specific flag.
> > 
> > [snip]
> 
> So, you suggest that the 'one-shot' behaviour should be driven by
> user-request and not just confined to ptrace? (The default behaviour for
> all breakpoints-minus-ptrace will remain 'continuous' though).

Not one-shot behaviour as such, but whether the trigger fires before
or after execution of the triggering instruction.  The complication is
that combining fire-before semantics with continuous firing becomes
tricky.

> It can be implemented through an additional flag in 'struct
> arch_hw_breakpoint'. I can send a new version 7 of the patchset with this
> change (with the hope that the version 6 of the patchset looks fine in
> its present form!). Meanwhile, we'd like to know what uses you see in
> addition to the present one if the one-shot behaviour is made
> user-defined. Are those uses beyond what can be achieved through the
> present ptrace interface?
> 
> > > > > +int __kprobes single_step_dabr_instruction(struct die_args *args)
> > > > > +{
> > > > > +	struct pt_regs *regs = args->regs;
> > > > > +	int cpu = get_cpu();
> > > > > +	int ret = NOTIFY_DONE;
> > > > > +	siginfo_t info;
> > > > > +	unsigned long this_dabr_data = per_cpu(dabr_data, cpu);
> > > > > +
> > > > > +	/*
> > > > > +	 * Check if we are single-stepping as a result of a
> > > > > +	 * previous HW Breakpoint exception
> > > > > +	 */
> > > > > +	if (this_dabr_data == 0)
> > > > > +		goto out;
> > > > > +
> > > > > +	regs->msr &= ~MSR_SE;
> > > > > +	/* Deliver signal to user-space */
> > > > > +	if (this_dabr_data < TASK_SIZE) {
> > > > > +		info.si_signo = SIGTRAP;
> > > > > +		info.si_errno = 0;
> > > > > +		info.si_code = TRAP_HWBKPT;
> > > > > +		info.si_addr = (void __user *)(per_cpu(dabr_data, cpu));
> > > > > +		force_sig_info(SIGTRAP, &info, current);
> > > > 
> > > > Uh.. I recall mentioning in my previous review that in order to match
> > > > previous behaviour we need to deliver the userspace signal *before*
> > > > stepping over the breakpointed instruction, rather than after (which
> > > > I guess is why breakpoints are one-shot in the old scheme).
> > > 
> > > This code would implement the behaviour as stated in the comment for
> > > user-space requests above.
> > 
> > And you're relying on the old trap-sending code in do_dabr for ptrace
> > requests?
> 
> Yes.

Yeah, since you're taking over the whole breakpoint infrastructure, I
really think you should rewrite do_dabr completely as part of your code.
diff mbox

Patch

Index: linux-2.6-tip.hbkpt/arch/powerpc/Kconfig
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/powerpc/Kconfig
+++ linux-2.6-tip.hbkpt/arch/powerpc/Kconfig
@@ -125,6 +125,7 @@  config PPC
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_OPROFILE
 	select HAVE_SYSCALL_WRAPPERS if PPC64
+	select HAVE_HW_BREAKPOINT if PPC64
 
 config EARLY_PRINTK
 	bool
Index: linux-2.6-tip.hbkpt/arch/powerpc/kernel/Makefile
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/powerpc/kernel/Makefile
+++ linux-2.6-tip.hbkpt/arch/powerpc/kernel/Makefile
@@ -33,7 +33,7 @@  obj-$(CONFIG_PPC64)		+= setup_64.o sys_p
 				   signal_64.o ptrace32.o \
 				   paca.o cpu_setup_ppc970.o \
 				   cpu_setup_pa6t.o \
-				   firmware.o nvram_64.o
+				   firmware.o nvram_64.o hw_breakpoint.o
 obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o vector.o
Index: linux-2.6-tip.hbkpt/arch/powerpc/kernel/hw_breakpoint.c
===================================================================
--- /dev/null
+++ linux-2.6-tip.hbkpt/arch/powerpc/kernel/hw_breakpoint.c
@@ -0,0 +1,290 @@ 
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright © 2009 IBM Corporation
+ */
+
+#include <linux/notifier.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/sstep.h>
+
+/* Store the kernel-space breakpoint address value */
+static unsigned long kdabr;
+
+/*
+ * Temporarily stores address for DABR before it is written by the
+ * single-step handler routine
+ */
+static DEFINE_PER_CPU(unsigned long, dabr_data);
+
+void arch_update_kernel_hw_breakpoint(void *unused)
+{
+	struct hw_breakpoint *bp;
+
+	/* Check if there is nothing to update */
+	if (hbp_kernel_pos == HBP_NUM)
+		return;
+
+	per_cpu(this_hbp_kernel[hbp_kernel_pos], get_cpu()) = bp =
+						hbp_kernel[hbp_kernel_pos];
+	if (bp == NULL)
+		kdabr = 0;
+	else
+		kdabr = (bp->info.address & ~HW_BREAKPOINT_ALIGN) |
+			bp->info.type | DABR_TRANSLATION;
+	set_dabr(kdabr);
+	put_cpu_no_resched();
+}
+
+/*
+ * Install the thread breakpoints in their debug registers.
+ */
+void arch_install_thread_hw_breakpoint(struct task_struct *tsk)
+{
+	set_dabr(tsk->thread.dabr);
+}
+
+/*
+ * Install the debug register values for just the kernel, no thread.
+ */
+void arch_uninstall_thread_hw_breakpoint()
+{
+	set_dabr(0);
+}
+
+/*
+ * Store a breakpoint's encoded address, length, and type.
+ */
+int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk)
+{
+	/*
+	 * User-space requests will always have the address field populated
+	* Symbol names from user-space are rejected
+	*/
+	if (tsk && bp->info.name)
+		return -EINVAL;
+	/*
+	 * User-space requests will always have the address field populated
+	 * For kernel-addresses, either the address or symbol name can be
+	 * specified.
+	 */
+	if (bp->info.name)
+		bp->info.address = (unsigned long)
+					kallsyms_lookup_name(bp->info.name);
+	if (bp->info.address)
+		return 0;
+	return -EINVAL;
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
+						struct task_struct *tsk)
+{
+	int is_kernel, ret = -EINVAL;
+
+	if (!bp)
+		return ret;
+
+	switch (bp->info.type) {
+	case HW_BREAKPOINT_READ:
+	case HW_BREAKPOINT_WRITE:
+	case HW_BREAKPOINT_RW:
+		break;
+	default:
+		return ret;
+	}
+
+	if (bp->triggered)
+		ret = arch_store_info(bp, tsk);
+
+	is_kernel = is_kernel_addr(bp->info.address);
+	if ((tsk && is_kernel) || (!tsk && !is_kernel))
+		return -EINVAL;
+
+	return ret;
+}
+
+void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk)
+{
+	struct thread_struct *thread = &(tsk->thread);
+	struct hw_breakpoint *bp = thread->hbp[0];
+
+	if (bp)
+		thread->dabr = (bp->info.address & ~HW_BREAKPOINT_ALIGN) |
+				bp->info.type | DABR_TRANSLATION;
+	else
+		thread->dabr = 0;
+}
+
+void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
+{
+	struct thread_struct *thread = &(tsk->thread);
+
+	thread->dabr = 0;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+	int rc = NOTIFY_STOP;
+	struct hw_breakpoint *bp;
+	struct pt_regs *regs = args->regs;
+	unsigned long dar = regs->dar;
+	int cpu, is_one_shot, stepped = 1;
+
+	/* Disable breakpoints during exception handling */
+	set_dabr(0);
+
+	cpu = get_cpu();
+	/* Determine whether kernel- or user-space address is the trigger */
+	bp = (hbp_kernel_pos == HBP_NUM) ? current->thread.hbp[0] :
+					per_cpu(this_hbp_kernel[0], cpu);
+	/*
+	 * bp can be NULL due to lazy debug register switching
+	 * or due to the delay between updates of hbp_kernel_pos
+	 * and this_hbp_kernel.
+	 */
+	if (!bp)
+		goto out;
+
+	if (dar == bp->info.address)
+		per_cpu(dabr_data, cpu) = (hbp_kernel_pos == HBP_NUM) ?
+						current->thread.dabr : kdabr;
+	else {
+		/*
+		 * This exception is triggered not because of a memory access on
+		 * the monitored variable but in the double-word address range
+		 * in which it is contained. We will consume this exception,
+		 * considering it as 'noise'.
+		 */
+		rc = NOTIFY_STOP;
+		goto out;
+	}
+	is_one_shot = (bp->triggered == ptrace_triggered) ? 1 : 0;
+
+	(bp->triggered)(bp, regs);
+	/*
+	 * Ptrace expects the HW Breakpoints to be one-shot. We will return
+	 * NOTIFY_DONE without restoring DABR with the breakpoint address. The
+	 * downstream code will generate SIGTRAP to the process
+	 */
+	if (is_one_shot) {
+		rc = NOTIFY_DONE;
+		goto out;
+	}
+
+	stepped = emulate_step(regs, regs->nip);
+	/*
+	 * Single-step the causative instruction manually if
+	 * emulate_step() could not execute it
+	 */
+	if (stepped == 0) {
+		regs->msr |= MSR_SE;
+		goto out;
+	}
+	set_dabr(per_cpu(dabr_data, cpu));
+	per_cpu(dabr_data, cpu) = 0;
+
+out:
+	/* Enable pre-emption only if single-stepping is finished */
+	if (stepped)
+		put_cpu_no_resched();
+	return rc;
+}
+
+/*
+ * Handle single-step exceptions following a DABR hit.
+ */
+int __kprobes single_step_dabr_instruction(struct die_args *args)
+{
+	struct pt_regs *regs = args->regs;
+	int cpu = get_cpu();
+	int ret = NOTIFY_DONE;
+	siginfo_t info;
+	unsigned long this_dabr_data = per_cpu(dabr_data, cpu);
+
+	/*
+	 * Check if we are single-stepping as a result of a
+	 * previous HW Breakpoint exception
+	 */
+	if (this_dabr_data == 0)
+		goto out;
+
+	regs->msr &= ~MSR_SE;
+	/* Deliver signal to user-space */
+	if (this_dabr_data < TASK_SIZE) {
+		info.si_signo = SIGTRAP;
+		info.si_errno = 0;
+		info.si_code = TRAP_HWBKPT;
+		info.si_addr = (void __user *)(per_cpu(dabr_data, cpu));
+		force_sig_info(SIGTRAP, &info, current);
+	}
+
+	set_dabr(this_dabr_data);
+	per_cpu(dabr_data, cpu) = 0;
+	ret = NOTIFY_STOP;
+	/*
+	 * If single-stepped after hw_breakpoint_handler(), pre-emption is
+	 * already disabled.
+	 */
+	put_cpu_no_resched();
+
+out:
+	/*
+	 * A put_cpu_no_resched() call is required to complement the get_cpu()
+	 * call used initially
+	 */
+	put_cpu_no_resched();
+	return ret;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_exceptions_notify(
+		struct notifier_block *unused, unsigned long val, void *data)
+{
+	int ret = NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_DABR_MATCH:
+		ret = hw_breakpoint_handler(data);
+		break;
+	case DIE_SSTEP:
+		ret = single_step_dabr_instruction(data);
+		break;
+	}
+
+	return ret;
+}