| Submitter | Luis Henriques |
|---|---|
| Date | Feb. 19, 2013, 2:18 p.m. |
| Message ID | <1361283503-26362-3-git-send-email-luis.henriques@canonical.com> |
| Download | mbox | patch |
| Permalink | /patch/221708/ |
| State | New |
| Headers | show |
Comments
On 19/02/13 14:18, Luis Henriques wrote: > From: Oleg Nesterov <oleg@redhat.com> > > CVE-2013-0871 > > putreg() assumes that the tracee is not running and pt_regs_access() can > safely play with its stack. However a killed tracee can return from > ptrace_stop() to the low-level asm code and do RESTORE_REST, this means > that debugger can actually read/modify the kernel stack until the tracee > does SAVE_REST again. > > set_task_blockstep() can race with SIGKILL too and in some sense this > race is even worse, the very fact the tracee can be woken up breaks the > logic. > > As Linus suggested we can clear TASK_WAKEKILL around the arch_ptrace() > call, this ensures that nobody can ever wakeup the tracee while the > debugger looks at it. Not only this fixes the mentioned problems, we > can do some cleanups/simplifications in arch_ptrace() paths. > > Probably ptrace_unfreeze_traced() needs more callers, for example it > makes sense to make the tracee killable for oom-killer before > access_process_vm(). > > While at it, add the comment into may_ptrace_stop() to explain why > ptrace_stop() still can't rely on SIGKILL and signal_pending_state(). > > Reported-by: Salman Qazi <sqazi@google.com> > Reported-by: Suleiman Souhlal <suleiman@google.com> > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> > Signed-off-by: Oleg Nesterov <oleg@redhat.com> > Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> > (backported from commit 9899d11f654474d2d54ea52ceaa2a1f4db3abd68) > > Conflicts: > arch/x86/kernel/step.c > kernel/ptrace.c > > Signed-off-by: Luis Henriques <luis.henriques@canonical.com> > --- > kernel/ptrace.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++----------- > kernel/signal.c | 5 +++++ > 2 files changed, 55 insertions(+), 12 deletions(-) > > diff --git a/kernel/ptrace.c b/kernel/ptrace.c > index d77fa9e..0f5157c 100644 > --- a/kernel/ptrace.c > +++ b/kernel/ptrace.c > @@ -97,9 +97,36 @@ void __ptrace_unlink(struct task_struct *child) > spin_unlock(&child->sighand->siglock); > } > > -/* > - * Check that we have indeed attached to the thing.. > - */ > +/* Ensure that nothing can wake it up, even SIGKILL */ > +static bool ptrace_freeze_traced(struct task_struct *task) > +{ > + bool ret = false; > + > + spin_lock_irq(&task->sighand->siglock); > + if (task_is_traced(task) && !__fatal_signal_pending(task)) { > + task->state = __TASK_TRACED; > + ret = true; > + } > + spin_unlock_irq(&task->sighand->siglock); > + > + return ret; > +} > + > +static void ptrace_unfreeze_traced(struct task_struct *task) > +{ > + if (task->state != __TASK_TRACED) > + return; > + > + WARN_ON(!task->ptrace || task->parent != current); > + > + spin_lock_irq(&task->sighand->siglock); > + if (__fatal_signal_pending(task)) > + wake_up_state(task, __TASK_TRACED); > + else > + task->state = TASK_TRACED; > + spin_unlock_irq(&task->sighand->siglock); > +} > + > int ptrace_check_attach(struct task_struct *child, int kill) > { > int ret = -ESRCH; > @@ -112,23 +139,29 @@ int ptrace_check_attach(struct task_struct *child, int kill) > * be changed by us so it's not changing right after this. > */ > read_lock(&tasklist_lock); > - if ((child->ptrace & PT_PTRACED) && child->parent == current) { > + if (child->ptrace && child->parent == current) { > + WARN_ON(child->state == __TASK_TRACED); > /* > * child->sighand can't be NULL, release_task() > * does ptrace_unlink() before __exit_signal(). > */ > - spin_lock_irq(&child->sighand->siglock); > - WARN_ON_ONCE(task_is_stopped(child)); > - if (task_is_traced(child) || kill) > + if (kill || ptrace_freeze_traced(child)) > ret = 0; > - spin_unlock_irq(&child->sighand->siglock); > } > read_unlock(&tasklist_lock); > > - if (!ret && !kill) > - ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; > + if (!ret && !kill) { > + if (!wait_task_inactive(child, __TASK_TRACED)) { > + /* > + * This can only happen if may_ptrace_stop() fails and > + * ptrace_stop() changes ->state back to TASK_RUNNING, > + * so we should not worry about leaking __TASK_TRACED. > + */ > + WARN_ON(child->state == __TASK_TRACED); > + ret = -ESRCH; > + } > + } > > - /* All systems go.. */ > return ret; > } > > @@ -777,6 +810,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, > goto out_put_task_struct; > > ret = arch_ptrace(child, request, addr, data); > + if (ret || request != PTRACE_DETACH) > + ptrace_unfreeze_traced(child); > > out_put_task_struct: > put_task_struct(child); > @@ -915,8 +950,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, > } > > ret = ptrace_check_attach(child, request == PTRACE_KILL); > - if (!ret) > + if (!ret) { > ret = compat_arch_ptrace(child, request, addr, data); > + if (ret || request != PTRACE_DETACH) > + ptrace_unfreeze_traced(child); > + } > > out_put_task_struct: > put_task_struct(child); > diff --git a/kernel/signal.c b/kernel/signal.c > index 8b0dd5b..51f2e69 100644 > --- a/kernel/signal.c > +++ b/kernel/signal.c > @@ -1669,6 +1669,10 @@ static inline int may_ptrace_stop(void) > * If SIGKILL was already sent before the caller unlocked > * ->siglock we must see ->core_state != NULL. Otherwise it > * is safe to enter schedule(). > + * > + * This is almost outdated, a task with the pending SIGKILL can't > + * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported > + * after SIGKILL was already dequeued. > */ > if (unlikely(current->mm->core_state) && > unlikely(current->mm == current->parent->mm)) > @@ -1800,6 +1804,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) > if (gstop_done) > do_notify_parent_cldstop(current, false, why); > > + /* tasklist protects us from ptrace_freeze_traced() */ > __set_current_state(TASK_RUNNING); > if (clear_code) > current->exit_code = 0; > Acked-by: Colin Ian King <colin.king@canonical.com>
Patch
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index d77fa9e..0f5157c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -97,9 +97,36 @@ void __ptrace_unlink(struct task_struct *child) spin_unlock(&child->sighand->siglock); } -/* - * Check that we have indeed attached to the thing.. - */ +/* Ensure that nothing can wake it up, even SIGKILL */ +static bool ptrace_freeze_traced(struct task_struct *task) +{ + bool ret = false; + + spin_lock_irq(&task->sighand->siglock); + if (task_is_traced(task) && !__fatal_signal_pending(task)) { + task->state = __TASK_TRACED; + ret = true; + } + spin_unlock_irq(&task->sighand->siglock); + + return ret; +} + +static void ptrace_unfreeze_traced(struct task_struct *task) +{ + if (task->state != __TASK_TRACED) + return; + + WARN_ON(!task->ptrace || task->parent != current); + + spin_lock_irq(&task->sighand->siglock); + if (__fatal_signal_pending(task)) + wake_up_state(task, __TASK_TRACED); + else + task->state = TASK_TRACED; + spin_unlock_irq(&task->sighand->siglock); +} + int ptrace_check_attach(struct task_struct *child, int kill) { int ret = -ESRCH; @@ -112,23 +139,29 @@ int ptrace_check_attach(struct task_struct *child, int kill) * be changed by us so it's not changing right after this. */ read_lock(&tasklist_lock); - if ((child->ptrace & PT_PTRACED) && child->parent == current) { + if (child->ptrace && child->parent == current) { + WARN_ON(child->state == __TASK_TRACED); /* * child->sighand can't be NULL, release_task() * does ptrace_unlink() before __exit_signal(). */ - spin_lock_irq(&child->sighand->siglock); - WARN_ON_ONCE(task_is_stopped(child)); - if (task_is_traced(child) || kill) + if (kill || ptrace_freeze_traced(child)) ret = 0; - spin_unlock_irq(&child->sighand->siglock); } read_unlock(&tasklist_lock); - if (!ret && !kill) - ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; + if (!ret && !kill) { + if (!wait_task_inactive(child, __TASK_TRACED)) { + /* + * This can only happen if may_ptrace_stop() fails and + * ptrace_stop() changes ->state back to TASK_RUNNING, + * so we should not worry about leaking __TASK_TRACED. + */ + WARN_ON(child->state == __TASK_TRACED); + ret = -ESRCH; + } + } - /* All systems go.. */ return ret; } @@ -777,6 +810,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, goto out_put_task_struct; ret = arch_ptrace(child, request, addr, data); + if (ret || request != PTRACE_DETACH) + ptrace_unfreeze_traced(child); out_put_task_struct: put_task_struct(child); @@ -915,8 +950,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, } ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (!ret) + if (!ret) { ret = compat_arch_ptrace(child, request, addr, data); + if (ret || request != PTRACE_DETACH) + ptrace_unfreeze_traced(child); + } out_put_task_struct: put_task_struct(child); diff --git a/kernel/signal.c b/kernel/signal.c index 8b0dd5b..51f2e69 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1669,6 +1669,10 @@ static inline int may_ptrace_stop(void) * If SIGKILL was already sent before the caller unlocked * ->siglock we must see ->core_state != NULL. Otherwise it * is safe to enter schedule(). + * + * This is almost outdated, a task with the pending SIGKILL can't + * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported + * after SIGKILL was already dequeued. */ if (unlikely(current->mm->core_state) && unlikely(current->mm == current->parent->mm)) @@ -1800,6 +1804,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) if (gstop_done) do_notify_parent_cldstop(current, false, why); + /* tasklist protects us from ptrace_freeze_traced() */ __set_current_state(TASK_RUNNING); if (clear_code) current->exit_code = 0;