diff mbox

[RFC,v2] Unprivileged: Disable raising of privileges

Message ID m1d41w62zc.fsf_-_@fess.ebiederm.org
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Eric W. Biederman Dec. 30, 2009, 12:49 p.m. UTC
If we can know that a process will never raise
it's priveleges we can enable a lot of features
that otherwise would be unsafe, because they
could break assumptions of existing suid executables.

To allow this to be used as a sand boxing feature
also disable ptracing other executables without
this new restriction.

For the moment I have used a per thread flag because
we are out of per process flags.

To ensure all descendants get this flag I rely on
the default copying of procss structures.

Added bprm->nosuid to make remove the need to add
duplicate error prone checks.  This ensures that
the disabling of suid executables is exactly the
same as MNT_NOSUID.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 arch/x86/include/asm/thread_info.h |    2 ++
 fs/exec.c                          |    6 ++++--
 include/linux/binfmts.h            |    1 +
 include/linux/prctl.h              |    2 ++
 kernel/ptrace.c                    |    4 ++++
 kernel/sys.c                       |   16 ++++++++++++++++
 security/commoncap.c               |   14 +++++++++++++-
 security/selinux/hooks.c           |    2 +-
 8 files changed, 43 insertions(+), 4 deletions(-)

Comments

Andrew G. Morgan Dec. 30, 2009, 2:52 p.m. UTC | #1
Eric,

I'm not clear why capabilities need to be manipulated by this feature
(the pure capability support already has a feature for disabling
privilege and blocking unsafe, or insufficient privilege, execution).

Perhaps I'm just unclear what features can be more safely enabled with
this in effect - that is, your description suggests that this is why
you are doing this, but leaves it unclear what they are. Could you
take a few moments to enumerate some of them?

Thanks

Andrew

On Wed, Dec 30, 2009 at 4:49 AM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
>
> If we can know that a process will never raise
> it's priveleges we can enable a lot of features
> that otherwise would be unsafe, because they
> could break assumptions of existing suid executables.
>
> To allow this to be used as a sand boxing feature
> also disable ptracing other executables without
> this new restriction.
>
> For the moment I have used a per thread flag because
> we are out of per process flags.
>
> To ensure all descendants get this flag I rely on
> the default copying of procss structures.
>
> Added bprm->nosuid to make remove the need to add
> duplicate error prone checks.  This ensures that
> the disabling of suid executables is exactly the
> same as MNT_NOSUID.
>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
>  arch/x86/include/asm/thread_info.h |    2 ++
>  fs/exec.c                          |    6 ++++--
>  include/linux/binfmts.h            |    1 +
>  include/linux/prctl.h              |    2 ++
>  kernel/ptrace.c                    |    4 ++++
>  kernel/sys.c                       |   16 ++++++++++++++++
>  security/commoncap.c               |   14 +++++++++++++-
>  security/selinux/hooks.c           |    2 +-
>  8 files changed, 43 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index 375c917..e716203 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -82,6 +82,7 @@ struct thread_info {
>  #define TIF_SYSCALL_EMU                6       /* syscall emulation active */
>  #define TIF_SYSCALL_AUDIT      7       /* syscall auditing active */
>  #define TIF_SECCOMP            8       /* secure computing */
> +#define TIF_NOSUID             9       /* suid exec permanently disabled */
>  #define TIF_MCE_NOTIFY         10      /* notify userspace of an MCE */
>  #define TIF_USER_RETURN_NOTIFY 11      /* notify kernel of userspace return */
>  #define TIF_NOTSC              16      /* TSC is not accessible in userland */
> @@ -107,6 +108,7 @@ struct thread_info {
>  #define _TIF_SYSCALL_EMU       (1 << TIF_SYSCALL_EMU)
>  #define _TIF_SYSCALL_AUDIT     (1 << TIF_SYSCALL_AUDIT)
>  #define _TIF_SECCOMP           (1 << TIF_SECCOMP)
> +#define _TIF_NOSUID            (1 << TIF_NOSUID)
>  #define _TIF_MCE_NOTIFY                (1 << TIF_MCE_NOTIFY)
>  #define _TIF_USER_RETURN_NOTIFY        (1 << TIF_USER_RETURN_NOTIFY)
>  #define _TIF_NOTSC             (1 << TIF_NOTSC)
> diff --git a/fs/exec.c b/fs/exec.c
> index 632b02e..5cba5ac 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1131,8 +1131,10 @@ int prepare_binprm(struct linux_binprm *bprm)
>        /* clear any previous set[ug]id data from a previous binary */
>        bprm->cred->euid = current_euid();
>        bprm->cred->egid = current_egid();
> -
> -       if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
> +       bprm->nosuid =
> +               (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) ||
> +               test_tsk_thread_flag(current, TIF_NOSUID);
> +       if (bprm->nosuid) {
>                /* Set-uid? */
>                if (mode & S_ISUID) {
>                        bprm->per_clear |= PER_CLEAR_ON_SETID;
> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
> index cd4349b..c3b5a30 100644
> --- a/include/linux/binfmts.h
> +++ b/include/linux/binfmts.h
> @@ -44,6 +44,7 @@ struct linux_binprm{
>  #ifdef __alpha__
>        unsigned int taso:1;
>  #endif
> +       unsigned int nosuid:1;  /* True if suid bits are ignored */
>        unsigned int recursion_depth;
>        struct file * file;
>        struct cred *cred;      /* new credentials */
> diff --git a/include/linux/prctl.h b/include/linux/prctl.h
> index a3baeb2..acb3516 100644
> --- a/include/linux/prctl.h
> +++ b/include/linux/prctl.h
> @@ -102,4 +102,6 @@
>
>  #define PR_MCE_KILL_GET 34
>
> +#define PR_SET_NOSUID  35
> +
>  #endif /* _LINUX_PRCTL_H */
> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
> index 23bd09c..b91040c 100644
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -152,6 +152,10 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
>        if (!dumpable && !capable(CAP_SYS_PTRACE))
>                return -EPERM;
>
> +       if (test_tsk_thread_flag(current, TIF_NOSUID) &&
> +           !test_tsk_thread_flag(task, TIF_NOSUID))
> +               return -EPERM;
> +
>        return security_ptrace_access_check(task, mode);
>  }
>
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 26a6b73..1d1902a 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -1578,6 +1578,22 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
>                        else
>                                error = PR_MCE_KILL_DEFAULT;
>                        break;
> +               case PR_SET_NOSUID:
> +               {
> +                       const struct cred *cred = current->cred;
> +                       error = -EINVAL;
> +                       if (    (cred->uid != cred->suid) ||
> +                               (cred->uid != cred->euid) ||
> +                               (cred->uid != cred->fsuid) ||
> +                               (cred->gid != cred->sgid) ||
> +                               (cred->gid != cred->egid) ||
> +                               (cred->gid != cred->fsgid) ||
> +                               (atomic_read(&current->signal->count) != 1))
> +                               break;
> +                       error = 0;
> +                       set_tsk_thread_flag(current, TIF_NOSUID);
> +                       break;
> +               }
>                default:
>                        error = -EINVAL;
>                        break;
> diff --git a/security/commoncap.c b/security/commoncap.c
> index f800fdb..28ab286 100644
> --- a/security/commoncap.c
> +++ b/security/commoncap.c
> @@ -389,7 +389,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective)
>        if (!file_caps_enabled)
>                return 0;
>
> -       if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)
> +       if (bprm->nosuid)
>                return 0;
>
>        dentry = dget(bprm->file->f_dentry);
> @@ -869,6 +869,18 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
>                        new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
>                goto changed;
>
> +       case PR_SET_NOSUID:
> +       {
> +               const struct cred *cred = current->cred;
> +               error = -EINVAL;
> +               /* Perform the capabilities checks */
> +               if (!cap_isclear(cred->cap_permitted) ||
> +                   !cap_isclear(cred->cap_effective))
> +                       goto error;
> +               /* Have the default perform the rest of the work. */
> +               error = -ENOSYS;
> +               goto error;
> +       }
>        default:
>                /* No functionality available - continue with default */
>                error = -ENOSYS;
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 7a374c2..d14cd24 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -2147,7 +2147,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm)
>        COMMON_AUDIT_DATA_INIT(&ad, FS);
>        ad.u.fs.path = bprm->file->f_path;
>
> -       if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
> +       if (bprm->nosid)
>                new_tsec->sid = old_tsec->sid;
>
>        if (new_tsec->sid == old_tsec->sid) {
> --
> 1.6.5.2.143.g8cc62
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Serge E. Hallyn Dec. 30, 2009, 6:29 p.m. UTC | #2
Quoting Eric W. Biederman (ebiederm@xmission.com):
> 
> If we can know that a process will never raise
> it's priveleges we can enable a lot of features
> that otherwise would be unsafe, because they
> could break assumptions of existing suid executables.
> 
> To allow this to be used as a sand boxing feature
> also disable ptracing other executables without
> this new restriction.
> 
> For the moment I have used a per thread flag because
> we are out of per process flags.
> 
> To ensure all descendants get this flag I rely on
> the default copying of procss structures.
> 
> Added bprm->nosuid to make remove the need to add
> duplicate error prone checks.  This ensures that
> the disabling of suid executables is exactly the
> same as MNT_NOSUID.
> 
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
>  arch/x86/include/asm/thread_info.h |    2 ++
>  fs/exec.c                          |    6 ++++--
>  include/linux/binfmts.h            |    1 +
>  include/linux/prctl.h              |    2 ++
>  kernel/ptrace.c                    |    4 ++++
>  kernel/sys.c                       |   16 ++++++++++++++++
>  security/commoncap.c               |   14 +++++++++++++-
>  security/selinux/hooks.c           |    2 +-
>  8 files changed, 43 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index 375c917..e716203 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -82,6 +82,7 @@ struct thread_info {
>  #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
>  #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
>  #define TIF_SECCOMP		8	/* secure computing */
> +#define TIF_NOSUID		9	/* suid exec permanently disabled */
>  #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
>  #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
>  #define TIF_NOTSC		16	/* TSC is not accessible in userland */
> @@ -107,6 +108,7 @@ struct thread_info {
>  #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
>  #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
>  #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
> +#define _TIF_NOSUID		(1 << TIF_NOSUID)
>  #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
>  #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
>  #define _TIF_NOTSC		(1 << TIF_NOTSC)
> diff --git a/fs/exec.c b/fs/exec.c
> index 632b02e..5cba5ac 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1131,8 +1131,10 @@ int prepare_binprm(struct linux_binprm *bprm)
>  	/* clear any previous set[ug]id data from a previous binary */
>  	bprm->cred->euid = current_euid();
>  	bprm->cred->egid = current_egid();
> -
> -	if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
> +	bprm->nosuid =
> +		(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) ||
> +		test_tsk_thread_flag(current, TIF_NOSUID);
> +	if (bprm->nosuid) {
>  		/* Set-uid? */
>  		if (mode & S_ISUID) {
>  			bprm->per_clear |= PER_CLEAR_ON_SETID;
> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
> index cd4349b..c3b5a30 100644
> --- a/include/linux/binfmts.h
> +++ b/include/linux/binfmts.h
> @@ -44,6 +44,7 @@ struct linux_binprm{
>  #ifdef __alpha__
>  	unsigned int taso:1;
>  #endif
> +	unsigned int nosuid:1;	/* True if suid bits are ignored */
>  	unsigned int recursion_depth;
>  	struct file * file;
>  	struct cred *cred;	/* new credentials */
> diff --git a/include/linux/prctl.h b/include/linux/prctl.h
> index a3baeb2..acb3516 100644
> --- a/include/linux/prctl.h
> +++ b/include/linux/prctl.h
> @@ -102,4 +102,6 @@
> 
>  #define PR_MCE_KILL_GET 34
> 
> +#define PR_SET_NOSUID	35
> +
>  #endif /* _LINUX_PRCTL_H */
> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
> index 23bd09c..b91040c 100644
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -152,6 +152,10 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
>  	if (!dumpable && !capable(CAP_SYS_PTRACE))
>  		return -EPERM;
> 
> +	if (test_tsk_thread_flag(current, TIF_NOSUID) &&
> +	    !test_tsk_thread_flag(task, TIF_NOSUID))
> +		return -EPERM;
> +
>  	return security_ptrace_access_check(task, mode);
>  }
> 
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 26a6b73..1d1902a 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -1578,6 +1578,22 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
>  			else
>  				error = PR_MCE_KILL_DEFAULT;
>  			break;
> +		case PR_SET_NOSUID:
> +		{
> +			const struct cred *cred = current->cred;
> +			error = -EINVAL;
> +			if (	(cred->uid != cred->suid) ||
> +				(cred->uid != cred->euid) ||
> +				(cred->uid != cred->fsuid) ||
> +				(cred->gid != cred->sgid) ||
> +				(cred->gid != cred->egid) ||
> +				(cred->gid != cred->fsgid) ||
> +				(atomic_read(&current->signal->count) != 1))
> +				break;
> +			error = 0;
> +			set_tsk_thread_flag(current, TIF_NOSUID);
> +			break;
> +		}
>  		default:
>  			error = -EINVAL;
>  			break;
> diff --git a/security/commoncap.c b/security/commoncap.c
> index f800fdb..28ab286 100644
> --- a/security/commoncap.c
> +++ b/security/commoncap.c
> @@ -389,7 +389,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective)
>  	if (!file_caps_enabled)
>  		return 0;
> 
> -	if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)
> +	if (bprm->nosuid)
>  		return 0;
> 
>  	dentry = dget(bprm->file->f_dentry);
> @@ -869,6 +869,18 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
>  			new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
>  		goto changed;
> 
> +	case PR_SET_NOSUID:
> +	{
> +		const struct cred *cred = current->cred;
> +		error = -EINVAL;

Should this be -EPERM?  not sure...

> +		/* Perform the capabilities checks */
> +		if (!cap_isclear(cred->cap_permitted) ||
> +		    !cap_isclear(cred->cap_effective))

No need to check cap_effective, as no bits can be there which are not
in cap_permitted.

To be honest, I don't think there is much reason to not have this
check done in the main sys_prctl(0 - capabilities themselves are not
optional in the kernel, while cap_task_prctl() is.  So you are setting
us up to have cases where say an apparmor user can call this with uid
0 and/or active capabilities.

> +			goto error;
> +		/* Have the default perform the rest of the work. */
> +		error = -ENOSYS;
> +		goto error;
> +	}
>  	default:
>  		/* No functionality available - continue with default */
>  		error = -ENOSYS;
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 7a374c2..d14cd24 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -2147,7 +2147,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm)
>  	COMMON_AUDIT_DATA_INIT(&ad, FS);
>  	ad.u.fs.path = bprm->file->f_path;
> 
> -	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
> +	if (bprm->nosid)

typo - nosuid?

>  		new_tsec->sid = old_tsec->sid;
> 
>  	if (new_tsec->sid == old_tsec->sid) {
> -- 
> 1.6.5.2.143.g8cc62

Thanks, I think this looks good.

-serge
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Serge E. Hallyn Dec. 30, 2009, 6:35 p.m. UTC | #3
Quoting Andrew G. Morgan (morgan@kernel.org):
> Eric,
> 
> I'm not clear why capabilities need to be manipulated by this feature
> (the pure capability support already has a feature for disabling
> privilege and blocking unsafe, or insufficient privilege, execution).

Not entirely - this option would also prevent file capabilities from
being honored.

> Perhaps I'm just unclear what features can be more safely enabled with
> this in effect - that is, your description suggests that this is why
> you are doing this, but leaves it unclear what they are. Could you
> take a few moments to enumerate some of them?

There are two desirable features which are at the moment unsafe for
unprivileged users, because it allows them to fool privileged (setuid
or bearing file capabilities) programs.  One is to unconditionally
restrict privilege to yourself and all your descendents.  The recent
disablenetwork patchset is one example.  The other is the ability to
make substantial changes to your environment in a private namespace.
A private namespace can protect already-running privileged program,
but cannot protect privilege-bearing binaries.  Unless we prevent
them from bearing privilege.  Which is what this patch does.

-serge
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman Dec. 30, 2009, 8:07 p.m. UTC | #4
"Serge E. Hallyn" <serue@us.ibm.com> writes:

> Quoting Andrew G. Morgan (morgan@kernel.org):
>> Eric,
>> 
>> I'm not clear why capabilities need to be manipulated by this feature
>> (the pure capability support already has a feature for disabling
>> privilege and blocking unsafe, or insufficient privilege, execution).
>
> Not entirely - this option would also prevent file capabilities from
> being honored.

All my patch does is verify the caller doesn't have privilege.

>> Perhaps I'm just unclear what features can be more safely enabled with
>> this in effect - that is, your description suggests that this is why
>> you are doing this, but leaves it unclear what they are. Could you
>> take a few moments to enumerate some of them?
>
> There are two desirable features which are at the moment unsafe for
> unprivileged users, because it allows them to fool privileged (setuid
> or bearing file capabilities) programs.  One is to unconditionally
> restrict privilege to yourself and all your descendents.  The recent
> disablenetwork patchset is one example.  The other is the ability to
> make substantial changes to your environment in a private namespace.
> A private namespace can protect already-running privileged program,
> but cannot protect privilege-bearing binaries.  Unless we prevent
> them from bearing privilege.  Which is what this patch does.

Effectively by ensuring privileges can not be raised this removes
the set of circumstances that lead to the sendmail capabilities bug.

So any kernel feature that requires capabilities only because not
doing so would break backwards compatibility with suid applications.
This includes namespace manipulation, like plan 9.
This includes unsharing pid and network and sysvipc namespaces.

There are probably other useful but currently root only features
that this will allow to be used by unprivileged processes, that
I am not aware of.

In addition to the fact that knowing privileges can not be escalated
by a process is a good feature all by itself.  Run this in a chroot
and the programs will never be able to gain root access even if
there are suid binaries available for them to execute.

Eric
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Serge E. Hallyn Dec. 30, 2009, 8:17 p.m. UTC | #5
Quoting Eric W. Biederman (ebiederm@xmission.com):
> "Serge E. Hallyn" <serue@us.ibm.com> writes:
> 
> > Quoting Andrew G. Morgan (morgan@kernel.org):
> >> Eric,
> >> 
> >> I'm not clear why capabilities need to be manipulated by this feature
> >> (the pure capability support already has a feature for disabling
> >> privilege and blocking unsafe, or insufficient privilege, execution).
> >
> > Not entirely - this option would also prevent file capabilities from
> > being honored.
> 
> All my patch does is verify the caller doesn't have privilege.

No, you shortcut security/commoncap.c:get_file_caps() if (bprm->nosuid),
which is set if test_tsk_thread_flag(current, TIF_NOSUID) at exec.

So if we're in this new no-suid mode, then file capabilities are not
honored.

Which is the right thing to do.

> >> Perhaps I'm just unclear what features can be more safely enabled with
> >> this in effect - that is, your description suggests that this is why
> >> you are doing this, but leaves it unclear what they are. Could you
> >> take a few moments to enumerate some of them?
> >
> > There are two desirable features which are at the moment unsafe for
> > unprivileged users, because it allows them to fool privileged (setuid
> > or bearing file capabilities) programs.  One is to unconditionally
> > restrict privilege to yourself and all your descendents.  The recent
> > disablenetwork patchset is one example.  The other is the ability to
> > make substantial changes to your environment in a private namespace.
> > A private namespace can protect already-running privileged program,
> > but cannot protect privilege-bearing binaries.  Unless we prevent
> > them from bearing privilege.  Which is what this patch does.
> 
> Effectively by ensuring privileges can not be raised this removes
> the set of circumstances that lead to the sendmail capabilities bug.
> 
> So any kernel feature that requires capabilities only because not
> doing so would break backwards compatibility with suid applications.
> This includes namespace manipulation, like plan 9.
> This includes unsharing pid and network and sysvipc namespaces.
> 
> There are probably other useful but currently root only features
> that this will allow to be used by unprivileged processes, that
> I am not aware of.
> 
> In addition to the fact that knowing privileges can not be escalated
> by a process is a good feature all by itself.  Run this in a chroot
> and the programs will never be able to gain root access even if
> there are suid binaries available for them to execute.
> 
> Eric
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman Dec. 30, 2009, 8:45 p.m. UTC | #6
"Serge E. Hallyn" <serue@us.ibm.com> writes:

>> @@ -869,6 +869,18 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
>>  			new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
>>  		goto changed;
>> 
>> +	case PR_SET_NOSUID:
>> +	{
>> +		const struct cred *cred = current->cred;
>> +		error = -EINVAL;
>
> Should this be -EPERM?  not sure...

I intended -EINVAL to say it is simply a set of initial conditions
that are not supported today.  But could be supported if someone
does the audit, and found there are no security issues.

>> +		/* Perform the capabilities checks */
>> +		if (!cap_isclear(cred->cap_permitted) ||
>> +		    !cap_isclear(cred->cap_effective))
>
> No need to check cap_effective, as no bits can be there which are not
> in cap_permitted.
>
> To be honest, I don't think there is much reason to not have this
> check done in the main sys_prctl(0 - capabilities themselves are not
> optional in the kernel, while cap_task_prctl() is.  So you are setting
> us up to have cases where say an apparmor user can call this with uid
> 0 and/or active capabilities.

Sounds fine to me.  I had noticed all of the capabilities checks were
off in their own file, so I had tried to maintain that.  But you are
right we can't remove capabilities so splitting the code like this only
obfuscates it.

>> @@ -2147,7 +2147,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm)
>>  	COMMON_AUDIT_DATA_INIT(&ad, FS);
>>  	ad.u.fs.path = bprm->file->f_path;
>> 
>> -	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
>> +	if (bprm->nosid)
>
> typo - nosuid?

Yep.

Eric
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 375c917..e716203 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -82,6 +82,7 @@  struct thread_info {
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
+#define TIF_NOSUID		9	/* suid exec permanently disabled */
 #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
@@ -107,6 +108,7 @@  struct thread_info {
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_NOSUID		(1 << TIF_NOSUID)
 #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
 #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
diff --git a/fs/exec.c b/fs/exec.c
index 632b02e..5cba5ac 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1131,8 +1131,10 @@  int prepare_binprm(struct linux_binprm *bprm)
 	/* clear any previous set[ug]id data from a previous binary */
 	bprm->cred->euid = current_euid();
 	bprm->cred->egid = current_egid();
-
-	if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
+	bprm->nosuid =
+		(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) ||
+		test_tsk_thread_flag(current, TIF_NOSUID);
+	if (bprm->nosuid) {
 		/* Set-uid? */
 		if (mode & S_ISUID) {
 			bprm->per_clear |= PER_CLEAR_ON_SETID;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index cd4349b..c3b5a30 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -44,6 +44,7 @@  struct linux_binprm{
 #ifdef __alpha__
 	unsigned int taso:1;
 #endif
+	unsigned int nosuid:1;	/* True if suid bits are ignored */
 	unsigned int recursion_depth;
 	struct file * file;
 	struct cred *cred;	/* new credentials */
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index a3baeb2..acb3516 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -102,4 +102,6 @@ 
 
 #define PR_MCE_KILL_GET 34
 
+#define PR_SET_NOSUID	35
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 23bd09c..b91040c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -152,6 +152,10 @@  int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	if (!dumpable && !capable(CAP_SYS_PTRACE))
 		return -EPERM;
 
+	if (test_tsk_thread_flag(current, TIF_NOSUID) &&
+	    !test_tsk_thread_flag(task, TIF_NOSUID))
+		return -EPERM;
+
 	return security_ptrace_access_check(task, mode);
 }
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 26a6b73..1d1902a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1578,6 +1578,22 @@  SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			else
 				error = PR_MCE_KILL_DEFAULT;
 			break;
+		case PR_SET_NOSUID:
+		{
+			const struct cred *cred = current->cred;
+			error = -EINVAL;
+			if (	(cred->uid != cred->suid) ||
+				(cred->uid != cred->euid) ||
+				(cred->uid != cred->fsuid) ||
+				(cred->gid != cred->sgid) ||
+				(cred->gid != cred->egid) ||
+				(cred->gid != cred->fsgid) ||
+				(atomic_read(&current->signal->count) != 1))
+				break;
+			error = 0;
+			set_tsk_thread_flag(current, TIF_NOSUID);
+			break;
+		}
 		default:
 			error = -EINVAL;
 			break;
diff --git a/security/commoncap.c b/security/commoncap.c
index f800fdb..28ab286 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -389,7 +389,7 @@  static int get_file_caps(struct linux_binprm *bprm, bool *effective)
 	if (!file_caps_enabled)
 		return 0;
 
-	if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)
+	if (bprm->nosuid)
 		return 0;
 
 	dentry = dget(bprm->file->f_dentry);
@@ -869,6 +869,18 @@  int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 			new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
 		goto changed;
 
+	case PR_SET_NOSUID:
+	{
+		const struct cred *cred = current->cred;
+		error = -EINVAL;
+		/* Perform the capabilities checks */
+		if (!cap_isclear(cred->cap_permitted) ||
+		    !cap_isclear(cred->cap_effective))
+			goto error;
+		/* Have the default perform the rest of the work. */
+		error = -ENOSYS;
+		goto error;
+	}
 	default:
 		/* No functionality available - continue with default */
 		error = -ENOSYS;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 7a374c2..d14cd24 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2147,7 +2147,7 @@  static int selinux_bprm_set_creds(struct linux_binprm *bprm)
 	COMMON_AUDIT_DATA_INIT(&ad, FS);
 	ad.u.fs.path = bprm->file->f_path;
 
-	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+	if (bprm->nosid)
 		new_tsec->sid = old_tsec->sid;
 
 	if (new_tsec->sid == old_tsec->sid) {