diff mbox

[RFC,v2,3/3] tun: fix LSM/SELinux labeling of tun/tap devices

Message ID 20121205202619.18626.98778.stgit@localhost
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Paul Moore Dec. 5, 2012, 8:26 p.m. UTC
This patch corrects some problems with LSM/SELinux that were introduced
with the multiqueue patchset.  The problem stems from the fact that the
multiqueue work changed the relationship between the tun device and its
associated socket; before the socket persisted for the life of the
device, however after the multiqueue changes the socket only persisted
for the life of the userspace connection (fd open).  For non-persistent
devices this is not an issue, but for persistent devices this can cause
the tun device to lose its SELinux label.

We correct this problem by adding an opaque LSM security blob to the
tun device struct which allows us to have the LSM security state, e.g.
SELinux labeling information, persist for the lifetime of the tun
device.  In the process we tweak the LSM hooks to work with this new
approach to TUN device/socket labeling and introduce a new LSM hook,
security_tun_dev_create_queue(), to approve requests to create a new
TUN queue via TUNSETQUEUE.

The SELinux code has been adjusted to match the new LSM hooks, the
other LSMs do not make use of the LSM TUN controls.  This patch makes
use of the recently added "tun_socket:create_queue" permission to
restrict access to the TUNSETQUEUE operation.  On older SELinux
policies which do not define the "tun_socket:create_queue" permission
the access control decision for TUNSETQUEUE will be handled according
to the SELinux policy's unknown permission setting.

Signed-off-by: Paul Moore <pmoore@redhat.com>
---
 drivers/net/tun.c                 |   26 +++++++++++++---
 include/linux/security.h          |   59 +++++++++++++++++++++++++++++--------
 security/capability.c             |   24 +++++++++++++--
 security/security.c               |   28 ++++++++++++++----
 security/selinux/hooks.c          |   50 ++++++++++++++++++++++++-------
 security/selinux/include/objsec.h |    4 +++
 6 files changed, 153 insertions(+), 38 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jason Wang Dec. 6, 2012, 10:29 a.m. UTC | #1
On Wednesday, December 05, 2012 03:26:19 PM Paul Moore wrote:
> This patch corrects some problems with LSM/SELinux that were introduced
> with the multiqueue patchset.  The problem stems from the fact that the
> multiqueue work changed the relationship between the tun device and its
> associated socket; before the socket persisted for the life of the
> device, however after the multiqueue changes the socket only persisted
> for the life of the userspace connection (fd open).  For non-persistent
> devices this is not an issue, but for persistent devices this can cause
> the tun device to lose its SELinux label.
> 
> We correct this problem by adding an opaque LSM security blob to the
> tun device struct which allows us to have the LSM security state, e.g.
> SELinux labeling information, persist for the lifetime of the tun
> device.  In the process we tweak the LSM hooks to work with this new
> approach to TUN device/socket labeling and introduce a new LSM hook,
> security_tun_dev_create_queue(), to approve requests to create a new
> TUN queue via TUNSETQUEUE.
> 
> The SELinux code has been adjusted to match the new LSM hooks, the
> other LSMs do not make use of the LSM TUN controls.  This patch makes
> use of the recently added "tun_socket:create_queue" permission to
> restrict access to the TUNSETQUEUE operation.  On older SELinux
> policies which do not define the "tun_socket:create_queue" permission
> the access control decision for TUNSETQUEUE will be handled according
> to the SELinux policy's unknown permission setting.
> 
> Signed-off-by: Paul Moore <pmoore@redhat.com>
> ---
>  drivers/net/tun.c                 |   26 +++++++++++++---
>  include/linux/security.h          |   59
> +++++++++++++++++++++++++++++-------- security/capability.c             |  
> 24 +++++++++++++--
>  security/security.c               |   28 ++++++++++++++----
>  security/selinux/hooks.c          |   50 ++++++++++++++++++++++++-------
>  security/selinux/include/objsec.h |    4 +++
>  6 files changed, 153 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 14a0454..fb8148b 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -182,6 +182,7 @@ struct tun_struct {
>  	struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
>  	struct timer_list flow_gc_timer;
>  	unsigned long ageing_time;
> +	void *security;
>  };
> 
>  static inline u32 tun_hashfn(u32 rxhash)
> @@ -465,6 +466,10 @@ static int tun_attach(struct tun_struct *tun, struct
> file *file) struct tun_file *tfile = file->private_data;
>  	int err;
> 
> +	err = security_tun_dev_attach(tfile->socket.sk, tun->security);
> +	if (err < 0)
> +		goto out;
> +
>  	err = -EINVAL;
>  	if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held()))
>  		goto out;
> @@ -1348,6 +1353,7 @@ static void tun_free_netdev(struct net_device *dev)
>  	struct tun_struct *tun = netdev_priv(dev);
> 
>  	tun_flow_uninit(tun);
> +	security_tun_dev_free_security(tun->security);
>  	free_netdev(dev);
>  }
> 
> @@ -1534,7 +1540,7 @@ static int tun_set_iff(struct net *net, struct file
> *file, struct ifreq *ifr)
> 
>  		if (tun_not_capable(tun))
>  			return -EPERM;
> -		err = security_tun_dev_attach(tfile->socket.sk);
> +		err = security_tun_dev_open(tun->security);
>  		if (err < 0)
>  			return err;
> 
> @@ -1587,7 +1593,9 @@ static int tun_set_iff(struct net *net, struct file
> *file, struct ifreq *ifr)
> 
>  		spin_lock_init(&tun->lock);
> 
> -		security_tun_dev_post_create(&tfile->sk);
> +		err = security_tun_dev_alloc_security(&tun->security);
> +		if (err < 0)
> +			goto err_free_dev;
> 
>  		tun_net_init(dev);
> 
> @@ -1767,12 +1775,18 @@ static int tun_set_queue(struct file *file, struct
> ifreq *ifr)
> 
>  		tun = netdev_priv(dev);
>  		if (dev->netdev_ops != &tap_netdev_ops &&
> -			dev->netdev_ops != &tun_netdev_ops)
> +			dev->netdev_ops != &tun_netdev_ops) {
>  			ret = -EINVAL;
> -		else if (tun_not_capable(tun))
> +			goto unlock;
> +		}
> +		if (tun_not_capable(tun)) {
>  			ret = -EPERM;
> -		else
> -			ret = tun_attach(tun, file);
> +			goto unlock;
> +		}
> +		ret = security_tun_dev_create_queue(tun->security);
> +		if (ret < 0)
> +			goto unlock;
> +		ret = tun_attach(tun, file);
>  	} else if (ifr->ifr_flags & IFF_DETACH_QUEUE)
>  		__tun_detach(tfile, false);
>  	else
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 05e88bd..8ea923b 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -983,17 +983,29 @@ static inline void security_free_mnt_opts(struct
> security_mnt_opts *opts) *	tells the LSM to decrement the number of 
secmark
> labeling rules loaded * @req_classify_flow:
>   *	Sets the flow's sid to the openreq sid.
> + * @tun_dev_alloc_security:
> + *	This hook allows a module to allocate a security structure for a TUN
> + *	device.
> + *	@security pointer to a security structure pointer.
> + *	Returns a zero on success, negative values on failure.
> + * @tun_dev_free_security:
> + *	This hook allows a module to free the security structure for a TUN
> + *	device.
> + *	@security pointer to the TUN device's security structure
>   * @tun_dev_create:
>   *	Check permissions prior to creating a new TUN device.
> - * @tun_dev_post_create:
> - *	This hook allows a module to update or allocate a per-socket security
> - *	structure.
> - *	@sk contains the newly created sock structure.
> + * @tun_dev_create_queue:
> + *	Check permissions prior to creating a new TUN device queue.
> + *	@security pointer to the TUN device's security structure.
>   * @tun_dev_attach:
> - *	Check permissions prior to attaching to a persistent TUN device.  This
> - *	hook can also be used by the module to update any security state
> + *	This hook can be used by the module to update any security state
>   *	associated with the TUN device's sock structure.
>   *	@sk contains the existing sock structure.
> + *	@security pointer to the TUN device's security structure.
> + * @tun_dev_open:
> + *	This hook can be used by the module to update any security state
> + *	associated with the TUN device's security structure.
> + *	@security pointer to the TUN devices's security structure.
>   *
>   * Security hooks for XFRM operations.
>   *
> @@ -1613,9 +1625,12 @@ struct security_operations {
>  	void (*secmark_refcount_inc) (void);
>  	void (*secmark_refcount_dec) (void);
>  	void (*req_classify_flow) (const struct request_sock *req, struct flowi
> *fl); -	int (*tun_dev_create)(void);
> -	void (*tun_dev_post_create)(struct sock *sk);
> -	int (*tun_dev_attach)(struct sock *sk);
> +	int (*tun_dev_alloc_security) (void **security);
> +	void (*tun_dev_free_security) (void *security);
> +	int (*tun_dev_create) (void);
> +	int (*tun_dev_create_queue) (void *security);
> +	int (*tun_dev_attach) (struct sock *sk, void *security);
> +	int (*tun_dev_open) (void *security);
>  #endif	/* CONFIG_SECURITY_NETWORK */
> 
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> @@ -2553,9 +2568,12 @@ void security_inet_conn_established(struct sock *sk,
>  int security_secmark_relabel_packet(u32 secid);
>  void security_secmark_refcount_inc(void);
>  void security_secmark_refcount_dec(void);
> +int security_tun_dev_alloc_security(void **security);
> +void security_tun_dev_free_security(void *security);
>  int security_tun_dev_create(void);
> -void security_tun_dev_post_create(struct sock *sk);
> -int security_tun_dev_attach(struct sock *sk);
> +int security_tun_dev_create_queue(void *security);
> +int security_tun_dev_attach(struct sock *sk, void *security);
> +int security_tun_dev_open(void *security);
> 
>  #else	/* CONFIG_SECURITY_NETWORK */
>  static inline int security_unix_stream_connect(struct sock *sock,
> @@ -2720,16 +2738,31 @@ static inline void
> security_secmark_refcount_dec(void) {
>  }
> 
> +static inline int security_tun_dev_alloc_security(void **security)
> +{
> +	return 0;
> +}
> +
> +static inline void security_tun_dev_free_security(void *security)
> +{
> +}
> +
>  static inline int security_tun_dev_create(void)
>  {
>  	return 0;
>  }
> 
> -static inline void security_tun_dev_post_create(struct sock *sk)
> +static inline int security_tun_dev_create_queue(void *security)
> +{
> +	return 0;
> +}
> +
> +static inline int security_tun_dev_attach(struct sock *sk, void *security)
>  {
> +	return 0;
>  }
> 
> -static inline int security_tun_dev_attach(struct sock *sk)
> +static inline int security_tun_dev_open(void *security)
>  {
>  	return 0;
>  }
> diff --git a/security/capability.c b/security/capability.c
> index b14a30c..bf4cbf2 100644
> --- a/security/capability.c
> +++ b/security/capability.c
> @@ -704,16 +704,31 @@ static void cap_req_classify_flow(const struct
> request_sock *req, {
>  }
> 
> +static int cap_tun_dev_alloc_security(void **security)
> +{
> +	return 0;
> +}
> +
> +static void cap_tun_dev_free_security(void *security)
> +{
> +}
> +
>  static int cap_tun_dev_create(void)
>  {
>  	return 0;
>  }
> 
> -static void cap_tun_dev_post_create(struct sock *sk)
> +static int cap_tun_dev_create_queue(void *security)
> +{
> +	return 0;
> +}
> +
> +static int cap_tun_dev_attach(struct sock *sk, void *security)
>  {
> +	return 0;
>  }
> 
> -static int cap_tun_dev_attach(struct sock *sk)
> +static int cap_tun_dev_open(void *security)
>  {
>  	return 0;
>  }
> @@ -1044,8 +1059,11 @@ void __init security_fixup_ops(struct
> security_operations *ops) set_to_cap_if_null(ops, secmark_refcount_inc);
>  	set_to_cap_if_null(ops, secmark_refcount_dec);
>  	set_to_cap_if_null(ops, req_classify_flow);
> +	set_to_cap_if_null(ops, tun_dev_alloc_security);
> +	set_to_cap_if_null(ops, tun_dev_free_security);
>  	set_to_cap_if_null(ops, tun_dev_create);
> -	set_to_cap_if_null(ops, tun_dev_post_create);
> +	set_to_cap_if_null(ops, tun_dev_create_queue);
> +	set_to_cap_if_null(ops, tun_dev_open);
>  	set_to_cap_if_null(ops, tun_dev_attach);
>  #endif	/* CONFIG_SECURITY_NETWORK */
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> diff --git a/security/security.c b/security/security.c
> index 8dcd4ae..4d82654 100644
> --- a/security/security.c
> +++ b/security/security.c
> @@ -1244,24 +1244,42 @@ void security_secmark_refcount_dec(void)
>  }
>  EXPORT_SYMBOL(security_secmark_refcount_dec);
> 
> +int security_tun_dev_alloc_security(void **security)
> +{
> +	return security_ops->tun_dev_alloc_security(security);
> +}
> +EXPORT_SYMBOL(security_tun_dev_alloc_security);
> +
> +void security_tun_dev_free_security(void *security)
> +{
> +	security_ops->tun_dev_free_security(security);
> +}
> +EXPORT_SYMBOL(security_tun_dev_free_security);
> +
>  int security_tun_dev_create(void)
>  {
>  	return security_ops->tun_dev_create();
>  }
>  EXPORT_SYMBOL(security_tun_dev_create);
> 
> -void security_tun_dev_post_create(struct sock *sk)
> +int security_tun_dev_create_queue(void *security)
>  {
> -	return security_ops->tun_dev_post_create(sk);
> +	return security_ops->tun_dev_create_queue(security);
>  }
> -EXPORT_SYMBOL(security_tun_dev_post_create);
> +EXPORT_SYMBOL(security_tun_dev_create_queue);
> 
> -int security_tun_dev_attach(struct sock *sk)
> +int security_tun_dev_attach(struct sock *sk, void *security)
>  {
> -	return security_ops->tun_dev_attach(sk);
> +	return security_ops->tun_dev_attach(sk, security);
>  }
>  EXPORT_SYMBOL(security_tun_dev_attach);
> 
> +int security_tun_dev_open(void *security)
> +{
> +	return security_ops->tun_dev_open(security);
> +}
> +EXPORT_SYMBOL(security_tun_dev_open);
> +
>  #endif	/* CONFIG_SECURITY_NETWORK */
> 
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 61a5336..f1efb08 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -4399,6 +4399,24 @@ static void selinux_req_classify_flow(const struct
> request_sock *req, fl->flowi_secid = req->secid;
>  }
> 
> +static int selinux_tun_dev_alloc_security(void **security)
> +{
> +	struct tun_security_struct *tunsec;
> +
> +	tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL);
> +	if (!tunsec)
> +		return -ENOMEM;
> +	tunsec->sid = current_sid();
> +
> +	*security = tunsec;
> +	return 0;
> +}
> +
> +static void selinux_tun_dev_free_security(void *security)
> +{
> +	kfree(security);
> +}
> +
>  static int selinux_tun_dev_create(void)
>  {
>  	u32 sid = current_sid();
> @@ -4414,8 +4432,17 @@ static int selinux_tun_dev_create(void)
>  			    NULL);
>  }
> 
> -static void selinux_tun_dev_post_create(struct sock *sk)
> +static int selinux_tun_dev_create_queue(void *security)
>  {
> +	struct tun_security_struct *tunsec = security;
> +
> +	return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET,
> +			    TUN_SOCKET__CREATE_QUEUE, NULL);
> +}
> +
> +static int selinux_tun_dev_attach(struct sock *sk, void *security)
> +{
> +	struct tun_security_struct *tunsec = security;
>  	struct sk_security_struct *sksec = sk->sk_security;
> 
>  	/* we don't currently perform any NetLabel based labeling here and it
> @@ -4425,20 +4452,19 @@ static void selinux_tun_dev_post_create(struct sock
> *sk) * cause confusion to the TUN user that had no idea network labeling *
> protocols were being used */
> 
> -	/* see the comments in selinux_tun_dev_create() about why we don't use
> -	 * the sockcreate SID here */
> -
> -	sksec->sid = current_sid();
> +	sksec->sid = tunsec->sid;

Since both tun_set_iff() and tun_set_queue() would call this. I wonder when it 
is called by tun_set_queue() we need some checking just like what we done in 
v1, otherwise it's unconditionally in TUNSETQUEUE. Or we can add them in 
selinux_tun_dev_create_queue()?
>  	sksec->sclass = SECCLASS_TUN_SOCKET;
> +
> +	return 0;
>  }
> 
> -static int selinux_tun_dev_attach(struct sock *sk)
> +static int selinux_tun_dev_open(void *security)
>  {
> -	struct sk_security_struct *sksec = sk->sk_security;
> +	struct tun_security_struct *tunsec = security;
>  	u32 sid = current_sid();
>  	int err;
> 
> -	err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
> +	err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET,
>  			   TUN_SOCKET__RELABELFROM, NULL);
>  	if (err)
>  		return err;
> @@ -4446,8 +4472,7 @@ static int selinux_tun_dev_attach(struct sock *sk)
>  			   TUN_SOCKET__RELABELTO, NULL);
>  	if (err)
>  		return err;
> -
> -	sksec->sid = sid;
> +	tunsec->sid = sid;
> 
>  	return 0;
>  }
> @@ -5642,9 +5667,12 @@ static struct security_operations selinux_ops = {
>  	.secmark_refcount_inc =		selinux_secmark_refcount_inc,
>  	.secmark_refcount_dec =		selinux_secmark_refcount_dec,
>  	.req_classify_flow =		selinux_req_classify_flow,
> +	.tun_dev_alloc_security =	selinux_tun_dev_alloc_security,
> +	.tun_dev_free_security =	selinux_tun_dev_free_security,
>  	.tun_dev_create =		selinux_tun_dev_create,
> -	.tun_dev_post_create = 		selinux_tun_dev_post_create,
> +	.tun_dev_create_queue =		selinux_tun_dev_create_queue,
>  	.tun_dev_attach =		selinux_tun_dev_attach,
> +	.tun_dev_open =			selinux_tun_dev_open,
> 
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
>  	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
> diff --git a/security/selinux/include/objsec.h
> b/security/selinux/include/objsec.h index 26c7eee..aa47bca 100644
> --- a/security/selinux/include/objsec.h
> +++ b/security/selinux/include/objsec.h
> @@ -110,6 +110,10 @@ struct sk_security_struct {
>  	u16 sclass;			/* sock security class */
>  };
> 
> +struct tun_security_struct {
> +	u32 sid;			/* SID for the tun device sockets */
> +};
> +
>  struct key_security_struct {
>  	u32 sid;	/* SID of key */
>  };
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Dec. 6, 2012, 10:33 a.m. UTC | #2
On Wed, Dec 05, 2012 at 03:26:19PM -0500, Paul Moore wrote:
> This patch corrects some problems with LSM/SELinux that were introduced
> with the multiqueue patchset.  The problem stems from the fact that the
> multiqueue work changed the relationship between the tun device and its
> associated socket; before the socket persisted for the life of the
> device, however after the multiqueue changes the socket only persisted
> for the life of the userspace connection (fd open).  For non-persistent
> devices this is not an issue, but for persistent devices this can cause
> the tun device to lose its SELinux label.
> 
> We correct this problem by adding an opaque LSM security blob to the
> tun device struct which allows us to have the LSM security state, e.g.
> SELinux labeling information, persist for the lifetime of the tun
> device.  In the process we tweak the LSM hooks to work with this new
> approach to TUN device/socket labeling and introduce a new LSM hook,
> security_tun_dev_create_queue(), to approve requests to create a new
> TUN queue via TUNSETQUEUE.
> 
> The SELinux code has been adjusted to match the new LSM hooks, the
> other LSMs do not make use of the LSM TUN controls.  This patch makes
> use of the recently added "tun_socket:create_queue" permission to
> restrict access to the TUNSETQUEUE operation.  On older SELinux
> policies which do not define the "tun_socket:create_queue" permission
> the access control decision for TUNSETQUEUE will be handled according
> to the SELinux policy's unknown permission setting.
> 
> Signed-off-by: Paul Moore <pmoore@redhat.com>

OK so just to verify: this can be used to ensure that qemu
process that has the queue fd can only attach it to
a specific device, right?

> ---
>  drivers/net/tun.c                 |   26 +++++++++++++---
>  include/linux/security.h          |   59 +++++++++++++++++++++++++++++--------
>  security/capability.c             |   24 +++++++++++++--
>  security/security.c               |   28 ++++++++++++++----
>  security/selinux/hooks.c          |   50 ++++++++++++++++++++++++-------
>  security/selinux/include/objsec.h |    4 +++
>  6 files changed, 153 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 14a0454..fb8148b 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -182,6 +182,7 @@ struct tun_struct {
>  	struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
>  	struct timer_list flow_gc_timer;
>  	unsigned long ageing_time;
> +	void *security;
>  };
>  
>  static inline u32 tun_hashfn(u32 rxhash)
> @@ -465,6 +466,10 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
>  	struct tun_file *tfile = file->private_data;
>  	int err;
>  
> +	err = security_tun_dev_attach(tfile->socket.sk, tun->security);
> +	if (err < 0)
> +		goto out;
> +
>  	err = -EINVAL;
>  	if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held()))
>  		goto out;
> @@ -1348,6 +1353,7 @@ static void tun_free_netdev(struct net_device *dev)
>  	struct tun_struct *tun = netdev_priv(dev);
>  
>  	tun_flow_uninit(tun);
> +	security_tun_dev_free_security(tun->security);
>  	free_netdev(dev);
>  }
>  
> @@ -1534,7 +1540,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  
>  		if (tun_not_capable(tun))
>  			return -EPERM;
> -		err = security_tun_dev_attach(tfile->socket.sk);
> +		err = security_tun_dev_open(tun->security);
>  		if (err < 0)
>  			return err;
>  
> @@ -1587,7 +1593,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  
>  		spin_lock_init(&tun->lock);
>  
> -		security_tun_dev_post_create(&tfile->sk);
> +		err = security_tun_dev_alloc_security(&tun->security);
> +		if (err < 0)
> +			goto err_free_dev;
>  
>  		tun_net_init(dev);
>  
> @@ -1767,12 +1775,18 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
>  
>  		tun = netdev_priv(dev);
>  		if (dev->netdev_ops != &tap_netdev_ops &&
> -			dev->netdev_ops != &tun_netdev_ops)
> +			dev->netdev_ops != &tun_netdev_ops) {
>  			ret = -EINVAL;
> -		else if (tun_not_capable(tun))
> +			goto unlock;
> +		}
> +		if (tun_not_capable(tun)) {
>  			ret = -EPERM;
> -		else
> -			ret = tun_attach(tun, file);
> +			goto unlock;
> +		}
> +		ret = security_tun_dev_create_queue(tun->security);
> +		if (ret < 0)
> +			goto unlock;
> +		ret = tun_attach(tun, file);
>  	} else if (ifr->ifr_flags & IFF_DETACH_QUEUE)
>  		__tun_detach(tfile, false);
>  	else
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 05e88bd..8ea923b 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -983,17 +983,29 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
>   *	tells the LSM to decrement the number of secmark labeling rules loaded
>   * @req_classify_flow:
>   *	Sets the flow's sid to the openreq sid.
> + * @tun_dev_alloc_security:
> + *	This hook allows a module to allocate a security structure for a TUN
> + *	device.
> + *	@security pointer to a security structure pointer.
> + *	Returns a zero on success, negative values on failure.
> + * @tun_dev_free_security:
> + *	This hook allows a module to free the security structure for a TUN
> + *	device.
> + *	@security pointer to the TUN device's security structure
>   * @tun_dev_create:
>   *	Check permissions prior to creating a new TUN device.
> - * @tun_dev_post_create:
> - *	This hook allows a module to update or allocate a per-socket security
> - *	structure.
> - *	@sk contains the newly created sock structure.
> + * @tun_dev_create_queue:
> + *	Check permissions prior to creating a new TUN device queue.
> + *	@security pointer to the TUN device's security structure.
>   * @tun_dev_attach:
> - *	Check permissions prior to attaching to a persistent TUN device.  This
> - *	hook can also be used by the module to update any security state
> + *	This hook can be used by the module to update any security state
>   *	associated with the TUN device's sock structure.
>   *	@sk contains the existing sock structure.
> + *	@security pointer to the TUN device's security structure.
> + * @tun_dev_open:
> + *	This hook can be used by the module to update any security state
> + *	associated with the TUN device's security structure.
> + *	@security pointer to the TUN devices's security structure.
>   *
>   * Security hooks for XFRM operations.
>   *
> @@ -1613,9 +1625,12 @@ struct security_operations {
>  	void (*secmark_refcount_inc) (void);
>  	void (*secmark_refcount_dec) (void);
>  	void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
> -	int (*tun_dev_create)(void);
> -	void (*tun_dev_post_create)(struct sock *sk);
> -	int (*tun_dev_attach)(struct sock *sk);
> +	int (*tun_dev_alloc_security) (void **security);
> +	void (*tun_dev_free_security) (void *security);
> +	int (*tun_dev_create) (void);
> +	int (*tun_dev_create_queue) (void *security);
> +	int (*tun_dev_attach) (struct sock *sk, void *security);
> +	int (*tun_dev_open) (void *security);
>  #endif	/* CONFIG_SECURITY_NETWORK */
>  
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> @@ -2553,9 +2568,12 @@ void security_inet_conn_established(struct sock *sk,
>  int security_secmark_relabel_packet(u32 secid);
>  void security_secmark_refcount_inc(void);
>  void security_secmark_refcount_dec(void);
> +int security_tun_dev_alloc_security(void **security);
> +void security_tun_dev_free_security(void *security);
>  int security_tun_dev_create(void);
> -void security_tun_dev_post_create(struct sock *sk);
> -int security_tun_dev_attach(struct sock *sk);
> +int security_tun_dev_create_queue(void *security);
> +int security_tun_dev_attach(struct sock *sk, void *security);
> +int security_tun_dev_open(void *security);
>  
>  #else	/* CONFIG_SECURITY_NETWORK */
>  static inline int security_unix_stream_connect(struct sock *sock,
> @@ -2720,16 +2738,31 @@ static inline void security_secmark_refcount_dec(void)
>  {
>  }
>  
> +static inline int security_tun_dev_alloc_security(void **security)
> +{
> +	return 0;
> +}
> +
> +static inline void security_tun_dev_free_security(void *security)
> +{
> +}
> +
>  static inline int security_tun_dev_create(void)
>  {
>  	return 0;
>  }
>  
> -static inline void security_tun_dev_post_create(struct sock *sk)
> +static inline int security_tun_dev_create_queue(void *security)
> +{
> +	return 0;
> +}
> +
> +static inline int security_tun_dev_attach(struct sock *sk, void *security)
>  {
> +	return 0;
>  }
>  
> -static inline int security_tun_dev_attach(struct sock *sk)
> +static inline int security_tun_dev_open(void *security)
>  {
>  	return 0;
>  }
> diff --git a/security/capability.c b/security/capability.c
> index b14a30c..bf4cbf2 100644
> --- a/security/capability.c
> +++ b/security/capability.c
> @@ -704,16 +704,31 @@ static void cap_req_classify_flow(const struct request_sock *req,
>  {
>  }
>  
> +static int cap_tun_dev_alloc_security(void **security)
> +{
> +	return 0;
> +}
> +
> +static void cap_tun_dev_free_security(void *security)
> +{
> +}
> +
>  static int cap_tun_dev_create(void)
>  {
>  	return 0;
>  }
>  
> -static void cap_tun_dev_post_create(struct sock *sk)
> +static int cap_tun_dev_create_queue(void *security)
> +{
> +	return 0;
> +}
> +
> +static int cap_tun_dev_attach(struct sock *sk, void *security)
>  {
> +	return 0;
>  }
>  
> -static int cap_tun_dev_attach(struct sock *sk)
> +static int cap_tun_dev_open(void *security)
>  {
>  	return 0;
>  }
> @@ -1044,8 +1059,11 @@ void __init security_fixup_ops(struct security_operations *ops)
>  	set_to_cap_if_null(ops, secmark_refcount_inc);
>  	set_to_cap_if_null(ops, secmark_refcount_dec);
>  	set_to_cap_if_null(ops, req_classify_flow);
> +	set_to_cap_if_null(ops, tun_dev_alloc_security);
> +	set_to_cap_if_null(ops, tun_dev_free_security);
>  	set_to_cap_if_null(ops, tun_dev_create);
> -	set_to_cap_if_null(ops, tun_dev_post_create);
> +	set_to_cap_if_null(ops, tun_dev_create_queue);
> +	set_to_cap_if_null(ops, tun_dev_open);
>  	set_to_cap_if_null(ops, tun_dev_attach);
>  #endif	/* CONFIG_SECURITY_NETWORK */
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> diff --git a/security/security.c b/security/security.c
> index 8dcd4ae..4d82654 100644
> --- a/security/security.c
> +++ b/security/security.c
> @@ -1244,24 +1244,42 @@ void security_secmark_refcount_dec(void)
>  }
>  EXPORT_SYMBOL(security_secmark_refcount_dec);
>  
> +int security_tun_dev_alloc_security(void **security)
> +{
> +	return security_ops->tun_dev_alloc_security(security);
> +}
> +EXPORT_SYMBOL(security_tun_dev_alloc_security);
> +
> +void security_tun_dev_free_security(void *security)
> +{
> +	security_ops->tun_dev_free_security(security);
> +}
> +EXPORT_SYMBOL(security_tun_dev_free_security);
> +
>  int security_tun_dev_create(void)
>  {
>  	return security_ops->tun_dev_create();
>  }
>  EXPORT_SYMBOL(security_tun_dev_create);
>  
> -void security_tun_dev_post_create(struct sock *sk)
> +int security_tun_dev_create_queue(void *security)
>  {
> -	return security_ops->tun_dev_post_create(sk);
> +	return security_ops->tun_dev_create_queue(security);
>  }
> -EXPORT_SYMBOL(security_tun_dev_post_create);
> +EXPORT_SYMBOL(security_tun_dev_create_queue);
>  
> -int security_tun_dev_attach(struct sock *sk)
> +int security_tun_dev_attach(struct sock *sk, void *security)
>  {
> -	return security_ops->tun_dev_attach(sk);
> +	return security_ops->tun_dev_attach(sk, security);
>  }
>  EXPORT_SYMBOL(security_tun_dev_attach);
>  
> +int security_tun_dev_open(void *security)
> +{
> +	return security_ops->tun_dev_open(security);
> +}
> +EXPORT_SYMBOL(security_tun_dev_open);
> +
>  #endif	/* CONFIG_SECURITY_NETWORK */
>  
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 61a5336..f1efb08 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -4399,6 +4399,24 @@ static void selinux_req_classify_flow(const struct request_sock *req,
>  	fl->flowi_secid = req->secid;
>  }
>  
> +static int selinux_tun_dev_alloc_security(void **security)
> +{
> +	struct tun_security_struct *tunsec;
> +
> +	tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL);
> +	if (!tunsec)
> +		return -ENOMEM;
> +	tunsec->sid = current_sid();
> +
> +	*security = tunsec;
> +	return 0;
> +}
> +
> +static void selinux_tun_dev_free_security(void *security)
> +{
> +	kfree(security);
> +}
> +
>  static int selinux_tun_dev_create(void)
>  {
>  	u32 sid = current_sid();
> @@ -4414,8 +4432,17 @@ static int selinux_tun_dev_create(void)
>  			    NULL);
>  }
>  
> -static void selinux_tun_dev_post_create(struct sock *sk)
> +static int selinux_tun_dev_create_queue(void *security)
>  {
> +	struct tun_security_struct *tunsec = security;
> +
> +	return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET,
> +			    TUN_SOCKET__CREATE_QUEUE, NULL);
> +}
> +
> +static int selinux_tun_dev_attach(struct sock *sk, void *security)
> +{
> +	struct tun_security_struct *tunsec = security;
>  	struct sk_security_struct *sksec = sk->sk_security;
>  
>  	/* we don't currently perform any NetLabel based labeling here and it
> @@ -4425,20 +4452,19 @@ static void selinux_tun_dev_post_create(struct sock *sk)
>  	 * cause confusion to the TUN user that had no idea network labeling
>  	 * protocols were being used */
>  
> -	/* see the comments in selinux_tun_dev_create() about why we don't use
> -	 * the sockcreate SID here */
> -
> -	sksec->sid = current_sid();
> +	sksec->sid = tunsec->sid;
>  	sksec->sclass = SECCLASS_TUN_SOCKET;
> +
> +	return 0;
>  }
>  
> -static int selinux_tun_dev_attach(struct sock *sk)
> +static int selinux_tun_dev_open(void *security)
>  {
> -	struct sk_security_struct *sksec = sk->sk_security;
> +	struct tun_security_struct *tunsec = security;
>  	u32 sid = current_sid();
>  	int err;
>  
> -	err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
> +	err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET,
>  			   TUN_SOCKET__RELABELFROM, NULL);
>  	if (err)
>  		return err;
> @@ -4446,8 +4472,7 @@ static int selinux_tun_dev_attach(struct sock *sk)
>  			   TUN_SOCKET__RELABELTO, NULL);
>  	if (err)
>  		return err;
> -
> -	sksec->sid = sid;
> +	tunsec->sid = sid;
>  
>  	return 0;
>  }
> @@ -5642,9 +5667,12 @@ static struct security_operations selinux_ops = {
>  	.secmark_refcount_inc =		selinux_secmark_refcount_inc,
>  	.secmark_refcount_dec =		selinux_secmark_refcount_dec,
>  	.req_classify_flow =		selinux_req_classify_flow,
> +	.tun_dev_alloc_security =	selinux_tun_dev_alloc_security,
> +	.tun_dev_free_security =	selinux_tun_dev_free_security,
>  	.tun_dev_create =		selinux_tun_dev_create,
> -	.tun_dev_post_create = 		selinux_tun_dev_post_create,
> +	.tun_dev_create_queue =		selinux_tun_dev_create_queue,
>  	.tun_dev_attach =		selinux_tun_dev_attach,
> +	.tun_dev_open =			selinux_tun_dev_open,
>  
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
>  	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
> diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
> index 26c7eee..aa47bca 100644
> --- a/security/selinux/include/objsec.h
> +++ b/security/selinux/include/objsec.h
> @@ -110,6 +110,10 @@ struct sk_security_struct {
>  	u16 sclass;			/* sock security class */
>  };
>  
> +struct tun_security_struct {
> +	u32 sid;			/* SID for the tun device sockets */
> +};
> +
>  struct key_security_struct {
>  	u32 sid;	/* SID of key */
>  };
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Wang Dec. 6, 2012, 1:51 p.m. UTC | #3
On Thursday, December 06, 2012 12:33:25 PM Michael S. Tsirkin wrote:
> On Wed, Dec 05, 2012 at 03:26:19PM -0500, Paul Moore wrote:
> > This patch corrects some problems with LSM/SELinux that were introduced
> > with the multiqueue patchset.  The problem stems from the fact that the
> > multiqueue work changed the relationship between the tun device and its
> > associated socket; before the socket persisted for the life of the
> > device, however after the multiqueue changes the socket only persisted
> > for the life of the userspace connection (fd open).  For non-persistent
> > devices this is not an issue, but for persistent devices this can cause
> > the tun device to lose its SELinux label.
> > 
> > We correct this problem by adding an opaque LSM security blob to the
> > tun device struct which allows us to have the LSM security state, e.g.
> > SELinux labeling information, persist for the lifetime of the tun
> > device.  In the process we tweak the LSM hooks to work with this new
> > approach to TUN device/socket labeling and introduce a new LSM hook,
> > security_tun_dev_create_queue(), to approve requests to create a new
> > TUN queue via TUNSETQUEUE.
> > 
> > The SELinux code has been adjusted to match the new LSM hooks, the
> > other LSMs do not make use of the LSM TUN controls.  This patch makes
> > use of the recently added "tun_socket:create_queue" permission to
> > restrict access to the TUNSETQUEUE operation.  On older SELinux
> > policies which do not define the "tun_socket:create_queue" permission
> > the access control decision for TUNSETQUEUE will be handled according
> > to the SELinux policy's unknown permission setting.
> > 
> > Signed-off-by: Paul Moore <pmoore@redhat.com>
> 
> OK so just to verify: this can be used to ensure that qemu
> process that has the queue fd can only attach it to
> a specific device, right?

I think it can't. And I'm not sure whether we need selinux help to do this. 
Looks like we can do this without selinux through:

1. Don't assign a NULL pointer to tfile->tun during file detaching
2. Compare the ifr_name and the name of tfil->tun, if not equal, return -EINVAL
3. Set a special flag in tun_detach_all() to notify the fd is not usable, and 
can't be used for future attaching.

Afther this, only the device that the fd is first attched through (TUNSETIFF or 
TUNSETQUEUE) is allowed to be attached again.

> 
> > ---
> > 
> >  drivers/net/tun.c                 |   26 +++++++++++++---
> >  include/linux/security.h          |   59
> >  +++++++++++++++++++++++++++++-------- security/capability.c            
> >  |   24 +++++++++++++--
> >  security/security.c               |   28 ++++++++++++++----
> >  security/selinux/hooks.c          |   50 ++++++++++++++++++++++++-------
> >  security/selinux/include/objsec.h |    4 +++
> >  6 files changed, 153 insertions(+), 38 deletions(-)
> > 
> > diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> > index 14a0454..fb8148b 100644
> > --- a/drivers/net/tun.c
> > +++ b/drivers/net/tun.c
> > @@ -182,6 +182,7 @@ struct tun_struct {
> > 
> >  	struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
> >  	struct timer_list flow_gc_timer;
> >  	unsigned long ageing_time;
> > 
> > +	void *security;
> > 
> >  };
> >  
> >  static inline u32 tun_hashfn(u32 rxhash)
> > 
> > @@ -465,6 +466,10 @@ static int tun_attach(struct tun_struct *tun, struct
> > file *file)> 
> >  	struct tun_file *tfile = file->private_data;
> >  	int err;
> > 
> > +	err = security_tun_dev_attach(tfile->socket.sk, tun->security);
> > +	if (err < 0)
> > +		goto out;
> > +
> > 
> >  	err = -EINVAL;
> >  	if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held()))
> >  	
> >  		goto out;
> > 
> > @@ -1348,6 +1353,7 @@ static void tun_free_netdev(struct net_device *dev)
> > 
> >  	struct tun_struct *tun = netdev_priv(dev);
> >  	
> >  	tun_flow_uninit(tun);
> > 
> > +	security_tun_dev_free_security(tun->security);
> > 
> >  	free_netdev(dev);
> >  
> >  }
> > 
> > @@ -1534,7 +1540,7 @@ static int tun_set_iff(struct net *net, struct file
> > *file, struct ifreq *ifr)> 
> >  		if (tun_not_capable(tun))
> >  		
> >  			return -EPERM;
> > 
> > -		err = security_tun_dev_attach(tfile->socket.sk);
> > +		err = security_tun_dev_open(tun->security);
> > 
> >  		if (err < 0)
> >  		
> >  			return err;
> > 
> > @@ -1587,7 +1593,9 @@ static int tun_set_iff(struct net *net, struct file
> > *file, struct ifreq *ifr)> 
> >  		spin_lock_init(&tun->lock);
> > 
> > -		security_tun_dev_post_create(&tfile->sk);
> > +		err = security_tun_dev_alloc_security(&tun->security);
> > +		if (err < 0)
> > +			goto err_free_dev;
> > 
> >  		tun_net_init(dev);
> > 
> > @@ -1767,12 +1775,18 @@ static int tun_set_queue(struct file *file, struct
> > ifreq *ifr)> 
> >  		tun = netdev_priv(dev);
> >  		if (dev->netdev_ops != &tap_netdev_ops &&
> > 
> > -			dev->netdev_ops != &tun_netdev_ops)
> > +			dev->netdev_ops != &tun_netdev_ops) {
> > 
> >  			ret = -EINVAL;
> > 
> > -		else if (tun_not_capable(tun))
> > +			goto unlock;
> > +		}
> > +		if (tun_not_capable(tun)) {
> > 
> >  			ret = -EPERM;
> > 
> > -		else
> > -			ret = tun_attach(tun, file);
> > +			goto unlock;
> > +		}
> > +		ret = security_tun_dev_create_queue(tun->security);
> > +		if (ret < 0)
> > +			goto unlock;
> > +		ret = tun_attach(tun, file);
> > 
> >  	} else if (ifr->ifr_flags & IFF_DETACH_QUEUE)
> >  	
> >  		__tun_detach(tfile, false);
> >  	
> >  	else
> > 
> > diff --git a/include/linux/security.h b/include/linux/security.h
> > index 05e88bd..8ea923b 100644
> > --- a/include/linux/security.h
> > +++ b/include/linux/security.h
> > @@ -983,17 +983,29 @@ static inline void security_free_mnt_opts(struct
> > security_mnt_opts *opts)> 
> >   *	tells the LSM to decrement the number of secmark labeling rules loaded
> >   * @req_classify_flow:
> >   *	Sets the flow's sid to the openreq sid.
> > 
> > + * @tun_dev_alloc_security:
> > + *	This hook allows a module to allocate a security structure for a TUN
> > + *	device.
> > + *	@security pointer to a security structure pointer.
> > + *	Returns a zero on success, negative values on failure.
> > + * @tun_dev_free_security:
> > + *	This hook allows a module to free the security structure for a TUN
> > + *	device.
> > + *	@security pointer to the TUN device's security structure
> > 
> >   * @tun_dev_create:
> >   *	Check permissions prior to creating a new TUN device.
> > 
> > - * @tun_dev_post_create:
> > - *	This hook allows a module to update or allocate a per-socket security
> > - *	structure.
> > - *	@sk contains the newly created sock structure.
> > + * @tun_dev_create_queue:
> > + *	Check permissions prior to creating a new TUN device queue.
> > + *	@security pointer to the TUN device's security structure.
> > 
> >   * @tun_dev_attach:
> > - *	Check permissions prior to attaching to a persistent TUN device.  This
> > - *	hook can also be used by the module to update any security state
> > + *	This hook can be used by the module to update any security state
> > 
> >   *	associated with the TUN device's sock structure.
> >   *	@sk contains the existing sock structure.
> > 
> > + *	@security pointer to the TUN device's security structure.
> > + * @tun_dev_open:
> > + *	This hook can be used by the module to update any security state
> > + *	associated with the TUN device's security structure.
> > + *	@security pointer to the TUN devices's security structure.
> > 
> >   *
> >   * Security hooks for XFRM operations.
> >   *
> > 
> > @@ -1613,9 +1625,12 @@ struct security_operations {
> > 
> >  	void (*secmark_refcount_inc) (void);
> >  	void (*secmark_refcount_dec) (void);
> >  	void (*req_classify_flow) (const struct request_sock *req, struct flowi
> >  	*fl);> 
> > -	int (*tun_dev_create)(void);
> > -	void (*tun_dev_post_create)(struct sock *sk);
> > -	int (*tun_dev_attach)(struct sock *sk);
> > +	int (*tun_dev_alloc_security) (void **security);
> > +	void (*tun_dev_free_security) (void *security);
> > +	int (*tun_dev_create) (void);
> > +	int (*tun_dev_create_queue) (void *security);
> > +	int (*tun_dev_attach) (struct sock *sk, void *security);
> > +	int (*tun_dev_open) (void *security);
> > 
> >  #endif	/* CONFIG_SECURITY_NETWORK */
> >  
> >  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> > 
> > @@ -2553,9 +2568,12 @@ void security_inet_conn_established(struct sock
> > *sk,
> > 
> >  int security_secmark_relabel_packet(u32 secid);
> >  void security_secmark_refcount_inc(void);
> >  void security_secmark_refcount_dec(void);
> > 
> > +int security_tun_dev_alloc_security(void **security);
> > +void security_tun_dev_free_security(void *security);
> > 
> >  int security_tun_dev_create(void);
> > 
> > -void security_tun_dev_post_create(struct sock *sk);
> > -int security_tun_dev_attach(struct sock *sk);
> > +int security_tun_dev_create_queue(void *security);
> > +int security_tun_dev_attach(struct sock *sk, void *security);
> > +int security_tun_dev_open(void *security);
> > 
> >  #else	/* CONFIG_SECURITY_NETWORK */
> >  static inline int security_unix_stream_connect(struct sock *sock,
> > 
> > @@ -2720,16 +2738,31 @@ static inline void
> > security_secmark_refcount_dec(void)> 
> >  {
> >  }
> > 
> > +static inline int security_tun_dev_alloc_security(void **security)
> > +{
> > +	return 0;
> > +}
> > +
> > +static inline void security_tun_dev_free_security(void *security)
> > +{
> > +}
> > +
> > 
> >  static inline int security_tun_dev_create(void)
> >  {
> >  
> >  	return 0;
> >  
> >  }
> > 
> > -static inline void security_tun_dev_post_create(struct sock *sk)
> > +static inline int security_tun_dev_create_queue(void *security)
> > +{
> > +	return 0;
> > +}
> > +
> > +static inline int security_tun_dev_attach(struct sock *sk, void
> > *security)
> > 
> >  {
> > 
> > +	return 0;
> > 
> >  }
> > 
> > -static inline int security_tun_dev_attach(struct sock *sk)
> > +static inline int security_tun_dev_open(void *security)
> > 
> >  {
> >  
> >  	return 0;
> >  
> >  }
> > 
> > diff --git a/security/capability.c b/security/capability.c
> > index b14a30c..bf4cbf2 100644
> > --- a/security/capability.c
> > +++ b/security/capability.c
> > @@ -704,16 +704,31 @@ static void cap_req_classify_flow(const struct
> > request_sock *req,> 
> >  {
> >  }
> > 
> > +static int cap_tun_dev_alloc_security(void **security)
> > +{
> > +	return 0;
> > +}
> > +
> > +static void cap_tun_dev_free_security(void *security)
> > +{
> > +}
> > +
> > 
> >  static int cap_tun_dev_create(void)
> >  {
> >  
> >  	return 0;
> >  
> >  }
> > 
> > -static void cap_tun_dev_post_create(struct sock *sk)
> > +static int cap_tun_dev_create_queue(void *security)
> > +{
> > +	return 0;
> > +}
> > +
> > +static int cap_tun_dev_attach(struct sock *sk, void *security)
> > 
> >  {
> > 
> > +	return 0;
> > 
> >  }
> > 
> > -static int cap_tun_dev_attach(struct sock *sk)
> > +static int cap_tun_dev_open(void *security)
> > 
> >  {
> >  
> >  	return 0;
> >  
> >  }
> > 
> > @@ -1044,8 +1059,11 @@ void __init security_fixup_ops(struct
> > security_operations *ops)> 
> >  	set_to_cap_if_null(ops, secmark_refcount_inc);
> >  	set_to_cap_if_null(ops, secmark_refcount_dec);
> >  	set_to_cap_if_null(ops, req_classify_flow);
> > 
> > +	set_to_cap_if_null(ops, tun_dev_alloc_security);
> > +	set_to_cap_if_null(ops, tun_dev_free_security);
> > 
> >  	set_to_cap_if_null(ops, tun_dev_create);
> > 
> > -	set_to_cap_if_null(ops, tun_dev_post_create);
> > +	set_to_cap_if_null(ops, tun_dev_create_queue);
> > +	set_to_cap_if_null(ops, tun_dev_open);
> > 
> >  	set_to_cap_if_null(ops, tun_dev_attach);
> >  
> >  #endif	/* CONFIG_SECURITY_NETWORK */
> >  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> > 
> > diff --git a/security/security.c b/security/security.c
> > index 8dcd4ae..4d82654 100644
> > --- a/security/security.c
> > +++ b/security/security.c
> > @@ -1244,24 +1244,42 @@ void security_secmark_refcount_dec(void)
> > 
> >  }
> >  EXPORT_SYMBOL(security_secmark_refcount_dec);
> > 
> > +int security_tun_dev_alloc_security(void **security)
> > +{
> > +	return security_ops->tun_dev_alloc_security(security);
> > +}
> > +EXPORT_SYMBOL(security_tun_dev_alloc_security);
> > +
> > +void security_tun_dev_free_security(void *security)
> > +{
> > +	security_ops->tun_dev_free_security(security);
> > +}
> > +EXPORT_SYMBOL(security_tun_dev_free_security);
> > +
> > 
> >  int security_tun_dev_create(void)
> >  {
> >  
> >  	return security_ops->tun_dev_create();
> >  
> >  }
> >  EXPORT_SYMBOL(security_tun_dev_create);
> > 
> > -void security_tun_dev_post_create(struct sock *sk)
> > +int security_tun_dev_create_queue(void *security)
> > 
> >  {
> > 
> > -	return security_ops->tun_dev_post_create(sk);
> > +	return security_ops->tun_dev_create_queue(security);
> > 
> >  }
> > 
> > -EXPORT_SYMBOL(security_tun_dev_post_create);
> > +EXPORT_SYMBOL(security_tun_dev_create_queue);
> > 
> > -int security_tun_dev_attach(struct sock *sk)
> > +int security_tun_dev_attach(struct sock *sk, void *security)
> > 
> >  {
> > 
> > -	return security_ops->tun_dev_attach(sk);
> > +	return security_ops->tun_dev_attach(sk, security);
> > 
> >  }
> >  EXPORT_SYMBOL(security_tun_dev_attach);
> > 
> > +int security_tun_dev_open(void *security)
> > +{
> > +	return security_ops->tun_dev_open(security);
> > +}
> > +EXPORT_SYMBOL(security_tun_dev_open);
> > +
> > 
> >  #endif	/* CONFIG_SECURITY_NETWORK */
> >  
> >  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> > 
> > diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> > index 61a5336..f1efb08 100644
> > --- a/security/selinux/hooks.c
> > +++ b/security/selinux/hooks.c
> > @@ -4399,6 +4399,24 @@ static void selinux_req_classify_flow(const struct
> > request_sock *req,> 
> >  	fl->flowi_secid = req->secid;
> >  
> >  }
> > 
> > +static int selinux_tun_dev_alloc_security(void **security)
> > +{
> > +	struct tun_security_struct *tunsec;
> > +
> > +	tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL);
> > +	if (!tunsec)
> > +		return -ENOMEM;
> > +	tunsec->sid = current_sid();
> > +
> > +	*security = tunsec;
> > +	return 0;
> > +}
> > +
> > +static void selinux_tun_dev_free_security(void *security)
> > +{
> > +	kfree(security);
> > +}
> > +
> > 
> >  static int selinux_tun_dev_create(void)
> >  {
> >  
> >  	u32 sid = current_sid();
> > 
> > @@ -4414,8 +4432,17 @@ static int selinux_tun_dev_create(void)
> > 
> >  			    NULL);
> >  
> >  }
> > 
> > -static void selinux_tun_dev_post_create(struct sock *sk)
> > +static int selinux_tun_dev_create_queue(void *security)
> > 
> >  {
> > 
> > +	struct tun_security_struct *tunsec = security;
> > +
> > +	return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET,
> > +			    TUN_SOCKET__CREATE_QUEUE, NULL);
> > +}
> > +
> > +static int selinux_tun_dev_attach(struct sock *sk, void *security)
> > +{
> > +	struct tun_security_struct *tunsec = security;
> > 
> >  	struct sk_security_struct *sksec = sk->sk_security;
> >  	
> >  	/* we don't currently perform any NetLabel based labeling here and it
> > 
> > @@ -4425,20 +4452,19 @@ static void selinux_tun_dev_post_create(struct
> > sock *sk)> 
> >  	 * cause confusion to the TUN user that had no idea network labeling
> >  	 * protocols were being used */
> > 
> > -	/* see the comments in selinux_tun_dev_create() about why we don't use
> > -	 * the sockcreate SID here */
> > -
> > -	sksec->sid = current_sid();
> > +	sksec->sid = tunsec->sid;
> > 
> >  	sksec->sclass = SECCLASS_TUN_SOCKET;
> > 
> > +
> > +	return 0;
> > 
> >  }
> > 
> > -static int selinux_tun_dev_attach(struct sock *sk)
> > +static int selinux_tun_dev_open(void *security)
> > 
> >  {
> > 
> > -	struct sk_security_struct *sksec = sk->sk_security;
> > +	struct tun_security_struct *tunsec = security;
> > 
> >  	u32 sid = current_sid();
> >  	int err;
> > 
> > -	err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
> > +	err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET,
> > 
> >  			   TUN_SOCKET__RELABELFROM, NULL);
> >  	
> >  	if (err)
> >  	
> >  		return err;
> > 
> > @@ -4446,8 +4472,7 @@ static int selinux_tun_dev_attach(struct sock *sk)
> > 
> >  			   TUN_SOCKET__RELABELTO, NULL);
> >  	
> >  	if (err)
> >  	
> >  		return err;
> > 
> > -
> > -	sksec->sid = sid;
> > +	tunsec->sid = sid;
> > 
> >  	return 0;
> >  
> >  }
> > 
> > @@ -5642,9 +5667,12 @@ static struct security_operations selinux_ops = {
> > 
> >  	.secmark_refcount_inc =		selinux_secmark_refcount_inc,
> >  	.secmark_refcount_dec =		selinux_secmark_refcount_dec,
> >  	.req_classify_flow =		selinux_req_classify_flow,
> > 
> > +	.tun_dev_alloc_security =	selinux_tun_dev_alloc_security,
> > +	.tun_dev_free_security =	selinux_tun_dev_free_security,
> > 
> >  	.tun_dev_create =		selinux_tun_dev_create,
> > 
> > -	.tun_dev_post_create = 		selinux_tun_dev_post_create,
> > +	.tun_dev_create_queue =		selinux_tun_dev_create_queue,
> > 
> >  	.tun_dev_attach =		selinux_tun_dev_attach,
> > 
> > +	.tun_dev_open =			selinux_tun_dev_open,
> > 
> >  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> >  
> >  	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
> > 
> > diff --git a/security/selinux/include/objsec.h
> > b/security/selinux/include/objsec.h index 26c7eee..aa47bca 100644
> > --- a/security/selinux/include/objsec.h
> > +++ b/security/selinux/include/objsec.h
> > @@ -110,6 +110,10 @@ struct sk_security_struct {
> > 
> >  	u16 sclass;			/* sock security class */
> >  
> >  };
> > 
> > +struct tun_security_struct {
> > +	u32 sid;			/* SID for the tun device sockets */
> > +};
> > +
> > 
> >  struct key_security_struct {
> >  
> >  	u32 sid;	/* SID of key */
> >  
> >  };
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Dec. 6, 2012, 2:12 p.m. UTC | #4
On Thu, Dec 06, 2012 at 09:51:13PM +0800, Jason Wang wrote:
> On Thursday, December 06, 2012 12:33:25 PM Michael S. Tsirkin wrote:
> > On Wed, Dec 05, 2012 at 03:26:19PM -0500, Paul Moore wrote:
> > > This patch corrects some problems with LSM/SELinux that were introduced
> > > with the multiqueue patchset.  The problem stems from the fact that the
> > > multiqueue work changed the relationship between the tun device and its
> > > associated socket; before the socket persisted for the life of the
> > > device, however after the multiqueue changes the socket only persisted
> > > for the life of the userspace connection (fd open).  For non-persistent
> > > devices this is not an issue, but for persistent devices this can cause
> > > the tun device to lose its SELinux label.
> > > 
> > > We correct this problem by adding an opaque LSM security blob to the
> > > tun device struct which allows us to have the LSM security state, e.g.
> > > SELinux labeling information, persist for the lifetime of the tun
> > > device.  In the process we tweak the LSM hooks to work with this new
> > > approach to TUN device/socket labeling and introduce a new LSM hook,
> > > security_tun_dev_create_queue(), to approve requests to create a new
> > > TUN queue via TUNSETQUEUE.
> > > 
> > > The SELinux code has been adjusted to match the new LSM hooks, the
> > > other LSMs do not make use of the LSM TUN controls.  This patch makes
> > > use of the recently added "tun_socket:create_queue" permission to
> > > restrict access to the TUNSETQUEUE operation.  On older SELinux
> > > policies which do not define the "tun_socket:create_queue" permission
> > > the access control decision for TUNSETQUEUE will be handled according
> > > to the SELinux policy's unknown permission setting.
> > > 
> > > Signed-off-by: Paul Moore <pmoore@redhat.com>
> > 
> > OK so just to verify: this can be used to ensure that qemu
> > process that has the queue fd can only attach it to
> > a specific device, right?
> 
> I think it can't.
> And I'm not sure whether we need selinux help to do this. 

Well without selinux I doi not see a problem.
If you can do SETQUEUE you can do SETIFF too and then
you can attach to tap.

> Looks like we can do this without selinux through:
> 
> 1. Don't assign a NULL pointer to tfile->tun during file detaching

So you detach from tun but keep a pointer to it? Not good.

> 2. Compare the ifr_name and the name of tfil->tun, if not equal, return -EINVAL
> 3. Set a special flag in tun_detach_all() to notify the fd is not usable, and 
> can't be used for future attaching.
> 
> Afther this, only the device that the fd is first attched through (TUNSETIFF or 
> TUNSETQUEUE) is allowed to be attached again.

This looks like a hard-coded security policy.
The problem is not detach the problem is attach,
we should solve it there.

> > 
> > > ---
> > > 
> > >  drivers/net/tun.c                 |   26 +++++++++++++---
> > >  include/linux/security.h          |   59
> > >  +++++++++++++++++++++++++++++-------- security/capability.c            
> > >  |   24 +++++++++++++--
> > >  security/security.c               |   28 ++++++++++++++----
> > >  security/selinux/hooks.c          |   50 ++++++++++++++++++++++++-------
> > >  security/selinux/include/objsec.h |    4 +++
> > >  6 files changed, 153 insertions(+), 38 deletions(-)
> > > 
> > > diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> > > index 14a0454..fb8148b 100644
> > > --- a/drivers/net/tun.c
> > > +++ b/drivers/net/tun.c
> > > @@ -182,6 +182,7 @@ struct tun_struct {
> > > 
> > >  	struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
> > >  	struct timer_list flow_gc_timer;
> > >  	unsigned long ageing_time;
> > > 
> > > +	void *security;
> > > 
> > >  };
> > >  
> > >  static inline u32 tun_hashfn(u32 rxhash)
> > > 
> > > @@ -465,6 +466,10 @@ static int tun_attach(struct tun_struct *tun, struct
> > > file *file)> 
> > >  	struct tun_file *tfile = file->private_data;
> > >  	int err;
> > > 
> > > +	err = security_tun_dev_attach(tfile->socket.sk, tun->security);
> > > +	if (err < 0)
> > > +		goto out;
> > > +
> > > 
> > >  	err = -EINVAL;
> > >  	if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held()))
> > >  	
> > >  		goto out;
> > > 
> > > @@ -1348,6 +1353,7 @@ static void tun_free_netdev(struct net_device *dev)
> > > 
> > >  	struct tun_struct *tun = netdev_priv(dev);
> > >  	
> > >  	tun_flow_uninit(tun);
> > > 
> > > +	security_tun_dev_free_security(tun->security);
> > > 
> > >  	free_netdev(dev);
> > >  
> > >  }
> > > 
> > > @@ -1534,7 +1540,7 @@ static int tun_set_iff(struct net *net, struct file
> > > *file, struct ifreq *ifr)> 
> > >  		if (tun_not_capable(tun))
> > >  		
> > >  			return -EPERM;
> > > 
> > > -		err = security_tun_dev_attach(tfile->socket.sk);
> > > +		err = security_tun_dev_open(tun->security);
> > > 
> > >  		if (err < 0)
> > >  		
> > >  			return err;
> > > 
> > > @@ -1587,7 +1593,9 @@ static int tun_set_iff(struct net *net, struct file
> > > *file, struct ifreq *ifr)> 
> > >  		spin_lock_init(&tun->lock);
> > > 
> > > -		security_tun_dev_post_create(&tfile->sk);
> > > +		err = security_tun_dev_alloc_security(&tun->security);
> > > +		if (err < 0)
> > > +			goto err_free_dev;
> > > 
> > >  		tun_net_init(dev);
> > > 
> > > @@ -1767,12 +1775,18 @@ static int tun_set_queue(struct file *file, struct
> > > ifreq *ifr)> 
> > >  		tun = netdev_priv(dev);
> > >  		if (dev->netdev_ops != &tap_netdev_ops &&
> > > 
> > > -			dev->netdev_ops != &tun_netdev_ops)
> > > +			dev->netdev_ops != &tun_netdev_ops) {
> > > 
> > >  			ret = -EINVAL;
> > > 
> > > -		else if (tun_not_capable(tun))
> > > +			goto unlock;
> > > +		}
> > > +		if (tun_not_capable(tun)) {
> > > 
> > >  			ret = -EPERM;
> > > 
> > > -		else
> > > -			ret = tun_attach(tun, file);
> > > +			goto unlock;
> > > +		}
> > > +		ret = security_tun_dev_create_queue(tun->security);
> > > +		if (ret < 0)
> > > +			goto unlock;
> > > +		ret = tun_attach(tun, file);
> > > 
> > >  	} else if (ifr->ifr_flags & IFF_DETACH_QUEUE)
> > >  	
> > >  		__tun_detach(tfile, false);
> > >  	
> > >  	else
> > > 
> > > diff --git a/include/linux/security.h b/include/linux/security.h
> > > index 05e88bd..8ea923b 100644
> > > --- a/include/linux/security.h
> > > +++ b/include/linux/security.h
> > > @@ -983,17 +983,29 @@ static inline void security_free_mnt_opts(struct
> > > security_mnt_opts *opts)> 
> > >   *	tells the LSM to decrement the number of secmark labeling rules loaded
> > >   * @req_classify_flow:
> > >   *	Sets the flow's sid to the openreq sid.
> > > 
> > > + * @tun_dev_alloc_security:
> > > + *	This hook allows a module to allocate a security structure for a TUN
> > > + *	device.
> > > + *	@security pointer to a security structure pointer.
> > > + *	Returns a zero on success, negative values on failure.
> > > + * @tun_dev_free_security:
> > > + *	This hook allows a module to free the security structure for a TUN
> > > + *	device.
> > > + *	@security pointer to the TUN device's security structure
> > > 
> > >   * @tun_dev_create:
> > >   *	Check permissions prior to creating a new TUN device.
> > > 
> > > - * @tun_dev_post_create:
> > > - *	This hook allows a module to update or allocate a per-socket security
> > > - *	structure.
> > > - *	@sk contains the newly created sock structure.
> > > + * @tun_dev_create_queue:
> > > + *	Check permissions prior to creating a new TUN device queue.
> > > + *	@security pointer to the TUN device's security structure.
> > > 
> > >   * @tun_dev_attach:
> > > - *	Check permissions prior to attaching to a persistent TUN device.  This
> > > - *	hook can also be used by the module to update any security state
> > > + *	This hook can be used by the module to update any security state
> > > 
> > >   *	associated with the TUN device's sock structure.
> > >   *	@sk contains the existing sock structure.
> > > 
> > > + *	@security pointer to the TUN device's security structure.
> > > + * @tun_dev_open:
> > > + *	This hook can be used by the module to update any security state
> > > + *	associated with the TUN device's security structure.
> > > + *	@security pointer to the TUN devices's security structure.
> > > 
> > >   *
> > >   * Security hooks for XFRM operations.
> > >   *
> > > 
> > > @@ -1613,9 +1625,12 @@ struct security_operations {
> > > 
> > >  	void (*secmark_refcount_inc) (void);
> > >  	void (*secmark_refcount_dec) (void);
> > >  	void (*req_classify_flow) (const struct request_sock *req, struct flowi
> > >  	*fl);> 
> > > -	int (*tun_dev_create)(void);
> > > -	void (*tun_dev_post_create)(struct sock *sk);
> > > -	int (*tun_dev_attach)(struct sock *sk);
> > > +	int (*tun_dev_alloc_security) (void **security);
> > > +	void (*tun_dev_free_security) (void *security);
> > > +	int (*tun_dev_create) (void);
> > > +	int (*tun_dev_create_queue) (void *security);
> > > +	int (*tun_dev_attach) (struct sock *sk, void *security);
> > > +	int (*tun_dev_open) (void *security);
> > > 
> > >  #endif	/* CONFIG_SECURITY_NETWORK */
> > >  
> > >  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> > > 
> > > @@ -2553,9 +2568,12 @@ void security_inet_conn_established(struct sock
> > > *sk,
> > > 
> > >  int security_secmark_relabel_packet(u32 secid);
> > >  void security_secmark_refcount_inc(void);
> > >  void security_secmark_refcount_dec(void);
> > > 
> > > +int security_tun_dev_alloc_security(void **security);
> > > +void security_tun_dev_free_security(void *security);
> > > 
> > >  int security_tun_dev_create(void);
> > > 
> > > -void security_tun_dev_post_create(struct sock *sk);
> > > -int security_tun_dev_attach(struct sock *sk);
> > > +int security_tun_dev_create_queue(void *security);
> > > +int security_tun_dev_attach(struct sock *sk, void *security);
> > > +int security_tun_dev_open(void *security);
> > > 
> > >  #else	/* CONFIG_SECURITY_NETWORK */
> > >  static inline int security_unix_stream_connect(struct sock *sock,
> > > 
> > > @@ -2720,16 +2738,31 @@ static inline void
> > > security_secmark_refcount_dec(void)> 
> > >  {
> > >  }
> > > 
> > > +static inline int security_tun_dev_alloc_security(void **security)
> > > +{
> > > +	return 0;
> > > +}
> > > +
> > > +static inline void security_tun_dev_free_security(void *security)
> > > +{
> > > +}
> > > +
> > > 
> > >  static inline int security_tun_dev_create(void)
> > >  {
> > >  
> > >  	return 0;
> > >  
> > >  }
> > > 
> > > -static inline void security_tun_dev_post_create(struct sock *sk)
> > > +static inline int security_tun_dev_create_queue(void *security)
> > > +{
> > > +	return 0;
> > > +}
> > > +
> > > +static inline int security_tun_dev_attach(struct sock *sk, void
> > > *security)
> > > 
> > >  {
> > > 
> > > +	return 0;
> > > 
> > >  }
> > > 
> > > -static inline int security_tun_dev_attach(struct sock *sk)
> > > +static inline int security_tun_dev_open(void *security)
> > > 
> > >  {
> > >  
> > >  	return 0;
> > >  
> > >  }
> > > 
> > > diff --git a/security/capability.c b/security/capability.c
> > > index b14a30c..bf4cbf2 100644
> > > --- a/security/capability.c
> > > +++ b/security/capability.c
> > > @@ -704,16 +704,31 @@ static void cap_req_classify_flow(const struct
> > > request_sock *req,> 
> > >  {
> > >  }
> > > 
> > > +static int cap_tun_dev_alloc_security(void **security)
> > > +{
> > > +	return 0;
> > > +}
> > > +
> > > +static void cap_tun_dev_free_security(void *security)
> > > +{
> > > +}
> > > +
> > > 
> > >  static int cap_tun_dev_create(void)
> > >  {
> > >  
> > >  	return 0;
> > >  
> > >  }
> > > 
> > > -static void cap_tun_dev_post_create(struct sock *sk)
> > > +static int cap_tun_dev_create_queue(void *security)
> > > +{
> > > +	return 0;
> > > +}
> > > +
> > > +static int cap_tun_dev_attach(struct sock *sk, void *security)
> > > 
> > >  {
> > > 
> > > +	return 0;
> > > 
> > >  }
> > > 
> > > -static int cap_tun_dev_attach(struct sock *sk)
> > > +static int cap_tun_dev_open(void *security)
> > > 
> > >  {
> > >  
> > >  	return 0;
> > >  
> > >  }
> > > 
> > > @@ -1044,8 +1059,11 @@ void __init security_fixup_ops(struct
> > > security_operations *ops)> 
> > >  	set_to_cap_if_null(ops, secmark_refcount_inc);
> > >  	set_to_cap_if_null(ops, secmark_refcount_dec);
> > >  	set_to_cap_if_null(ops, req_classify_flow);
> > > 
> > > +	set_to_cap_if_null(ops, tun_dev_alloc_security);
> > > +	set_to_cap_if_null(ops, tun_dev_free_security);
> > > 
> > >  	set_to_cap_if_null(ops, tun_dev_create);
> > > 
> > > -	set_to_cap_if_null(ops, tun_dev_post_create);
> > > +	set_to_cap_if_null(ops, tun_dev_create_queue);
> > > +	set_to_cap_if_null(ops, tun_dev_open);
> > > 
> > >  	set_to_cap_if_null(ops, tun_dev_attach);
> > >  
> > >  #endif	/* CONFIG_SECURITY_NETWORK */
> > >  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> > > 
> > > diff --git a/security/security.c b/security/security.c
> > > index 8dcd4ae..4d82654 100644
> > > --- a/security/security.c
> > > +++ b/security/security.c
> > > @@ -1244,24 +1244,42 @@ void security_secmark_refcount_dec(void)
> > > 
> > >  }
> > >  EXPORT_SYMBOL(security_secmark_refcount_dec);
> > > 
> > > +int security_tun_dev_alloc_security(void **security)
> > > +{
> > > +	return security_ops->tun_dev_alloc_security(security);
> > > +}
> > > +EXPORT_SYMBOL(security_tun_dev_alloc_security);
> > > +
> > > +void security_tun_dev_free_security(void *security)
> > > +{
> > > +	security_ops->tun_dev_free_security(security);
> > > +}
> > > +EXPORT_SYMBOL(security_tun_dev_free_security);
> > > +
> > > 
> > >  int security_tun_dev_create(void)
> > >  {
> > >  
> > >  	return security_ops->tun_dev_create();
> > >  
> > >  }
> > >  EXPORT_SYMBOL(security_tun_dev_create);
> > > 
> > > -void security_tun_dev_post_create(struct sock *sk)
> > > +int security_tun_dev_create_queue(void *security)
> > > 
> > >  {
> > > 
> > > -	return security_ops->tun_dev_post_create(sk);
> > > +	return security_ops->tun_dev_create_queue(security);
> > > 
> > >  }
> > > 
> > > -EXPORT_SYMBOL(security_tun_dev_post_create);
> > > +EXPORT_SYMBOL(security_tun_dev_create_queue);
> > > 
> > > -int security_tun_dev_attach(struct sock *sk)
> > > +int security_tun_dev_attach(struct sock *sk, void *security)
> > > 
> > >  {
> > > 
> > > -	return security_ops->tun_dev_attach(sk);
> > > +	return security_ops->tun_dev_attach(sk, security);
> > > 
> > >  }
> > >  EXPORT_SYMBOL(security_tun_dev_attach);
> > > 
> > > +int security_tun_dev_open(void *security)
> > > +{
> > > +	return security_ops->tun_dev_open(security);
> > > +}
> > > +EXPORT_SYMBOL(security_tun_dev_open);
> > > +
> > > 
> > >  #endif	/* CONFIG_SECURITY_NETWORK */
> > >  
> > >  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> > > 
> > > diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> > > index 61a5336..f1efb08 100644
> > > --- a/security/selinux/hooks.c
> > > +++ b/security/selinux/hooks.c
> > > @@ -4399,6 +4399,24 @@ static void selinux_req_classify_flow(const struct
> > > request_sock *req,> 
> > >  	fl->flowi_secid = req->secid;
> > >  
> > >  }
> > > 
> > > +static int selinux_tun_dev_alloc_security(void **security)
> > > +{
> > > +	struct tun_security_struct *tunsec;
> > > +
> > > +	tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL);
> > > +	if (!tunsec)
> > > +		return -ENOMEM;
> > > +	tunsec->sid = current_sid();
> > > +
> > > +	*security = tunsec;
> > > +	return 0;
> > > +}
> > > +
> > > +static void selinux_tun_dev_free_security(void *security)
> > > +{
> > > +	kfree(security);
> > > +}
> > > +
> > > 
> > >  static int selinux_tun_dev_create(void)
> > >  {
> > >  
> > >  	u32 sid = current_sid();
> > > 
> > > @@ -4414,8 +4432,17 @@ static int selinux_tun_dev_create(void)
> > > 
> > >  			    NULL);
> > >  
> > >  }
> > > 
> > > -static void selinux_tun_dev_post_create(struct sock *sk)
> > > +static int selinux_tun_dev_create_queue(void *security)
> > > 
> > >  {
> > > 
> > > +	struct tun_security_struct *tunsec = security;
> > > +
> > > +	return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET,
> > > +			    TUN_SOCKET__CREATE_QUEUE, NULL);
> > > +}
> > > +
> > > +static int selinux_tun_dev_attach(struct sock *sk, void *security)
> > > +{
> > > +	struct tun_security_struct *tunsec = security;
> > > 
> > >  	struct sk_security_struct *sksec = sk->sk_security;
> > >  	
> > >  	/* we don't currently perform any NetLabel based labeling here and it
> > > 
> > > @@ -4425,20 +4452,19 @@ static void selinux_tun_dev_post_create(struct
> > > sock *sk)> 
> > >  	 * cause confusion to the TUN user that had no idea network labeling
> > >  	 * protocols were being used */
> > > 
> > > -	/* see the comments in selinux_tun_dev_create() about why we don't use
> > > -	 * the sockcreate SID here */
> > > -
> > > -	sksec->sid = current_sid();
> > > +	sksec->sid = tunsec->sid;
> > > 
> > >  	sksec->sclass = SECCLASS_TUN_SOCKET;
> > > 
> > > +
> > > +	return 0;
> > > 
> > >  }
> > > 
> > > -static int selinux_tun_dev_attach(struct sock *sk)
> > > +static int selinux_tun_dev_open(void *security)
> > > 
> > >  {
> > > 
> > > -	struct sk_security_struct *sksec = sk->sk_security;
> > > +	struct tun_security_struct *tunsec = security;
> > > 
> > >  	u32 sid = current_sid();
> > >  	int err;
> > > 
> > > -	err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
> > > +	err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET,
> > > 
> > >  			   TUN_SOCKET__RELABELFROM, NULL);
> > >  	
> > >  	if (err)
> > >  	
> > >  		return err;
> > > 
> > > @@ -4446,8 +4472,7 @@ static int selinux_tun_dev_attach(struct sock *sk)
> > > 
> > >  			   TUN_SOCKET__RELABELTO, NULL);
> > >  	
> > >  	if (err)
> > >  	
> > >  		return err;
> > > 
> > > -
> > > -	sksec->sid = sid;
> > > +	tunsec->sid = sid;
> > > 
> > >  	return 0;
> > >  
> > >  }
> > > 
> > > @@ -5642,9 +5667,12 @@ static struct security_operations selinux_ops = {
> > > 
> > >  	.secmark_refcount_inc =		selinux_secmark_refcount_inc,
> > >  	.secmark_refcount_dec =		selinux_secmark_refcount_dec,
> > >  	.req_classify_flow =		selinux_req_classify_flow,
> > > 
> > > +	.tun_dev_alloc_security =	selinux_tun_dev_alloc_security,
> > > +	.tun_dev_free_security =	selinux_tun_dev_free_security,
> > > 
> > >  	.tun_dev_create =		selinux_tun_dev_create,
> > > 
> > > -	.tun_dev_post_create = 		selinux_tun_dev_post_create,
> > > +	.tun_dev_create_queue =		selinux_tun_dev_create_queue,
> > > 
> > >  	.tun_dev_attach =		selinux_tun_dev_attach,
> > > 
> > > +	.tun_dev_open =			selinux_tun_dev_open,
> > > 
> > >  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> > >  
> > >  	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
> > > 
> > > diff --git a/security/selinux/include/objsec.h
> > > b/security/selinux/include/objsec.h index 26c7eee..aa47bca 100644
> > > --- a/security/selinux/include/objsec.h
> > > +++ b/security/selinux/include/objsec.h
> > > @@ -110,6 +110,10 @@ struct sk_security_struct {
> > > 
> > >  	u16 sclass;			/* sock security class */
> > >  
> > >  };
> > > 
> > > +struct tun_security_struct {
> > > +	u32 sid;			/* SID for the tun device sockets */
> > > +};
> > > +
> > > 
> > >  struct key_security_struct {
> > >  
> > >  	u32 sid;	/* SID of key */
> > >  
> > >  };
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore Dec. 6, 2012, 3:36 p.m. UTC | #5
On Thursday, December 06, 2012 06:29:54 PM Jason Wang wrote:
> On Wednesday, December 05, 2012 03:26:19 PM Paul Moore wrote:
> > This patch corrects some problems with LSM/SELinux that were introduced
> > with the multiqueue patchset.  The problem stems from the fact that the
> > multiqueue work changed the relationship between the tun device and its
> > associated socket; before the socket persisted for the life of the
> > device, however after the multiqueue changes the socket only persisted
> > for the life of the userspace connection (fd open).  For non-persistent
> > devices this is not an issue, but for persistent devices this can cause
> > the tun device to lose its SELinux label.
> > 
> > We correct this problem by adding an opaque LSM security blob to the
> > tun device struct which allows us to have the LSM security state, e.g.
> > SELinux labeling information, persist for the lifetime of the tun
> > device.  In the process we tweak the LSM hooks to work with this new
> > approach to TUN device/socket labeling and introduce a new LSM hook,
> > security_tun_dev_create_queue(), to approve requests to create a new
> > TUN queue via TUNSETQUEUE.
> > 
> > The SELinux code has been adjusted to match the new LSM hooks, the
> > other LSMs do not make use of the LSM TUN controls.  This patch makes
> > use of the recently added "tun_socket:create_queue" permission to
> > restrict access to the TUNSETQUEUE operation.  On older SELinux
> > policies which do not define the "tun_socket:create_queue" permission
> > the access control decision for TUNSETQUEUE will be handled according
> > to the SELinux policy's unknown permission setting.
> > 
> > Signed-off-by: Paul Moore <pmoore@redhat.com>

...

> > @@ -4425,20 +4452,19 @@ static void selinux_tun_dev_post_create(struct
> > sock
> > *sk) * cause confusion to the TUN user that had no idea network labeling *
> > protocols were being used */
> > 
> > -	/* see the comments in selinux_tun_dev_create() about why we ...
> > -
> > -	sksec->sid = current_sid();
> > +	sksec->sid = tunsec->sid;
> 
> Since both tun_set_iff() and tun_set_queue() would call this. I wonder when
> it is called by tun_set_queue() we need some checking just like what we
> done in v1, otherwise it's unconditionally in TUNSETQUEUE. Or we can add
> them in selinux_tun_dev_create_queue()?

In all the cases that call tun_attach() we have a new socket which needs to be 
labeled based on the tun->security label, yes?  That is what the 
security_tun_dev_attach() code does, there is no need for access control at 
this point as the operation has already been authorized by either 
security_tun_dev_create() (new device), security_tun_dev_create_queue() (new 
queue), or security_tun_dev_open() (opening persistent device).

I think we are all set, or am I missing something?
 
> >  	sksec->sclass = SECCLASS_TUN_SOCKET;
> > 
> > +
> > +	return 0;
> > 
> >  }
Paul Moore Dec. 6, 2012, 3:46 p.m. UTC | #6
On Thursday, December 06, 2012 12:33:25 PM Michael S. Tsirkin wrote:
> On Wed, Dec 05, 2012 at 03:26:19PM -0500, Paul Moore wrote:
> > This patch corrects some problems with LSM/SELinux that were introduced
> > with the multiqueue patchset.  The problem stems from the fact that the
> > multiqueue work changed the relationship between the tun device and its
> > associated socket; before the socket persisted for the life of the
> > device, however after the multiqueue changes the socket only persisted
> > for the life of the userspace connection (fd open).  For non-persistent
> > devices this is not an issue, but for persistent devices this can cause
> > the tun device to lose its SELinux label.
> > 
> > We correct this problem by adding an opaque LSM security blob to the
> > tun device struct which allows us to have the LSM security state, e.g.
> > SELinux labeling information, persist for the lifetime of the tun
> > device.  In the process we tweak the LSM hooks to work with this new
> > approach to TUN device/socket labeling and introduce a new LSM hook,
> > security_tun_dev_create_queue(), to approve requests to create a new
> > TUN queue via TUNSETQUEUE.
> > 
> > The SELinux code has been adjusted to match the new LSM hooks, the
> > other LSMs do not make use of the LSM TUN controls.  This patch makes
> > use of the recently added "tun_socket:create_queue" permission to
> > restrict access to the TUNSETQUEUE operation.  On older SELinux
> > policies which do not define the "tun_socket:create_queue" permission
> > the access control decision for TUNSETQUEUE will be handled according
> > to the SELinux policy's unknown permission setting.
> > 
> > Signed-off-by: Paul Moore <pmoore@redhat.com>
> 
> OK so just to verify: this can be used to ensure that qemu
> process that has the queue fd can only attach it to
> a specific device, right?

Whenever a new queue is created via TUNSETQUEUE/tun_set_queue() the 
security_tun_dev_create_queue() LSM hook is called.  When SELinux is enabled 
this hook ends up calling selinux_tun_dev_create_queue() which checks that the 
calling process (process_t) is allowed to create a new queue on the specified 
device (tundev_t) .  If you are familiar with SELinux security policy, the 
allow rule would look like this:

  allow process_t tundev_t:tun_socket create_queue;

In practice, if we assume libvirt is creating the TUN device and running with 
a SELinux label of virtd_t and that QEMU instances are running with a SELinux 
label of svirt_t then the allow rule would look like this:

  allow svirt_t virtd_t:tun_socket create_queue;

There is also the matter of the MLS/MCS constraints providing additional 
separation but that is another level of detail which I don't believe is 
important for our discussion.
Michael S. Tsirkin Dec. 6, 2012, 4:12 p.m. UTC | #7
On Thu, Dec 06, 2012 at 10:46:11AM -0500, Paul Moore wrote:
> On Thursday, December 06, 2012 12:33:25 PM Michael S. Tsirkin wrote:
> > On Wed, Dec 05, 2012 at 03:26:19PM -0500, Paul Moore wrote:
> > > This patch corrects some problems with LSM/SELinux that were introduced
> > > with the multiqueue patchset.  The problem stems from the fact that the
> > > multiqueue work changed the relationship between the tun device and its
> > > associated socket; before the socket persisted for the life of the
> > > device, however after the multiqueue changes the socket only persisted
> > > for the life of the userspace connection (fd open).  For non-persistent
> > > devices this is not an issue, but for persistent devices this can cause
> > > the tun device to lose its SELinux label.
> > > 
> > > We correct this problem by adding an opaque LSM security blob to the
> > > tun device struct which allows us to have the LSM security state, e.g.
> > > SELinux labeling information, persist for the lifetime of the tun
> > > device.  In the process we tweak the LSM hooks to work with this new
> > > approach to TUN device/socket labeling and introduce a new LSM hook,
> > > security_tun_dev_create_queue(), to approve requests to create a new
> > > TUN queue via TUNSETQUEUE.
> > > 
> > > The SELinux code has been adjusted to match the new LSM hooks, the
> > > other LSMs do not make use of the LSM TUN controls.  This patch makes
> > > use of the recently added "tun_socket:create_queue" permission to
> > > restrict access to the TUNSETQUEUE operation.  On older SELinux
> > > policies which do not define the "tun_socket:create_queue" permission
> > > the access control decision for TUNSETQUEUE will be handled according
> > > to the SELinux policy's unknown permission setting.
> > > 
> > > Signed-off-by: Paul Moore <pmoore@redhat.com>
> > 
> > OK so just to verify: this can be used to ensure that qemu
> > process that has the queue fd can only attach it to
> > a specific device, right?
> 
> Whenever a new queue is created via TUNSETQUEUE/tun_set_queue() the 
> security_tun_dev_create_queue() LSM hook is called.  When SELinux is enabled 
> this hook ends up calling selinux_tun_dev_create_queue() which checks that the 
> calling process (process_t) is allowed to create a new queue on the specified 
> device (tundev_t) .  If you are familiar with SELinux security policy, the 
> allow rule would look like this:
> 
>   allow process_t tundev_t:tun_socket create_queue;
> 
> In practice, if we assume libvirt is creating the TUN device and running with 
> a SELinux label of virtd_t and that QEMU instances are running with a SELinux 
> label of svirt_t then the allow rule would look like this:
> 
>   allow svirt_t virtd_t:tun_socket create_queue;
> 
> There is also the matter of the MLS/MCS constraints providing additional 
> separation but that is another level of detail which I don't believe is 
> important for our discussion.

Hmm. How do the rules for SETIFF look ATM?
I am just checking default policy does not let qemu do with
SETQUEUE something with a device which it can not
attach to using SETIFF.

> -- 
> paul moore
> security and virtualization @ redhat
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore Dec. 6, 2012, 4:56 p.m. UTC | #8
On Thursday, December 06, 2012 06:12:00 PM Michael S. Tsirkin wrote:
> On Thu, Dec 06, 2012 at 10:46:11AM -0500, Paul Moore wrote:
> > On Thursday, December 06, 2012 12:33:25 PM Michael S. Tsirkin wrote:
> > > OK so just to verify: this can be used to ensure that qemu
> > > process that has the queue fd can only attach it to
> > > a specific device, right?
> > 
> > Whenever a new queue is created via TUNSETQUEUE/tun_set_queue() the
> > security_tun_dev_create_queue() LSM hook is called.  When SELinux is
> > enabled this hook ends up calling selinux_tun_dev_create_queue() which
> > checks that the calling process (process_t) is allowed to create a new
> > queue on the specified device (tundev_t) .  If you are familiar with
> > SELinux security policy, the allow rule would look like this:
> >
> >   allow process_t tundev_t:tun_socket create_queue;
> > 
> > In practice, if we assume libvirt is creating the TUN device and running
> > with a SELinux label of virtd_t and that QEMU instances are running with
> > a SELinux label of svirt_t then the allow rule would look like this:
> >
> >   allow svirt_t virtd_t:tun_socket create_queue;
> > 
> > There is also the matter of the MLS/MCS constraints providing additional
> > separation but that is another level of detail which I don't believe is
> > important for our discussion.
> 
> Hmm. How do the rules for SETIFF look ATM?
> I am just checking default policy does not let qemu do with
> SETQUEUE something with a device which it can not
> attach to using SETIFF.

The SETQUEUE/tun_socket:create_queue permissions do not yet exist in any 
released SELinux policy as we are just now adding them with this patchset.  
With current policies loaded into a kernel with this patchset applied the 
SETQUEUE/tun_socket:create_queue permission would be treated according to the 
policy's unknown permission setting.
Michael S. Tsirkin Dec. 6, 2012, 8:57 p.m. UTC | #9
On Thu, Dec 06, 2012 at 11:56:45AM -0500, Paul Moore wrote:
> The SETQUEUE/tun_socket:create_queue permissions do not yet exist in any 
> released SELinux policy as we are just now adding them with this patchset.  
> With current policies loaded into a kernel with this patchset applied the 
> SETQUEUE/tun_socket:create_queue permission would be treated according to the 
> policy's unknown permission setting.


OK I think we need to rethink what we are doing here: what you sent
addresses the problem as stated but I think we mis-stated it.  Let me
try to restate the problem: it is not just selinux problem.  Let's
assume qemu wants to use tun, I (libvirt) don't want to run it as root.

1. TUNSETIFF: I can open tun, attach an fd and pass it to qemu.
Now, qemu does not invoke TUNSETIFF so it can run without
kernel priveledges.

2. TUNSETQUEUE - I can open tun and attach a queue but this
is not what is needed since this automatically switches
to multiqueue mode - we want to change number of queues
on the fly.
So qemu needs to be allowed to run TUNSETQUEUE.
Since this checks tun_not_capable(tun) we would need
to give qemu these priveledges, and we want to avoid this
(I can go into why if it's not obvious).
 
How can we slove this?
I don't see a way without extending the interface.
Here's a simple way to extend it: pass a flag to TUNSETQUEUE
that enables/disables TX on this queue.
If TX is disabled, ignore this queue for flow steering decisions.
Allow TUNSETQUEUE for a non priveledged user if it
it already bound to the currect tun and only changes this flag.


Now I open tun and SETQUEUE with TX disabled flag. Pass it to qemu.
qemu calls SETQUEUE with TX enabled flag.

Jason? Want to try implementing and see what people think?

> -- 
> paul moore
> security and virtualization @ redhat
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore Dec. 6, 2012, 9:09 p.m. UTC | #10
On Thursday, December 06, 2012 10:57:16 PM Michael S. Tsirkin wrote:
> On Thu, Dec 06, 2012 at 11:56:45AM -0500, Paul Moore wrote:
> > The SETQUEUE/tun_socket:create_queue permissions do not yet exist in any
> > released SELinux policy as we are just now adding them with this patchset.
> > With current policies loaded into a kernel with this patchset applied the
> > SETQUEUE/tun_socket:create_queue permission would be treated according to
> > the policy's unknown permission setting.
> 
> OK I think we need to rethink what we are doing here: what you sent
> addresses the problem as stated but I think we mis-stated it.  Let me
> try to restate the problem: it is not just selinux problem. Let's assume
> qemu wants to use tun, I (libvirt) don't want to run it as root.
> 
> 1. TUNSETIFF: I can open tun, attach an fd and pass it to qemu.
> Now, qemu does not invoke TUNSETIFF so it can run without
> kernel priveledges.

Correct me if I'm wrong, but I believe libvirt does this while running as 
root.  Assuming that is the case, why not simply setuid()/setgid() to the same 
credentials as the QEMU instance before creating the TUN device?  You can 
always (re)configure the device afterwards while running as 
root/CAP_NET_ADMIN.
 
> 2. TUNSETQUEUE - I can open tun and attach a queue but this
> is not what is needed since this automatically switches
> to multiqueue mode - we want to change number of queues
> on the fly.
> So qemu needs to be allowed to run TUNSETQUEUE.
> Since this checks tun_not_capable(tun) we would need
> to give qemu these priveledges, and we want to avoid this
> (I can go into why if it's not obvious).

If libvirt creates the TUN device while its effective credentials match those 
of the QEMU instance then the QEMU instance should be able to perform a 
TUNSETQUEUE, yes?

> How can we slove this?
> I don't see a way without extending the interface.
> Here's a simple way to extend it: pass a flag to TUNSETQUEUE
> that enables/disables TX on this queue.
> If TX is disabled, ignore this queue for flow steering decisions.
> Allow TUNSETQUEUE for a non priveledged user if it
> it already bound to the currect tun and only changes this flag.
> 
> Now I open tun and SETQUEUE with TX disabled flag. Pass it to qemu.
> qemu calls SETQUEUE with TX enabled flag.
> 
> Jason? Want to try implementing and see what people think?
Jason Wang Dec. 7, 2012, 5:29 a.m. UTC | #11
On Thursday, December 06, 2012 10:36:11 AM Paul Moore wrote:
> On Thursday, December 06, 2012 06:29:54 PM Jason Wang wrote:
> > On Wednesday, December 05, 2012 03:26:19 PM Paul Moore wrote:
> > > This patch corrects some problems with LSM/SELinux that were introduced
> > > with the multiqueue patchset.  The problem stems from the fact that the
> > > multiqueue work changed the relationship between the tun device and its
> > > associated socket; before the socket persisted for the life of the
> > > device, however after the multiqueue changes the socket only persisted
> > > for the life of the userspace connection (fd open).  For non-persistent
> > > devices this is not an issue, but for persistent devices this can cause
> > > the tun device to lose its SELinux label.
> > > 
> > > We correct this problem by adding an opaque LSM security blob to the
> > > tun device struct which allows us to have the LSM security state, e.g.
> > > SELinux labeling information, persist for the lifetime of the tun
> > > device.  In the process we tweak the LSM hooks to work with this new
> > > approach to TUN device/socket labeling and introduce a new LSM hook,
> > > security_tun_dev_create_queue(), to approve requests to create a new
> > > TUN queue via TUNSETQUEUE.
> > > 
> > > The SELinux code has been adjusted to match the new LSM hooks, the
> > > other LSMs do not make use of the LSM TUN controls.  This patch makes
> > > use of the recently added "tun_socket:create_queue" permission to
> > > restrict access to the TUNSETQUEUE operation.  On older SELinux
> > > policies which do not define the "tun_socket:create_queue" permission
> > > the access control decision for TUNSETQUEUE will be handled according
> > > to the SELinux policy's unknown permission setting.
> > > 
> > > Signed-off-by: Paul Moore <pmoore@redhat.com>
> 
> ...
> 
> > > @@ -4425,20 +4452,19 @@ static void selinux_tun_dev_post_create(struct
> > > sock
> > > *sk) * cause confusion to the TUN user that had no idea network labeling
> > > *
> > > protocols were being used */
> > > 
> > > -	/* see the comments in selinux_tun_dev_create() about why we ...
> > > -
> > > -	sksec->sid = current_sid();
> > > +	sksec->sid = tunsec->sid;
> > 
> > Since both tun_set_iff() and tun_set_queue() would call this. I wonder
> > when
> > it is called by tun_set_queue() we need some checking just like what we
> > done in v1, otherwise it's unconditionally in TUNSETQUEUE. Or we can add
> > them in selinux_tun_dev_create_queue()?
> 
> In all the cases that call tun_attach() we have a new socket which needs to
> be labeled based on the tun->security label, yes?  That is what the

Yes.
> security_tun_dev_attach() code does, there is no need for access control at
> this point as the operation has already been authorized by either
> security_tun_dev_create() (new device), security_tun_dev_create_queue() (new
> queue), or security_tun_dev_open() (opening persistent device).
> 
> I think we are all set, or am I missing something?
> 

Looks fine, thanks for the explanation.
> > >  	sksec->sclass = SECCLASS_TUN_SOCKET;
> > > 
> > > +
> > > +	return 0;
> > > 
> > >  }
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Wang Dec. 7, 2012, 5:41 a.m. UTC | #12
On Thursday, December 06, 2012 10:57:16 PM Michael S. Tsirkin wrote:
> On Thu, Dec 06, 2012 at 11:56:45AM -0500, Paul Moore wrote:
> > The SETQUEUE/tun_socket:create_queue permissions do not yet exist in any
> > released SELinux policy as we are just now adding them with this patchset.
> > With current policies loaded into a kernel with this patchset applied the
> > SETQUEUE/tun_socket:create_queue permission would be treated according to
> > the policy's unknown permission setting.
> 
> OK I think we need to rethink what we are doing here: what you sent
> addresses the problem as stated but I think we mis-stated it.  Let me
> try to restate the problem: it is not just selinux problem.  Let's
> assume qemu wants to use tun, I (libvirt) don't want to run it as root.
> 
> 1. TUNSETIFF: I can open tun, attach an fd and pass it to qemu.
> Now, qemu does not invoke TUNSETIFF so it can run without
> kernel priveledges.
> 
> 2. TUNSETQUEUE - I can open tun and attach a queue but this
> is not what is needed since this automatically switches
> to multiqueue mode - we want to change number of queues
> on the fly.
> So qemu needs to be allowed to run TUNSETQUEUE.
> Since this checks tun_not_capable(tun) we would need
> to give qemu these priveledges, and we want to avoid this
> (I can go into why if it's not obvious).
> 
> How can we slove this?
> I don't see a way without extending the interface.
> Here's a simple way to extend it: pass a flag to TUNSETQUEUE
> that enables/disables TX on this queue.
> If TX is disabled, ignore this queue for flow steering decisions.
> Allow TUNSETQUEUE for a non priveledged user if it
> it already bound to the currect tun and only changes this flag.
> 
> 
> Now I open tun and SETQUEUE with TX disabled flag. Pass it to qemu.
> qemu calls SETQUEUE with TX enabled flag.

So the check of CAP_NET_ADMIN is bypassed in the situation.  And new selinux 
policy is then needed for this new flag.

The only concern with this is whether it could be treated as a kind of host 
network device configuration and need CAP_NET_ADMIN capability. (But it looks 
the only method that we could let qemu change the queue used by tun).
> 
> Jason? Want to try implementing and see what people think?

Sure, will draft a path based on this.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Dec. 7, 2012, 12:25 p.m. UTC | #13
On Thu, Dec 06, 2012 at 04:09:51PM -0500, Paul Moore wrote:
> On Thursday, December 06, 2012 10:57:16 PM Michael S. Tsirkin wrote:
> > On Thu, Dec 06, 2012 at 11:56:45AM -0500, Paul Moore wrote:
> > > The SETQUEUE/tun_socket:create_queue permissions do not yet exist in any
> > > released SELinux policy as we are just now adding them with this patchset.
> > > With current policies loaded into a kernel with this patchset applied the
> > > SETQUEUE/tun_socket:create_queue permission would be treated according to
> > > the policy's unknown permission setting.
> > 
> > OK I think we need to rethink what we are doing here: what you sent
> > addresses the problem as stated but I think we mis-stated it.  Let me
> > try to restate the problem: it is not just selinux problem. Let's assume
> > qemu wants to use tun, I (libvirt) don't want to run it as root.
> > 
> > 1. TUNSETIFF: I can open tun, attach an fd and pass it to qemu.
> > Now, qemu does not invoke TUNSETIFF so it can run without
> > kernel priveledges.
> 
> Correct me if I'm wrong, but I believe libvirt does this while running as 
> root.  Assuming that is the case, why not simply setuid()/setgid() to the same 
> credentials as the QEMU instance before creating the TUN device?  You can 
> always (re)configure the device afterwards while running as 
> root/CAP_NET_ADMIN.

We want isolation between qemu instances.
Giving qemu right to open tun and SETIFF would give it rights
to access any tun device.

There could also be user tun users we want them isolated from qemu.

> > 2. TUNSETQUEUE - I can open tun and attach a queue but this
> > is not what is needed since this automatically switches
> > to multiqueue mode - we want to change number of queues
> > on the fly.
> > So qemu needs to be allowed to run TUNSETQUEUE.
> > Since this checks tun_not_capable(tun) we would need
> > to give qemu these priveledges, and we want to avoid this
> > (I can go into why if it's not obvious).
> 
> If libvirt creates the TUN device while its effective credentials match those 
> of the QEMU instance then the QEMU instance should be able to perform a 
> TUNSETQUEUE, yes?
> 
> > How can we slove this?
> > I don't see a way without extending the interface.
> > Here's a simple way to extend it: pass a flag to TUNSETQUEUE
> > that enables/disables TX on this queue.
> > If TX is disabled, ignore this queue for flow steering decisions.
> > Allow TUNSETQUEUE for a non priveledged user if it
> > it already bound to the currect tun and only changes this flag.
> > 
> > Now I open tun and SETQUEUE with TX disabled flag. Pass it to qemu.
> > qemu calls SETQUEUE with TX enabled flag.
> > 
> > Jason? Want to try implementing and see what people think?
> 
> -- 
> paul moore
> security and virtualization @ redhat
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore Dec. 10, 2012, 5:04 p.m. UTC | #14
On Friday, December 07, 2012 02:25:16 PM Michael S. Tsirkin wrote:
> On Thu, Dec 06, 2012 at 04:09:51PM -0500, Paul Moore wrote:
> > On Thursday, December 06, 2012 10:57:16 PM Michael S. Tsirkin wrote:
> > > On Thu, Dec 06, 2012 at 11:56:45AM -0500, Paul Moore wrote:
> > > > The SETQUEUE/tun_socket:create_queue permissions do not yet exist in
> > > > any released SELinux policy as we are just now adding them with this
> > > > patchset. With current policies loaded into a kernel with this
> > > > patchset applied the SETQUEUE/tun_socket:create_queue permission would
> > > > be treated according to the policy's unknown permission setting.
> > > 
> > > OK I think we need to rethink what we are doing here: what you sent
> > > addresses the problem as stated but I think we mis-stated it.  Let me
> > > try to restate the problem: it is not just selinux problem. Let's assume
> > > qemu wants to use tun, I (libvirt) don't want to run it as root.
> > > 
> > > 1. TUNSETIFF: I can open tun, attach an fd and pass it to qemu.
> > > Now, qemu does not invoke TUNSETIFF so it can run without
> > > kernel priveledges.
> > 
> > Correct me if I'm wrong, but I believe libvirt does this while running as
> > root.  Assuming that is the case, why not simply setuid()/setgid() to the
> > same credentials as the QEMU instance before creating the TUN device? 
> > You can always (re)configure the device afterwards while running as
> > root/CAP_NET_ADMIN.
> 
> We want isolation between qemu instances.

Understood, I agree.

Achieving separation via SELinux is easily done, with libvirt/sVirt already 
doing this for us automatically in most cases; the only thing we will want to 
do is make sure the SELinux policy is aware of the new permission.

Achieving separation via DAC should also be easily done, simply run each QEMU 
instance with a separate UID and/or GID.

> Giving qemu right to open tun and SETIFF would give it rights
> to access any tun device.

I'm quickly looked at tun_chr_open() again and I don't see any special 
rights/privileges required, the same for tun_chr_ioctl() and 
__tun_chr_ioctl().  Looking at tun_set_queue() I see we call tun_not_capable() 
which does a simple DAC check; it must have the same UID/GID or have 
CAP_NET_ADMIN.

I'm having a hard time seeing the problem you are describing; help me 
understand.

> There could also be user tun users we want them isolated from qemu.

Once again, should be possible using either SELinux, DAC, or both.
Michael S. Tsirkin Dec. 10, 2012, 5:26 p.m. UTC | #15
On Mon, Dec 10, 2012 at 12:04:35PM -0500, Paul Moore wrote:
> On Friday, December 07, 2012 02:25:16 PM Michael S. Tsirkin wrote:
> > On Thu, Dec 06, 2012 at 04:09:51PM -0500, Paul Moore wrote:
> > > On Thursday, December 06, 2012 10:57:16 PM Michael S. Tsirkin wrote:
> > > > On Thu, Dec 06, 2012 at 11:56:45AM -0500, Paul Moore wrote:
> > > > > The SETQUEUE/tun_socket:create_queue permissions do not yet exist in
> > > > > any released SELinux policy as we are just now adding them with this
> > > > > patchset. With current policies loaded into a kernel with this
> > > > > patchset applied the SETQUEUE/tun_socket:create_queue permission would
> > > > > be treated according to the policy's unknown permission setting.
> > > > 
> > > > OK I think we need to rethink what we are doing here: what you sent
> > > > addresses the problem as stated but I think we mis-stated it.  Let me
> > > > try to restate the problem: it is not just selinux problem. Let's assume
> > > > qemu wants to use tun, I (libvirt) don't want to run it as root.
> > > > 
> > > > 1. TUNSETIFF: I can open tun, attach an fd and pass it to qemu.
> > > > Now, qemu does not invoke TUNSETIFF so it can run without
> > > > kernel priveledges.
> > > 
> > > Correct me if I'm wrong, but I believe libvirt does this while running as
> > > root.  Assuming that is the case, why not simply setuid()/setgid() to the
> > > same credentials as the QEMU instance before creating the TUN device? 
> > > You can always (re)configure the device afterwards while running as
> > > root/CAP_NET_ADMIN.
> > 
> > We want isolation between qemu instances.
> 
> Understood, I agree.
> 
> Achieving separation via SELinux is easily done, with libvirt/sVirt already 
> doing this for us automatically in most cases; the only thing we will want to 
> do is make sure the SELinux policy is aware of the new permission.
> 
> Achieving separation via DAC should also be easily done, simply run each QEMU 
> instance with a separate UID and/or GID.
> 
> > Giving qemu right to open tun and SETIFF would give it rights
> > to access any tun device.
> 
> I'm quickly looked at tun_chr_open() again and I don't see any special 
> rights/privileges required, the same for tun_chr_ioctl() and 
> __tun_chr_ioctl().  Looking at tun_set_queue() I see we call tun_not_capable() 
> which does a simple DAC check; it must have the same UID/GID or have 
> CAP_NET_ADMIN.
> 
> I'm having a hard time seeing the problem you are describing; help me 
> understand.

The issue is guest controls the number of queues in use.
So qemu would be required to be allowed to call tun_set_queue.
If we allow this we have a problem as one qemu will be
able to access any tun.

At least with DAC, looks like there's a problem. SELinux I think
can address this.

> > There could also be user tun users we want them isolated from qemu.
> 
> Once again, should be possible using either SELinux, DAC, or both.
> 
> -- 
> paul moore
> security and virtualization @ redhat
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore Dec. 10, 2012, 5:33 p.m. UTC | #16
On Monday, December 10, 2012 07:26:56 PM Michael S. Tsirkin wrote:
> On Mon, Dec 10, 2012 at 12:04:35PM -0500, Paul Moore wrote:
> > On Friday, December 07, 2012 02:25:16 PM Michael S. Tsirkin wrote:
> > > On Thu, Dec 06, 2012 at 04:09:51PM -0500, Paul Moore wrote:
> > > > On Thursday, December 06, 2012 10:57:16 PM Michael S. Tsirkin wrote:
> > > > > On Thu, Dec 06, 2012 at 11:56:45AM -0500, Paul Moore wrote:
> > > > > > The SETQUEUE/tun_socket:create_queue permissions do not yet exist
> > > > > > in any released SELinux policy as we are just now adding them with
> > > > > > this patchset. With current policies loaded into a kernel with
> > > > > > this patchset applied the SETQUEUE/tun_socket:create_queue
> > > > > > permission would be treated according to the policy's unknown
> > > > > > permission setting.
> > > > > 
> > > > > OK I think we need to rethink what we are doing here: what you sent
> > > > > addresses the problem as stated but I think we mis-stated it.  Let
> > > > > me try to restate the problem: it is not just selinux problem. Let's
> > > > > assume qemu wants to use tun, I (libvirt) don't want to run it as
> > > > > root.
> > > > > 
> > > > > 1. TUNSETIFF: I can open tun, attach an fd and pass it to qemu.
> > > > > Now, qemu does not invoke TUNSETIFF so it can run without
> > > > > kernel priveledges.
> > > > 
> > > > Correct me if I'm wrong, but I believe libvirt does this while running
> > > > as root.  Assuming that is the case, why not simply setuid()/setgid()
> > > > to the same credentials as the QEMU instance before creating the TUN
> > > > device? You can always (re)configure the device afterwards while
> > > > running as root/CAP_NET_ADMIN.
> > > 
> > > We want isolation between qemu instances.
> > 
> > Understood, I agree.
> > 
> > Achieving separation via SELinux is easily done, with libvirt/sVirt
> > already doing this for us automatically in most cases; the only thing we
> > will want to do is make sure the SELinux policy is aware of the new
> > permission.
> > 
> > Achieving separation via DAC should also be easily done, simply run each
> > QEMU instance with a separate UID and/or GID.
> > 
> > > Giving qemu right to open tun and SETIFF would give it rights
> > > to access any tun device.
> > 
> > I'm quickly looked at tun_chr_open() again and I don't see any special
> > rights/privileges required, the same for tun_chr_ioctl() and
> > __tun_chr_ioctl().  Looking at tun_set_queue() I see we call
> > tun_not_capable() which does a simple DAC check; it must have the same
> > UID/GID or have CAP_NET_ADMIN.
> > 
> > I'm having a hard time seeing the problem you are describing; help me
> > understand.
> 
> The issue is guest controls the number of queues in use.
> So qemu would be required to be allowed to call tun_set_queue.
> If we allow this we have a problem as one qemu will be
> able to access any tun.

QEMU can call tun_set_queue() as long as it satisfies tun_not_capable(), which 
from a practical point of view means that the TUN device was created with the 
same UID/GID as the QEMU instance.  If you want TUN device separation between 
QEMU instances using DAC you need to run each QEMU instance with a different 
UID/GID (which you should be doing anyway if you want DAC enforced general 
separation).

I believe I've stated this point several times now and I don't feel you've 
addressed it properly.
Michael S. Tsirkin Dec. 10, 2012, 5:50 p.m. UTC | #17
On Mon, Dec 10, 2012 at 12:33:49PM -0500, Paul Moore wrote:
> On Monday, December 10, 2012 07:26:56 PM Michael S. Tsirkin wrote:
> > On Mon, Dec 10, 2012 at 12:04:35PM -0500, Paul Moore wrote:
> > > On Friday, December 07, 2012 02:25:16 PM Michael S. Tsirkin wrote:
> > > > On Thu, Dec 06, 2012 at 04:09:51PM -0500, Paul Moore wrote:
> > > > > On Thursday, December 06, 2012 10:57:16 PM Michael S. Tsirkin wrote:
> > > > > > On Thu, Dec 06, 2012 at 11:56:45AM -0500, Paul Moore wrote:
> > > > > > > The SETQUEUE/tun_socket:create_queue permissions do not yet exist
> > > > > > > in any released SELinux policy as we are just now adding them with
> > > > > > > this patchset. With current policies loaded into a kernel with
> > > > > > > this patchset applied the SETQUEUE/tun_socket:create_queue
> > > > > > > permission would be treated according to the policy's unknown
> > > > > > > permission setting.
> > > > > > 
> > > > > > OK I think we need to rethink what we are doing here: what you sent
> > > > > > addresses the problem as stated but I think we mis-stated it.  Let
> > > > > > me try to restate the problem: it is not just selinux problem. Let's
> > > > > > assume qemu wants to use tun, I (libvirt) don't want to run it as
> > > > > > root.
> > > > > > 
> > > > > > 1. TUNSETIFF: I can open tun, attach an fd and pass it to qemu.
> > > > > > Now, qemu does not invoke TUNSETIFF so it can run without
> > > > > > kernel priveledges.
> > > > > 
> > > > > Correct me if I'm wrong, but I believe libvirt does this while running
> > > > > as root.  Assuming that is the case, why not simply setuid()/setgid()
> > > > > to the same credentials as the QEMU instance before creating the TUN
> > > > > device? You can always (re)configure the device afterwards while
> > > > > running as root/CAP_NET_ADMIN.
> > > > 
> > > > We want isolation between qemu instances.
> > > 
> > > Understood, I agree.
> > > 
> > > Achieving separation via SELinux is easily done, with libvirt/sVirt
> > > already doing this for us automatically in most cases; the only thing we
> > > will want to do is make sure the SELinux policy is aware of the new
> > > permission.
> > > 
> > > Achieving separation via DAC should also be easily done, simply run each
> > > QEMU instance with a separate UID and/or GID.
> > > 
> > > > Giving qemu right to open tun and SETIFF would give it rights
> > > > to access any tun device.
> > > 
> > > I'm quickly looked at tun_chr_open() again and I don't see any special
> > > rights/privileges required, the same for tun_chr_ioctl() and
> > > __tun_chr_ioctl().  Looking at tun_set_queue() I see we call
> > > tun_not_capable() which does a simple DAC check; it must have the same
> > > UID/GID or have CAP_NET_ADMIN.
> > > 
> > > I'm having a hard time seeing the problem you are describing; help me
> > > understand.
> > 
> > The issue is guest controls the number of queues in use.
> > So qemu would be required to be allowed to call tun_set_queue.
> > If we allow this we have a problem as one qemu will be
> > able to access any tun.
> 
> QEMU can call tun_set_queue() as long as it satisfies tun_not_capable(), which 
> from a practical point of view means that the TUN device was created with the 
> same UID/GID as the QEMU instance.  If you want TUN device separation between 
> QEMU instances using DAC you need to run each QEMU instance with a different 
> UID/GID (which you should be doing anyway if you want DAC enforced general 
> separation).
> 
> I believe I've stated this point several times now and I don't feel you've 
> addressed it properly.

Look at how it works at the moment:
a priveledged libvirt server calls tun_set_iff
and passes the fd to qemu which is not priveledged.

The result is isolation between qemu instances without
need to create uid per qemu instance.

How do we create multiple queues? It makes sense to
follow this model and pass in fds for individual queues.
However they need to be disabled initially
so libvirt can not do tun_set_queue for us.
When qemu later calls tun_set_queue it will fail which means we
can't utilize multiqueue.

My solution is an unpriveledged variant
of tun_set_queue that only enables/disables
a queue without attach/detach.


> -- 
> paul moore
> security and virtualization @ redhat
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Paris Dec. 10, 2012, 6:42 p.m. UTC | #18
Let me abstract a little here Paul.  Lets say user A starts an
unclassified process and a top secret process.  SELinux policy darn
well better be able to enforce that they can not attach to the same
tun.

Am I missing something here?

On Mon, Dec 10, 2012 at 12:50 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Mon, Dec 10, 2012 at 12:33:49PM -0500, Paul Moore wrote:
>> On Monday, December 10, 2012 07:26:56 PM Michael S. Tsirkin wrote:
>> > On Mon, Dec 10, 2012 at 12:04:35PM -0500, Paul Moore wrote:
>> > > On Friday, December 07, 2012 02:25:16 PM Michael S. Tsirkin wrote:
>> > > > On Thu, Dec 06, 2012 at 04:09:51PM -0500, Paul Moore wrote:
>> > > > > On Thursday, December 06, 2012 10:57:16 PM Michael S. Tsirkin wrote:
>> > > > > > On Thu, Dec 06, 2012 at 11:56:45AM -0500, Paul Moore wrote:
>> > > > > > > The SETQUEUE/tun_socket:create_queue permissions do not yet exist
>> > > > > > > in any released SELinux policy as we are just now adding them with
>> > > > > > > this patchset. With current policies loaded into a kernel with
>> > > > > > > this patchset applied the SETQUEUE/tun_socket:create_queue
>> > > > > > > permission would be treated according to the policy's unknown
>> > > > > > > permission setting.
>> > > > > >
>> > > > > > OK I think we need to rethink what we are doing here: what you sent
>> > > > > > addresses the problem as stated but I think we mis-stated it.  Let
>> > > > > > me try to restate the problem: it is not just selinux problem. Let's
>> > > > > > assume qemu wants to use tun, I (libvirt) don't want to run it as
>> > > > > > root.
>> > > > > >
>> > > > > > 1. TUNSETIFF: I can open tun, attach an fd and pass it to qemu.
>> > > > > > Now, qemu does not invoke TUNSETIFF so it can run without
>> > > > > > kernel priveledges.
>> > > > >
>> > > > > Correct me if I'm wrong, but I believe libvirt does this while running
>> > > > > as root.  Assuming that is the case, why not simply setuid()/setgid()
>> > > > > to the same credentials as the QEMU instance before creating the TUN
>> > > > > device? You can always (re)configure the device afterwards while
>> > > > > running as root/CAP_NET_ADMIN.
>> > > >
>> > > > We want isolation between qemu instances.
>> > >
>> > > Understood, I agree.
>> > >
>> > > Achieving separation via SELinux is easily done, with libvirt/sVirt
>> > > already doing this for us automatically in most cases; the only thing we
>> > > will want to do is make sure the SELinux policy is aware of the new
>> > > permission.
>> > >
>> > > Achieving separation via DAC should also be easily done, simply run each
>> > > QEMU instance with a separate UID and/or GID.
>> > >
>> > > > Giving qemu right to open tun and SETIFF would give it rights
>> > > > to access any tun device.
>> > >
>> > > I'm quickly looked at tun_chr_open() again and I don't see any special
>> > > rights/privileges required, the same for tun_chr_ioctl() and
>> > > __tun_chr_ioctl().  Looking at tun_set_queue() I see we call
>> > > tun_not_capable() which does a simple DAC check; it must have the same
>> > > UID/GID or have CAP_NET_ADMIN.
>> > >
>> > > I'm having a hard time seeing the problem you are describing; help me
>> > > understand.
>> >
>> > The issue is guest controls the number of queues in use.
>> > So qemu would be required to be allowed to call tun_set_queue.
>> > If we allow this we have a problem as one qemu will be
>> > able to access any tun.
>>
>> QEMU can call tun_set_queue() as long as it satisfies tun_not_capable(), which
>> from a practical point of view means that the TUN device was created with the
>> same UID/GID as the QEMU instance.  If you want TUN device separation between
>> QEMU instances using DAC you need to run each QEMU instance with a different
>> UID/GID (which you should be doing anyway if you want DAC enforced general
>> separation).
>>
>> I believe I've stated this point several times now and I don't feel you've
>> addressed it properly.
>
> Look at how it works at the moment:
> a priveledged libvirt server calls tun_set_iff
> and passes the fd to qemu which is not priveledged.
>
> The result is isolation between qemu instances without
> need to create uid per qemu instance.
>
> How do we create multiple queues? It makes sense to
> follow this model and pass in fds for individual queues.
> However they need to be disabled initially
> so libvirt can not do tun_set_queue for us.
> When qemu later calls tun_set_queue it will fail which means we
> can't utilize multiqueue.
>
> My solution is an unpriveledged variant
> of tun_set_queue that only enables/disables
> a queue without attach/detach.
>
>
>> --
>> paul moore
>> security and virtualization @ redhat
> --
> To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore Dec. 10, 2012, 10:21 p.m. UTC | #19
On Monday, December 10, 2012 01:42:12 PM Eric Paris wrote:
> Let me abstract a little here Paul.  Lets say user A starts an
> unclassified process and a top secret process.  SELinux policy darn
> well better be able to enforce that they can not attach to the same
> tun.
> 
> Am I missing something here?

Relax, all the SELinux enforced separation still exists, and works.  We're 
just fixing the LSM/SELinux stuff that was broken with the multiqueue addition 
and adding a new SELinux permission to control access to the new queue 
command.

What we are currently discussing is DAC only.  While Michael have different 
opinions on how to solve the DAC issues, we agree that SELinux works 
correctly.
Paul Moore Dec. 10, 2012, 10:43 p.m. UTC | #20
On Monday, December 10, 2012 07:50:35 PM Michael S. Tsirkin wrote:
> On Mon, Dec 10, 2012 at 12:33:49PM -0500, Paul Moore wrote:
> > On Monday, December 10, 2012 07:26:56 PM Michael S. Tsirkin wrote:
> > > On Mon, Dec 10, 2012 at 12:04:35PM -0500, Paul Moore wrote:
> > > > On Friday, December 07, 2012 02:25:16 PM Michael S. Tsirkin wrote:
> > > > > On Thu, Dec 06, 2012 at 04:09:51PM -0500, Paul Moore wrote:
> > > > > > On Thursday, December 06, 2012 10:57:16 PM Michael S. Tsirkin 
wrote:
> > > > > > > On Thu, Dec 06, 2012 at 11:56:45AM -0500, Paul Moore wrote:
> > > > > > > > The SETQUEUE/tun_socket:create_queue permissions do not yet
> > > > > > > > exist
> > > > > > > > in any released SELinux policy as we are just now adding them
> > > > > > > > with
> > > > > > > > this patchset. With current policies loaded into a kernel with
> > > > > > > > this patchset applied the SETQUEUE/tun_socket:create_queue
> > > > > > > > permission would be treated according to the policy's unknown
> > > > > > > > permission setting.
> > > > > > > 
> > > > > > > OK I think we need to rethink what we are doing here: what you
> > > > > > > sent
> > > > > > > addresses the problem as stated but I think we mis-stated it. 
> > > > > > > Let
> > > > > > > me try to restate the problem: it is not just selinux problem.
> > > > > > > Let's
> > > > > > > assume qemu wants to use tun, I (libvirt) don't want to run it
> > > > > > > as
> > > > > > > root.
> > > > > > > 
> > > > > > > 1. TUNSETIFF: I can open tun, attach an fd and pass it to qemu.
> > > > > > > Now, qemu does not invoke TUNSETIFF so it can run without
> > > > > > > kernel priveledges.
> > > > > > 
> > > > > > Correct me if I'm wrong, but I believe libvirt does this while
> > > > > > running
> > > > > > as root.  Assuming that is the case, why not simply
> > > > > > setuid()/setgid()
> > > > > > to the same credentials as the QEMU instance before creating the
> > > > > > TUN
> > > > > > device? You can always (re)configure the device afterwards while
> > > > > > running as root/CAP_NET_ADMIN.
> > > > > 
> > > > > We want isolation between qemu instances.
> > > > 
> > > > Understood, I agree.
> > > > 
> > > > Achieving separation via SELinux is easily done, with libvirt/sVirt
> > > > already doing this for us automatically in most cases; the only thing
> > > > we
> > > > will want to do is make sure the SELinux policy is aware of the new
> > > > permission.
> > > > 
> > > > Achieving separation via DAC should also be easily done, simply run
> > > > each
> > > > QEMU instance with a separate UID and/or GID.
> > > > 
> > > > > Giving qemu right to open tun and SETIFF would give it rights
> > > > > to access any tun device.
> > > > 
> > > > I'm quickly looked at tun_chr_open() again and I don't see any special
> > > > rights/privileges required, the same for tun_chr_ioctl() and
> > > > __tun_chr_ioctl().  Looking at tun_set_queue() I see we call
> > > > tun_not_capable() which does a simple DAC check; it must have the same
> > > > UID/GID or have CAP_NET_ADMIN.
> > > > 
> > > > I'm having a hard time seeing the problem you are describing; help me
> > > > understand.
> > > 
> > > The issue is guest controls the number of queues in use.
> > > So qemu would be required to be allowed to call tun_set_queue.
> > > If we allow this we have a problem as one qemu will be
> > > able to access any tun.
> > 
> > QEMU can call tun_set_queue() as long as it satisfies tun_not_capable(),
> > which from a practical point of view means that the TUN device was
> > created with the same UID/GID as the QEMU instance.  If you want TUN
> > device separation between QEMU instances using DAC you need to run each
> > QEMU instance with a different UID/GID (which you should be doing anyway
> > if you want DAC enforced general separation).
> > 
> > I believe I've stated this point several times now and I don't feel you've
> > addressed it properly.
> 
> Look at how it works at the moment:
> a priveledged libvirt server calls tun_set_iff
> and passes the fd to qemu which is not priveledged.
> 
> The result is isolation between qemu instances without
> need to create uid per qemu instance.

Okay, good.  That is my understanding.
 
> How do we create multiple queues? It makes sense to
> follow this model and pass in fds for individual queues.

Okay.

> However they need to be disabled initially
> so libvirt can not do tun_set_queue for us.

Unrelated question: why do the queues need to be disabled initially?  Is this 
to prevent traffic from being queued up?  Some other reason?  I'm just curious 
as to the reason ...

> When qemu later calls tun_set_queue it will fail which means we
> can't utilize multiqueue.

I still don't understand why in the multiqueue case libvirt doesn't just 
change it's effective UID/GID when creating the TUN device, or just use the 
TUNSETOWNER/TUNSETGROUP commands. This would solve the problem you describe 
above and - at least to me - seems like a better solution conceptually.

Help me understand why you believe that will not work.

Do you not want to give ownership of the TUN device to QEMU?  That would be 
the only reason I can think of, but all of your comments that I can recall 
have been about isolation between QEMU instances and not access control 
between a QEMU instance and its assigned TUN device.

> My solution is an unpriveledged variant
> of tun_set_queue that only enables/disables
> a queue without attach/detach.
Jason Wang Dec. 11, 2012, 6:41 a.m. UTC | #21
On Monday, December 10, 2012 05:43:49 PM Paul Moore wrote:
> On Monday, December 10, 2012 07:50:35 PM Michael S. Tsirkin wrote:
> > On Mon, Dec 10, 2012 at 12:33:49PM -0500, Paul Moore wrote:
> > > On Monday, December 10, 2012 07:26:56 PM Michael S. Tsirkin wrote:
> > > > On Mon, Dec 10, 2012 at 12:04:35PM -0500, Paul Moore wrote:
> > > > > On Friday, December 07, 2012 02:25:16 PM Michael S. Tsirkin wrote:
> > > > > > On Thu, Dec 06, 2012 at 04:09:51PM -0500, Paul Moore wrote:
> > > > > > > On Thursday, December 06, 2012 10:57:16 PM Michael S. Tsirkin
> 
> wrote:
> > > > > > > > On Thu, Dec 06, 2012 at 11:56:45AM -0500, Paul Moore wrote:
> > > > > > > > > The SETQUEUE/tun_socket:create_queue permissions do not yet
> > > > > > > > > exist
> > > > > > > > > in any released SELinux policy as we are just now adding
> > > > > > > > > them
> > > > > > > > > with
> > > > > > > > > this patchset. With current policies loaded into a kernel
> > > > > > > > > with
> > > > > > > > > this patchset applied the SETQUEUE/tun_socket:create_queue
> > > > > > > > > permission would be treated according to the policy's
> > > > > > > > > unknown
> > > > > > > > > permission setting.
> > > > > > > > 
> > > > > > > > OK I think we need to rethink what we are doing here: what you
> > > > > > > > sent
> > > > > > > > addresses the problem as stated but I think we mis-stated it.
> > > > > > > > Let
> > > > > > > > me try to restate the problem: it is not just selinux problem.
> > > > > > > > Let's
> > > > > > > > assume qemu wants to use tun, I (libvirt) don't want to run it
> > > > > > > > as
> > > > > > > > root.
> > > > > > > > 
> > > > > > > > 1. TUNSETIFF: I can open tun, attach an fd and pass it to
> > > > > > > > qemu.
> > > > > > > > Now, qemu does not invoke TUNSETIFF so it can run without
> > > > > > > > kernel priveledges.
> > > > > > > 
> > > > > > > Correct me if I'm wrong, but I believe libvirt does this while
> > > > > > > running
> > > > > > > as root.  Assuming that is the case, why not simply
> > > > > > > setuid()/setgid()
> > > > > > > to the same credentials as the QEMU instance before creating the
> > > > > > > TUN
> > > > > > > device? You can always (re)configure the device afterwards while
> > > > > > > running as root/CAP_NET_ADMIN.
> > > > > > 
> > > > > > We want isolation between qemu instances.
> > > > > 
> > > > > Understood, I agree.
> > > > > 
> > > > > Achieving separation via SELinux is easily done, with libvirt/sVirt
> > > > > already doing this for us automatically in most cases; the only
> > > > > thing
> > > > > we
> > > > > will want to do is make sure the SELinux policy is aware of the new
> > > > > permission.
> > > > > 
> > > > > Achieving separation via DAC should also be easily done, simply run
> > > > > each
> > > > > QEMU instance with a separate UID and/or GID.
> > > > > 
> > > > > > Giving qemu right to open tun and SETIFF would give it rights
> > > > > > to access any tun device.
> > > > > 
> > > > > I'm quickly looked at tun_chr_open() again and I don't see any
> > > > > special
> > > > > rights/privileges required, the same for tun_chr_ioctl() and
> > > > > __tun_chr_ioctl().  Looking at tun_set_queue() I see we call
> > > > > tun_not_capable() which does a simple DAC check; it must have the
> > > > > same
> > > > > UID/GID or have CAP_NET_ADMIN.
> > > > > 
> > > > > I'm having a hard time seeing the problem you are describing; help
> > > > > me
> > > > > understand.
> > > > 
> > > > The issue is guest controls the number of queues in use.
> > > > So qemu would be required to be allowed to call tun_set_queue.
> > > > If we allow this we have a problem as one qemu will be
> > > > able to access any tun.
> > > 
> > > QEMU can call tun_set_queue() as long as it satisfies tun_not_capable(),
> > > which from a practical point of view means that the TUN device was
> > > created with the same UID/GID as the QEMU instance.  If you want TUN
> > > device separation between QEMU instances using DAC you need to run each
> > > QEMU instance with a different UID/GID (which you should be doing anyway
> > > if you want DAC enforced general separation).
> > > 
> > > I believe I've stated this point several times now and I don't feel
> > > you've
> > > addressed it properly.
> > 
> > Look at how it works at the moment:
> > a priveledged libvirt server calls tun_set_iff
> > and passes the fd to qemu which is not priveledged.
> > 
> > The result is isolation between qemu instances without
> > need to create uid per qemu instance.
> 
> Okay, good.  That is my understanding.
> 
> > How do we create multiple queues? It makes sense to
> > follow this model and pass in fds for individual queues.
> 
> Okay.
> 
> > However they need to be disabled initially
> > so libvirt can not do tun_set_queue for us.
> 
> Unrelated question: why do the queues need to be disabled initially?  Is
> this to prevent traffic from being queued up?  Some other reason?  I'm jus
> curious as to the reason ...

Only one queue is used by default, so queues other than 0 should be disabled 
after creating by either libvirt or qemu. There're several choices:

A. libvirt only calls TUNSETIFF, and passing this fd to qemu. Qemu creates the 
rest of the queues through TUNSETQUEUE, and also disable them by default
B. libvirt calls TUNSETIFF and creates queues through TUNSETQUEUE, then it 
passes all file descriptors to qemu. Qemu disables queues other than 0 by 
default.
C. libvirt call TUNSETIFF, TUNSETQUEUE to create queues and disable all queues 
other than queue 0. Then it can pass all the file descriptors to qemu.

Since qemu is not priveledged, method A is not applicable, since creating 
queues needs CAT_NET_ADMIN. Either B or C is ok if we add an extra flags to 
disable/enable the queue.
> 
> > When qemu later calls tun_set_queue it will fail which means we
> > can't utilize multiqueue.
> 
> I still don't understand why in the multiqueue case libvirt doesn't just
> change it's effective UID/GID when creating the TUN device, or just use the
> TUNSETOWNER/TUNSETGROUP commands. This would solve the problem you describe
> above and - at least to me - seems like a better solution conceptually.

I think it make sense to do this. Have a quick glance on libvirt code, looks 
like it does not call TUNSETOWNER/TUNSETGROUP. Maybe libvirt guys (cc'ed) can 
answer this question.
> 
> Help me understand why you believe that will not work.
> 
> Do you not want to give ownership of the TUN device to QEMU?  That would be
> the only reason I can think of, but all of your comments that I can recall
> have been about isolation between QEMU instances and not access control
> between a QEMU instance and its assigned TUN device.
> 
> > My solution is an unpriveledged variant
> > of tun_set_queue that only enables/disables
> > a queue without attach/detach.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Dec. 12, 2012, 9:10 a.m. UTC | #22
On Mon, Dec 10, 2012 at 05:43:49PM -0500, Paul Moore wrote:
> On Monday, December 10, 2012 07:50:35 PM Michael S. Tsirkin wrote:
> > On Mon, Dec 10, 2012 at 12:33:49PM -0500, Paul Moore wrote:
> > > On Monday, December 10, 2012 07:26:56 PM Michael S. Tsirkin wrote:
> > > > On Mon, Dec 10, 2012 at 12:04:35PM -0500, Paul Moore wrote:
> > > > > On Friday, December 07, 2012 02:25:16 PM Michael S. Tsirkin wrote:
> > > > > > On Thu, Dec 06, 2012 at 04:09:51PM -0500, Paul Moore wrote:
> > > > > > > On Thursday, December 06, 2012 10:57:16 PM Michael S. Tsirkin 
> wrote:
> > > > > > > > On Thu, Dec 06, 2012 at 11:56:45AM -0500, Paul Moore wrote:
> > > > > > > > > The SETQUEUE/tun_socket:create_queue permissions do not yet
> > > > > > > > > exist
> > > > > > > > > in any released SELinux policy as we are just now adding them
> > > > > > > > > with
> > > > > > > > > this patchset. With current policies loaded into a kernel with
> > > > > > > > > this patchset applied the SETQUEUE/tun_socket:create_queue
> > > > > > > > > permission would be treated according to the policy's unknown
> > > > > > > > > permission setting.
> > > > > > > > 
> > > > > > > > OK I think we need to rethink what we are doing here: what you
> > > > > > > > sent
> > > > > > > > addresses the problem as stated but I think we mis-stated it. 
> > > > > > > > Let
> > > > > > > > me try to restate the problem: it is not just selinux problem.
> > > > > > > > Let's
> > > > > > > > assume qemu wants to use tun, I (libvirt) don't want to run it
> > > > > > > > as
> > > > > > > > root.
> > > > > > > > 
> > > > > > > > 1. TUNSETIFF: I can open tun, attach an fd and pass it to qemu.
> > > > > > > > Now, qemu does not invoke TUNSETIFF so it can run without
> > > > > > > > kernel priveledges.
> > > > > > > 
> > > > > > > Correct me if I'm wrong, but I believe libvirt does this while
> > > > > > > running
> > > > > > > as root.  Assuming that is the case, why not simply
> > > > > > > setuid()/setgid()
> > > > > > > to the same credentials as the QEMU instance before creating the
> > > > > > > TUN
> > > > > > > device? You can always (re)configure the device afterwards while
> > > > > > > running as root/CAP_NET_ADMIN.
> > > > > > 
> > > > > > We want isolation between qemu instances.
> > > > > 
> > > > > Understood, I agree.
> > > > > 
> > > > > Achieving separation via SELinux is easily done, with libvirt/sVirt
> > > > > already doing this for us automatically in most cases; the only thing
> > > > > we
> > > > > will want to do is make sure the SELinux policy is aware of the new
> > > > > permission.
> > > > > 
> > > > > Achieving separation via DAC should also be easily done, simply run
> > > > > each
> > > > > QEMU instance with a separate UID and/or GID.
> > > > > 
> > > > > > Giving qemu right to open tun and SETIFF would give it rights
> > > > > > to access any tun device.
> > > > > 
> > > > > I'm quickly looked at tun_chr_open() again and I don't see any special
> > > > > rights/privileges required, the same for tun_chr_ioctl() and
> > > > > __tun_chr_ioctl().  Looking at tun_set_queue() I see we call
> > > > > tun_not_capable() which does a simple DAC check; it must have the same
> > > > > UID/GID or have CAP_NET_ADMIN.
> > > > > 
> > > > > I'm having a hard time seeing the problem you are describing; help me
> > > > > understand.
> > > > 
> > > > The issue is guest controls the number of queues in use.
> > > > So qemu would be required to be allowed to call tun_set_queue.
> > > > If we allow this we have a problem as one qemu will be
> > > > able to access any tun.
> > > 
> > > QEMU can call tun_set_queue() as long as it satisfies tun_not_capable(),
> > > which from a practical point of view means that the TUN device was
> > > created with the same UID/GID as the QEMU instance.  If you want TUN
> > > device separation between QEMU instances using DAC you need to run each
> > > QEMU instance with a different UID/GID (which you should be doing anyway
> > > if you want DAC enforced general separation).
> > > 
> > > I believe I've stated this point several times now and I don't feel you've
> > > addressed it properly.
> > 
> > Look at how it works at the moment:
> > a priveledged libvirt server calls tun_set_iff
> > and passes the fd to qemu which is not priveledged.
> > 
> > The result is isolation between qemu instances without
> > need to create uid per qemu instance.
> 
> Okay, good.  That is my understanding.
>  
> > How do we create multiple queues? It makes sense to
> > follow this model and pass in fds for individual queues.
> 
> Okay.
> 
> > However they need to be disabled initially
> > so libvirt can not do tun_set_queue for us.
> 
> Unrelated question: why do the queues need to be disabled initially?  Is this 
> to prevent traffic from being queued up?  Some other reason?  I'm just curious 
> as to the reason ...

Yes.
Basically because old guests only use a single queue.
If a guest comes along and declares multiqueue support
we can queue up traffic on new queues but if we
do this with a legacy guest it will not be able to
consume it.



> > can't utilize multiqueue.
> 
> I still don't understand why in the multiqueue case libvirt doesn't just 
> change it's effective UID/GID when creating the TUN device, or just use the 
> TUNSETOWNER/TUNSETGROUP commands. This would solve the problem you describe 
> above and - at least to me - seems like a better solution conceptually.
> 
> Help me understand why you believe that will not work.
> 
> Do you not want to give ownership of the TUN device to QEMU?  That would be 
> the only reason I can think of, but all of your comments that I can recall 
> have been about isolation between QEMU instances and not access control 
> between a QEMU instance and its assigned TUN device.

I think I might have confused things more than clarified them.
Let me comment on specific lines in patch that worry me
that will make it clear I hope.

> > My solution is an unpriveledged variant
> > of tun_set_queue that only enables/disables
> > a queue without attach/detach.
> 
> -- 
> paul moore
> security and virtualization @ redhat
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Dec. 12, 2012, 9:22 a.m. UTC | #23
On Wed, Dec 05, 2012 at 03:26:19PM -0500, Paul Moore wrote:
> This patch corrects some problems with LSM/SELinux that were introduced
> with the multiqueue patchset.  The problem stems from the fact that the
> multiqueue work changed the relationship between the tun device and its
> associated socket; before the socket persisted for the life of the
> device, however after the multiqueue changes the socket only persisted
> for the life of the userspace connection (fd open).  For non-persistent
> devices this is not an issue, but for persistent devices this can cause
> the tun device to lose its SELinux label.
> 
> We correct this problem by adding an opaque LSM security blob to the
> tun device struct which allows us to have the LSM security state, e.g.
> SELinux labeling information, persist for the lifetime of the tun
> device.  In the process we tweak the LSM hooks to work with this new
> approach to TUN device/socket labeling and introduce a new LSM hook,
> security_tun_dev_create_queue(), to approve requests to create a new
> TUN queue via TUNSETQUEUE.
> 
> The SELinux code has been adjusted to match the new LSM hooks, the
> other LSMs do not make use of the LSM TUN controls.  This patch makes
> use of the recently added "tun_socket:create_queue" permission to
> restrict access to the TUNSETQUEUE operation.  On older SELinux
> policies which do not define the "tun_socket:create_queue" permission
> the access control decision for TUNSETQUEUE will be handled according
> to the SELinux policy's unknown permission setting.
> 
> Signed-off-by: Paul Moore <pmoore@redhat.com>
> ---
>  drivers/net/tun.c                 |   26 +++++++++++++---
>  include/linux/security.h          |   59 +++++++++++++++++++++++++++++--------
>  security/capability.c             |   24 +++++++++++++--
>  security/security.c               |   28 ++++++++++++++----
>  security/selinux/hooks.c          |   50 ++++++++++++++++++++++++-------
>  security/selinux/include/objsec.h |    4 +++
>  6 files changed, 153 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 14a0454..fb8148b 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -182,6 +182,7 @@ struct tun_struct {
>  	struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
>  	struct timer_list flow_gc_timer;
>  	unsigned long ageing_time;
> +	void *security;
>  };
>  
>  static inline u32 tun_hashfn(u32 rxhash)
> @@ -465,6 +466,10 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
>  	struct tun_file *tfile = file->private_data;
>  	int err;
>  
> +	err = security_tun_dev_attach(tfile->socket.sk, tun->security);
> +	if (err < 0)
> +		goto out;
> +
>  	err = -EINVAL;
>  	if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held()))
>  		goto out;

This hook triggers with both set_queue and set_iff,
and it also seems to trigger when attaching to a
persistent device and when creating a new one. But I
believe we might want to be able to allow one but not the other.

For example:
	- we might want to allow qemu to do set_queue but not set_iff
	- we might want to configure presistent devices and
	  prevent a user from adding new ones

> @@ -1348,6 +1353,7 @@ static void tun_free_netdev(struct net_device *dev)
>  	struct tun_struct *tun = netdev_priv(dev);
>  
>  	tun_flow_uninit(tun);
> +	security_tun_dev_free_security(tun->security);
>  	free_netdev(dev);
>  }
>  
> @@ -1534,7 +1540,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  
>  		if (tun_not_capable(tun))
>  			return -EPERM;
> -		err = security_tun_dev_attach(tfile->socket.sk);
> +		err = security_tun_dev_open(tun->security);
>  		if (err < 0)
>  			return err;
>  
> @@ -1587,7 +1593,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  
>  		spin_lock_init(&tun->lock);
>  
> -		security_tun_dev_post_create(&tfile->sk);
> +		err = security_tun_dev_alloc_security(&tun->security);
> +		if (err < 0)
> +			goto err_free_dev;
>  
>  		tun_net_init(dev);
>  
> @@ -1767,12 +1775,18 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
>  
>  		tun = netdev_priv(dev);
>  		if (dev->netdev_ops != &tap_netdev_ops &&
> -			dev->netdev_ops != &tun_netdev_ops)
> +			dev->netdev_ops != &tun_netdev_ops) {
>  			ret = -EINVAL;
> -		else if (tun_not_capable(tun))
> +			goto unlock;
> +		}
> +		if (tun_not_capable(tun)) {
>  			ret = -EPERM;
> -		else
> -			ret = tun_attach(tun, file);
> +			goto unlock;
> +		}
> +		ret = security_tun_dev_create_queue(tun->security);
> +		if (ret < 0)
> +			goto unlock;
> +		ret = tun_attach(tun, file);
>  	} else if (ifr->ifr_flags & IFF_DETACH_QUEUE)
>  		__tun_detach(tfile, false);
>  	else
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 05e88bd..8ea923b 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -983,17 +983,29 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
>   *	tells the LSM to decrement the number of secmark labeling rules loaded
>   * @req_classify_flow:
>   *	Sets the flow's sid to the openreq sid.
> + * @tun_dev_alloc_security:
> + *	This hook allows a module to allocate a security structure for a TUN
> + *	device.
> + *	@security pointer to a security structure pointer.
> + *	Returns a zero on success, negative values on failure.
> + * @tun_dev_free_security:
> + *	This hook allows a module to free the security structure for a TUN
> + *	device.
> + *	@security pointer to the TUN device's security structure
>   * @tun_dev_create:
>   *	Check permissions prior to creating a new TUN device.
> - * @tun_dev_post_create:
> - *	This hook allows a module to update or allocate a per-socket security
> - *	structure.
> - *	@sk contains the newly created sock structure.

I worry that removing a hook hurt users that use it in their
ecurity policy.

> + * @tun_dev_create_queue:
> + *	Check permissions prior to creating a new TUN device queue.
> + *	@security pointer to the TUN device's security structure.
>   * @tun_dev_attach:
> - *	Check permissions prior to attaching to a persistent TUN device.  This
> - *	hook can also be used by the module to update any security state
> + *	This hook can be used by the module to update any security state
>   *	associated with the TUN device's sock structure.
>   *	@sk contains the existing sock structure.
> + *	@security pointer to the TUN device's security structure.
> + * @tun_dev_open:
> + *	This hook can be used by the module to update any security state
> + *	associated with the TUN device's security structure.
> + *	@security pointer to the TUN devices's security structure.
>   *
>   * Security hooks for XFRM operations.
>   *
> @@ -1613,9 +1625,12 @@ struct security_operations {
>  	void (*secmark_refcount_inc) (void);
>  	void (*secmark_refcount_dec) (void);
>  	void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
> -	int (*tun_dev_create)(void);
> -	void (*tun_dev_post_create)(struct sock *sk);
> -	int (*tun_dev_attach)(struct sock *sk);
> +	int (*tun_dev_alloc_security) (void **security);
> +	void (*tun_dev_free_security) (void *security);
> +	int (*tun_dev_create) (void);
> +	int (*tun_dev_create_queue) (void *security);
> +	int (*tun_dev_attach) (struct sock *sk, void *security);
> +	int (*tun_dev_open) (void *security);
>  #endif	/* CONFIG_SECURITY_NETWORK */
>  
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> @@ -2553,9 +2568,12 @@ void security_inet_conn_established(struct sock *sk,
>  int security_secmark_relabel_packet(u32 secid);
>  void security_secmark_refcount_inc(void);
>  void security_secmark_refcount_dec(void);
> +int security_tun_dev_alloc_security(void **security);
> +void security_tun_dev_free_security(void *security);
>  int security_tun_dev_create(void);
> -void security_tun_dev_post_create(struct sock *sk);
> -int security_tun_dev_attach(struct sock *sk);
> +int security_tun_dev_create_queue(void *security);
> +int security_tun_dev_attach(struct sock *sk, void *security);
> +int security_tun_dev_open(void *security);
>  
>  #else	/* CONFIG_SECURITY_NETWORK */
>  static inline int security_unix_stream_connect(struct sock *sock,
> @@ -2720,16 +2738,31 @@ static inline void security_secmark_refcount_dec(void)
>  {
>  }
>  
> +static inline int security_tun_dev_alloc_security(void **security)
> +{
> +	return 0;
> +}
> +
> +static inline void security_tun_dev_free_security(void *security)
> +{
> +}
> +
>  static inline int security_tun_dev_create(void)
>  {
>  	return 0;
>  }
>  
> -static inline void security_tun_dev_post_create(struct sock *sk)
> +static inline int security_tun_dev_create_queue(void *security)
> +{
> +	return 0;
> +}
> +
> +static inline int security_tun_dev_attach(struct sock *sk, void *security)
>  {
> +	return 0;
>  }
>  
> -static inline int security_tun_dev_attach(struct sock *sk)
> +static inline int security_tun_dev_open(void *security)
>  {
>  	return 0;
>  }
> diff --git a/security/capability.c b/security/capability.c
> index b14a30c..bf4cbf2 100644
> --- a/security/capability.c
> +++ b/security/capability.c
> @@ -704,16 +704,31 @@ static void cap_req_classify_flow(const struct request_sock *req,
>  {
>  }
>  
> +static int cap_tun_dev_alloc_security(void **security)
> +{
> +	return 0;
> +}
> +
> +static void cap_tun_dev_free_security(void *security)
> +{
> +}
> +
>  static int cap_tun_dev_create(void)
>  {
>  	return 0;
>  }
>  
> -static void cap_tun_dev_post_create(struct sock *sk)
> +static int cap_tun_dev_create_queue(void *security)
> +{
> +	return 0;
> +}
> +
> +static int cap_tun_dev_attach(struct sock *sk, void *security)
>  {
> +	return 0;
>  }
>  
> -static int cap_tun_dev_attach(struct sock *sk)
> +static int cap_tun_dev_open(void *security)
>  {
>  	return 0;
>  }
> @@ -1044,8 +1059,11 @@ void __init security_fixup_ops(struct security_operations *ops)
>  	set_to_cap_if_null(ops, secmark_refcount_inc);
>  	set_to_cap_if_null(ops, secmark_refcount_dec);
>  	set_to_cap_if_null(ops, req_classify_flow);
> +	set_to_cap_if_null(ops, tun_dev_alloc_security);
> +	set_to_cap_if_null(ops, tun_dev_free_security);
>  	set_to_cap_if_null(ops, tun_dev_create);
> -	set_to_cap_if_null(ops, tun_dev_post_create);
> +	set_to_cap_if_null(ops, tun_dev_create_queue);
> +	set_to_cap_if_null(ops, tun_dev_open);
>  	set_to_cap_if_null(ops, tun_dev_attach);
>  #endif	/* CONFIG_SECURITY_NETWORK */
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> diff --git a/security/security.c b/security/security.c
> index 8dcd4ae..4d82654 100644
> --- a/security/security.c
> +++ b/security/security.c
> @@ -1244,24 +1244,42 @@ void security_secmark_refcount_dec(void)
>  }
>  EXPORT_SYMBOL(security_secmark_refcount_dec);
>  
> +int security_tun_dev_alloc_security(void **security)
> +{
> +	return security_ops->tun_dev_alloc_security(security);
> +}
> +EXPORT_SYMBOL(security_tun_dev_alloc_security);
> +
> +void security_tun_dev_free_security(void *security)
> +{
> +	security_ops->tun_dev_free_security(security);
> +}
> +EXPORT_SYMBOL(security_tun_dev_free_security);
> +
>  int security_tun_dev_create(void)
>  {
>  	return security_ops->tun_dev_create();
>  }
>  EXPORT_SYMBOL(security_tun_dev_create);
>  
> -void security_tun_dev_post_create(struct sock *sk)
> +int security_tun_dev_create_queue(void *security)
>  {
> -	return security_ops->tun_dev_post_create(sk);
> +	return security_ops->tun_dev_create_queue(security);
>  }
> -EXPORT_SYMBOL(security_tun_dev_post_create);
> +EXPORT_SYMBOL(security_tun_dev_create_queue);
>  
> -int security_tun_dev_attach(struct sock *sk)
> +int security_tun_dev_attach(struct sock *sk, void *security)
>  {
> -	return security_ops->tun_dev_attach(sk);
> +	return security_ops->tun_dev_attach(sk, security);
>  }
>  EXPORT_SYMBOL(security_tun_dev_attach);
>  
> +int security_tun_dev_open(void *security)
> +{
> +	return security_ops->tun_dev_open(security);
> +}
> +EXPORT_SYMBOL(security_tun_dev_open);
> +
>  #endif	/* CONFIG_SECURITY_NETWORK */
>  
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 61a5336..f1efb08 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -4399,6 +4399,24 @@ static void selinux_req_classify_flow(const struct request_sock *req,
>  	fl->flowi_secid = req->secid;
>  }
>  
> +static int selinux_tun_dev_alloc_security(void **security)
> +{
> +	struct tun_security_struct *tunsec;
> +
> +	tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL);
> +	if (!tunsec)
> +		return -ENOMEM;
> +	tunsec->sid = current_sid();
> +
> +	*security = tunsec;
> +	return 0;
> +}
> +
> +static void selinux_tun_dev_free_security(void *security)
> +{
> +	kfree(security);
> +}
> +
>  static int selinux_tun_dev_create(void)
>  {
>  	u32 sid = current_sid();
> @@ -4414,8 +4432,17 @@ static int selinux_tun_dev_create(void)
>  			    NULL);
>  }
>  
> -static void selinux_tun_dev_post_create(struct sock *sk)
> +static int selinux_tun_dev_create_queue(void *security)
>  {
> +	struct tun_security_struct *tunsec = security;
> +
> +	return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET,
> +			    TUN_SOCKET__CREATE_QUEUE, NULL);
> +}
> +
> +static int selinux_tun_dev_attach(struct sock *sk, void *security)
> +{
> +	struct tun_security_struct *tunsec = security;
>  	struct sk_security_struct *sksec = sk->sk_security;
>  
>  	/* we don't currently perform any NetLabel based labeling here and it
> @@ -4425,20 +4452,19 @@ static void selinux_tun_dev_post_create(struct sock *sk)
>  	 * cause confusion to the TUN user that had no idea network labeling
>  	 * protocols were being used */
>  
> -	/* see the comments in selinux_tun_dev_create() about why we don't use
> -	 * the sockcreate SID here */
> -
> -	sksec->sid = current_sid();
> +	sksec->sid = tunsec->sid;
>  	sksec->sclass = SECCLASS_TUN_SOCKET;
> +
> +	return 0;
>  }
>  
> -static int selinux_tun_dev_attach(struct sock *sk)
> +static int selinux_tun_dev_open(void *security)
>  {
> -	struct sk_security_struct *sksec = sk->sk_security;
> +	struct tun_security_struct *tunsec = security;
>  	u32 sid = current_sid();
>  	int err;
>  
> -	err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
> +	err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET,
>  			   TUN_SOCKET__RELABELFROM, NULL);
>  	if (err)
>  		return err;
> @@ -4446,8 +4472,7 @@ static int selinux_tun_dev_attach(struct sock *sk)
>  			   TUN_SOCKET__RELABELTO, NULL);
>  	if (err)
>  		return err;
> -
> -	sksec->sid = sid;
> +	tunsec->sid = sid;
>  
>  	return 0;
>  }
> @@ -5642,9 +5667,12 @@ static struct security_operations selinux_ops = {
>  	.secmark_refcount_inc =		selinux_secmark_refcount_inc,
>  	.secmark_refcount_dec =		selinux_secmark_refcount_dec,
>  	.req_classify_flow =		selinux_req_classify_flow,
> +	.tun_dev_alloc_security =	selinux_tun_dev_alloc_security,
> +	.tun_dev_free_security =	selinux_tun_dev_free_security,
>  	.tun_dev_create =		selinux_tun_dev_create,
> -	.tun_dev_post_create = 		selinux_tun_dev_post_create,
> +	.tun_dev_create_queue =		selinux_tun_dev_create_queue,
>  	.tun_dev_attach =		selinux_tun_dev_attach,
> +	.tun_dev_open =			selinux_tun_dev_open,
>  
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
>  	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
> diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
> index 26c7eee..aa47bca 100644
> --- a/security/selinux/include/objsec.h
> +++ b/security/selinux/include/objsec.h
> @@ -110,6 +110,10 @@ struct sk_security_struct {
>  	u16 sclass;			/* sock security class */
>  };
>  
> +struct tun_security_struct {
> +	u32 sid;			/* SID for the tun device sockets */
> +};
> +
>  struct key_security_struct {
>  	u32 sid;	/* SID of key */
>  };
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore Dec. 12, 2012, 6:49 p.m. UTC | #24
On Wednesday, December 12, 2012 11:22:36 AM Michael S. Tsirkin wrote:
> On Wed, Dec 05, 2012 at 03:26:19PM -0500, Paul Moore wrote:
> > This patch corrects some problems with LSM/SELinux that were introduced
> > with the multiqueue patchset.  The problem stems from the fact that the
> > multiqueue work changed the relationship between the tun device and its
> > associated socket; before the socket persisted for the life of the
> > device, however after the multiqueue changes the socket only persisted
> > for the life of the userspace connection (fd open).  For non-persistent
> > devices this is not an issue, but for persistent devices this can cause
> > the tun device to lose its SELinux label.
> > 
> > We correct this problem by adding an opaque LSM security blob to the
> > tun device struct which allows us to have the LSM security state, e.g.
> > SELinux labeling information, persist for the lifetime of the tun
> > device.  In the process we tweak the LSM hooks to work with this new
> > approach to TUN device/socket labeling and introduce a new LSM hook,
> > security_tun_dev_create_queue(), to approve requests to create a new
> > TUN queue via TUNSETQUEUE.
> > 
> > The SELinux code has been adjusted to match the new LSM hooks, the
> > other LSMs do not make use of the LSM TUN controls.  This patch makes
> > use of the recently added "tun_socket:create_queue" permission to
> > restrict access to the TUNSETQUEUE operation.  On older SELinux
> > policies which do not define the "tun_socket:create_queue" permission
> > the access control decision for TUNSETQUEUE will be handled according
> > to the SELinux policy's unknown permission setting.

...

> > @@ -465,6 +466,10 @@ static int tun_attach(struct tun_struct *tun, struct
> > file *file)> 
> >  	struct tun_file *tfile = file->private_data;
> >  	int err;
> > 
> > +	err = security_tun_dev_attach(tfile->socket.sk, tun->security);
> > +	if (err < 0)
> > +		goto out;
> > +
> > 
> >  	err = -EINVAL;
> >  	if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held()))
> >  	
> >  		goto out;
> 
> This hook triggers with both set_queue and set_iff,
> and it also seems to trigger when attaching to a
> persistent device and when creating a new one. But I
> believe we might want to be able to allow one but not the other.
> 
> For example:
> 	- we might want to allow qemu to do set_queue but not set_iff
> 	- we might want to configure presistent devices and
> 	  prevent a user from adding new ones

Please look at the rest of the patch and see what the hook actually does.  It 
does not perform any access control under SELinux, all it does is ensure that 
the socket is labeled based on the associated TUN device.

> > - * @tun_dev_post_create:
> > - *	This hook allows a module to update or allocate a per-socket security
> > - *	structure.
> > - *	@sk contains the newly created sock structure.
> 
> I worry that removing a hook hurt users that use it in their
> security policy.

We need to change the hooks because there was a significant change to the 
implementation of a TUN device.

However, even when changing the LSM hooks, we have preserved the SELinux 
access controls for standard, e.g. single queue, TUN devices such that 
existing SELinux policies will work for existing TUN users.  The new SELinux 
access control we added only comes into play when TUN users want to enable 
multiple queues.
diff mbox

Patch

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 14a0454..fb8148b 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -182,6 +182,7 @@  struct tun_struct {
 	struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
 	struct timer_list flow_gc_timer;
 	unsigned long ageing_time;
+	void *security;
 };
 
 static inline u32 tun_hashfn(u32 rxhash)
@@ -465,6 +466,10 @@  static int tun_attach(struct tun_struct *tun, struct file *file)
 	struct tun_file *tfile = file->private_data;
 	int err;
 
+	err = security_tun_dev_attach(tfile->socket.sk, tun->security);
+	if (err < 0)
+		goto out;
+
 	err = -EINVAL;
 	if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held()))
 		goto out;
@@ -1348,6 +1353,7 @@  static void tun_free_netdev(struct net_device *dev)
 	struct tun_struct *tun = netdev_priv(dev);
 
 	tun_flow_uninit(tun);
+	security_tun_dev_free_security(tun->security);
 	free_netdev(dev);
 }
 
@@ -1534,7 +1540,7 @@  static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 		if (tun_not_capable(tun))
 			return -EPERM;
-		err = security_tun_dev_attach(tfile->socket.sk);
+		err = security_tun_dev_open(tun->security);
 		if (err < 0)
 			return err;
 
@@ -1587,7 +1593,9 @@  static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 		spin_lock_init(&tun->lock);
 
-		security_tun_dev_post_create(&tfile->sk);
+		err = security_tun_dev_alloc_security(&tun->security);
+		if (err < 0)
+			goto err_free_dev;
 
 		tun_net_init(dev);
 
@@ -1767,12 +1775,18 @@  static int tun_set_queue(struct file *file, struct ifreq *ifr)
 
 		tun = netdev_priv(dev);
 		if (dev->netdev_ops != &tap_netdev_ops &&
-			dev->netdev_ops != &tun_netdev_ops)
+			dev->netdev_ops != &tun_netdev_ops) {
 			ret = -EINVAL;
-		else if (tun_not_capable(tun))
+			goto unlock;
+		}
+		if (tun_not_capable(tun)) {
 			ret = -EPERM;
-		else
-			ret = tun_attach(tun, file);
+			goto unlock;
+		}
+		ret = security_tun_dev_create_queue(tun->security);
+		if (ret < 0)
+			goto unlock;
+		ret = tun_attach(tun, file);
 	} else if (ifr->ifr_flags & IFF_DETACH_QUEUE)
 		__tun_detach(tfile, false);
 	else
diff --git a/include/linux/security.h b/include/linux/security.h
index 05e88bd..8ea923b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -983,17 +983,29 @@  static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	tells the LSM to decrement the number of secmark labeling rules loaded
  * @req_classify_flow:
  *	Sets the flow's sid to the openreq sid.
+ * @tun_dev_alloc_security:
+ *	This hook allows a module to allocate a security structure for a TUN
+ *	device.
+ *	@security pointer to a security structure pointer.
+ *	Returns a zero on success, negative values on failure.
+ * @tun_dev_free_security:
+ *	This hook allows a module to free the security structure for a TUN
+ *	device.
+ *	@security pointer to the TUN device's security structure
  * @tun_dev_create:
  *	Check permissions prior to creating a new TUN device.
- * @tun_dev_post_create:
- *	This hook allows a module to update or allocate a per-socket security
- *	structure.
- *	@sk contains the newly created sock structure.
+ * @tun_dev_create_queue:
+ *	Check permissions prior to creating a new TUN device queue.
+ *	@security pointer to the TUN device's security structure.
  * @tun_dev_attach:
- *	Check permissions prior to attaching to a persistent TUN device.  This
- *	hook can also be used by the module to update any security state
+ *	This hook can be used by the module to update any security state
  *	associated with the TUN device's sock structure.
  *	@sk contains the existing sock structure.
+ *	@security pointer to the TUN device's security structure.
+ * @tun_dev_open:
+ *	This hook can be used by the module to update any security state
+ *	associated with the TUN device's security structure.
+ *	@security pointer to the TUN devices's security structure.
  *
  * Security hooks for XFRM operations.
  *
@@ -1613,9 +1625,12 @@  struct security_operations {
 	void (*secmark_refcount_inc) (void);
 	void (*secmark_refcount_dec) (void);
 	void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
-	int (*tun_dev_create)(void);
-	void (*tun_dev_post_create)(struct sock *sk);
-	int (*tun_dev_attach)(struct sock *sk);
+	int (*tun_dev_alloc_security) (void **security);
+	void (*tun_dev_free_security) (void *security);
+	int (*tun_dev_create) (void);
+	int (*tun_dev_create_queue) (void *security);
+	int (*tun_dev_attach) (struct sock *sk, void *security);
+	int (*tun_dev_open) (void *security);
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -2553,9 +2568,12 @@  void security_inet_conn_established(struct sock *sk,
 int security_secmark_relabel_packet(u32 secid);
 void security_secmark_refcount_inc(void);
 void security_secmark_refcount_dec(void);
+int security_tun_dev_alloc_security(void **security);
+void security_tun_dev_free_security(void *security);
 int security_tun_dev_create(void);
-void security_tun_dev_post_create(struct sock *sk);
-int security_tun_dev_attach(struct sock *sk);
+int security_tun_dev_create_queue(void *security);
+int security_tun_dev_attach(struct sock *sk, void *security);
+int security_tun_dev_open(void *security);
 
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct sock *sock,
@@ -2720,16 +2738,31 @@  static inline void security_secmark_refcount_dec(void)
 {
 }
 
+static inline int security_tun_dev_alloc_security(void **security)
+{
+	return 0;
+}
+
+static inline void security_tun_dev_free_security(void *security)
+{
+}
+
 static inline int security_tun_dev_create(void)
 {
 	return 0;
 }
 
-static inline void security_tun_dev_post_create(struct sock *sk)
+static inline int security_tun_dev_create_queue(void *security)
+{
+	return 0;
+}
+
+static inline int security_tun_dev_attach(struct sock *sk, void *security)
 {
+	return 0;
 }
 
-static inline int security_tun_dev_attach(struct sock *sk)
+static inline int security_tun_dev_open(void *security)
 {
 	return 0;
 }
diff --git a/security/capability.c b/security/capability.c
index b14a30c..bf4cbf2 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -704,16 +704,31 @@  static void cap_req_classify_flow(const struct request_sock *req,
 {
 }
 
+static int cap_tun_dev_alloc_security(void **security)
+{
+	return 0;
+}
+
+static void cap_tun_dev_free_security(void *security)
+{
+}
+
 static int cap_tun_dev_create(void)
 {
 	return 0;
 }
 
-static void cap_tun_dev_post_create(struct sock *sk)
+static int cap_tun_dev_create_queue(void *security)
+{
+	return 0;
+}
+
+static int cap_tun_dev_attach(struct sock *sk, void *security)
 {
+	return 0;
 }
 
-static int cap_tun_dev_attach(struct sock *sk)
+static int cap_tun_dev_open(void *security)
 {
 	return 0;
 }
@@ -1044,8 +1059,11 @@  void __init security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, secmark_refcount_inc);
 	set_to_cap_if_null(ops, secmark_refcount_dec);
 	set_to_cap_if_null(ops, req_classify_flow);
+	set_to_cap_if_null(ops, tun_dev_alloc_security);
+	set_to_cap_if_null(ops, tun_dev_free_security);
 	set_to_cap_if_null(ops, tun_dev_create);
-	set_to_cap_if_null(ops, tun_dev_post_create);
+	set_to_cap_if_null(ops, tun_dev_create_queue);
+	set_to_cap_if_null(ops, tun_dev_open);
 	set_to_cap_if_null(ops, tun_dev_attach);
 #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
diff --git a/security/security.c b/security/security.c
index 8dcd4ae..4d82654 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1244,24 +1244,42 @@  void security_secmark_refcount_dec(void)
 }
 EXPORT_SYMBOL(security_secmark_refcount_dec);
 
+int security_tun_dev_alloc_security(void **security)
+{
+	return security_ops->tun_dev_alloc_security(security);
+}
+EXPORT_SYMBOL(security_tun_dev_alloc_security);
+
+void security_tun_dev_free_security(void *security)
+{
+	security_ops->tun_dev_free_security(security);
+}
+EXPORT_SYMBOL(security_tun_dev_free_security);
+
 int security_tun_dev_create(void)
 {
 	return security_ops->tun_dev_create();
 }
 EXPORT_SYMBOL(security_tun_dev_create);
 
-void security_tun_dev_post_create(struct sock *sk)
+int security_tun_dev_create_queue(void *security)
 {
-	return security_ops->tun_dev_post_create(sk);
+	return security_ops->tun_dev_create_queue(security);
 }
-EXPORT_SYMBOL(security_tun_dev_post_create);
+EXPORT_SYMBOL(security_tun_dev_create_queue);
 
-int security_tun_dev_attach(struct sock *sk)
+int security_tun_dev_attach(struct sock *sk, void *security)
 {
-	return security_ops->tun_dev_attach(sk);
+	return security_ops->tun_dev_attach(sk, security);
 }
 EXPORT_SYMBOL(security_tun_dev_attach);
 
+int security_tun_dev_open(void *security)
+{
+	return security_ops->tun_dev_open(security);
+}
+EXPORT_SYMBOL(security_tun_dev_open);
+
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 61a5336..f1efb08 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4399,6 +4399,24 @@  static void selinux_req_classify_flow(const struct request_sock *req,
 	fl->flowi_secid = req->secid;
 }
 
+static int selinux_tun_dev_alloc_security(void **security)
+{
+	struct tun_security_struct *tunsec;
+
+	tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL);
+	if (!tunsec)
+		return -ENOMEM;
+	tunsec->sid = current_sid();
+
+	*security = tunsec;
+	return 0;
+}
+
+static void selinux_tun_dev_free_security(void *security)
+{
+	kfree(security);
+}
+
 static int selinux_tun_dev_create(void)
 {
 	u32 sid = current_sid();
@@ -4414,8 +4432,17 @@  static int selinux_tun_dev_create(void)
 			    NULL);
 }
 
-static void selinux_tun_dev_post_create(struct sock *sk)
+static int selinux_tun_dev_create_queue(void *security)
 {
+	struct tun_security_struct *tunsec = security;
+
+	return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET,
+			    TUN_SOCKET__CREATE_QUEUE, NULL);
+}
+
+static int selinux_tun_dev_attach(struct sock *sk, void *security)
+{
+	struct tun_security_struct *tunsec = security;
 	struct sk_security_struct *sksec = sk->sk_security;
 
 	/* we don't currently perform any NetLabel based labeling here and it
@@ -4425,20 +4452,19 @@  static void selinux_tun_dev_post_create(struct sock *sk)
 	 * cause confusion to the TUN user that had no idea network labeling
 	 * protocols were being used */
 
-	/* see the comments in selinux_tun_dev_create() about why we don't use
-	 * the sockcreate SID here */
-
-	sksec->sid = current_sid();
+	sksec->sid = tunsec->sid;
 	sksec->sclass = SECCLASS_TUN_SOCKET;
+
+	return 0;
 }
 
-static int selinux_tun_dev_attach(struct sock *sk)
+static int selinux_tun_dev_open(void *security)
 {
-	struct sk_security_struct *sksec = sk->sk_security;
+	struct tun_security_struct *tunsec = security;
 	u32 sid = current_sid();
 	int err;
 
-	err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
+	err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET,
 			   TUN_SOCKET__RELABELFROM, NULL);
 	if (err)
 		return err;
@@ -4446,8 +4472,7 @@  static int selinux_tun_dev_attach(struct sock *sk)
 			   TUN_SOCKET__RELABELTO, NULL);
 	if (err)
 		return err;
-
-	sksec->sid = sid;
+	tunsec->sid = sid;
 
 	return 0;
 }
@@ -5642,9 +5667,12 @@  static struct security_operations selinux_ops = {
 	.secmark_refcount_inc =		selinux_secmark_refcount_inc,
 	.secmark_refcount_dec =		selinux_secmark_refcount_dec,
 	.req_classify_flow =		selinux_req_classify_flow,
+	.tun_dev_alloc_security =	selinux_tun_dev_alloc_security,
+	.tun_dev_free_security =	selinux_tun_dev_free_security,
 	.tun_dev_create =		selinux_tun_dev_create,
-	.tun_dev_post_create = 		selinux_tun_dev_post_create,
+	.tun_dev_create_queue =		selinux_tun_dev_create_queue,
 	.tun_dev_attach =		selinux_tun_dev_attach,
+	.tun_dev_open =			selinux_tun_dev_open,
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 26c7eee..aa47bca 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -110,6 +110,10 @@  struct sk_security_struct {
 	u16 sclass;			/* sock security class */
 };
 
+struct tun_security_struct {
+	u32 sid;			/* SID for the tun device sockets */
+};
+
 struct key_security_struct {
 	u32 sid;	/* SID of key */
 };