Patchwork mark newly opened fds as FD_CLOEXEC (close on exec) [part 2]

login
register
mail settings
Submitter Maciej Żenczykowski
Date March 21, 2012, 10:52 a.m.
Message ID <1332327120-22444-1-git-send-email-zenczykowski@gmail.com>
Download mbox | patch
Permalink /patch/147962/
State Accepted
Headers show

Comments

Maciej Żenczykowski - March 21, 2012, 10:52 a.m.
From: Maciej Żenczykowski <maze@google.com>

This is iptables-1.4.11-cloexec.patch from Fedora 18 iptables source
rpm, in particular:
  http://kojipkgs.fedoraproject.org/packages/iptables/1.4.12.2/4.fc18/src/iptables-1.4.12.2-4.fc18.src.rpm

Signed-off-by: Maciej Żenczykowski <maze@google.com>
---
 extensions/libxt_set.h |    7 +++++++
 libiptc/libiptc.c      |    8 ++++++++
 2 files changed, 15 insertions(+), 0 deletions(-)
Eric Dumazet - March 21, 2012, 8:27 p.m.
On Wed, 2012-03-21 at 03:52 -0700, Maciej Żenczykowski wrote:
> From: Maciej Żenczykowski <maze@google.com>
> 
> This is iptables-1.4.11-cloexec.patch from Fedora 18 iptables source
> rpm, in particular:
>   http://kojipkgs.fedoraproject.org/packages/iptables/1.4.12.2/4.fc18/src/iptables-1.4.12.2-4.fc18.src.rpm
> 
> Signed-off-by: Maciej Żenczykowski <maze@google.com>
> ---
>  extensions/libxt_set.h |    7 +++++++
>  libiptc/libiptc.c      |    8 ++++++++
>  2 files changed, 15 insertions(+), 0 deletions(-)
> 
> diff --git a/extensions/libxt_set.h b/extensions/libxt_set.h
> index 4ac84fa9c022..47c3f5b6f5d4 100644
> --- a/extensions/libxt_set.h
> +++ b/extensions/libxt_set.h
> @@ -2,6 +2,7 @@
>  #define _LIBXT_SET_H
>  
>  #include <unistd.h>
> +#include <fcntl.h>
>  #include <sys/types.h>
>  #include <sys/socket.h>
>  #include <errno.h>
> @@ -23,6 +24,12 @@ get_version(unsigned *version)
>  		xtables_error(OTHER_PROBLEM,
>  			      "Can't open socket to ipset.\n");
>  
> +	if (fcntl(sockfd, F_SETFD, FD_CLOEXEC) == -1) {
> +		xtables_error(OTHER_PROBLEM,
> +			      "Could not set close on exec: %s\n",
> +			      strerror(errno));
> +	}
> +
>  	req_version.op = IP_SET_OP_VERSION;
>  	res = getsockopt(sockfd, SOL_IP, SO_IP_SET, &req_version, &size);
>  	if (res != 0)
> diff --git a/libiptc/libiptc.c b/libiptc/libiptc.c
> index 13e41d525f28..63965e738596 100644
> --- a/libiptc/libiptc.c
> +++ b/libiptc/libiptc.c
> @@ -29,6 +29,8 @@
>   * 	- performance work: speedup initial ruleset parsing.
>   * 	- sponsored by ComX Networks A/S (http://www.comx.dk/)
>   */
> +#include <unistd.h>
> +#include <fcntl.h>
>  #include <sys/types.h>
>  #include <sys/socket.h>
>  #include <stdbool.h>
> @@ -1316,6 +1318,12 @@ TC_INIT(const char *tablename)
>  	if (sockfd < 0)
>  		return NULL;
>  
> +	if (fcntl(sockfd, F_SETFD, FD_CLOEXEC) == -1) {
> +		fprintf(stderr, "Could not set close on exec: %s\n",
> +			strerror(errno));
> +		abort();
> +	}
> +
>  retry:
>  	s = sizeof(info);
>  

Looks fine, but since commit a677a039b (flag parameters: socket and
socketpair) from Ulrich Drepper, we can pass SOCK_CLOEXEC directly in
the socket() call, thus avoiding extra fcntl() call.

Not relevant to iptables (opening this socket in iptables is not
performance critical), but worth to mention for reference.



--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Maciej Żenczykowski - March 21, 2012, 8:40 p.m.
But that's a kernel feature, and I don't think we can rely on it being
present in iptables userspace (it has to be backwards compatible to
IMHO at least back to 2.6.9 if at all possible),
and adding the "try with flag, if fails, retry without flag and
manually set it" logic seems overkill.

I think such details only matter for multithreaded programs with exec races.
Which I don't believe to be the case here.

On Wed, Mar 21, 2012 at 1:27 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Wed, 2012-03-21 at 03:52 -0700, Maciej Żenczykowski wrote:
>> From: Maciej Żenczykowski <maze@google.com>
>>
>> This is iptables-1.4.11-cloexec.patch from Fedora 18 iptables source
>> rpm, in particular:
>>   http://kojipkgs.fedoraproject.org/packages/iptables/1.4.12.2/4.fc18/src/iptables-1.4.12.2-4.fc18.src.rpm
>>
>> Signed-off-by: Maciej Żenczykowski <maze@google.com>
>> ---
>>  extensions/libxt_set.h |    7 +++++++
>>  libiptc/libiptc.c      |    8 ++++++++
>>  2 files changed, 15 insertions(+), 0 deletions(-)
>>
>> diff --git a/extensions/libxt_set.h b/extensions/libxt_set.h
>> index 4ac84fa9c022..47c3f5b6f5d4 100644
>> --- a/extensions/libxt_set.h
>> +++ b/extensions/libxt_set.h
>> @@ -2,6 +2,7 @@
>>  #define _LIBXT_SET_H
>>
>>  #include <unistd.h>
>> +#include <fcntl.h>
>>  #include <sys/types.h>
>>  #include <sys/socket.h>
>>  #include <errno.h>
>> @@ -23,6 +24,12 @@ get_version(unsigned *version)
>>               xtables_error(OTHER_PROBLEM,
>>                             "Can't open socket to ipset.\n");
>>
>> +     if (fcntl(sockfd, F_SETFD, FD_CLOEXEC) == -1) {
>> +             xtables_error(OTHER_PROBLEM,
>> +                           "Could not set close on exec: %s\n",
>> +                           strerror(errno));
>> +     }
>> +
>>       req_version.op = IP_SET_OP_VERSION;
>>       res = getsockopt(sockfd, SOL_IP, SO_IP_SET, &req_version, &size);
>>       if (res != 0)
>> diff --git a/libiptc/libiptc.c b/libiptc/libiptc.c
>> index 13e41d525f28..63965e738596 100644
>> --- a/libiptc/libiptc.c
>> +++ b/libiptc/libiptc.c
>> @@ -29,6 +29,8 @@
>>   *   - performance work: speedup initial ruleset parsing.
>>   *   - sponsored by ComX Networks A/S (http://www.comx.dk/)
>>   */
>> +#include <unistd.h>
>> +#include <fcntl.h>
>>  #include <sys/types.h>
>>  #include <sys/socket.h>
>>  #include <stdbool.h>
>> @@ -1316,6 +1318,12 @@ TC_INIT(const char *tablename)
>>       if (sockfd < 0)
>>               return NULL;
>>
>> +     if (fcntl(sockfd, F_SETFD, FD_CLOEXEC) == -1) {
>> +             fprintf(stderr, "Could not set close on exec: %s\n",
>> +                     strerror(errno));
>> +             abort();
>> +     }
>> +
>>  retry:
>>       s = sizeof(info);
>>
>
> Looks fine, but since commit a677a039b (flag parameters: socket and
> socketpair) from Ulrich Drepper, we can pass SOCK_CLOEXEC directly in
> the socket() call, thus avoiding extra fcntl() call.
>
> Not relevant to iptables (opening this socket in iptables is not
> performance critical), but worth to mention for reference.
>
>
>
Eric Dumazet - March 21, 2012, 8:44 p.m.
On Wed, 2012-03-21 at 13:40 -0700, Maciej Żenczykowski wrote:
> But that's a kernel feature, and I don't think we can rely on it being
> present in iptables userspace (it has to be backwards compatible to
> IMHO at least back to 2.6.9 if at all possible),
> and adding the "try with flag, if fails, retry without flag and
> manually set it" logic seems overkill.
> 
> I think such details only matter for multithreaded programs with exec races.
> Which I don't believe to be the case here.

True, but CLOEXEC on iptables... I mean... how is it mandatory ?



--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Maciej Żenczykowski - March 21, 2012, 8:50 p.m.
> True, but CLOEXEC on iptables... I mean... how is it mandatory ?

I'm not sure what you mean by mandatory.

iptables does potentially fork/exec modprobe to load modules.
That can cause a selinux 'domain'/'role'/whatever-it-is-called crossing.
You can do automated inspection of what gets carried across such
privilege changes and any unexpected open file descriptors flag
problems, patches like this cut down on the noise.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira - March 22, 2012, 11:21 a.m.
Hi Maciej,

On Wed, Mar 21, 2012 at 01:50:59PM -0700, Maciej Żenczykowski wrote:
> > True, but CLOEXEC on iptables... I mean... how is it mandatory ?
> 
> I'm not sure what you mean by mandatory.

If this patch is needed, I think we have to stick to fcntl for
backward compatibility reasons as well.

> iptables does potentially fork/exec modprobe to load modules.
> That can cause a selinux 'domain'/'role'/whatever-it-is-called crossing.
> You can do automated inspection of what gets carried across such
> privilege changes and any unexpected open file descriptors flag
> problems, patches like this cut down on the noise.

Could you resend the patch including the description of the precise
problem that this fixes in selinux?

IMO, if the patch description would have include since the beginnging
why that CLOEXEC is needed, we would have save a couple of emails.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira - March 23, 2012, 10:26 a.m.
On Thu, Mar 22, 2012 at 12:21:20PM +0100, Pablo Neira Ayuso wrote:
> Hi Maciej,
> 
> On Wed, Mar 21, 2012 at 01:50:59PM -0700, Maciej Żenczykowski wrote:
> > > True, but CLOEXEC on iptables... I mean... how is it mandatory ?
> > 
> > I'm not sure what you mean by mandatory.
> 
> If this patch is needed, I think we have to stick to fcntl for
> backward compatibility reasons as well.
> 
> > iptables does potentially fork/exec modprobe to load modules.
> > That can cause a selinux 'domain'/'role'/whatever-it-is-called crossing.
> > You can do automated inspection of what gets carried across such
> > privilege changes and any unexpected open file descriptors flag
> > problems, patches like this cut down on the noise.
> 
> Could you resend the patch including the description of the precise
> problem that this fixes in selinux?

No need to do it. I've applied this to git.netfilter.org.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Maciej Żenczykowski - March 24, 2012, 9:11 a.m.
Oh, sorry, I did actually send a patch with nice comments, but
apparently I ended up missing the mailing list in the cc list.

From: Maciej Żenczykowski <maze@google.com>

This is iptables-1.4.11-cloexec.patch from Fedora 18 iptables source
rpm, in particular:
 http://kojipkgs.fedoraproject.org/packages/iptables/1.4.12.2/4.fc18/src/iptables-1.4.12.2-4.fc18.src.rpm

Reasoning:

On an example Fedora 15 system:
 $ ls -alZ /sbin/ip{,6}tables* /sbin/modprobe
 lrwxrwxrwx. root root system_u:object_r:bin_t:s0
/sbin/ip6tables -> ip6tables-multi
 -rwxr-xr-x. root root system_u:object_r:iptables_exec_t:s0
/sbin/ip6tables-multi
 lrwxrwxrwx. root root system_u:object_r:bin_t:s0
/sbin/ip6tables-restore -> ip6tables-multi
 lrwxrwxrwx. root root system_u:object_r:bin_t:s0
/sbin/ip6tables-save -> ip6tables-multi
 lrwxrwxrwx. root root system_u:object_r:bin_t:s0
/sbin/iptables -> iptables-multi
 -rwxr-xr-x. root root system_u:object_r:iptables_exec_t:s0 /sbin/iptables-multi
 lrwxrwxrwx. root root system_u:object_r:bin_t:s0
/sbin/iptables-restore -> iptables-multi
 lrwxrwxrwx. root root system_u:object_r:bin_t:s0
/sbin/iptables-save -> iptables-multi
 -rwxr-xr-x. root root system_u:object_r:insmod_exec_t:s0   /sbin/modprobe

You will note that the iptables binaries and the modprobe binaries
differ in the SELinux security context 'type'.

Since ip{,6}tables may manually fork/exec modprobe to load modules
(not everything relies on kernel module autoloading), it may trigger
a SELinux security context transition.

It is desirable to not have any unexpectedly open file descriptors
leak across this security boundary - this may just generate needless
monitoring/auditing noise (especially in extremely hardened environs).

It's also arguably just generally good programming practice to mark
all file descripters as close on exec if you are ever going to exec
(on an API level this really should have been the default state of
any new file descriptor).

In order to prevent races between file descriptor creation and exec
in multi-threaded programs, newer linux kernels now support creating
sockets as close on exec right from the initial syscall.

Fortunately we are not multi-threaded, and thus this race condition
does not matter, and hence we do not need to implement the complex
logic of "try socket(domain, type | SOCK_CLOEXEC, protocol), if that
failed use normal socket() system call and set FD_CLOEXEC flag manually
via fcntl" which is necessary to maintain desired backwards compatibility
with older pre-2.6.27 kernels.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/extensions/libxt_set.h b/extensions/libxt_set.h
index 4ac84fa9c022..47c3f5b6f5d4 100644
--- a/extensions/libxt_set.h
+++ b/extensions/libxt_set.h
@@ -2,6 +2,7 @@ 
 #define _LIBXT_SET_H
 
 #include <unistd.h>
+#include <fcntl.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <errno.h>
@@ -23,6 +24,12 @@  get_version(unsigned *version)
 		xtables_error(OTHER_PROBLEM,
 			      "Can't open socket to ipset.\n");
 
+	if (fcntl(sockfd, F_SETFD, FD_CLOEXEC) == -1) {
+		xtables_error(OTHER_PROBLEM,
+			      "Could not set close on exec: %s\n",
+			      strerror(errno));
+	}
+
 	req_version.op = IP_SET_OP_VERSION;
 	res = getsockopt(sockfd, SOL_IP, SO_IP_SET, &req_version, &size);
 	if (res != 0)
diff --git a/libiptc/libiptc.c b/libiptc/libiptc.c
index 13e41d525f28..63965e738596 100644
--- a/libiptc/libiptc.c
+++ b/libiptc/libiptc.c
@@ -29,6 +29,8 @@ 
  * 	- performance work: speedup initial ruleset parsing.
  * 	- sponsored by ComX Networks A/S (http://www.comx.dk/)
  */
+#include <unistd.h>
+#include <fcntl.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <stdbool.h>
@@ -1316,6 +1318,12 @@  TC_INIT(const char *tablename)
 	if (sockfd < 0)
 		return NULL;
 
+	if (fcntl(sockfd, F_SETFD, FD_CLOEXEC) == -1) {
+		fprintf(stderr, "Could not set close on exec: %s\n",
+			strerror(errno));
+		abort();
+	}
+
 retry:
 	s = sizeof(info);