Message ID | 1346461915-6309-1-git-send-email-aatteka@nicira.com |
---|---|
State | Superseded |
Headers | show |
On Fri, Aug 31, 2012 at 6:11 PM, Ansis Atteka <aatteka@nicira.com> wrote: > This patch allows conntrackd to open CT Netlink sockets into a given > network namespace. Channel sockets (e.g. UDP) would still be opened into > the same namespace where conntrackd was started. > > The only binary this patch affects is conntrackd. All other binaries (e.g. > conntrack, nfct) would still operate in the same namespace where they were > started. > > To make use of this patch: > 1. create a network namespace: "ip netns add the_ns" > 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf > file inside General {...} section. Wanted to provide more details about this patch and also bump it up for attention. Basically, what it does is allows conntrackd to open Conntrack Netlink sockets into a different namespace than where Channel Sockets were opened. This isolation brings benefits to: 1. security, because the channel socket (and management interface) will reside in a different namespace. They won't be exposed to the traffic that traverses the namespace; 2. flexibility, because arbitrary IP addresses could be used inside that namespace for Connection Tracking purposes. No need to worry that there might be overlapping IP addresses with the Management interface; 3. scalability w.r.t. namespaces, because all the namespaces would end up using a single management interface and IP address in the root namespace. There wouldn't be need to maintain a dedicated management interface and IP address inside every namespace. Also this patch would prepare soil for my next patches, that would ease connection state synchronization for virtualized networks even more: 1. allow single conntrackd instance to synchronize multiple namespaces; 2. add configuration dynamically to conntrackd (without restarting the daemon). Those other two patches involve quite a lot of code re-factoring and depend on this patch. So comments are welcome! Thanks, Ansis > > Signed-off-by: Ansis Atteka <aatteka@nicira.com> > --- > include/Makefile.am | 2 +- > include/conntrackd.h | 2 + > include/namespace.h | 12 ++++++ > src/Makefile.am | 3 +- > src/cthelper.c | 3 +- > src/ctnl.c | 20 +++++++-- > src/expect.c | 4 +- > src/external_inject.c | 3 +- > src/internal_bypass.c | 5 ++- > src/namespace.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++ > src/netlink.c | 6 ++- > src/read_config_lex.l | 1 + > src/read_config_yy.y | 8 +++- > src/run.c | 3 ++ > src/sync-mode.c | 4 +- > src/sync-notrack.c | 3 +- > 16 files changed, 175 insertions(+), 16 deletions(-) > create mode 100644 include/namespace.h > create mode 100644 src/namespace.c > > diff --git a/include/Makefile.am b/include/Makefile.am > index 6bd0f7f..e06fd4d 100644 > --- a/include/Makefile.am > +++ b/include/Makefile.am > @@ -6,5 +6,5 @@ noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \ > network.h filter.h queue.h vector.h cidr.h \ > traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \ > process.h origin.h internal.h external.h date.h nfct.h \ > - helper.h myct.h stack.h > + helper.h myct.h stack.h namespace.h > > diff --git a/include/conntrackd.h b/include/conntrackd.h > index 19e613c..c349d72 100644 > --- a/include/conntrackd.h > +++ b/include/conntrackd.h > @@ -94,6 +94,7 @@ struct ct_conf { > int channel_type_global; > struct channel_conf channel[MULTICHANNEL_MAX]; > struct local_conf local; /* unix socket facilities */ > + char netlink_namespace[FILENAME_MAXLEN]; > int nice; > int limit; > int refresh; > @@ -143,6 +144,7 @@ struct ct_conf { > #define STATE(x) st.x > > struct ct_general_state { > + int ns_fd; /* namespace fd for NL sockets */ > sigset_t block; > FILE *log; > FILE *stats_log; > diff --git a/include/namespace.h b/include/namespace.h > new file mode 100644 > index 0000000..668a270 > --- /dev/null > +++ b/include/namespace.h > @@ -0,0 +1,12 @@ > +#ifndef _NAMESPACE_H_ > +#define _NAMESPACE_H_ > + > +#include <libmnl/libmnl.h> > +#include <libnetfilter_conntrack/libnetfilter_conntrack.h> > + > +void init_namespaces(void); > + > +struct nfct_handle *nfct_open_ns(u_int8_t, unsigned, int); > +struct mnl_socket *mnl_socket_open_ns(int, int); > + > +#endif > diff --git a/src/Makefile.am b/src/Makefile.am > index d8074d2..60314ab 100644 > --- a/src/Makefile.am > +++ b/src/Makefile.am > @@ -39,7 +39,8 @@ conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \ > external_cache.c external_inject.c \ > internal_cache.c internal_bypass.c \ > read_config_yy.y read_config_lex.l \ > - stack.c helpers.c utils.c expect.c > + stack.c helpers.c utils.c expect.c \ > + namespace.c > > # yacc and lex generate dirty code > read_config_yy.o read_config_lex.o: AM_CFLAGS += -Wno-missing-prototypes -Wno-missing-declarations -Wno-implicit-function-declaration -Wno-nested-externs -Wno-undef -Wno-redundant-decls > diff --git a/src/cthelper.c b/src/cthelper.c > index c119869..b165900 100644 > --- a/src/cthelper.c > +++ b/src/cthelper.c > @@ -22,6 +22,7 @@ > #include "log.h" > #include "fds.h" > #include "helper.h" > +#include "namespace.h" > > #include <unistd.h> > #include <fcntl.h> > @@ -493,7 +494,7 @@ int cthelper_init(void) > return -1; > } > > - STATE_CTH(nl) = mnl_socket_open(NETLINK_NETFILTER); > + STATE_CTH(nl) = mnl_socket_open_ns(NETLINK_NETFILTER, STATE(ns_fd)); > if (STATE_CTH(nl) == NULL) { > dlog(LOG_ERR, "cannot open nfq socket"); > return -1; > diff --git a/src/ctnl.c b/src/ctnl.c > index bb54727..65784c3 100644 > --- a/src/ctnl.c > +++ b/src/ctnl.c > @@ -29,6 +29,7 @@ > #include "origin.h" > #include "date.h" > #include "internal.h" > +#include "namespace.h" > > #include <errno.h> > #include <signal.h> > @@ -399,6 +400,17 @@ static void poll_cb(void *data) > > int ctnl_init(void) > { > + if (CONFIG(netlink_namespace)[0]) { > + STATE(ns_fd) = open(CONFIG(netlink_namespace), O_RDONLY); > + if (STATE(ns_fd) == -1) { > + dlog(LOG_ERR, "could not open network namespace %s: %s", > + CONFIG(netlink_namespace), strerror(errno)); > + return -1; > + } > + } else { > + STATE(ns_fd) = -1; > + } > + > if (CONFIG(flags) & CTD_STATS_MODE) > STATE(mode) = &stats_mode; > else if (CONFIG(flags) & CTD_SYNC_MODE) > @@ -417,7 +429,7 @@ int ctnl_init(void) > } > > /* resynchronize (like 'dump' socket) but it also purges old entries */ > - STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0); > + STATE(resync) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (STATE(resync)== NULL) { > dlog(LOG_ERR, "can't open netlink handler: %s", > strerror(errno)); > @@ -438,7 +450,7 @@ int ctnl_init(void) > fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK); > > if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { > - STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0); > + STATE(dump) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (STATE(dump) == NULL) { > dlog(LOG_ERR, "can't open netlink handler: %s", > strerror(errno)); > @@ -467,7 +479,7 @@ int ctnl_init(void) > } > } > > - STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0); > + STATE(get) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (STATE(get) == NULL) { > dlog(LOG_ERR, "can't open netlink handler: %s", > strerror(errno)); > @@ -481,7 +493,7 @@ int ctnl_init(void) > exp_get_handler, NULL); > } > > - STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0); > + STATE(flush) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (STATE(flush) == NULL) { > dlog(LOG_ERR, "cannot open flusher handler"); > return -1; > diff --git a/src/expect.c b/src/expect.c > index 6069770..bedec2c 100644 > --- a/src/expect.c > +++ b/src/expect.c > @@ -8,7 +8,9 @@ > * This code has been sponsored by Vyatta Inc. <http://www.vyatta.com> > */ > > +#include "conntrackd.h" > #include "helper.h" > +#include "namespace.h" > > #include <stdio.h> > #include <string.h> > @@ -165,7 +167,7 @@ static int cthelper_expect_cmd(struct nf_expect *exp, int cmd) > int ret; > struct nfct_handle *h; > > - h = nfct_open(EXPECT, 0); > + h = nfct_open_ns(EXPECT, 0, STATE(ns_fd)); > if (!h) > return -1; > > diff --git a/src/external_inject.c b/src/external_inject.c > index 0ad3478..5a6680b 100644 > --- a/src/external_inject.c > +++ b/src/external_inject.c > @@ -22,6 +22,7 @@ > #include "cache.h" > #include "origin.h" > #include "external.h" > +#include "namespace.h" > #include "netlink.h" > > #include <libnetfilter_conntrack/libnetfilter_conntrack.h> > @@ -42,7 +43,7 @@ struct { > static int external_inject_init(void) > { > /* handler to directly inject conntracks into kernel-space */ > - inject = nfct_open(CONFIG(netlink).subsys_id, 0); > + inject = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (inject == NULL) { > dlog(LOG_ERR, "can't open netlink handler: %s", > strerror(errno)); > diff --git a/src/internal_bypass.c b/src/internal_bypass.c > index 1194339..520c55d 100644 > --- a/src/internal_bypass.c > +++ b/src/internal_bypass.c > @@ -16,6 +16,7 @@ > #include "netlink.h" > #include "network.h" > #include "origin.h" > +#include "namespace.h" > > static int internal_bypass_init(void) > { > @@ -52,7 +53,7 @@ static void internal_bypass_ct_dump(int fd, int type) > u_int32_t family = AF_UNSPEC; > int ret; > > - h = nfct_open(CONFIG(netlink).subsys_id, 0); > + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (h == NULL) { > dlog(LOG_ERR, "can't allocate memory for the internal cache"); > return; > @@ -183,7 +184,7 @@ static void internal_bypass_exp_dump(int fd, int type) > u_int32_t family = AF_UNSPEC; > int ret; > > - h = nfct_open(CONFIG(netlink).subsys_id, 0); > + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (h == NULL) { > dlog(LOG_ERR, "can't allocate memory for the internal cache"); > return; > diff --git a/src/namespace.c b/src/namespace.c > new file mode 100644 > index 0000000..03db222 > --- /dev/null > +++ b/src/namespace.c > @@ -0,0 +1,112 @@ > +/* > + * Copyright (C) 2012 Nicira, Inc. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > + * > + * Authors: > + * Ansis Atteka <aatteka@nicira.com> > + */ > +#define _GNU_SOURCE > + > +#include "namespace.h" > + > +#include <fcntl.h> > +#include <sched.h> > +#include <sys/syscall.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <unistd.h> > + > +#include "conntrackd.h" > +#include "log.h" > + > +#ifndef CLONE_NEWNET > +#define CLONE_NEWNET 0x40000000 > +#endif > + > +#ifndef __NR_setns > +#ifdef __x86_64 > +#define __NR_setns 308 > +#endif > +#ifdef __i386 > +#define __NR_setns 346 > +#endif > +#endif > + > +#ifdef __NR_setns > + > +static int root_fd = -1; > + > +void init_namespaces(void) { > + root_fd = open("/proc/self/ns/net", O_RDONLY); > + if (root_fd == -1) { > + dlog(LOG_WARNING, "could not open current namespace"); > + } > +} > + > +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions, > + int ns_fd) { > + struct nfct_handle *handle = NULL; > + > + if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { > + dlog(LOG_ERR, "could not switch between namespaces"); > + } else { > + handle = nfct_open(subsys_id, subscriptions); > + if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { > + dlog(LOG_ERR, "fatal: could not switch back to main namespace"); > + } > + } > + return handle; > +} > + > +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) { > + struct mnl_socket *handle = NULL; > + > + if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { > + dlog(LOG_ERR, "could not switch between namespaces"); > + } else { > + handle = mnl_socket_open(bus); > + if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { > + dlog(LOG_ERR, "fatal: could not switch back to main namespace"); > + } > + } > + return handle; > +} > + > +#else > + > +void init_namespaces(void) { > +} > + > +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions, > + int ns_fd) { > + if (ns_fd == -1) { > + return nfct_open(subsys_id, subscriptions); > + } else { > + dlog(LOG_ERR, "network namespaces are not supported on this system"); > + return NULL; > + } > +} > + > +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) { > + if (ns_fd == -1) { > + return mnl_socket_open(bus); > + } else { > + dlog(LOG_ERR, "network namespaces are not supported on this system"); > + return NULL; > + } > +} > + > +#endif > diff --git a/src/netlink.c b/src/netlink.c > index bd38d99..c71b463 100644 > --- a/src/netlink.c > +++ b/src/netlink.c > @@ -21,6 +21,7 @@ > #include "conntrackd.h" > #include "filter.h" > #include "log.h" > +#include "namespace.h" > > #include <string.h> > #include <errno.h> > @@ -33,7 +34,8 @@ struct nfct_handle *nl_init_event_handler(void) > { > struct nfct_handle *h; > > - h = nfct_open(CONFIG(netlink).subsys_id, CONFIG(netlink).groups); > + h = nfct_open_ns(CONFIG(netlink).subsys_id, CONFIG(netlink).groups, > + STATE(ns_fd)); > if (h == NULL) > return NULL; > > @@ -175,7 +177,7 @@ int nl_flush_conntrack_table_selective(void) > struct nfct_handle *h; > int ret; > > - h = nfct_open(CONNTRACK, 0); > + h = nfct_open_ns(CONNTRACK, 0, STATE(ns_fd)); > if (h == NULL) { > dlog(LOG_ERR, "cannot open handle"); > return -1; > diff --git a/src/read_config_lex.l b/src/read_config_lex.l > index 31fa32e..7a09c52 100644 > --- a/src/read_config_lex.l > +++ b/src/read_config_lex.l > @@ -91,6 +91,7 @@ notrack [N|n][O|o][T|t][R|r][A|a][C|c][K|k] > "SocketBufferSizeMaxGrowth" { return T_BUFFER_SIZE_MAX_GROWN; /* alias */ } > "NetlinkBufferSize" { return T_BUFFER_SIZE; } > "NetlinkBufferSizeMaxGrowth" { return T_BUFFER_SIZE_MAX_GROWN; } > +"NetlinkNamespace" { return T_NETLINK_NAMESPACE; } > "Mode" { return T_SYNC_MODE; } > "ListenTo" { return T_LISTEN_TO; } > "Family" { return T_FAMILY; } > diff --git a/src/read_config_yy.y b/src/read_config_yy.y > index c9235d3..b0985e7 100644 > --- a/src/read_config_yy.y > +++ b/src/read_config_yy.y > @@ -87,7 +87,7 @@ enum { > %token T_DISABLE_INTERNAL_CACHE T_DISABLE_EXTERNAL_CACHE T_ERROR_QUEUE_LENGTH > %token T_OPTIONS T_TCP_WINDOW_TRACKING T_EXPECT_SYNC > %token T_HELPER T_HELPER_QUEUE_NUM T_HELPER_POLICY T_HELPER_EXPECT_MAX > -%token T_HELPER_EXPECT_TIMEOUT > +%token T_HELPER_EXPECT_TIMEOUT T_NETLINK_NAMESPACE > > %token <string> T_IP T_PATH_VAL > %token <val> T_NUMBER > @@ -1113,6 +1113,7 @@ general_line: hashsize > | unix_line > | netlink_buffer_size > | netlink_buffer_size_max_grown > + | netlink_namespace > | family > | event_iterations_limit > | poll_secs > @@ -1158,6 +1159,11 @@ netlink_events_reliable : T_NETLINK_EVENTS_RELIABLE T_OFF > conf.netlink.events_reliable = 0; > }; > > +netlink_namespace : T_NETLINK_NAMESPACE T_PATH_VAL > +{ > + strncpy(conf.netlink_namespace, $2, FILENAME_MAXLEN); > +}; > + > nice : T_NICE T_SIGNED_NUMBER > { > conf.nice = $2; > diff --git a/src/run.c b/src/run.c > index 3337694..7aa5032 100644 > --- a/src/run.c > +++ b/src/run.c > @@ -30,6 +30,7 @@ > #include "origin.h" > #include "date.h" > #include "internal.h" > +#include "namespace.h" > > #include <errno.h> > #include <signal.h> > @@ -252,6 +253,8 @@ init(void) > return -1; > > /* Initialization */ > + init_namespaces(); > + > if (CONFIG(flags) & (CTD_SYNC_MODE | CTD_STATS_MODE)) > if (ctnl_init() < 0) > return -1; > diff --git a/src/sync-mode.c b/src/sync-mode.c > index e69ecfe..de58b8c 100644 > --- a/src/sync-mode.c > +++ b/src/sync-mode.c > @@ -31,6 +31,7 @@ > #include "origin.h" > #include "internal.h" > #include "external.h" > +#include "namespace.h" > > #include <errno.h> > #include <unistd.h> > @@ -451,7 +452,8 @@ static int init_sync(void) > tx_queue_cb, NULL, STATE(fds)) == -1) > return -1; > > - STATE_SYNC(commit).h = nfct_open(CONFIG(netlink).subsys_id, 0); > + STATE_SYNC(commit).h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, > + STATE(ns_fd)); > if (STATE_SYNC(commit).h == NULL) { > dlog(LOG_ERR, "can't create handler to commit"); > return -1; > diff --git a/src/sync-notrack.c b/src/sync-notrack.c > index a7df4e7..d8010fb 100644 > --- a/src/sync-notrack.c > +++ b/src/sync-notrack.c > @@ -24,6 +24,7 @@ > #include "log.h" > #include "cache.h" > #include "fds.h" > +#include "namespace.h" > > #include <string.h> > > @@ -102,7 +103,7 @@ static void kernel_resync(void) > u_int32_t family = AF_UNSPEC; > int ret; > > - h = nfct_open(CONFIG(netlink).subsys_id, 0); > + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (h == NULL) { > dlog(LOG_ERR, "can't allocate memory for the internal cache"); > return; > -- > 1.7.9.5 > -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Ansis, On Fri, Aug 31, 2012 at 06:11:55PM -0700, Ansis Atteka wrote: > This patch allows conntrackd to open CT Netlink sockets into a given > network namespace. Channel sockets (e.g. UDP) would still be opened into > the same namespace where conntrackd was started. > > The only binary this patch affects is conntrackd. All other binaries (e.g. > conntrack, nfct) would still operate in the same namespace where they were > started. > > To make use of this patch: > 1. create a network namespace: "ip netns add the_ns" > 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf > file inside General {...} section. > > Signed-off-by: Ansis Atteka <aatteka@nicira.com> > --- > include/Makefile.am | 2 +- > include/conntrackd.h | 2 + > include/namespace.h | 12 ++++++ > src/Makefile.am | 3 +- > src/cthelper.c | 3 +- > src/ctnl.c | 20 +++++++-- > src/expect.c | 4 +- > src/external_inject.c | 3 +- > src/internal_bypass.c | 5 ++- > src/namespace.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++ > src/netlink.c | 6 ++- > src/read_config_lex.l | 1 + > src/read_config_yy.y | 8 +++- > src/run.c | 3 ++ > src/sync-mode.c | 4 +- > src/sync-notrack.c | 3 +- > 16 files changed, 175 insertions(+), 16 deletions(-) > create mode 100644 include/namespace.h > create mode 100644 src/namespace.c > > diff --git a/include/Makefile.am b/include/Makefile.am > index 6bd0f7f..e06fd4d 100644 > --- a/include/Makefile.am > +++ b/include/Makefile.am > @@ -6,5 +6,5 @@ noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \ > network.h filter.h queue.h vector.h cidr.h \ > traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \ > process.h origin.h internal.h external.h date.h nfct.h \ > - helper.h myct.h stack.h > + helper.h myct.h stack.h namespace.h > > diff --git a/include/conntrackd.h b/include/conntrackd.h > index 19e613c..c349d72 100644 > --- a/include/conntrackd.h > +++ b/include/conntrackd.h > @@ -94,6 +94,7 @@ struct ct_conf { > int channel_type_global; > struct channel_conf channel[MULTICHANNEL_MAX]; > struct local_conf local; /* unix socket facilities */ > + char netlink_namespace[FILENAME_MAXLEN]; > int nice; > int limit; > int refresh; > @@ -143,6 +144,7 @@ struct ct_conf { > #define STATE(x) st.x > > struct ct_general_state { > + int ns_fd; /* namespace fd for NL sockets */ We use the same coding style of the Linux kernel. Please, break lines at 80 chars. > sigset_t block; > FILE *log; > FILE *stats_log; > diff --git a/include/namespace.h b/include/namespace.h > new file mode 100644 > index 0000000..668a270 > --- /dev/null > +++ b/include/namespace.h > @@ -0,0 +1,12 @@ > +#ifndef _NAMESPACE_H_ > +#define _NAMESPACE_H_ > + > +#include <libmnl/libmnl.h> > +#include <libnetfilter_conntrack/libnetfilter_conntrack.h> > + > +void init_namespaces(void); > + > +struct nfct_handle *nfct_open_ns(u_int8_t, unsigned, int); > +struct mnl_socket *mnl_socket_open_ns(int, int); > + > +#endif > diff --git a/src/Makefile.am b/src/Makefile.am > index d8074d2..60314ab 100644 > --- a/src/Makefile.am > +++ b/src/Makefile.am > @@ -39,7 +39,8 @@ conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \ > external_cache.c external_inject.c \ > internal_cache.c internal_bypass.c \ > read_config_yy.y read_config_lex.l \ > - stack.c helpers.c utils.c expect.c > + stack.c helpers.c utils.c expect.c \ > + namespace.c > > # yacc and lex generate dirty code > read_config_yy.o read_config_lex.o: AM_CFLAGS += -Wno-missing-prototypes -Wno-missing-declarations -Wno-implicit-function-declaration -Wno-nested-externs -Wno-undef -Wno-redundant-decls > diff --git a/src/cthelper.c b/src/cthelper.c > index c119869..b165900 100644 > --- a/src/cthelper.c > +++ b/src/cthelper.c > @@ -22,6 +22,7 @@ > #include "log.h" > #include "fds.h" > #include "helper.h" > +#include "namespace.h" > > #include <unistd.h> > #include <fcntl.h> > @@ -493,7 +494,7 @@ int cthelper_init(void) > return -1; > } > > - STATE_CTH(nl) = mnl_socket_open(NETLINK_NETFILTER); > + STATE_CTH(nl) = mnl_socket_open_ns(NETLINK_NETFILTER, STATE(ns_fd)); > if (STATE_CTH(nl) == NULL) { > dlog(LOG_ERR, "cannot open nfq socket"); > return -1; > diff --git a/src/ctnl.c b/src/ctnl.c > index bb54727..65784c3 100644 > --- a/src/ctnl.c > +++ b/src/ctnl.c > @@ -29,6 +29,7 @@ > #include "origin.h" > #include "date.h" > #include "internal.h" > +#include "namespace.h" > > #include <errno.h> > #include <signal.h> > @@ -399,6 +400,17 @@ static void poll_cb(void *data) > > int ctnl_init(void) > { > + if (CONFIG(netlink_namespace)[0]) { > + STATE(ns_fd) = open(CONFIG(netlink_namespace), O_RDONLY); > + if (STATE(ns_fd) == -1) { > + dlog(LOG_ERR, "could not open network namespace %s: %s", > + CONFIG(netlink_namespace), strerror(errno)); > + return -1; > + } > + } else { > + STATE(ns_fd) = -1; > + } > + > if (CONFIG(flags) & CTD_STATS_MODE) > STATE(mode) = &stats_mode; > else if (CONFIG(flags) & CTD_SYNC_MODE) > @@ -417,7 +429,7 @@ int ctnl_init(void) > } > > /* resynchronize (like 'dump' socket) but it also purges old entries */ > - STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0); > + STATE(resync) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (STATE(resync)== NULL) { > dlog(LOG_ERR, "can't open netlink handler: %s", > strerror(errno)); > @@ -438,7 +450,7 @@ int ctnl_init(void) > fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK); > > if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { > - STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0); > + STATE(dump) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (STATE(dump) == NULL) { > dlog(LOG_ERR, "can't open netlink handler: %s", > strerror(errno)); > @@ -467,7 +479,7 @@ int ctnl_init(void) > } > } > > - STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0); > + STATE(get) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (STATE(get) == NULL) { > dlog(LOG_ERR, "can't open netlink handler: %s", > strerror(errno)); > @@ -481,7 +493,7 @@ int ctnl_init(void) > exp_get_handler, NULL); > } > > - STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0); > + STATE(flush) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (STATE(flush) == NULL) { > dlog(LOG_ERR, "cannot open flusher handler"); > return -1; > diff --git a/src/expect.c b/src/expect.c > index 6069770..bedec2c 100644 > --- a/src/expect.c > +++ b/src/expect.c > @@ -8,7 +8,9 @@ > * This code has been sponsored by Vyatta Inc. <http://www.vyatta.com> > */ > > +#include "conntrackd.h" > #include "helper.h" > +#include "namespace.h" > > #include <stdio.h> > #include <string.h> > @@ -165,7 +167,7 @@ static int cthelper_expect_cmd(struct nf_expect *exp, int cmd) > int ret; > struct nfct_handle *h; > > - h = nfct_open(EXPECT, 0); > + h = nfct_open_ns(EXPECT, 0, STATE(ns_fd)); > if (!h) > return -1; > > diff --git a/src/external_inject.c b/src/external_inject.c > index 0ad3478..5a6680b 100644 > --- a/src/external_inject.c > +++ b/src/external_inject.c > @@ -22,6 +22,7 @@ > #include "cache.h" > #include "origin.h" > #include "external.h" > +#include "namespace.h" > #include "netlink.h" > > #include <libnetfilter_conntrack/libnetfilter_conntrack.h> > @@ -42,7 +43,7 @@ struct { > static int external_inject_init(void) > { > /* handler to directly inject conntracks into kernel-space */ > - inject = nfct_open(CONFIG(netlink).subsys_id, 0); > + inject = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (inject == NULL) { > dlog(LOG_ERR, "can't open netlink handler: %s", > strerror(errno)); > diff --git a/src/internal_bypass.c b/src/internal_bypass.c > index 1194339..520c55d 100644 > --- a/src/internal_bypass.c > +++ b/src/internal_bypass.c > @@ -16,6 +16,7 @@ > #include "netlink.h" > #include "network.h" > #include "origin.h" > +#include "namespace.h" > > static int internal_bypass_init(void) > { > @@ -52,7 +53,7 @@ static void internal_bypass_ct_dump(int fd, int type) > u_int32_t family = AF_UNSPEC; > int ret; > > - h = nfct_open(CONFIG(netlink).subsys_id, 0); > + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (h == NULL) { > dlog(LOG_ERR, "can't allocate memory for the internal cache"); > return; > @@ -183,7 +184,7 @@ static void internal_bypass_exp_dump(int fd, int type) > u_int32_t family = AF_UNSPEC; > int ret; > > - h = nfct_open(CONFIG(netlink).subsys_id, 0); > + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (h == NULL) { > dlog(LOG_ERR, "can't allocate memory for the internal cache"); > return; > diff --git a/src/namespace.c b/src/namespace.c > new file mode 100644 > index 0000000..03db222 > --- /dev/null > +++ b/src/namespace.c > @@ -0,0 +1,112 @@ > +/* > + * Copyright (C) 2012 Nicira, Inc. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > + * > + * Authors: > + * Ansis Atteka <aatteka@nicira.com> > + */ > +#define _GNU_SOURCE > + > +#include "namespace.h" > + > +#include <fcntl.h> > +#include <sched.h> > +#include <sys/syscall.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <unistd.h> > + > +#include "conntrackd.h" > +#include "log.h" > + > +#ifndef CLONE_NEWNET > +#define CLONE_NEWNET 0x40000000 > +#endif > + > +#ifndef __NR_setns > +#ifdef __x86_64 > +#define __NR_setns 308 > +#endif > +#ifdef __i386 > +#define __NR_setns 346 > +#endif > +#endif I can see there's some setns wrapper since glibc 2.14 that will make this portable. What I would try is to check in configure.ac if setns() exists, if so use it. Otherwise, check for __NR_setns, if so define setns yourself in this new namespace.c file. If neither setns() nor __NR_setns exists, then define setns() to return success and print display error and exist if NetlinkNamespace is used. > + > +#ifdef __NR_setns > + > +static int root_fd = -1; This root_fd should go to STATE(ns).root_fd: struct ct_state { ... struct { int cur_fd; /* current namespace */ int root_fd; /* root namespace */ } ns; > + > +void init_namespaces(void) { > + root_fd = open("/proc/self/ns/net", O_RDONLY); > + if (root_fd == -1) { > + dlog(LOG_WARNING, "could not open current namespace"); > + } > +} > + > +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions, > + int ns_fd) { > + struct nfct_handle *handle = NULL; > + > + if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { > + dlog(LOG_ERR, "could not switch between namespaces"); > + } else { > + handle = nfct_open(subsys_id, subscriptions); > + if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { > + dlog(LOG_ERR, "fatal: could not switch back to main namespace"); > + } > + } Oh, this code above is confusing, please make it more readable, like this: if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { dlog(LOG_ERR, "could not switch between namespaces"); return NULL; } handle = nfct_open(subsys_id, subscriptions); if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { dlog(LOG_ERR, "could not switch back to main namespace"); } And you encapsulate part of that code in some new function like: int ctd_change_namespace(int fd) { if (fd != -1) return 0; if (syscall(__NR_setns, fd, CLONE_NEWNET)) { dlog(LOG_ERR, "could not switch back to main namespace"); return -1; } return 0; } That you can reuse over and over again. > + return handle; > +} > + > +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) { > + struct mnl_socket *handle = NULL; > + > + if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { > + dlog(LOG_ERR, "could not switch between namespaces"); > + } else { > + handle = mnl_socket_open(bus); > + if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { > + dlog(LOG_ERR, "fatal: could not switch back to main namespace"); > + } > + } > + return handle; Same thing for this function above. > +} > + > +#else > + > +void init_namespaces(void) { > +} > + > +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions, > + int ns_fd) { > + if (ns_fd == -1) { > + return nfct_open(subsys_id, subscriptions); > + } else { > + dlog(LOG_ERR, "network namespaces are not supported on this system"); > + return NULL; > + } > +} > + > +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) { > + if (ns_fd == -1) { > + return mnl_socket_open(bus); > + } else { > + dlog(LOG_ERR, "network namespaces are not supported on this system"); > + return NULL; > + } > +} You can remove this conditional code above since the #else if you do something like: int ctd_change_namespace(int fd) { #ifdef SETNS_EXISTS if (fd != -1) return 0; if (syscall(__NR_setns, fd, CLONE_NEWNET)) { dlog(LOG_ERR, "could not switch back to main namespace"); return -1; } #endif return 0; } That's all by now. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi, On Wed, Sep 05, 2012 at 06:36:29PM -0700, Ansis Atteka wrote: > On Fri, Aug 31, 2012 at 6:11 PM, Ansis Atteka <aatteka@nicira.com> wrote: > > This patch allows conntrackd to open CT Netlink sockets into a given > > network namespace. Channel sockets (e.g. UDP) would still be opened into > > the same namespace where conntrackd was started. > > > > The only binary this patch affects is conntrackd. All other binaries (e.g. > > conntrack, nfct) would still operate in the same namespace where they were > > started. > > > > To make use of this patch: > > 1. create a network namespace: "ip netns add the_ns" > > 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf > > file inside General {...} section. > > Wanted to provide more details about this patch and also bump it up > for attention. > > Basically, what it does is allows conntrackd to open Conntrack Netlink > sockets into a different namespace than where Channel Sockets were > opened. I see. > This isolation brings benefits to: > 1. security, because the channel socket (and management interface) will > reside in a different namespace. They won't be exposed to the traffic > that traverses the namespace; > 2. flexibility, because arbitrary IP addresses could be used inside that > namespace for Connection Tracking purposes. No need to worry that > there might be overlapping IP addresses with the Management interface; I don't understand this second benefit, could you develop the idea a bit more? > 3. scalability w.r.t. namespaces, because all the namespaces would > end up using a single management interface and IP address in the root > namespace. There wouldn't be need to maintain a dedicated > management interface and IP address inside every namespace. > > Also this patch would prepare soil for my next patches, that would > ease connection state synchronization for virtualized networks even more: > 1. allow single conntrackd instance to synchronize multiple namespaces; > 2. add configuration dynamically to conntrackd (without restarting > the daemon). Those seem interesting to have, definitely. My plan is to release conntrack-tools 1.4.0 by the time Linux kernel 3.6 is released since it contains a major milestone (user-space helper support), that will be quite soon. We can schedule this for some 1.6.0 release. So I can keep these namespace changes in some branch until they are merged to master. I'd start by point 2 then go back to the namespace support, but it's up to you. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Sep 6, 2012 at 10:17 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote: > Hi, > > On Wed, Sep 05, 2012 at 06:36:29PM -0700, Ansis Atteka wrote: >> On Fri, Aug 31, 2012 at 6:11 PM, Ansis Atteka <aatteka@nicira.com> wrote: >> > This patch allows conntrackd to open CT Netlink sockets into a given >> > network namespace. Channel sockets (e.g. UDP) would still be opened into >> > the same namespace where conntrackd was started. >> > >> > The only binary this patch affects is conntrackd. All other binaries (e.g. >> > conntrack, nfct) would still operate in the same namespace where they were >> > started. >> > >> > To make use of this patch: >> > 1. create a network namespace: "ip netns add the_ns" >> > 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf >> > file inside General {...} section. >> >> Wanted to provide more details about this patch and also bump it up >> for attention. >> >> Basically, what it does is allows conntrackd to open Conntrack Netlink >> sockets into a different namespace than where Channel Sockets were >> opened. > > I see. > >> This isolation brings benefits to: >> 1. security, because the channel socket (and management interface) will >> reside in a different namespace. They won't be exposed to the traffic >> that traverses the namespace; >> 2. flexibility, because arbitrary IP addresses could be used inside that >> namespace for Connection Tracking purposes. No need to worry that >> there might be overlapping IP addresses with the Management interface; > > I don't understand this second benefit, could you develop the idea a > bit more? My point was that network administrator could assign arbitrary IP addresses and routing rules inside the namespace, without worrying that they might conflict with management IP address and/or routes. I guess this is not that important in static setup where IP addresses are configured only once and left the way they are. > >> 3. scalability w.r.t. namespaces, because all the namespaces would >> end up using a single management interface and IP address in the root >> namespace. There wouldn't be need to maintain a dedicated >> management interface and IP address inside every namespace. >> >> Also this patch would prepare soil for my next patches, that would >> ease connection state synchronization for virtualized networks even more: >> 1. allow single conntrackd instance to synchronize multiple namespaces; >> 2. add configuration dynamically to conntrackd (without restarting >> the daemon). > > Those seem interesting to have, definitely. > > My plan is to release conntrack-tools 1.4.0 by the time Linux kernel > 3.6 is released since it contains a major milestone (user-space > helper support), that will be quite soon. > > We can schedule this for some 1.6.0 release. So I can keep these > namespace changes in some branch until they are merged to master. > > I'd start by point 2 then go back to the namespace support, but it's > up to you. Yes, that seems to be the right sequence to do that. Thanks, Ansis -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Sep 6, 2012 at 10:02 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote: > Hi Ansis, > > On Fri, Aug 31, 2012 at 06:11:55PM -0700, Ansis Atteka wrote: >> This patch allows conntrackd to open CT Netlink sockets into a given >> network namespace. Channel sockets (e.g. UDP) would still be opened into >> the same namespace where conntrackd was started. >> >> The only binary this patch affects is conntrackd. All other binaries (e.g. >> conntrack, nfct) would still operate in the same namespace where they were >> started. >> >> To make use of this patch: >> 1. create a network namespace: "ip netns add the_ns" >> 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf >> file inside General {...} section. >> >> Signed-off-by: Ansis Atteka <aatteka@nicira.com> >> --- >> include/Makefile.am | 2 +- >> include/conntrackd.h | 2 + >> include/namespace.h | 12 ++++++ >> src/Makefile.am | 3 +- >> src/cthelper.c | 3 +- >> src/ctnl.c | 20 +++++++-- >> src/expect.c | 4 +- >> src/external_inject.c | 3 +- >> src/internal_bypass.c | 5 ++- >> src/namespace.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++ >> src/netlink.c | 6 ++- >> src/read_config_lex.l | 1 + >> src/read_config_yy.y | 8 +++- >> src/run.c | 3 ++ >> src/sync-mode.c | 4 +- >> src/sync-notrack.c | 3 +- >> 16 files changed, 175 insertions(+), 16 deletions(-) >> create mode 100644 include/namespace.h >> create mode 100644 src/namespace.c >> >> diff --git a/include/Makefile.am b/include/Makefile.am >> index 6bd0f7f..e06fd4d 100644 >> --- a/include/Makefile.am >> +++ b/include/Makefile.am >> @@ -6,5 +6,5 @@ noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \ >> network.h filter.h queue.h vector.h cidr.h \ >> traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \ >> process.h origin.h internal.h external.h date.h nfct.h \ >> - helper.h myct.h stack.h >> + helper.h myct.h stack.h namespace.h >> >> diff --git a/include/conntrackd.h b/include/conntrackd.h >> index 19e613c..c349d72 100644 >> --- a/include/conntrackd.h >> +++ b/include/conntrackd.h >> @@ -94,6 +94,7 @@ struct ct_conf { >> int channel_type_global; >> struct channel_conf channel[MULTICHANNEL_MAX]; >> struct local_conf local; /* unix socket facilities */ >> + char netlink_namespace[FILENAME_MAXLEN]; >> int nice; >> int limit; >> int refresh; >> @@ -143,6 +144,7 @@ struct ct_conf { >> #define STATE(x) st.x >> >> struct ct_general_state { >> + int ns_fd; /* namespace fd for NL sockets */ > > We use the same coding style of the Linux kernel. Please, break lines > at 80 chars. Ok. Forgot that tabs are treated as 8 characters instead of 4. > >> sigset_t block; >> FILE *log; >> FILE *stats_log; >> diff --git a/include/namespace.h b/include/namespace.h >> new file mode 100644 >> index 0000000..668a270 >> --- /dev/null >> +++ b/include/namespace.h >> @@ -0,0 +1,12 @@ >> +#ifndef _NAMESPACE_H_ >> +#define _NAMESPACE_H_ >> + >> +#include <libmnl/libmnl.h> >> +#include <libnetfilter_conntrack/libnetfilter_conntrack.h> >> + >> +void init_namespaces(void); >> + >> +struct nfct_handle *nfct_open_ns(u_int8_t, unsigned, int); >> +struct mnl_socket *mnl_socket_open_ns(int, int); >> + >> +#endif >> diff --git a/src/Makefile.am b/src/Makefile.am >> index d8074d2..60314ab 100644 >> --- a/src/Makefile.am >> +++ b/src/Makefile.am >> @@ -39,7 +39,8 @@ conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \ >> external_cache.c external_inject.c \ >> internal_cache.c internal_bypass.c \ >> read_config_yy.y read_config_lex.l \ >> - stack.c helpers.c utils.c expect.c >> + stack.c helpers.c utils.c expect.c \ >> + namespace.c >> >> # yacc and lex generate dirty code >> read_config_yy.o read_config_lex.o: AM_CFLAGS += -Wno-missing-prototypes -Wno-missing-declarations -Wno-implicit-function-declaration -Wno-nested-externs -Wno-undef -Wno-redundant-decls >> diff --git a/src/cthelper.c b/src/cthelper.c >> index c119869..b165900 100644 >> --- a/src/cthelper.c >> +++ b/src/cthelper.c >> @@ -22,6 +22,7 @@ >> #include "log.h" >> #include "fds.h" >> #include "helper.h" >> +#include "namespace.h" >> >> #include <unistd.h> >> #include <fcntl.h> >> @@ -493,7 +494,7 @@ int cthelper_init(void) >> return -1; >> } >> >> - STATE_CTH(nl) = mnl_socket_open(NETLINK_NETFILTER); >> + STATE_CTH(nl) = mnl_socket_open_ns(NETLINK_NETFILTER, STATE(ns_fd)); >> if (STATE_CTH(nl) == NULL) { >> dlog(LOG_ERR, "cannot open nfq socket"); >> return -1; >> diff --git a/src/ctnl.c b/src/ctnl.c >> index bb54727..65784c3 100644 >> --- a/src/ctnl.c >> +++ b/src/ctnl.c >> @@ -29,6 +29,7 @@ >> #include "origin.h" >> #include "date.h" >> #include "internal.h" >> +#include "namespace.h" >> >> #include <errno.h> >> #include <signal.h> >> @@ -399,6 +400,17 @@ static void poll_cb(void *data) >> >> int ctnl_init(void) >> { >> + if (CONFIG(netlink_namespace)[0]) { >> + STATE(ns_fd) = open(CONFIG(netlink_namespace), O_RDONLY); >> + if (STATE(ns_fd) == -1) { >> + dlog(LOG_ERR, "could not open network namespace %s: %s", >> + CONFIG(netlink_namespace), strerror(errno)); >> + return -1; >> + } >> + } else { >> + STATE(ns_fd) = -1; >> + } >> + >> if (CONFIG(flags) & CTD_STATS_MODE) >> STATE(mode) = &stats_mode; >> else if (CONFIG(flags) & CTD_SYNC_MODE) >> @@ -417,7 +429,7 @@ int ctnl_init(void) >> } >> >> /* resynchronize (like 'dump' socket) but it also purges old entries */ >> - STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0); >> + STATE(resync) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (STATE(resync)== NULL) { >> dlog(LOG_ERR, "can't open netlink handler: %s", >> strerror(errno)); >> @@ -438,7 +450,7 @@ int ctnl_init(void) >> fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK); >> >> if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { >> - STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0); >> + STATE(dump) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (STATE(dump) == NULL) { >> dlog(LOG_ERR, "can't open netlink handler: %s", >> strerror(errno)); >> @@ -467,7 +479,7 @@ int ctnl_init(void) >> } >> } >> >> - STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0); >> + STATE(get) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (STATE(get) == NULL) { >> dlog(LOG_ERR, "can't open netlink handler: %s", >> strerror(errno)); >> @@ -481,7 +493,7 @@ int ctnl_init(void) >> exp_get_handler, NULL); >> } >> >> - STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0); >> + STATE(flush) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (STATE(flush) == NULL) { >> dlog(LOG_ERR, "cannot open flusher handler"); >> return -1; >> diff --git a/src/expect.c b/src/expect.c >> index 6069770..bedec2c 100644 >> --- a/src/expect.c >> +++ b/src/expect.c >> @@ -8,7 +8,9 @@ >> * This code has been sponsored by Vyatta Inc. <http://www.vyatta.com> >> */ >> >> +#include "conntrackd.h" >> #include "helper.h" >> +#include "namespace.h" >> >> #include <stdio.h> >> #include <string.h> >> @@ -165,7 +167,7 @@ static int cthelper_expect_cmd(struct nf_expect *exp, int cmd) >> int ret; >> struct nfct_handle *h; >> >> - h = nfct_open(EXPECT, 0); >> + h = nfct_open_ns(EXPECT, 0, STATE(ns_fd)); >> if (!h) >> return -1; >> >> diff --git a/src/external_inject.c b/src/external_inject.c >> index 0ad3478..5a6680b 100644 >> --- a/src/external_inject.c >> +++ b/src/external_inject.c >> @@ -22,6 +22,7 @@ >> #include "cache.h" >> #include "origin.h" >> #include "external.h" >> +#include "namespace.h" >> #include "netlink.h" >> >> #include <libnetfilter_conntrack/libnetfilter_conntrack.h> >> @@ -42,7 +43,7 @@ struct { >> static int external_inject_init(void) >> { >> /* handler to directly inject conntracks into kernel-space */ >> - inject = nfct_open(CONFIG(netlink).subsys_id, 0); >> + inject = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (inject == NULL) { >> dlog(LOG_ERR, "can't open netlink handler: %s", >> strerror(errno)); >> diff --git a/src/internal_bypass.c b/src/internal_bypass.c >> index 1194339..520c55d 100644 >> --- a/src/internal_bypass.c >> +++ b/src/internal_bypass.c >> @@ -16,6 +16,7 @@ >> #include "netlink.h" >> #include "network.h" >> #include "origin.h" >> +#include "namespace.h" >> >> static int internal_bypass_init(void) >> { >> @@ -52,7 +53,7 @@ static void internal_bypass_ct_dump(int fd, int type) >> u_int32_t family = AF_UNSPEC; >> int ret; >> >> - h = nfct_open(CONFIG(netlink).subsys_id, 0); >> + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (h == NULL) { >> dlog(LOG_ERR, "can't allocate memory for the internal cache"); >> return; >> @@ -183,7 +184,7 @@ static void internal_bypass_exp_dump(int fd, int type) >> u_int32_t family = AF_UNSPEC; >> int ret; >> >> - h = nfct_open(CONFIG(netlink).subsys_id, 0); >> + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (h == NULL) { >> dlog(LOG_ERR, "can't allocate memory for the internal cache"); >> return; >> diff --git a/src/namespace.c b/src/namespace.c >> new file mode 100644 >> index 0000000..03db222 >> --- /dev/null >> +++ b/src/namespace.c >> @@ -0,0 +1,112 @@ >> +/* >> + * Copyright (C) 2012 Nicira, Inc. >> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License as published by >> + * the Free Software Foundation; either version 2 of the License, or >> + * (at your option) any later version. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + * >> + * You should have received a copy of the GNU General Public License >> + * along with this program; if not, write to the Free Software >> + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. >> + * >> + * Authors: >> + * Ansis Atteka <aatteka@nicira.com> >> + */ >> +#define _GNU_SOURCE >> + >> +#include "namespace.h" >> + >> +#include <fcntl.h> >> +#include <sched.h> >> +#include <sys/syscall.h> >> +#include <sys/types.h> >> +#include <sys/stat.h> >> +#include <unistd.h> >> + >> +#include "conntrackd.h" >> +#include "log.h" >> + >> +#ifndef CLONE_NEWNET >> +#define CLONE_NEWNET 0x40000000 >> +#endif >> + >> +#ifndef __NR_setns >> +#ifdef __x86_64 >> +#define __NR_setns 308 >> +#endif >> +#ifdef __i386 >> +#define __NR_setns 346 >> +#endif >> +#endif > > I can see there's some setns wrapper since glibc 2.14 that will make > this portable. ok > > What I would try is to check in configure.ac if setns() exists, if so > use it. Otherwise, check for __NR_setns, if so define setns yourself > in this new namespace.c file. If neither setns() nor __NR_setns > exists, then define setns() to return success and print display error > and exist if NetlinkNamespace is used. ok > >> + >> +#ifdef __NR_setns >> + >> +static int root_fd = -1; > > This root_fd should go to STATE(ns).root_fd: Right, it makes more sense to put it inside ct_state for now. Once conntrackd will become multi-namespace aware, we will have to move it somewhere else, because all the namespaces(i.e. ct_states) will use the same root_fd. > > struct ct_state { > ... > struct { > int cur_fd; /* current namespace */ > int root_fd; /* root namespace */ > } ns; > >> + >> +void init_namespaces(void) { >> + root_fd = open("/proc/self/ns/net", O_RDONLY); >> + if (root_fd == -1) { >> + dlog(LOG_WARNING, "could not open current namespace"); >> + } >> +} >> + >> +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions, >> + int ns_fd) { >> + struct nfct_handle *handle = NULL; >> + >> + if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { >> + dlog(LOG_ERR, "could not switch between namespaces"); >> + } else { >> + handle = nfct_open(subsys_id, subscriptions); >> + if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { >> + dlog(LOG_ERR, "fatal: could not switch back to main namespace"); >> + } >> + } > > Oh, this code above is confusing, please make it more readable, like > this: > > if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { > dlog(LOG_ERR, "could not switch between namespaces"); > return NULL; > } > > handle = nfct_open(subsys_id, subscriptions); > if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { > dlog(LOG_ERR, "could not switch back to main namespace"); > } > > And you encapsulate part of that code in some new function like: > > int ctd_change_namespace(int fd) > { > if (fd != -1) > return 0; > > if (syscall(__NR_setns, fd, CLONE_NEWNET)) { > dlog(LOG_ERR, "could not switch back to main namespace"); > return -1; > } > > return 0; > } > > That you can reuse over and over again. ok > >> + return handle; >> +} >> + >> +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) { >> + struct mnl_socket *handle = NULL; >> + >> + if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { >> + dlog(LOG_ERR, "could not switch between namespaces"); >> + } else { >> + handle = mnl_socket_open(bus); >> + if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { >> + dlog(LOG_ERR, "fatal: could not switch back to main namespace"); >> + } >> + } >> + return handle; > > Same thing for this function above. ok > >> +} >> + >> +#else >> + >> +void init_namespaces(void) { >> +} >> + >> +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions, >> + int ns_fd) { >> + if (ns_fd == -1) { >> + return nfct_open(subsys_id, subscriptions); >> + } else { >> + dlog(LOG_ERR, "network namespaces are not supported on this system"); >> + return NULL; >> + } >> +} >> + >> +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) { >> + if (ns_fd == -1) { >> + return mnl_socket_open(bus); >> + } else { >> + dlog(LOG_ERR, "network namespaces are not supported on this system"); >> + return NULL; >> + } >> +} > > You can remove this conditional code above since the #else if you do > something like: > > int ctd_change_namespace(int fd) > { > #ifdef SETNS_EXISTS > if (fd != -1) > return 0; > > if (syscall(__NR_setns, fd, CLONE_NEWNET)) { > dlog(LOG_ERR, "could not switch back to main namespace"); > return -1; > } > #endif > return 0; > } > > That's all by now. Thanks for review! I will send updated patch soon. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Sep 10, 2012 at 04:24:49PM -0700, Ansis Atteka wrote: [...] > > > >> + > >> +#ifdef __NR_setns > >> + > >> +static int root_fd = -1; > > > > This root_fd should go to STATE(ns).root_fd: > > Right, it makes more sense to put it inside ct_state for > now. > > Once conntrackd will become multi-namespace > aware, we will have to move it somewhere else, > because all the namespaces(i.e. ct_states) will > use the same root_fd. Can you develop a bit more the change that should happen in your opinion) to conntrackd to support multi-namespace? I'd like to get an idea before that patch lands on the mailing list. [...] > Thanks for review! I will send updated patch soon. Thanks. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Sep 11, 2012 at 8:44 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote: > On Mon, Sep 10, 2012 at 04:24:49PM -0700, Ansis Atteka wrote: > [...] >> > >> >> + >> >> +#ifdef __NR_setns >> >> + >> >> +static int root_fd = -1; >> > >> > This root_fd should go to STATE(ns).root_fd: >> >> Right, it makes more sense to put it inside ct_state for >> now. >> >> Once conntrackd will become multi-namespace >> aware, we will have to move it somewhere else, >> because all the namespaces(i.e. ct_states) will >> use the same root_fd. > > Can you develop a bit more the change that should happen in your > opinion) to conntrackd to support multi-namespace? I'd like to get an > idea before that patch lands on the mailing list. I know that this would be a massive change, so I might miss something in the proposal. Anyway feel free to correct me or ask for more details where necessary. Here is a list of necessary changes: 1. Quite a lot of refactoring in configuration parser. I would suggest to split ct_conf struct into three logical pieces (i.e. smaller structs): a. channel config (instance per remote conntrackd instance) b. conntrack-state config (instance per namespace) c. static/global config (single instance; Would contain path to log file, unix socket e.t.c.) This decoupling would allow much more easily to maintain relation between conntrack-states and channels (for example, when multiple namespaces are using the same channel). Also, currently CONFIG(X) macro allows to reference only a single ct_conf instance. This will need to be changed so that each namespace has its own ct_conf_sync instance. And each channel has its own ct_conf_channel instance. Also, I am afraid that, for this to make more sense, I would have to extend conntrackd.conf syntax, For example,introduce following top-level sections: channel {}, sync {} and global {} respectively. 2. Allow to add, remove and list configuration dynamically without restarting conntrackd. This would require ability to start conntrackd with minimal global/static configuration. After that add namespaces and channel configuration by using Unix Domain socket. For example, instead of starting conntrackd with following command: conntrackd -C /etc/conntrackd/conntrackd.conf One would have to use something like this: conntrackd --global-config /etc/conntrackd/conntrackd_global.conf # This config file would just specify Unix socket, log file e.t.c. and then on-the-fly add channel and namespace configuration with: conntrackd -U /var/run/conntrackd.ctl --add /etc/conntrackd/conntrackd_namespace1.conf conntrackd -U /var/run/conntrackd.ctl --add /etc/conntrackd/conntrackd_channel1.conf conntrackd -U /var/run/conntrackd.ctl --add /etc/conntrackd/conntrackd_namespace2.conf conntrackd -U /var/run/conntrackd.ctl --add /etc/conntrackd/conntrackd_channel2.conf We could glue namespaces together with channels by using some IDs. Another question is, whether over the Unix domain socket we would prefer to transfer binary (already parsed) or textual (yet unparsed) configuration? Also, I would need to figure out if/how to maintain backward compatibility with already existing commands, when there are multiple namespaces (e.g. dump internal cache, commit external cache ...). 3. Wire protocol format improvements, so that namespace ID would be encapsulated into the protocol too. This is required, when same channel is being used by multiple namespaces. 4. Similarly as CONFIG(x) was broken down into 3 logical pieces, the same thing would need to be done for STATE(x) macros. Thanks, Ansis -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Sep 13, 2012 at 12:37:18AM -0700, Ansis Atteka wrote: [...] > I know that this would be a massive change, so I might miss > something in the proposal. Anyway feel free to correct me or ask for > more details where necessary. Here is a list of necessary changes: > > 1. Quite a lot of refactoring in configuration parser. > > I would suggest to split ct_conf struct into three logical pieces (i.e. smaller > structs): > a. channel config (instance per remote conntrackd instance) > b. conntrack-state config (instance per namespace) > c. static/global config (single instance; Would contain path to log > file, unix socket e.t.c.) I always wanted to clean up ct_conf. See patch attached, I didn't commit it yet since I want to give it some test (Please, not need to rebase the patch we're currently discussing upon it). > This decoupling would allow much more easily to maintain relation > between conntrack-states and channels (for example, when multiple > namespaces are using the same channel). I'm not familiar with the channel definition you're using. Note that conntrackd already uses the name `channel' for the state-sync link abstraction (ie. TCP / UDP / MCAST / TIPC ...). > Also, currently CONFIG(X) macro allows to reference only a single ct_conf > instance. This will need to be changed so that each namespace has its own > ct_conf_sync instance. And each channel has its own ct_conf_channel instance. > > Also, I am afraid that, for this to make more sense, I would have to extend > conntrackd.conf syntax, For example,introduce following top-level sections: > channel {}, sync {} and global {} respectively. > > 2. Allow to add, remove and list configuration dynamically without > restarting conntrackd. This would require ability to start conntrackd > with minimal global/static configuration. After that add namespaces and > channel configuration by using Unix Domain socket. > > For example, instead of starting conntrackd with following command: > conntrackd -C /etc/conntrackd/conntrackd.conf > > One would have to use something like this: > conntrackd --global-config /etc/conntrackd/conntrackd_global.conf # > This config file would just specify Unix socket, log file e.t.c. > and then on-the-fly add channel and namespace configuration with: > conntrackd -U /var/run/conntrackd.ctl --add > /etc/conntrackd/conntrackd_namespace1.conf > conntrackd -U /var/run/conntrackd.ctl --add > /etc/conntrackd/conntrackd_channel1.conf > conntrackd -U /var/run/conntrackd.ctl --add > /etc/conntrackd/conntrackd_namespace2.conf > conntrackd -U /var/run/conntrackd.ctl --add > /etc/conntrackd/conntrackd_channel2.conf > > We could glue namespaces together with channels by using some IDs. > > Another question is, whether over the Unix domain socket we would prefer to > transfer binary (already parsed) or textual (yet unparsed) configuration? > > Also, I would need to figure out if/how to maintain backward compatibility with > already existing commands, when there are multiple namespaces (e.g. dump > internal cache, commit external cache ...). > > 3. Wire protocol format improvements, so that namespace ID would be encapsulated > into the protocol too. This is required, when same channel is being > used by multiple namespaces. This only requires adding one new TLV attribute to identify this. So we don't need to bloat the header with one field that is not use. > 4. Similarly as CONFIG(x) was broken down into 3 logical pieces, the > same thing would > need to be done for STATE(x) macros. This seems to be a huge changeset what you're proposing. I need some convincing architecture example that describes how this can be used before you submit such a big patch. I need to understand it to know if there is a different way to make this. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Sep 18, 2012 at 12:23 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote: > On Thu, Sep 13, 2012 at 12:37:18AM -0700, Ansis Atteka wrote: > [...] >> I know that this would be a massive change, so I might miss >> something in the proposal. Anyway feel free to correct me or ask for >> more details where necessary. Here is a list of necessary changes: >> >> 1. Quite a lot of refactoring in configuration parser. >> >> I would suggest to split ct_conf struct into three logical pieces (i.e. smaller >> structs): >> a. channel config (instance per remote conntrackd instance) >> b. conntrack-state config (instance per namespace) >> c. static/global config (single instance; Would contain path to log >> file, unix socket e.t.c.) > > I always wanted to clean up ct_conf. See patch attached, I didn't > commit it yet since I want to give it some test (Please, not need to > rebase the patch we're currently discussing upon it). I don't see any patches attached. > >> This decoupling would allow much more easily to maintain relation >> between conntrack-states and channels (for example, when multiple >> namespaces are using the same channel). > > I'm not familiar with the channel definition you're using. By "channel" I meant channel & channel_ops structures. > > Note that conntrackd already uses the name `channel' for the state-sync > link abstraction (ie. TCP / UDP / MCAST / TIPC ...). I guess we are actually talking about the same thing. > >> Also, currently CONFIG(X) macro allows to reference only a single ct_conf >> instance. This will need to be changed so that each namespace has its own >> ct_conf_sync instance. And each channel has its own ct_conf_channel instance. >> >> Also, I am afraid that, for this to make more sense, I would have to extend >> conntrackd.conf syntax, For example,introduce following top-level sections: >> channel {}, sync {} and global {} respectively. >> >> 2. Allow to add, remove and list configuration dynamically without >> restarting conntrackd. This would require ability to start conntrackd >> with minimal global/static configuration. After that add namespaces and >> channel configuration by using Unix Domain socket. >> >> For example, instead of starting conntrackd with following command: >> conntrackd -C /etc/conntrackd/conntrackd.conf >> >> One would have to use something like this: >> conntrackd --global-config /etc/conntrackd/conntrackd_global.conf # >> This config file would just specify Unix socket, log file e.t.c. >> and then on-the-fly add channel and namespace configuration with: >> conntrackd -U /var/run/conntrackd.ctl --add >> /etc/conntrackd/conntrackd_namespace1.conf >> conntrackd -U /var/run/conntrackd.ctl --add >> /etc/conntrackd/conntrackd_channel1.conf >> conntrackd -U /var/run/conntrackd.ctl --add >> /etc/conntrackd/conntrackd_namespace2.conf >> conntrackd -U /var/run/conntrackd.ctl --add >> /etc/conntrackd/conntrackd_channel2.conf >> >> We could glue namespaces together with channels by using some IDs. >> >> Another question is, whether over the Unix domain socket we would prefer to >> transfer binary (already parsed) or textual (yet unparsed) configuration? >> >> Also, I would need to figure out if/how to maintain backward compatibility with >> already existing commands, when there are multiple namespaces (e.g. dump >> internal cache, commit external cache ...). >> >> 3. Wire protocol format improvements, so that namespace ID would be encapsulated >> into the protocol too. This is required, when same channel is being >> used by multiple namespaces. > > This only requires adding one new TLV attribute to identify this. So > we don't need to bloat the header with one field that is not use. Here is how I am currently doing that: struct nethdr { #if __BYTE_ORDER == __LITTLE_ENDIAN uint8_t type:4, version:4; #elif __BYTE_ORDER == __BIG_ENDIAN uint8_t version:4, type:4; #else #error "Unknown system endianess!" #endif uint8_t flags; uint16_t len; uint32_t seq; uint32_t nsid; < -present only if nethdr.flag & }; nsid is the namespace id. This field would be present only if nethdr.flags & NET_F_NAMESPACE != 0. Could you please elaborate more on your suggested approach? If I understand it correctly, then you want to add the namespace id inside the data section that follows immediately after the nethdr structure as TLV variable? Also, if I understand correctly, then after nethdr conntrackd simply transfers nf_conntrack struct (or the contents of cache object). I have had imagined that we should have cache table per namespace, so storing namespace id inside nf_conntrackd would become redundant. What is your take on this? > >> 4. Similarly as CONFIG(x) was broken down into 3 logical pieces, the >> same thing would >> need to be done for STATE(x) macros. > > This seems to be a huge changeset what you're proposing. > > I need some convincing architecture example that describes how this > can be used before you submit such a big patch. I need to understand > it to know if there is a different way to make this. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/include/Makefile.am b/include/Makefile.am index 6bd0f7f..e06fd4d 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -6,5 +6,5 @@ noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \ network.h filter.h queue.h vector.h cidr.h \ traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \ process.h origin.h internal.h external.h date.h nfct.h \ - helper.h myct.h stack.h + helper.h myct.h stack.h namespace.h diff --git a/include/conntrackd.h b/include/conntrackd.h index 19e613c..c349d72 100644 --- a/include/conntrackd.h +++ b/include/conntrackd.h @@ -94,6 +94,7 @@ struct ct_conf { int channel_type_global; struct channel_conf channel[MULTICHANNEL_MAX]; struct local_conf local; /* unix socket facilities */ + char netlink_namespace[FILENAME_MAXLEN]; int nice; int limit; int refresh; @@ -143,6 +144,7 @@ struct ct_conf { #define STATE(x) st.x struct ct_general_state { + int ns_fd; /* namespace fd for NL sockets */ sigset_t block; FILE *log; FILE *stats_log; diff --git a/include/namespace.h b/include/namespace.h new file mode 100644 index 0000000..668a270 --- /dev/null +++ b/include/namespace.h @@ -0,0 +1,12 @@ +#ifndef _NAMESPACE_H_ +#define _NAMESPACE_H_ + +#include <libmnl/libmnl.h> +#include <libnetfilter_conntrack/libnetfilter_conntrack.h> + +void init_namespaces(void); + +struct nfct_handle *nfct_open_ns(u_int8_t, unsigned, int); +struct mnl_socket *mnl_socket_open_ns(int, int); + +#endif diff --git a/src/Makefile.am b/src/Makefile.am index d8074d2..60314ab 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -39,7 +39,8 @@ conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \ external_cache.c external_inject.c \ internal_cache.c internal_bypass.c \ read_config_yy.y read_config_lex.l \ - stack.c helpers.c utils.c expect.c + stack.c helpers.c utils.c expect.c \ + namespace.c # yacc and lex generate dirty code read_config_yy.o read_config_lex.o: AM_CFLAGS += -Wno-missing-prototypes -Wno-missing-declarations -Wno-implicit-function-declaration -Wno-nested-externs -Wno-undef -Wno-redundant-decls diff --git a/src/cthelper.c b/src/cthelper.c index c119869..b165900 100644 --- a/src/cthelper.c +++ b/src/cthelper.c @@ -22,6 +22,7 @@ #include "log.h" #include "fds.h" #include "helper.h" +#include "namespace.h" #include <unistd.h> #include <fcntl.h> @@ -493,7 +494,7 @@ int cthelper_init(void) return -1; } - STATE_CTH(nl) = mnl_socket_open(NETLINK_NETFILTER); + STATE_CTH(nl) = mnl_socket_open_ns(NETLINK_NETFILTER, STATE(ns_fd)); if (STATE_CTH(nl) == NULL) { dlog(LOG_ERR, "cannot open nfq socket"); return -1; diff --git a/src/ctnl.c b/src/ctnl.c index bb54727..65784c3 100644 --- a/src/ctnl.c +++ b/src/ctnl.c @@ -29,6 +29,7 @@ #include "origin.h" #include "date.h" #include "internal.h" +#include "namespace.h" #include <errno.h> #include <signal.h> @@ -399,6 +400,17 @@ static void poll_cb(void *data) int ctnl_init(void) { + if (CONFIG(netlink_namespace)[0]) { + STATE(ns_fd) = open(CONFIG(netlink_namespace), O_RDONLY); + if (STATE(ns_fd) == -1) { + dlog(LOG_ERR, "could not open network namespace %s: %s", + CONFIG(netlink_namespace), strerror(errno)); + return -1; + } + } else { + STATE(ns_fd) = -1; + } + if (CONFIG(flags) & CTD_STATS_MODE) STATE(mode) = &stats_mode; else if (CONFIG(flags) & CTD_SYNC_MODE) @@ -417,7 +429,7 @@ int ctnl_init(void) } /* resynchronize (like 'dump' socket) but it also purges old entries */ - STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0); + STATE(resync) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); if (STATE(resync)== NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); @@ -438,7 +450,7 @@ int ctnl_init(void) fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK); if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { - STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0); + STATE(dump) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); if (STATE(dump) == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); @@ -467,7 +479,7 @@ int ctnl_init(void) } } - STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0); + STATE(get) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); if (STATE(get) == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); @@ -481,7 +493,7 @@ int ctnl_init(void) exp_get_handler, NULL); } - STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0); + STATE(flush) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); if (STATE(flush) == NULL) { dlog(LOG_ERR, "cannot open flusher handler"); return -1; diff --git a/src/expect.c b/src/expect.c index 6069770..bedec2c 100644 --- a/src/expect.c +++ b/src/expect.c @@ -8,7 +8,9 @@ * This code has been sponsored by Vyatta Inc. <http://www.vyatta.com> */ +#include "conntrackd.h" #include "helper.h" +#include "namespace.h" #include <stdio.h> #include <string.h> @@ -165,7 +167,7 @@ static int cthelper_expect_cmd(struct nf_expect *exp, int cmd) int ret; struct nfct_handle *h; - h = nfct_open(EXPECT, 0); + h = nfct_open_ns(EXPECT, 0, STATE(ns_fd)); if (!h) return -1; diff --git a/src/external_inject.c b/src/external_inject.c index 0ad3478..5a6680b 100644 --- a/src/external_inject.c +++ b/src/external_inject.c @@ -22,6 +22,7 @@ #include "cache.h" #include "origin.h" #include "external.h" +#include "namespace.h" #include "netlink.h" #include <libnetfilter_conntrack/libnetfilter_conntrack.h> @@ -42,7 +43,7 @@ struct { static int external_inject_init(void) { /* handler to directly inject conntracks into kernel-space */ - inject = nfct_open(CONFIG(netlink).subsys_id, 0); + inject = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); if (inject == NULL) { dlog(LOG_ERR, "can't open netlink handler: %s", strerror(errno)); diff --git a/src/internal_bypass.c b/src/internal_bypass.c index 1194339..520c55d 100644 --- a/src/internal_bypass.c +++ b/src/internal_bypass.c @@ -16,6 +16,7 @@ #include "netlink.h" #include "network.h" #include "origin.h" +#include "namespace.h" static int internal_bypass_init(void) { @@ -52,7 +53,7 @@ static void internal_bypass_ct_dump(int fd, int type) u_int32_t family = AF_UNSPEC; int ret; - h = nfct_open(CONFIG(netlink).subsys_id, 0); + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); if (h == NULL) { dlog(LOG_ERR, "can't allocate memory for the internal cache"); return; @@ -183,7 +184,7 @@ static void internal_bypass_exp_dump(int fd, int type) u_int32_t family = AF_UNSPEC; int ret; - h = nfct_open(CONFIG(netlink).subsys_id, 0); + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); if (h == NULL) { dlog(LOG_ERR, "can't allocate memory for the internal cache"); return; diff --git a/src/namespace.c b/src/namespace.c new file mode 100644 index 0000000..03db222 --- /dev/null +++ b/src/namespace.c @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2012 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Authors: + * Ansis Atteka <aatteka@nicira.com> + */ +#define _GNU_SOURCE + +#include "namespace.h" + +#include <fcntl.h> +#include <sched.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "conntrackd.h" +#include "log.h" + +#ifndef CLONE_NEWNET +#define CLONE_NEWNET 0x40000000 +#endif + +#ifndef __NR_setns +#ifdef __x86_64 +#define __NR_setns 308 +#endif +#ifdef __i386 +#define __NR_setns 346 +#endif +#endif + +#ifdef __NR_setns + +static int root_fd = -1; + +void init_namespaces(void) { + root_fd = open("/proc/self/ns/net", O_RDONLY); + if (root_fd == -1) { + dlog(LOG_WARNING, "could not open current namespace"); + } +} + +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions, + int ns_fd) { + struct nfct_handle *handle = NULL; + + if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { + dlog(LOG_ERR, "could not switch between namespaces"); + } else { + handle = nfct_open(subsys_id, subscriptions); + if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { + dlog(LOG_ERR, "fatal: could not switch back to main namespace"); + } + } + return handle; +} + +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) { + struct mnl_socket *handle = NULL; + + if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { + dlog(LOG_ERR, "could not switch between namespaces"); + } else { + handle = mnl_socket_open(bus); + if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { + dlog(LOG_ERR, "fatal: could not switch back to main namespace"); + } + } + return handle; +} + +#else + +void init_namespaces(void) { +} + +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions, + int ns_fd) { + if (ns_fd == -1) { + return nfct_open(subsys_id, subscriptions); + } else { + dlog(LOG_ERR, "network namespaces are not supported on this system"); + return NULL; + } +} + +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) { + if (ns_fd == -1) { + return mnl_socket_open(bus); + } else { + dlog(LOG_ERR, "network namespaces are not supported on this system"); + return NULL; + } +} + +#endif diff --git a/src/netlink.c b/src/netlink.c index bd38d99..c71b463 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -21,6 +21,7 @@ #include "conntrackd.h" #include "filter.h" #include "log.h" +#include "namespace.h" #include <string.h> #include <errno.h> @@ -33,7 +34,8 @@ struct nfct_handle *nl_init_event_handler(void) { struct nfct_handle *h; - h = nfct_open(CONFIG(netlink).subsys_id, CONFIG(netlink).groups); + h = nfct_open_ns(CONFIG(netlink).subsys_id, CONFIG(netlink).groups, + STATE(ns_fd)); if (h == NULL) return NULL; @@ -175,7 +177,7 @@ int nl_flush_conntrack_table_selective(void) struct nfct_handle *h; int ret; - h = nfct_open(CONNTRACK, 0); + h = nfct_open_ns(CONNTRACK, 0, STATE(ns_fd)); if (h == NULL) { dlog(LOG_ERR, "cannot open handle"); return -1; diff --git a/src/read_config_lex.l b/src/read_config_lex.l index 31fa32e..7a09c52 100644 --- a/src/read_config_lex.l +++ b/src/read_config_lex.l @@ -91,6 +91,7 @@ notrack [N|n][O|o][T|t][R|r][A|a][C|c][K|k] "SocketBufferSizeMaxGrowth" { return T_BUFFER_SIZE_MAX_GROWN; /* alias */ } "NetlinkBufferSize" { return T_BUFFER_SIZE; } "NetlinkBufferSizeMaxGrowth" { return T_BUFFER_SIZE_MAX_GROWN; } +"NetlinkNamespace" { return T_NETLINK_NAMESPACE; } "Mode" { return T_SYNC_MODE; } "ListenTo" { return T_LISTEN_TO; } "Family" { return T_FAMILY; } diff --git a/src/read_config_yy.y b/src/read_config_yy.y index c9235d3..b0985e7 100644 --- a/src/read_config_yy.y +++ b/src/read_config_yy.y @@ -87,7 +87,7 @@ enum { %token T_DISABLE_INTERNAL_CACHE T_DISABLE_EXTERNAL_CACHE T_ERROR_QUEUE_LENGTH %token T_OPTIONS T_TCP_WINDOW_TRACKING T_EXPECT_SYNC %token T_HELPER T_HELPER_QUEUE_NUM T_HELPER_POLICY T_HELPER_EXPECT_MAX -%token T_HELPER_EXPECT_TIMEOUT +%token T_HELPER_EXPECT_TIMEOUT T_NETLINK_NAMESPACE %token <string> T_IP T_PATH_VAL %token <val> T_NUMBER @@ -1113,6 +1113,7 @@ general_line: hashsize | unix_line | netlink_buffer_size | netlink_buffer_size_max_grown + | netlink_namespace | family | event_iterations_limit | poll_secs @@ -1158,6 +1159,11 @@ netlink_events_reliable : T_NETLINK_EVENTS_RELIABLE T_OFF conf.netlink.events_reliable = 0; }; +netlink_namespace : T_NETLINK_NAMESPACE T_PATH_VAL +{ + strncpy(conf.netlink_namespace, $2, FILENAME_MAXLEN); +}; + nice : T_NICE T_SIGNED_NUMBER { conf.nice = $2; diff --git a/src/run.c b/src/run.c index 3337694..7aa5032 100644 --- a/src/run.c +++ b/src/run.c @@ -30,6 +30,7 @@ #include "origin.h" #include "date.h" #include "internal.h" +#include "namespace.h" #include <errno.h> #include <signal.h> @@ -252,6 +253,8 @@ init(void) return -1; /* Initialization */ + init_namespaces(); + if (CONFIG(flags) & (CTD_SYNC_MODE | CTD_STATS_MODE)) if (ctnl_init() < 0) return -1; diff --git a/src/sync-mode.c b/src/sync-mode.c index e69ecfe..de58b8c 100644 --- a/src/sync-mode.c +++ b/src/sync-mode.c @@ -31,6 +31,7 @@ #include "origin.h" #include "internal.h" #include "external.h" +#include "namespace.h" #include <errno.h> #include <unistd.h> @@ -451,7 +452,8 @@ static int init_sync(void) tx_queue_cb, NULL, STATE(fds)) == -1) return -1; - STATE_SYNC(commit).h = nfct_open(CONFIG(netlink).subsys_id, 0); + STATE_SYNC(commit).h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, + STATE(ns_fd)); if (STATE_SYNC(commit).h == NULL) { dlog(LOG_ERR, "can't create handler to commit"); return -1; diff --git a/src/sync-notrack.c b/src/sync-notrack.c index a7df4e7..d8010fb 100644 --- a/src/sync-notrack.c +++ b/src/sync-notrack.c @@ -24,6 +24,7 @@ #include "log.h" #include "cache.h" #include "fds.h" +#include "namespace.h" #include <string.h> @@ -102,7 +103,7 @@ static void kernel_resync(void) u_int32_t family = AF_UNSPEC; int ret; - h = nfct_open(CONFIG(netlink).subsys_id, 0); + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); if (h == NULL) { dlog(LOG_ERR, "can't allocate memory for the internal cache"); return;
This patch allows conntrackd to open CT Netlink sockets into a given network namespace. Channel sockets (e.g. UDP) would still be opened into the same namespace where conntrackd was started. The only binary this patch affects is conntrackd. All other binaries (e.g. conntrack, nfct) would still operate in the same namespace where they were started. To make use of this patch: 1. create a network namespace: "ip netns add the_ns" 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf file inside General {...} section. Signed-off-by: Ansis Atteka <aatteka@nicira.com> --- include/Makefile.am | 2 +- include/conntrackd.h | 2 + include/namespace.h | 12 ++++++ src/Makefile.am | 3 +- src/cthelper.c | 3 +- src/ctnl.c | 20 +++++++-- src/expect.c | 4 +- src/external_inject.c | 3 +- src/internal_bypass.c | 5 ++- src/namespace.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++ src/netlink.c | 6 ++- src/read_config_lex.l | 1 + src/read_config_yy.y | 8 +++- src/run.c | 3 ++ src/sync-mode.c | 4 +- src/sync-notrack.c | 3 +- 16 files changed, 175 insertions(+), 16 deletions(-) create mode 100644 include/namespace.h create mode 100644 src/namespace.c