diff mbox

conntrackd: make conntrackd namespace aware

Message ID 1346461915-6309-1-git-send-email-aatteka@nicira.com
State Superseded
Headers show

Commit Message

Ansis Atteka Sept. 1, 2012, 1:11 a.m. UTC
This patch allows conntrackd to open CT Netlink sockets into a given
network namespace. Channel sockets (e.g. UDP) would still be opened into
the same namespace where conntrackd was started.

The only binary this patch affects is conntrackd. All other binaries (e.g.
conntrack, nfct) would still operate in the same namespace where they were
started.

To make use of this patch:
1. create a network namespace: "ip netns add the_ns"
2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf
file inside General {...} section.

Signed-off-by: Ansis Atteka <aatteka@nicira.com>
---
 include/Makefile.am   |    2 +-
 include/conntrackd.h  |    2 +
 include/namespace.h   |   12 ++++++
 src/Makefile.am       |    3 +-
 src/cthelper.c        |    3 +-
 src/ctnl.c            |   20 +++++++--
 src/expect.c          |    4 +-
 src/external_inject.c |    3 +-
 src/internal_bypass.c |    5 ++-
 src/namespace.c       |  112 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/netlink.c         |    6 ++-
 src/read_config_lex.l |    1 +
 src/read_config_yy.y  |    8 +++-
 src/run.c             |    3 ++
 src/sync-mode.c       |    4 +-
 src/sync-notrack.c    |    3 +-
 16 files changed, 175 insertions(+), 16 deletions(-)
 create mode 100644 include/namespace.h
 create mode 100644 src/namespace.c

Comments

Ansis Atteka Sept. 6, 2012, 1:36 a.m. UTC | #1
On Fri, Aug 31, 2012 at 6:11 PM, Ansis Atteka <aatteka@nicira.com> wrote:
> This patch allows conntrackd to open CT Netlink sockets into a given
> network namespace. Channel sockets (e.g. UDP) would still be opened into
> the same namespace where conntrackd was started.
>
> The only binary this patch affects is conntrackd. All other binaries (e.g.
> conntrack, nfct) would still operate in the same namespace where they were
> started.
>
> To make use of this patch:
> 1. create a network namespace: "ip netns add the_ns"
> 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf
> file inside General {...} section.

Wanted to provide more details about this patch and also bump it up
for attention.

Basically, what it does is allows conntrackd to open Conntrack Netlink
sockets into a different namespace than where Channel Sockets were
opened. This isolation brings benefits to:
1. security, because the channel socket (and management interface) will
reside in a different namespace. They won't be exposed to the traffic
that traverses the namespace;
2. flexibility, because arbitrary IP addresses could be used inside that
namespace for Connection Tracking purposes. No need to worry that
there might be overlapping IP addresses with the Management interface;
3. scalability w.r.t. namespaces, because all the namespaces would
end up using a single management interface and IP address in the root
namespace. There wouldn't be need to maintain a dedicated
management interface and IP address inside every namespace.

Also this patch would prepare soil for my next patches, that would
ease connection state synchronization for virtualized networks even more:
1. allow single conntrackd instance to synchronize multiple namespaces;
2. add configuration dynamically to conntrackd (without restarting
the daemon).

Those other two patches involve quite a lot of code re-factoring and
depend on this patch. So comments are welcome!

Thanks,
Ansis

>
> Signed-off-by: Ansis Atteka <aatteka@nicira.com>
> ---
>  include/Makefile.am   |    2 +-
>  include/conntrackd.h  |    2 +
>  include/namespace.h   |   12 ++++++
>  src/Makefile.am       |    3 +-
>  src/cthelper.c        |    3 +-
>  src/ctnl.c            |   20 +++++++--
>  src/expect.c          |    4 +-
>  src/external_inject.c |    3 +-
>  src/internal_bypass.c |    5 ++-
>  src/namespace.c       |  112 +++++++++++++++++++++++++++++++++++++++++++++++++
>  src/netlink.c         |    6 ++-
>  src/read_config_lex.l |    1 +
>  src/read_config_yy.y  |    8 +++-
>  src/run.c             |    3 ++
>  src/sync-mode.c       |    4 +-
>  src/sync-notrack.c    |    3 +-
>  16 files changed, 175 insertions(+), 16 deletions(-)
>  create mode 100644 include/namespace.h
>  create mode 100644 src/namespace.c
>
> diff --git a/include/Makefile.am b/include/Makefile.am
> index 6bd0f7f..e06fd4d 100644
> --- a/include/Makefile.am
> +++ b/include/Makefile.am
> @@ -6,5 +6,5 @@ noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \
>                  network.h filter.h queue.h vector.h cidr.h \
>                  traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \
>                  process.h origin.h internal.h external.h date.h nfct.h \
> -                helper.h myct.h stack.h
> +                helper.h myct.h stack.h namespace.h
>
> diff --git a/include/conntrackd.h b/include/conntrackd.h
> index 19e613c..c349d72 100644
> --- a/include/conntrackd.h
> +++ b/include/conntrackd.h
> @@ -94,6 +94,7 @@ struct ct_conf {
>         int channel_type_global;
>         struct channel_conf channel[MULTICHANNEL_MAX];
>         struct local_conf local;        /* unix socket facilities */
> +       char netlink_namespace[FILENAME_MAXLEN];
>         int nice;
>         int limit;
>         int refresh;
> @@ -143,6 +144,7 @@ struct ct_conf {
>  #define STATE(x) st.x
>
>  struct ct_general_state {
> +       int                                     ns_fd;          /* namespace fd for NL sockets */
>         sigset_t                        block;
>         FILE                            *log;
>         FILE                            *stats_log;
> diff --git a/include/namespace.h b/include/namespace.h
> new file mode 100644
> index 0000000..668a270
> --- /dev/null
> +++ b/include/namespace.h
> @@ -0,0 +1,12 @@
> +#ifndef _NAMESPACE_H_
> +#define _NAMESPACE_H_
> +
> +#include <libmnl/libmnl.h>
> +#include <libnetfilter_conntrack/libnetfilter_conntrack.h>
> +
> +void init_namespaces(void);
> +
> +struct nfct_handle *nfct_open_ns(u_int8_t, unsigned, int);
> +struct mnl_socket *mnl_socket_open_ns(int, int);
> +
> +#endif
> diff --git a/src/Makefile.am b/src/Makefile.am
> index d8074d2..60314ab 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -39,7 +39,8 @@ conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \
>                     external_cache.c external_inject.c \
>                     internal_cache.c internal_bypass.c \
>                     read_config_yy.y read_config_lex.l \
> -                   stack.c helpers.c utils.c expect.c
> +                   stack.c helpers.c utils.c expect.c \
> +                   namespace.c
>
>  # yacc and lex generate dirty code
>  read_config_yy.o read_config_lex.o: AM_CFLAGS += -Wno-missing-prototypes -Wno-missing-declarations -Wno-implicit-function-declaration -Wno-nested-externs -Wno-undef -Wno-redundant-decls
> diff --git a/src/cthelper.c b/src/cthelper.c
> index c119869..b165900 100644
> --- a/src/cthelper.c
> +++ b/src/cthelper.c
> @@ -22,6 +22,7 @@
>  #include "log.h"
>  #include "fds.h"
>  #include "helper.h"
> +#include "namespace.h"
>
>  #include <unistd.h>
>  #include <fcntl.h>
> @@ -493,7 +494,7 @@ int cthelper_init(void)
>                 return -1;
>         }
>
> -       STATE_CTH(nl) = mnl_socket_open(NETLINK_NETFILTER);
> +       STATE_CTH(nl) = mnl_socket_open_ns(NETLINK_NETFILTER, STATE(ns_fd));
>         if (STATE_CTH(nl) == NULL) {
>                 dlog(LOG_ERR, "cannot open nfq socket");
>                 return -1;
> diff --git a/src/ctnl.c b/src/ctnl.c
> index bb54727..65784c3 100644
> --- a/src/ctnl.c
> +++ b/src/ctnl.c
> @@ -29,6 +29,7 @@
>  #include "origin.h"
>  #include "date.h"
>  #include "internal.h"
> +#include "namespace.h"
>
>  #include <errno.h>
>  #include <signal.h>
> @@ -399,6 +400,17 @@ static void poll_cb(void *data)
>
>  int ctnl_init(void)
>  {
> +       if (CONFIG(netlink_namespace)[0]) {
> +               STATE(ns_fd) = open(CONFIG(netlink_namespace), O_RDONLY);
> +               if (STATE(ns_fd) == -1) {
> +                       dlog(LOG_ERR, "could not open network namespace %s: %s",
> +                                CONFIG(netlink_namespace), strerror(errno));
> +                       return -1;
> +               }
> +       } else {
> +               STATE(ns_fd) = -1;
> +       }
> +
>         if (CONFIG(flags) & CTD_STATS_MODE)
>                 STATE(mode) = &stats_mode;
>         else if (CONFIG(flags) & CTD_SYNC_MODE)
> @@ -417,7 +429,7 @@ int ctnl_init(void)
>         }
>
>         /* resynchronize (like 'dump' socket) but it also purges old entries */
> -       STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       STATE(resync) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (STATE(resync)== NULL) {
>                 dlog(LOG_ERR, "can't open netlink handler: %s",
>                      strerror(errno));
> @@ -438,7 +450,7 @@ int ctnl_init(void)
>         fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK);
>
>         if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) {
> -               STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0);
> +               STATE(dump) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>                 if (STATE(dump) == NULL) {
>                         dlog(LOG_ERR, "can't open netlink handler: %s",
>                              strerror(errno));
> @@ -467,7 +479,7 @@ int ctnl_init(void)
>                 }
>         }
>
> -       STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       STATE(get) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (STATE(get) == NULL) {
>                 dlog(LOG_ERR, "can't open netlink handler: %s",
>                      strerror(errno));
> @@ -481,7 +493,7 @@ int ctnl_init(void)
>                                         exp_get_handler, NULL);
>         }
>
> -       STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       STATE(flush) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (STATE(flush) == NULL) {
>                 dlog(LOG_ERR, "cannot open flusher handler");
>                 return -1;
> diff --git a/src/expect.c b/src/expect.c
> index 6069770..bedec2c 100644
> --- a/src/expect.c
> +++ b/src/expect.c
> @@ -8,7 +8,9 @@
>   * This code has been sponsored by Vyatta Inc. <http://www.vyatta.com>
>   */
>
> +#include "conntrackd.h"
>  #include "helper.h"
> +#include "namespace.h"
>
>  #include <stdio.h>
>  #include <string.h>
> @@ -165,7 +167,7 @@ static int cthelper_expect_cmd(struct nf_expect *exp, int cmd)
>         int ret;
>         struct nfct_handle *h;
>
> -       h = nfct_open(EXPECT, 0);
> +       h = nfct_open_ns(EXPECT, 0, STATE(ns_fd));
>         if (!h)
>                 return -1;
>
> diff --git a/src/external_inject.c b/src/external_inject.c
> index 0ad3478..5a6680b 100644
> --- a/src/external_inject.c
> +++ b/src/external_inject.c
> @@ -22,6 +22,7 @@
>  #include "cache.h"
>  #include "origin.h"
>  #include "external.h"
> +#include "namespace.h"
>  #include "netlink.h"
>
>  #include <libnetfilter_conntrack/libnetfilter_conntrack.h>
> @@ -42,7 +43,7 @@ struct {
>  static int external_inject_init(void)
>  {
>         /* handler to directly inject conntracks into kernel-space */
> -       inject = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       inject = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (inject == NULL) {
>                 dlog(LOG_ERR, "can't open netlink handler: %s",
>                      strerror(errno));
> diff --git a/src/internal_bypass.c b/src/internal_bypass.c
> index 1194339..520c55d 100644
> --- a/src/internal_bypass.c
> +++ b/src/internal_bypass.c
> @@ -16,6 +16,7 @@
>  #include "netlink.h"
>  #include "network.h"
>  #include "origin.h"
> +#include "namespace.h"
>
>  static int internal_bypass_init(void)
>  {
> @@ -52,7 +53,7 @@ static void internal_bypass_ct_dump(int fd, int type)
>         u_int32_t family = AF_UNSPEC;
>         int ret;
>
> -       h = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (h == NULL) {
>                 dlog(LOG_ERR, "can't allocate memory for the internal cache");
>                 return;
> @@ -183,7 +184,7 @@ static void internal_bypass_exp_dump(int fd, int type)
>         u_int32_t family = AF_UNSPEC;
>         int ret;
>
> -       h = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (h == NULL) {
>                 dlog(LOG_ERR, "can't allocate memory for the internal cache");
>                 return;
> diff --git a/src/namespace.c b/src/namespace.c
> new file mode 100644
> index 0000000..03db222
> --- /dev/null
> +++ b/src/namespace.c
> @@ -0,0 +1,112 @@
> +/*
> + * Copyright (C) 2012 Nicira, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
> + *
> + * Authors:
> + *     Ansis Atteka <aatteka@nicira.com>
> + */
> +#define _GNU_SOURCE
> +
> +#include "namespace.h"
> +
> +#include <fcntl.h>
> +#include <sched.h>
> +#include <sys/syscall.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <unistd.h>
> +
> +#include "conntrackd.h"
> +#include "log.h"
> +
> +#ifndef CLONE_NEWNET
> +#define CLONE_NEWNET 0x40000000
> +#endif
> +
> +#ifndef __NR_setns
> +#ifdef __x86_64
> +#define __NR_setns 308
> +#endif
> +#ifdef __i386
> +#define __NR_setns 346
> +#endif
> +#endif
> +
> +#ifdef __NR_setns
> +
> +static int root_fd = -1;
> +
> +void init_namespaces(void) {
> +       root_fd = open("/proc/self/ns/net", O_RDONLY);
> +       if (root_fd == -1) {
> +               dlog(LOG_WARNING, "could not open current namespace");
> +       }
> +}
> +
> +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions,
> +                                                                int ns_fd) {
> +       struct nfct_handle *handle = NULL;
> +
> +       if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) {
> +               dlog(LOG_ERR, "could not switch between namespaces");
> +       } else {
> +               handle = nfct_open(subsys_id, subscriptions);
> +               if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) {
> +                       dlog(LOG_ERR, "fatal: could not switch back to main namespace");
> +               }
> +       }
> +       return handle;
> +}
> +
> +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) {
> +       struct mnl_socket *handle = NULL;
> +
> +       if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) {
> +               dlog(LOG_ERR, "could not switch between namespaces");
> +       } else {
> +               handle = mnl_socket_open(bus);
> +               if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) {
> +                       dlog(LOG_ERR, "fatal: could not switch back to main namespace");
> +               }
> +       }
> +       return handle;
> +}
> +
> +#else
> +
> +void init_namespaces(void) {
> +}
> +
> +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions,
> +                                                                int ns_fd) {
> +       if (ns_fd == -1) {
> +               return nfct_open(subsys_id, subscriptions);
> +       } else {
> +               dlog(LOG_ERR, "network namespaces are not supported on this system");
> +               return NULL;
> +       }
> +}
> +
> +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) {
> +       if (ns_fd == -1) {
> +               return mnl_socket_open(bus);
> +       } else {
> +               dlog(LOG_ERR, "network namespaces are not supported on this system");
> +               return NULL;
> +       }
> +}
> +
> +#endif
> diff --git a/src/netlink.c b/src/netlink.c
> index bd38d99..c71b463 100644
> --- a/src/netlink.c
> +++ b/src/netlink.c
> @@ -21,6 +21,7 @@
>  #include "conntrackd.h"
>  #include "filter.h"
>  #include "log.h"
> +#include "namespace.h"
>
>  #include <string.h>
>  #include <errno.h>
> @@ -33,7 +34,8 @@ struct nfct_handle *nl_init_event_handler(void)
>  {
>         struct nfct_handle *h;
>
> -       h = nfct_open(CONFIG(netlink).subsys_id, CONFIG(netlink).groups);
> +       h = nfct_open_ns(CONFIG(netlink).subsys_id, CONFIG(netlink).groups,
> +                                        STATE(ns_fd));
>         if (h == NULL)
>                 return NULL;
>
> @@ -175,7 +177,7 @@ int nl_flush_conntrack_table_selective(void)
>         struct nfct_handle *h;
>         int ret;
>
> -       h = nfct_open(CONNTRACK, 0);
> +       h = nfct_open_ns(CONNTRACK, 0, STATE(ns_fd));
>         if (h == NULL) {
>                 dlog(LOG_ERR, "cannot open handle");
>                 return -1;
> diff --git a/src/read_config_lex.l b/src/read_config_lex.l
> index 31fa32e..7a09c52 100644
> --- a/src/read_config_lex.l
> +++ b/src/read_config_lex.l
> @@ -91,6 +91,7 @@ notrack               [N|n][O|o][T|t][R|r][A|a][C|c][K|k]
>  "SocketBufferSizeMaxGrowth"    { return T_BUFFER_SIZE_MAX_GROWN; /* alias */ }
>  "NetlinkBufferSize"            { return T_BUFFER_SIZE; }
>  "NetlinkBufferSizeMaxGrowth"   { return T_BUFFER_SIZE_MAX_GROWN; }
> +"NetlinkNamespace"             { return T_NETLINK_NAMESPACE; }
>  "Mode"                         { return T_SYNC_MODE; }
>  "ListenTo"                     { return T_LISTEN_TO; }
>  "Family"                       { return T_FAMILY; }
> diff --git a/src/read_config_yy.y b/src/read_config_yy.y
> index c9235d3..b0985e7 100644
> --- a/src/read_config_yy.y
> +++ b/src/read_config_yy.y
> @@ -87,7 +87,7 @@ enum {
>  %token T_DISABLE_INTERNAL_CACHE T_DISABLE_EXTERNAL_CACHE T_ERROR_QUEUE_LENGTH
>  %token T_OPTIONS T_TCP_WINDOW_TRACKING T_EXPECT_SYNC
>  %token T_HELPER T_HELPER_QUEUE_NUM T_HELPER_POLICY T_HELPER_EXPECT_MAX
> -%token T_HELPER_EXPECT_TIMEOUT
> +%token T_HELPER_EXPECT_TIMEOUT T_NETLINK_NAMESPACE
>
>  %token <string> T_IP T_PATH_VAL
>  %token <val> T_NUMBER
> @@ -1113,6 +1113,7 @@ general_line: hashsize
>             | unix_line
>             | netlink_buffer_size
>             | netlink_buffer_size_max_grown
> +           | netlink_namespace
>             | family
>             | event_iterations_limit
>             | poll_secs
> @@ -1158,6 +1159,11 @@ netlink_events_reliable : T_NETLINK_EVENTS_RELIABLE T_OFF
>         conf.netlink.events_reliable = 0;
>  };
>
> +netlink_namespace : T_NETLINK_NAMESPACE T_PATH_VAL
> +{
> +       strncpy(conf.netlink_namespace, $2, FILENAME_MAXLEN);
> +};
> +
>  nice : T_NICE T_SIGNED_NUMBER
>  {
>         conf.nice = $2;
> diff --git a/src/run.c b/src/run.c
> index 3337694..7aa5032 100644
> --- a/src/run.c
> +++ b/src/run.c
> @@ -30,6 +30,7 @@
>  #include "origin.h"
>  #include "date.h"
>  #include "internal.h"
> +#include "namespace.h"
>
>  #include <errno.h>
>  #include <signal.h>
> @@ -252,6 +253,8 @@ init(void)
>                 return -1;
>
>         /* Initialization */
> +       init_namespaces();
> +
>         if (CONFIG(flags) & (CTD_SYNC_MODE | CTD_STATS_MODE))
>                 if (ctnl_init() < 0)
>                         return -1;
> diff --git a/src/sync-mode.c b/src/sync-mode.c
> index e69ecfe..de58b8c 100644
> --- a/src/sync-mode.c
> +++ b/src/sync-mode.c
> @@ -31,6 +31,7 @@
>  #include "origin.h"
>  #include "internal.h"
>  #include "external.h"
> +#include "namespace.h"
>
>  #include <errno.h>
>  #include <unistd.h>
> @@ -451,7 +452,8 @@ static int init_sync(void)
>                         tx_queue_cb, NULL, STATE(fds)) == -1)
>                 return -1;
>
> -       STATE_SYNC(commit).h = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       STATE_SYNC(commit).h = nfct_open_ns(CONFIG(netlink).subsys_id, 0,
> +                                                                               STATE(ns_fd));
>         if (STATE_SYNC(commit).h == NULL) {
>                 dlog(LOG_ERR, "can't create handler to commit");
>                 return -1;
> diff --git a/src/sync-notrack.c b/src/sync-notrack.c
> index a7df4e7..d8010fb 100644
> --- a/src/sync-notrack.c
> +++ b/src/sync-notrack.c
> @@ -24,6 +24,7 @@
>  #include "log.h"
>  #include "cache.h"
>  #include "fds.h"
> +#include "namespace.h"
>
>  #include <string.h>
>
> @@ -102,7 +103,7 @@ static void kernel_resync(void)
>         u_int32_t family = AF_UNSPEC;
>         int ret;
>
> -       h = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (h == NULL) {
>                 dlog(LOG_ERR, "can't allocate memory for the internal cache");
>                 return;
> --
> 1.7.9.5
>
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Sept. 6, 2012, 5:02 p.m. UTC | #2
Hi Ansis,

On Fri, Aug 31, 2012 at 06:11:55PM -0700, Ansis Atteka wrote:
> This patch allows conntrackd to open CT Netlink sockets into a given
> network namespace. Channel sockets (e.g. UDP) would still be opened into
> the same namespace where conntrackd was started.
> 
> The only binary this patch affects is conntrackd. All other binaries (e.g.
> conntrack, nfct) would still operate in the same namespace where they were
> started.
> 
> To make use of this patch:
> 1. create a network namespace: "ip netns add the_ns"
> 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf
> file inside General {...} section.
> 
> Signed-off-by: Ansis Atteka <aatteka@nicira.com>
> ---
>  include/Makefile.am   |    2 +-
>  include/conntrackd.h  |    2 +
>  include/namespace.h   |   12 ++++++
>  src/Makefile.am       |    3 +-
>  src/cthelper.c        |    3 +-
>  src/ctnl.c            |   20 +++++++--
>  src/expect.c          |    4 +-
>  src/external_inject.c |    3 +-
>  src/internal_bypass.c |    5 ++-
>  src/namespace.c       |  112 +++++++++++++++++++++++++++++++++++++++++++++++++
>  src/netlink.c         |    6 ++-
>  src/read_config_lex.l |    1 +
>  src/read_config_yy.y  |    8 +++-
>  src/run.c             |    3 ++
>  src/sync-mode.c       |    4 +-
>  src/sync-notrack.c    |    3 +-
>  16 files changed, 175 insertions(+), 16 deletions(-)
>  create mode 100644 include/namespace.h
>  create mode 100644 src/namespace.c
> 
> diff --git a/include/Makefile.am b/include/Makefile.am
> index 6bd0f7f..e06fd4d 100644
> --- a/include/Makefile.am
> +++ b/include/Makefile.am
> @@ -6,5 +6,5 @@ noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \
>  		 network.h filter.h queue.h vector.h cidr.h \
>  		 traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \
>  		 process.h origin.h internal.h external.h date.h nfct.h \
> -		 helper.h myct.h stack.h
> +		 helper.h myct.h stack.h namespace.h
>  
> diff --git a/include/conntrackd.h b/include/conntrackd.h
> index 19e613c..c349d72 100644
> --- a/include/conntrackd.h
> +++ b/include/conntrackd.h
> @@ -94,6 +94,7 @@ struct ct_conf {
>  	int channel_type_global;
>  	struct channel_conf channel[MULTICHANNEL_MAX];
>  	struct local_conf local;	/* unix socket facilities */
> +	char netlink_namespace[FILENAME_MAXLEN];
>  	int nice;
>  	int limit;
>  	int refresh;
> @@ -143,6 +144,7 @@ struct ct_conf {
>  #define STATE(x) st.x
>  
>  struct ct_general_state {
> +	int					ns_fd;		/* namespace fd for NL sockets */

We use the same coding style of the Linux kernel. Please, break lines
at 80 chars.

>  	sigset_t 			block;
>  	FILE 				*log;
>  	FILE				*stats_log;
> diff --git a/include/namespace.h b/include/namespace.h
> new file mode 100644
> index 0000000..668a270
> --- /dev/null
> +++ b/include/namespace.h
> @@ -0,0 +1,12 @@
> +#ifndef _NAMESPACE_H_
> +#define _NAMESPACE_H_
> +
> +#include <libmnl/libmnl.h>
> +#include <libnetfilter_conntrack/libnetfilter_conntrack.h>
> +
> +void init_namespaces(void);
> +
> +struct nfct_handle *nfct_open_ns(u_int8_t, unsigned, int);
> +struct mnl_socket *mnl_socket_open_ns(int, int);
> +
> +#endif
> diff --git a/src/Makefile.am b/src/Makefile.am
> index d8074d2..60314ab 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -39,7 +39,8 @@ conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \
>  		    external_cache.c external_inject.c \
>  		    internal_cache.c internal_bypass.c \
>  		    read_config_yy.y read_config_lex.l \
> -		    stack.c helpers.c utils.c expect.c
> +		    stack.c helpers.c utils.c expect.c \
> +		    namespace.c
>  
>  # yacc and lex generate dirty code
>  read_config_yy.o read_config_lex.o: AM_CFLAGS += -Wno-missing-prototypes -Wno-missing-declarations -Wno-implicit-function-declaration -Wno-nested-externs -Wno-undef -Wno-redundant-decls
> diff --git a/src/cthelper.c b/src/cthelper.c
> index c119869..b165900 100644
> --- a/src/cthelper.c
> +++ b/src/cthelper.c
> @@ -22,6 +22,7 @@
>  #include "log.h"
>  #include "fds.h"
>  #include "helper.h"
> +#include "namespace.h"
>  
>  #include <unistd.h>
>  #include <fcntl.h>
> @@ -493,7 +494,7 @@ int cthelper_init(void)
>  		return -1;
>  	}
>  
> -	STATE_CTH(nl) = mnl_socket_open(NETLINK_NETFILTER);
> +	STATE_CTH(nl) = mnl_socket_open_ns(NETLINK_NETFILTER, STATE(ns_fd));
>  	if (STATE_CTH(nl) == NULL) {
>  		dlog(LOG_ERR, "cannot open nfq socket");
>  		return -1;
> diff --git a/src/ctnl.c b/src/ctnl.c
> index bb54727..65784c3 100644
> --- a/src/ctnl.c
> +++ b/src/ctnl.c
> @@ -29,6 +29,7 @@
>  #include "origin.h"
>  #include "date.h"
>  #include "internal.h"
> +#include "namespace.h"
>  
>  #include <errno.h>
>  #include <signal.h>
> @@ -399,6 +400,17 @@ static void poll_cb(void *data)
>  
>  int ctnl_init(void)
>  {
> +	if (CONFIG(netlink_namespace)[0]) {
> +		STATE(ns_fd) = open(CONFIG(netlink_namespace), O_RDONLY);
> +		if (STATE(ns_fd) == -1) {
> +			dlog(LOG_ERR, "could not open network namespace %s: %s",
> +				 CONFIG(netlink_namespace), strerror(errno));
> +			return -1;
> +		}
> +	} else {
> +		STATE(ns_fd) = -1;
> +	}
> +
>  	if (CONFIG(flags) & CTD_STATS_MODE)
>  		STATE(mode) = &stats_mode;
>  	else if (CONFIG(flags) & CTD_SYNC_MODE)
> @@ -417,7 +429,7 @@ int ctnl_init(void)
>  	}
>  
>  	/* resynchronize (like 'dump' socket) but it also purges old entries */
> -	STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0);
> +	STATE(resync) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>  	if (STATE(resync)== NULL) {
>  		dlog(LOG_ERR, "can't open netlink handler: %s",
>  		     strerror(errno));
> @@ -438,7 +450,7 @@ int ctnl_init(void)
>  	fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK);
>  
>  	if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) {
> -		STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0);
> +		STATE(dump) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>  		if (STATE(dump) == NULL) {
>  			dlog(LOG_ERR, "can't open netlink handler: %s",
>  			     strerror(errno));
> @@ -467,7 +479,7 @@ int ctnl_init(void)
>  		}
>  	}
>  
> -	STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0);
> +	STATE(get) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>  	if (STATE(get) == NULL) {
>  		dlog(LOG_ERR, "can't open netlink handler: %s",
>  		     strerror(errno));
> @@ -481,7 +493,7 @@ int ctnl_init(void)
>  					exp_get_handler, NULL);
>  	}
>  
> -	STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0);
> +	STATE(flush) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>  	if (STATE(flush) == NULL) {
>  		dlog(LOG_ERR, "cannot open flusher handler");
>  		return -1;
> diff --git a/src/expect.c b/src/expect.c
> index 6069770..bedec2c 100644
> --- a/src/expect.c
> +++ b/src/expect.c
> @@ -8,7 +8,9 @@
>   * This code has been sponsored by Vyatta Inc. <http://www.vyatta.com>
>   */
>  
> +#include "conntrackd.h"
>  #include "helper.h"
> +#include "namespace.h"
>  
>  #include <stdio.h>
>  #include <string.h>
> @@ -165,7 +167,7 @@ static int cthelper_expect_cmd(struct nf_expect *exp, int cmd)
>  	int ret;
>  	struct nfct_handle *h;
>  
> -	h = nfct_open(EXPECT, 0);
> +	h = nfct_open_ns(EXPECT, 0, STATE(ns_fd));
>  	if (!h)
>  		return -1;
>  
> diff --git a/src/external_inject.c b/src/external_inject.c
> index 0ad3478..5a6680b 100644
> --- a/src/external_inject.c
> +++ b/src/external_inject.c
> @@ -22,6 +22,7 @@
>  #include "cache.h"
>  #include "origin.h"
>  #include "external.h"
> +#include "namespace.h"
>  #include "netlink.h"
>  
>  #include <libnetfilter_conntrack/libnetfilter_conntrack.h>
> @@ -42,7 +43,7 @@ struct {
>  static int external_inject_init(void)
>  {
>  	/* handler to directly inject conntracks into kernel-space */
> -	inject = nfct_open(CONFIG(netlink).subsys_id, 0);
> +	inject = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>  	if (inject == NULL) {
>  		dlog(LOG_ERR, "can't open netlink handler: %s",
>  		     strerror(errno));
> diff --git a/src/internal_bypass.c b/src/internal_bypass.c
> index 1194339..520c55d 100644
> --- a/src/internal_bypass.c
> +++ b/src/internal_bypass.c
> @@ -16,6 +16,7 @@
>  #include "netlink.h"
>  #include "network.h"
>  #include "origin.h"
> +#include "namespace.h"
>  
>  static int internal_bypass_init(void)
>  {
> @@ -52,7 +53,7 @@ static void internal_bypass_ct_dump(int fd, int type)
>  	u_int32_t family = AF_UNSPEC;
>  	int ret;
>  
> -	h = nfct_open(CONFIG(netlink).subsys_id, 0);
> +	h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>  	if (h == NULL) {
>  		dlog(LOG_ERR, "can't allocate memory for the internal cache");
>  		return;
> @@ -183,7 +184,7 @@ static void internal_bypass_exp_dump(int fd, int type)
>  	u_int32_t family = AF_UNSPEC;
>  	int ret;
>  
> -	h = nfct_open(CONFIG(netlink).subsys_id, 0);
> +	h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>  	if (h == NULL) {
>  		dlog(LOG_ERR, "can't allocate memory for the internal cache");
>  		return;
> diff --git a/src/namespace.c b/src/namespace.c
> new file mode 100644
> index 0000000..03db222
> --- /dev/null
> +++ b/src/namespace.c
> @@ -0,0 +1,112 @@
> +/*
> + * Copyright (C) 2012 Nicira, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
> + *
> + * Authors:
> + *     Ansis Atteka <aatteka@nicira.com>
> + */
> +#define _GNU_SOURCE
> +
> +#include "namespace.h"
> +
> +#include <fcntl.h>
> +#include <sched.h>
> +#include <sys/syscall.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <unistd.h>
> +
> +#include "conntrackd.h"
> +#include "log.h"
> +
> +#ifndef CLONE_NEWNET
> +#define CLONE_NEWNET 0x40000000
> +#endif
> +
> +#ifndef __NR_setns
> +#ifdef __x86_64
> +#define __NR_setns 308
> +#endif
> +#ifdef __i386
> +#define __NR_setns 346
> +#endif
> +#endif

I can see there's some setns wrapper since glibc 2.14 that will make
this portable.

What I would try is to check in configure.ac if setns() exists, if so
use it. Otherwise, check for __NR_setns, if so define setns yourself
in this new namespace.c file. If neither setns() nor __NR_setns
exists, then define setns() to return success and print display error
and exist if NetlinkNamespace is used.

> +
> +#ifdef __NR_setns
> +
> +static int root_fd = -1;

This root_fd should go to STATE(ns).root_fd:

struct ct_state {
...
        struct {
                int cur_fd;     /* current namespace */
                int root_fd;    /* root namespace */
        } ns;

> +
> +void init_namespaces(void) {
> +	root_fd = open("/proc/self/ns/net", O_RDONLY);
> +	if (root_fd == -1) {
> +		dlog(LOG_WARNING, "could not open current namespace");
> +	}
> +}
> +
> +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions,
> +								 int ns_fd) {
> +	struct nfct_handle *handle = NULL;
> +
> +	if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) {
> +		dlog(LOG_ERR, "could not switch between namespaces");
> +	} else {
> +		handle = nfct_open(subsys_id, subscriptions);
> +		if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) {
> +			dlog(LOG_ERR, "fatal: could not switch back to main namespace");
> +		}
> +	}

Oh, this code above is confusing, please make it more readable, like
this:

	if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) {
		dlog(LOG_ERR, "could not switch between namespaces");
                return NULL;
        }

	handle = nfct_open(subsys_id, subscriptions);
	if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) {
		dlog(LOG_ERR, "could not switch back to main namespace");
	}

And you encapsulate part of that code in some new function like:

int ctd_change_namespace(int fd)
{
	if (fd != -1)
                return 0;

        if (syscall(__NR_setns, fd, CLONE_NEWNET)) {
		dlog(LOG_ERR, "could not switch back to main namespace");
                return -1;
	}

        return 0;
}

That you can reuse over and over again.

> +	return handle;
> +}
> +
> +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) {
> +	struct mnl_socket *handle = NULL;
> +
> +	if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) {
> +		dlog(LOG_ERR, "could not switch between namespaces");
> +	} else {
> +		handle = mnl_socket_open(bus);
> +		if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) {
> +			dlog(LOG_ERR, "fatal: could not switch back to main namespace");
> +		}
> +	}
> +	return handle;

Same thing for this function above.

> +}
> +
> +#else
> +
> +void init_namespaces(void) {
> +}
> +
> +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions,
> +								 int ns_fd) {
> +	if (ns_fd == -1) {
> +		return nfct_open(subsys_id, subscriptions);
> +	} else {
> +		dlog(LOG_ERR, "network namespaces are not supported on this system");
> +		return NULL;
> +	}
> +}
> +
> +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) {
> +	if (ns_fd == -1) {
> +		return mnl_socket_open(bus);
> +	} else {
> +		dlog(LOG_ERR, "network namespaces are not supported on this system");
> +		return NULL;
> +	}
> +}

You can remove this conditional code above since the #else if you do
something like:

int ctd_change_namespace(int fd)
{
#ifdef SETNS_EXISTS
	if (fd != -1)
                return 0;

        if (syscall(__NR_setns, fd, CLONE_NEWNET)) {
		dlog(LOG_ERR, "could not switch back to main namespace");
                return -1;
	}
#endif
        return 0;
}

That's all by now.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Sept. 6, 2012, 5:17 p.m. UTC | #3
Hi,

On Wed, Sep 05, 2012 at 06:36:29PM -0700, Ansis Atteka wrote:
> On Fri, Aug 31, 2012 at 6:11 PM, Ansis Atteka <aatteka@nicira.com> wrote:
> > This patch allows conntrackd to open CT Netlink sockets into a given
> > network namespace. Channel sockets (e.g. UDP) would still be opened into
> > the same namespace where conntrackd was started.
> >
> > The only binary this patch affects is conntrackd. All other binaries (e.g.
> > conntrack, nfct) would still operate in the same namespace where they were
> > started.
> >
> > To make use of this patch:
> > 1. create a network namespace: "ip netns add the_ns"
> > 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf
> > file inside General {...} section.
> 
> Wanted to provide more details about this patch and also bump it up
> for attention.
> 
> Basically, what it does is allows conntrackd to open Conntrack Netlink
> sockets into a different namespace than where Channel Sockets were
> opened.

I see.

> This isolation brings benefits to:
> 1. security, because the channel socket (and management interface) will
> reside in a different namespace. They won't be exposed to the traffic
> that traverses the namespace;
> 2. flexibility, because arbitrary IP addresses could be used inside that
> namespace for Connection Tracking purposes. No need to worry that
> there might be overlapping IP addresses with the Management interface;

I don't understand this second benefit, could you develop the idea a
bit more?

> 3. scalability w.r.t. namespaces, because all the namespaces would
> end up using a single management interface and IP address in the root
> namespace. There wouldn't be need to maintain a dedicated
> management interface and IP address inside every namespace.
> 
> Also this patch would prepare soil for my next patches, that would
> ease connection state synchronization for virtualized networks even more:
> 1. allow single conntrackd instance to synchronize multiple namespaces;
> 2. add configuration dynamically to conntrackd (without restarting
> the daemon).

Those seem interesting to have, definitely.

My plan is to release conntrack-tools 1.4.0 by the time Linux kernel
3.6 is released since it contains a major milestone (user-space
helper support), that will be quite soon.

We can schedule this for some 1.6.0 release. So I can keep these
namespace changes in some branch until they are merged to master.

I'd start by point 2 then go back to the namespace support, but it's
up to you.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ansis Atteka Sept. 6, 2012, 8:33 p.m. UTC | #4
On Thu, Sep 6, 2012 at 10:17 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> Hi,
>
> On Wed, Sep 05, 2012 at 06:36:29PM -0700, Ansis Atteka wrote:
>> On Fri, Aug 31, 2012 at 6:11 PM, Ansis Atteka <aatteka@nicira.com> wrote:
>> > This patch allows conntrackd to open CT Netlink sockets into a given
>> > network namespace. Channel sockets (e.g. UDP) would still be opened into
>> > the same namespace where conntrackd was started.
>> >
>> > The only binary this patch affects is conntrackd. All other binaries (e.g.
>> > conntrack, nfct) would still operate in the same namespace where they were
>> > started.
>> >
>> > To make use of this patch:
>> > 1. create a network namespace: "ip netns add the_ns"
>> > 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf
>> > file inside General {...} section.
>>
>> Wanted to provide more details about this patch and also bump it up
>> for attention.
>>
>> Basically, what it does is allows conntrackd to open Conntrack Netlink
>> sockets into a different namespace than where Channel Sockets were
>> opened.
>
> I see.
>
>> This isolation brings benefits to:
>> 1. security, because the channel socket (and management interface) will
>> reside in a different namespace. They won't be exposed to the traffic
>> that traverses the namespace;
>> 2. flexibility, because arbitrary IP addresses could be used inside that
>> namespace for Connection Tracking purposes. No need to worry that
>> there might be overlapping IP addresses with the Management interface;
>
> I don't understand this second benefit, could you develop the idea a
> bit more?
My point was that network administrator could assign arbitrary IP
addresses and routing rules inside the namespace, without worrying
that they might conflict with management IP address and/or routes.

I guess this is not that important in static setup where IP addresses
are configured only once and left the way they are.

>
>> 3. scalability w.r.t. namespaces, because all the namespaces would
>> end up using a single management interface and IP address in the root
>> namespace. There wouldn't be need to maintain a dedicated
>> management interface and IP address inside every namespace.
>>
>> Also this patch would prepare soil for my next patches, that would
>> ease connection state synchronization for virtualized networks even more:
>> 1. allow single conntrackd instance to synchronize multiple namespaces;
>> 2. add configuration dynamically to conntrackd (without restarting
>> the daemon).
>
> Those seem interesting to have, definitely.

>
> My plan is to release conntrack-tools 1.4.0 by the time Linux kernel
> 3.6 is released since it contains a major milestone (user-space
> helper support), that will be quite soon.
>
> We can schedule this for some 1.6.0 release. So I can keep these
> namespace changes in some branch until they are merged to master.
>
> I'd start by point 2 then go back to the namespace support, but it's
> up to you.

Yes, that seems to be the right sequence to do that.

Thanks,
Ansis
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ansis Atteka Sept. 10, 2012, 11:24 p.m. UTC | #5
On Thu, Sep 6, 2012 at 10:02 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> Hi Ansis,
>
> On Fri, Aug 31, 2012 at 06:11:55PM -0700, Ansis Atteka wrote:
>> This patch allows conntrackd to open CT Netlink sockets into a given
>> network namespace. Channel sockets (e.g. UDP) would still be opened into
>> the same namespace where conntrackd was started.
>>
>> The only binary this patch affects is conntrackd. All other binaries (e.g.
>> conntrack, nfct) would still operate in the same namespace where they were
>> started.
>>
>> To make use of this patch:
>> 1. create a network namespace: "ip netns add the_ns"
>> 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf
>> file inside General {...} section.
>>
>> Signed-off-by: Ansis Atteka <aatteka@nicira.com>
>> ---
>>  include/Makefile.am   |    2 +-
>>  include/conntrackd.h  |    2 +
>>  include/namespace.h   |   12 ++++++
>>  src/Makefile.am       |    3 +-
>>  src/cthelper.c        |    3 +-
>>  src/ctnl.c            |   20 +++++++--
>>  src/expect.c          |    4 +-
>>  src/external_inject.c |    3 +-
>>  src/internal_bypass.c |    5 ++-
>>  src/namespace.c       |  112 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  src/netlink.c         |    6 ++-
>>  src/read_config_lex.l |    1 +
>>  src/read_config_yy.y  |    8 +++-
>>  src/run.c             |    3 ++
>>  src/sync-mode.c       |    4 +-
>>  src/sync-notrack.c    |    3 +-
>>  16 files changed, 175 insertions(+), 16 deletions(-)
>>  create mode 100644 include/namespace.h
>>  create mode 100644 src/namespace.c
>>
>> diff --git a/include/Makefile.am b/include/Makefile.am
>> index 6bd0f7f..e06fd4d 100644
>> --- a/include/Makefile.am
>> +++ b/include/Makefile.am
>> @@ -6,5 +6,5 @@ noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \
>>                network.h filter.h queue.h vector.h cidr.h \
>>                traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \
>>                process.h origin.h internal.h external.h date.h nfct.h \
>> -              helper.h myct.h stack.h
>> +              helper.h myct.h stack.h namespace.h
>>
>> diff --git a/include/conntrackd.h b/include/conntrackd.h
>> index 19e613c..c349d72 100644
>> --- a/include/conntrackd.h
>> +++ b/include/conntrackd.h
>> @@ -94,6 +94,7 @@ struct ct_conf {
>>       int channel_type_global;
>>       struct channel_conf channel[MULTICHANNEL_MAX];
>>       struct local_conf local;        /* unix socket facilities */
>> +     char netlink_namespace[FILENAME_MAXLEN];
>>       int nice;
>>       int limit;
>>       int refresh;
>> @@ -143,6 +144,7 @@ struct ct_conf {
>>  #define STATE(x) st.x
>>
>>  struct ct_general_state {
>> +     int                                     ns_fd;          /* namespace fd for NL sockets */
>
> We use the same coding style of the Linux kernel. Please, break lines
> at 80 chars.
Ok. Forgot that tabs are treated as 8 characters instead of 4.
>
>>       sigset_t                        block;
>>       FILE                            *log;
>>       FILE                            *stats_log;
>> diff --git a/include/namespace.h b/include/namespace.h
>> new file mode 100644
>> index 0000000..668a270
>> --- /dev/null
>> +++ b/include/namespace.h
>> @@ -0,0 +1,12 @@
>> +#ifndef _NAMESPACE_H_
>> +#define _NAMESPACE_H_
>> +
>> +#include <libmnl/libmnl.h>
>> +#include <libnetfilter_conntrack/libnetfilter_conntrack.h>
>> +
>> +void init_namespaces(void);
>> +
>> +struct nfct_handle *nfct_open_ns(u_int8_t, unsigned, int);
>> +struct mnl_socket *mnl_socket_open_ns(int, int);
>> +
>> +#endif
>> diff --git a/src/Makefile.am b/src/Makefile.am
>> index d8074d2..60314ab 100644
>> --- a/src/Makefile.am
>> +++ b/src/Makefile.am
>> @@ -39,7 +39,8 @@ conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \
>>                   external_cache.c external_inject.c \
>>                   internal_cache.c internal_bypass.c \
>>                   read_config_yy.y read_config_lex.l \
>> -                 stack.c helpers.c utils.c expect.c
>> +                 stack.c helpers.c utils.c expect.c \
>> +                 namespace.c
>>
>>  # yacc and lex generate dirty code
>>  read_config_yy.o read_config_lex.o: AM_CFLAGS += -Wno-missing-prototypes -Wno-missing-declarations -Wno-implicit-function-declaration -Wno-nested-externs -Wno-undef -Wno-redundant-decls
>> diff --git a/src/cthelper.c b/src/cthelper.c
>> index c119869..b165900 100644
>> --- a/src/cthelper.c
>> +++ b/src/cthelper.c
>> @@ -22,6 +22,7 @@
>>  #include "log.h"
>>  #include "fds.h"
>>  #include "helper.h"
>> +#include "namespace.h"
>>
>>  #include <unistd.h>
>>  #include <fcntl.h>
>> @@ -493,7 +494,7 @@ int cthelper_init(void)
>>               return -1;
>>       }
>>
>> -     STATE_CTH(nl) = mnl_socket_open(NETLINK_NETFILTER);
>> +     STATE_CTH(nl) = mnl_socket_open_ns(NETLINK_NETFILTER, STATE(ns_fd));
>>       if (STATE_CTH(nl) == NULL) {
>>               dlog(LOG_ERR, "cannot open nfq socket");
>>               return -1;
>> diff --git a/src/ctnl.c b/src/ctnl.c
>> index bb54727..65784c3 100644
>> --- a/src/ctnl.c
>> +++ b/src/ctnl.c
>> @@ -29,6 +29,7 @@
>>  #include "origin.h"
>>  #include "date.h"
>>  #include "internal.h"
>> +#include "namespace.h"
>>
>>  #include <errno.h>
>>  #include <signal.h>
>> @@ -399,6 +400,17 @@ static void poll_cb(void *data)
>>
>>  int ctnl_init(void)
>>  {
>> +     if (CONFIG(netlink_namespace)[0]) {
>> +             STATE(ns_fd) = open(CONFIG(netlink_namespace), O_RDONLY);
>> +             if (STATE(ns_fd) == -1) {
>> +                     dlog(LOG_ERR, "could not open network namespace %s: %s",
>> +                              CONFIG(netlink_namespace), strerror(errno));
>> +                     return -1;
>> +             }
>> +     } else {
>> +             STATE(ns_fd) = -1;
>> +     }
>> +
>>       if (CONFIG(flags) & CTD_STATS_MODE)
>>               STATE(mode) = &stats_mode;
>>       else if (CONFIG(flags) & CTD_SYNC_MODE)
>> @@ -417,7 +429,7 @@ int ctnl_init(void)
>>       }
>>
>>       /* resynchronize (like 'dump' socket) but it also purges old entries */
>> -     STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0);
>> +     STATE(resync) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>>       if (STATE(resync)== NULL) {
>>               dlog(LOG_ERR, "can't open netlink handler: %s",
>>                    strerror(errno));
>> @@ -438,7 +450,7 @@ int ctnl_init(void)
>>       fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK);
>>
>>       if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) {
>> -             STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0);
>> +             STATE(dump) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>>               if (STATE(dump) == NULL) {
>>                       dlog(LOG_ERR, "can't open netlink handler: %s",
>>                            strerror(errno));
>> @@ -467,7 +479,7 @@ int ctnl_init(void)
>>               }
>>       }
>>
>> -     STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0);
>> +     STATE(get) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>>       if (STATE(get) == NULL) {
>>               dlog(LOG_ERR, "can't open netlink handler: %s",
>>                    strerror(errno));
>> @@ -481,7 +493,7 @@ int ctnl_init(void)
>>                                       exp_get_handler, NULL);
>>       }
>>
>> -     STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0);
>> +     STATE(flush) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>>       if (STATE(flush) == NULL) {
>>               dlog(LOG_ERR, "cannot open flusher handler");
>>               return -1;
>> diff --git a/src/expect.c b/src/expect.c
>> index 6069770..bedec2c 100644
>> --- a/src/expect.c
>> +++ b/src/expect.c
>> @@ -8,7 +8,9 @@
>>   * This code has been sponsored by Vyatta Inc. <http://www.vyatta.com>
>>   */
>>
>> +#include "conntrackd.h"
>>  #include "helper.h"
>> +#include "namespace.h"
>>
>>  #include <stdio.h>
>>  #include <string.h>
>> @@ -165,7 +167,7 @@ static int cthelper_expect_cmd(struct nf_expect *exp, int cmd)
>>       int ret;
>>       struct nfct_handle *h;
>>
>> -     h = nfct_open(EXPECT, 0);
>> +     h = nfct_open_ns(EXPECT, 0, STATE(ns_fd));
>>       if (!h)
>>               return -1;
>>
>> diff --git a/src/external_inject.c b/src/external_inject.c
>> index 0ad3478..5a6680b 100644
>> --- a/src/external_inject.c
>> +++ b/src/external_inject.c
>> @@ -22,6 +22,7 @@
>>  #include "cache.h"
>>  #include "origin.h"
>>  #include "external.h"
>> +#include "namespace.h"
>>  #include "netlink.h"
>>
>>  #include <libnetfilter_conntrack/libnetfilter_conntrack.h>
>> @@ -42,7 +43,7 @@ struct {
>>  static int external_inject_init(void)
>>  {
>>       /* handler to directly inject conntracks into kernel-space */
>> -     inject = nfct_open(CONFIG(netlink).subsys_id, 0);
>> +     inject = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>>       if (inject == NULL) {
>>               dlog(LOG_ERR, "can't open netlink handler: %s",
>>                    strerror(errno));
>> diff --git a/src/internal_bypass.c b/src/internal_bypass.c
>> index 1194339..520c55d 100644
>> --- a/src/internal_bypass.c
>> +++ b/src/internal_bypass.c
>> @@ -16,6 +16,7 @@
>>  #include "netlink.h"
>>  #include "network.h"
>>  #include "origin.h"
>> +#include "namespace.h"
>>
>>  static int internal_bypass_init(void)
>>  {
>> @@ -52,7 +53,7 @@ static void internal_bypass_ct_dump(int fd, int type)
>>       u_int32_t family = AF_UNSPEC;
>>       int ret;
>>
>> -     h = nfct_open(CONFIG(netlink).subsys_id, 0);
>> +     h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>>       if (h == NULL) {
>>               dlog(LOG_ERR, "can't allocate memory for the internal cache");
>>               return;
>> @@ -183,7 +184,7 @@ static void internal_bypass_exp_dump(int fd, int type)
>>       u_int32_t family = AF_UNSPEC;
>>       int ret;
>>
>> -     h = nfct_open(CONFIG(netlink).subsys_id, 0);
>> +     h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>>       if (h == NULL) {
>>               dlog(LOG_ERR, "can't allocate memory for the internal cache");
>>               return;
>> diff --git a/src/namespace.c b/src/namespace.c
>> new file mode 100644
>> index 0000000..03db222
>> --- /dev/null
>> +++ b/src/namespace.c
>> @@ -0,0 +1,112 @@
>> +/*
>> + * Copyright (C) 2012 Nicira, Inc.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
>> + *
>> + * Authors:
>> + *     Ansis Atteka <aatteka@nicira.com>
>> + */
>> +#define _GNU_SOURCE
>> +
>> +#include "namespace.h"
>> +
>> +#include <fcntl.h>
>> +#include <sched.h>
>> +#include <sys/syscall.h>
>> +#include <sys/types.h>
>> +#include <sys/stat.h>
>> +#include <unistd.h>
>> +
>> +#include "conntrackd.h"
>> +#include "log.h"
>> +
>> +#ifndef CLONE_NEWNET
>> +#define CLONE_NEWNET 0x40000000
>> +#endif
>> +
>> +#ifndef __NR_setns
>> +#ifdef __x86_64
>> +#define __NR_setns 308
>> +#endif
>> +#ifdef __i386
>> +#define __NR_setns 346
>> +#endif
>> +#endif
>
> I can see there's some setns wrapper since glibc 2.14 that will make
> this portable.
ok
>
> What I would try is to check in configure.ac if setns() exists, if so
> use it. Otherwise, check for __NR_setns, if so define setns yourself
> in this new namespace.c file. If neither setns() nor __NR_setns
> exists, then define setns() to return success and print display error
> and exist if NetlinkNamespace is used.
ok
>
>> +
>> +#ifdef __NR_setns
>> +
>> +static int root_fd = -1;
>
> This root_fd should go to STATE(ns).root_fd:
Right, it makes more sense to put it inside ct_state for
now.

Once conntrackd will become multi-namespace
aware, we will have to move it somewhere else,
because all the namespaces(i.e. ct_states) will
use the same root_fd.

>
> struct ct_state {
> ...
>         struct {
>                 int cur_fd;     /* current namespace */
>                 int root_fd;    /* root namespace */
>         } ns;
>
>> +
>> +void init_namespaces(void) {
>> +     root_fd = open("/proc/self/ns/net", O_RDONLY);
>> +     if (root_fd == -1) {
>> +             dlog(LOG_WARNING, "could not open current namespace");
>> +     }
>> +}
>> +
>> +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions,
>> +                                                              int ns_fd) {
>> +     struct nfct_handle *handle = NULL;
>> +
>> +     if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) {
>> +             dlog(LOG_ERR, "could not switch between namespaces");
>> +     } else {
>> +             handle = nfct_open(subsys_id, subscriptions);
>> +             if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) {
>> +                     dlog(LOG_ERR, "fatal: could not switch back to main namespace");
>> +             }
>> +     }
>
> Oh, this code above is confusing, please make it more readable, like
> this:
>
>         if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) {
>                 dlog(LOG_ERR, "could not switch between namespaces");
>                 return NULL;
>         }
>
>         handle = nfct_open(subsys_id, subscriptions);
>         if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) {
>                 dlog(LOG_ERR, "could not switch back to main namespace");
>         }
>
> And you encapsulate part of that code in some new function like:
>
> int ctd_change_namespace(int fd)
> {
>         if (fd != -1)
>                 return 0;
>
>         if (syscall(__NR_setns, fd, CLONE_NEWNET)) {
>                 dlog(LOG_ERR, "could not switch back to main namespace");
>                 return -1;
>         }
>
>         return 0;
> }
>
> That you can reuse over and over again.
ok
>
>> +     return handle;
>> +}
>> +
>> +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) {
>> +     struct mnl_socket *handle = NULL;
>> +
>> +     if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) {
>> +             dlog(LOG_ERR, "could not switch between namespaces");
>> +     } else {
>> +             handle = mnl_socket_open(bus);
>> +             if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) {
>> +                     dlog(LOG_ERR, "fatal: could not switch back to main namespace");
>> +             }
>> +     }
>> +     return handle;
>
> Same thing for this function above.
ok
>
>> +}
>> +
>> +#else
>> +
>> +void init_namespaces(void) {
>> +}
>> +
>> +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions,
>> +                                                              int ns_fd) {
>> +     if (ns_fd == -1) {
>> +             return nfct_open(subsys_id, subscriptions);
>> +     } else {
>> +             dlog(LOG_ERR, "network namespaces are not supported on this system");
>> +             return NULL;
>> +     }
>> +}
>> +
>> +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) {
>> +     if (ns_fd == -1) {
>> +             return mnl_socket_open(bus);
>> +     } else {
>> +             dlog(LOG_ERR, "network namespaces are not supported on this system");
>> +             return NULL;
>> +     }
>> +}
>
> You can remove this conditional code above since the #else if you do
> something like:
>
> int ctd_change_namespace(int fd)
> {
> #ifdef SETNS_EXISTS
>         if (fd != -1)
>                 return 0;
>
>         if (syscall(__NR_setns, fd, CLONE_NEWNET)) {
>                 dlog(LOG_ERR, "could not switch back to main namespace");
>                 return -1;
>         }
> #endif
>         return 0;
> }
>
> That's all by now.

Thanks for review! I will send updated patch soon.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Sept. 11, 2012, 3:44 p.m. UTC | #6
On Mon, Sep 10, 2012 at 04:24:49PM -0700, Ansis Atteka wrote:
[...]
> >
> >> +
> >> +#ifdef __NR_setns
> >> +
> >> +static int root_fd = -1;
> >
> > This root_fd should go to STATE(ns).root_fd:
>
> Right, it makes more sense to put it inside ct_state for
> now.
> 
> Once conntrackd will become multi-namespace
> aware, we will have to move it somewhere else,
> because all the namespaces(i.e. ct_states) will
> use the same root_fd.

Can you develop a bit more the change that should happen in your
opinion) to conntrackd to support multi-namespace? I'd like to get an
idea before that patch lands on the mailing list.

[...]
> Thanks for review! I will send updated patch soon.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ansis Atteka Sept. 13, 2012, 7:37 a.m. UTC | #7
On Tue, Sep 11, 2012 at 8:44 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Mon, Sep 10, 2012 at 04:24:49PM -0700, Ansis Atteka wrote:
> [...]
>> >
>> >> +
>> >> +#ifdef __NR_setns
>> >> +
>> >> +static int root_fd = -1;
>> >
>> > This root_fd should go to STATE(ns).root_fd:
>>
>> Right, it makes more sense to put it inside ct_state for
>> now.
>>
>> Once conntrackd will become multi-namespace
>> aware, we will have to move it somewhere else,
>> because all the namespaces(i.e. ct_states) will
>> use the same root_fd.
>
> Can you develop a bit more the change that should happen in your
> opinion) to conntrackd to support multi-namespace? I'd like to get an
> idea before that patch lands on the mailing list.
I know that this would be a massive change, so I might miss
something in the proposal. Anyway feel free to correct me or ask for
more details where necessary. Here is a list of necessary changes:

1. Quite a lot of refactoring in configuration parser.

I would suggest to split ct_conf struct into three logical pieces (i.e. smaller
structs):
a. channel config (instance per remote conntrackd instance)
b. conntrack-state config (instance per namespace)
c. static/global config (single instance; Would contain path to log
file, unix socket e.t.c.)

This decoupling would allow much more easily to maintain relation
between conntrack-states and channels (for example, when multiple
namespaces are using the same channel).

Also, currently CONFIG(X) macro allows to reference only a single ct_conf
instance. This will need to be changed so that each namespace has its own
ct_conf_sync instance. And each channel has its own ct_conf_channel instance.

Also, I am afraid that, for this to make more sense, I would have to extend
conntrackd.conf syntax, For example,introduce following top-level sections:
channel {}, sync {} and global {} respectively.

2. Allow to add, remove and list configuration dynamically without
restarting conntrackd. This would require ability to start conntrackd
with minimal global/static configuration. After that add namespaces and
channel configuration by using Unix Domain socket.

For example, instead of starting conntrackd with following command:
conntrackd -C /etc/conntrackd/conntrackd.conf

One would have to use something like this:
conntrackd --global-config /etc/conntrackd/conntrackd_global.conf #
This config file would just specify Unix socket, log file e.t.c.
and then on-the-fly add channel and namespace configuration with:
conntrackd -U /var/run/conntrackd.ctl --add
/etc/conntrackd/conntrackd_namespace1.conf
conntrackd -U /var/run/conntrackd.ctl --add
/etc/conntrackd/conntrackd_channel1.conf
conntrackd -U /var/run/conntrackd.ctl --add
/etc/conntrackd/conntrackd_namespace2.conf
conntrackd -U /var/run/conntrackd.ctl --add
/etc/conntrackd/conntrackd_channel2.conf

We could glue namespaces together with channels by using some IDs.

Another question is, whether over the Unix domain socket we would prefer to
transfer binary (already parsed) or textual (yet unparsed) configuration?

Also, I would need to figure out if/how to maintain backward compatibility with
already existing commands, when there are multiple namespaces (e.g. dump
internal cache, commit external cache ...).

3. Wire protocol format improvements, so that namespace ID would be encapsulated
into the protocol too. This is required, when same channel is being
used by multiple
namespaces.

4. Similarly as CONFIG(x) was broken down into 3 logical pieces, the
same thing would
need to be done for STATE(x) macros.


Thanks,
Ansis
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Sept. 18, 2012, 7:23 p.m. UTC | #8
On Thu, Sep 13, 2012 at 12:37:18AM -0700, Ansis Atteka wrote:
[...]
> I know that this would be a massive change, so I might miss
> something in the proposal. Anyway feel free to correct me or ask for
> more details where necessary. Here is a list of necessary changes:
> 
> 1. Quite a lot of refactoring in configuration parser.
> 
> I would suggest to split ct_conf struct into three logical pieces (i.e. smaller
> structs):
> a. channel config (instance per remote conntrackd instance)
> b. conntrack-state config (instance per namespace)
> c. static/global config (single instance; Would contain path to log
> file, unix socket e.t.c.)

I always wanted to clean up ct_conf. See patch attached, I didn't
commit it yet since I want to give it some test (Please, not need to
rebase the patch we're currently discussing upon it).

> This decoupling would allow much more easily to maintain relation
> between conntrack-states and channels (for example, when multiple
> namespaces are using the same channel).

I'm not familiar with the channel definition you're using.

Note that conntrackd already uses the name `channel' for the state-sync
link abstraction (ie. TCP / UDP / MCAST / TIPC ...).

> Also, currently CONFIG(X) macro allows to reference only a single ct_conf
> instance. This will need to be changed so that each namespace has its own
> ct_conf_sync instance. And each channel has its own ct_conf_channel instance.
> 
> Also, I am afraid that, for this to make more sense, I would have to extend
> conntrackd.conf syntax, For example,introduce following top-level sections:
> channel {}, sync {} and global {} respectively.
> 
> 2. Allow to add, remove and list configuration dynamically without
> restarting conntrackd. This would require ability to start conntrackd
> with minimal global/static configuration. After that add namespaces and
> channel configuration by using Unix Domain socket.
> 
> For example, instead of starting conntrackd with following command:
> conntrackd -C /etc/conntrackd/conntrackd.conf
> 
> One would have to use something like this:
> conntrackd --global-config /etc/conntrackd/conntrackd_global.conf #
> This config file would just specify Unix socket, log file e.t.c.
> and then on-the-fly add channel and namespace configuration with:
> conntrackd -U /var/run/conntrackd.ctl --add
> /etc/conntrackd/conntrackd_namespace1.conf
> conntrackd -U /var/run/conntrackd.ctl --add
> /etc/conntrackd/conntrackd_channel1.conf
> conntrackd -U /var/run/conntrackd.ctl --add
> /etc/conntrackd/conntrackd_namespace2.conf
> conntrackd -U /var/run/conntrackd.ctl --add
> /etc/conntrackd/conntrackd_channel2.conf
> 
> We could glue namespaces together with channels by using some IDs.
> 
> Another question is, whether over the Unix domain socket we would prefer to
> transfer binary (already parsed) or textual (yet unparsed) configuration?
> 
> Also, I would need to figure out if/how to maintain backward compatibility with
> already existing commands, when there are multiple namespaces (e.g. dump
> internal cache, commit external cache ...).
> 
> 3. Wire protocol format improvements, so that namespace ID would be encapsulated
> into the protocol too. This is required, when same channel is being
> used by multiple namespaces.

This only requires adding one new TLV attribute to identify this. So
we don't need to bloat the header with one field that is not use.

> 4. Similarly as CONFIG(x) was broken down into 3 logical pieces, the
> same thing would
> need to be done for STATE(x) macros.

This seems to be a huge changeset what you're proposing.

I need some convincing architecture example that describes how this
can be used before you submit such a big patch. I need to understand
it to know if there is a different way to make this.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ansis Atteka Sept. 18, 2012, 10:36 p.m. UTC | #9
On Tue, Sep 18, 2012 at 12:23 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Thu, Sep 13, 2012 at 12:37:18AM -0700, Ansis Atteka wrote:
> [...]
>> I know that this would be a massive change, so I might miss
>> something in the proposal. Anyway feel free to correct me or ask for
>> more details where necessary. Here is a list of necessary changes:
>>
>> 1. Quite a lot of refactoring in configuration parser.
>>
>> I would suggest to split ct_conf struct into three logical pieces (i.e. smaller
>> structs):
>> a. channel config (instance per remote conntrackd instance)
>> b. conntrack-state config (instance per namespace)
>> c. static/global config (single instance; Would contain path to log
>> file, unix socket e.t.c.)
>
> I always wanted to clean up ct_conf. See patch attached, I didn't
> commit it yet since I want to give it some test (Please, not need to
> rebase the patch we're currently discussing upon it).
I don't see any patches attached.
>
>> This decoupling would allow much more easily to maintain relation
>> between conntrack-states and channels (for example, when multiple
>> namespaces are using the same channel).
>
> I'm not familiar with the channel definition you're using.
By "channel" I meant channel & channel_ops structures.
>
> Note that conntrackd already uses the name `channel' for the state-sync
> link abstraction (ie. TCP / UDP / MCAST / TIPC ...).
I guess we are actually talking about the same thing.
>
>> Also, currently CONFIG(X) macro allows to reference only a single ct_conf
>> instance. This will need to be changed so that each namespace has its own
>> ct_conf_sync instance. And each channel has its own ct_conf_channel instance.
>>
>> Also, I am afraid that, for this to make more sense, I would have to extend
>> conntrackd.conf syntax, For example,introduce following top-level sections:
>> channel {}, sync {} and global {} respectively.
>>
>> 2. Allow to add, remove and list configuration dynamically without
>> restarting conntrackd. This would require ability to start conntrackd
>> with minimal global/static configuration. After that add namespaces and
>> channel configuration by using Unix Domain socket.
>>
>> For example, instead of starting conntrackd with following command:
>> conntrackd -C /etc/conntrackd/conntrackd.conf
>>
>> One would have to use something like this:
>> conntrackd --global-config /etc/conntrackd/conntrackd_global.conf #
>> This config file would just specify Unix socket, log file e.t.c.
>> and then on-the-fly add channel and namespace configuration with:
>> conntrackd -U /var/run/conntrackd.ctl --add
>> /etc/conntrackd/conntrackd_namespace1.conf
>> conntrackd -U /var/run/conntrackd.ctl --add
>> /etc/conntrackd/conntrackd_channel1.conf
>> conntrackd -U /var/run/conntrackd.ctl --add
>> /etc/conntrackd/conntrackd_namespace2.conf
>> conntrackd -U /var/run/conntrackd.ctl --add
>> /etc/conntrackd/conntrackd_channel2.conf
>>
>> We could glue namespaces together with channels by using some IDs.
>>
>> Another question is, whether over the Unix domain socket we would prefer to
>> transfer binary (already parsed) or textual (yet unparsed) configuration?
>>
>> Also, I would need to figure out if/how to maintain backward compatibility with
>> already existing commands, when there are multiple namespaces (e.g. dump
>> internal cache, commit external cache ...).
>>
>> 3. Wire protocol format improvements, so that namespace ID would be encapsulated
>> into the protocol too. This is required, when same channel is being
>> used by multiple namespaces.
>
> This only requires adding one new TLV attribute to identify this. So
> we don't need to bloat the header with one field that is not use.
Here is how I am currently doing that:

struct nethdr {
#if __BYTE_ORDER == __LITTLE_ENDIAN
	uint8_t type:4,
		version:4;
#elif __BYTE_ORDER == __BIG_ENDIAN
	uint8_t version:4,
		type:4;
#else
#error  "Unknown system endianess!"
#endif
	uint8_t flags;
	uint16_t len;
	uint32_t seq;
	uint32_t nsid; < -present only if nethdr.flag &
};
nsid is the namespace id. This field would be present only
if nethdr.flags & NET_F_NAMESPACE != 0.

Could you please elaborate more on your suggested approach? If I
understand it correctly, then you want to add the namespace id
inside the data section that follows immediately after the nethdr
structure as TLV variable?

Also, if I understand correctly, then after nethdr conntrackd
simply transfers nf_conntrack struct (or the contents of cache
object). I have had imagined that we should have cache table
per namespace, so storing namespace id inside nf_conntrackd
would become redundant.

What is your take on this?
>
>> 4. Similarly as CONFIG(x) was broken down into 3 logical pieces, the
>> same thing would
>> need to be done for STATE(x) macros.
>
> This seems to be a huge changeset what you're proposing.
>
> I need some convincing architecture example that describes how this
> can be used before you submit such a big patch. I need to understand
> it to know if there is a different way to make this.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/Makefile.am b/include/Makefile.am
index 6bd0f7f..e06fd4d 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -6,5 +6,5 @@  noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \
 		 network.h filter.h queue.h vector.h cidr.h \
 		 traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \
 		 process.h origin.h internal.h external.h date.h nfct.h \
-		 helper.h myct.h stack.h
+		 helper.h myct.h stack.h namespace.h
 
diff --git a/include/conntrackd.h b/include/conntrackd.h
index 19e613c..c349d72 100644
--- a/include/conntrackd.h
+++ b/include/conntrackd.h
@@ -94,6 +94,7 @@  struct ct_conf {
 	int channel_type_global;
 	struct channel_conf channel[MULTICHANNEL_MAX];
 	struct local_conf local;	/* unix socket facilities */
+	char netlink_namespace[FILENAME_MAXLEN];
 	int nice;
 	int limit;
 	int refresh;
@@ -143,6 +144,7 @@  struct ct_conf {
 #define STATE(x) st.x
 
 struct ct_general_state {
+	int					ns_fd;		/* namespace fd for NL sockets */
 	sigset_t 			block;
 	FILE 				*log;
 	FILE				*stats_log;
diff --git a/include/namespace.h b/include/namespace.h
new file mode 100644
index 0000000..668a270
--- /dev/null
+++ b/include/namespace.h
@@ -0,0 +1,12 @@ 
+#ifndef _NAMESPACE_H_
+#define _NAMESPACE_H_
+
+#include <libmnl/libmnl.h>
+#include <libnetfilter_conntrack/libnetfilter_conntrack.h>
+
+void init_namespaces(void);
+
+struct nfct_handle *nfct_open_ns(u_int8_t, unsigned, int);
+struct mnl_socket *mnl_socket_open_ns(int, int);
+
+#endif
diff --git a/src/Makefile.am b/src/Makefile.am
index d8074d2..60314ab 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -39,7 +39,8 @@  conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \
 		    external_cache.c external_inject.c \
 		    internal_cache.c internal_bypass.c \
 		    read_config_yy.y read_config_lex.l \
-		    stack.c helpers.c utils.c expect.c
+		    stack.c helpers.c utils.c expect.c \
+		    namespace.c
 
 # yacc and lex generate dirty code
 read_config_yy.o read_config_lex.o: AM_CFLAGS += -Wno-missing-prototypes -Wno-missing-declarations -Wno-implicit-function-declaration -Wno-nested-externs -Wno-undef -Wno-redundant-decls
diff --git a/src/cthelper.c b/src/cthelper.c
index c119869..b165900 100644
--- a/src/cthelper.c
+++ b/src/cthelper.c
@@ -22,6 +22,7 @@ 
 #include "log.h"
 #include "fds.h"
 #include "helper.h"
+#include "namespace.h"
 
 #include <unistd.h>
 #include <fcntl.h>
@@ -493,7 +494,7 @@  int cthelper_init(void)
 		return -1;
 	}
 
-	STATE_CTH(nl) = mnl_socket_open(NETLINK_NETFILTER);
+	STATE_CTH(nl) = mnl_socket_open_ns(NETLINK_NETFILTER, STATE(ns_fd));
 	if (STATE_CTH(nl) == NULL) {
 		dlog(LOG_ERR, "cannot open nfq socket");
 		return -1;
diff --git a/src/ctnl.c b/src/ctnl.c
index bb54727..65784c3 100644
--- a/src/ctnl.c
+++ b/src/ctnl.c
@@ -29,6 +29,7 @@ 
 #include "origin.h"
 #include "date.h"
 #include "internal.h"
+#include "namespace.h"
 
 #include <errno.h>
 #include <signal.h>
@@ -399,6 +400,17 @@  static void poll_cb(void *data)
 
 int ctnl_init(void)
 {
+	if (CONFIG(netlink_namespace)[0]) {
+		STATE(ns_fd) = open(CONFIG(netlink_namespace), O_RDONLY);
+		if (STATE(ns_fd) == -1) {
+			dlog(LOG_ERR, "could not open network namespace %s: %s",
+				 CONFIG(netlink_namespace), strerror(errno));
+			return -1;
+		}
+	} else {
+		STATE(ns_fd) = -1;
+	}
+
 	if (CONFIG(flags) & CTD_STATS_MODE)
 		STATE(mode) = &stats_mode;
 	else if (CONFIG(flags) & CTD_SYNC_MODE)
@@ -417,7 +429,7 @@  int ctnl_init(void)
 	}
 
 	/* resynchronize (like 'dump' socket) but it also purges old entries */
-	STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0);
+	STATE(resync) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
 	if (STATE(resync)== NULL) {
 		dlog(LOG_ERR, "can't open netlink handler: %s",
 		     strerror(errno));
@@ -438,7 +450,7 @@  int ctnl_init(void)
 	fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK);
 
 	if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) {
-		STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0);
+		STATE(dump) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
 		if (STATE(dump) == NULL) {
 			dlog(LOG_ERR, "can't open netlink handler: %s",
 			     strerror(errno));
@@ -467,7 +479,7 @@  int ctnl_init(void)
 		}
 	}
 
-	STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0);
+	STATE(get) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
 	if (STATE(get) == NULL) {
 		dlog(LOG_ERR, "can't open netlink handler: %s",
 		     strerror(errno));
@@ -481,7 +493,7 @@  int ctnl_init(void)
 					exp_get_handler, NULL);
 	}
 
-	STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0);
+	STATE(flush) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
 	if (STATE(flush) == NULL) {
 		dlog(LOG_ERR, "cannot open flusher handler");
 		return -1;
diff --git a/src/expect.c b/src/expect.c
index 6069770..bedec2c 100644
--- a/src/expect.c
+++ b/src/expect.c
@@ -8,7 +8,9 @@ 
  * This code has been sponsored by Vyatta Inc. <http://www.vyatta.com>
  */
 
+#include "conntrackd.h"
 #include "helper.h"
+#include "namespace.h"
 
 #include <stdio.h>
 #include <string.h>
@@ -165,7 +167,7 @@  static int cthelper_expect_cmd(struct nf_expect *exp, int cmd)
 	int ret;
 	struct nfct_handle *h;
 
-	h = nfct_open(EXPECT, 0);
+	h = nfct_open_ns(EXPECT, 0, STATE(ns_fd));
 	if (!h)
 		return -1;
 
diff --git a/src/external_inject.c b/src/external_inject.c
index 0ad3478..5a6680b 100644
--- a/src/external_inject.c
+++ b/src/external_inject.c
@@ -22,6 +22,7 @@ 
 #include "cache.h"
 #include "origin.h"
 #include "external.h"
+#include "namespace.h"
 #include "netlink.h"
 
 #include <libnetfilter_conntrack/libnetfilter_conntrack.h>
@@ -42,7 +43,7 @@  struct {
 static int external_inject_init(void)
 {
 	/* handler to directly inject conntracks into kernel-space */
-	inject = nfct_open(CONFIG(netlink).subsys_id, 0);
+	inject = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
 	if (inject == NULL) {
 		dlog(LOG_ERR, "can't open netlink handler: %s",
 		     strerror(errno));
diff --git a/src/internal_bypass.c b/src/internal_bypass.c
index 1194339..520c55d 100644
--- a/src/internal_bypass.c
+++ b/src/internal_bypass.c
@@ -16,6 +16,7 @@ 
 #include "netlink.h"
 #include "network.h"
 #include "origin.h"
+#include "namespace.h"
 
 static int internal_bypass_init(void)
 {
@@ -52,7 +53,7 @@  static void internal_bypass_ct_dump(int fd, int type)
 	u_int32_t family = AF_UNSPEC;
 	int ret;
 
-	h = nfct_open(CONFIG(netlink).subsys_id, 0);
+	h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
 	if (h == NULL) {
 		dlog(LOG_ERR, "can't allocate memory for the internal cache");
 		return;
@@ -183,7 +184,7 @@  static void internal_bypass_exp_dump(int fd, int type)
 	u_int32_t family = AF_UNSPEC;
 	int ret;
 
-	h = nfct_open(CONFIG(netlink).subsys_id, 0);
+	h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
 	if (h == NULL) {
 		dlog(LOG_ERR, "can't allocate memory for the internal cache");
 		return;
diff --git a/src/namespace.c b/src/namespace.c
new file mode 100644
index 0000000..03db222
--- /dev/null
+++ b/src/namespace.c
@@ -0,0 +1,112 @@ 
+/*
+ * Copyright (C) 2012 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Authors:
+ *     Ansis Atteka <aatteka@nicira.com>
+ */
+#define _GNU_SOURCE
+
+#include "namespace.h"
+
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "conntrackd.h"
+#include "log.h"
+
+#ifndef CLONE_NEWNET
+#define CLONE_NEWNET 0x40000000
+#endif
+
+#ifndef __NR_setns
+#ifdef __x86_64
+#define __NR_setns 308
+#endif
+#ifdef __i386
+#define __NR_setns 346
+#endif
+#endif
+
+#ifdef __NR_setns
+
+static int root_fd = -1;
+
+void init_namespaces(void) {
+	root_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (root_fd == -1) {
+		dlog(LOG_WARNING, "could not open current namespace");
+	}
+}
+
+struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions,
+								 int ns_fd) {
+	struct nfct_handle *handle = NULL;
+
+	if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) {
+		dlog(LOG_ERR, "could not switch between namespaces");
+	} else {
+		handle = nfct_open(subsys_id, subscriptions);
+		if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) {
+			dlog(LOG_ERR, "fatal: could not switch back to main namespace");
+		}
+	}
+	return handle;
+}
+
+struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) {
+	struct mnl_socket *handle = NULL;
+
+	if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) {
+		dlog(LOG_ERR, "could not switch between namespaces");
+	} else {
+		handle = mnl_socket_open(bus);
+		if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) {
+			dlog(LOG_ERR, "fatal: could not switch back to main namespace");
+		}
+	}
+	return handle;
+}
+
+#else
+
+void init_namespaces(void) {
+}
+
+struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions,
+								 int ns_fd) {
+	if (ns_fd == -1) {
+		return nfct_open(subsys_id, subscriptions);
+	} else {
+		dlog(LOG_ERR, "network namespaces are not supported on this system");
+		return NULL;
+	}
+}
+
+struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) {
+	if (ns_fd == -1) {
+		return mnl_socket_open(bus);
+	} else {
+		dlog(LOG_ERR, "network namespaces are not supported on this system");
+		return NULL;
+	}
+}
+
+#endif
diff --git a/src/netlink.c b/src/netlink.c
index bd38d99..c71b463 100644
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -21,6 +21,7 @@ 
 #include "conntrackd.h"
 #include "filter.h"
 #include "log.h"
+#include "namespace.h"
 
 #include <string.h>
 #include <errno.h>
@@ -33,7 +34,8 @@  struct nfct_handle *nl_init_event_handler(void)
 {
 	struct nfct_handle *h;
 
-	h = nfct_open(CONFIG(netlink).subsys_id, CONFIG(netlink).groups);
+	h = nfct_open_ns(CONFIG(netlink).subsys_id, CONFIG(netlink).groups,
+					 STATE(ns_fd));
 	if (h == NULL)
 		return NULL;
 
@@ -175,7 +177,7 @@  int nl_flush_conntrack_table_selective(void)
 	struct nfct_handle *h;
 	int ret;
 
-	h = nfct_open(CONNTRACK, 0);
+	h = nfct_open_ns(CONNTRACK, 0, STATE(ns_fd));
 	if (h == NULL) {
 		dlog(LOG_ERR, "cannot open handle");
 		return -1;
diff --git a/src/read_config_lex.l b/src/read_config_lex.l
index 31fa32e..7a09c52 100644
--- a/src/read_config_lex.l
+++ b/src/read_config_lex.l
@@ -91,6 +91,7 @@  notrack		[N|n][O|o][T|t][R|r][A|a][C|c][K|k]
 "SocketBufferSizeMaxGrowth"	{ return T_BUFFER_SIZE_MAX_GROWN; /* alias */ }
 "NetlinkBufferSize"		{ return T_BUFFER_SIZE; }
 "NetlinkBufferSizeMaxGrowth"	{ return T_BUFFER_SIZE_MAX_GROWN; }
+"NetlinkNamespace"		{ return T_NETLINK_NAMESPACE; }
 "Mode"				{ return T_SYNC_MODE; }
 "ListenTo"			{ return T_LISTEN_TO; }
 "Family"			{ return T_FAMILY; }
diff --git a/src/read_config_yy.y b/src/read_config_yy.y
index c9235d3..b0985e7 100644
--- a/src/read_config_yy.y
+++ b/src/read_config_yy.y
@@ -87,7 +87,7 @@  enum {
 %token T_DISABLE_INTERNAL_CACHE T_DISABLE_EXTERNAL_CACHE T_ERROR_QUEUE_LENGTH
 %token T_OPTIONS T_TCP_WINDOW_TRACKING T_EXPECT_SYNC
 %token T_HELPER T_HELPER_QUEUE_NUM T_HELPER_POLICY T_HELPER_EXPECT_MAX
-%token T_HELPER_EXPECT_TIMEOUT
+%token T_HELPER_EXPECT_TIMEOUT T_NETLINK_NAMESPACE
 
 %token <string> T_IP T_PATH_VAL
 %token <val> T_NUMBER
@@ -1113,6 +1113,7 @@  general_line: hashsize
 	    | unix_line
 	    | netlink_buffer_size
 	    | netlink_buffer_size_max_grown
+	    | netlink_namespace
 	    | family
 	    | event_iterations_limit
 	    | poll_secs
@@ -1158,6 +1159,11 @@  netlink_events_reliable : T_NETLINK_EVENTS_RELIABLE T_OFF
 	conf.netlink.events_reliable = 0;
 };
 
+netlink_namespace : T_NETLINK_NAMESPACE T_PATH_VAL
+{
+	strncpy(conf.netlink_namespace, $2, FILENAME_MAXLEN);
+};
+
 nice : T_NICE T_SIGNED_NUMBER
 {
 	conf.nice = $2;
diff --git a/src/run.c b/src/run.c
index 3337694..7aa5032 100644
--- a/src/run.c
+++ b/src/run.c
@@ -30,6 +30,7 @@ 
 #include "origin.h"
 #include "date.h"
 #include "internal.h"
+#include "namespace.h"
 
 #include <errno.h>
 #include <signal.h>
@@ -252,6 +253,8 @@  init(void)
 		return -1;
 
 	/* Initialization */
+	init_namespaces();
+
 	if (CONFIG(flags) & (CTD_SYNC_MODE | CTD_STATS_MODE))
 		if (ctnl_init() < 0)
 			return -1;
diff --git a/src/sync-mode.c b/src/sync-mode.c
index e69ecfe..de58b8c 100644
--- a/src/sync-mode.c
+++ b/src/sync-mode.c
@@ -31,6 +31,7 @@ 
 #include "origin.h"
 #include "internal.h"
 #include "external.h"
+#include "namespace.h"
 
 #include <errno.h>
 #include <unistd.h>
@@ -451,7 +452,8 @@  static int init_sync(void)
 			tx_queue_cb, NULL, STATE(fds)) == -1)
 		return -1;
 
-	STATE_SYNC(commit).h = nfct_open(CONFIG(netlink).subsys_id, 0);
+	STATE_SYNC(commit).h = nfct_open_ns(CONFIG(netlink).subsys_id, 0,
+										STATE(ns_fd));
 	if (STATE_SYNC(commit).h == NULL) {
 		dlog(LOG_ERR, "can't create handler to commit");
 		return -1;
diff --git a/src/sync-notrack.c b/src/sync-notrack.c
index a7df4e7..d8010fb 100644
--- a/src/sync-notrack.c
+++ b/src/sync-notrack.c
@@ -24,6 +24,7 @@ 
 #include "log.h"
 #include "cache.h"
 #include "fds.h"
+#include "namespace.h"
 
 #include <string.h>
 
@@ -102,7 +103,7 @@  static void kernel_resync(void)
 	u_int32_t family = AF_UNSPEC;
 	int ret;
 
-	h = nfct_open(CONFIG(netlink).subsys_id, 0);
+	h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
 	if (h == NULL) {
 		dlog(LOG_ERR, "can't allocate memory for the internal cache");
 		return;