Message ID | alpine.DEB.2.00.1211292202150.25841@blackhole.kfki.hu |
---|---|
State | Superseded |
Headers | show |
Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> wrote: Hi Jozsef, this looks really good, two minor nits below. > diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c > index ac635a7..128885d 100644 > --- a/net/ipv4/netfilter/iptable_nat.c > +++ b/net/ipv4/netfilter/iptable_nat.c > @@ -17,6 +17,7 @@ > #include <net/netfilter/nf_nat.h> > #include <net/netfilter/nf_nat_core.h> > #include <net/netfilter/nf_nat_l3proto.h> > +#include <net/netfilter/nf_conntrack_ecache.h> > > static const struct xt_table nf_nat_ipv4_table = { > .name = "nat", > @@ -134,6 +135,24 @@ nf_nat_ipv4_fn(unsigned int hooknum, > /* ESTABLISHED */ > NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || > ctinfo == IP_CT_ESTABLISHED_REPLY); > + if (hooknum == NF_INET_POST_ROUTING && > + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && > + nat->masq_index && nat->masq_index != out->ifindex) { > + /* Outgoing interface changed, kill ct. */ Would it be possible to use nf_ct_kill_acct() here instead of > + if (del_timer(&ct->timeout)) { > + if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { [..] ? > --- a/net/ipv6/netfilter/ip6table_nat.c > +++ b/net/ipv6/netfilter/ip6table_nat.c > @@ -19,6 +19,7 @@ > #include <net/netfilter/nf_nat.h> > #include <net/netfilter/nf_nat_core.h> > #include <net/netfilter/nf_nat_l3proto.h> [..] > static const struct xt_table nf_nat_ipv6_table = { > + if (hooknum == NF_INET_POST_ROUTING && > + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && > + nat->masq_index && nat->masq_index != out->ifindex) { > + /* Outgoing interface changed, kill ct. */ > + if (del_timer(&ct->timeout)) { perhaps this could be a helper in include/net/netfilter/nf_nat.h? It would avoid the code duplication and the needed #if IS_ENABLED() MASQ check. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Florian, On Thu, 29 Nov 2012, Florian Westphal wrote: > Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> wrote: > > this looks really good, two minor nits below. > > > diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c > > index ac635a7..128885d 100644 > > --- a/net/ipv4/netfilter/iptable_nat.c > > +++ b/net/ipv4/netfilter/iptable_nat.c > > @@ -17,6 +17,7 @@ > > #include <net/netfilter/nf_nat.h> > > #include <net/netfilter/nf_nat_core.h> > > #include <net/netfilter/nf_nat_l3proto.h> > > +#include <net/netfilter/nf_conntrack_ecache.h> > > > > static const struct xt_table nf_nat_ipv4_table = { > > .name = "nat", > > @@ -134,6 +135,24 @@ nf_nat_ipv4_fn(unsigned int hooknum, > > /* ESTABLISHED */ > > NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || > > ctinfo == IP_CT_ESTABLISHED_REPLY); > > + if (hooknum == NF_INET_POST_ROUTING && > > + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && > > + nat->masq_index && nat->masq_index != out->ifindex) { > > + /* Outgoing interface changed, kill ct. */ > > Would it be possible to use nf_ct_kill_acct() here instead of > > > + if (del_timer(&ct->timeout)) { > > + if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { > [..] Yes, that'd be even better. > > --- a/net/ipv6/netfilter/ip6table_nat.c > > +++ b/net/ipv6/netfilter/ip6table_nat.c > > @@ -19,6 +19,7 @@ > > #include <net/netfilter/nf_nat.h> > > #include <net/netfilter/nf_nat_core.h> > > #include <net/netfilter/nf_nat_l3proto.h> > [..] > > static const struct xt_table nf_nat_ipv6_table = { > > + if (hooknum == NF_INET_POST_ROUTING && > > + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && > > + nat->masq_index && nat->masq_index != out->ifindex) { > > + /* Outgoing interface changed, kill ct. */ > > + if (del_timer(&ct->timeout)) { > > perhaps this could be a helper in include/net/netfilter/nf_nat.h? > > It would avoid the code duplication and the needed #if IS_ENABLED() MASQ > check. You mean an inline function? I'll send an updated version tomorrow :-). Best regards, Jozsef - E-mail : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences H-1525 Budapest 114, POB. 49, Hungary -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Nov 29, 2012 at 10:26:40PM +0100, Florian Westphal wrote: > Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> wrote: > > Hi Jozsef, > > this looks really good, two minor nits below. > > > diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c > > index ac635a7..128885d 100644 > > --- a/net/ipv4/netfilter/iptable_nat.c > > +++ b/net/ipv4/netfilter/iptable_nat.c > > @@ -17,6 +17,7 @@ > > #include <net/netfilter/nf_nat.h> > > #include <net/netfilter/nf_nat_core.h> > > #include <net/netfilter/nf_nat_l3proto.h> > > +#include <net/netfilter/nf_conntrack_ecache.h> > > > > static const struct xt_table nf_nat_ipv4_table = { > > .name = "nat", > > @@ -134,6 +135,24 @@ nf_nat_ipv4_fn(unsigned int hooknum, > > /* ESTABLISHED */ > > NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || > > ctinfo == IP_CT_ESTABLISHED_REPLY); > > + if (hooknum == NF_INET_POST_ROUTING && > > + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && > > + nat->masq_index && nat->masq_index != out->ifindex) { > > + /* Outgoing interface changed, kill ct. */ > > Would it be possible to use nf_ct_kill_acct() here instead of > > > + if (del_timer(&ct->timeout)) { > > + if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { > [..] > > ? > > > --- a/net/ipv6/netfilter/ip6table_nat.c > > +++ b/net/ipv6/netfilter/ip6table_nat.c > > @@ -19,6 +19,7 @@ > > #include <net/netfilter/nf_nat.h> > > #include <net/netfilter/nf_nat_core.h> > > #include <net/netfilter/nf_nat_l3proto.h> > [..] > > static const struct xt_table nf_nat_ipv6_table = { > > + if (hooknum == NF_INET_POST_ROUTING && > > + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && > > + nat->masq_index && nat->masq_index != out->ifindex) { > > + /* Outgoing interface changed, kill ct. */ > > + if (del_timer(&ct->timeout)) { > > perhaps this could be a helper in include/net/netfilter/nf_nat.h? > > It would avoid the code duplication and the needed #if IS_ENABLED() MASQ > check. I'd suggest a hook function that is set via rcu_pointer_assign in the init path of the masquerade target. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Jozsef, On Thu, Nov 29, 2012 at 10:12:54PM +0100, Jozsef Kadlecsik wrote: > Hi, > > This is the second variant of the patch which addresses the route > changes affecting already masqueraded connections. > > When the route changes (backup default route, VPNs), the packets are sent > out with wrong source address. The patch addresses the issue by comparing > the outgoing interface directly with the masquerade interface in the nat > table. It *is* MASQUERADE specific, so probably the inserted code could be > enclosed in a proper ifdef. > > Events are inefficient, because it'd require scanning the whole conntrack > table at any route change and re-checking the route for all entry, which > is simply way too expensive. I still have to waste the bullet of proposing to do this from user-space. I think a new command for the conntrack utility to iterate over the table and kill entries with wrong masq_index would be relatively simple. You will have to pass the ifindex you want to compare from user-space. People have to update their scripts when they consider that this action is required and we save doing this for each packet. > Comments are highly welcomed. > > Best regards, > Jozsef > > --- > net/ipv4/netfilter/iptable_nat.c | 19 +++++++++++++++++++ > net/ipv6/netfilter/ip6table_nat.c | 19 +++++++++++++++++++ > 2 files changed, 38 insertions(+), 0 deletions(-) > > diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c > index ac635a7..128885d 100644 > --- a/net/ipv4/netfilter/iptable_nat.c > +++ b/net/ipv4/netfilter/iptable_nat.c > @@ -17,6 +17,7 @@ > #include <net/netfilter/nf_nat.h> > #include <net/netfilter/nf_nat_core.h> > #include <net/netfilter/nf_nat_l3proto.h> > +#include <net/netfilter/nf_conntrack_ecache.h> > > static const struct xt_table nf_nat_ipv4_table = { > .name = "nat", > @@ -134,6 +135,24 @@ nf_nat_ipv4_fn(unsigned int hooknum, > /* ESTABLISHED */ > NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || > ctinfo == IP_CT_ESTABLISHED_REPLY); > + if (hooknum == NF_INET_POST_ROUTING && > + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && > + nat->masq_index && nat->masq_index != out->ifindex) { > + /* Outgoing interface changed, kill ct. */ > + if (del_timer(&ct->timeout)) { > + if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { > + nf_ct_delete_from_lists(ct); > + nf_ct_insert_dying_list(ct); > + nf_ct_put(ct); > + return NF_DROP; > + } > + set_bit(IPS_DYING_BIT, &ct->status); > + nf_ct_delete_from_lists(ct); > + nf_ct_put(ct); > + } > + nf_ct_put(ct); > + return NF_DROP; > + } > } > > return nf_nat_packet(ct, ctinfo, hooknum, skb); > diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c > index fa84cf8..d06d7de 100644 > --- a/net/ipv6/netfilter/ip6table_nat.c > +++ b/net/ipv6/netfilter/ip6table_nat.c > @@ -19,6 +19,7 @@ > #include <net/netfilter/nf_nat.h> > #include <net/netfilter/nf_nat_core.h> > #include <net/netfilter/nf_nat_l3proto.h> > +#include <net/netfilter/nf_conntrack_ecache.h> > > static const struct xt_table nf_nat_ipv6_table = { > .name = "nat", > @@ -137,6 +138,24 @@ nf_nat_ipv6_fn(unsigned int hooknum, > /* ESTABLISHED */ > NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || > ctinfo == IP_CT_ESTABLISHED_REPLY); > + if (hooknum == NF_INET_POST_ROUTING && > + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && > + nat->masq_index && nat->masq_index != out->ifindex) { > + /* Outgoing interface changed, kill ct. */ > + if (del_timer(&ct->timeout)) { > + if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { > + nf_ct_delete_from_lists(ct); > + nf_ct_insert_dying_list(ct); > + nf_ct_put(ct); > + return NF_DROP; > + } > + set_bit(IPS_DYING_BIT, &ct->status); > + nf_ct_delete_from_lists(ct); > + nf_ct_put(ct); > + } > + nf_ct_put(ct); > + return NF_DROP; > + } > } > > return nf_nat_packet(ct, ctinfo, hooknum, skb); > -- > 1.7.0.4 > > > Best regards, > Jozsef > - > E-mail : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu > PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt > Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences > H-1525 Budapest 114, POB. 49, Hungary > -- > To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 29 Nov 2012, Pablo Neira Ayuso wrote: > On Thu, Nov 29, 2012 at 10:26:40PM +0100, Florian Westphal wrote: > > Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> wrote: > > > > > --- a/net/ipv6/netfilter/ip6table_nat.c > > > +++ b/net/ipv6/netfilter/ip6table_nat.c > > > @@ -19,6 +19,7 @@ > > > #include <net/netfilter/nf_nat.h> > > > #include <net/netfilter/nf_nat_core.h> > > > #include <net/netfilter/nf_nat_l3proto.h> > > [..] > > > static const struct xt_table nf_nat_ipv6_table = { > > > + if (hooknum == NF_INET_POST_ROUTING && > > > + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && > > > + nat->masq_index && nat->masq_index != out->ifindex) { > > > + /* Outgoing interface changed, kill ct. */ > > > + if (del_timer(&ct->timeout)) { > > > > perhaps this could be a helper in include/net/netfilter/nf_nat.h? > > > > It would avoid the code duplication and the needed #if IS_ENABLED() MASQ > > check. > > I'd suggest a hook function that is set via rcu_pointer_assign in the > init path of the masquerade target. I have started to write it but it looks over-complicated compared how tiny the code is: both ipt_MASQUERADE and ip6t_MASQUERADE could set the hook function, so either it requires an reference counter or there should be two hooks. And the actual function defined in the nat core, that means three exported objects. Also, the setting of the hook function must always be checked in nf_nat_ipv[46]_fn which is pretty same as checking masq_index first. I'm going to send the new version of the patch for comments. Best regards, Jozsef - E-mail : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences H-1525 Budapest 114, POB. 49, Hungary -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Pablo, On Thu, 29 Nov 2012, Pablo Neira Ayuso wrote: > On Thu, Nov 29, 2012 at 10:12:54PM +0100, Jozsef Kadlecsik wrote: > > Hi, > > > > This is the second variant of the patch which addresses the route > > changes affecting already masqueraded connections. > > > > When the route changes (backup default route, VPNs), the packets are sent > > out with wrong source address. The patch addresses the issue by comparing > > the outgoing interface directly with the masquerade interface in the nat > > table. It *is* MASQUERADE specific, so probably the inserted code could be > > enclosed in a proper ifdef. > > > > Events are inefficient, because it'd require scanning the whole conntrack > > table at any route change and re-checking the route for all entry, which > > is simply way too expensive. > > I still have to waste the bullet of proposing to do this from > user-space. :-) > I think a new command for the conntrack utility to iterate over the > table and kill entries with wrong masq_index would be relatively > simple. You will have to pass the ifindex you want to compare from > user-space. Unfortunately the userspace suffers the same problems than my first attempt, the notification did: - I'm not aware of any easy way to get "route changed" events in userspace: as far as I know routing daemons (quagga) or openvpn don't generate such things. - The event itself not enough, the related interface is required too. - The event and the interface is not enough, the exact rule should be known otherwise the full routing table must be looked up. - The event, interface and exact rule is not enough, because even if the rule matches a conntrack entry, it might be that the route for the entry did not change - so we have to look up the route table anyway. - We are back that actually the event is enough :-). So in this case, if we can get such events (but how), we should have to iterate over the whole conntrack table, whatever large it is, and execute full route lookups for every element. In my approach the route is already computed and we just verify runtime that the outgoing interface corresponds to the one which was used at the first packet. Best regards, Jozsef - E-mail : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences H-1525 Budapest 114, POB. 49, Hungary -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Nov 30, 2012 at 09:37:15PM +0100, Jozsef Kadlecsik wrote: > Hi Pablo, > > On Thu, 29 Nov 2012, Pablo Neira Ayuso wrote: > > > On Thu, Nov 29, 2012 at 10:12:54PM +0100, Jozsef Kadlecsik wrote: > > > Hi, > > > > > > This is the second variant of the patch which addresses the route > > > changes affecting already masqueraded connections. > > > > > > When the route changes (backup default route, VPNs), the packets are sent > > > out with wrong source address. The patch addresses the issue by comparing > > > the outgoing interface directly with the masquerade interface in the nat > > > table. It *is* MASQUERADE specific, so probably the inserted code could be > > > enclosed in a proper ifdef. > > > > > > Events are inefficient, because it'd require scanning the whole conntrack > > > table at any route change and re-checking the route for all entry, which > > > is simply way too expensive. > > > > I still have to waste the bullet of proposing to do this from > > user-space. > > :-) > > > I think a new command for the conntrack utility to iterate over the > > table and kill entries with wrong masq_index would be relatively > > simple. You will have to pass the ifindex you want to compare from > > user-space. > > Unfortunately the userspace suffers the same problems than my first > attempt, the notification did: > > - I'm not aware of any easy way to get "route changed" events in > userspace: as far as I know routing daemons (quagga) or openvpn don't > generate such things. > - The event itself not enough, the related interface is required too. > - The event and the interface is not enough, the exact rule should be > known otherwise the full routing table must be looked up. > - The event, interface and exact rule is not enough, because even if > the rule matches a conntrack entry, it might be that the route for the > entry did not change - so we have to look up the route table anyway. > - We are back that actually the event is enough :-). > > So in this case, if we can get such events (but how), we should have to > iterate over the whole conntrack table, whatever large it is, and execute > full route lookups for every element. > > In my approach the route is already computed and we just verify runtime > that the outgoing interface corresponds to the one which was used at the > first packet. OK, you convinced me. The last V4 patch looks very small to handle this case. I'll take it. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index ac635a7..128885d 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -17,6 +17,7 @@ #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_conntrack_ecache.h> static const struct xt_table nf_nat_ipv4_table = { .name = "nat", @@ -134,6 +135,24 @@ nf_nat_ipv4_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); + if (hooknum == NF_INET_POST_ROUTING && + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && + nat->masq_index && nat->masq_index != out->ifindex) { + /* Outgoing interface changed, kill ct. */ + if (del_timer(&ct->timeout)) { + if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { + nf_ct_delete_from_lists(ct); + nf_ct_insert_dying_list(ct); + nf_ct_put(ct); + return NF_DROP; + } + set_bit(IPS_DYING_BIT, &ct->status); + nf_ct_delete_from_lists(ct); + nf_ct_put(ct); + } + nf_ct_put(ct); + return NF_DROP; + } } return nf_nat_packet(ct, ctinfo, hooknum, skb); diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index fa84cf8..d06d7de 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -19,6 +19,7 @@ #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_conntrack_ecache.h> static const struct xt_table nf_nat_ipv6_table = { .name = "nat", @@ -137,6 +138,24 @@ nf_nat_ipv6_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); + if (hooknum == NF_INET_POST_ROUTING && + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && + nat->masq_index && nat->masq_index != out->ifindex) { + /* Outgoing interface changed, kill ct. */ + if (del_timer(&ct->timeout)) { + if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { + nf_ct_delete_from_lists(ct); + nf_ct_insert_dying_list(ct); + nf_ct_put(ct); + return NF_DROP; + } + set_bit(IPS_DYING_BIT, &ct->status); + nf_ct_delete_from_lists(ct); + nf_ct_put(ct); + } + nf_ct_put(ct); + return NF_DROP; + } } return nf_nat_packet(ct, ctinfo, hooknum, skb);