Message ID | 1357776944-28805-1-git-send-email-willemb@google.com |
---|---|
State | Superseded |
Headers | show |
Hi Willem, On Wed, Jan 09, 2013 at 07:15:44PM -0500, Willem de Bruijn wrote: > Changes: > - v3: reverted no longer needed changes to x_tables.c > - v2: use a fixed size match structure to communicate between > kernel and userspace. > > Support arbitrary linux socket filter (BPF) programs as iptables > match rules. This allows for very expressive filters, and on > platforms with BPF JIT appears competitive with traditional hardcoded > iptables rules. > > At least, on an x86_64 that achieves 40K netperf TCP_STREAM without > any iptables rules (40 GBps), > > inserting 100x this bpf rule gives 28K > > ./iptables -A OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0,' -j > > (as generated by tcpdump -i any -ddd ip proto 20 | tr '\n' ',') That code generated by tcpdump will not work. tcpdump generates BPF code assuming that offset 0 is the link layer header, while iptables considers that offset 0 is the network layer. More comments below: > inserting 100x this u32 rule gives 21K > > ./iptables -A OUTPUT -m u32 --u32 '6&0xFF=0x20' -j DROP > > The two are logically equivalent, as far as I can tell. Let me know > if my test methodology is flawed in some way. Even in cases where > slower, the filter adds functionality currently lacking in iptables, > such as access to sk_buff fields like rxhash and queue_mapping. > --- > include/uapi/linux/netfilter/xt_bpf.h | 17 ++++++++ > net/netfilter/Kconfig | 9 ++++ > net/netfilter/Makefile | 1 + > net/netfilter/xt_bpf.c | 73 +++++++++++++++++++++++++++++++++ > 4 files changed, 100 insertions(+), 0 deletions(-) > create mode 100644 include/uapi/linux/netfilter/xt_bpf.h > create mode 100644 net/netfilter/xt_bpf.c > > diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h > new file mode 100644 > index 0000000..5dda450 > --- /dev/null > +++ b/include/uapi/linux/netfilter/xt_bpf.h > @@ -0,0 +1,17 @@ > +#ifndef _XT_BPF_H > +#define _XT_BPF_H > + > +#include <linux/filter.h> > +#include <linux/types.h> > + > +#define XT_BPF_MAX_NUM_INSTR 64 > + > +struct xt_bpf_info { > + __u16 bpf_program_num_elem; > + struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; > + > + /* only used in the kernel */ > + struct sk_filter *filter __attribute__((aligned(8))); > +}; > + > +#endif /*_XT_BPF_H */ > diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig > index fefa514..d45720f 100644 > --- a/net/netfilter/Kconfig > +++ b/net/netfilter/Kconfig > @@ -798,6 +798,15 @@ config NETFILTER_XT_MATCH_ADDRTYPE > If you want to compile it as a module, say M here and read > <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. > > +config NETFILTER_XT_MATCH_BPF > + tristate '"bpf" match support' > + depends on NETFILTER_ADVANCED > + help > + BPF matching applies a linux socket filter to each packet and > + accepts those for which the filter returns non-zero. > + > + To compile it as a module, choose M here. If unsure, say N. > + > config NETFILTER_XT_MATCH_CLUSTER > tristate '"cluster" match support' > depends on NF_CONNTRACK > diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile > index 3259697..6d6194525 100644 > --- a/net/netfilter/Makefile > +++ b/net/netfilter/Makefile > @@ -98,6 +98,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o > > # matches > obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o > +obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o > obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o > obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o > obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o > diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c > new file mode 100644 > index 0000000..1bdfab8 > --- /dev/null > +++ b/net/netfilter/xt_bpf.c > @@ -0,0 +1,73 @@ > +/* Xtables module to match packets using a BPF filter. > + * Copyright 2013 Google Inc. > + * Written by Willem de Bruijn <willemb@google.com> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + */ > + > +#include <linux/module.h> > +#include <linux/skbuff.h> > +#include <linux/ipv6.h> > +#include <linux/filter.h> > +#include <net/ip.h> > + > +#include <linux/netfilter/xt_bpf.h> > +#include <linux/netfilter/x_tables.h> > + > +MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>"); > +MODULE_DESCRIPTION("Xtables: BPF filter match"); > +MODULE_LICENSE("GPL"); Please, add MODULE_ALIAS("ipt_bpf"); MODULE_ALIAS("ip6t_bpf"); otherwise module auto-loading will not work. > + > +static int bpf_mt_check(const struct xt_mtchk_param *par) > +{ > + struct xt_bpf_info *info = par->matchinfo; > + struct sock_fprog program; > + > + program.len = info->bpf_program_num_elem; > + program.filter = info->bpf_program; sparse reports a warning here above. I've been trying to find a quick solution, please, have a look at it. > + if (sk_unattached_filter_create(&info->filter, &program)) { > + pr_info("bpf: check failed: parse error\n"); > + return -EINVAL; > + } > + > + return 0; > +} > + > +static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) > +{ > + const struct xt_bpf_info *info = par->matchinfo; > + > + return SK_RUN_FILTER(info->filter, skb); > +} > + > +static void bpf_mt_destroy(const struct xt_mtdtor_param *par) > +{ > + const struct xt_bpf_info *info = par->matchinfo; > + sk_unattached_filter_destroy(info->filter); > +} > + > +static struct xt_match bpf_mt_reg __read_mostly = { > + .name = "bpf", > + .revision = 0, > + .family = NFPROTO_UNSPEC, > + .checkentry = bpf_mt_check, > + .match = bpf_mt, > + .destroy = bpf_mt_destroy, > + .matchsize = sizeof(struct xt_bpf_info), > + .me = THIS_MODULE, > +}; > + > +static int __init bpf_mt_init(void) > +{ > + return xt_register_match(&bpf_mt_reg); > +} > + > +static void __exit bpf_mt_exit(void) > +{ > + xt_unregister_match(&bpf_mt_reg); > +} > + > +module_init(bpf_mt_init); > +module_exit(bpf_mt_exit); > -- > 1.7.7.3 > -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Jan 17, 2013 at 6:53 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote: > Hi Willem, > > On Wed, Jan 09, 2013 at 07:15:44PM -0500, Willem de Bruijn wrote: >> Changes: >> - v3: reverted no longer needed changes to x_tables.c >> - v2: use a fixed size match structure to communicate between >> kernel and userspace. >> >> Support arbitrary linux socket filter (BPF) programs as iptables >> match rules. This allows for very expressive filters, and on >> platforms with BPF JIT appears competitive with traditional hardcoded >> iptables rules. >> >> At least, on an x86_64 that achieves 40K netperf TCP_STREAM without >> any iptables rules (40 GBps), >> >> inserting 100x this bpf rule gives 28K >> >> ./iptables -A OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0,' -j >> >> (as generated by tcpdump -i any -ddd ip proto 20 | tr '\n' ',') > > That code generated by tcpdump will not work. > > tcpdump generates BPF code assuming that offset 0 is the link layer > header, while iptables considers that offset 0 is the network layer. Ah, yes, of course. We discussed that earlier. I removed the statement from the commit message. Such hints belong in the libxt_bpf man page, if anywhere. To compile code right now, the little bpf compiler that I emailed before can be downloaded from http://code.google.com/p/kernel/downloads/detail?name=bpf2decimal.c I don't think that a compiler has to be shipped with iptables itself, let alone make iptables link against libraries. That said, it is not impossible to detect pcap.h in configure.ac and optionally enable a "-m bpf --string" mode that calls pcap_compile_nopcap from within libxt_bpf, so let me know if you would like me to code that up. I can also try to send a patch to tcpdump that extends compilation (`-ddd -y <type>`) to arbitrary link layer types. > More comments below: > >> inserting 100x this u32 rule gives 21K >> >> ./iptables -A OUTPUT -m u32 --u32 '6&0xFF=0x20' -j DROP >> >> The two are logically equivalent, as far as I can tell. Let me know >> if my test methodology is flawed in some way. Even in cases where >> slower, the filter adds functionality currently lacking in iptables, >> such as access to sk_buff fields like rxhash and queue_mapping. >> --- >> include/uapi/linux/netfilter/xt_bpf.h | 17 ++++++++ >> net/netfilter/Kconfig | 9 ++++ >> net/netfilter/Makefile | 1 + >> net/netfilter/xt_bpf.c | 73 +++++++++++++++++++++++++++++++++ >> 4 files changed, 100 insertions(+), 0 deletions(-) >> create mode 100644 include/uapi/linux/netfilter/xt_bpf.h >> create mode 100644 net/netfilter/xt_bpf.c >> >> diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h >> new file mode 100644 >> index 0000000..5dda450 >> --- /dev/null >> +++ b/include/uapi/linux/netfilter/xt_bpf.h >> @@ -0,0 +1,17 @@ >> +#ifndef _XT_BPF_H >> +#define _XT_BPF_H >> + >> +#include <linux/filter.h> >> +#include <linux/types.h> >> + >> +#define XT_BPF_MAX_NUM_INSTR 64 >> + >> +struct xt_bpf_info { >> + __u16 bpf_program_num_elem; >> + struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; >> + >> + /* only used in the kernel */ >> + struct sk_filter *filter __attribute__((aligned(8))); >> +}; >> + >> +#endif /*_XT_BPF_H */ >> diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig >> index fefa514..d45720f 100644 >> --- a/net/netfilter/Kconfig >> +++ b/net/netfilter/Kconfig >> @@ -798,6 +798,15 @@ config NETFILTER_XT_MATCH_ADDRTYPE >> If you want to compile it as a module, say M here and read >> <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. >> >> +config NETFILTER_XT_MATCH_BPF >> + tristate '"bpf" match support' >> + depends on NETFILTER_ADVANCED >> + help >> + BPF matching applies a linux socket filter to each packet and >> + accepts those for which the filter returns non-zero. >> + >> + To compile it as a module, choose M here. If unsure, say N. >> + >> config NETFILTER_XT_MATCH_CLUSTER >> tristate '"cluster" match support' >> depends on NF_CONNTRACK >> diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile >> index 3259697..6d6194525 100644 >> --- a/net/netfilter/Makefile >> +++ b/net/netfilter/Makefile >> @@ -98,6 +98,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o >> >> # matches >> obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o >> +obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o >> obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o >> obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o >> obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o >> diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c >> new file mode 100644 >> index 0000000..1bdfab8 >> --- /dev/null >> +++ b/net/netfilter/xt_bpf.c >> @@ -0,0 +1,73 @@ >> +/* Xtables module to match packets using a BPF filter. >> + * Copyright 2013 Google Inc. >> + * Written by Willem de Bruijn <willemb@google.com> >> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License version 2 as >> + * published by the Free Software Foundation. >> + */ >> + >> +#include <linux/module.h> >> +#include <linux/skbuff.h> >> +#include <linux/ipv6.h> >> +#include <linux/filter.h> >> +#include <net/ip.h> >> + >> +#include <linux/netfilter/xt_bpf.h> >> +#include <linux/netfilter/x_tables.h> >> + >> +MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>"); >> +MODULE_DESCRIPTION("Xtables: BPF filter match"); >> +MODULE_LICENSE("GPL"); > > Please, add > > MODULE_ALIAS("ipt_bpf"); > MODULE_ALIAS("ip6t_bpf"); Done. > otherwise module auto-loading will not work. > >> + >> +static int bpf_mt_check(const struct xt_mtchk_param *par) >> +{ >> + struct xt_bpf_info *info = par->matchinfo; >> + struct sock_fprog program; >> + >> + program.len = info->bpf_program_num_elem; >> + program.filter = info->bpf_program; > > sparse reports a warning here above. I've been trying to find a quick > solution, please, have a look at it. Thanks for catching this. Apparently, program.filter is annotated as __user. A cast made the warning disappear, and is safe as far as I can see, since it only forces the kernel to be more conservative in accessing the memory. >> + if (sk_unattached_filter_create(&info->filter, &program)) { >> + pr_info("bpf: check failed: parse error\n"); >> + return -EINVAL; >> + } >> + >> + return 0; >> +} >> + >> +static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) >> +{ >> + const struct xt_bpf_info *info = par->matchinfo; >> + >> + return SK_RUN_FILTER(info->filter, skb); >> +} >> + >> +static void bpf_mt_destroy(const struct xt_mtdtor_param *par) >> +{ >> + const struct xt_bpf_info *info = par->matchinfo; >> + sk_unattached_filter_destroy(info->filter); >> +} >> + >> +static struct xt_match bpf_mt_reg __read_mostly = { >> + .name = "bpf", >> + .revision = 0, >> + .family = NFPROTO_UNSPEC, >> + .checkentry = bpf_mt_check, >> + .match = bpf_mt, >> + .destroy = bpf_mt_destroy, >> + .matchsize = sizeof(struct xt_bpf_info), >> + .me = THIS_MODULE, >> +}; >> + >> +static int __init bpf_mt_init(void) >> +{ >> + return xt_register_match(&bpf_mt_reg); >> +} >> + >> +static void __exit bpf_mt_exit(void) >> +{ >> + xt_unregister_match(&bpf_mt_reg); >> +} >> + >> +module_init(bpf_mt_init); >> +module_exit(bpf_mt_exit); >> -- >> 1.7.7.3 >> -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Jan 18, 2013 at 11:48:34AM -0500, Willem de Bruijn wrote: [...] > To compile code right now, the little bpf compiler that I emailed > before can be downloaded from > http://code.google.com/p/kernel/downloads/detail?name=bpf2decimal.c > > I don't think that a compiler has to be shipped with iptables itself, > let alone make iptables link against libraries. That said, it is not > impossible to detect pcap.h in configure.ac and optionally enable a > "-m bpf --string" mode that calls pcap_compile_nopcap from within > libxt_bpf, so let me know if you would like me to code that up. I can > also try to send a patch to tcpdump that extends compilation (`-ddd -y > <type>`) to arbitrary link layer types. We have to decide if: a) we add a new hard library dependency to iptables (libpcap) for just for one single module, that is, the libxt_bpf depends on libpcap. or b) provide a separate utility to generate the BPF filter in text-based format from some utility that accepts tcpdump-like syntax. The utility can be distributed in the utils directory and it would not be mandatory to compile it if libpcap is not present. I'd like to hear pro and cons arguments from others on this. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso <pablo@netfilter.org> wrote: > On Fri, Jan 18, 2013 at 11:48:34AM -0500, Willem de Bruijn wrote: > [...] > > To compile code right now, the little bpf compiler that I emailed > > before can be downloaded from > > http://code.google.com/p/kernel/downloads/detail?name=bpf2decimal.c > > > > I don't think that a compiler has to be shipped with iptables itself, > > let alone make iptables link against libraries. That said, it is not > > impossible to detect pcap.h in configure.ac and optionally enable a > > "-m bpf --string" mode that calls pcap_compile_nopcap from within > > libxt_bpf, so let me know if you would like me to code that up. I can > > also try to send a patch to tcpdump that extends compilation (`-ddd -y > > <type>`) to arbitrary link layer types. > > We have to decide if: > > a) we add a new hard library dependency to iptables (libpcap) for just > for one single module, that is, the libxt_bpf depends on libpcap. > > or > > b) provide a separate utility to generate the BPF filter in text-based > format from some utility that accepts tcpdump-like syntax. The utility > can be distributed in the utils directory and it would not be > mandatory to compile it if libpcap is not present. > > I'd like to hear pro and cons arguments from others on this. a) is arguably more user friendly, however, I don't think we can retain the 'text representation' for iptables-save so users would still be confronted with the compiled data at some point (i.e., they need to write down the original expression anyway to figure out what the rule they added 6 months back actually does...) I would go with b) for now; we can always move to a) later on, but not the other way around (would kill backwards compatibility). -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 22 Jan 2013, Florian Westphal wrote: > Pablo Neira Ayuso <pablo@netfilter.org> wrote: > > On Fri, Jan 18, 2013 at 11:48:34AM -0500, Willem de Bruijn wrote: > > [...] > > > To compile code right now, the little bpf compiler that I emailed > > > before can be downloaded from > > > http://code.google.com/p/kernel/downloads/detail?name=bpf2decimal.c > > > > > > I don't think that a compiler has to be shipped with iptables itself, > > > let alone make iptables link against libraries. That said, it is not > > > impossible to detect pcap.h in configure.ac and optionally enable a > > > "-m bpf --string" mode that calls pcap_compile_nopcap from within > > > libxt_bpf, so let me know if you would like me to code that up. I can > > > also try to send a patch to tcpdump that extends compilation (`-ddd -y > > > <type>`) to arbitrary link layer types. > > > > We have to decide if: > > > > a) we add a new hard library dependency to iptables (libpcap) for just > > for one single module, that is, the libxt_bpf depends on libpcap. > > > > or > > > > b) provide a separate utility to generate the BPF filter in text-based > > format from some utility that accepts tcpdump-like syntax. The utility > > can be distributed in the utils directory and it would not be > > mandatory to compile it if libpcap is not present. > > > > I'd like to hear pro and cons arguments from others on this. > > a) is arguably more user friendly, however, I don't think we can > retain the 'text representation' for iptables-save so users > would still be confronted with the compiled data at some point > (i.e., they need to write down the original expression anyway to > figure out what the rule they added 6 months back actually does...) > > I would go with b) for now; we can always move to a) later on, but not > the other way around (would kill backwards compatibility). Yes, let's go with b). (But from packaging point of view utils/bpf2decimal.c depending on libpcap is not much different from extensions/libxt_bpf.c depending on libpcap.) Best regards, Jozsef - E-mail : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences H-1525 Budapest 114, POB. 49, Hungary -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
With (b) the static iptables binary doesn't grow by the size of the libpcap library. I think this is a worthy goal. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jan 22, 2013 at 10:46:17AM +0100, Jozsef Kadlecsik wrote: > On Tue, 22 Jan 2013, Florian Westphal wrote: > > > Pablo Neira Ayuso <pablo@netfilter.org> wrote: > > > On Fri, Jan 18, 2013 at 11:48:34AM -0500, Willem de Bruijn wrote: > > > [...] > > > > To compile code right now, the little bpf compiler that I emailed > > > > before can be downloaded from > > > > http://code.google.com/p/kernel/downloads/detail?name=bpf2decimal.c > > > > > > > > I don't think that a compiler has to be shipped with iptables itself, > > > > let alone make iptables link against libraries. That said, it is not > > > > impossible to detect pcap.h in configure.ac and optionally enable a > > > > "-m bpf --string" mode that calls pcap_compile_nopcap from within > > > > libxt_bpf, so let me know if you would like me to code that up. I can > > > > also try to send a patch to tcpdump that extends compilation (`-ddd -y > > > > <type>`) to arbitrary link layer types. > > > > > > We have to decide if: > > > > > > a) we add a new hard library dependency to iptables (libpcap) for just > > > for one single module, that is, the libxt_bpf depends on libpcap. > > > > > > or > > > > > > b) provide a separate utility to generate the BPF filter in text-based > > > format from some utility that accepts tcpdump-like syntax. The utility > > > can be distributed in the utils directory and it would not be > > > mandatory to compile it if libpcap is not present. > > > > > > I'd like to hear pro and cons arguments from others on this. > > > > a) is arguably more user friendly, however, I don't think we can > > retain the 'text representation' for iptables-save so users > > would still be confronted with the compiled data at some point > > (i.e., they need to write down the original expression anyway to > > figure out what the rule they added 6 months back actually does...) > > > > I would go with b) for now; we can always move to a) later on, but not > > the other way around (would kill backwards compatibility). > > Yes, let's go with b). (But from packaging point of view > utils/bpf2decimal.c depending on libpcap is not much different from > extensions/libxt_bpf.c depending on libpcap.) We can skip that dependency by adding an independent configure.ac and Makefile for this under iptables/utils/nfbpf. Thus, iptables itself will not depend on libpcap. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jan 22, 2013 at 3:46 AM, Florian Westphal <fw@strlen.de> wrote: > Pablo Neira Ayuso <pablo@netfilter.org> wrote: >> On Fri, Jan 18, 2013 at 11:48:34AM -0500, Willem de Bruijn wrote: >> [...] >> > To compile code right now, the little bpf compiler that I emailed >> > before can be downloaded from >> > http://code.google.com/p/kernel/downloads/detail?name=bpf2decimal.c >> > >> > I don't think that a compiler has to be shipped with iptables itself, >> > let alone make iptables link against libraries. That said, it is not >> > impossible to detect pcap.h in configure.ac and optionally enable a >> > "-m bpf --string" mode that calls pcap_compile_nopcap from within >> > libxt_bpf, so let me know if you would like me to code that up. I can >> > also try to send a patch to tcpdump that extends compilation (`-ddd -y >> > <type>`) to arbitrary link layer types. >> >> We have to decide if: >> >> a) we add a new hard library dependency to iptables (libpcap) for just >> for one single module, that is, the libxt_bpf depends on libpcap. >> >> or >> >> b) provide a separate utility to generate the BPF filter in text-based >> format from some utility that accepts tcpdump-like syntax. The utility >> can be distributed in the utils directory and it would not be >> mandatory to compile it if libpcap is not present. >> >> I'd like to hear pro and cons arguments from others on this. > > a) is arguably more user friendly, however, I don't think we can > retain the 'text representation' for iptables-save so users > would still be confronted with the compiled data at some point > (i.e., they need to write down the original expression anyway to > figure out what the rule they added 6 months back actually does...) > > I would go with b) for now; we can always move to a) later on, but not > the other way around (would kill backwards compatibility). This sounds like the consensus (for the record, I also prefer this less disruptive approach). In that case, I can submit a revised libxt_bpf with your suggested changes right away, Pablo, and we can leave the separate userspace tool for a later commit. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jan 23, 2013 at 10:59:28AM -0500, Willem de Bruijn wrote: > >> b) provide a separate utility to generate the BPF filter in text-based > >> format from some utility that accepts tcpdump-like syntax. The utility > >> can be distributed in the utils directory and it would not be > >> mandatory to compile it if libpcap is not present. [...] > > I would go with b) for now; we can always move to a) later on, but not > > the other way around (would kill backwards compatibility). > > This sounds like the consensus (for the record, I also prefer this less > disruptive approach). In that case, I can submit a revised libxt_bpf with your > suggested changes right away, Pablo, and we can leave the separate > userspace tool for a later commit. Either way is fine, but please we should have that utility compiler integrated in the iptables tree by when 3.9-rc1 is released. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jan 23, 2013 at 11:21 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote: > On Wed, Jan 23 2013 at 10:59:28AM -0500, Willem de Bruijn wrote: >> >> b) provide a separate utility to generate the BPF filter in text-based >> >> format from some utility that accepts tcpdump-like syntax. The utility >> >> can be distributed in the utils directory and it would not be >> >> mandatory to compile it if libpcap is not present. > [...] >> > I would go with b) for now; we can always move to a) later on, but not >> > the other way around (would kill backwards compatibility). >> >> This sounds like the consensus (for the record, I also prefer this less >> disruptive approach). In that case, I can submit a revised libxt_bpf with your >> suggested changes right away, Pablo, and we can leave the separate >> userspace tool for a later commit. > > Either way is fine, but please we should have that utility compiler > integrated in the iptables tree by when 3.9-rc1 is released. Okay. I'll prepare a separate patch with the pcap-based utility, then. Since utils is built as part of the root make invocation, I think it's better to test for pcap.h in the root configure.ac and add a test in utils/Makefile.am to build this tool if found, as opposed to creating a separate configure.ac under utils. We can also discuss these details after the first version of the patch, of course. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jan 23, 2013 at 11:38:20AM -0500, Willem de Bruijn wrote: > On Wed, Jan 23, 2013 at 11:21 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote: > > On Wed, Jan 23 2013 at 10:59:28AM -0500, Willem de Bruijn wrote: > >> >> b) provide a separate utility to generate the BPF filter in text-based > >> >> format from some utility that accepts tcpdump-like syntax. The utility > >> >> can be distributed in the utils directory and it would not be > >> >> mandatory to compile it if libpcap is not present. > > [...] > >> > I would go with b) for now; we can always move to a) later on, but not > >> > the other way around (would kill backwards compatibility). > >> > >> This sounds like the consensus (for the record, I also prefer this less > >> disruptive approach). In that case, I can submit a revised libxt_bpf with your > >> suggested changes right away, Pablo, and we can leave the separate > >> userspace tool for a later commit. > > > > Either way is fine, but please we should have that utility compiler > > integrated in the iptables tree by when 3.9-rc1 is released. > > Okay. I'll prepare a separate patch with the pcap-based utility, then. > > Since utils is built as part of the root make invocation, I think it's > better to test for pcap.h in the root configure.ac and add a test in > utils/Makefile.am to build this tool if found, as opposed to creating > a separate configure.ac under utils. We can also discuss these > details after the first version of the patch, of course. That's fine by now, and it's way less bloat. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jan 23, 2013 at 1:56 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote: > On Wed, Jan 23, 2013 at 11:38:20AM -0500, Willem de Bruijn wrote: >> On Wed, Jan 23, 2013 at 11:21 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote: >> > On Wed, Jan 23 2013 at 10:59:28AM -0500, Willem de Bruijn wrote: >> >> >> b) provide a separate utility to generate the BPF filter in text-based >> >> >> format from some utility that accepts tcpdump-like syntax. The utility >> >> >> can be distributed in the utils directory and it would not be >> >> >> mandatory to compile it if libpcap is not present. >> > [...] >> >> > I would go with b) for now; we can always move to a) later on, but not >> >> > the other way around (would kill backwards compatibility). >> >> >> >> This sounds like the consensus (for the record, I also prefer this less >> >> disruptive approach). In that case, I can submit a revised libxt_bpf with your >> >> suggested changes right away, Pablo, and we can leave the separate >> >> userspace tool for a later commit. >> > >> > Either way is fine, but please we should have that utility compiler >> > integrated in the iptables tree by when 3.9-rc1 is released. >> >> Okay. I'll prepare a separate patch with the pcap-based utility, then. Just sent the patch. I'm no expert at autoconf and automake, so the build logic can conceivably be shorter, but it works for me and the logic is straightforward. I forgot to mention in the commit message which versions of the tools I used: tested on a ubuntu 12.04 with autoconf 2.68, automake 1.9.6 and libtool 2.4.2. >> Since utils is built as part of the root make invocation, I think it's >> better to test for pcap.h in the root configure.ac and add a test in >> utils/Makefile.am to build this tool if found, as opposed to creating >> a separate configure.ac under utils. We can also discuss these >> details after the first version of the patch, of course. > > That's fine by now, and it's way less bloat. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
at a guess, there should be a --with/without option for it, and if --with=tool is specified build/configure should fail if support libraries are missing On Sun, Feb 17, 2013 at 7:52 PM, Willem de Bruijn <willemb@google.com> wrote: > On Wed, Jan 23, 2013 at 1:56 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote: >> On Wed, Jan 23, 2013 at 11:38:20AM -0500, Willem de Bruijn wrote: >>> On Wed, Jan 23, 2013 at 11:21 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote: >>> > On Wed, Jan 23 2013 at 10:59:28AM -0500, Willem de Bruijn wrote: >>> >> >> b) provide a separate utility to generate the BPF filter in text-based >>> >> >> format from some utility that accepts tcpdump-like syntax. The utility >>> >> >> can be distributed in the utils directory and it would not be >>> >> >> mandatory to compile it if libpcap is not present. >>> > [...] >>> >> > I would go with b) for now; we can always move to a) later on, but not >>> >> > the other way around (would kill backwards compatibility). >>> >> >>> >> This sounds like the consensus (for the record, I also prefer this less >>> >> disruptive approach). In that case, I can submit a revised libxt_bpf with your >>> >> suggested changes right away, Pablo, and we can leave the separate >>> >> userspace tool for a later commit. >>> > >>> > Either way is fine, but please we should have that utility compiler >>> > integrated in the iptables tree by when 3.9-rc1 is released. >>> >>> Okay. I'll prepare a separate patch with the pcap-based utility, then. > > Just sent the patch. I'm no expert at autoconf and automake, so the > build logic can conceivably be shorter, but it works for me and the > logic is straightforward. I forgot to mention in the commit message > which versions of the tools I used: tested on a ubuntu 12.04 with > autoconf 2.68, automake 1.9.6 and libtool 2.4.2. > >>> Since utils is built as part of the root make invocation, I think it's >>> better to test for pcap.h in the root configure.ac and add a test in >>> utils/Makefile.am to build this tool if found, as opposed to creating >>> a separate configure.ac under utils. We can also discuss these >>> details after the first version of the patch, of course. >> >> That's fine by now, and it's way less bloat. > -- > To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Feb 23, 2013 at 9:15 PM, Maciej Żenczykowski <zenczykowski@gmail.com> wrote: > at a guess, there should be a --with/without option for it, and if > --with=tool is specified build/configure should fail if support > libraries are missing Agreed on the fail hard. After a brief offline discussion on --enable vs --with, will respin with optional --enable-bpf-compiler. The option is disabled by default. If it is enabled and pcap cannot be found, build fails. If no further comments, I'll respin shortly. > > On Sun, Feb 17, 2013 at 7:52 PM, Willem de Bruijn <willemb@google.com> wrote: >> On Wed, Jan 23, 2013 at 1:56 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote: >>> On Wed, Jan 23, 2013 at 11:38:20AM -0500, Willem de Bruijn wrote: >>>> On Wed, Jan 23, 2013 at 11:21 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote: >>>> > On Wed, Jan 23 2013 at 10:59:28AM -0500, Willem de Bruijn wrote: >>>> >> >> b) provide a separate utility to generate the BPF filter in text-based >>>> >> >> format from some utility that accepts tcpdump-like syntax. The utility >>>> >> >> can be distributed in the utils directory and it would not be >>>> >> >> mandatory to compile it if libpcap is not present. >>>> > [...] >>>> >> > I would go with b) for now; we can always move to a) later on, but not >>>> >> > the other way around (would kill backwards compatibility). >>>> >> >>>> >> This sounds like the consensus (for the record, I also prefer this less >>>> >> disruptive approach). In that case, I can submit a revised libxt_bpf with your >>>> >> suggested changes right away, Pablo, and we can leave the separate >>>> >> userspace tool for a later commit. >>>> > >>>> > Either way is fine, but please we should have that utility compiler >>>> > integrated in the iptables tree by when 3.9-rc1 is released. >>>> >>>> Okay. I'll prepare a separate patch with the pcap-based utility, then. >> >> Just sent the patch. I'm no expert at autoconf and automake, so the >> build logic can conceivably be shorter, but it works for me and the >> logic is straightforward. I forgot to mention in the commit message >> which versions of the tools I used: tested on a ubuntu 12.04 with >> autoconf 2.68, automake 1.9.6 and libtool 2.4.2. >> >>>> Since utils is built as part of the root make invocation, I think it's >>>> better to test for pcap.h in the root configure.ac and add a test in >>>> utils/Makefile.am to build this tool if found, as opposed to creating >>>> a separate configure.ac under utils. We can also discuss these >>>> details after the first version of the patch, of course. >>> >>> That's fine by now, and it's way less bloat. >> -- >> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h new file mode 100644 index 0000000..5dda450 --- /dev/null +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -0,0 +1,17 @@ +#ifndef _XT_BPF_H +#define _XT_BPF_H + +#include <linux/filter.h> +#include <linux/types.h> + +#define XT_BPF_MAX_NUM_INSTR 64 + +struct xt_bpf_info { + __u16 bpf_program_num_elem; + struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; + + /* only used in the kernel */ + struct sk_filter *filter __attribute__((aligned(8))); +}; + +#endif /*_XT_BPF_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index fefa514..d45720f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -798,6 +798,15 @@ config NETFILTER_XT_MATCH_ADDRTYPE If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_BPF + tristate '"bpf" match support' + depends on NETFILTER_ADVANCED + help + BPF matching applies a linux socket filter to each packet and + accepts those for which the filter returns non-zero. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 3259697..6d6194525 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -98,6 +98,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o +obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c new file mode 100644 index 0000000..1bdfab8 --- /dev/null +++ b/net/netfilter/xt_bpf.c @@ -0,0 +1,73 @@ +/* Xtables module to match packets using a BPF filter. + * Copyright 2013 Google Inc. + * Written by Willem de Bruijn <willemb@google.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ipv6.h> +#include <linux/filter.h> +#include <net/ip.h> + +#include <linux/netfilter/xt_bpf.h> +#include <linux/netfilter/x_tables.h> + +MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>"); +MODULE_DESCRIPTION("Xtables: BPF filter match"); +MODULE_LICENSE("GPL"); + +static int bpf_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_bpf_info *info = par->matchinfo; + struct sock_fprog program; + + program.len = info->bpf_program_num_elem; + program.filter = info->bpf_program; + if (sk_unattached_filter_create(&info->filter, &program)) { + pr_info("bpf: check failed: parse error\n"); + return -EINVAL; + } + + return 0; +} + +static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + + return SK_RUN_FILTER(info->filter, skb); +} + +static void bpf_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + sk_unattached_filter_destroy(info->filter); +} + +static struct xt_match bpf_mt_reg __read_mostly = { + .name = "bpf", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = bpf_mt_check, + .match = bpf_mt, + .destroy = bpf_mt_destroy, + .matchsize = sizeof(struct xt_bpf_info), + .me = THIS_MODULE, +}; + +static int __init bpf_mt_init(void) +{ + return xt_register_match(&bpf_mt_reg); +} + +static void __exit bpf_mt_exit(void) +{ + xt_unregister_match(&bpf_mt_reg); +} + +module_init(bpf_mt_init); +module_exit(bpf_mt_exit);