diff mbox

[next,v3] iptables: add xt_bpf match

Message ID 1357776944-28805-1-git-send-email-willemb@google.com
State Superseded
Headers show

Commit Message

Willem de Bruijn Jan. 10, 2013, 12:15 a.m. UTC
Changes:
- v3: reverted no longer needed changes to x_tables.c
- v2: use a fixed size match structure to communicate between
      kernel and userspace.

Support arbitrary linux socket filter (BPF) programs as iptables
match rules. This allows for very expressive filters, and on
platforms with BPF JIT appears competitive with traditional hardcoded
iptables rules.

At least, on an x86_64 that achieves 40K netperf TCP_STREAM without
any iptables rules (40 GBps),

inserting 100x this bpf rule gives 28K

    ./iptables -A OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0,' -j

    (as generated by tcpdump -i any -ddd ip proto 20 | tr '\n' ',')

inserting 100x this u32 rule gives 21K

    ./iptables -A OUTPUT -m u32 --u32 '6&0xFF=0x20' -j DROP

The two are logically equivalent, as far as I can tell. Let me know
if my test methodology is flawed in some way. Even in cases where
slower, the filter adds functionality currently lacking in iptables,
such as access to sk_buff fields like rxhash and queue_mapping.
---
 include/uapi/linux/netfilter/xt_bpf.h |   17 ++++++++
 net/netfilter/Kconfig                 |    9 ++++
 net/netfilter/Makefile                |    1 +
 net/netfilter/xt_bpf.c                |   73 +++++++++++++++++++++++++++++++++
 4 files changed, 100 insertions(+), 0 deletions(-)
 create mode 100644 include/uapi/linux/netfilter/xt_bpf.h
 create mode 100644 net/netfilter/xt_bpf.c

Comments

Pablo Neira Ayuso Jan. 17, 2013, 11:53 p.m. UTC | #1
Hi Willem,

On Wed, Jan 09, 2013 at 07:15:44PM -0500, Willem de Bruijn wrote:
> Changes:
> - v3: reverted no longer needed changes to x_tables.c
> - v2: use a fixed size match structure to communicate between
>       kernel and userspace.
> 
> Support arbitrary linux socket filter (BPF) programs as iptables
> match rules. This allows for very expressive filters, and on
> platforms with BPF JIT appears competitive with traditional hardcoded
> iptables rules.
> 
> At least, on an x86_64 that achieves 40K netperf TCP_STREAM without
> any iptables rules (40 GBps),
> 
> inserting 100x this bpf rule gives 28K
> 
>     ./iptables -A OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0,' -j
> 
>     (as generated by tcpdump -i any -ddd ip proto 20 | tr '\n' ',')

That code generated by tcpdump will not work.

tcpdump generates BPF code assuming that offset 0 is the link layer
header, while iptables considers that offset 0 is the network layer.

More comments below:

> inserting 100x this u32 rule gives 21K
> 
>     ./iptables -A OUTPUT -m u32 --u32 '6&0xFF=0x20' -j DROP
> 
> The two are logically equivalent, as far as I can tell. Let me know
> if my test methodology is flawed in some way. Even in cases where
> slower, the filter adds functionality currently lacking in iptables,
> such as access to sk_buff fields like rxhash and queue_mapping.
> ---
>  include/uapi/linux/netfilter/xt_bpf.h |   17 ++++++++
>  net/netfilter/Kconfig                 |    9 ++++
>  net/netfilter/Makefile                |    1 +
>  net/netfilter/xt_bpf.c                |   73 +++++++++++++++++++++++++++++++++
>  4 files changed, 100 insertions(+), 0 deletions(-)
>  create mode 100644 include/uapi/linux/netfilter/xt_bpf.h
>  create mode 100644 net/netfilter/xt_bpf.c
> 
> diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h
> new file mode 100644
> index 0000000..5dda450
> --- /dev/null
> +++ b/include/uapi/linux/netfilter/xt_bpf.h
> @@ -0,0 +1,17 @@
> +#ifndef _XT_BPF_H
> +#define _XT_BPF_H
> +
> +#include <linux/filter.h>
> +#include <linux/types.h>
> +
> +#define XT_BPF_MAX_NUM_INSTR	64
> +
> +struct xt_bpf_info {
> +	__u16 bpf_program_num_elem;
> +	struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR];
> +
> +	/* only used in the kernel */
> +	struct sk_filter *filter __attribute__((aligned(8)));
> +};
> +
> +#endif /*_XT_BPF_H */
> diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
> index fefa514..d45720f 100644
> --- a/net/netfilter/Kconfig
> +++ b/net/netfilter/Kconfig
> @@ -798,6 +798,15 @@ config NETFILTER_XT_MATCH_ADDRTYPE
>  	  If you want to compile it as a module, say M here and read
>  	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
>  
> +config NETFILTER_XT_MATCH_BPF
> +	tristate '"bpf" match support'
> +	depends on NETFILTER_ADVANCED
> +	help
> +	  BPF matching applies a linux socket filter to each packet and
> +          accepts those for which the filter returns non-zero.
> +
> +	  To compile it as a module, choose M here.  If unsure, say N.
> +
>  config NETFILTER_XT_MATCH_CLUSTER
>  	tristate '"cluster" match support'
>  	depends on NF_CONNTRACK
> diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
> index 3259697..6d6194525 100644
> --- a/net/netfilter/Makefile
> +++ b/net/netfilter/Makefile
> @@ -98,6 +98,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
>  
>  # matches
>  obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o
> +obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o
>  obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
>  obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
>  obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
> diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
> new file mode 100644
> index 0000000..1bdfab8
> --- /dev/null
> +++ b/net/netfilter/xt_bpf.c
> @@ -0,0 +1,73 @@
> +/* Xtables module to match packets using a BPF filter.
> + * Copyright 2013 Google Inc.
> + * Written by Willem de Bruijn <willemb@google.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/module.h>
> +#include <linux/skbuff.h>
> +#include <linux/ipv6.h>
> +#include <linux/filter.h>
> +#include <net/ip.h>
> +
> +#include <linux/netfilter/xt_bpf.h>
> +#include <linux/netfilter/x_tables.h>
> +
> +MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>");
> +MODULE_DESCRIPTION("Xtables: BPF filter match");
> +MODULE_LICENSE("GPL");

Please, add

MODULE_ALIAS("ipt_bpf");
MODULE_ALIAS("ip6t_bpf");

otherwise module auto-loading will not work.

> +
> +static int bpf_mt_check(const struct xt_mtchk_param *par)
> +{
> +	struct xt_bpf_info *info = par->matchinfo;
> +	struct sock_fprog program;
> +
> +	program.len = info->bpf_program_num_elem;
> +	program.filter = info->bpf_program;

sparse reports a warning here above. I've been trying to find a quick
solution, please, have a look at it.

> +	if (sk_unattached_filter_create(&info->filter, &program)) {
> +		pr_info("bpf: check failed: parse error\n");
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
> +{
> +	const struct xt_bpf_info *info = par->matchinfo;
> +
> +	return SK_RUN_FILTER(info->filter, skb);
> +}
> +
> +static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
> +{
> +	const struct xt_bpf_info *info = par->matchinfo;
> +	sk_unattached_filter_destroy(info->filter);
> +}
> +
> +static struct xt_match bpf_mt_reg __read_mostly = {
> +	.name		= "bpf",
> +	.revision	= 0,
> +	.family		= NFPROTO_UNSPEC,
> +	.checkentry	= bpf_mt_check,
> +	.match		= bpf_mt,
> +	.destroy	= bpf_mt_destroy,
> +	.matchsize	= sizeof(struct xt_bpf_info),
> +	.me		= THIS_MODULE,
> +};
> +
> +static int __init bpf_mt_init(void)
> +{
> +	return xt_register_match(&bpf_mt_reg);
> +}
> +
> +static void __exit bpf_mt_exit(void)
> +{
> +	xt_unregister_match(&bpf_mt_reg);
> +}
> +
> +module_init(bpf_mt_init);
> +module_exit(bpf_mt_exit);
> -- 
> 1.7.7.3
> 
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Willem de Bruijn Jan. 18, 2013, 4:48 p.m. UTC | #2
On Thu, Jan 17, 2013 at 6:53 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> Hi Willem,
>
> On Wed, Jan 09, 2013 at 07:15:44PM -0500, Willem de Bruijn wrote:
>> Changes:
>> - v3: reverted no longer needed changes to x_tables.c
>> - v2: use a fixed size match structure to communicate between
>>       kernel and userspace.
>>
>> Support arbitrary linux socket filter (BPF) programs as iptables
>> match rules. This allows for very expressive filters, and on
>> platforms with BPF JIT appears competitive with traditional hardcoded
>> iptables rules.
>>
>> At least, on an x86_64 that achieves 40K netperf TCP_STREAM without
>> any iptables rules (40 GBps),
>>
>> inserting 100x this bpf rule gives 28K
>>
>>     ./iptables -A OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0,' -j
>>
>>     (as generated by tcpdump -i any -ddd ip proto 20 | tr '\n' ',')
>
> That code generated by tcpdump will not work.
>
> tcpdump generates BPF code assuming that offset 0 is the link layer
> header, while iptables considers that offset 0 is the network layer.

Ah, yes, of course. We discussed that earlier. I removed the statement
from the commit message. Such hints belong in the libxt_bpf man page,
if anywhere.

To compile code right now, the little bpf compiler that I emailed
before can be downloaded from
http://code.google.com/p/kernel/downloads/detail?name=bpf2decimal.c

I don't think that a compiler has to be shipped with iptables itself,
let alone make iptables link against libraries. That said,  it is not
impossible to detect pcap.h in configure.ac and optionally enable a
"-m bpf --string" mode that calls pcap_compile_nopcap from within
libxt_bpf, so let me know if you would like me to code that up. I can
also try to send a patch to tcpdump that extends compilation (`-ddd -y
<type>`) to arbitrary link layer types.

> More comments below:
>
>> inserting 100x this u32 rule gives 21K
>>
>>     ./iptables -A OUTPUT -m u32 --u32 '6&0xFF=0x20' -j DROP
>>
>> The two are logically equivalent, as far as I can tell. Let me know
>> if my test methodology is flawed in some way. Even in cases where
>> slower, the filter adds functionality currently lacking in iptables,
>> such as access to sk_buff fields like rxhash and queue_mapping.
>> ---
>>  include/uapi/linux/netfilter/xt_bpf.h |   17 ++++++++
>>  net/netfilter/Kconfig                 |    9 ++++
>>  net/netfilter/Makefile                |    1 +
>>  net/netfilter/xt_bpf.c                |   73 +++++++++++++++++++++++++++++++++
>>  4 files changed, 100 insertions(+), 0 deletions(-)
>>  create mode 100644 include/uapi/linux/netfilter/xt_bpf.h
>>  create mode 100644 net/netfilter/xt_bpf.c
>>
>> diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h
>> new file mode 100644
>> index 0000000..5dda450
>> --- /dev/null
>> +++ b/include/uapi/linux/netfilter/xt_bpf.h
>> @@ -0,0 +1,17 @@
>> +#ifndef _XT_BPF_H
>> +#define _XT_BPF_H
>> +
>> +#include <linux/filter.h>
>> +#include <linux/types.h>
>> +
>> +#define XT_BPF_MAX_NUM_INSTR 64
>> +
>> +struct xt_bpf_info {
>> +     __u16 bpf_program_num_elem;
>> +     struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR];
>> +
>> +     /* only used in the kernel */
>> +     struct sk_filter *filter __attribute__((aligned(8)));
>> +};
>> +
>> +#endif /*_XT_BPF_H */
>> diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
>> index fefa514..d45720f 100644
>> --- a/net/netfilter/Kconfig
>> +++ b/net/netfilter/Kconfig
>> @@ -798,6 +798,15 @@ config NETFILTER_XT_MATCH_ADDRTYPE
>>         If you want to compile it as a module, say M here and read
>>         <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
>>
>> +config NETFILTER_XT_MATCH_BPF
>> +     tristate '"bpf" match support'
>> +     depends on NETFILTER_ADVANCED
>> +     help
>> +       BPF matching applies a linux socket filter to each packet and
>> +          accepts those for which the filter returns non-zero.
>> +
>> +       To compile it as a module, choose M here.  If unsure, say N.
>> +
>>  config NETFILTER_XT_MATCH_CLUSTER
>>       tristate '"cluster" match support'
>>       depends on NF_CONNTRACK
>> diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
>> index 3259697..6d6194525 100644
>> --- a/net/netfilter/Makefile
>> +++ b/net/netfilter/Makefile
>> @@ -98,6 +98,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
>>
>>  # matches
>>  obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o
>> +obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o
>>  obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
>>  obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
>>  obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
>> diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
>> new file mode 100644
>> index 0000000..1bdfab8
>> --- /dev/null
>> +++ b/net/netfilter/xt_bpf.c
>> @@ -0,0 +1,73 @@
>> +/* Xtables module to match packets using a BPF filter.
>> + * Copyright 2013 Google Inc.
>> + * Written by Willem de Bruijn <willemb@google.com>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/module.h>
>> +#include <linux/skbuff.h>
>> +#include <linux/ipv6.h>
>> +#include <linux/filter.h>
>> +#include <net/ip.h>
>> +
>> +#include <linux/netfilter/xt_bpf.h>
>> +#include <linux/netfilter/x_tables.h>
>> +
>> +MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>");
>> +MODULE_DESCRIPTION("Xtables: BPF filter match");
>> +MODULE_LICENSE("GPL");
>
> Please, add
>
> MODULE_ALIAS("ipt_bpf");
> MODULE_ALIAS("ip6t_bpf");

Done.

> otherwise module auto-loading will not work.
>
>> +
>> +static int bpf_mt_check(const struct xt_mtchk_param *par)
>> +{
>> +     struct xt_bpf_info *info = par->matchinfo;
>> +     struct sock_fprog program;
>> +
>> +     program.len = info->bpf_program_num_elem;
>> +     program.filter = info->bpf_program;
>
> sparse reports a warning here above. I've been trying to find a quick
> solution, please, have a look at it.

Thanks for catching this. Apparently, program.filter is annotated as
__user. A cast made the warning disappear, and is safe as far as I can
see, since it only forces the kernel to be more conservative in
accessing the memory.

>> +     if (sk_unattached_filter_create(&info->filter, &program)) {
>> +             pr_info("bpf: check failed: parse error\n");
>> +             return -EINVAL;
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>> +static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
>> +{
>> +     const struct xt_bpf_info *info = par->matchinfo;
>> +
>> +     return SK_RUN_FILTER(info->filter, skb);
>> +}
>> +
>> +static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
>> +{
>> +     const struct xt_bpf_info *info = par->matchinfo;
>> +     sk_unattached_filter_destroy(info->filter);
>> +}
>> +
>> +static struct xt_match bpf_mt_reg __read_mostly = {
>> +     .name           = "bpf",
>> +     .revision       = 0,
>> +     .family         = NFPROTO_UNSPEC,
>> +     .checkentry     = bpf_mt_check,
>> +     .match          = bpf_mt,
>> +     .destroy        = bpf_mt_destroy,
>> +     .matchsize      = sizeof(struct xt_bpf_info),
>> +     .me             = THIS_MODULE,
>> +};
>> +
>> +static int __init bpf_mt_init(void)
>> +{
>> +     return xt_register_match(&bpf_mt_reg);
>> +}
>> +
>> +static void __exit bpf_mt_exit(void)
>> +{
>> +     xt_unregister_match(&bpf_mt_reg);
>> +}
>> +
>> +module_init(bpf_mt_init);
>> +module_exit(bpf_mt_exit);
>> --
>> 1.7.7.3
>>
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Jan. 21, 2013, 1:44 p.m. UTC | #3
On Fri, Jan 18, 2013 at 11:48:34AM -0500, Willem de Bruijn wrote:
[...]
> To compile code right now, the little bpf compiler that I emailed
> before can be downloaded from
> http://code.google.com/p/kernel/downloads/detail?name=bpf2decimal.c
> 
> I don't think that a compiler has to be shipped with iptables itself,
> let alone make iptables link against libraries. That said,  it is not
> impossible to detect pcap.h in configure.ac and optionally enable a
> "-m bpf --string" mode that calls pcap_compile_nopcap from within
> libxt_bpf, so let me know if you would like me to code that up. I can
> also try to send a patch to tcpdump that extends compilation (`-ddd -y
> <type>`) to arbitrary link layer types.

We have to decide if:

a) we add a new hard library dependency to iptables (libpcap) for just
for one single module, that is, the libxt_bpf depends on libpcap.

or

b) provide a separate utility to generate the BPF filter in text-based
format from some utility that accepts tcpdump-like syntax. The utility
can be distributed in the utils directory and it would not be
mandatory to compile it if libpcap is not present.

I'd like to hear pro and cons arguments from others on this.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florian Westphal Jan. 22, 2013, 8:46 a.m. UTC | #4
Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Fri, Jan 18, 2013 at 11:48:34AM -0500, Willem de Bruijn wrote:
> [...]
> > To compile code right now, the little bpf compiler that I emailed
> > before can be downloaded from
> > http://code.google.com/p/kernel/downloads/detail?name=bpf2decimal.c
> > 
> > I don't think that a compiler has to be shipped with iptables itself,
> > let alone make iptables link against libraries. That said,  it is not
> > impossible to detect pcap.h in configure.ac and optionally enable a
> > "-m bpf --string" mode that calls pcap_compile_nopcap from within
> > libxt_bpf, so let me know if you would like me to code that up. I can
> > also try to send a patch to tcpdump that extends compilation (`-ddd -y
> > <type>`) to arbitrary link layer types.
> 
> We have to decide if:
> 
> a) we add a new hard library dependency to iptables (libpcap) for just
> for one single module, that is, the libxt_bpf depends on libpcap.
> 
> or
> 
> b) provide a separate utility to generate the BPF filter in text-based
> format from some utility that accepts tcpdump-like syntax. The utility
> can be distributed in the utils directory and it would not be
> mandatory to compile it if libpcap is not present.
> 
> I'd like to hear pro and cons arguments from others on this.

a) is arguably more user friendly, however, I don't think we can
   retain the 'text representation' for iptables-save so users
   would still be confronted with the compiled data at some point
   (i.e., they need to write down the original expression anyway to
    figure out what the rule they added 6 months back actually does...)

I would go with b) for now; we can always move to a) later on, but not
the other way around (would kill backwards compatibility).
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jozsef Kadlecsik Jan. 22, 2013, 9:46 a.m. UTC | #5
On Tue, 22 Jan 2013, Florian Westphal wrote:

> Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > On Fri, Jan 18, 2013 at 11:48:34AM -0500, Willem de Bruijn wrote:
> > [...]
> > > To compile code right now, the little bpf compiler that I emailed
> > > before can be downloaded from
> > > http://code.google.com/p/kernel/downloads/detail?name=bpf2decimal.c
> > > 
> > > I don't think that a compiler has to be shipped with iptables itself,
> > > let alone make iptables link against libraries. That said,  it is not
> > > impossible to detect pcap.h in configure.ac and optionally enable a
> > > "-m bpf --string" mode that calls pcap_compile_nopcap from within
> > > libxt_bpf, so let me know if you would like me to code that up. I can
> > > also try to send a patch to tcpdump that extends compilation (`-ddd -y
> > > <type>`) to arbitrary link layer types.
> > 
> > We have to decide if:
> > 
> > a) we add a new hard library dependency to iptables (libpcap) for just
> > for one single module, that is, the libxt_bpf depends on libpcap.
> > 
> > or
> > 
> > b) provide a separate utility to generate the BPF filter in text-based
> > format from some utility that accepts tcpdump-like syntax. The utility
> > can be distributed in the utils directory and it would not be
> > mandatory to compile it if libpcap is not present.
> > 
> > I'd like to hear pro and cons arguments from others on this.
> 
> a) is arguably more user friendly, however, I don't think we can
>    retain the 'text representation' for iptables-save so users
>    would still be confronted with the compiled data at some point
>    (i.e., they need to write down the original expression anyway to
>     figure out what the rule they added 6 months back actually does...)
> 
> I would go with b) for now; we can always move to a) later on, but not
> the other way around (would kill backwards compatibility).

Yes, let's go with b). (But from packaging point of view 
utils/bpf2decimal.c depending on libpcap is not much different from 
extensions/libxt_bpf.c depending on libpcap.)

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Maciej Żenczykowski Jan. 22, 2013, 10:03 a.m. UTC | #6
With (b) the static iptables binary doesn't grow by the size of the
libpcap library.
I think this is a worthy goal.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Jan. 22, 2013, 11:11 a.m. UTC | #7
On Tue, Jan 22, 2013 at 10:46:17AM +0100, Jozsef Kadlecsik wrote:
> On Tue, 22 Jan 2013, Florian Westphal wrote:
> 
> > Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > > On Fri, Jan 18, 2013 at 11:48:34AM -0500, Willem de Bruijn wrote:
> > > [...]
> > > > To compile code right now, the little bpf compiler that I emailed
> > > > before can be downloaded from
> > > > http://code.google.com/p/kernel/downloads/detail?name=bpf2decimal.c
> > > > 
> > > > I don't think that a compiler has to be shipped with iptables itself,
> > > > let alone make iptables link against libraries. That said,  it is not
> > > > impossible to detect pcap.h in configure.ac and optionally enable a
> > > > "-m bpf --string" mode that calls pcap_compile_nopcap from within
> > > > libxt_bpf, so let me know if you would like me to code that up. I can
> > > > also try to send a patch to tcpdump that extends compilation (`-ddd -y
> > > > <type>`) to arbitrary link layer types.
> > > 
> > > We have to decide if:
> > > 
> > > a) we add a new hard library dependency to iptables (libpcap) for just
> > > for one single module, that is, the libxt_bpf depends on libpcap.
> > > 
> > > or
> > > 
> > > b) provide a separate utility to generate the BPF filter in text-based
> > > format from some utility that accepts tcpdump-like syntax. The utility
> > > can be distributed in the utils directory and it would not be
> > > mandatory to compile it if libpcap is not present.
> > > 
> > > I'd like to hear pro and cons arguments from others on this.
> > 
> > a) is arguably more user friendly, however, I don't think we can
> >    retain the 'text representation' for iptables-save so users
> >    would still be confronted with the compiled data at some point
> >    (i.e., they need to write down the original expression anyway to
> >     figure out what the rule they added 6 months back actually does...)
> > 
> > I would go with b) for now; we can always move to a) later on, but not
> > the other way around (would kill backwards compatibility).
> 
> Yes, let's go with b). (But from packaging point of view 
> utils/bpf2decimal.c depending on libpcap is not much different from 
> extensions/libxt_bpf.c depending on libpcap.)

We can skip that dependency by adding an independent configure.ac and
Makefile for this under iptables/utils/nfbpf. Thus, iptables itself
will not depend on libpcap.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Willem de Bruijn Jan. 23, 2013, 3:59 p.m. UTC | #8
On Tue, Jan 22, 2013 at 3:46 AM, Florian Westphal <fw@strlen.de> wrote:
> Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>> On Fri, Jan 18, 2013 at 11:48:34AM -0500, Willem de Bruijn wrote:
>> [...]
>> > To compile code right now, the little bpf compiler that I emailed
>> > before can be downloaded from
>> > http://code.google.com/p/kernel/downloads/detail?name=bpf2decimal.c
>> >
>> > I don't think that a compiler has to be shipped with iptables itself,
>> > let alone make iptables link against libraries. That said,  it is not
>> > impossible to detect pcap.h in configure.ac and optionally enable a
>> > "-m bpf --string" mode that calls pcap_compile_nopcap from within
>> > libxt_bpf, so let me know if you would like me to code that up. I can
>> > also try to send a patch to tcpdump that extends compilation (`-ddd -y
>> > <type>`) to arbitrary link layer types.
>>
>> We have to decide if:
>>
>> a) we add a new hard library dependency to iptables (libpcap) for just
>> for one single module, that is, the libxt_bpf depends on libpcap.
>>
>> or
>>
>> b) provide a separate utility to generate the BPF filter in text-based
>> format from some utility that accepts tcpdump-like syntax. The utility
>> can be distributed in the utils directory and it would not be
>> mandatory to compile it if libpcap is not present.
>>
>> I'd like to hear pro and cons arguments from others on this.
>
> a) is arguably more user friendly, however, I don't think we can
>    retain the 'text representation' for iptables-save so users
>    would still be confronted with the compiled data at some point
>    (i.e., they need to write down the original expression anyway to
>     figure out what the rule they added 6 months back actually does...)
>
> I would go with b) for now; we can always move to a) later on, but not
> the other way around (would kill backwards compatibility).

This sounds like the consensus (for the record, I also prefer this less
disruptive approach). In that case, I can submit a revised libxt_bpf with your
suggested changes right away, Pablo, and we can leave the separate
userspace tool for a later commit.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Jan. 23, 2013, 4:21 p.m. UTC | #9
On Wed, Jan 23, 2013 at 10:59:28AM -0500, Willem de Bruijn wrote:
> >> b) provide a separate utility to generate the BPF filter in text-based
> >> format from some utility that accepts tcpdump-like syntax. The utility
> >> can be distributed in the utils directory and it would not be
> >> mandatory to compile it if libpcap is not present.
[...]
> > I would go with b) for now; we can always move to a) later on, but not
> > the other way around (would kill backwards compatibility).
> 
> This sounds like the consensus (for the record, I also prefer this less
> disruptive approach). In that case, I can submit a revised libxt_bpf with your
> suggested changes right away, Pablo, and we can leave the separate
> userspace tool for a later commit.

Either way is fine, but please we should have that utility compiler
integrated in the iptables tree by when 3.9-rc1 is released.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Willem de Bruijn Jan. 23, 2013, 4:38 p.m. UTC | #10
On Wed, Jan 23, 2013 at 11:21 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Wed, Jan 23 2013 at 10:59:28AM -0500, Willem de Bruijn wrote:
>> >> b) provide a separate utility to generate the BPF filter in text-based
>> >> format from some utility that accepts tcpdump-like syntax. The utility
>> >> can be distributed in the utils directory and it would not be
>> >> mandatory to compile it if libpcap is not present.
> [...]
>> > I would go with b) for now; we can always move to a) later on, but not
>> > the other way around (would kill backwards compatibility).
>>
>> This sounds like the consensus (for the record, I also prefer this less
>> disruptive approach). In that case, I can submit a revised libxt_bpf with your
>> suggested changes right away, Pablo, and we can leave the separate
>> userspace tool for a later commit.
>
> Either way is fine, but please we should have that utility compiler
> integrated in the iptables tree by when 3.9-rc1 is released.

Okay. I'll prepare a separate patch with the pcap-based utility, then.

Since utils is built as part of the root make invocation, I think it's
better to test for pcap.h in the root configure.ac and add a test in
utils/Makefile.am to build this tool if found, as opposed to creating
a separate configure.ac under utils. We can also discuss these
details after the first version of the patch, of course.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Jan. 23, 2013, 6:56 p.m. UTC | #11
On Wed, Jan 23, 2013 at 11:38:20AM -0500, Willem de Bruijn wrote:
> On Wed, Jan 23, 2013 at 11:21 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > On Wed, Jan 23 2013 at 10:59:28AM -0500, Willem de Bruijn wrote:
> >> >> b) provide a separate utility to generate the BPF filter in text-based
> >> >> format from some utility that accepts tcpdump-like syntax. The utility
> >> >> can be distributed in the utils directory and it would not be
> >> >> mandatory to compile it if libpcap is not present.
> > [...]
> >> > I would go with b) for now; we can always move to a) later on, but not
> >> > the other way around (would kill backwards compatibility).
> >>
> >> This sounds like the consensus (for the record, I also prefer this less
> >> disruptive approach). In that case, I can submit a revised libxt_bpf with your
> >> suggested changes right away, Pablo, and we can leave the separate
> >> userspace tool for a later commit.
> >
> > Either way is fine, but please we should have that utility compiler
> > integrated in the iptables tree by when 3.9-rc1 is released.
> 
> Okay. I'll prepare a separate patch with the pcap-based utility, then.
> 
> Since utils is built as part of the root make invocation, I think it's
> better to test for pcap.h in the root configure.ac and add a test in
> utils/Makefile.am to build this tool if found, as opposed to creating
> a separate configure.ac under utils. We can also discuss these
> details after the first version of the patch, of course.

That's fine by now, and it's way less bloat.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Willem de Bruijn Feb. 18, 2013, 3:52 a.m. UTC | #12
On Wed, Jan 23, 2013 at 1:56 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Wed, Jan 23, 2013 at 11:38:20AM -0500, Willem de Bruijn wrote:
>> On Wed, Jan 23, 2013 at 11:21 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>> > On Wed, Jan 23 2013 at 10:59:28AM -0500, Willem de Bruijn wrote:
>> >> >> b) provide a separate utility to generate the BPF filter in text-based
>> >> >> format from some utility that accepts tcpdump-like syntax. The utility
>> >> >> can be distributed in the utils directory and it would not be
>> >> >> mandatory to compile it if libpcap is not present.
>> > [...]
>> >> > I would go with b) for now; we can always move to a) later on, but not
>> >> > the other way around (would kill backwards compatibility).
>> >>
>> >> This sounds like the consensus (for the record, I also prefer this less
>> >> disruptive approach). In that case, I can submit a revised libxt_bpf with your
>> >> suggested changes right away, Pablo, and we can leave the separate
>> >> userspace tool for a later commit.
>> >
>> > Either way is fine, but please we should have that utility compiler
>> > integrated in the iptables tree by when 3.9-rc1 is released.
>>
>> Okay. I'll prepare a separate patch with the pcap-based utility, then.

Just sent the patch. I'm no expert at autoconf and automake, so the
build logic can conceivably be shorter, but it works for me and the
logic is straightforward. I forgot to mention in the commit message
which versions of the tools I used: tested on a ubuntu 12.04 with
autoconf 2.68, automake 1.9.6 and libtool 2.4.2.

>> Since utils is built as part of the root make invocation, I think it's
>> better to test for pcap.h in the root configure.ac and add a test in
>> utils/Makefile.am to build this tool if found, as opposed to creating
>> a separate configure.ac under utils. We can also discuss these
>> details after the first version of the patch, of course.
>
> That's fine by now, and it's way less bloat.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Maciej Żenczykowski Feb. 24, 2013, 2:15 a.m. UTC | #13
at a guess, there should be a --with/without option for it, and if
--with=tool is specified build/configure should fail if support
libraries are missing


On Sun, Feb 17, 2013 at 7:52 PM, Willem de Bruijn <willemb@google.com> wrote:
> On Wed, Jan 23, 2013 at 1:56 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>> On Wed, Jan 23, 2013 at 11:38:20AM -0500, Willem de Bruijn wrote:
>>> On Wed, Jan 23, 2013 at 11:21 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>>> > On Wed, Jan 23 2013 at 10:59:28AM -0500, Willem de Bruijn wrote:
>>> >> >> b) provide a separate utility to generate the BPF filter in text-based
>>> >> >> format from some utility that accepts tcpdump-like syntax. The utility
>>> >> >> can be distributed in the utils directory and it would not be
>>> >> >> mandatory to compile it if libpcap is not present.
>>> > [...]
>>> >> > I would go with b) for now; we can always move to a) later on, but not
>>> >> > the other way around (would kill backwards compatibility).
>>> >>
>>> >> This sounds like the consensus (for the record, I also prefer this less
>>> >> disruptive approach). In that case, I can submit a revised libxt_bpf with your
>>> >> suggested changes right away, Pablo, and we can leave the separate
>>> >> userspace tool for a later commit.
>>> >
>>> > Either way is fine, but please we should have that utility compiler
>>> > integrated in the iptables tree by when 3.9-rc1 is released.
>>>
>>> Okay. I'll prepare a separate patch with the pcap-based utility, then.
>
> Just sent the patch. I'm no expert at autoconf and automake, so the
> build logic can conceivably be shorter, but it works for me and the
> logic is straightforward. I forgot to mention in the commit message
> which versions of the tools I used: tested on a ubuntu 12.04 with
> autoconf 2.68, automake 1.9.6 and libtool 2.4.2.
>
>>> Since utils is built as part of the root make invocation, I think it's
>>> better to test for pcap.h in the root configure.ac and add a test in
>>> utils/Makefile.am to build this tool if found, as opposed to creating
>>> a separate configure.ac under utils. We can also discuss these
>>> details after the first version of the patch, of course.
>>
>> That's fine by now, and it's way less bloat.
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Willem de Bruijn Feb. 27, 2013, 8:39 p.m. UTC | #14
On Sat, Feb 23, 2013 at 9:15 PM, Maciej Żenczykowski
<zenczykowski@gmail.com> wrote:
> at a guess, there should be a --with/without option for it, and if
> --with=tool is specified build/configure should fail if support
> libraries are missing

Agreed on the fail hard.

After a brief offline discussion on --enable vs --with, will respin
with optional --enable-bpf-compiler. The option is disabled by
default. If it is enabled and pcap cannot be found, build fails.

If no further comments, I'll respin shortly.

>
> On Sun, Feb 17, 2013 at 7:52 PM, Willem de Bruijn <willemb@google.com> wrote:
>> On Wed, Jan 23, 2013 at 1:56 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>>> On Wed, Jan 23, 2013 at 11:38:20AM -0500, Willem de Bruijn wrote:
>>>> On Wed, Jan 23, 2013 at 11:21 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>>>> > On Wed, Jan 23 2013 at 10:59:28AM -0500, Willem de Bruijn wrote:
>>>> >> >> b) provide a separate utility to generate the BPF filter in text-based
>>>> >> >> format from some utility that accepts tcpdump-like syntax. The utility
>>>> >> >> can be distributed in the utils directory and it would not be
>>>> >> >> mandatory to compile it if libpcap is not present.
>>>> > [...]
>>>> >> > I would go with b) for now; we can always move to a) later on, but not
>>>> >> > the other way around (would kill backwards compatibility).
>>>> >>
>>>> >> This sounds like the consensus (for the record, I also prefer this less
>>>> >> disruptive approach). In that case, I can submit a revised libxt_bpf with your
>>>> >> suggested changes right away, Pablo, and we can leave the separate
>>>> >> userspace tool for a later commit.
>>>> >
>>>> > Either way is fine, but please we should have that utility compiler
>>>> > integrated in the iptables tree by when 3.9-rc1 is released.
>>>>
>>>> Okay. I'll prepare a separate patch with the pcap-based utility, then.
>>
>> Just sent the patch. I'm no expert at autoconf and automake, so the
>> build logic can conceivably be shorter, but it works for me and the
>> logic is straightforward. I forgot to mention in the commit message
>> which versions of the tools I used: tested on a ubuntu 12.04 with
>> autoconf 2.68, automake 1.9.6 and libtool 2.4.2.
>>
>>>> Since utils is built as part of the root make invocation, I think it's
>>>> better to test for pcap.h in the root configure.ac and add a test in
>>>> utils/Makefile.am to build this tool if found, as opposed to creating
>>>> a separate configure.ac under utils. We can also discuss these
>>>> details after the first version of the patch, of course.
>>>
>>> That's fine by now, and it's way less bloat.
>> --
>> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h
new file mode 100644
index 0000000..5dda450
--- /dev/null
+++ b/include/uapi/linux/netfilter/xt_bpf.h
@@ -0,0 +1,17 @@ 
+#ifndef _XT_BPF_H
+#define _XT_BPF_H
+
+#include <linux/filter.h>
+#include <linux/types.h>
+
+#define XT_BPF_MAX_NUM_INSTR	64
+
+struct xt_bpf_info {
+	__u16 bpf_program_num_elem;
+	struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR];
+
+	/* only used in the kernel */
+	struct sk_filter *filter __attribute__((aligned(8)));
+};
+
+#endif /*_XT_BPF_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index fefa514..d45720f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -798,6 +798,15 @@  config NETFILTER_XT_MATCH_ADDRTYPE
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_BPF
+	tristate '"bpf" match support'
+	depends on NETFILTER_ADVANCED
+	help
+	  BPF matching applies a linux socket filter to each packet and
+          accepts those for which the filter returns non-zero.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_CLUSTER
 	tristate '"cluster" match support'
 	depends on NF_CONNTRACK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 3259697..6d6194525 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -98,6 +98,7 @@  obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
 
 # matches
 obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
new file mode 100644
index 0000000..1bdfab8
--- /dev/null
+++ b/net/netfilter/xt_bpf.c
@@ -0,0 +1,73 @@ 
+/* Xtables module to match packets using a BPF filter.
+ * Copyright 2013 Google Inc.
+ * Written by Willem de Bruijn <willemb@google.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/filter.h>
+#include <net/ip.h>
+
+#include <linux/netfilter/xt_bpf.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>");
+MODULE_DESCRIPTION("Xtables: BPF filter match");
+MODULE_LICENSE("GPL");
+
+static int bpf_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_bpf_info *info = par->matchinfo;
+	struct sock_fprog program;
+
+	program.len = info->bpf_program_num_elem;
+	program.filter = info->bpf_program;
+	if (sk_unattached_filter_create(&info->filter, &program)) {
+		pr_info("bpf: check failed: parse error\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_bpf_info *info = par->matchinfo;
+
+	return SK_RUN_FILTER(info->filter, skb);
+}
+
+static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_bpf_info *info = par->matchinfo;
+	sk_unattached_filter_destroy(info->filter);
+}
+
+static struct xt_match bpf_mt_reg __read_mostly = {
+	.name		= "bpf",
+	.revision	= 0,
+	.family		= NFPROTO_UNSPEC,
+	.checkentry	= bpf_mt_check,
+	.match		= bpf_mt,
+	.destroy	= bpf_mt_destroy,
+	.matchsize	= sizeof(struct xt_bpf_info),
+	.me		= THIS_MODULE,
+};
+
+static int __init bpf_mt_init(void)
+{
+	return xt_register_match(&bpf_mt_reg);
+}
+
+static void __exit bpf_mt_exit(void)
+{
+	xt_unregister_match(&bpf_mt_reg);
+}
+
+module_init(bpf_mt_init);
+module_exit(bpf_mt_exit);