diff mbox

[next] iptables: add xt_bpf match

Message ID 1355089978-24463-1-git-send-email-willemb@google.com
State Not Applicable
Headers show

Commit Message

Willem de Bruijn Dec. 9, 2012, 9:52 p.m. UTC
Support arbitrary linux socket filter (BPF) programs as iptables
match rules. This allows for very expressive filters, and on
platforms with BPF JIT appears competitive with traditional hardcoded
iptables rules.

At least, on an x86_64 that achieves 40K Mbit/s netperf TCP_STREAM throughput
without any iptables rules (40 Gbps),

inserting 100x this bpf rule gives 28K

    ./iptables -A OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0,' -j

    (as generated by tcpdump -i any -ddd ip proto 20 | tr '\n' ',')

inserting 100x this u32 rule gives 21K

    ./iptables -A OUTPUT -m u32 --u32 '6&0xFF=0x20' -j DROP

The two are logically equivalent, as far as I can tell. Let me know
if my test methodology is flawed in some way. Even in cases where
slower, the filter adds functionality currently lacking in iptables,
such as access to sk_buff fields like rxhash and queue_mapping.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/linux/netfilter/xt_bpf.h |   17 +++++++
 net/netfilter/Kconfig            |    9 ++++
 net/netfilter/Makefile           |    1 +
 net/netfilter/x_tables.c         |    5 +-
 net/netfilter/xt_bpf.c           |   86 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 116 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/netfilter/xt_bpf.h
 create mode 100644 net/netfilter/xt_bpf.c

Comments

Pablo Neira Ayuso Jan. 8, 2013, 3:21 a.m. UTC | #1
Hi Willem,

On Sun, Dec 09, 2012 at 04:52:58PM -0500, Willem de Bruijn wrote:
> Support arbitrary linux socket filter (BPF) programs as iptables
> match rules. This allows for very expressive filters, and on
> platforms with BPF JIT appears competitive with traditional hardcoded
> iptables rules.
> 
> At least, on an x86_64 that achieves 40K netperf TCP_STREAM without
> any iptables rules (40 GBps),
> 
> inserting 100x this bpf rule gives 28K
> 
>     ./iptables -A OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0,' -j
> 
>     (as generated by tcpdump -i any -ddd ip proto 20 | tr '\n' ',')
> 
> inserting 100x this u32 rule gives 21K
> 
>     ./iptables -A OUTPUT -m u32 --u32 '6&0xFF=0x20' -j DROP
> 
> The two are logically equivalent, as far as I can tell. Let me know
> if my test methodology is flawed in some way. Even in cases where
> slower, the filter adds functionality currently lacking in iptables,
> such as access to sk_buff fields like rxhash and queue_mapping.
> 
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---
>  include/linux/netfilter/xt_bpf.h |   17 +++++++
>  net/netfilter/Kconfig            |    9 ++++
>  net/netfilter/Makefile           |    1 +
>  net/netfilter/x_tables.c         |    5 +-
>  net/netfilter/xt_bpf.c           |   86 ++++++++++++++++++++++++++++++++++++++
>  5 files changed, 116 insertions(+), 2 deletions(-)
>  create mode 100644 include/linux/netfilter/xt_bpf.h
>  create mode 100644 net/netfilter/xt_bpf.c
> 
> diff --git a/include/linux/netfilter/xt_bpf.h b/include/linux/netfilter/xt_bpf.h
> new file mode 100644
> index 0000000..23502c0
> --- /dev/null
> +++ b/include/linux/netfilter/xt_bpf.h
> @@ -0,0 +1,17 @@
> +#ifndef _XT_BPF_H
> +#define _XT_BPF_H
> +
> +#include <linux/filter.h>
> +#include <linux/types.h>
> +
> +struct xt_bpf_info {
> +	__u16 bpf_program_num_elem;
> +
> +	/* only used in kernel */
> +	struct sk_filter *filter __attribute__((aligned(8)));

I see. You set match->userspacesize to zero in libxt_bpf to skip the
comparison of that internal struct sk_filter *filter.

> +
> +	/* variable size, based on program_num_elem */
> +	struct sock_filter bpf_program[0];

While testing this I noticed:

iptables -I OUTPUT -m bpf --bytecode   \
        '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0' -j ACCEPT

Note that this works but it should not.

iptables -D OUTPUT -m bpf --bytecode   \
        '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,1 0 0 0' -j ACCEPT
                                                               ^
Mind that 1, it's a different filter, but it deletes the previous
filter without problems here.

A quick look at make_delete_mask() in iptables tells me that the
changes you made to userspace to allow variable size matches are not
enough to generate a sane mask (which is fundamental while looking for
a matching rule during the deletion).
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Willem de Bruijn Jan. 9, 2013, 1:58 a.m. UTC | #2
On Mon, Jan 7, 2013 at 10:21 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> Hi Willem,
>
> On Sun, Dec 09, 2012 at 04:52:58PM -0500, Willem de Bruijn wrote:
>> Support arbitrary linux socket filter (BPF) programs as iptables
>> match rules. This allows for very expressive filters, and on
>> platforms with BPF JIT appears competitive with traditional hardcoded
>> iptables rules.
>>
>> At least, on an x86_64 that achieves 40K netperf TCP_STREAM without
>> any iptables rules (40 GBps),
>>
>> inserting 100x this bpf rule gives 28K
>>
>>     ./iptables -A OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0,' -j
>>
>>     (as generated by tcpdump -i any -ddd ip proto 20 | tr '\n' ',')
>>
>> inserting 100x this u32 rule gives 21K
>>
>>     ./iptables -A OUTPUT -m u32 --u32 '6&0xFF=0x20' -j DROP
>>
>> The two are logically equivalent, as far as I can tell. Let me know
>> if my test methodology is flawed in some way. Even in cases where
>> slower, the filter adds functionality currently lacking in iptables,
>> such as access to sk_buff fields like rxhash and queue_mapping.
>>
>> Signed-off-by: Willem de Bruijn <willemb@google.com>
>> ---
>>  include/linux/netfilter/xt_bpf.h |   17 +++++++
>>  net/netfilter/Kconfig            |    9 ++++
>>  net/netfilter/Makefile           |    1 +
>>  net/netfilter/x_tables.c         |    5 +-
>>  net/netfilter/xt_bpf.c           |   86 ++++++++++++++++++++++++++++++++++++++
>>  5 files changed, 116 insertions(+), 2 deletions(-)
>>  create mode 100644 include/linux/netfilter/xt_bpf.h
>>  create mode 100644 net/netfilter/xt_bpf.c
>>
>> diff --git a/include/linux/netfilter/xt_bpf.h b/include/linux/netfilter/xt_bpf.h
>> new file mode 100644
>> index 0000000..23502c0
>> --- /dev/null
>> +++ b/include/linux/netfilter/xt_bpf.h
>> @@ -0,0 +1,17 @@
>> +#ifndef _XT_BPF_H
>> +#define _XT_BPF_H
>> +
>> +#include <linux/filter.h>
>> +#include <linux/types.h>
>> +
>> +struct xt_bpf_info {
>> +     __u16 bpf_program_num_elem;
>> +
>> +     /* only used in kernel */
>> +     struct sk_filter *filter __attribute__((aligned(8)));
>
> I see. You set match->userspacesize to zero in libxt_bpf to skip the
> comparison of that internal struct sk_filter *filter.
>
>> +
>> +     /* variable size, based on program_num_elem */
>> +     struct sock_filter bpf_program[0];
>
> While testing this I noticed:
>
> iptables -I OUTPUT -m bpf --bytecode   \
>         '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0' -j ACCEPT
>
> Note that this works but it should not.
>
> iptables -D OUTPUT -m bpf --bytecode   \
>         '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,1 0 0 0' -j ACCEPT
>                                                                ^
> Mind that 1, it's a different filter, but it deletes the previous
> filter without problems here.
>
> A quick look at make_delete_mask() in iptables tells me that the
> changes you made to userspace to allow variable size matches are not
> enough to generate a sane mask (which is fundamental while looking for
> a matching rule during the deletion).

Thanks for finding this, Pablo. I completely forgot to check that.

I've never looked at that deletion code before. Will read it and
hopefully propose a simple fix in a few days. An earlier version of
the patch used a statically sized struct, by the way, like xt_string
does (XT_STRING_MAX_PATTERN_SIZE). If it is easier to
incorporate, we can always revert to that.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Jan. 9, 2013, 9:52 a.m. UTC | #3
Hi Willem,

On Tue, Jan 08, 2013 at 08:58:37PM -0500, Willem de Bruijn wrote:
> On Mon, Jan 7, 2013 at 10:21 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > Hi Willem,
> >
> > On Sun, Dec 09, 2012 at 04:52:58PM -0500, Willem de Bruijn wrote:
> >> Support arbitrary linux socket filter (BPF) programs as iptables
> >> match rules. This allows for very expressive filters, and on
> >> platforms with BPF JIT appears competitive with traditional hardcoded
> >> iptables rules.
> >>
> >> At least, on an x86_64 that achieves 40K netperf TCP_STREAM without
> >> any iptables rules (40 GBps),
> >>
> >> inserting 100x this bpf rule gives 28K
> >>
> >>     ./iptables -A OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0,' -j
> >>
> >>     (as generated by tcpdump -i any -ddd ip proto 20 | tr '\n' ',')
> >>
> >> inserting 100x this u32 rule gives 21K
> >>
> >>     ./iptables -A OUTPUT -m u32 --u32 '6&0xFF=0x20' -j DROP
> >>
> >> The two are logically equivalent, as far as I can tell. Let me know
> >> if my test methodology is flawed in some way. Even in cases where
> >> slower, the filter adds functionality currently lacking in iptables,
> >> such as access to sk_buff fields like rxhash and queue_mapping.
> >>
> >> Signed-off-by: Willem de Bruijn <willemb@google.com>
> >> ---
> >>  include/linux/netfilter/xt_bpf.h |   17 +++++++
> >>  net/netfilter/Kconfig            |    9 ++++
> >>  net/netfilter/Makefile           |    1 +
> >>  net/netfilter/x_tables.c         |    5 +-
> >>  net/netfilter/xt_bpf.c           |   86 ++++++++++++++++++++++++++++++++++++++
> >>  5 files changed, 116 insertions(+), 2 deletions(-)
> >>  create mode 100644 include/linux/netfilter/xt_bpf.h
> >>  create mode 100644 net/netfilter/xt_bpf.c
> >>
> >> diff --git a/include/linux/netfilter/xt_bpf.h b/include/linux/netfilter/xt_bpf.h
> >> new file mode 100644
> >> index 0000000..23502c0
> >> --- /dev/null
> >> +++ b/include/linux/netfilter/xt_bpf.h
> >> @@ -0,0 +1,17 @@
> >> +#ifndef _XT_BPF_H
> >> +#define _XT_BPF_H
> >> +
> >> +#include <linux/filter.h>
> >> +#include <linux/types.h>
> >> +
> >> +struct xt_bpf_info {
> >> +     __u16 bpf_program_num_elem;
> >> +
> >> +     /* only used in kernel */
> >> +     struct sk_filter *filter __attribute__((aligned(8)));
> >
> > I see. You set match->userspacesize to zero in libxt_bpf to skip the
> > comparison of that internal struct sk_filter *filter.
> >
> >> +
> >> +     /* variable size, based on program_num_elem */
> >> +     struct sock_filter bpf_program[0];
> >
> > While testing this I noticed:
> >
> > iptables -I OUTPUT -m bpf --bytecode   \
> >         '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0' -j ACCEPT
> >
> > Note that this works but it should not.
> >
> > iptables -D OUTPUT -m bpf --bytecode   \
> >         '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,1 0 0 0' -j ACCEPT
> >                                                                ^
> > Mind that 1, it's a different filter, but it deletes the previous
> > filter without problems here.
> >
> > A quick look at make_delete_mask() in iptables tells me that the
> > changes you made to userspace to allow variable size matches are not
> > enough to generate a sane mask (which is fundamental while looking for
> > a matching rule during the deletion).
> 
> Thanks for finding this, Pablo. I completely forgot to check that.
> 
> I've never looked at that deletion code before. Will read it and
> hopefully propose a simple fix in a few days. An earlier version of
> the patch used a statically sized struct, by the way, like xt_string
> does (XT_STRING_MAX_PATTERN_SIZE). If it is easier to
> incorporate, we can always revert to that.

I prefer if this sticks to a static size for now. The problem is that
BPF_MAXINSNS is probably too much to allocate per rule. So you'll have
to limit this to some reasonable number of lines in the filter.

Please, also check that iptables-save and iptables-restore work fine,
there is also some problem with the existing code.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Willem de Bruijn Jan. 10, 2013, 12:08 a.m. UTC | #4
On Wed, Jan 9, 2013 at 4:52 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> Hi Willem,
>
> On Tue, Jan 08, 2013 at 08:58:37PM -0500, Willem de Bruijn wrote:
>> On Mon, Jan 7, 2013 at 10:21 PM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>> > Hi Willem,
>> >
>> > On Sun, Dec 09, 2012 at 04:52:58PM -0500, Willem de Bruijn wrote:
>> >> Support arbitrary linux socket filter (BPF) programs as iptables
>> >> match rules. This allows for very expressive filters, and on
>> >> platforms with BPF JIT appears competitive with traditional hardcoded
>> >> iptables rules.
>> >>
>> >> At least, on an x86_64 that achieves 40K netperf TCP_STREAM without
>> >> any iptables rules (40 GBps),
>> >>
>> >> inserting 100x this bpf rule gives 28K
>> >>
>> >>     ./iptables -A OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0,' -j
>> >>
>> >>     (as generated by tcpdump -i any -ddd ip proto 20 | tr '\n' ',')
>> >>
>> >> inserting 100x this u32 rule gives 21K
>> >>
>> >>     ./iptables -A OUTPUT -m u32 --u32 '6&0xFF=0x20' -j DROP
>> >>
>> >> The two are logically equivalent, as far as I can tell. Let me know
>> >> if my test methodology is flawed in some way. Even in cases where
>> >> slower, the filter adds functionality currently lacking in iptables,
>> >> such as access to sk_buff fields like rxhash and queue_mapping.
>> >>
>> >> Signed-off-by: Willem de Bruijn <willemb@google.com>
>> >> ---
>> >>  include/linux/netfilter/xt_bpf.h |   17 +++++++
>> >>  net/netfilter/Kconfig            |    9 ++++
>> >>  net/netfilter/Makefile           |    1 +
>> >>  net/netfilter/x_tables.c         |    5 +-
>> >>  net/netfilter/xt_bpf.c           |   86 ++++++++++++++++++++++++++++++++++++++
>> >>  5 files changed, 116 insertions(+), 2 deletions(-)
>> >>  create mode 100644 include/linux/netfilter/xt_bpf.h
>> >>  create mode 100644 net/netfilter/xt_bpf.c
>> >>
>> >> diff --git a/include/linux/netfilter/xt_bpf.h b/include/linux/netfilter/xt_bpf.h
>> >> new file mode 100644
>> >> index 0000000..23502c0
>> >> --- /dev/null
>> >> +++ b/include/linux/netfilter/xt_bpf.h
>> >> @@ -0,0 +1,17 @@
>> >> +#ifndef _XT_BPF_H
>> >> +#define _XT_BPF_H
>> >> +
>> >> +#include <linux/filter.h>
>> >> +#include <linux/types.h>
>> >> +
>> >> +struct xt_bpf_info {
>> >> +     __u16 bpf_program_num_elem;
>> >> +
>> >> +     /* only used in kernel */
>> >> +     struct sk_filter *filter __attribute__((aligned(8)));
>> >
>> > I see. You set match->userspacesize to zero in libxt_bpf to skip the
>> > comparison of that internal struct sk_filter *filter.
>> >
>> >> +
>> >> +     /* variable size, based on program_num_elem */
>> >> +     struct sock_filter bpf_program[0];
>> >
>> > While testing this I noticed:
>> >
>> > iptables -I OUTPUT -m bpf --bytecode   \
>> >         '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,6 0 0 0' -j ACCEPT
>> >
>> > Note that this works but it should not.
>> >
>> > iptables -D OUTPUT -m bpf --bytecode   \
>> >         '6,40 0 0 14, 21 0 3 2048,48 0 0 25,21 0 1 20,6 0 0 96,1 0 0 0' -j ACCEPT
>> >                                                                ^
>> > Mind that 1, it's a different filter, but it deletes the previous
>> > filter without problems here.
>> >
>> > A quick look at make_delete_mask() in iptables tells me that the
>> > changes you made to userspace to allow variable size matches are not
>> > enough to generate a sane mask (which is fundamental while looking for
>> > a matching rule during the deletion).
>>
>> Thanks for finding this, Pablo. I completely forgot to check that.
>>
>> I've never looked at that deletion code before. Will read it and
>> hopefully propose a simple fix in a few days. An earlier version of
>> the patch used a statically sized struct, by the way, like xt_string
>> does (XT_STRING_MAX_PATTERN_SIZE). If it is easier to
>> incorporate, we can always revert to that.
>
> I prefer if this sticks to static size by now.

Okay. That is actually a lot simpler.

> The problem is that
> BPF_MAXINSNS is probably too much to allocate per rule. So you'll have
> to limit this to some reasonable amount of lines in the filter.
> Please, also check that iptables-save and iptables-restore work fine,
> there is also some problem with the existing code.

Done. I'll send updated patches right after this. Verified that they work using

## test append
# fail: more rules than num_rules
./iptables -A OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0
0 25,21 0 1 20,6 0 0 96,6 0 0 0,6 0 0 0' -j ACCEPT
# fail: fewer rules than num_rules
./iptables -A OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0
0 25,21 0 1 20,6 0 0 96' -j ACCEPT
# pass: correct
./iptables -A OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0
0 25,21 0 1 20,6 0 0 96,6 0 0 0' -j ACCEPT

## test delete
# fail: differs
./iptables -D OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0
0 25,21 0 1 20,6 0 0 96,6 0 0 1' -j ACCEPT
# pass: same
./iptables -D OUTPUT -m bpf --bytecode '6,40 0 0 14, 21 0 3 2048,48 0
0 25,21 0 1 20,6 0 0 96,6 0 0 0' -j ACCEPT

## test save/restore
./iptables-save > out && cat out && ./iptables-restore < out && echo "OK"

I did not retest the datapath for this revision.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/netfilter/xt_bpf.h b/include/linux/netfilter/xt_bpf.h
new file mode 100644
index 0000000..23502c0
--- /dev/null
+++ b/include/linux/netfilter/xt_bpf.h
@@ -0,0 +1,17 @@ 
+#ifndef _XT_BPF_H
+#define _XT_BPF_H
+
+#include <linux/filter.h>
+#include <linux/types.h>
+
+struct xt_bpf_info {
+	__u16 bpf_program_num_elem;
+
+	/* only used in kernel */
+	struct sk_filter *filter __attribute__((aligned(8)));
+
+	/* variable size, based on program_num_elem */
+	struct sock_filter bpf_program[0];
+};
+
+#endif /*_XT_BPF_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index fefa514..d45720f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -798,6 +798,15 @@  config NETFILTER_XT_MATCH_ADDRTYPE
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_BPF
+	tristate '"bpf" match support'
+	depends on NETFILTER_ADVANCED
+	help
+	  BPF matching applies a linux socket filter to each packet and
+          accepts those for which the filter returns non-zero.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_CLUSTER
 	tristate '"cluster" match support'
 	depends on NF_CONNTRACK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 3259697..6d6194525 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -98,6 +98,7 @@  obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
 
 # matches
 obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8d987c3..26306be 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -379,8 +379,9 @@  int xt_check_match(struct xt_mtchk_param *par,
 	if (XT_ALIGN(par->match->matchsize) != size &&
 	    par->match->matchsize != -1) {
 		/*
-		 * ebt_among is exempt from centralized matchsize checking
-		 * because it uses a dynamic-size data set.
+		 * matches of variable size length, such as ebt_among,
+		 * are exempt from centralized matchsize checking. They
+		 * skip the test by setting xt_match.matchsize to -1.
 		 */
 		pr_err("%s_tables: %s.%u match: invalid size "
 		       "%u (kernel) != (user) %u\n",
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
new file mode 100644
index 0000000..312cb90
--- /dev/null
+++ b/net/netfilter/xt_bpf.c
@@ -0,0 +1,86 @@ 
+/* Xtables module to match packets using a BPF filter.
+ * Copyright 2012 Google Inc.
+ * Written by Willem de Bruijn <willemb@google.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/filter.h>
+#include <net/ip.h>
+
+#include <linux/netfilter/xt_bpf.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>");
+MODULE_DESCRIPTION("Xtables: BPF filter match");
+MODULE_LICENSE("GPL");
+
+static int bpf_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_bpf_info *info = par->matchinfo;
+	const struct xt_entry_match *match;
+	struct sock_fprog program;
+	int expected_len;
+
+	match = container_of(par->matchinfo, const struct xt_entry_match, data);
+	expected_len = sizeof(struct xt_entry_match) +
+		       sizeof(struct xt_bpf_info) +
+		       (sizeof(struct sock_filter) *
+			info->bpf_program_num_elem);
+
+	if (match->u.match_size != expected_len) {
+		pr_info("bpf: check failed: incorrect length\n");
+		return -EINVAL;
+	}
+
+	program.len = info->bpf_program_num_elem;
+	program.filter = info->bpf_program;
+	if (sk_unattached_filter_create(&info->filter, &program)) {
+		pr_info("bpf: check failed: parse error\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_bpf_info *info = par->matchinfo;
+
+	return SK_RUN_FILTER(info->filter, skb);
+}
+
+static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_bpf_info *info = par->matchinfo;
+	sk_unattached_filter_destroy(info->filter);
+}
+
+static struct xt_match bpf_mt_reg __read_mostly = {
+	.name		= "bpf",
+	.revision	= 0,
+	.family		= NFPROTO_UNSPEC,
+	.checkentry	= bpf_mt_check,
+	.match		= bpf_mt,
+	.destroy	= bpf_mt_destroy,
+	.matchsize	= -1, /* skip xt_check_match because of dynamic len */
+	.me		= THIS_MODULE,
+};
+
+static int __init bpf_mt_init(void)
+{
+	return xt_register_match(&bpf_mt_reg);
+}
+
+static void __exit bpf_mt_exit(void)
+{
+	xt_unregister_match(&bpf_mt_reg);
+}
+
+module_init(bpf_mt_init);
+module_exit(bpf_mt_exit);