diff mbox

[v14,01/13] sk_run_filter: add BPF_S_ANC_SECCOMP_LD_W

Message ID 0c55cb258e0b5bbd615923ee2a9f06b9.squirrel@webmail.greenhost.nl
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Indan Zupancic March 13, 2012, 10:04 a.m. UTC
Hello,

I made a quick pseudo-patch for BPF JIT support. As far as I can tell,
the actual code itself is very simple, just:

case BPF_S_ANC_SECCOMP_LD_W:
	/* SECCOMP doesn't use SKB, no need to preserve %rdi. */
	t_offset = seccomp_bpf_load - (image + addrs[i]);
	EMIT1_off32(0xbf, K); /* mov imm32,%rdi */
	EMIT1_off32(0xe8, t_offset); /* call */
	break;

EAX is set directly as it's the return register, EBX is preserved by the
callee, RDI and other registers are unused by seccomp, so no need for
trampoline code AFAIK.

The rest of the patch just makes the JIT code suitable for sharing.
Only real change is that after this patch unused insns memory is freed.

The code is untested and even uncompiled, as I've only access to my 32-bit
laptop at the moment.

Would be interesting to know if this actually works and what the performance
difference is for seccomp.

Greetings,

Indan


 arch/x86/net/bpf_jit_comp.c |   47 ++++++++++++++++++++----------------------
 include/linux/filter.h      |   14 +++++++-----
 net/core/filter.c           |   27 ++++++++++++++++++++++--
 3 files changed, 54 insertions(+), 34 deletions(-)

---




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Will Drewry March 13, 2012, 3:43 p.m. UTC | #1
On Tue, Mar 13, 2012 at 5:04 AM, Indan Zupancic <indan@nul.nu> wrote:
> Hello,
>
> I made a quick pseudo-patch for BPF JIT support. As far as I can tell,
> the actual code itself is very simple, just:

Awesome - yet another reason this approach is nicer.  When I'm done
working up v15, I'll pull in this patch and see what explodes and/or
runs really fast.

cheers!
will

> case BPF_S_ANC_SECCOMP_LD_W:
>        /* SECCOMP doesn't use SKB, no need to preserve %rdi. */
>        t_offset = seccomp_bpf_load - (image + addrs[i]);
>        EMIT1_off32(0xbf, K); /* mov imm32,%rdi */
>        EMIT1_off32(0xe8, t_offset); /* call */
>        break;
>
> EAX is set directly as it's the return register, EBX is preserved by the
> callee, RDI and other registers are unused by seccomp, so no need for
> trampoline code AFAIK.
>
> The rest of the patch just makes the JIT code suitable for sharing.
> Only real change is that after this patch unused insns memory is freed.
>
> The code is untested and even uncompiled, as I've only access to my 32-bit
> laptop at the moment.
>
> Would be interesting to know if this actually works and what the performance
> difference is for seccomp.
>
> Greetings,
>
> Indan
>
>
>  arch/x86/net/bpf_jit_comp.c |   47 ++++++++++++++++++++----------------------
>  include/linux/filter.h      |   14 +++++++-----
>  net/core/filter.c           |   27 ++++++++++++++++++++++--
>  3 files changed, 54 insertions(+), 34 deletions(-)
>
> ---
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 7c1b765..3cd6626 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -118,7 +118,7 @@ static inline void bpf_flush_icache(void *start, void *end)
>  }
>
>
> -void bpf_jit_compile(struct sk_filter *fp)
> +bpf_func_t bpf_jit_compile(const struct sock_filter* filter, int flen, int use_skb)
>  {
>        u8 temp[64];
>        u8 *prog;
> @@ -131,15 +131,13 @@ void bpf_jit_compile(struct sk_filter *fp)
>        int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */
>        unsigned int cleanup_addr; /* epilogue code offset */
>        unsigned int *addrs;
> -       const struct sock_filter *filter = fp->insns;
> -       int flen = fp->len;
>
>        if (!bpf_jit_enable)
> -               return;
> +               return NULL;
>
>        addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
>        if (addrs == NULL)
> -               return;
> +               return NULL;
>
>        /* Before first pass, make a rough estimation of addrs[]
>         * each bpf instruction is translated to less than 64 bytes
> @@ -151,11 +149,16 @@ void bpf_jit_compile(struct sk_filter *fp)
>        cleanup_addr = proglen; /* epilogue address */
>
>        for (pass = 0; pass < 10; pass++) {
> -               u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;
> +               u8 seen_or_pass0 = seen;
>                /* no prologue/epilogue for trivial filters (RET something) */
>                proglen = 0;
>                prog = temp;
>
> +               if (pass == 0) {
> +                       seen_or_pass0 = SEEN_XREG | SEEN_MEM;
> +                       if (use_skb)
> +                               seen_or_pass0 |= SEEN_DATAREF;
> +               }
>                if (seen_or_pass0) {
>                        EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
>                        EMIT4(0x48, 0x83, 0xec, 96);    /* subq  $96,%rsp       */
> @@ -472,6 +475,14 @@ void bpf_jit_compile(struct sk_filter *fp)
>                                CLEAR_A();
>  #endif
>                                break;
> +#ifdef CONFIG_SECCOMP_FILTER
> +                       case BPF_S_ANC_SECCOMP_LD_W:
> +                               /* SECCOMP doesn't use SKB, no need to preserve %rdi. */
> +                               t_offset = seccomp_bpf_load - (image + addrs[i]);
> +                               EMIT1_off32(0xbf, K); /* mov imm32,%rdi */
> +                               EMIT1_off32(0xe8, t_offset); /* call */
> +                               break;
> +#endif
>                        case BPF_S_LD_W_ABS:
>                                func = sk_load_word;
>  common_load:                   seen |= SEEN_DATAREF;
> @@ -588,13 +599,14 @@ cond_branch:                      f_offset = addrs[i + filter[i].jf] - addrs[i];
>                                /* hmm, too complex filter, give up with jit compiler */
>                                goto out;
>                        }
> +                       BUG_ON(!use_skb && (seen & SEEN_DATAREF));
>                        ilen = prog - temp;
>                        if (image) {
>                                if (unlikely(proglen + ilen > oldproglen)) {
>                                        pr_err("bpb_jit_compile fatal error\n");
>                                        kfree(addrs);
>                                        module_free(NULL, image);
> -                                       return;
> +                                       return NULL;
>                                }
>                                memcpy(image + proglen, temp, ilen);
>                        }
> @@ -635,28 +647,13 @@ cond_branch:                      f_offset = addrs[i + filter[i].jf] - addrs[i];
>                                       16, 1, image, proglen, false);
>
>                bpf_flush_icache(image, image + proglen);
> -
> -               fp->bpf_func = (void *)image;
>        }
>  out:
>        kfree(addrs);
> -       return;
> +       return (void *)image;
>  }
>
> -static void jit_free_defer(struct work_struct *arg)
> +void bpf_jit_free(bpf_func_t image)
>  {
> -       module_free(NULL, arg);
> -}
> -
> -/* run from softirq, we must use a work_struct to call
> - * module_free() from process context
> - */
> -void bpf_jit_free(struct sk_filter *fp)
> -{
> -       if (fp->bpf_func != sk_run_filter) {
> -               struct work_struct *work = (struct work_struct *)fp->bpf_func;
> -
> -               INIT_WORK(work, jit_free_defer);
> -               schedule_work(work);
> -       }
> +       module_free(NULL, image);
>  }
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 8eeb205..292ccca 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -135,12 +135,13 @@ struct sock_fprog {       /* Required for SO_ATTACH_FILTER. */
>  struct sk_buff;
>  struct sock;
>
> +typedef unsigned int (*bpf_func_t)(const struct sk_buff*, const struct sock_filter*);
> +
>  struct sk_filter
>  {
>        atomic_t                refcnt;
>        unsigned int            len;    /* Number of filter blocks */
> -       unsigned int            (*bpf_func)(const struct sk_buff *skb,
> -                                           const struct sock_filter *filter);
> +       bpf_func_t              bpf_func;
>        struct rcu_head         rcu;
>        struct sock_filter      insns[0];
>  };
> @@ -158,14 +159,15 @@ extern int sk_detach_filter(struct sock *sk);
>  extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen);
>
>  #ifdef CONFIG_BPF_JIT
> -extern void bpf_jit_compile(struct sk_filter *fp);
> -extern void bpf_jit_free(struct sk_filter *fp);
> +extern bpf_func_t bpf_jit_compile(const struct sock_filter*, int flen, int use_skb);
> +extern void bpf_jit_free(bpf_funct_t);
>  #define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
>  #else
> -static inline void bpf_jit_compile(struct sk_filter *fp)
> +static inline bpf_func_t bpf_jit_compile(const struct sock_filter*, int flen, int use_skb)
>  {
> +       return NULL;
>  }
> -static inline void bpf_jit_free(struct sk_filter *fp)
> +static inline void bpf_jit_free(bpf_func_t)
>  {
>  }
>  #define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 5dea452..03e3ea3 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -574,6 +574,14 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
>  }
>  EXPORT_SYMBOL(sk_chk_filter);
>
> +/* run from softirq, we must use a work_struct to call
> + * bpf_jit_free() from process context
> + */
> +static void jit_free_defer(struct work_struct *arg)
> +{
> +       bpf_jit_free((bpf_func_t)arg);
> +}
> +
>  /**
>  *     sk_filter_release_rcu - Release a socket filter by rcu_head
>  *     @rcu: rcu_head that contains the sk_filter to free
> @@ -582,7 +590,12 @@ void sk_filter_release_rcu(struct rcu_head *rcu)
>  {
>        struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
>
> -       bpf_jit_free(fp);
> +       if (fp->bpf_func != sk_run_filter) {
> +               struct work_struct *work = (struct work_struct *)fp->bpf_func;
> +
> +               INIT_WORK(work, jit_free_defer);
> +               schedule_work(work);
> +       }
>        kfree(fp);
>  }
>  EXPORT_SYMBOL(sk_filter_release_rcu);
> @@ -599,9 +612,10 @@ EXPORT_SYMBOL(sk_filter_release_rcu);
>  */
>  int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
>  {
> -       struct sk_filter *fp, *old_fp;
> +       struct sk_filter *fp, *old_fp, *new_fp;
>        unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
>        int err;
> +       bpf_func_t jit;
>
>        /* Make sure new filter is there and in the right amounts. */
>        if (fprog->filter == NULL)
> @@ -625,7 +639,14 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
>                return err;
>        }
>
> -       bpf_jit_compile(fp);
> +       jit = bpf_jit_compile(fp->insns, fp->len, 1);
> +       if (jit) {
> +               fp->bpf_func = jit;
> +               /* Free unused insns memory */
> +               newfp = krealloc(fp, sizeof(*fp), GFP_KERNEL);
> +               if (newfp)
> +                       fp = newfp;
> +       }
>
>        old_fp = rcu_dereference_protected(sk->sk_filter,
>                                           sock_owned_by_user(sk));
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet March 13, 2012, 5:13 p.m. UTC | #2
On Tue, 2012-03-13 at 11:04 +0100, Indan Zupancic wrote:
> Hello,
> 
> I made a quick pseudo-patch for BPF JIT support. As far as I can tell,
> the actual code itself is very simple, just:
> 

> -	bpf_jit_compile(fp);
> +	jit = bpf_jit_compile(fp->insns, fp->len, 1);
> +	if (jit) {
> +		fp->bpf_func = jit;
> +		/* Free unused insns memory */
> +		newfp = krealloc(fp, sizeof(*fp), GFP_KERNEL);
> +		if (newfp)
> +			fp = newfp;
> +	}
> 
>  	old_fp = rcu_dereference_protected(sk->sk_filter,
>  					   sock_owned_by_user(sk));

Dont mix unrelated changes in a single patch.

Current krealloc() doesnt alloc a new area if the current one is bigger
than 'new_size', but this might change in next kernel versions.

So this part of your patch does nothing.

If it did, this kind of 'optimization' can actually be not good, because
sizeof(*fp) is small enough (less than half cache line size) to trigger
a possible false sharing issue. (other part of the cache line could be
used to hold a often dirtied object)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Indan Zupancic March 14, 2012, 5:12 a.m. UTC | #3
Hello,

On Tue, March 13, 2012 18:13, Eric Dumazet wrote:
> On Tue, 2012-03-13 at 11:04 +0100, Indan Zupancic wrote:
>> -	bpf_jit_compile(fp);
>> +	jit = bpf_jit_compile(fp->insns, fp->len, 1);
>> +	if (jit) {
>> +		fp->bpf_func = jit;
>> +		/* Free unused insns memory */
>> +		newfp = krealloc(fp, sizeof(*fp), GFP_KERNEL);
>> +		if (newfp)
>> +			fp = newfp;
>> +	}
>>
>>  	old_fp = rcu_dereference_protected(sk->sk_filter,
>>  					   sock_owned_by_user(sk));
>
> Dont mix unrelated changes in a single patch.

I know, but it was just a quick proof-of-concept. It just showed
a possible advantage of the API change for the current networking
code. The real patch would be split into an API change, the memory
freeing change, and the seccomp support.

>
> Current krealloc() doesnt alloc a new area if the current one is bigger
> than 'new_size', but this might change in next kernel versions.
>
> So this part of your patch does nothing.

Problem is that 'old_size' can be up to 32kB in size and it would be nice
if that memory could be released. If it isn't, then using JIT increases
memory usage, while also not accounting it to the socket.

>
> If it did, this kind of 'optimization' can actually be not good, because
> sizeof(*fp) is small enough (less than half cache line size) to trigger
> a possible false sharing issue. (other part of the cache line could be
> used to hold a often dirtied object)

It could avoid this by allocating at least a cache size. But this is a
problem for all small kmalloc's, isn't it?

What about something like the below:

@@ -625,7 +639,18 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 		return err;
 	}

-	bpf_jit_compile(fp);
+	jit = bpf_jit_compile(fp->insns, fp->len, 1);
+	if (jit) {
+		int size = max(cache_line_size(), sizeof(*fp));
+		fp->bpf_func = jit;
+		/* Free unused insns memory */
+		newfp = kmalloc(size, GFP_KERNEL);
+		if (newfp) {
+			memcpy(newfp, fp, size);
+			kfree(fp);
+			fp = newfp;
+		}
+	}

 	old_fp = rcu_dereference_protected(sk->sk_filter,
 					   sock_owned_by_user(sk));

Not sure whether to use cache_line_size() or just L1_CACHE_BYTES or
something else.

Greetings,

Indan


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet March 14, 2012, 5:55 a.m. UTC | #4
Le mercredi 14 mars 2012 à 06:12 +0100, Indan Zupancic a écrit :

> Problem is that 'old_size' can be up to 32kB in size and it would be nice
> if that memory could be released. If it isn't, then using JIT increases
> memory usage, while also not accounting it to the socket.
> 

It is accounted for, since jit size is in relation with standard filter
size. Check sock_kmalloc()

Fact we can have a litle underestimation was already the case without
jit, since kmalloc() does a roundup to next power of two.

I dont think this discussion has anything to do with SECCOMP anyway.

These accounting dont need to be 100% precise, we only want a limit to
prevent rogue users from using all kernel memory.

> >
> > If it did, this kind of 'optimization' can actually be not good, because
> > sizeof(*fp) is small enough (less than half cache line size) to trigger
> > a possible false sharing issue. (other part of the cache line could be
> > used to hold a often dirtied object)
> 
> It could avoid this by allocating at least a cache size. But this is a
> problem for all small kmalloc's, isn't it?

Its a problem that was already met on several critical paths :

# find net|xargs grep -n L1_CACHE_BYTES
net/core/dev_addr_lists.c:51:	if (alloc_size < L1_CACHE_BYTES)
net/core/dev_addr_lists.c:52:		alloc_size = L1_CACHE_BYTES;
net/core/net-sysfs.c:586:	    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
net/core/net-sysfs.c:1111:	    XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
net/ipv6/ip6_fib.c:1612:	size = max_t(size_t, size, L1_CACHE_BYTES);
net/ipv4/fib_frontend.c:1049:	size = max_t(size_t, size, L1_CACHE_BYTES);



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Indan Zupancic March 14, 2012, 7:59 a.m. UTC | #5
On Wed, March 14, 2012 06:55, Eric Dumazet wrote:
> Le mercredi 14 mars 2012 à 06:12 +0100, Indan Zupancic a écrit :
>
>> Problem is that 'old_size' can be up to 32kB in size and it would be nice
>> if that memory could be released. If it isn't, then using JIT increases
>> memory usage, while also not accounting it to the socket.
>>
>
> It is accounted for, since jit size is in relation with standard filter
> size. Check sock_kmalloc()
>
> Fact we can have a litle underestimation was already the case without
> jit, since kmalloc() does a roundup to next power of two.

OK.

> I dont think this discussion has anything to do with SECCOMP anyway.

Certainly true.

> These accounting dont need to be 100% precise, we only want a limit to
> prevent rogue users from using all kernel memory.

Fair enough.

The only remaining question is, is it worth the extra code to release
up to 32kB of unused memory? It seems a waste to not free it, but if
people think it's not worth it then let's just leave it around.

>> >
>> > If it did, this kind of 'optimization' can actually be not good, because
>> > sizeof(*fp) is small enough (less than half cache line size) to trigger
>> > a possible false sharing issue. (other part of the cache line could be
>> > used to hold a often dirtied object)
>>
>> It could avoid this by allocating at least a cache size. But this is a
>> problem for all small kmalloc's, isn't it?
>
> Its a problem that was already met on several critical paths :
>
> # find net|xargs grep -n L1_CACHE_BYTES
> net/core/dev_addr_lists.c:51:	if (alloc_size < L1_CACHE_BYTES)
> net/core/dev_addr_lists.c:52:		alloc_size = L1_CACHE_BYTES;
> net/core/net-sysfs.c:586:	    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
> net/core/net-sysfs.c:1111:	    XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
> net/ipv6/ip6_fib.c:1612:	size = max_t(size_t, size, L1_CACHE_BYTES);
> net/ipv4/fib_frontend.c:1049:	size = max_t(size_t, size, L1_CACHE_BYTES);

So using L1_CACHE_BYTES is the more common thing to do, I'll keep that in
mind for next time.

Thanks,

Indan


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet March 14, 2012, 8:05 a.m. UTC | #6
Le mercredi 14 mars 2012 à 08:59 +0100, Indan Zupancic a écrit :

> The only remaining question is, is it worth the extra code to release
> up to 32kB of unused memory? It seems a waste to not free it, but if
> people think it's not worth it then let's just leave it around.

Quite frankly its not an issue, given JIT BPF is not yet default
enabled.

I am not sure all bugs were found and fixed. I would warn users before
considering using it in production.

If you have time, I would appreciate if you could double check and find
last bugs in it.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7c1b765..3cd6626 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -118,7 +118,7 @@  static inline void bpf_flush_icache(void *start, void *end)
 }


-void bpf_jit_compile(struct sk_filter *fp)
+bpf_func_t bpf_jit_compile(const struct sock_filter* filter, int flen, int use_skb)
 {
 	u8 temp[64];
 	u8 *prog;
@@ -131,15 +131,13 @@  void bpf_jit_compile(struct sk_filter *fp)
 	int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */
 	unsigned int cleanup_addr; /* epilogue code offset */
 	unsigned int *addrs;
-	const struct sock_filter *filter = fp->insns;
-	int flen = fp->len;

 	if (!bpf_jit_enable)
-		return;
+		return NULL;

 	addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
 	if (addrs == NULL)
-		return;
+		return NULL;

 	/* Before first pass, make a rough estimation of addrs[]
 	 * each bpf instruction is translated to less than 64 bytes
@@ -151,11 +149,16 @@  void bpf_jit_compile(struct sk_filter *fp)
 	cleanup_addr = proglen; /* epilogue address */

 	for (pass = 0; pass < 10; pass++) {
-		u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;
+		u8 seen_or_pass0 = seen;
 		/* no prologue/epilogue for trivial filters (RET something) */
 		proglen = 0;
 		prog = temp;

+		if (pass == 0) {
+			seen_or_pass0 = SEEN_XREG | SEEN_MEM;
+			if (use_skb)
+				seen_or_pass0 |= SEEN_DATAREF;
+		}
 		if (seen_or_pass0) {
 			EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
 			EMIT4(0x48, 0x83, 0xec, 96);	/* subq  $96,%rsp	*/
@@ -472,6 +475,14 @@  void bpf_jit_compile(struct sk_filter *fp)
 				CLEAR_A();
 #endif
 				break;
+#ifdef CONFIG_SECCOMP_FILTER
+			case BPF_S_ANC_SECCOMP_LD_W:
+				/* SECCOMP doesn't use SKB, no need to preserve %rdi. */
+				t_offset = seccomp_bpf_load - (image + addrs[i]);
+				EMIT1_off32(0xbf, K); /* mov imm32,%rdi */
+				EMIT1_off32(0xe8, t_offset); /* call */
+				break;
+#endif
 			case BPF_S_LD_W_ABS:
 				func = sk_load_word;
 common_load:			seen |= SEEN_DATAREF;
@@ -588,13 +599,14 @@  cond_branch:			f_offset = addrs[i + filter[i].jf] - addrs[i];
 				/* hmm, too complex filter, give up with jit compiler */
 				goto out;
 			}
+			BUG_ON(!use_skb && (seen & SEEN_DATAREF));
 			ilen = prog - temp;
 			if (image) {
 				if (unlikely(proglen + ilen > oldproglen)) {
 					pr_err("bpb_jit_compile fatal error\n");
 					kfree(addrs);
 					module_free(NULL, image);
-					return;
+					return NULL;
 				}
 				memcpy(image + proglen, temp, ilen);
 			}
@@ -635,28 +647,13 @@  cond_branch:			f_offset = addrs[i + filter[i].jf] - addrs[i];
 				       16, 1, image, proglen, false);

 		bpf_flush_icache(image, image + proglen);
-
-		fp->bpf_func = (void *)image;
 	}
 out:
 	kfree(addrs);
-	return;
+	return (void *)image;
 }

-static void jit_free_defer(struct work_struct *arg)
+void bpf_jit_free(bpf_func_t image)
 {
-	module_free(NULL, arg);
-}
-
-/* run from softirq, we must use a work_struct to call
- * module_free() from process context
- */
-void bpf_jit_free(struct sk_filter *fp)
-{
-	if (fp->bpf_func != sk_run_filter) {
-		struct work_struct *work = (struct work_struct *)fp->bpf_func;
-
-		INIT_WORK(work, jit_free_defer);
-		schedule_work(work);
-	}
+	module_free(NULL, image);
 }
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 8eeb205..292ccca 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -135,12 +135,13 @@  struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 struct sk_buff;
 struct sock;

+typedef unsigned int (*bpf_func_t)(const struct sk_buff*, const struct sock_filter*);
+
 struct sk_filter
 {
 	atomic_t		refcnt;
 	unsigned int         	len;	/* Number of filter blocks */
-	unsigned int		(*bpf_func)(const struct sk_buff *skb,
-					    const struct sock_filter *filter);
+	bpf_func_t		bpf_func;
 	struct rcu_head		rcu;
 	struct sock_filter     	insns[0];
 };
@@ -158,14 +159,15 @@  extern int sk_detach_filter(struct sock *sk);
 extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen);

 #ifdef CONFIG_BPF_JIT
-extern void bpf_jit_compile(struct sk_filter *fp);
-extern void bpf_jit_free(struct sk_filter *fp);
+extern bpf_func_t bpf_jit_compile(const struct sock_filter*, int flen, int use_skb);
+extern void bpf_jit_free(bpf_funct_t);
 #define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
 #else
-static inline void bpf_jit_compile(struct sk_filter *fp)
+static inline bpf_func_t bpf_jit_compile(const struct sock_filter*, int flen, int use_skb)
 {
+	return NULL;
 }
-static inline void bpf_jit_free(struct sk_filter *fp)
+static inline void bpf_jit_free(bpf_func_t)
 {
 }
 #define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
diff --git a/net/core/filter.c b/net/core/filter.c
index 5dea452..03e3ea3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -574,6 +574,14 @@  int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 }
 EXPORT_SYMBOL(sk_chk_filter);

+/* run from softirq, we must use a work_struct to call
+ * bpf_jit_free() from process context
+ */
+static void jit_free_defer(struct work_struct *arg)
+{
+	bpf_jit_free((bpf_func_t)arg);
+}
+
 /**
  * 	sk_filter_release_rcu - Release a socket filter by rcu_head
  *	@rcu: rcu_head that contains the sk_filter to free
@@ -582,7 +590,12 @@  void sk_filter_release_rcu(struct rcu_head *rcu)
 {
 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

-	bpf_jit_free(fp);
+	if (fp->bpf_func != sk_run_filter) {
+		struct work_struct *work = (struct work_struct *)fp->bpf_func;
+
+		INIT_WORK(work, jit_free_defer);
+		schedule_work(work);
+	}
 	kfree(fp);
 }
 EXPORT_SYMBOL(sk_filter_release_rcu);
@@ -599,9 +612,10 @@  EXPORT_SYMBOL(sk_filter_release_rcu);
  */
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 {
-	struct sk_filter *fp, *old_fp;
+	struct sk_filter *fp, *old_fp, *new_fp;
 	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
 	int err;
+	bpf_func_t jit;

 	/* Make sure new filter is there and in the right amounts. */
 	if (fprog->filter == NULL)
@@ -625,7 +639,14 @@  int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 		return err;
 	}

-	bpf_jit_compile(fp);
+	jit = bpf_jit_compile(fp->insns, fp->len, 1);
+	if (jit) {
+		fp->bpf_func = jit;
+		/* Free unused insns memory */
+		newfp = krealloc(fp, sizeof(*fp), GFP_KERNEL);
+		if (newfp)
+			fp = newfp;
+	}

 	old_fp = rcu_dereference_protected(sk->sk_filter,
 					   sock_owned_by_user(sk));