Message ID | 0c55cb258e0b5bbd615923ee2a9f06b9.squirrel@webmail.greenhost.nl |
---|---|
State | Not Applicable, archived |
Delegated to: | David Miller |
Headers | show |
On Tue, Mar 13, 2012 at 5:04 AM, Indan Zupancic <indan@nul.nu> wrote: > Hello, > > I made a quick pseudo-patch for BPF JIT support. As far as I can tell, > the actual code itself is very simple, just: Awesome - yet another reason this approach is nicer. When I'm done working up v15, I'll pull in this patch and see what explodes and/or runs really fast. cheers! will > case BPF_S_ANC_SECCOMP_LD_W: > /* SECCOMP doesn't use SKB, no need to preserve %rdi. */ > t_offset = seccomp_bpf_load - (image + addrs[i]); > EMIT1_off32(0xbf, K); /* mov imm32,%rdi */ > EMIT1_off32(0xe8, t_offset); /* call */ > break; > > EAX is set directly as it's the return register, EBX is preserved by the > callee, RDI and other registers are unused by seccomp, so no need for > trampoline code AFAIK. > > The rest of the patch just makes the JIT code suitable for sharing. > Only real change is that after this patch unused insns memory is freed. > > The code is untested and even uncompiled, as I've only access to my 32-bit > laptop at the moment. > > Would be interesting to know if this actually works and what the performance > difference is for seccomp. 
> > Greetings, > > Indan > > > arch/x86/net/bpf_jit_comp.c | 47 ++++++++++++++++++++---------------------- > include/linux/filter.h | 14 +++++++----- > net/core/filter.c | 27 ++++++++++++++++++++++-- > 3 files changed, 54 insertions(+), 34 deletions(-) > > --- > > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c > index 7c1b765..3cd6626 100644 > --- a/arch/x86/net/bpf_jit_comp.c > +++ b/arch/x86/net/bpf_jit_comp.c > @@ -118,7 +118,7 @@ static inline void bpf_flush_icache(void *start, void *end) > } > > > -void bpf_jit_compile(struct sk_filter *fp) > +bpf_func_t bpf_jit_compile(const struct sock_filter* filter, int flen, int use_skb) > { > u8 temp[64]; > u8 *prog; > @@ -131,15 +131,13 @@ void bpf_jit_compile(struct sk_filter *fp) > int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */ > unsigned int cleanup_addr; /* epilogue code offset */ > unsigned int *addrs; > - const struct sock_filter *filter = fp->insns; > - int flen = fp->len; > > if (!bpf_jit_enable) > - return; > + return NULL; > > addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL); > if (addrs == NULL) > - return; > + return NULL; > > /* Before first pass, make a rough estimation of addrs[] > * each bpf instruction is translated to less than 64 bytes > @@ -151,11 +149,16 @@ void bpf_jit_compile(struct sk_filter *fp) > cleanup_addr = proglen; /* epilogue address */ > > for (pass = 0; pass < 10; pass++) { > - u8 seen_or_pass0 = (pass == 0) ? 
(SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen; > + u8 seen_or_pass0 = seen; > /* no prologue/epilogue for trivial filters (RET something) */ > proglen = 0; > prog = temp; > > + if (pass == 0) { > + seen_or_pass0 = SEEN_XREG | SEEN_MEM; > + if (use_skb) > + seen_or_pass0 |= SEEN_DATAREF; > + } > if (seen_or_pass0) { > EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ > EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ > @@ -472,6 +475,14 @@ void bpf_jit_compile(struct sk_filter *fp) > CLEAR_A(); > #endif > break; > +#ifdef CONFIG_SECCOMP_FILTER > + case BPF_S_ANC_SECCOMP_LD_W: > + /* SECCOMP doesn't use SKB, no need to preserve %rdi. */ > + t_offset = seccomp_bpf_load - (image + addrs[i]); > + EMIT1_off32(0xbf, K); /* mov imm32,%rdi */ > + EMIT1_off32(0xe8, t_offset); /* call */ > + break; > +#endif > case BPF_S_LD_W_ABS: > func = sk_load_word; > common_load: seen |= SEEN_DATAREF; > @@ -588,13 +599,14 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; > /* hmm, too complex filter, give up with jit compiler */ > goto out; > } > + BUG_ON(!use_skb && (seen & SEEN_DATAREF)); > ilen = prog - temp; > if (image) { > if (unlikely(proglen + ilen > oldproglen)) { > pr_err("bpb_jit_compile fatal error\n"); > kfree(addrs); > module_free(NULL, image); > - return; > + return NULL; > } > memcpy(image + proglen, temp, ilen); > } > @@ -635,28 +647,13 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; > 16, 1, image, proglen, false); > > bpf_flush_icache(image, image + proglen); > - > - fp->bpf_func = (void *)image; > } > out: > kfree(addrs); > - return; > + return (void *)image; > } > > -static void jit_free_defer(struct work_struct *arg) > +void bpf_jit_free(bpf_func_t image) > { > - module_free(NULL, arg); > -} > - > -/* run from softirq, we must use a work_struct to call > - * module_free() from process context > - */ > -void bpf_jit_free(struct sk_filter *fp) > -{ > - if (fp->bpf_func != sk_run_filter) { > - struct work_struct *work = (struct 
work_struct *)fp->bpf_func; > - > - INIT_WORK(work, jit_free_defer); > - schedule_work(work); > - } > + module_free(NULL, image); > } > diff --git a/include/linux/filter.h b/include/linux/filter.h > index 8eeb205..292ccca 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -135,12 +135,13 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ > struct sk_buff; > struct sock; > > +typedef unsigned int (*bpf_func_t)(const struct sk_buff*, const struct sock_filter*); > + > struct sk_filter > { > atomic_t refcnt; > unsigned int len; /* Number of filter blocks */ > - unsigned int (*bpf_func)(const struct sk_buff *skb, > - const struct sock_filter *filter); > + bpf_func_t bpf_func; > struct rcu_head rcu; > struct sock_filter insns[0]; > }; > @@ -158,14 +159,15 @@ extern int sk_detach_filter(struct sock *sk); > extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); > > #ifdef CONFIG_BPF_JIT > -extern void bpf_jit_compile(struct sk_filter *fp); > -extern void bpf_jit_free(struct sk_filter *fp); > +extern bpf_func_t bpf_jit_compile(const struct sock_filter*, int flen, int use_skb); > +extern void bpf_jit_free(bpf_funct_t); > #define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) > #else > -static inline void bpf_jit_compile(struct sk_filter *fp) > +static inline bpf_func_t bpf_jit_compile(const struct sock_filter*, int flen, int use_skb) > { > + return NULL; > } > -static inline void bpf_jit_free(struct sk_filter *fp) > +static inline void bpf_jit_free(bpf_func_t) > { > } > #define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns) > diff --git a/net/core/filter.c b/net/core/filter.c > index 5dea452..03e3ea3 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -574,6 +574,14 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) > } > EXPORT_SYMBOL(sk_chk_filter); > > +/* run from softirq, we must use a work_struct to call > + * bpf_jit_free() from process context > + */ > 
+static void jit_free_defer(struct work_struct *arg) > +{ > + bpf_jit_free((bpf_func_t)arg); > +} > + > /** > * sk_filter_release_rcu - Release a socket filter by rcu_head > * @rcu: rcu_head that contains the sk_filter to free > @@ -582,7 +590,12 @@ void sk_filter_release_rcu(struct rcu_head *rcu) > { > struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); > > - bpf_jit_free(fp); > + if (fp->bpf_func != sk_run_filter) { > + struct work_struct *work = (struct work_struct *)fp->bpf_func; > + > + INIT_WORK(work, jit_free_defer); > + schedule_work(work); > + } > kfree(fp); > } > EXPORT_SYMBOL(sk_filter_release_rcu); > @@ -599,9 +612,10 @@ EXPORT_SYMBOL(sk_filter_release_rcu); > */ > int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) > { > - struct sk_filter *fp, *old_fp; > + struct sk_filter *fp, *old_fp, *new_fp; > unsigned int fsize = sizeof(struct sock_filter) * fprog->len; > int err; > + bpf_func_t jit; > > /* Make sure new filter is there and in the right amounts. */ > if (fprog->filter == NULL) > @@ -625,7 +639,14 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) > return err; > } > > - bpf_jit_compile(fp); > + jit = bpf_jit_compile(fp->insns, fp->len, 1); > + if (jit) { > + fp->bpf_func = jit; > + /* Free unused insns memory */ > + newfp = krealloc(fp, sizeof(*fp), GFP_KERNEL); > + if (newfp) > + fp = newfp; > + } > > old_fp = rcu_dereference_protected(sk->sk_filter, > sock_owned_by_user(sk)); > > > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 2012-03-13 at 11:04 +0100, Indan Zupancic wrote: > Hello, > > I made a quick pseudo-patch for BPF JIT support. As far as I can tell, > the actual code itself is very simple, just: > > - bpf_jit_compile(fp); > + jit = bpf_jit_compile(fp->insns, fp->len, 1); > + if (jit) { > + fp->bpf_func = jit; > + /* Free unused insns memory */ > + newfp = krealloc(fp, sizeof(*fp), GFP_KERNEL); > + if (newfp) > + fp = newfp; > + } > > old_fp = rcu_dereference_protected(sk->sk_filter, > sock_owned_by_user(sk)); Dont mix unrelated changes in a single patch. Current krealloc() doesnt alloc a new area if the current one is bigger than 'new_size', but this might change in next kernel versions. So this part of your patch does nothing. If it did, this kind of 'optimization' can actually be not good, because sizeof(*fp) is small enough (less than half cache line size) to trigger a possible false sharing issue. (other part of the cache line could be used to hold a often dirtied object) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hello, On Tue, March 13, 2012 18:13, Eric Dumazet wrote: > On Tue, 2012-03-13 at 11:04 +0100, Indan Zupancic wrote: >> - bpf_jit_compile(fp); >> + jit = bpf_jit_compile(fp->insns, fp->len, 1); >> + if (jit) { >> + fp->bpf_func = jit; >> + /* Free unused insns memory */ >> + newfp = krealloc(fp, sizeof(*fp), GFP_KERNEL); >> + if (newfp) >> + fp = newfp; >> + } >> >> old_fp = rcu_dereference_protected(sk->sk_filter, >> sock_owned_by_user(sk)); > > Dont mix unrelated changes in a single patch. I know, but it was just a quick proof-of-concept. It just showed a possible advantage of the API change for the current networking code. The real patch would be split into an API change, the memory freeing change, and the seccomp support. > > Current krealloc() doesnt alloc a new area if the current one is bigger > than 'new_size', but this might change in next kernel versions. > > So this part of your patch does nothing. Problem is that 'old_size' can be up to 32kB in size and it would be nice if that memory could be released. If it isn't, then using JIT increases memory usage, while also not accounting it to the socket. > > If it did, this kind of 'optimization' can actually be not good, because > sizeof(*fp) is small enough (less than half cache line size) to trigger > a possible false sharing issue. (other part of the cache line could be > used to hold a often dirtied object) It could avoid this by allocating at least a cache size. But this is a problem for all small kmalloc's, isn't it? 
What about something like the below: @@ -625,7 +639,18 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return err; } - bpf_jit_compile(fp); + jit = bpf_jit_compile(fp->insns, fp->len, 1); + if (jit) { + int size = max(cache_line_size(), sizeof(*fp)); + fp->bpf_func = jit; + /* Free unused insns memory */ + newfp = kmalloc(size, GFP_KERNEL); + if (newfp) { + memcpy(newfp, fp, size); + kfree(fp); + fp = newfp; + } + } old_fp = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); Not sure whether to use cache_line_size() or just L1_CACHE_BYTES or something else. Greetings, Indan -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Le mercredi 14 mars 2012 à 06:12 +0100, Indan Zupancic a écrit : > Problem is that 'old_size' can be up to 32kB in size and it would be nice > if that memory could be released. If it isn't, then using JIT increases > memory usage, while also not accounting it to the socket. > It is accounted for, since jit size is in relation with standard filter size. Check sock_kmalloc() Fact we can have a little underestimation was already the case without jit, since kmalloc() does a roundup to next power of two. I dont think this discussion has anything to do with SECCOMP anyway. These accounting dont need to be 100% precise, we only want a limit to prevent rogue users from using all kernel memory. > > > > If it did, this kind of 'optimization' can actually be not good, because > > sizeof(*fp) is small enough (less than half cache line size) to trigger > > a possible false sharing issue. (other part of the cache line could be > > used to hold a often dirtied object) > > It could avoid this by allocating at least a cache size. But this is a > problem for all small kmalloc's, isn't it? Its a problem that was already met on several critical paths : # find net|xargs grep -n L1_CACHE_BYTES net/core/dev_addr_lists.c:51: if (alloc_size < L1_CACHE_BYTES) net/core/dev_addr_lists.c:52: alloc_size = L1_CACHE_BYTES; net/core/net-sysfs.c:586: RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), net/core/net-sysfs.c:1111: XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); net/ipv6/ip6_fib.c:1612: size = max_t(size_t, size, L1_CACHE_BYTES); net/ipv4/fib_frontend.c:1049: size = max_t(size_t, size, L1_CACHE_BYTES); -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, March 14, 2012 06:55, Eric Dumazet wrote: > Le mercredi 14 mars 2012 à 06:12 +0100, Indan Zupancic a écrit : > >> Problem is that 'old_size' can be up to 32kB in size and it would be nice >> if that memory could be released. If it isn't, then using JIT increases >> memory usage, while also not accounting it to the socket. >> > > It is accounted for, since jit size is in relation with standard filter > size. Check sock_kmalloc() > > Fact we can have a litle underestimation was already the case without > jit, since kmalloc() does a roundup to next power of two. OK. > I dont think this discussion has anything to do with SECCOMP anyway. Certainly true. > These accounting dont need to be 100% precise, we only want a limit to > prevent rogue users from using all kernel memory. Fair enough. The only remaining question is, is it worth the extra code to release up to 32kB of unused memory? It seems a waste to not free it, but if people think it's not worth it then let's just leave it around. >> > >> > If it did, this kind of 'optimization' can actually be not good, because >> > sizeof(*fp) is small enough (less than half cache line size) to trigger >> > a possible false sharing issue. (other part of the cache line could be >> > used to hold a often dirtied object) >> >> It could avoid this by allocating at least a cache size. But this is a >> problem for all small kmalloc's, isn't it? 
> > Its a problem that was already met on several critical paths : > > # find net|xargs grep -n L1_CACHE_BYTES > net/core/dev_addr_lists.c:51: if (alloc_size < L1_CACHE_BYTES) > net/core/dev_addr_lists.c:52: alloc_size = L1_CACHE_BYTES; > net/core/net-sysfs.c:586: RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), > net/core/net-sysfs.c:1111: XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); > net/ipv6/ip6_fib.c:1612: size = max_t(size_t, size, L1_CACHE_BYTES); > net/ipv4/fib_frontend.c:1049: size = max_t(size_t, size, L1_CACHE_BYTES); So using L1_CACHE_BYTES is the more common thing to do, I'll keep that in mind for next time. Thanks, Indan -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Le mercredi 14 mars 2012 à 08:59 +0100, Indan Zupancic a écrit : > The only remaining question is, is it worth the extra code to release > up to 32kB of unused memory? It seems a waste to not free it, but if > people think it's not worth it then let's just leave it around. Quite frankly its not an issue, given JIT BPF is not yet default enabled. I am not sure all bugs were found and fixed. I would warn users before considering using it in production. If you have time, I would appreciate if you could double check and find last bugs in it. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 7c1b765..3cd6626 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -118,7 +118,7 @@ static inline void bpf_flush_icache(void *start, void *end) } -void bpf_jit_compile(struct sk_filter *fp) +bpf_func_t bpf_jit_compile(const struct sock_filter* filter, int flen, int use_skb) { u8 temp[64]; u8 *prog; @@ -131,15 +131,13 @@ void bpf_jit_compile(struct sk_filter *fp) int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */ unsigned int cleanup_addr; /* epilogue code offset */ unsigned int *addrs; - const struct sock_filter *filter = fp->insns; - int flen = fp->len; if (!bpf_jit_enable) - return; + return NULL; addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL); if (addrs == NULL) - return; + return NULL; /* Before first pass, make a rough estimation of addrs[] * each bpf instruction is translated to less than 64 bytes @@ -151,11 +149,16 @@ void bpf_jit_compile(struct sk_filter *fp) cleanup_addr = proglen; /* epilogue address */ for (pass = 0; pass < 10; pass++) { - u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen; + u8 seen_or_pass0 = seen; /* no prologue/epilogue for trivial filters (RET something) */ proglen = 0; prog = temp; + if (pass == 0) { + seen_or_pass0 = SEEN_XREG | SEEN_MEM; + if (use_skb) + seen_or_pass0 |= SEEN_DATAREF; + } if (seen_or_pass0) { EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ @@ -472,6 +475,14 @@ void bpf_jit_compile(struct sk_filter *fp) CLEAR_A(); #endif break; +#ifdef CONFIG_SECCOMP_FILTER + case BPF_S_ANC_SECCOMP_LD_W: + /* SECCOMP doesn't use SKB, no need to preserve %rdi. 
*/ + t_offset = seccomp_bpf_load - (image + addrs[i]); + EMIT1_off32(0xbf, K); /* mov imm32,%rdi */ + EMIT1_off32(0xe8, t_offset); /* call */ + break; +#endif case BPF_S_LD_W_ABS: func = sk_load_word; common_load: seen |= SEEN_DATAREF; @@ -588,13 +599,14 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; /* hmm, too complex filter, give up with jit compiler */ goto out; } + BUG_ON(!use_skb && (seen & SEEN_DATAREF)); ilen = prog - temp; if (image) { if (unlikely(proglen + ilen > oldproglen)) { pr_err("bpb_jit_compile fatal error\n"); kfree(addrs); module_free(NULL, image); - return; + return NULL; } memcpy(image + proglen, temp, ilen); } @@ -635,28 +647,13 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; 16, 1, image, proglen, false); bpf_flush_icache(image, image + proglen); - - fp->bpf_func = (void *)image; } out: kfree(addrs); - return; + return (void *)image; } -static void jit_free_defer(struct work_struct *arg) +void bpf_jit_free(bpf_func_t image) { - module_free(NULL, arg); -} - -/* run from softirq, we must use a work_struct to call - * module_free() from process context - */ -void bpf_jit_free(struct sk_filter *fp) -{ - if (fp->bpf_func != sk_run_filter) { - struct work_struct *work = (struct work_struct *)fp->bpf_func; - - INIT_WORK(work, jit_free_defer); - schedule_work(work); - } + module_free(NULL, image); } diff --git a/include/linux/filter.h b/include/linux/filter.h index 8eeb205..292ccca 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -135,12 +135,13 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. 
*/ struct sk_buff; struct sock; +typedef unsigned int (*bpf_func_t)(const struct sk_buff*, const struct sock_filter*); + struct sk_filter { atomic_t refcnt; unsigned int len; /* Number of filter blocks */ - unsigned int (*bpf_func)(const struct sk_buff *skb, - const struct sock_filter *filter); + bpf_func_t bpf_func; struct rcu_head rcu; struct sock_filter insns[0]; }; @@ -158,14 +159,15 @@ extern int sk_detach_filter(struct sock *sk); extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); #ifdef CONFIG_BPF_JIT -extern void bpf_jit_compile(struct sk_filter *fp); -extern void bpf_jit_free(struct sk_filter *fp); +extern bpf_func_t bpf_jit_compile(const struct sock_filter*, int flen, int use_skb); +extern void bpf_jit_free(bpf_funct_t); #define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) #else -static inline void bpf_jit_compile(struct sk_filter *fp) +static inline bpf_func_t bpf_jit_compile(const struct sock_filter*, int flen, int use_skb) { + return NULL; } -static inline void bpf_jit_free(struct sk_filter *fp) +static inline void bpf_jit_free(bpf_func_t) { } #define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns) diff --git a/net/core/filter.c b/net/core/filter.c index 5dea452..03e3ea3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -574,6 +574,14 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) } EXPORT_SYMBOL(sk_chk_filter); +/* run from softirq, we must use a work_struct to call + * bpf_jit_free() from process context + */ +static void jit_free_defer(struct work_struct *arg) +{ + bpf_jit_free((bpf_func_t)arg); +} + /** * sk_filter_release_rcu - Release a socket filter by rcu_head * @rcu: rcu_head that contains the sk_filter to free @@ -582,7 +590,12 @@ void sk_filter_release_rcu(struct rcu_head *rcu) { struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); - bpf_jit_free(fp); + if (fp->bpf_func != sk_run_filter) { + struct work_struct *work = (struct work_struct 
*)fp->bpf_func; + + INIT_WORK(work, jit_free_defer); + schedule_work(work); + } kfree(fp); } EXPORT_SYMBOL(sk_filter_release_rcu); @@ -599,9 +612,10 @@ EXPORT_SYMBOL(sk_filter_release_rcu); */ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { - struct sk_filter *fp, *old_fp; + struct sk_filter *fp, *old_fp, *new_fp; unsigned int fsize = sizeof(struct sock_filter) * fprog->len; int err; + bpf_func_t jit; /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) @@ -625,7 +639,14 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return err; } - bpf_jit_compile(fp); + jit = bpf_jit_compile(fp->insns, fp->len, 1); + if (jit) { + fp->bpf_func = jit; + /* Free unused insns memory */ + newfp = krealloc(fp, sizeof(*fp), GFP_KERNEL); + if (newfp) + fp = newfp; + } old_fp = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk));