Message ID | 1472386756-23085-1-git-send-email-jhs@emojatatu.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
On Sun, Aug 28, 2016 at 08:19:16AM -0400, Jamal Hadi Salim wrote: > --- /dev/null > +++ b/include/uapi/linux/tc_act/tc_skbmod.h > @@ -0,0 +1,49 @@ > +/* > + * Copyright (c) 2016, Jamal Hadi Salim > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + * more details. > + * > + * You should have received a copy of the GNU General Public License along with > + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple > + * Place - Suite 330, Boston, MA 02111-1307 USA. the address is incorrect. That's why it's recommended to skip this section in all headers. First paragraph in the above 'This program is ... GPL' would have been enough.
On Sun, 2016-08-28 at 08:19 -0400, Jamal Hadi Salim wrote: > From: Jamal Hadi Salim <jhs@mojatatu.com> ... > +static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, > + struct tcf_result *res) > +{ > + struct tcf_skbmod *d = to_skbmod(a); > + > + spin_lock(&d->tcf_lock); > + tcf_lastuse_update(&d->tcf_tm); > + bstats_update(&d->tcf_bstats, skb); > + > + if (d->flags & SKBMOD_F_DMAC) > + ether_addr_copy(eth_hdr(skb)->h_dest, d->eth_dst); > + if (d->flags & SKBMOD_F_SMAC) > + ether_addr_copy(eth_hdr(skb)->h_source, d->eth_src); > + if (d->flags & SKBMOD_F_ETYPE) > + eth_hdr(skb)->h_proto = d->eth_type; > + if (d->flags & SKBMOD_F_SWAPMAC) { > + u8 tmpaddr[ETH_ALEN]; > + /*XXX: I am sure we can come up with something more efficient */ > + ether_addr_copy(tmpaddr, eth_hdr(skb)->h_dest); > + ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); > + ether_addr_copy(eth_hdr(skb)->h_source, tmpaddr); > + } > + > + spin_unlock(&d->tcf_lock); > + return d->tcf_action; > +} Adding an action with a spinlock held in fast path in 2016 is a way to tell people : It is a toy, do not use it for real. Sorry guys. Friends do not let friends do that anymore.
On Sun, Aug 28, 2016 at 9:07 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > Adding an action with a spinlock held in fast path in 2016 is > a way to tell people : It is a toy, do not use it for real. > > Sorry guys. Friends do not let friends do that anymore. > Please stop joking, this is not funny at all: % git grep tcf_lock -- net/sched
On 16-08-28 10:48 AM, Alexei Starovoitov wrote: > On Sun, Aug 28, 2016 at 08:19:16AM -0400, Jamal Hadi Salim wrote: > the address is incorrect. > That's why it's recommended to skip this section in all headers. > First paragraph in the above 'This program is ... GPL' would have been enough. > I am sure i cutnpasted that from some other file in net/sched. Will fix. cheers, jamal
On 16-08-28 12:07 PM, Eric Dumazet wrote: > Adding an action with a spinlock held in fast path in 2016 is > a way to tell people : It is a toy, do not use it for real. > > Sorry guys. Friends do not let friends do that anymore. Generally agree. Cong says he is working on generic patches. Let me see if i can try something. Trickery with RCU is not one of my virtues - so i may get it wrong but hope you can comment. cheers, jamal
On 08/29/2016 12:38 PM, Jamal Hadi Salim wrote: > On 16-08-28 12:07 PM, Eric Dumazet wrote: > >> Adding an action with a spinlock held in fast path in 2016 is >> a way to tell people : It is a toy, do not use it for real. >> >> Sorry guys. Friends do not let friends do that anymore. > > Generally agree. > Cong says he is working on generic patches. Let me see if i can > try something. Trickery with RCU is not one of my virtues - > so i may get it wrong but hope you can comment. You could probably just keep these meta data in a separate struct managed by RCU that tcf_skbmod points to. One more thing I commented on your last patch already is that you would also need f.e. skb_ensure_writable() before mangling.
On 16-08-29 06:38 AM, Jamal Hadi Salim wrote: > Let me see if i can > try something. Trickery with RCU is not one of my virtues - > so i may get it wrong but hope you can comment. Something like attached? Compile tested. I tried to emulate gact - but greping around I havent seen sample code which uses rcu_read_un/lock + synchronize_rcu this way [so be gentle if i got it wrong ;->] I am not sure where Cong is going but probably introducing a rcu head in each policy will allow to swap and free old one with call_rcu may be a better approach. The cost may come in slowing down replace operations. cheers, jamal static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_skbmod *d = to_skbmod(a); int action = READ_ONCE(d->tcf_action); u64 flags = READ_ONCE(d->flags); tcf_lastuse_update(&d->tcf_tm); bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); if (unlikely(action == TC_ACT_SHOT)) { qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats)); return action; } flags = READ_ONCE(d->flags); rcu_read_lock(); if (flags & SKBMOD_F_DMAC) ether_addr_copy(eth_hdr(skb)->h_dest, d->eth_dst); if (flags & SKBMOD_F_SMAC) ether_addr_copy(eth_hdr(skb)->h_source, d->eth_src); if (flags & SKBMOD_F_ETYPE) eth_hdr(skb)->h_proto = d->eth_type; if (flags & SKBMOD_F_SWAPMAC) { u8 tmpaddr[ETH_ALEN]; /*XXX: I am sure we can come up with something more efficient */ ether_addr_copy(tmpaddr, eth_hdr(skb)->h_dest); ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); ether_addr_copy(eth_hdr(skb)->h_source, tmpaddr); } rcu_read_unlock(); return action; } static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); struct nlattr *tb[TCA_SKBMOD_MAX + 1]; struct tc_skbmod *parm; struct tcf_skbmod *d; u32 lflags = 0; u8 *daddr = NULL; u8 *saddr = NULL; u16 eth_type = 0; bool exists = false; int ret = 0, err; if (nla == NULL) return -EINVAL; err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy); if (err < 0) return err; if (tb[TCA_SKBMOD_PARMS] == NULL) return -EINVAL; if (tb[TCA_SKBMOD_DMAC]) { daddr = nla_data(tb[TCA_SKBMOD_DMAC]); lflags |= SKBMOD_F_DMAC; } if (tb[TCA_SKBMOD_SMAC]) { saddr = nla_data(tb[TCA_SKBMOD_SMAC]); lflags |= SKBMOD_F_SMAC; } if (tb[TCA_SKBMOD_ETYPE]) { eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]); lflags |= SKBMOD_F_ETYPE; } parm = nla_data(tb[TCA_SKBMOD_PARMS]); if (parm->flags & SKBMOD_F_SWAPMAC) lflags = SKBMOD_F_SWAPMAC; exists = tcf_hash_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!lflags) { return -EINVAL; } if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, &act_skbmod_ops, bind, false); if (ret) return ret; d = to_skbmod(*a); ret = ACT_P_CREATED; } else { d = to_skbmod(*a); tcf_hash_release(*a, bind); if (!ovr) return -EEXIST; } ASSERT_RTNL(); d->flags = lflags; d->tcf_action = parm->action; if (ovr) spin_lock_bh(&d->tcf_lock); if (lflags & SKBMOD_F_DMAC) ether_addr_copy(d->eth_dst, daddr); if (lflags & SKBMOD_F_SMAC) ether_addr_copy(d->eth_src, saddr); if (lflags & SKBMOD_F_ETYPE) d->eth_type = htons(eth_type); if (ovr) { spin_unlock_bh(&d->tcf_lock); synchronize_rcu(); } if (ret == ACT_P_CREATED) tcf_hash_insert(tn, *a); return ret; }
On 16-08-29 07:00 AM, Daniel Borkmann wrote: > You could probably just keep these meta data in a separate struct > managed by RCU that tcf_skbmod points to. > Which seem like could be made generic for all actions. Maybe thats what Cong is thinking. > One more thing I commented on your last patch already is that you > would also need f.e. skb_ensure_writable() before mangling. Sorry missed that. Let me give it an attempt. I think challenge is going to be what length to use. For now it is ethernet; but i had a change which swapped VLANs that i took out. cheers, jamal
On 16-08-29 07:40 AM, Jamal Hadi Salim wrote: > On 16-08-29 07:00 AM, Daniel Borkmann wrote: > Sorry missed that. Let me give it an attempt. I think challenge is > going to be what length to use. For now it is ethernet; but i had > a change which swapped VLANs that i took out. > something like this? ----- /* XXX: if you are going to edit more fields beyond ethernet header * (example when you add IP header replacement or vlan swap) * then MAX_EDIT_LEN needs to change appropriately */ #define MAX_EDIT_LEN ETH_HLEN static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_skbmod *d = to_skbmod(a); int action = READ_ONCE(d->tcf_action); int err; u64 flags; err = skb_ensure_writable(skb, ETH_HLEN); if (unlikely(err)) /* best policy is to drop on the floor */ action = TC_ACT_SHOT; tcf_lastuse_update(&d->tcf_tm); bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); if (unlikely(action == TC_ACT_SHOT)) { qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats)); return action; } ---- cheers, jamal
On Sun, 2016-08-28 at 22:10 -0700, Cong Wang wrote: > On Sun, Aug 28, 2016 at 9:07 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > > > Adding an action with a spinlock held in fast path in 2016 is > > a way to tell people : It is a toy, do not use it for real. > > > > Sorry guys. Friends do not let friends do that anymore. > > > > Please stop joking, this is not funny at all: > > % git grep tcf_lock -- net/sched This is absolutely trivial to write this skbmod without taking a lock. If you guys do not care about that, I will simply NACK any new lazy code. Do not try to call me funny, simply acknowledge the mere facts and try to keep your calm Cong.
On Mon, 2016-08-29 at 07:35 -0400, Jamal Hadi Salim wrote: > flags = READ_ONCE(d->flags); > rcu_read_lock(); > if (flags & SKBMOD_F_DMAC) > ether_addr_copy(eth_hdr(skb)->h_dest, d->eth_dst); > if (flags & SKBMOD_F_SMAC) > ether_addr_copy(eth_hdr(skb)->h_source, d->eth_src); > if (flags & SKBMOD_F_ETYPE) > eth_hdr(skb)->h_proto = d->eth_type; > if (flags & SKBMOD_F_SWAPMAC) { > u8 tmpaddr[ETH_ALEN]; > /*XXX: I am sure we can come up with something more > efficient */ > ether_addr_copy(tmpaddr, eth_hdr(skb)->h_dest); > ether_addr_copy(eth_hdr(skb)->h_dest, > eth_hdr(skb)->h_source); > ether_addr_copy(eth_hdr(skb)->h_source, tmpaddr); > } > rcu_read_unlock(); You would need to store everything in an object, managed by rcu. struct my_rcu_safe_struct { u32 flags; u8 eth_dst[ETH_ALEN]; u8 eth_src[ETH_ALEN]; __be16 eth_type; }; And then allocate a new one when you need to update the infos (from tcf_skbmod_init(()) RCU : Read Copy Update. Then in the reader you would use rcu_read_lock(); myptr = rcu_dereference(d->ptr); if (myptr) { if (myptr->flags & SKBMOD_F_DMAC) ether_addr_copy(eth_hdr(skb)->h_dest, myptr->eth_dst); if (myptr->flags & SKBMOD_F_SMAC) ether_addr_copy(eth_hdr(skb)->h_source, myptr->eth_src); if (myptr->flags & SKBMOD_F_ETYPE) eth_hdr(skb)->h_proto = myptr->eth_type; } rcu_read_unlock();
On 16-08-29 02:20 PM, Eric Dumazet wrote: > > You would need to store everything in an object, managed by rcu. > > struct my_rcu_safe_struct { > u32 flags; > u8 eth_dst[ETH_ALEN]; > u8 eth_src[ETH_ALEN]; > __be16 eth_type; > }; > > And then allocate a new one when you need to update the infos (from > tcf_skbmod_init(()) > > RCU : Read Copy Update. > > Then in the reader you would use > > rcu_read_lock(); > myptr = rcu_dereference(d->ptr); > if (myptr) { > if (myptr->flags & SKBMOD_F_DMAC) > ether_addr_copy(eth_hdr(skb)->h_dest, myptr->eth_dst); > if (myptr->flags & SKBMOD_F_SMAC) > ether_addr_copy(eth_hdr(skb)->h_source, myptr->eth_src); > if (myptr->flags & SKBMOD_F_ETYPE) > eth_hdr(skb)->h_proto = myptr->eth_type; > } > rcu_read_unlock(); Thanks Eric. This is the approach i thought Cong was going to take but in a way that it applies to all actions. It requires I do this extra allocation per update/create - I am not sure how much of a big deal that is (we take pride in our update rate). Let me work and post something simple that captures these ideas then wait to see what Cong has in mind for the general approach. cheers, jamal
On 16-08-30 07:12 AM, Jamal Hadi Salim wrote: > > Thanks Eric. > This is the approach i thought Cong was going to take but in a way that > it applies to all actions. > It requires I do this extra allocation per update/create - I am not sure > how much of a big deal that is (we take pride in our update rate). > Let me work and post something simple that captures these ideas > then wait to see what Cong has in mind for the general approach. As attached here ... compile tested. cheers, jamal From header file: ----- struct tcf_skbmod_params { u64 flags; /*up to 64 types of operations; extend if needed */ u8 eth_dst[ETH_ALEN]; u16 eth_type; u8 eth_src[ETH_ALEN]; }; struct tcf_skbmod { struct tc_action common; struct tcf_skbmod_params *skbmod_p; }; #define to_skbmod(a) ((struct tcf_skbmod *)a) ------ #define MAX_EDIT_LEN ETH_HLEN static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_skbmod *d = to_skbmod(a); int action = READ_ONCE(d->tcf_action); struct tcf_skbmod_params *p; u64 flags; int err; /* XXX: if you are going to edit more fields beyond ethernet header * (example when you add IP header replacement or vlan swap) * then MAX_EDIT_LEN needs to change appropriately */ err = skb_ensure_writable(skb, ETH_HLEN); if (unlikely(err)) /* best policy is to drop on the floor */ action = TC_ACT_SHOT; tcf_lastuse_update(&d->tcf_tm); bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); if (unlikely(action == TC_ACT_SHOT)) { qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats)); return action; } rcu_read_lock(); p = rcu_dereference(d->skbmod_p); flags = p->flags; if (flags & SKBMOD_F_DMAC) ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst); if (flags & SKBMOD_F_SMAC) ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src); if (flags & SKBMOD_F_ETYPE) eth_hdr(skb)->h_proto = p->eth_type; if (flags & SKBMOD_F_SWAPMAC) { u8 tmpaddr[ETH_ALEN]; /*XXX: I am sure we can come up with something more efficient */ ether_addr_copy(tmpaddr, eth_hdr(skb)->h_dest); ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); ether_addr_copy(eth_hdr(skb)->h_source, tmpaddr); } rcu_read_unlock(); return action; } static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); struct nlattr *tb[TCA_SKBMOD_MAX + 1]; struct tc_skbmod *parm; struct tcf_skbmod *d; struct tcf_skbmod_params *p, *p_old; u32 lflags = 0; u8 *daddr = NULL; u8 *saddr = NULL; u16 eth_type = 0; bool exists = false; int ret = 0, err; if (nla == NULL) return -EINVAL; err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy); if (err < 0) return err; if (tb[TCA_SKBMOD_PARMS] == NULL) return -EINVAL; if (tb[TCA_SKBMOD_DMAC]) { daddr = nla_data(tb[TCA_SKBMOD_DMAC]); lflags |= SKBMOD_F_DMAC; } if (tb[TCA_SKBMOD_SMAC]) { saddr = nla_data(tb[TCA_SKBMOD_SMAC]); lflags |= SKBMOD_F_SMAC; } if (tb[TCA_SKBMOD_ETYPE]) { eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]); lflags |= SKBMOD_F_ETYPE; } parm = nla_data(tb[TCA_SKBMOD_PARMS]); if (parm->flags & SKBMOD_F_SWAPMAC) lflags = SKBMOD_F_SWAPMAC; exists = tcf_hash_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!lflags) { return -EINVAL; } if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, &act_skbmod_ops, bind, false); if (ret) return ret; d = to_skbmod(*a); ret = ACT_P_CREATED; } else { d = to_skbmod(*a); tcf_hash_release(*a, bind); if (!ovr) return -EEXIST; } ASSERT_RTNL(); p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely (!p)) { if (ovr) tcf_hash_release(*a, bind); return -ENOMEM; } p->flags = lflags; d->tcf_action = parm->action; p_old = rtnl_dereference(d->skbmod_p); if (ovr) spin_lock_bh(&d->tcf_lock); if (lflags & SKBMOD_F_DMAC) ether_addr_copy(p->eth_dst, daddr); if (lflags & SKBMOD_F_SMAC) ether_addr_copy(p->eth_src, saddr); if (lflags & SKBMOD_F_ETYPE) p->eth_type = htons(eth_type); rcu_assign_pointer(d->skbmod_p, p); if (ovr) { spin_unlock_bh(&d->tcf_lock); synchronize_rcu(); } kfree(p_old); if (ret == ACT_P_CREATED) tcf_hash_insert(tn, *a); return ret; }
On Tue, 2016-08-30 at 07:12 -0400, Jamal Hadi Salim wrote: > On 16-08-29 02:20 PM, Eric Dumazet wrote: > > > > > You would need to store everything in an object, managed by rcu. > > > > struct my_rcu_safe_struct { > > u32 flags; > > u8 eth_dst[ETH_ALEN]; > > u8 eth_src[ETH_ALEN]; > > __be16 eth_type; > > }; > > > > And then allocate a new one when you need to update the infos (from > > tcf_skbmod_init(()) > > > > RCU : Read Copy Update. > > > > Then in the reader you would use > > > > rcu_read_lock(); > > myptr = rcu_dereference(d->ptr); > > if (myptr) { > > if (myptr->flags & SKBMOD_F_DMAC) > > ether_addr_copy(eth_hdr(skb)->h_dest, myptr->eth_dst); > > if (myptr->flags & SKBMOD_F_SMAC) > > ether_addr_copy(eth_hdr(skb)->h_source, myptr->eth_src); > > if (myptr->flags & SKBMOD_F_ETYPE) > > eth_hdr(skb)->h_proto = myptr->eth_type; > > } > > rcu_read_unlock(); > > Thanks Eric. > This is the approach i thought Cong was going to take but in a way that > it applies to all actions. > It requires I do this extra allocation per update/create - I am not sure > how much of a big deal that is (we take pride in our update rate). > Let me work and post something simple that captures these ideas > then wait to see what Cong has in mind for the general approach. The percpu stats infra is already there, you have to change tcf_hash_create() last parameter to 'true'. For example you have to add in struct my_rcu_safe_struct a 'struct rcu_head rcu;' field to be able to use kfree_rcu() instead of synchronize_rcu() Some very simple actions do not even need rcu_dereference() RCU have many different variants. Sometimes READ_ONCE() and WRITE_ONCE() are enough, when all the 'flags' can be in a single integer, and this avoids an extra dereference and possible cache miss.
On Tue, 2016-08-30 at 07:57 -0400, Jamal Hadi Salim wrote: > rcu_assign_pointer(d->skbmod_p, p); > if (ovr) { > spin_unlock_bh(&d->tcf_lock); > synchronize_rcu(); > } > > kfree(p_old); Overall patch looks good Jamal, thanks. synchronize_rcu() might bee to expensive if you plan to change actions hundred of times per second. You could instead add a 'struct rcu_head rcu;' field in struct tcf_skbmod_params (but make sure this is not exported to user space) Then : if (ovr) spin_unlock_bh(&d->tcf_lock); kfree_rcu(p_old, rcu);
On Tue, 2016-08-30 at 07:57 -0400, Jamal Hadi Salim wrote: > if (flags & SKBMOD_F_SWAPMAC) { > u8 tmpaddr[ETH_ALEN]; > /*XXX: I am sure we can come up with something more efficient */ > ether_addr_copy(tmpaddr, eth_hdr(skb)->h_dest); > ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); > ether_addr_copy(eth_hdr(skb)->h_source, tmpaddr); > } While ether_addr_copy() is accepting u8 pointers, data must be aligned to u16 at least. (See comments in include/linux/etherdevice.h) Some arches/compilers might do things here that would generate a trap if tmpaddr is not aligned.
On 16-08-30 08:35 AM, Eric Dumazet wrote: > synchronize_rcu() might bee to expensive if you plan to change actions > hundred of times per second. > > You could instead add a 'struct rcu_head rcu;' field in struct > tcf_skbmod_params (but make sure this is not exported to user space) > > Then : > > if (ovr) > spin_unlock_bh(&d->tcf_lock); > kfree_rcu(p_old, rcu); > Ok, working on this variant. Will post today or tommorow. cheers, jamal
ng On 16-08-30 08:44 AM, Eric Dumazet wrote: > On Tue, 2016-08-30 at 07:57 -0400, Jamal Hadi Salim wrote: >> if (flags & SKBMOD_F_SWAPMAC) { >> u8 tmpaddr[ETH_ALEN]; >> /*XXX: I am sure we can come up with something more efficient */ >> ether_addr_copy(tmpaddr, eth_hdr(skb)->h_dest); >> ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); >> ether_addr_copy(eth_hdr(skb)->h_source, tmpaddr); >> } > > While ether_addr_copy() is accepting u8 pointers, data must be aligned to u16 at least. > > (See comments in include/linux/etherdevice.h) > > Some arches/compilers might do things here that would generate a trap > if tmpaddr is not aligned. > Hrm. How do you suggest dealing with this? cheers, jamal
On Tue, 2016-09-06 at 08:08 -0400, Jamal Hadi Salim wrote: > ng > On 16-08-30 08:44 AM, Eric Dumazet wrote: > > On Tue, 2016-08-30 at 07:57 -0400, Jamal Hadi Salim wrote: > >> if (flags & SKBMOD_F_SWAPMAC) { > >> u8 tmpaddr[ETH_ALEN]; > >> /*XXX: I am sure we can come up with something more efficient */ > >> ether_addr_copy(tmpaddr, eth_hdr(skb)->h_dest); > >> ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); > >> ether_addr_copy(eth_hdr(skb)->h_source, tmpaddr); > >> } > > > > While ether_addr_copy() is accepting u8 pointers, data must be aligned to u16 at least. > > > > (See comments in include/linux/etherdevice.h) > > > > Some arches/compilers might do things here that would generate a trap > > if tmpaddr is not aligned. > > > > Hrm. How do you suggest dealing with this? Just use u16 in the array ? u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest); ...
On 16-09-06 10:25 AM, Eric Dumazet wrote: > > Just use u16 in the array ? > > u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ > > ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest); > ... > Ok - thanks Eric. BTW: Nobody has answered my other question: I can get stats to increment with: bstats_update(&d->tcf_bstats, skb); but not: bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); Note, test is on a VM. cheers, jamal
On Thu, 2016-09-08 at 06:38 -0400, Jamal Hadi Salim wrote: > On 16-09-06 10:25 AM, Eric Dumazet wrote: > > > > > Just use u16 in the array ? > > > > u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ > > > > ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest); > > ... > > > > Ok - thanks Eric. BTW: > Nobody has answered my other question: > I can get stats to increment with: > bstats_update(&d->tcf_bstats, skb); > but not: > bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); This simply works for me. But maybe you read the stats at the wrong place ? Pleas give us more details, thanks ! lpaa24:~# tc -s action list action mirred ; tc -s -d qd sh dev ifb10 action order 0: mirred (Egress Redirect to device ifb10) stolen index 3 ref 1 bind 1 installed 18 sec Action statistics: Sent 6647111559 bytes 4433436 pkt (dropped 0, overlimits 14473 requeues 0) backlog 0b 0p requeues 0 qdisc mq 1: root Sent 6394816791 bytes 4225732 pkt (dropped 88025, overlimits 0 requeues 0) backlog 108069552b 71406p requeues 0 qdisc netem 8016: parent 1:5 limit 100000 delay 50.0ms loss 2% Sent 776873149 bytes 513412 pkt (dropped 10766, overlimits 0 requeues 0) rate 721506Kbit 59595pps backlog 6952420b 4594p requeues 0 qdisc netem 8018: parent 1:7 limit 100000 delay 50.0ms loss 2% Sent 1290830041 bytes 852967 pkt (dropped 18497, overlimits 0 requeues 0) rate 1246Mbit 102941pps backlog 76607984b 50616p requeues 0 qdisc netem 8013: parent 1:2 limit 100000 delay 50.0ms loss 2% Sent 783039400 bytes 517411 pkt (dropped 10512, overlimits 0 requeues 0) rate 471897Kbit 38979pps backlog 688870b 455p requeues 0 qdisc netem 8015: parent 1:4 limit 100000 delay 50.0ms loss 2% Sent 715503442 bytes 472806 pkt (dropped 9734, overlimits 0 requeues 0) rate 432373Kbit 35713pps backlog 1627550b 1075p requeues 0 qdisc netem 8019: parent 1:8 limit 100000 delay 50.0ms loss 2% Sent 735599060 bytes 486064 pkt (dropped 9915, overlimits 0 requeues 0) rate 441280Kbit 36448pps backlog 2747910b 1815p requeues 0 qdisc netem 8017: parent 1:6 limit 100000 delay 50.0ms loss 2% Sent 755679663 bytes 499311 pkt (dropped 10130, overlimits 0 requeues 0) rate 445158Kbit 36765pps backlog 2174290b 1439p requeues 0 qdisc netem 8014: parent 1:3 limit 100000 delay 50.0ms loss 2% Sent 689812958 bytes 455833 pkt (dropped 9583, overlimits 0 requeues 0) rate 566772Kbit 46812pps backlog 13988176b 9244p requeues 0 qdisc netem 8012: parent 1:1 limit 100000 delay 50.0ms loss 2% Sent 647479078 bytes 427928 pkt (dropped 8888, overlimits 0 requeues 0) rate 559420Kbit 46207pps backlog 3282352b 2168p requeues 0
On Thu, 2016-09-08 at 09:02 -0700, Eric Dumazet wrote: > On Thu, 2016-09-08 at 06:38 -0400, Jamal Hadi Salim wrote: > > On 16-09-06 10:25 AM, Eric Dumazet wrote: > > > > > > > > Just use u16 in the array ? > > > > > > u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ > > > > > > ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest); > > > ... > > > > > > > Ok - thanks Eric. BTW: > > Nobody has answered my other question: > > I can get stats to increment with: > > bstats_update(&d->tcf_bstats, skb); > > but not: > > bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); > > This simply works for me. > > But maybe you read the stats at the wrong place ? > > Pleas give us more details, thanks ! And double check the tcf_hash_create() call : Last parameter is a boolean, and you must set it to true if you want per cpu stats !
On 16-09-08 12:11 PM, Eric Dumazet wrote: > On Thu, 2016-09-08 at 09:02 -0700, Eric Dumazet wrote: >> On Thu, 2016-09-08 at 06:38 -0400, Jamal Hadi Salim wrote: >>> On 16-09-06 10:25 AM, Eric Dumazet wrote: >> This simply works for me. >> >> But maybe you read the stats at the wrong place ? >> >> Pleas give us more details, thanks ! > > And double check the tcf_hash_create() call : > > Last parameter is a boolean, and you must set it to true if you want per > cpu stats ! > Dammit, yes that was it;-> Sigh. I shouldve caught it earlier. Thanks! Now i feel more peaceful submitting the patch. cheers, jamal >
diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h new file mode 100644 index 0000000..8ea0b25 --- /dev/null +++ b/include/net/tc_act/tc_skbmod.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016, Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. + * + * Author: Jamal Hadi Salim <jhs@mojatatu.com> + */ + +#ifndef __NET_TC_SKBMOD_H +#define __NET_TC_SKBMOD_H + +#include <net/act_api.h> +#include <linux/tc_act/tc_skbmod.h> + +struct tcf_skbmod { + struct tc_action common; + u64 flags; /*up to 64 types of operations; extend if needed */ + u8 eth_dst[ETH_ALEN]; + u16 eth_type; + u8 eth_src[ETH_ALEN]; +}; +#define to_skbmod(a) ((struct tcf_skbmod *)a) + +#endif /* __NET_TC_SKBMOD_H */ diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h new file mode 100644 index 0000000..a1fdff5 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Jamal Hadi Salim + */ + +#ifndef __LINUX_TC_SKBMOD_H +#define __LINUX_TC_SKBMOD_H + +#include <linux/pkt_cls.h> + +#define TCA_ACT_SKBMOD 15 + +#define SKBMOD_F_DMAC 0x1 +#define SKBMOD_F_SMAC 0x2 +#define SKBMOD_F_ETYPE 0x4 +#define SKBMOD_F_SWAPMAC 0x8 + +struct tc_skbmod { + tc_gen; + __u64 flags; +}; + +enum { + TCA_SKBMOD_UNSPEC, + TCA_SKBMOD_TM, + TCA_SKBMOD_PARMS, + TCA_SKBMOD_DMAC, + TCA_SKBMOD_SMAC, + TCA_SKBMOD_ETYPE, + TCA_SKBMOD_PAD, + __TCA_SKBMOD_MAX +}; +#define TCA_SKBMOD_MAX (__TCA_SKBMOD_MAX - 1) + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index ccf931b..34b556d 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -749,6 +749,17 @@ config NET_ACT_CONNMARK To compile this code as a module, choose M here: the module will be called act_connmark. +config NET_ACT_SKBMOD + tristate "skb data modification action" + depends on NET_CLS_ACT + ---help--- + Say Y here to allow modification of skb data + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_skbmod. + config NET_ACT_IFE tristate "Inter-FE action based on IETF ForCES InterFE LFB" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index ae088a5..e82eff8 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o +obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c new file mode 100644 index 0000000..506ad28 --- /dev/null +++ b/net/sched/act_skbmod.c @@ -0,0 +1,257 @@ +/* + * copyright Jamal Hadi Salim (2016) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. + * + * Author: Jamal Hadi Salim <jhs@mojatatu.com> + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> + +#include <linux/tc_act/tc_skbmod.h> +#include <net/tc_act/tc_skbmod.h> + +#define SKBMOD_TAB_MASK 15 + +static int skbmod_net_id; +static struct tc_action_ops act_skbmod_ops; + +static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_skbmod *d = to_skbmod(a); + + spin_lock(&d->tcf_lock); + tcf_lastuse_update(&d->tcf_tm); + bstats_update(&d->tcf_bstats, skb); + + if (d->flags & SKBMOD_F_DMAC) + ether_addr_copy(eth_hdr(skb)->h_dest, d->eth_dst); + if (d->flags & SKBMOD_F_SMAC) + ether_addr_copy(eth_hdr(skb)->h_source, d->eth_src); + if (d->flags & SKBMOD_F_ETYPE) + eth_hdr(skb)->h_proto = d->eth_type; + if (d->flags & SKBMOD_F_SWAPMAC) { + u8 tmpaddr[ETH_ALEN]; + /*XXX: I am sure we can come up with something more efficient */ + ether_addr_copy(tmpaddr, eth_hdr(skb)->h_dest); + ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); + ether_addr_copy(eth_hdr(skb)->h_source, tmpaddr); + } + + spin_unlock(&d->tcf_lock); + return d->tcf_action; +} + +static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { + [TCA_SKBMOD_PARMS] = { .len = sizeof(struct tc_skbmod) }, + [TCA_SKBMOD_DMAC] = { .len = ETH_ALEN }, + [TCA_SKBMOD_SMAC] = { .len = ETH_ALEN }, + [TCA_SKBMOD_ETYPE] = { .type = NLA_U16 }, +}; + +static int tcf_skbmod_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct nlattr *tb[TCA_SKBMOD_MAX + 1]; + struct tc_skbmod *parm; + struct tcf_skbmod *d; + u32 lflags = 0; + u8 *daddr = NULL; + u8 *saddr = NULL; + u16 eth_type = 0; + + bool exists = false; + int ret = 0, err; + + if (nla == NULL) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy); + if (err < 0) + return err; + + if (tb[TCA_SKBMOD_PARMS] == NULL) + return -EINVAL; + + if (tb[TCA_SKBMOD_DMAC]) { + daddr = nla_data(tb[TCA_SKBMOD_DMAC]); + lflags |= SKBMOD_F_DMAC; + } + + if (tb[TCA_SKBMOD_SMAC]) { + saddr = nla_data(tb[TCA_SKBMOD_SMAC]); + lflags |= SKBMOD_F_SMAC; + } + + if (tb[TCA_SKBMOD_ETYPE]) { + eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]); + lflags |= SKBMOD_F_ETYPE; + } + + parm = nla_data(tb[TCA_SKBMOD_PARMS]); + + if (parm->flags & SKBMOD_F_SWAPMAC) + lflags = SKBMOD_F_SWAPMAC; + + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (!lflags) { + return -EINVAL; + } + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_skbmod_ops, bind, false); + if (ret) + return ret; + + d = to_skbmod(*a); + ret = ACT_P_CREATED; + } else { + d = to_skbmod(*a); + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + + spin_lock_bh(&d->tcf_lock); + + if (lflags & SKBMOD_F_DMAC) + ether_addr_copy(d->eth_dst, daddr); + if (lflags & SKBMOD_F_SMAC) + ether_addr_copy(d->eth_src, saddr); + if (lflags & SKBMOD_F_ETYPE) + d->eth_type = htons(eth_type); + + d->flags = lflags; + d->tcf_action = parm->action; + + spin_unlock_bh(&d->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + return ret; +} + +static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbmod *d = to_skbmod(a); + struct tc_skbmod opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + .flags = d->flags, + }; + struct tcf_t t; + + if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + if ((d->flags & SKBMOD_F_DMAC) && + nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, d->eth_dst)) + goto nla_put_failure; + if ((d->flags & SKBMOD_F_SMAC) && + nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, d->eth_src)) + goto nla_put_failure; + if ((d->flags & SKBMOD_F_ETYPE) && + nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(d->eth_type))) + goto nla_put_failure; + + tcf_tm_dump(&t, &d->tcf_tm); + if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD)) + goto nla_put_failure; + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_skbmod_ops = { + .kind = "skbmod", + .type = TCA_ACT_SKBMOD, + .owner = THIS_MODULE, + .act = tcf_skbmod_run, + .dump = tcf_skbmod_dump, + .init = tcf_skbmod_init, + .walk = tcf_skbmod_walker, + .lookup = tcf_skbmod_search, + .size = sizeof(struct tcf_skbmod), +}; + +static __net_init int skbmod_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK); +} + +static void __net_exit skbmod_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations skbmod_net_ops = { + .init = skbmod_init_net, + .exit = skbmod_exit_net, + .id = &skbmod_net_id, + .size = sizeof(struct tc_action_net), +}; + +MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>"); +MODULE_DESCRIPTION("SKB data mod-ing"); +MODULE_LICENSE("GPL"); + +static int __init skbmod_init_module(void) +{ + return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops); +} + +static void __exit skbmod_cleanup_module(void) +{ + tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops); +} + +module_init(skbmod_init_module); +module_exit(skbmod_cleanup_module);