Message ID | 1467470065-17613-1-git-send-email-jhs@emojatatu.com |
---|---|
State | Rejected, archived |
Delegated to: | David Miller |
Headers | show |
On 02/07/16 16:34, Jamal Hadi Salim wrote: > From: Jamal Hadi Salim <jhs@mojatatu.com> > > Often redirecting or mirroring requires that we set the dstMAC address > of the target device. While it is possible to pipe to a pedit action, > this patch obsoletes the need for that. This is a justified feature because > the dst MAC addresses rewrite is such a common use case. > > Sample usage: > sudo $TC filter add dev $ETH parent 1: protocol ip prio 10 \ > u32 match ip protocol 1 0xff flowid 1:2 \ > action mirred egress redirect dev $SPANPORT dst 02:15:15:15:15:15 > > This will match all icmp packets going out on dev $ETH and > redirect them to dev $SPANPORT while setting their dst MAC address > to 02:15:15:15:15:15 > > Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com> > --- > include/net/tc_act/tc_mirred.h | 4 +++- > include/uapi/linux/tc_act/tc_mirred.h | 7 ++++--- > net/sched/act_mirred.c | 24 +++++++++++++++++++++++- > 3 files changed, 30 insertions(+), 5 deletions(-) > LGTM, thanks! Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
On Sat, Jul 2, 2016 at 7:34 AM, Jamal Hadi Salim <jhs@mojatatu.com> wrote: > From: Jamal Hadi Salim <jhs@mojatatu.com> > > Often redirecting or mirroring requires that we set the dstMAC address > of the target device. While it is possible to pipe to a pedit action, > this patch obsoletes the need for that. This is a justified feature because > the dst MAC addresses rewrite is such a common use case. Maybe, instead of allowing to set arbitrary MAC address, how about just allow setting to the MAC address of the target device? If so we only need a boolean flag for user-space. IOW, do we really need such flexibility?
On 16-07-02 12:58 PM, Cong Wang wrote: > On Sat, Jul 2, 2016 at 7:34 AM, Jamal Hadi Salim <jhs@mojatatu.com> wrote: >> From: Jamal Hadi Salim <jhs@mojatatu.com> >> >> Often redirecting or mirroring requires that we set the dstMAC address >> of the target device. While it is possible to pipe to a pedit action, >> this patch obsoletes the need for that. This is a justified feature because >> the dst MAC addresses rewrite is such a common use case. > > Maybe, instead of allowing to set arbitrary MAC address, how about > just allow setting to the MAC address of the target device? You cant discover the remote MAC address; I have a wire connected between two machines. I set it to the MAC address of the remote machine i know of. And there are times I really want to set it to some arbitrary values (although that is more test mode than production). cheers, jamal > If so we only need a boolean flag for user-space. > > IOW, do we really need such flexibility? >
Sat, Jul 02, 2016 at 04:34:25PM CEST, jhs@mojatatu.com wrote: >From: Jamal Hadi Salim <jhs@mojatatu.com> > >Often redirecting or mirroring requires that we set the dstMAC address >of the target device. While it is possible to pipe to a pedit action, >this patch obsoletes the need for that. This is a justified feature because >the dst MAC addresses rewrite is such a common use case. We can't we have set of actions each doing its own job? I don't like merging functionality into actions :( > >Sample usage: >sudo $TC filter add dev $ETH parent 1: protocol ip prio 10 \ >u32 match ip protocol 1 0xff flowid 1:2 \ >action mirred egress redirect dev $SPANPORT dst 02:15:15:15:15:15 > >This will match all icmp packets going out on dev $ETH and >redirect them to dev $SPANPORT while setting their dst MAC address >to 02:15:15:15:15:15 > >Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com> >--- > include/net/tc_act/tc_mirred.h | 4 +++- > include/uapi/linux/tc_act/tc_mirred.h | 7 ++++--- > net/sched/act_mirred.c | 24 +++++++++++++++++++++++- > 3 files changed, 30 insertions(+), 5 deletions(-) > >diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h >index e891835..7e8bced 100644 >--- a/include/net/tc_act/tc_mirred.h >+++ b/include/net/tc_act/tc_mirred.h >@@ -6,10 +6,12 @@ > > struct tcf_mirred { > struct tcf_common common; >+ struct net_device __rcu *tcfm_dev; > int tcfm_eaction; > int tcfm_ifindex; > int tcfm_ok_push; >- struct net_device __rcu *tcfm_dev; >+ u8 eth_dst[ETH_ALEN]; >+ /* XXX 6 bytes hole here*/ > struct list_head tcfm_list; > }; > #define to_mirred(a) \ >diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h >index 3d7a2b3..aaca1ff 100644 >--- a/include/uapi/linux/tc_act/tc_mirred.h >+++ b/include/uapi/linux/tc_act/tc_mirred.h >@@ -9,20 +9,21 @@ > #define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */ > #define TCA_INGRESS_REDIR 3 /* packet redirect to INGRESS*/ > #define TCA_INGRESS_MIRROR 4 /* mirror packet to INGRESS */ >- >+ > struct tc_mirred { > tc_gen; > int eaction; /* one of IN/EGRESS_MIRROR/REDIR */ > __u32 ifindex; /* ifindex of egress port */ > }; >- >+ > enum { > TCA_MIRRED_UNSPEC, > TCA_MIRRED_TM, > TCA_MIRRED_PARMS, > TCA_MIRRED_PAD, >+ TCA_MIRRED_DMAC, > __TCA_MIRRED_MAX > }; > #define TCA_MIRRED_MAX (__TCA_MIRRED_MAX - 1) >- >+ > #endif >diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c >index 5b135d3..1cc14f5 100644 >--- a/net/sched/act_mirred.c >+++ b/net/sched/act_mirred.c >@@ -49,6 +49,7 @@ static void tcf_mirred_release(struct tc_action *a, int bind) > > static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { > [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, >+ [TCA_MIRRED_DMAC] = { .len = ETH_ALEN}, > }; > > static int mirred_net_id; >@@ -64,15 +65,24 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, > struct net_device *dev; > int ret, ok_push = 0; > bool exists = false; >+ u8 *daddr = NULL; > > if (nla == NULL) > return -EINVAL; >+ > ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy); > if (ret < 0) > return ret; >+ > if (tb[TCA_MIRRED_PARMS] == NULL) > return -EINVAL; >+ > parm = nla_data(tb[TCA_MIRRED_PARMS]); >+ if (tb[TCA_MIRRED_DMAC]) { >+ daddr = nla_data(tb[TCA_MIRRED_DMAC]); >+ if (is_zero_ether_addr(daddr)) >+ return -EINVAL; >+ } > > exists = tcf_hash_check(tn, parm->index, a, bind); > if (exists && bind) >@@ -126,6 +136,9 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, > } > m = to_mirred(a); > >+ if (daddr) >+ ether_addr_copy(m->eth_dst, daddr); >+ > ASSERT_RTNL(); > m->tcf_action = parm->action; > m->tcfm_eaction = parm->eaction; >@@ -190,6 +203,9 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, > > skb2->skb_iif = skb->dev->ifindex; > skb2->dev = dev; >+ if (!is_zero_ether_addr(m->eth_dst)) >+ ether_addr_copy(eth_hdr(skb2)->h_dest, m->eth_dst); >+ > err = dev_queue_xmit(skb2); > > if (err) { >@@ -203,7 +219,8 @@ out: > return retval; > } > >-static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) >+static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, >+ int ref) > { > unsigned char *b = skb_tail_pointer(skb); > struct tcf_mirred *m = a->priv; >@@ -220,6 +237,11 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i > if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt)) > goto nla_put_failure; > >+ if (!is_zero_ether_addr(m->eth_dst)) { >+ if (nla_put(skb, TCA_MIRRED_DMAC, ETH_ALEN, m->eth_dst)) >+ goto nla_put_failure; >+ } >+ > tcf_tm_dump(&t, &m->tcf_tm); > if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD)) > goto nla_put_failure; >-- >1.9.1 >
From: Jamal Hadi Salim <jhs@mojatatu.com> Date: Sat, 2 Jul 2016 10:34:25 -0400 > From: Jamal Hadi Salim <jhs@mojatatu.com> > > Often redirecting or mirroring requires that we set the dstMAC address > of the target device. While it is possible to pipe to a pedit action, > this patch obsoletes the need for that. This is a justified feature because > the dst MAC addresses rewrite is such a common use case. > > Sample usage: > sudo $TC filter add dev $ETH parent 1: protocol ip prio 10 \ > u32 match ip protocol 1 0xff flowid 1:2 \ > action mirred egress redirect dev $SPANPORT dst 02:15:15:15:15:15 > > This will match all icmp packets going out on dev $ETH and > redirect them to dev $SPANPORT while setting their dst MAC address > to 02:15:15:15:15:15 > > Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com> I agree with Jiri. The whole model is to chain together the actions that you need to accomplish whatever it is you want to do. This means actions are as specialized as possible, and do one thing as precisely and efficiently as possible. It seems unnecessary to break this model and start to make swiss army knife actions. Every time we find some "common use case" will we turn yet another action into a Frankenstein thing that does more than it was designed to do? I don't want to apply this and start us down this road, seriously...
On 16-07-04 06:14 PM, David Miller wrote: > I agree with Jiri. > > The whole model is to chain together the actions that you need to > accomplish whatever it is you want to do. > > This means actions are as specialized as possible, and do one thing > as precisely and efficiently as possible. > > It seems unnecessary to break this model and start to make swiss army > knife actions. > > Every time we find some "common use case" will we turn yet another > action into a Frankenstein thing that does more than it was designed > to do? > I agree there is grey line and i too subscribe to the faith of small-actions granularity. But it is not exact dogma guys. Even grep ends up adding features over time;-> I am/was on the fence on submitting this patch. Here's the thought process that convinced my fingers to type the patch: When you have a switch between you and the destination target (in my case several that i have no control over) and when said switches learn the MAC addresses, then there is confusion when they start seeing the same MAC address showing up in conflicting ports. Second arguement usability, from: sudo $TC filter add dev $ETH parent ffff: pref 11 protocol ip u32 \ match ip protocol 1 0xff flowid 1:2 \ action pedit munge offset -14 u8 set 0x02 \ munge offset -13 u8 set 0x15 \ munge offset -12 u8 set 0x15 \ munge offset -11 u8 set 0x15 \ munge offset -10 u16 set 0x1515 \ pipe \ action mirred egress redirect dev $SPANPORT to: $TC filter add dev $ETH parent 1: protocol ip prio 10 \ u32 match ip protocol 1 0xff flowid 1:2 \ action mirred egress redirect dev $SPANPORT dst 02:15:15:15:15:15 given that I have to do this many many times in scripts and the second policy is better eye candy. And last but not least: there is a small performance improvement because i have to execute less instructions. Choices are ugly and slow vs fast and nicer;-> Of course what i am doing makes it more complex to offload with existing ASICs. But it is an optional feature so it could be worked around; we need to start dealing with capabilities and reject offloads for certain policies. Alternative: introduce another action, I'll call it skbmod (like skbedit) and anytime someone finds something they are prototyping with pedit is ugly they could migrate the modifier into skbmod. First user dstmac. So my policy would be: $TC filter add dev $ETH parent 1: protocol ip prio 10 \ u32 match ip protocol 1 0xff flowid 1:2 \ action skbmod dstmac 02:15:15:15:15:15 \ action mirred egress redirect dev $SPANPORT > I don't want to apply this and start us down this road, seriously... That cat is out of the bag, at least for tc bpf. An opaque blob of a single action could do whatever it wants. cheers, jamal
On Tue, Jul 5, 2016 at 4:04 AM, Jamal Hadi Salim <jhs@mojatatu.com> wrote: > Second arguement usability, from: > > sudo $TC filter add dev $ETH parent ffff: pref 11 protocol ip u32 \ > match ip protocol 1 0xff flowid 1:2 \ > action pedit munge offset -14 u8 set 0x02 \ > munge offset -13 u8 set 0x15 \ > munge offset -12 u8 set 0x15 \ > munge offset -11 u8 set 0x15 \ > munge offset -10 u16 set 0x1515 \ > pipe \ > action mirred egress redirect dev $SPANPORT > > to: > $TC filter add dev $ETH parent 1: protocol ip prio 10 \ > u32 match ip protocol 1 0xff flowid 1:2 \ > action mirred egress redirect dev $SPANPORT dst 02:15:15:15:15:15 > > given that I have to do this many many times in scripts and the > second policy is better eye candy. How about adding a "wrapper" in iproute2 pedit action to make it accept "dst mac xx:xx:xx:xx:xx:xx"? Like what we did for u32 filters.
On 16-07-05 02:19 PM, Cong Wang wrote: > On Tue, Jul 5, 2016 at 4:04 AM, Jamal Hadi Salim <jhs@mojatatu.com> wrote: >> Second arguement usability, from: >> to: >> $TC filter add dev $ETH parent 1: protocol ip prio 10 \ >> u32 match ip protocol 1 0xff flowid 1:2 \ >> action mirred egress redirect dev $SPANPORT dst 02:15:15:15:15:15 >> >> given that I have to do this many many times in scripts and the >> second policy is better eye candy. > > How about adding a "wrapper" in iproute2 pedit action to make > it accept "dst mac xx:xx:xx:xx:xx:xx"? Like what we did for u32 > filters. > Thats what we do today. Its fugly and debugging is nasty (having to stare at 10s or hundreds of outputs looking at offset and bytes when something goes wrong). It is ok to use pedit as a starting point because you can quickly craft things without any kernel changes. But when you start heavily using something handcrafted its usability starts affecting you. I will work on and send the skbmod patch next time i get cycles. cheers, jamal
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index e891835..7e8bced 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -6,10 +6,12 @@ struct tcf_mirred { struct tcf_common common; + struct net_device __rcu *tcfm_dev; int tcfm_eaction; int tcfm_ifindex; int tcfm_ok_push; - struct net_device __rcu *tcfm_dev; + u8 eth_dst[ETH_ALEN]; + /* XXX 6 bytes hole here*/ struct list_head tcfm_list; }; #define to_mirred(a) \ diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h index 3d7a2b3..aaca1ff 100644 --- a/include/uapi/linux/tc_act/tc_mirred.h +++ b/include/uapi/linux/tc_act/tc_mirred.h @@ -9,20 +9,21 @@ #define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */ #define TCA_INGRESS_REDIR 3 /* packet redirect to INGRESS*/ #define TCA_INGRESS_MIRROR 4 /* mirror packet to INGRESS */ - + struct tc_mirred { tc_gen; int eaction; /* one of IN/EGRESS_MIRROR/REDIR */ __u32 ifindex; /* ifindex of egress port */ }; - + enum { TCA_MIRRED_UNSPEC, TCA_MIRRED_TM, TCA_MIRRED_PARMS, TCA_MIRRED_PAD, + TCA_MIRRED_DMAC, __TCA_MIRRED_MAX }; #define TCA_MIRRED_MAX (__TCA_MIRRED_MAX - 1) - + #endif diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 5b135d3..1cc14f5 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -49,6 +49,7 @@ static void tcf_mirred_release(struct tc_action *a, int bind) static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, + [TCA_MIRRED_DMAC] = { .len = ETH_ALEN}, }; static int mirred_net_id; @@ -64,15 +65,24 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct net_device *dev; int ret, ok_push = 0; bool exists = false; + u8 *daddr = NULL; if (nla == NULL) return -EINVAL; + ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy); if (ret < 0) return ret; + if (tb[TCA_MIRRED_PARMS] == NULL) return -EINVAL; + parm = nla_data(tb[TCA_MIRRED_PARMS]); + if (tb[TCA_MIRRED_DMAC]) { + daddr = nla_data(tb[TCA_MIRRED_DMAC]); + if (is_zero_ether_addr(daddr)) + return -EINVAL; + } exists = tcf_hash_check(tn, parm->index, a, bind); if (exists && bind) @@ -126,6 +136,9 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } m = to_mirred(a); + if (daddr) + ether_addr_copy(m->eth_dst, daddr); + ASSERT_RTNL(); m->tcf_action = parm->action; m->tcfm_eaction = parm->eaction; @@ -190,6 +203,9 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; + if (!is_zero_ether_addr(m->eth_dst)) + ether_addr_copy(eth_hdr(skb2)->h_dest, m->eth_dst); + err = dev_queue_xmit(skb2); if (err) { @@ -203,7 +219,8 @@ out: return retval; } -static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, + int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = a->priv; @@ -220,6 +237,11 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt)) goto nla_put_failure; + if (!is_zero_ether_addr(m->eth_dst)) { + if (nla_put(skb, TCA_MIRRED_DMAC, ETH_ALEN, m->eth_dst)) + goto nla_put_failure; + } + tcf_tm_dump(&t, &m->tcf_tm); if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD)) goto nla_put_failure;