diff mbox

[v2,net-next,1/1] net sched actions: mirred add support for setting Dst MAC address

Message ID 1467470065-17613-1-git-send-email-jhs@emojatatu.com
State Rejected, archived
Delegated to: David Miller
Headers show

Commit Message

Jamal Hadi Salim July 2, 2016, 2:34 p.m. UTC
From: Jamal Hadi Salim <jhs@mojatatu.com>

Often redirecting or mirroring requires that we set the dstMAC address
of the target device. While it is possible to pipe to a pedit action,
this patch obsoletes the need for that. This is a justified feature because
the dst MAC addresses rewrite is such a common use case.

Sample usage:
sudo $TC filter add dev $ETH parent 1: protocol ip prio 10 \
u32 match ip protocol 1 0xff flowid 1:2 \
action mirred egress redirect dev $SPANPORT dst 02:15:15:15:15:15

This will match all icmp packets going out on dev $ETH and
redirect them to dev $SPANPORT while setting their dst MAC address
to 02:15:15:15:15:15

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 include/net/tc_act/tc_mirred.h        |  4 +++-
 include/uapi/linux/tc_act/tc_mirred.h |  7 ++++---
 net/sched/act_mirred.c                | 24 +++++++++++++++++++++++-
 3 files changed, 30 insertions(+), 5 deletions(-)

Comments

Nikolay Aleksandrov July 2, 2016, 3:16 p.m. UTC | #1
On 02/07/16 16:34, Jamal Hadi Salim wrote:
> From: Jamal Hadi Salim <jhs@mojatatu.com>
> 
> Often redirecting or mirroring requires that we set the dstMAC address
> of the target device. While it is possible to pipe to a pedit action,
> this patch obsoletes the need for that. This is a justified feature because
> the dst MAC addresses rewrite is such a common use case.
> 
> Sample usage:
> sudo $TC filter add dev $ETH parent 1: protocol ip prio 10 \
> u32 match ip protocol 1 0xff flowid 1:2 \
> action mirred egress redirect dev $SPANPORT dst 02:15:15:15:15:15
> 
> This will match all icmp packets going out on dev $ETH and
> redirect them to dev $SPANPORT while setting their dst MAC address
> to 02:15:15:15:15:15
> 
> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
> ---
>  include/net/tc_act/tc_mirred.h        |  4 +++-
>  include/uapi/linux/tc_act/tc_mirred.h |  7 ++++---
>  net/sched/act_mirred.c                | 24 +++++++++++++++++++++++-
>  3 files changed, 30 insertions(+), 5 deletions(-)
> 

LGTM, thanks!

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Cong Wang July 2, 2016, 4:58 p.m. UTC | #2
On Sat, Jul 2, 2016 at 7:34 AM, Jamal Hadi Salim <jhs@mojatatu.com> wrote:
> From: Jamal Hadi Salim <jhs@mojatatu.com>
>
> Often redirecting or mirroring requires that we set the dstMAC address
> of the target device. While it is possible to pipe to a pedit action,
> this patch obsoletes the need for that. This is a justified feature because
> the dst MAC addresses rewrite is such a common use case.

Maybe, instead of allowing to set arbitrary MAC address, how about
just allow setting to the MAC address of the target device?
If so we only need a boolean flag for user-space.

IOW, do we really need such flexibility?
Jamal Hadi Salim July 2, 2016, 9:38 p.m. UTC | #3
On 16-07-02 12:58 PM, Cong Wang wrote:
> On Sat, Jul 2, 2016 at 7:34 AM, Jamal Hadi Salim <jhs@mojatatu.com> wrote:
>> From: Jamal Hadi Salim <jhs@mojatatu.com>
>>
>> Often redirecting or mirroring requires that we set the dstMAC address
>> of the target device. While it is possible to pipe to a pedit action,
>> this patch obsoletes the need for that. This is a justified feature because
>> the dst MAC addresses rewrite is such a common use case.
>
> Maybe, instead of allowing to set arbitrary MAC address, how about
> just allow setting to the MAC address of the target device?

You cant discover the remote MAC address; I have a wire
connected between two machines. I set it to the MAC address
of the remote machine i know of. And there are times I really
want to set it to some arbitrary values (although that is more
test mode than production).

cheers,
jamal

> If so we only need a boolean flag for user-space.
>
> IOW, do we really need such flexibility?
>
Jiri Pirko July 4, 2016, 3:25 p.m. UTC | #4
Sat, Jul 02, 2016 at 04:34:25PM CEST, jhs@mojatatu.com wrote:
>From: Jamal Hadi Salim <jhs@mojatatu.com>
>
>Often redirecting or mirroring requires that we set the dstMAC address
>of the target device. While it is possible to pipe to a pedit action,
>this patch obsoletes the need for that. This is a justified feature because
>the dst MAC addresses rewrite is such a common use case.

We can't we have set of actions each doing its own job? I don't like
merging functionality into actions :(


>
>Sample usage:
>sudo $TC filter add dev $ETH parent 1: protocol ip prio 10 \
>u32 match ip protocol 1 0xff flowid 1:2 \
>action mirred egress redirect dev $SPANPORT dst 02:15:15:15:15:15
>
>This will match all icmp packets going out on dev $ETH and
>redirect them to dev $SPANPORT while setting their dst MAC address
>to 02:15:15:15:15:15
>
>Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
>---
> include/net/tc_act/tc_mirred.h        |  4 +++-
> include/uapi/linux/tc_act/tc_mirred.h |  7 ++++---
> net/sched/act_mirred.c                | 24 +++++++++++++++++++++++-
> 3 files changed, 30 insertions(+), 5 deletions(-)
>
>diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
>index e891835..7e8bced 100644
>--- a/include/net/tc_act/tc_mirred.h
>+++ b/include/net/tc_act/tc_mirred.h
>@@ -6,10 +6,12 @@
> 
> struct tcf_mirred {
> 	struct tcf_common	common;
>+	struct net_device __rcu	*tcfm_dev;
> 	int			tcfm_eaction;
> 	int			tcfm_ifindex;
> 	int			tcfm_ok_push;
>-	struct net_device __rcu	*tcfm_dev;
>+	u8			eth_dst[ETH_ALEN];
>+	/* XXX 6 bytes hole here*/
> 	struct list_head	tcfm_list;
> };
> #define to_mirred(a) \
>diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h
>index 3d7a2b3..aaca1ff 100644
>--- a/include/uapi/linux/tc_act/tc_mirred.h
>+++ b/include/uapi/linux/tc_act/tc_mirred.h
>@@ -9,20 +9,21 @@
> #define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */
> #define TCA_INGRESS_REDIR 3  /* packet redirect to INGRESS*/
> #define TCA_INGRESS_MIRROR 4 /* mirror packet to INGRESS */
>-                                                                                
>+
> struct tc_mirred {
> 	tc_gen;
> 	int                     eaction;   /* one of IN/EGRESS_MIRROR/REDIR */
> 	__u32                   ifindex;  /* ifindex of egress port */
> };
>-                                                                                
>+
> enum {
> 	TCA_MIRRED_UNSPEC,
> 	TCA_MIRRED_TM,
> 	TCA_MIRRED_PARMS,
> 	TCA_MIRRED_PAD,
>+	TCA_MIRRED_DMAC,
> 	__TCA_MIRRED_MAX
> };
> #define TCA_MIRRED_MAX (__TCA_MIRRED_MAX - 1)
>-                                                                                
>+
> #endif
>diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
>index 5b135d3..1cc14f5 100644
>--- a/net/sched/act_mirred.c
>+++ b/net/sched/act_mirred.c
>@@ -49,6 +49,7 @@ static void tcf_mirred_release(struct tc_action *a, int bind)
> 
> static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
> 	[TCA_MIRRED_PARMS]	= { .len = sizeof(struct tc_mirred) },
>+	[TCA_MIRRED_DMAC] = { .len = ETH_ALEN},
> };
> 
> static int mirred_net_id;
>@@ -64,15 +65,24 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
> 	struct net_device *dev;
> 	int ret, ok_push = 0;
> 	bool exists = false;
>+	u8 *daddr = NULL;
> 
> 	if (nla == NULL)
> 		return -EINVAL;
>+
> 	ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy);
> 	if (ret < 0)
> 		return ret;
>+
> 	if (tb[TCA_MIRRED_PARMS] == NULL)
> 		return -EINVAL;
>+
> 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
>+	if (tb[TCA_MIRRED_DMAC]) {
>+		daddr = nla_data(tb[TCA_MIRRED_DMAC]);
>+		if (is_zero_ether_addr(daddr))
>+			return -EINVAL;
>+	}
> 
> 	exists = tcf_hash_check(tn, parm->index, a, bind);
> 	if (exists && bind)
>@@ -126,6 +136,9 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
> 	}
> 	m = to_mirred(a);
> 
>+	if (daddr)
>+		ether_addr_copy(m->eth_dst, daddr);
>+
> 	ASSERT_RTNL();
> 	m->tcf_action = parm->action;
> 	m->tcfm_eaction = parm->eaction;
>@@ -190,6 +203,9 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
> 
> 	skb2->skb_iif = skb->dev->ifindex;
> 	skb2->dev = dev;
>+	if (!is_zero_ether_addr(m->eth_dst))
>+		ether_addr_copy(eth_hdr(skb2)->h_dest, m->eth_dst);
>+
> 	err = dev_queue_xmit(skb2);
> 
> 	if (err) {
>@@ -203,7 +219,8 @@ out:
> 	return retval;
> }
> 
>-static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
>+static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
>+			   int ref)
> {
> 	unsigned char *b = skb_tail_pointer(skb);
> 	struct tcf_mirred *m = a->priv;
>@@ -220,6 +237,11 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
> 	if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt))
> 		goto nla_put_failure;
> 
>+	if (!is_zero_ether_addr(m->eth_dst)) {
>+		if (nla_put(skb, TCA_MIRRED_DMAC, ETH_ALEN, m->eth_dst))
>+			goto nla_put_failure;
>+	}
>+
> 	tcf_tm_dump(&t, &m->tcf_tm);
> 	if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD))
> 		goto nla_put_failure;
>-- 
>1.9.1
>
David Miller July 4, 2016, 10:14 p.m. UTC | #5
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Sat,  2 Jul 2016 10:34:25 -0400

> From: Jamal Hadi Salim <jhs@mojatatu.com>
> 
> Often redirecting or mirroring requires that we set the dstMAC address
> of the target device. While it is possible to pipe to a pedit action,
> this patch obsoletes the need for that. This is a justified feature because
> the dst MAC addresses rewrite is such a common use case.
> 
> Sample usage:
> sudo $TC filter add dev $ETH parent 1: protocol ip prio 10 \
> u32 match ip protocol 1 0xff flowid 1:2 \
> action mirred egress redirect dev $SPANPORT dst 02:15:15:15:15:15
> 
> This will match all icmp packets going out on dev $ETH and
> redirect them to dev $SPANPORT while setting their dst MAC address
> to 02:15:15:15:15:15
> 
> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>

I agree with Jiri.

The whole model is to chain together the actions that you need to
accomplish whatever it is you want to do.

This means actions are as specialized as possible, and do one thing
as precisely and efficiently as possible.

It seems unnecessary to break this model and start to make swiss army
knife actions.

Every time we find some "common use case" will we turn yet another
action into a Frankenstein thing that does more than it was designed
to do?

I don't want to apply this and start us down this road, seriously...
Jamal Hadi Salim July 5, 2016, 11:04 a.m. UTC | #6
On 16-07-04 06:14 PM, David Miller wrote:

> I agree with Jiri.
>
> The whole model is to chain together the actions that you need to
> accomplish whatever it is you want to do.
>
> This means actions are as specialized as possible, and do one thing
> as precisely and efficiently as possible.
>
> It seems unnecessary to break this model and start to make swiss army
> knife actions.
>
> Every time we find some "common use case" will we turn yet another
> action into a Frankenstein thing that does more than it was designed
> to do?
>

I agree there is grey line and i too subscribe to the faith of
small-actions granularity. But it is not exact dogma guys.
Even grep ends up adding features over time;->
I am/was on the fence on submitting this patch. Here's the
thought process that convinced my fingers to type the patch:

When you have a switch between you and the destination target (in my
case several that i have no control over) and when said switches
learn the MAC addresses, then there is confusion when they start
seeing the same MAC address showing up in conflicting ports.

Second arguement  usability, from:

sudo $TC filter add dev $ETH parent ffff: pref 11 protocol ip u32 \
match ip protocol 1 0xff flowid 1:2 \
action pedit munge offset -14 u8 set 0x02 \
         munge offset -13 u8 set 0x15 \
         munge offset -12 u8 set 0x15 \
         munge offset -11 u8 set 0x15 \
         munge offset -10 u16 set 0x1515 \
         pipe \
action mirred egress redirect dev $SPANPORT

to:
$TC filter add dev $ETH parent 1: protocol ip prio 10 \
     u32 match ip protocol 1 0xff flowid 1:2 \
     action mirred egress redirect dev $SPANPORT dst 02:15:15:15:15:15

given that I have to do this many many times in scripts and the
second policy is better eye candy.

And last but not least: there is a small performance improvement
because i have to execute less instructions.

Choices are ugly and slow vs fast and nicer;->

Of course what i am doing makes it more complex to offload with
existing ASICs. But it is an optional feature so it could be
worked around; we need to start dealing with capabilities and
reject offloads for certain policies.

Alternative: introduce another action, I'll call it skbmod
(like skbedit) and anytime someone finds something they are
prototyping with pedit is ugly they could migrate the modifier
into skbmod. First user dstmac.
So my policy would be:

$TC filter add dev $ETH parent 1: protocol ip prio 10 \
u32 match ip protocol 1 0xff flowid 1:2 \
action skbmod dstmac 02:15:15:15:15:15 \
action mirred egress redirect dev $SPANPORT

> I don't want to apply this and start us down this road, seriously...

That cat is out of the bag, at least for tc bpf. An opaque blob
of a single action could do whatever it wants.

cheers,
jamal
Cong Wang July 5, 2016, 6:19 p.m. UTC | #7
On Tue, Jul 5, 2016 at 4:04 AM, Jamal Hadi Salim <jhs@mojatatu.com> wrote:
> Second arguement  usability, from:
>
> sudo $TC filter add dev $ETH parent ffff: pref 11 protocol ip u32 \
> match ip protocol 1 0xff flowid 1:2 \
> action pedit munge offset -14 u8 set 0x02 \
>         munge offset -13 u8 set 0x15 \
>         munge offset -12 u8 set 0x15 \
>         munge offset -11 u8 set 0x15 \
>         munge offset -10 u16 set 0x1515 \
>         pipe \
> action mirred egress redirect dev $SPANPORT
>
> to:
> $TC filter add dev $ETH parent 1: protocol ip prio 10 \
>     u32 match ip protocol 1 0xff flowid 1:2 \
>     action mirred egress redirect dev $SPANPORT dst 02:15:15:15:15:15
>
> given that I have to do this many many times in scripts and the
> second policy is better eye candy.

How about adding a "wrapper" in iproute2 pedit action to make
it accept "dst mac xx:xx:xx:xx:xx:xx"? Like what we did for u32
filters.
Jamal Hadi Salim July 6, 2016, 10:02 a.m. UTC | #8
On 16-07-05 02:19 PM, Cong Wang wrote:
> On Tue, Jul 5, 2016 at 4:04 AM, Jamal Hadi Salim <jhs@mojatatu.com> wrote:
>> Second arguement  usability, from:

>> to:
>> $TC filter add dev $ETH parent 1: protocol ip prio 10 \
>>      u32 match ip protocol 1 0xff flowid 1:2 \
>>      action mirred egress redirect dev $SPANPORT dst 02:15:15:15:15:15
>>
>> given that I have to do this many many times in scripts and the
>> second policy is better eye candy.
>
> How about adding a "wrapper" in iproute2 pedit action to make
> it accept "dst mac xx:xx:xx:xx:xx:xx"? Like what we did for u32
> filters.
>

Thats what we do today. Its fugly and debugging is nasty
(having to stare at 10s or hundreds of outputs looking at
offset and bytes when something goes wrong).
It is ok to use pedit as a starting point because you can
quickly craft things without any kernel changes.
But when you start heavily using something handcrafted
its usability starts affecting you.
I will work on and send the skbmod patch next time i
get cycles.

cheers,
jamal
diff mbox

Patch

diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index e891835..7e8bced 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -6,10 +6,12 @@ 
 
 struct tcf_mirred {
 	struct tcf_common	common;
+	struct net_device __rcu	*tcfm_dev;
 	int			tcfm_eaction;
 	int			tcfm_ifindex;
 	int			tcfm_ok_push;
-	struct net_device __rcu	*tcfm_dev;
+	u8			eth_dst[ETH_ALEN];
+	/* XXX 6 bytes hole here*/
 	struct list_head	tcfm_list;
 };
 #define to_mirred(a) \
diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h
index 3d7a2b3..aaca1ff 100644
--- a/include/uapi/linux/tc_act/tc_mirred.h
+++ b/include/uapi/linux/tc_act/tc_mirred.h
@@ -9,20 +9,21 @@ 
 #define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */
 #define TCA_INGRESS_REDIR 3  /* packet redirect to INGRESS*/
 #define TCA_INGRESS_MIRROR 4 /* mirror packet to INGRESS */
-                                                                                
+
 struct tc_mirred {
 	tc_gen;
 	int                     eaction;   /* one of IN/EGRESS_MIRROR/REDIR */
 	__u32                   ifindex;  /* ifindex of egress port */
 };
-                                                                                
+
 enum {
 	TCA_MIRRED_UNSPEC,
 	TCA_MIRRED_TM,
 	TCA_MIRRED_PARMS,
 	TCA_MIRRED_PAD,
+	TCA_MIRRED_DMAC,
 	__TCA_MIRRED_MAX
 };
 #define TCA_MIRRED_MAX (__TCA_MIRRED_MAX - 1)
-                                                                                
+
 #endif
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 5b135d3..1cc14f5 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -49,6 +49,7 @@  static void tcf_mirred_release(struct tc_action *a, int bind)
 
 static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
 	[TCA_MIRRED_PARMS]	= { .len = sizeof(struct tc_mirred) },
+	[TCA_MIRRED_DMAC] = { .len = ETH_ALEN},
 };
 
 static int mirred_net_id;
@@ -64,15 +65,24 @@  static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	struct net_device *dev;
 	int ret, ok_push = 0;
 	bool exists = false;
+	u8 *daddr = NULL;
 
 	if (nla == NULL)
 		return -EINVAL;
+
 	ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy);
 	if (ret < 0)
 		return ret;
+
 	if (tb[TCA_MIRRED_PARMS] == NULL)
 		return -EINVAL;
+
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
+	if (tb[TCA_MIRRED_DMAC]) {
+		daddr = nla_data(tb[TCA_MIRRED_DMAC]);
+		if (is_zero_ether_addr(daddr))
+			return -EINVAL;
+	}
 
 	exists = tcf_hash_check(tn, parm->index, a, bind);
 	if (exists && bind)
@@ -126,6 +136,9 @@  static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	}
 	m = to_mirred(a);
 
+	if (daddr)
+		ether_addr_copy(m->eth_dst, daddr);
+
 	ASSERT_RTNL();
 	m->tcf_action = parm->action;
 	m->tcfm_eaction = parm->eaction;
@@ -190,6 +203,9 @@  static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 
 	skb2->skb_iif = skb->dev->ifindex;
 	skb2->dev = dev;
+	if (!is_zero_ether_addr(m->eth_dst))
+		ether_addr_copy(eth_hdr(skb2)->h_dest, m->eth_dst);
+
 	err = dev_queue_xmit(skb2);
 
 	if (err) {
@@ -203,7 +219,8 @@  out:
 	return retval;
 }
 
-static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
+			   int ref)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_mirred *m = a->priv;
@@ -220,6 +237,11 @@  static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
 	if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 
+	if (!is_zero_ether_addr(m->eth_dst)) {
+		if (nla_put(skb, TCA_MIRRED_DMAC, ETH_ALEN, m->eth_dst))
+			goto nla_put_failure;
+	}
+
 	tcf_tm_dump(&t, &m->tcf_tm);
 	if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD))
 		goto nla_put_failure;