Message ID | 1472647584-6713-5-git-send-email-hadarh@mellanox.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
Wed, Aug 31, 2016 at 02:46:24PM CEST, hadarh@mellanox.com wrote: >From: Amir Vadai <amir@vadai.me> > >This action could be used before redirecting packets to a shared tunnel >device, or when redirecting packets arriving from a such a device. > >The action will release the metadata created by the tunnel device >(decap), or set the metadata with the specified values for encap >operation. > >For example, the following flower filter will forward all ICMP packets >destined to 11.11.11.2 through the shared vxlan device 'vxlan0'. Before >redirecting, a metadata for the vxlan tunnel is created using the >tunnel_key action and it's arguments: > >$ filter add dev net0 protocol ip parent ffff: \ > flower \ > ip_proto 1 \ > dst_ip 11.11.11.2 \ > action tunnel_key set \ > src_ip 11.11.0.1 \ > dst_ip 11.11.0.2 \ > id 11 \ > action mirred egress redirect dev vxlan0 > >Signed-off-by: Amir Vadai <amir@vadai.me> >Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com> Looks fine to me Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Hi, On Wed, 31 Aug 2016 15:46:24 +0300 Hadar Hen Zion <hadarh@mellanox.com> wrote: > +static int tunnel_key_init(struct net *net, struct nlattr *nla, > + struct nlattr *est, struct tc_action **a, > + int ovr, int bind) > +{ > + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); > + struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; > + struct metadata_dst *metadata = NULL; > + struct tc_tunnel_key *parm; > + struct tcf_tunnel_key *t; > + struct tcf_tunnel_key_params *params_old; > + struct tcf_tunnel_key_params *params_new; > + __be64 key_id; > + bool exists = false; > + int ret = 0; > + int err; > + > + if (!nla) > + return -EINVAL; > + > + err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy); > + if (err < 0) > + return err; > + > + if (!tb[TCA_TUNNEL_KEY_PARMS]) > + return -EINVAL; > + > + parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); > + exists = tcf_hash_check(tn, parm->index, a, bind); > + if (exists && bind) > + return 0; > + > + switch (parm->t_action) { > + case TCA_TUNNEL_KEY_ACT_RELEASE: > + break; > + case TCA_TUNNEL_KEY_ACT_SET: > + if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) { > + ret = -EINVAL; > + goto err_out; > + } > + > + key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); > + > + if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && > + tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { > + __be32 saddr; > + __be32 daddr; > + > + saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]); > + daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); > + > + metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, > + TUNNEL_KEY, key_id, 0); > + } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && > + tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { > + struct in6_addr saddr; > + struct in6_addr daddr; > + > + saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]); > + daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); > + > + metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0, > + TUNNEL_KEY, key_id, 0); > + } > + > + if (!metadata) { > + ret = -EINVAL; > + 
goto err_out; > + } > + > + metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; > + break; > + default: > + goto err_out; > + } > + > + if (!exists) { > + ret = tcf_hash_create(tn, parm->index, est, a, > + &act_tunnel_key_ops, bind, true); > + if (ret) > + return ret; > + > + ret = ACT_P_CREATED; > + } else { > + tcf_hash_release(*a, bind); > + if (!ovr) > + return -EEXIST; > + } > + > + t = to_tunnel_key(*a); > + > + ASSERT_RTNL(); > + params_new = kzalloc(sizeof(*params_new), > + GFP_KERNEL); nit: Fits oneline. Fix if patch needs other amendments. > + if (unlikely(!params_new)) { > + if (ovr) > + tcf_hash_release(*a, bind); > + return -ENOMEM; Seems we need to call tcf_hash_release regardless 'ovr': In case (!exist), we've created a new hash few lines above. Therefore in failure, don't we need a tcf_hash_release()? Am I missing something? > + } > + > + params_old = rtnl_dereference(t->params); > + > + t->tcf_action = parm->action; > + params_new->tcft_action = parm->t_action; > + params_new->tcft_enc_metadata = metadata; > + > + rcu_assign_pointer(t->params, params_new); > + > + if (params_old) > + kfree_rcu(params_old, rcu); > + > + if (ret == ACT_P_CREATED) > + tcf_hash_insert(tn, *a); > + > + return ret; > + > +err_out: > + if (exists) > + tcf_hash_release(*a, bind); > + return ret; > +} > + > +static void tunnel_key_release(struct tc_action *a, int bind) > +{ > + struct tcf_tunnel_key *t = to_tunnel_key(a); > + struct tcf_tunnel_key_params *params; > + > + rcu_read_lock(); > + params = rcu_dereference(t->params); > + > + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) > + dst_release(&params->tcft_enc_metadata->dst); > + > + rcu_read_unlock(); Not an RCU expert, maybe I'm off... This alters params in some way (dst_release), so shouldn't it be considered an UPDATE, involving 'params' replacement? Current code declares it as an rcu read section. Thanks, Shmulik
On Wed, Aug 31, 2016 at 5:46 AM, Hadar Hen Zion <hadarh@mellanox.com> wrote: > > From: Amir Vadai <amir@vadai.me> > > This action could be used before redirecting packets to a shared tunnel > device, or when redirecting packets arriving from a such a device. > > > + > +struct tcf_tunnel_key_params { > + struct rcu_head rcu; > + int tcft_action; Also add " int action;" (see why later) > + struct metadata_dst *tcft_enc_metadata; > +}; > + > + > +static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, > + struct tcf_result *res) > +{ > + struct tcf_tunnel_key *t = to_tunnel_key(a); > + struct tcf_tunnel_key_params *params; > + int action; > + > + rcu_read_lock(); > + > + params = rcu_dereference(t->params); > + > + tcf_lastuse_update(&t->tcf_tm); > + bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb); > + action = t->tcf_action; Ideally, you should read param->action instead of t->tcf_action to be completely clean. > + > + switch (params->tcft_action) { > + case TCA_TUNNEL_KEY_ACT_RELEASE: > + skb_dst_drop(skb); > + break; > + case TCA_TUNNEL_KEY_ACT_SET: > + skb_dst_drop(skb); > + skb_dst_set(skb, dst_clone(&params->tcft_enc_metadata->dst)); > + break; > + default: > + WARN_ONCE(1, "Bad tunnel_key action.\n"); > + break; > + } > + > + rcu_read_unlock(); > + > + return action; > +} >
On Wed, Aug 31, 2016 at 9:39 PM, Eric Dumazet <edumazet@google.com> wrote: > On Wed, Aug 31, 2016 at 5:46 AM, Hadar Hen Zion <hadarh@mellanox.com> wrote: >> >> From: Amir Vadai <amir@vadai.me> >> >> This action could be used before redirecting packets to a shared tunnel >> device, or when redirecting packets arriving from a such a device. >> >> >> + >> +struct tcf_tunnel_key_params { >> + struct rcu_head rcu; >> + int tcft_action; > > Also add " int action;" > > (see why later) > >> + struct metadata_dst *tcft_enc_metadata; >> +}; >> + > > > >> + >> +static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, >> + struct tcf_result *res) >> +{ >> + struct tcf_tunnel_key *t = to_tunnel_key(a); >> + struct tcf_tunnel_key_params *params; >> + int action; >> + >> + rcu_read_lock(); >> + >> + params = rcu_dereference(t->params); >> + >> + tcf_lastuse_update(&t->tcf_tm); >> + bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb); >> + action = t->tcf_action; > > Ideally, you should read param->action instead of t->tcf_action to be > completely clean. As you suggested above, I can do it by adding "int action" to struct tcf_tunnel_key_paramse. But, it means that act_tunnel_key would have a different behavior than all the other actions and even though "struct tc_action" has a designated parameters to store this action we won't use it. So it won't be completely clean... Do you think we have a cleaner way to protect it? > >> + >> + switch (params->tcft_action) { >> + case TCA_TUNNEL_KEY_ACT_RELEASE: >> + skb_dst_drop(skb); >> + break; >> + case TCA_TUNNEL_KEY_ACT_SET: >> + skb_dst_drop(skb); >> + skb_dst_set(skb, dst_clone(&params->tcft_enc_metadata->dst)); >> + break; >> + default: >> + WARN_ONCE(1, "Bad tunnel_key action.\n"); >> + break; >> + } >> + >> + rcu_read_unlock(); >> + >> + return action; >> +} >>
On Wed, Aug 31, 2016 at 8:44 PM, Shmulik Ladkani <shmulik.ladkani@gmail.com> wrote: > Hi, > > On Wed, 31 Aug 2016 15:46:24 +0300 Hadar Hen Zion <hadarh@mellanox.com> wrote: >> +static int tunnel_key_init(struct net *net, struct nlattr *nla, >> + struct nlattr *est, struct tc_action **a, >> + int ovr, int bind) >> +{ >> + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); >> + struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; >> + struct metadata_dst *metadata = NULL; >> + struct tc_tunnel_key *parm; >> + struct tcf_tunnel_key *t; >> + struct tcf_tunnel_key_params *params_old; >> + struct tcf_tunnel_key_params *params_new; >> + __be64 key_id; >> + bool exists = false; >> + int ret = 0; >> + int err; >> + >> + if (!nla) >> + return -EINVAL; >> + >> + err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy); >> + if (err < 0) >> + return err; >> + >> + if (!tb[TCA_TUNNEL_KEY_PARMS]) >> + return -EINVAL; >> + >> + parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); >> + exists = tcf_hash_check(tn, parm->index, a, bind); >> + if (exists && bind) >> + return 0; >> + >> + switch (parm->t_action) { >> + case TCA_TUNNEL_KEY_ACT_RELEASE: >> + break; >> + case TCA_TUNNEL_KEY_ACT_SET: >> + if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) { >> + ret = -EINVAL; >> + goto err_out; >> + } >> + >> + key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); >> + >> + if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && >> + tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { >> + __be32 saddr; >> + __be32 daddr; >> + >> + saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]); >> + daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); >> + >> + metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, >> + TUNNEL_KEY, key_id, 0); >> + } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && >> + tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { >> + struct in6_addr saddr; >> + struct in6_addr daddr; >> + >> + saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]); >> + daddr = 
nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); >> + >> + metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0, >> + TUNNEL_KEY, key_id, 0); >> + } >> + >> + if (!metadata) { >> + ret = -EINVAL; >> + goto err_out; >> + } >> + >> + metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; >> + break; >> + default: >> + goto err_out; >> + } >> + >> + if (!exists) { >> + ret = tcf_hash_create(tn, parm->index, est, a, >> + &act_tunnel_key_ops, bind, true); >> + if (ret) >> + return ret; >> + >> + ret = ACT_P_CREATED; >> + } else { >> + tcf_hash_release(*a, bind); >> + if (!ovr) >> + return -EEXIST; >> + } >> + >> + t = to_tunnel_key(*a); >> + >> + ASSERT_RTNL(); >> + params_new = kzalloc(sizeof(*params_new), >> + GFP_KERNEL); > > nit: Fits oneline. Fix if patch needs other amendments. Sure, will do. > >> + if (unlikely(!params_new)) { >> + if (ovr) >> + tcf_hash_release(*a, bind); >> + return -ENOMEM; > > Seems we need to call tcf_hash_release regardless 'ovr': > In case (!exist), we've created a new hash few lines above. > Therefore in failure, don't we need a tcf_hash_release()? > Am I missing something? You are right, "if (ovr)" line should be removed. 
> >> + } >> + >> + params_old = rtnl_dereference(t->params); >> + >> + t->tcf_action = parm->action; >> + params_new->tcft_action = parm->t_action; >> + params_new->tcft_enc_metadata = metadata; >> + >> + rcu_assign_pointer(t->params, params_new); >> + >> + if (params_old) >> + kfree_rcu(params_old, rcu); >> + >> + if (ret == ACT_P_CREATED) >> + tcf_hash_insert(tn, *a); >> + >> + return ret; >> + >> +err_out: >> + if (exists) >> + tcf_hash_release(*a, bind); >> + return ret; >> +} >> + >> +static void tunnel_key_release(struct tc_action *a, int bind) >> +{ >> + struct tcf_tunnel_key *t = to_tunnel_key(a); >> + struct tcf_tunnel_key_params *params; >> + >> + rcu_read_lock(); >> + params = rcu_dereference(t->params); >> + >> + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) >> + dst_release(&params->tcft_enc_metadata->dst); >> + >> + rcu_read_unlock(); > > Not an RCU expert, maybe I'm off... > This alters params in some way (dst_release), so shouldn't it be > considered an UPDATE, involving 'params' replacement? > Current code declares it as an rcu read section. > dst_release function is using call_rcu to release the dst, so i think we are safe here. > Thanks, > Shmulik
On Thu, 2016-09-01 at 12:28 +0300, Hadar Hen Zion wrote: > > As you suggested above, I can do it by adding "int action" to struct > tcf_tunnel_key_paramse. > But, it means that act_tunnel_key would have a different behavior than > all the other actions and even though > "struct tc_action" has a designated parameters to store this action we > won't use it. > So it won't be completely clean... > > Do you think we have a cleaner way to protect it? Fact that the act_ modules had a spinlock made them all share the same structure. Now we want RCU protection, here is the thing. Say you want to access 3 different fields, A, B and C. If you put A and B in the rcu protected pointer, but leave C in the 'control part, protected by spinlock' Then your fast path wont be able to have a consistent view of 3 variables A, B C. It might read an old value of A & B, and the recently updated C, Or it might read an old C, and the updated values of A & B As Cong very kindly pointed to us/me, if we want to be 'clean', we want to make sure we read a consistent 3-tuple. I will send updates when I have time to act_mirred.c
On Thu, Sep 1, 2016 at 4:16 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > On Thu, 2016-09-01 at 12:28 +0300, Hadar Hen Zion wrote: > >> >> As you suggested above, I can do it by adding "int action" to struct >> tcf_tunnel_key_paramse. >> But, it means that act_tunnel_key would have a different behavior than >> all the other actions and even though >> "struct tc_action" has a designated parameters to store this action we >> won't use it. >> So it won't be completely clean... >> >> Do you think we have a cleaner way to protect it? > > Fact that the act_ modules had a spinlock made them all share the same > structure. > > Now we want RCU protection, here is the thing. > > Say you want to access 3 different fields, A, B and C. > > If you put A and B in the rcu protected pointer, but leave C in the > 'control part, protected by spinlock' > > Then your fast path wont be able to have a consistent view of 3 > variables A, B C. > > It might read an old value of A & B, and the recently updated C, > > Or it might read an old C, and the updated values of A & B Yes, agree. I'll add 'action' to struct tcf_tunnel_key_params. Thanks, Hadar > > As Cong very kindly pointed to us/me, if we want to be 'clean', we want > to make sure we read a consistent 3-tuple. > > I will send updates when I have time to act_mirred.c > >
On Thu, 1 Sep 2016 14:59:28 +0300 Hadar Hen Zion <hadarh@dev.mellanox.co.il> wrote: > > Seems we need to call tcf_hash_release regardless 'ovr': > > In case (!exist), we've created a new hash few lines above. > > Therefore in failure, don't we need a tcf_hash_release()? > > Am I missing something? > > You are right, "if (ovr)" line should be removed. Looking at it again, seems the right condition should be (pls verify): if (ret == ACT_P_CREATED) tcf_hash_release(*a, bind); Thanks, Shmulik
diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h new file mode 100644 index 0000000..8610504 --- /dev/null +++ b/include/net/tc_act/tc_tunnel_key.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016, Amir Vadai <amir@vadai.me> + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __NET_TC_TUNNEL_KEY_H +#define __NET_TC_TUNNEL_KEY_H + +#include <net/act_api.h> + +struct tcf_tunnel_key_params { + struct rcu_head rcu; + int tcft_action; + struct metadata_dst *tcft_enc_metadata; +}; + +struct tcf_tunnel_key { + struct tc_action common; + struct tcf_tunnel_key_params *params; +}; + +#define to_tunnel_key(a) ((struct tcf_tunnel_key *)a) + +#endif /* __NET_TC_TUNNEL_KEY_H */ + diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h new file mode 100644 index 0000000..f9ddf53 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016, Amir Vadai <amir@vadai.me> + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef __LINUX_TC_TUNNEL_KEY_H +#define __LINUX_TC_TUNNEL_KEY_H + +#include <linux/pkt_cls.h> + +#define TCA_ACT_TUNNEL_KEY 17 + +#define TCA_TUNNEL_KEY_ACT_SET 1 +#define TCA_TUNNEL_KEY_ACT_RELEASE 2 + +struct tc_tunnel_key { + tc_gen; + int t_action; +}; + +enum { + TCA_TUNNEL_KEY_UNSPEC, + TCA_TUNNEL_KEY_TM, + TCA_TUNNEL_KEY_PARMS, + TCA_TUNNEL_KEY_ENC_IPV4_SRC, /* be32 */ + TCA_TUNNEL_KEY_ENC_IPV4_DST, /* be32 */ + TCA_TUNNEL_KEY_ENC_IPV6_SRC, /* struct in6_addr */ + TCA_TUNNEL_KEY_ENC_IPV6_DST, /* struct in6_addr */ + TCA_TUNNEL_KEY_ENC_KEY_ID, /* be64 */ + TCA_TUNNEL_KEY_PAD, + __TCA_TUNNEL_KEY_MAX, +}; + +#define TCA_TUNNEL_KEY_MAX (__TCA_TUNNEL_KEY_MAX - 1) + +#endif + diff --git a/net/sched/Kconfig b/net/sched/Kconfig index ccf931b..72e3426 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -761,6 +761,17 @@ config NET_ACT_IFE To compile this code as a module, choose M here: the module will be called act_ife. +config NET_ACT_TUNNEL_KEY + tristate "IP tunnel metadata manipulation" + depends on NET_CLS_ACT + ---help--- + Say Y here to set/release ip tunnel metadata. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_tunnel_key. 
+ config NET_IFE_SKBMARK tristate "Support to encoding decoding skb mark on IFE action" depends on NET_ACT_IFE diff --git a/net/sched/Makefile b/net/sched/Makefile index ae088a5..b9d046b 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o +obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c new file mode 100644 index 0000000..62c4202 --- /dev/null +++ b/net/sched/act_tunnel_key.c @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2016, Amir Vadai <amir@vadai.me> + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> +#include <net/dst.h> +#include <net/dst_metadata.h> + +#include <linux/tc_act/tc_tunnel_key.h> +#include <net/tc_act/tc_tunnel_key.h> + +#define TUNNEL_KEY_TAB_MASK 15 + +static int tunnel_key_net_id; +static struct tc_action_ops act_tunnel_key_ops; + +static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + int action; + + rcu_read_lock(); + + params = rcu_dereference(t->params); + + tcf_lastuse_update(&t->tcf_tm); + bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb); + action = t->tcf_action; + + switch (params->tcft_action) { + case TCA_TUNNEL_KEY_ACT_RELEASE: + skb_dst_drop(skb); + break; + case TCA_TUNNEL_KEY_ACT_SET: + skb_dst_drop(skb); + skb_dst_set(skb, dst_clone(&params->tcft_enc_metadata->dst)); + break; + default: + WARN_ONCE(1, "Bad tunnel_key action.\n"); + break; + } + + rcu_read_unlock(); + + return action; +} + +static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { + [TCA_TUNNEL_KEY_PARMS] = { .len = sizeof(struct tc_tunnel_key) }, + [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, +}; + +static int tunnel_key_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; + struct metadata_dst *metadata = NULL; + struct tc_tunnel_key *parm; + struct tcf_tunnel_key *t; + struct 
tcf_tunnel_key_params *params_old; + struct tcf_tunnel_key_params *params_new; + __be64 key_id; + bool exists = false; + int ret = 0; + int err; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy); + if (err < 0) + return err; + + if (!tb[TCA_TUNNEL_KEY_PARMS]) + return -EINVAL; + + parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + switch (parm->t_action) { + case TCA_TUNNEL_KEY_ACT_RELEASE: + break; + case TCA_TUNNEL_KEY_ACT_SET: + if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) { + ret = -EINVAL; + goto err_out; + } + + key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); + + if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && + tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { + __be32 saddr; + __be32 daddr; + + saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]); + daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); + + metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, + TUNNEL_KEY, key_id, 0); + } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && + tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { + struct in6_addr saddr; + struct in6_addr daddr; + + saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]); + daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); + + metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0, + TUNNEL_KEY, key_id, 0); + } + + if (!metadata) { + ret = -EINVAL; + goto err_out; + } + + metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; + break; + default: + goto err_out; + } + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_tunnel_key_ops, bind, true); + if (ret) + return ret; + + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + + t = to_tunnel_key(*a); + + ASSERT_RTNL(); + params_new = kzalloc(sizeof(*params_new), + GFP_KERNEL); + if (unlikely(!params_new)) { + if (ovr) + tcf_hash_release(*a, bind); + return -ENOMEM; + } + + params_old = 
rtnl_dereference(t->params); + + t->tcf_action = parm->action; + params_new->tcft_action = parm->t_action; + params_new->tcft_enc_metadata = metadata; + + rcu_assign_pointer(t->params, params_new); + + if (params_old) + kfree_rcu(params_old, rcu); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + + return ret; + +err_out: + if (exists) + tcf_hash_release(*a, bind); + return ret; +} + +static void tunnel_key_release(struct tc_action *a, int bind) +{ + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + + rcu_read_lock(); + params = rcu_dereference(t->params); + + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + dst_release(&params->tcft_enc_metadata->dst); + + rcu_read_unlock(); +} + +static int tunnel_key_dump_addresses(struct sk_buff *skb, + const struct ip_tunnel_info *info) +{ + unsigned short family = ip_tunnel_info_af(info); + + if (family == AF_INET) { + __be32 saddr = info->key.u.ipv4.src; + __be32 daddr = info->key.u.ipv4.dst; + + if (!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_SRC, saddr) && + !nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_DST, daddr)) + return 0; + } + + if (family == AF_INET6) { + const struct in6_addr *saddr6 = &info->key.u.ipv6.src; + const struct in6_addr *daddr6 = &info->key.u.ipv6.dst; + + if (!nla_put_in6_addr(skb, + TCA_TUNNEL_KEY_ENC_IPV6_SRC, saddr6) && + !nla_put_in6_addr(skb, + TCA_TUNNEL_KEY_ENC_IPV6_DST, daddr6)) + return 0; + } + + return -EINVAL; +} + +static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + struct tc_tunnel_key opt = { + .index = t->tcf_index, + .refcnt = t->tcf_refcnt - ref, + .bindcnt = t->tcf_bindcnt - bind, + .action = t->tcf_action, + }; + struct tcf_t tm; + int ret = -1; + + rcu_read_lock(); + params = rcu_dereference(t->params); + + opt.t_action = params->tcft_action; + + if (nla_put(skb, 
TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) { + struct ip_tunnel_key *key = + &params->tcft_enc_metadata->u.tun_info.key; + __be32 key_id = tunnel_id_to_key32(key->tun_id); + + if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) || + tunnel_key_dump_addresses(skb, + &params->tcft_enc_metadata->u.tun_info)) + goto nla_put_failure; + } + + tcf_tm_dump(&tm, &t->tcf_tm); + if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm), + &tm, TCA_TUNNEL_KEY_PAD)) + goto nla_put_failure; + + ret = skb->len; + goto out; + +nla_put_failure: + nlmsg_trim(skb, b); +out: + rcu_read_unlock(); + + return ret; +} + +static int tunnel_key_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_tunnel_key_ops = { + .kind = "tunnel_key", + .type = TCA_ACT_TUNNEL_KEY, + .owner = THIS_MODULE, + .act = tunnel_key_act, + .dump = tunnel_key_dump, + .init = tunnel_key_init, + .cleanup = tunnel_key_release, + .walk = tunnel_key_walker, + .lookup = tunnel_key_search, + .size = sizeof(struct tcf_tunnel_key), +}; + +static __net_init int tunnel_key_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK); +} + +static void __net_exit tunnel_key_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations tunnel_key_net_ops = { + .init = tunnel_key_init_net, + .exit = tunnel_key_exit_net, + .id = 
&tunnel_key_net_id, + .size = sizeof(struct tc_action_net), +}; + +static int __init tunnel_key_init_module(void) +{ + return tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops); +} + +static void __exit tunnel_key_cleanup_module(void) +{ + tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops); +} + +module_init(tunnel_key_init_module); +module_exit(tunnel_key_cleanup_module); + +MODULE_AUTHOR("Amir Vadai <amir@vadai.me>"); +MODULE_DESCRIPTION("ip tunnel manipulation actions"); +MODULE_LICENSE("GPL v2");