Message ID | 1588631301-21564-2-git-send-email-roopa@cumulusnetworks.com |
---|---|
State | RFC |
Delegated to: | David Miller |
Headers | show |
Series | Support for fdb ECMP nexthop groups | expand |
On Mon, May 4, 2020 at 3:28 PM Roopa Prabhu <roopa@cumulusnetworks.com> wrote: > > From: Roopa Prabhu <roopa@cumulusnetworks.com> > > This patch introduces ecmp nexthops and nexthop groups > for mac fdb entries. In subsequent patches this is used > by the vxlan driver fdb entries. The use case is > E-VPN multihoming [1,2,3] which requires bridged vxlan traffic > to be load balanced to remote switches (vteps) belonging to > the same multi-homed ethernet segment (This is analogous to > a multi-homed LAG but over vxlan). > > Changes include new nexthop flag NHA_FDB for nexthops > referenced by fdb entries. These nexthops only have ip. > This patch includes appropriate checks to avoid routes > referencing such nexthops. > > example: > $ip nexthop add id 12 via 172.16.1.2 fdb > $ip nexthop add id 13 via 172.16.1.3 fdb > $ip nexthop add id 102 group 12/13 fdb > > $bridge fdb add 02:02:00:00:00:13 dev vxlan1000 nhid 101 self > > [1] E-VPN https://tools.ietf.org/html/rfc7432 > [2] E-VPN VxLAN: https://tools.ietf.org/html/rfc8365 > [3] LPC talk with mention of nexthop groups for L2 ecmp > http://vger.kernel.org/lpc_net2018_talks/scaling_bridge_fdb_database_slidesV3.pdf > > Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com> > --- > include/net/nexthop.h | 14 ++++++ > include/uapi/linux/nexthop.h | 1 + > net/ipv4/nexthop.c | 101 +++++++++++++++++++++++++++++++++---------- > 3 files changed, 93 insertions(+), 23 deletions(-) > > diff --git a/include/net/nexthop.h b/include/net/nexthop.h > index c440ccc..3ad4e97 100644 > --- a/include/net/nexthop.h > +++ b/include/net/nexthop.h > @@ -26,6 +26,7 @@ struct nh_config { > u8 nh_family; > u8 nh_protocol; > u8 nh_blackhole; > + u8 nh_fdb; > u32 nh_flags; > > int nh_ifindex; > @@ -52,6 +53,7 @@ struct nh_info { > > u8 family; > bool reject_nh; > + bool fdb_nh; > > union { > struct fib_nh_common fib_nhc; > @@ -80,6 +82,7 @@ struct nexthop { > struct rb_node rb_node; /* entry on netns rbtree */ > struct list_head fi_list; /* v4 entries using nh */ > struct list_head f6i_list; /* v6 entries using nh */ > + struct list_head fdb_list; /* fdb entries using this nh */ > struct list_head grp_list; /* nh group entries using this nh */ > struct net *net; > > @@ -88,6 +91,7 @@ struct nexthop { > u8 protocol; /* app managing this nh */ > u8 nh_flags; > bool is_group; > + bool is_fdb_nh; > > refcount_t refcnt; > struct rcu_head rcu; > @@ -304,4 +308,14 @@ static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) > int nexthop_for_each_fib6_nh(struct nexthop *nh, > int (*cb)(struct fib6_nh *nh, void *arg), > void *arg); > + > +static inline struct nh_info *nexthop_path_fdb(struct nexthop *nh, u32 hash) > +{ > + struct nh_info *nhi; > + > + nh = nexthop_select_path(nh, hash); > + nhi = rcu_dereference(nh->nh_info); > + > + return nhi; > +} > #endif > diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h > index 7b61867..19a234a 100644 > --- a/include/uapi/linux/nexthop.h > +++ b/include/uapi/linux/nexthop.h > @@ -48,6 +48,7 @@ enum { > */ > NHA_GROUPS, /* flag; only return nexthop groups in dump */ > NHA_MASTER, /* u32; only return nexthops with given master dev */ > + NHA_FDB, /* nexthop belongs to a bridge fdb */ > > __NHA_MAX, > }; > diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c > index 3957364..98f8d2a 100644 > --- a/net/ipv4/nexthop.c > +++ b/net/ipv4/nexthop.c > @@ -33,6 +33,7 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { > [NHA_ENCAP] = { .type = NLA_NESTED }, > [NHA_GROUPS] = { .type = NLA_FLAG }, > [NHA_MASTER] = { .type = NLA_U32 }, > + [NHA_FDB] = { .type = NLA_FLAG }, > }; > > static unsigned int nh_dev_hashfn(unsigned int val) > @@ -107,6 +108,7 @@ static struct nexthop *nexthop_alloc(void) > INIT_LIST_HEAD(&nh->fi_list); > INIT_LIST_HEAD(&nh->f6i_list); > INIT_LIST_HEAD(&nh->grp_list); > + INIT_LIST_HEAD(&nh->fdb_list); > } > return nh; > } > @@ -227,6 +229,9 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, > if (nla_put_u32(skb, NHA_ID, nh->id)) > goto nla_put_failure; > > + if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB)) > + goto nla_put_failure; > + > if (nh->is_group) { > struct nh_group *nhg = rtnl_dereference(nh->nh_grp); > > @@ -241,7 +246,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, > if (nla_put_flag(skb, NHA_BLACKHOLE)) > goto nla_put_failure; > goto out; > - } else { > + } else if (!nh->is_fdb_nh) { > const struct net_device *dev; > > dev = nhi->fib_nhc.nhc_dev; > @@ -393,6 +398,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], > unsigned int len = nla_len(tb[NHA_GROUP]); > struct nexthop_grp *nhg; > unsigned int i, j; > + u8 nhg_fdb = 0; > > if (len & (sizeof(struct nexthop_grp) - 1)) { > NL_SET_ERR_MSG(extack, > @@ -421,6 +427,8 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], > } > } > > + if (tb[NHA_FDB]) > + nhg_fdb = 1; > nhg = nla_data(tb[NHA_GROUP]); > for (i = 0; i < len; ++i) { > struct nexthop *nh; > @@ -432,11 +440,16 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], > } > if (!valid_group_nh(nh, len, extack)) > return -EINVAL; > + if (nhg_fdb && !nh->is_fdb_nh) { > + NL_SET_ERR_MSG(extack, "FDB Multipath group can only have fdb nexthops"); > + return -EINVAL; > + } > } > for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) { > if (!tb[i]) > continue; > - > + if (tb[NHA_FDB]) > + continue; > NL_SET_ERR_MSG(extack, > "No other attributes can be set in nexthop groups"); > return -EINVAL; > @@ -495,6 +508,9 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) > if (hash > atomic_read(&nhge->upper_bound)) > continue; > > + if (nhge->nh->is_fdb_nh) > + return nhge->nh; > + > /* nexthops always check if it is good and does > * not rely on a sysctl for this behavior > */ > @@ -564,6 +580,11 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, > { > struct nh_info *nhi; > > + if (nh->is_fdb_nh) { > + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); > + return -EINVAL; > + } > + > /* fib6_src is unique to a fib6_info and limits the ability to cache > * routes in fib6_nh within a nexthop that is potentially shared > * across multiple fib entries. If the config wants to use source > @@ -640,6 +661,12 @@ int fib_check_nexthop(struct nexthop *nh, u8 scope, > { > int err = 0; > > + if (nh->is_fdb_nh) { > + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); > + err = -EINVAL; > + goto out; > + } > + > if (nh->is_group) { > struct nh_group *nhg; > > @@ -1125,6 +1152,9 @@ static struct nexthop *nexthop_create_group(struct net *net, > nh_group_rebalance(nhg); > } > > + if (cfg->nh_fdb) > + nh->is_fdb_nh = 1; > + > rcu_assign_pointer(nh->nh_grp, nhg); > > return nh; > @@ -1152,7 +1182,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, > .fc_encap = cfg->nh_encap, > .fc_encap_type = cfg->nh_encap_type, > }; > - u32 tb_id = l3mdev_fib_table(cfg->dev); > + u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN); > int err; > > err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack); > @@ -1161,6 +1191,9 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, > goto out; > } > > + if (nh->is_fdb_nh) > + goto out; > + > /* sets nh_dev if successful */ > err = fib_check_nh(net, fib_nh, tb_id, 0, extack); > if (!err) { > @@ -1227,6 +1260,9 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, > nhi->family = cfg->nh_family; > nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK; > > + if (cfg->nh_fdb) > + nh->is_fdb_nh = 1; > + > if (cfg->nh_blackhole) { > nhi->reject_nh = 1; > cfg->nh_ifindex = net->loopback_dev->ifindex; > @@ -1248,7 +1284,8 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, > } > > /* add the entry to the device based hash */ > - nexthop_devhash_add(net, nhi); > + if (!nh->is_fdb_nh) > + nexthop_devhash_add(net, nhi); > > rcu_assign_pointer(nh->nh_info, nhi); > > @@ -1367,6 +1404,9 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, > NL_SET_ERR_MSG(extack, "Invalid group type"); > goto out; > } > + if (tb[NHA_FDB]) > + cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]); > + > err = nh_check_attr_group(net, tb, extack); > > /* no other attributes should be set */ > @@ -1385,26 +1425,38 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, > goto out; > } > > - if (!tb[NHA_OIF]) { > - NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops"); > + if (tb[NHA_FDB]) { > + if (tb[NHA_OIF] || > + tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { > + NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap or oif"); > + goto out; > + } > + > + cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]); > + } > + all this strict checking still missed the check for blackhole and reject flags. I will include them in the non-RC version. > + if (!cfg->nh_fdb && !tb[NHA_OIF]) { > + NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops"); > goto out; > } > > - cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); > - if (cfg->nh_ifindex) > - cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); > + if (!cfg->nh_fdb && tb[NHA_OIF]) { > + cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); > + if (cfg->nh_ifindex) > + cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); > > - if (!cfg->dev) { > - NL_SET_ERR_MSG(extack, "Invalid device index"); > - goto out; > - } else if (!(cfg->dev->flags & IFF_UP)) { > - NL_SET_ERR_MSG(extack, "Nexthop device is not up"); > - err = -ENETDOWN; > - goto out; > - } else if (!netif_carrier_ok(cfg->dev)) { > - NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); > - err = -ENETDOWN; > - goto out; > + if (!cfg->dev) { > + NL_SET_ERR_MSG(extack, "Invalid device index"); > + goto out; > + } else if (!(cfg->dev->flags & IFF_UP)) { > + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); > + err = -ENETDOWN; > + goto out; > + } else if (!netif_carrier_ok(cfg->dev)) { > + NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); > + err = -ENETDOWN; > + goto out; > + } > } > > err = -EINVAL; > @@ -1633,7 +1685,7 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx, > > static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, > int *master_idx, bool *group_filter, > - struct netlink_callback *cb) > + bool *fdb_filter, struct netlink_callback *cb) > { > struct netlink_ext_ack *extack = cb->extack; > struct nlattr *tb[NHA_MAX + 1]; > @@ -1670,6 +1722,9 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, > case NHA_GROUPS: > *group_filter = true; > break; > + case NHA_FDB: > + *fdb_filter = true; > + break; > default: > NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request"); > return -EINVAL; > @@ -1688,17 +1743,17 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, > /* rtnl */ > static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) > { > + bool group_filter = false, fdb_filter = false; > struct nhmsg *nhm = nlmsg_data(cb->nlh); > int dev_filter_idx = 0, master_idx = 0; > struct net *net = sock_net(skb->sk); > struct rb_root *root = &net->nexthop.rb_root; > - bool group_filter = false; > struct rb_node *node; > int idx = 0, s_idx; > int err; > > err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx, > - &group_filter, cb); > + &group_filter, &fdb_filter, cb); > if (err < 0) > return err; > > -- > 2.1.4 >
On 5/4/20 4:28 PM, Roopa Prabhu wrote: > include/net/nexthop.h | 14 ++++++ > include/uapi/linux/nexthop.h | 1 + > net/ipv4/nexthop.c | 101 +++++++++++++++++++++++++++++++++---------- > 3 files changed, 93 insertions(+), 23 deletions(-) pretty cool that you can extend this from routes to fdb entries with such a small change stat. > > diff --git a/include/net/nexthop.h b/include/net/nexthop.h > index c440ccc..3ad4e97 100644 > --- a/include/net/nexthop.h > +++ b/include/net/nexthop.h > @@ -26,6 +26,7 @@ struct nh_config { > u8 nh_family; > u8 nh_protocol; > u8 nh_blackhole; > + u8 nh_fdb; > u32 nh_flags; > > int nh_ifindex; > @@ -52,6 +53,7 @@ struct nh_info { > > u8 family; > bool reject_nh; > + bool fdb_nh; > > union { > struct fib_nh_common fib_nhc; > @@ -80,6 +82,7 @@ struct nexthop { > struct rb_node rb_node; /* entry on netns rbtree */ > struct list_head fi_list; /* v4 entries using nh */ > struct list_head f6i_list; /* v6 entries using nh */ > + struct list_head fdb_list; /* fdb entries using this nh */ > struct list_head grp_list; /* nh group entries using this nh */ > struct net *net; > > @@ -88,6 +91,7 @@ struct nexthop { > u8 protocol; /* app managing this nh */ > u8 nh_flags; > bool is_group; > + bool is_fdb_nh; > > refcount_t refcnt; > struct rcu_head rcu; > @@ -304,4 +308,14 @@ static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) > int nexthop_for_each_fib6_nh(struct nexthop *nh, > int (*cb)(struct fib6_nh *nh, void *arg), > void *arg); > + > +static inline struct nh_info *nexthop_path_fdb(struct nexthop *nh, u32 hash) this is used in the next patch. Any way to not leak the nh_info struct into vxlan code? Right now nh_info is only used in nexthop.{c,h}. > +{ > + struct nh_info *nhi; > + > + nh = nexthop_select_path(nh, hash); > + nhi = rcu_dereference(nh->nh_info); > + > + return nhi; > +} > #endif > diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h > index 7b61867..19a234a 100644 > --- a/include/uapi/linux/nexthop.h > +++ b/include/uapi/linux/nexthop.h > @@ -48,6 +48,7 @@ enum { > */ > NHA_GROUPS, /* flag; only return nexthop groups in dump */ > NHA_MASTER, /* u32; only return nexthops with given master dev */ > + NHA_FDB, /* nexthop belongs to a bridge fdb */ please add the 'type' to the comment; I tried to make this uapi file completely self-documenting. ie., no one should have to consult the code to know what kind of attribute NHA_FDB is. > > __NHA_MAX, > }; > diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c > index 3957364..98f8d2a 100644 > --- a/net/ipv4/nexthop.c > +++ b/net/ipv4/nexthop.c > @@ -33,6 +33,7 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { > [NHA_ENCAP] = { .type = NLA_NESTED }, > [NHA_GROUPS] = { .type = NLA_FLAG }, > [NHA_MASTER] = { .type = NLA_U32 }, > + [NHA_FDB] = { .type = NLA_FLAG }, > }; > > static unsigned int nh_dev_hashfn(unsigned int val) > @@ -107,6 +108,7 @@ static struct nexthop *nexthop_alloc(void) > INIT_LIST_HEAD(&nh->fi_list); > INIT_LIST_HEAD(&nh->f6i_list); > INIT_LIST_HEAD(&nh->grp_list); > + INIT_LIST_HEAD(&nh->fdb_list); > } > return nh; > } > @@ -227,6 +229,9 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, > if (nla_put_u32(skb, NHA_ID, nh->id)) > goto nla_put_failure; > > + if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB)) > + goto nla_put_failure; > + > if (nh->is_group) { > struct nh_group *nhg = rtnl_dereference(nh->nh_grp); > > @@ -241,7 +246,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, > if (nla_put_flag(skb, NHA_BLACKHOLE)) > goto nla_put_failure; > goto out; > - } else { > + } else if (!nh->is_fdb_nh) { > const struct net_device *dev; > > dev = nhi->fib_nhc.nhc_dev; > @@ -393,6 +398,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], > unsigned int len = nla_len(tb[NHA_GROUP]); > struct nexthop_grp *nhg; > unsigned int i, j; > + u8 nhg_fdb = 0; > > if (len & (sizeof(struct nexthop_grp) - 1)) { > NL_SET_ERR_MSG(extack, > @@ -421,6 +427,8 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], > } > } > > + if (tb[NHA_FDB]) > + nhg_fdb = 1; > nhg = nla_data(tb[NHA_GROUP]); > for (i = 0; i < len; ++i) { > struct nexthop *nh; > @@ -432,11 +440,16 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], > } > if (!valid_group_nh(nh, len, extack)) > return -EINVAL; > + if (nhg_fdb && !nh->is_fdb_nh) { > + NL_SET_ERR_MSG(extack, "FDB Multipath group can only have fdb nexthops"); > + return -EINVAL; > + } you should check the reverse as well -- non-nhg_fdb can not use an fdb nh. ie., a group can not be a mix of fdb and route entries. Make sure the selftests covers the permutations as well. > } > for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) { > if (!tb[i]) > continue; > - > + if (tb[NHA_FDB]) > + continue; > NL_SET_ERR_MSG(extack, > "No other attributes can be set in nexthop groups"); > return -EINVAL;
On Mon, May 4, 2020 at 8:23 PM David Ahern <dsahern@gmail.com> wrote: > > On 5/4/20 4:28 PM, Roopa Prabhu wrote: > > include/net/nexthop.h | 14 ++++++ > > include/uapi/linux/nexthop.h | 1 + > > net/ipv4/nexthop.c | 101 +++++++++++++++++++++++++++++++++---------- > > 3 files changed, 93 insertions(+), 23 deletions(-) > > pretty cool that you can extend this from routes to fdb entries with > such a small change stat. yep, exactly. everything fell into place so neatly. > > > > > diff --git a/include/net/nexthop.h b/include/net/nexthop.h > > index c440ccc..3ad4e97 100644 > > --- a/include/net/nexthop.h > > +++ b/include/net/nexthop.h > > @@ -26,6 +26,7 @@ struct nh_config { > > u8 nh_family; > > u8 nh_protocol; > > u8 nh_blackhole; > > + u8 nh_fdb; > > u32 nh_flags; > > > > int nh_ifindex; > > @@ -52,6 +53,7 @@ struct nh_info { > > > > u8 family; > > bool reject_nh; > > + bool fdb_nh; > > > > union { > > struct fib_nh_common fib_nhc; > > @@ -80,6 +82,7 @@ struct nexthop { > > struct rb_node rb_node; /* entry on netns rbtree */ > > struct list_head fi_list; /* v4 entries using nh */ > > struct list_head f6i_list; /* v6 entries using nh */ > > + struct list_head fdb_list; /* fdb entries using this nh */ > > struct list_head grp_list; /* nh group entries using this nh */ > > struct net *net; > > > > @@ -88,6 +91,7 @@ struct nexthop { > > u8 protocol; /* app managing this nh */ > > u8 nh_flags; > > bool is_group; > > + bool is_fdb_nh; > > > > refcount_t refcnt; > > struct rcu_head rcu; > > @@ -304,4 +308,14 @@ static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) > > int nexthop_for_each_fib6_nh(struct nexthop *nh, > > int (*cb)(struct fib6_nh *nh, void *arg), > > void *arg); > > + > > +static inline struct nh_info *nexthop_path_fdb(struct nexthop *nh, u32 hash) > > this is used in the next patch. Any way to not leak the nh_info struct > into vxlan code? Right now nh_info is only used in nexthop.{c,h}. > yes, sure I think I can do that. I will add an api to get the nh family and use nh everywhere. > > +{ > > + struct nh_info *nhi; > > + > > + nh = nexthop_select_path(nh, hash); > > + nhi = rcu_dereference(nh->nh_info); > > + > > + return nhi; > > +} > > #endif > > diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h > > index 7b61867..19a234a 100644 > > --- a/include/uapi/linux/nexthop.h > > +++ b/include/uapi/linux/nexthop.h > > @@ -48,6 +48,7 @@ enum { > > */ > > NHA_GROUPS, /* flag; only return nexthop groups in dump */ > > NHA_MASTER, /* u32; only return nexthops with given master dev */ > > + NHA_FDB, /* nexthop belongs to a bridge fdb */ > > please add the 'type' to the comment; I tried to make this uapi file > completely self-documenting. ie., no one should have to consult the code > to know what kind of attribute NHA_FDB is. > yep, will do. not sure how i missed it, > > > > __NHA_MAX, > > }; > > diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c > > index 3957364..98f8d2a 100644 > > --- a/net/ipv4/nexthop.c > > +++ b/net/ipv4/nexthop.c > > @@ -33,6 +33,7 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { > > [NHA_ENCAP] = { .type = NLA_NESTED }, > > [NHA_GROUPS] = { .type = NLA_FLAG }, > > [NHA_MASTER] = { .type = NLA_U32 }, > > + [NHA_FDB] = { .type = NLA_FLAG }, > > }; > > > > static unsigned int nh_dev_hashfn(unsigned int val) > > @@ -107,6 +108,7 @@ static struct nexthop *nexthop_alloc(void) > > INIT_LIST_HEAD(&nh->fi_list); > > INIT_LIST_HEAD(&nh->f6i_list); > > INIT_LIST_HEAD(&nh->grp_list); > > + INIT_LIST_HEAD(&nh->fdb_list); > > } > > return nh; > > } > > @@ -227,6 +229,9 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, > > if (nla_put_u32(skb, NHA_ID, nh->id)) > > goto nla_put_failure; > > > > + if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB)) > > + goto nla_put_failure; > > + > > if (nh->is_group) { > > struct nh_group *nhg = rtnl_dereference(nh->nh_grp); > > > > @@ -241,7 +246,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, > > if (nla_put_flag(skb, NHA_BLACKHOLE)) > > goto nla_put_failure; > > goto out; > > - } else { > > + } else if (!nh->is_fdb_nh) { > > const struct net_device *dev; > > > > dev = nhi->fib_nhc.nhc_dev; > > @@ -393,6 +398,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], > > unsigned int len = nla_len(tb[NHA_GROUP]); > > struct nexthop_grp *nhg; > > unsigned int i, j; > > + u8 nhg_fdb = 0; > > > > if (len & (sizeof(struct nexthop_grp) - 1)) { > > NL_SET_ERR_MSG(extack, > > @@ -421,6 +427,8 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], > > } > > } > > > > + if (tb[NHA_FDB]) > > + nhg_fdb = 1; > > nhg = nla_data(tb[NHA_GROUP]); > > for (i = 0; i < len; ++i) { > > struct nexthop *nh; > > @@ -432,11 +440,16 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], > > } > > if (!valid_group_nh(nh, len, extack)) > > return -EINVAL; > > + if (nhg_fdb && !nh->is_fdb_nh) { > > + NL_SET_ERR_MSG(extack, "FDB Multipath group can only have fdb nexthops"); > > + return -EINVAL; > > + } > > you should check the reverse as well -- non-nhg_fdb can not use an fdb > nh. ie., a group can not be a mix of fdb and route entries. > > Make sure the selftests covers the permutations as well. > yep, will do > > } > > for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) { > > if (!tb[i]) > > continue; > > - > > + if (tb[NHA_FDB]) > > + continue; > > NL_SET_ERR_MSG(extack, > > "No other attributes can be set in nexthop groups"); > > return -EINVAL; > >
diff --git a/include/net/nexthop.h b/include/net/nexthop.h index c440ccc..3ad4e97 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -26,6 +26,7 @@ struct nh_config { u8 nh_family; u8 nh_protocol; u8 nh_blackhole; + u8 nh_fdb; u32 nh_flags; int nh_ifindex; @@ -52,6 +53,7 @@ struct nh_info { u8 family; bool reject_nh; + bool fdb_nh; union { struct fib_nh_common fib_nhc; @@ -80,6 +82,7 @@ struct nexthop { struct rb_node rb_node; /* entry on netns rbtree */ struct list_head fi_list; /* v4 entries using nh */ struct list_head f6i_list; /* v6 entries using nh */ + struct list_head fdb_list; /* fdb entries using this nh */ struct list_head grp_list; /* nh group entries using this nh */ struct net *net; @@ -88,6 +91,7 @@ struct nexthop { u8 protocol; /* app managing this nh */ u8 nh_flags; bool is_group; + bool is_fdb_nh; refcount_t refcnt; struct rcu_head rcu; @@ -304,4 +308,14 @@ static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) int nexthop_for_each_fib6_nh(struct nexthop *nh, int (*cb)(struct fib6_nh *nh, void *arg), void *arg); + +static inline struct nh_info *nexthop_path_fdb(struct nexthop *nh, u32 hash) +{ + struct nh_info *nhi; + + nh = nexthop_select_path(nh, hash); + nhi = rcu_dereference(nh->nh_info); + + return nhi; +} #endif diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index 7b61867..19a234a 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -48,6 +48,7 @@ enum { */ NHA_GROUPS, /* flag; only return nexthop groups in dump */ NHA_MASTER, /* u32; only return nexthops with given master dev */ + NHA_FDB, /* nexthop belongs to a bridge fdb */ __NHA_MAX, }; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 3957364..98f8d2a 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -33,6 +33,7 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { [NHA_ENCAP] = { .type = NLA_NESTED }, [NHA_GROUPS] = { .type = NLA_FLAG }, [NHA_MASTER] = { .type = NLA_U32 }, + [NHA_FDB] = { .type = NLA_FLAG }, }; static unsigned int nh_dev_hashfn(unsigned int val) @@ -107,6 +108,7 @@ static struct nexthop *nexthop_alloc(void) INIT_LIST_HEAD(&nh->fi_list); INIT_LIST_HEAD(&nh->f6i_list); INIT_LIST_HEAD(&nh->grp_list); + INIT_LIST_HEAD(&nh->fdb_list); } return nh; } @@ -227,6 +229,9 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nla_put_u32(skb, NHA_ID, nh->id)) goto nla_put_failure; + if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB)) + goto nla_put_failure; + if (nh->is_group) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); @@ -241,7 +246,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nla_put_flag(skb, NHA_BLACKHOLE)) goto nla_put_failure; goto out; - } else { + } else if (!nh->is_fdb_nh) { const struct net_device *dev; dev = nhi->fib_nhc.nhc_dev; @@ -393,6 +398,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], unsigned int len = nla_len(tb[NHA_GROUP]); struct nexthop_grp *nhg; unsigned int i, j; + u8 nhg_fdb = 0; if (len & (sizeof(struct nexthop_grp) - 1)) { NL_SET_ERR_MSG(extack, @@ -421,6 +427,8 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], } } + if (tb[NHA_FDB]) + nhg_fdb = 1; nhg = nla_data(tb[NHA_GROUP]); for (i = 0; i < len; ++i) { struct nexthop *nh; @@ -432,11 +440,16 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], } if (!valid_group_nh(nh, len, extack)) return -EINVAL; + if (nhg_fdb && !nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "FDB Multipath group can only have fdb nexthops"); + return -EINVAL; + } } for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) { if (!tb[i]) continue; - + if (tb[NHA_FDB]) + continue; NL_SET_ERR_MSG(extack, "No other attributes can be set in nexthop groups"); return -EINVAL; @@ -495,6 +508,9 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) if (hash > atomic_read(&nhge->upper_bound)) continue; + if (nhge->nh->is_fdb_nh) + return nhge->nh; + /* nexthops always check if it is good and does * not rely on a sysctl for this behavior */ @@ -564,6 +580,11 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, { struct nh_info *nhi; + if (nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + return -EINVAL; + } + /* fib6_src is unique to a fib6_info and limits the ability to cache * routes in fib6_nh within a nexthop that is potentially shared * across multiple fib entries. If the config wants to use source @@ -640,6 +661,12 @@ int fib_check_nexthop(struct nexthop *nh, u8 scope, { int err = 0; + if (nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + err = -EINVAL; + goto out; + } + if (nh->is_group) { struct nh_group *nhg; @@ -1125,6 +1152,9 @@ static struct nexthop *nexthop_create_group(struct net *net, nh_group_rebalance(nhg); } + if (cfg->nh_fdb) + nh->is_fdb_nh = 1; + rcu_assign_pointer(nh->nh_grp, nhg); return nh; @@ -1152,7 +1182,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, .fc_encap = cfg->nh_encap, .fc_encap_type = cfg->nh_encap_type, }; - u32 tb_id = l3mdev_fib_table(cfg->dev); + u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN); int err; err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack); @@ -1161,6 +1191,9 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, goto out; } + if (nh->is_fdb_nh) + goto out; + /* sets nh_dev if successful */ err = fib_check_nh(net, fib_nh, tb_id, 0, extack); if (!err) { @@ -1227,6 +1260,9 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, nhi->family = cfg->nh_family; nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK; + if (cfg->nh_fdb) + nh->is_fdb_nh = 1; + if (cfg->nh_blackhole) { nhi->reject_nh = 1; cfg->nh_ifindex = net->loopback_dev->ifindex; @@ -1248,7 +1284,8 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, } /* add the entry to the device based hash */ - nexthop_devhash_add(net, nhi); + if (!nh->is_fdb_nh) + nexthop_devhash_add(net, nhi); rcu_assign_pointer(nh->nh_info, nhi); @@ -1367,6 +1404,9 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, NL_SET_ERR_MSG(extack, "Invalid group type"); goto out; } + if (tb[NHA_FDB]) + cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]); + err = nh_check_attr_group(net, tb, extack); /* no other attributes should be set */ @@ -1385,26 +1425,38 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, goto out; } - if (!tb[NHA_OIF]) { - NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops"); + if (tb[NHA_FDB]) { + if (tb[NHA_OIF] || + tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { + NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap or oif"); + goto out; + } + + cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]); + } + + if (!cfg->nh_fdb && !tb[NHA_OIF]) { + NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops"); goto out; } - cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); - if (cfg->nh_ifindex) - cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); + if (!cfg->nh_fdb && tb[NHA_OIF]) { + cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); + if (cfg->nh_ifindex) + cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); - if (!cfg->dev) { - NL_SET_ERR_MSG(extack, "Invalid device index"); - goto out; - } else if (!(cfg->dev->flags & IFF_UP)) { - NL_SET_ERR_MSG(extack, "Nexthop device is not up"); - err = -ENETDOWN; - goto out; - } else if (!netif_carrier_ok(cfg->dev)) { - NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); - err = -ENETDOWN; - goto out; + if (!cfg->dev) { + NL_SET_ERR_MSG(extack, "Invalid device index"); + goto out; + } else if (!(cfg->dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } else if (!netif_carrier_ok(cfg->dev)) { + NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); + err = -ENETDOWN; + goto out; + } } err = -EINVAL; @@ -1633,7 +1685,7 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx, static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, int *master_idx, bool *group_filter, - struct netlink_callback *cb) + bool *fdb_filter, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct nlattr *tb[NHA_MAX + 1]; @@ -1670,6 +1722,9 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, case NHA_GROUPS: *group_filter = true; break; + case NHA_FDB: + *fdb_filter = true; + break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request"); return -EINVAL; @@ -1688,17 +1743,17 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, /* rtnl */ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) { + bool group_filter = false, fdb_filter = false; struct nhmsg *nhm = nlmsg_data(cb->nlh); int dev_filter_idx = 0, master_idx = 0; struct net *net = sock_net(skb->sk); struct rb_root *root = &net->nexthop.rb_root; - bool group_filter = false; struct rb_node *node; int idx = 0, s_idx; int err; err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx, - &group_filter, cb); + &group_filter, &fdb_filter, cb); if (err < 0) return err;