Message ID | 8c6c957ddd81ecd7a1242a3b5c8daed8527732bc.1732630344.git.felix.huettner@stackit.cloud |
---|---|
State | Changes Requested |
Delegated to: | Eelco Chaudron |
Headers | show |
Series | OVS Patches for OVN Fabric Integration | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | warning | apply and check: warning |
Hello, Felix, On Tue, Nov 26, 2024 at 3:37 PM Felix Huettner via dev <ovs-dev@openvswitch.org> wrote: > > Multipath routes have separate structures for each of the nexthops. > Previously if a multipath route was received it was treated as a route > without a nexthop. Support for multipath routes is indeed useful, thank you for spotting the lack of support for it! > Now these nexthops are parsed (up to a limit) and the ovs router uses > the first route in the list. > OVN will use the other routes. While it is true that we will use this in OVN, remember that this is a base library, and this might be generally useful for any consumer of this library, no? > Signed-off-by: Felix Huettner <felix.huettner@stackit.cloud> > --- > lib/route-table.c | 151 +++++++++++++++++++++++++++++++++++++--------- > lib/route-table.h | 12 +++- > 2 files changed, 134 insertions(+), 29 deletions(-) > > diff --git a/lib/route-table.c b/lib/route-table.c > index a9a9c6b85..06e391184 100644 > --- a/lib/route-table.c > +++ b/lib/route-table.c > @@ -203,6 +203,110 @@ route_table_reset(void) > } > } > > +static bool > +route_table_add_nexthop(struct route_table_msg *change, > + bool ipv4, > + struct nlattr *nl_gw, > + uint32_t ifindex) > +{ > + if (change->rd.n_nexthops >= MAX_ROUTE_DATA_NEXTHOP) { > + VLOG_DBG("tried to add more nexthops to a route than possible"); > + return false; > + } > + > + struct route_data_nexthop *nh = &change->rd.nexthops[ > + change->rd.n_nexthops]; > + > + if (nl_gw) { > + if (ipv4) { > + ovs_be32 gw; > + gw = nl_attr_get_be32(nl_gw); > + in6_addr_set_mapped_ipv4(&nh->rta_gw, gw); > + } else { > + nh->rta_gw = nl_attr_get_in6_addr(nl_gw); > + } > + } > + > + if (ifindex && !if_indextoname(ifindex, nh->ifname)) { > + int error = errno; > + > + VLOG_DBG_RL(&rl, "Could not find interface name[%u]: %s", > + ifindex, ovs_strerror(error)); > + if (error == ENXIO) { > + change->relevant = false; > + } else { > + return false; > + } > + } > + change->rd.n_nexthops++; > + return true; > +} > + > + > +/* The following function uses the RTNH defines of rtnetlink.h > + * As these rely on casts from char * to struct rtnexthop * clang will issue > + * a warning about alignment even though the defines will ensure that the data > + * is alligned. */ > +#ifdef __clang__ > +#pragma clang diagnostic push > +#pragma clang diagnostic ignored "-Wcast-align" > +#endif This will not fly and needs to be reworked. Open vSwitch has a very good netlink library, including alternatives to the RTNH macros you have used here. Please have a look at how parsing of nested attributes is handled elsewhere in the code base. > +static bool > +route_table_parse_multipath(struct route_table_msg *change, > + struct nlattr *multipath, > + bool ipv4) > +{ > + static const struct nl_policy policy[] = { > + [RTA_GATEWAY] = { .type = NL_A_U32, .optional = true }, > + }; > + > + static const struct nl_policy policy6[] = { > + [RTA_GATEWAY] = { .type = NL_A_IPV6, .optional = true }, > + }; > + > + struct nlattr *attrs[ARRAY_SIZE(policy)]; > + > + size_t len = RTA_PAYLOAD((struct rtattr *) multipath); > + struct rtnexthop *rtnh = RTA_DATA(multipath); > + > + while (len >= sizeof(*rtnh) && len >= rtnh->rtnh_len && > + change->rd.n_nexthops < MAX_ROUTE_DATA_NEXTHOP) { > + > + struct ofpbuf buf; > + bool parsed; > + > + ofpbuf_use_const(&buf, RTNH_DATA(rtnh), > + rtnh->rtnh_len - sizeof(*rtnh)); > + if (ipv4) { > + parsed = nl_policy_parse(&buf, 0, policy, attrs, > + ARRAY_SIZE(policy)); > + } else { > + parsed = nl_policy_parse(&buf, 0, policy6, attrs, > + ARRAY_SIZE(policy6)); > + } > + > + if (!parsed) { > + VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message"); > + return false; > + } > + > + if (!route_table_add_nexthop(change, ipv4, attrs[RTA_GATEWAY], > + rtnh->rtnh_ifindex)) { > + return false; > + } > + > + len -= RTNH_ALIGN(rtnh->rtnh_len); > + rtnh = RTNH_NEXT(rtnh); > + } > + > + return true; > +} > + > +#ifdef __clang__ > +#pragma clang diagnostic pop > +#endif > + > /* Return RTNLGRP_IPV4_ROUTE or RTNLGRP_IPV6_ROUTE on success, 0 on parse > * error. */ > int > @@ -220,6 +324,7 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_, > [RTA_PREFSRC] = { .type = NL_A_U32, .optional = true }, > [RTA_TABLE] = { .type = NL_A_U32, .optional = true }, > [RTA_PRIORITY] = { .type = NL_A_U32, .optional = true }, > + [RTA_MULTIPATH] = { .type = NL_A_NESTED, .optional = true }, > }; > > static const struct nl_policy policy6[] = { > @@ -230,6 +335,7 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_, > [RTA_PREFSRC] = { .type = NL_A_IPV6, .optional = true }, > [RTA_TABLE] = { .type = NL_A_U32, .optional = true }, > [RTA_PRIORITY] = { .type = NL_A_U32, .optional = true }, > + [RTA_MULTIPATH] = { .type = NL_A_NESTED, .optional = true }, > }; > > struct nlattr *attrs[ARRAY_SIZE(policy)]; > @@ -252,7 +358,6 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_, > if (parsed) { > const struct nlmsghdr *nlmsg; > uint32_t table_id; > - int rta_oif; /* Output interface index. */ > > nlmsg = buf->data; > > @@ -286,21 +391,6 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_, > change->rd.local = rtm->rtm_type == RTN_LOCAL; > change->rd.plen = rtm->rtm_dst_len; > change->rd.rtm_protocol = rtm->rtm_protocol; > - if (attrs[RTA_OIF]) { > - rta_oif = nl_attr_get_u32(attrs[RTA_OIF]); > - > - if (!if_indextoname(rta_oif, change->rd.ifname)) { > - int error = errno; > - > - VLOG_DBG_RL(&rl, "Could not find interface name[%u]: %s", > - rta_oif, ovs_strerror(error)); > - if (error == ENXIO) { > - change->relevant = false; > - } else { > - return 0; > - } > - } > - } > > if (attrs[RTA_DST]) { > if (ipv4) { > @@ -323,21 +413,28 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_, > nl_attr_get_in6_addr(attrs[RTA_PREFSRC]); > } > } > - if (attrs[RTA_GATEWAY]) { > - if (ipv4) { > - ovs_be32 gw; > - gw = nl_attr_get_be32(attrs[RTA_GATEWAY]); > - in6_addr_set_mapped_ipv4(&change->rd.rta_gw, gw); > - } else { > - change->rd.rta_gw = nl_attr_get_in6_addr(attrs[RTA_GATEWAY]); > - } > - } > if (attrs[RTA_MARK]) { > change->rd.mark = nl_attr_get_u32(attrs[RTA_MARK]); > } > if (attrs[RTA_PRIORITY]) { > change->rd.rta_priority = nl_attr_get_u32(attrs[RTA_PRIORITY]); > } > + > + if (attrs[RTA_GATEWAY] || attrs[RTA_OIF]) { > + uint32_t ifindex = 0; > + if (attrs[RTA_OIF]) { > + ifindex = nl_attr_get_u32(attrs[RTA_OIF]); > + } > + if (!route_table_add_nexthop(change, ipv4, attrs[RTA_GATEWAY], > + ifindex)) { > + return 0; > + } In the act of collapsing the handling of RTA_GATEWAY and RTA_OIF you are removing existing error handling, and I'm wondering if this is necessary at all? Would you ever receive a message from the kernel with RTA_GATEWAY, RTA_OIF and RTA_MULTIPATH set at the same time? A less intrusive change might be to leave the existing handling as is, and handle the RTA_MULTIPATH attribute if you get one. Looking at the iproute2 source code [0], there is no branching between these attributes, they are just handled in sequence. 0: https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/ip/iproute.c#n1005 > + } else if (attrs[RTA_MULTIPATH]) { > + if (!route_table_parse_multipath(change, > + attrs[RTA_MULTIPATH], ipv4)) { > + return 0; > + } > + } > } else { > VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message"); > return 0; > @@ -364,8 +461,8 @@ route_table_handle_msg(const struct route_table_msg *change, > const struct route_data *rd = &change->rd; > > ovs_router_insert(rd->mark, &rd->rta_dst, rd->rtm_dst_len, > - rd->local, rd->ifname, &rd->rta_gw, > - &rd->rta_prefsrc); > + rd->local, rd->nexthops[0].ifname, > + &rd->nexthops[0].rta_gw, &rd->rta_prefsrc); > } > } > > diff --git a/lib/route-table.h b/lib/route-table.h > index 1e9f51a98..5a0e317df 100644 > --- a/lib/route-table.h > +++ b/lib/route-table.h > @@ -26,6 +26,13 @@ > > #include "openvswitch/ofpbuf.h" > > +#define MAX_ROUTE_DATA_NEXTHOP 8 Do you have a reference for the choice of this number? > + > +struct route_data_nexthop { > + struct in6_addr rta_gw; > + char ifname[IFNAMSIZ]; /* Interface name. */ > +}; > + > struct route_data { > /* Copied from struct rtmsg. */ > unsigned char rtm_dst_len; > @@ -34,13 +41,14 @@ struct route_data { > /* Extracted from Netlink attributes. */ > struct in6_addr rta_dst; /* 0 if missing. */ > struct in6_addr rta_prefsrc; /* 0 if missing. */ > - struct in6_addr rta_gw; > - char ifname[IFNAMSIZ]; /* Interface name. */ Now that this header is exposed, technically this is an API change, since this used to be a private to route-table.c struct, I guess changing this is fine. However, it would be better if you reordered the patches so that this change was made prior to exposing the interface. That way you would avoid having a breaking API change in the git history. -- Frode Nordahl > uint32_t mark; > uint32_t rta_table_id; /* 0 if missing. */ > unsigned char plen; > unsigned char rtm_protocol; > uint32_t rta_priority; > + > + size_t n_nexthops; > + struct route_data_nexthop nexthops[MAX_ROUTE_DATA_NEXTHOP]; > }; > > /* A digested version of a route message sent down by the kernel to indicate > -- > 2.47.0 > > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff --git a/lib/route-table.c b/lib/route-table.c index a9a9c6b85..06e391184 100644 --- a/lib/route-table.c +++ b/lib/route-table.c @@ -203,6 +203,110 @@ route_table_reset(void) } } +static bool +route_table_add_nexthop(struct route_table_msg *change, + bool ipv4, + struct nlattr *nl_gw, + uint32_t ifindex) +{ + if (change->rd.n_nexthops >= MAX_ROUTE_DATA_NEXTHOP) { + VLOG_DBG("tried to add more nexthops to a route than possible"); + return false; + } + + struct route_data_nexthop *nh = &change->rd.nexthops[ + change->rd.n_nexthops]; + + if (nl_gw) { + if (ipv4) { + ovs_be32 gw; + gw = nl_attr_get_be32(nl_gw); + in6_addr_set_mapped_ipv4(&nh->rta_gw, gw); + } else { + nh->rta_gw = nl_attr_get_in6_addr(nl_gw); + } + } + + if (ifindex && !if_indextoname(ifindex, nh->ifname)) { + int error = errno; + + VLOG_DBG_RL(&rl, "Could not find interface name[%u]: %s", + ifindex, ovs_strerror(error)); + if (error == ENXIO) { + change->relevant = false; + } else { + return false; + } + } + change->rd.n_nexthops++; + return true; +} + + +/* The following function uses the RTNH defines of rtnetlink.h + * As these rely on casts from char * to struct rtnexthop * clang will issue + * a warning about alignment even though the defines will ensure that the data + * is alligned. */ +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcast-align" +#endif + +static bool +route_table_parse_multipath(struct route_table_msg *change, + struct nlattr *multipath, + bool ipv4) +{ + static const struct nl_policy policy[] = { + [RTA_GATEWAY] = { .type = NL_A_U32, .optional = true }, + }; + + static const struct nl_policy policy6[] = { + [RTA_GATEWAY] = { .type = NL_A_IPV6, .optional = true }, + }; + + struct nlattr *attrs[ARRAY_SIZE(policy)]; + + size_t len = RTA_PAYLOAD((struct rtattr *) multipath); + struct rtnexthop *rtnh = RTA_DATA(multipath); + + while (len >= sizeof(*rtnh) && len >= rtnh->rtnh_len && + change->rd.n_nexthops < MAX_ROUTE_DATA_NEXTHOP) { + + struct ofpbuf buf; + bool parsed; + + ofpbuf_use_const(&buf, RTNH_DATA(rtnh), + rtnh->rtnh_len - sizeof(*rtnh)); + if (ipv4) { + parsed = nl_policy_parse(&buf, 0, policy, attrs, + ARRAY_SIZE(policy)); + } else { + parsed = nl_policy_parse(&buf, 0, policy6, attrs, + ARRAY_SIZE(policy6)); + } + + if (!parsed) { + VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message"); + return false; + } + + if (!route_table_add_nexthop(change, ipv4, attrs[RTA_GATEWAY], + rtnh->rtnh_ifindex)) { + return false; + } + + len -= RTNH_ALIGN(rtnh->rtnh_len); + rtnh = RTNH_NEXT(rtnh); + } + + return true; +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + /* Return RTNLGRP_IPV4_ROUTE or RTNLGRP_IPV6_ROUTE on success, 0 on parse * error. */ int @@ -220,6 +324,7 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_, [RTA_PREFSRC] = { .type = NL_A_U32, .optional = true }, [RTA_TABLE] = { .type = NL_A_U32, .optional = true }, [RTA_PRIORITY] = { .type = NL_A_U32, .optional = true }, + [RTA_MULTIPATH] = { .type = NL_A_NESTED, .optional = true }, }; static const struct nl_policy policy6[] = { @@ -230,6 +335,7 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_, [RTA_PREFSRC] = { .type = NL_A_IPV6, .optional = true }, [RTA_TABLE] = { .type = NL_A_U32, .optional = true }, [RTA_PRIORITY] = { .type = NL_A_U32, .optional = true }, + [RTA_MULTIPATH] = { .type = NL_A_NESTED, .optional = true }, }; struct nlattr *attrs[ARRAY_SIZE(policy)]; @@ -252,7 +358,6 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_, if (parsed) { const struct nlmsghdr *nlmsg; uint32_t table_id; - int rta_oif; /* Output interface index. */ nlmsg = buf->data; @@ -286,21 +391,6 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_, change->rd.local = rtm->rtm_type == RTN_LOCAL; change->rd.plen = rtm->rtm_dst_len; change->rd.rtm_protocol = rtm->rtm_protocol; - if (attrs[RTA_OIF]) { - rta_oif = nl_attr_get_u32(attrs[RTA_OIF]); - - if (!if_indextoname(rta_oif, change->rd.ifname)) { - int error = errno; - - VLOG_DBG_RL(&rl, "Could not find interface name[%u]: %s", - rta_oif, ovs_strerror(error)); - if (error == ENXIO) { - change->relevant = false; - } else { - return 0; - } - } - } if (attrs[RTA_DST]) { if (ipv4) { @@ -323,21 +413,28 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_, nl_attr_get_in6_addr(attrs[RTA_PREFSRC]); } } - if (attrs[RTA_GATEWAY]) { - if (ipv4) { - ovs_be32 gw; - gw = nl_attr_get_be32(attrs[RTA_GATEWAY]); - in6_addr_set_mapped_ipv4(&change->rd.rta_gw, gw); - } else { - change->rd.rta_gw = nl_attr_get_in6_addr(attrs[RTA_GATEWAY]); - } - } if (attrs[RTA_MARK]) { change->rd.mark = nl_attr_get_u32(attrs[RTA_MARK]); } if (attrs[RTA_PRIORITY]) { change->rd.rta_priority = nl_attr_get_u32(attrs[RTA_PRIORITY]); } + + if (attrs[RTA_GATEWAY] || attrs[RTA_OIF]) { + uint32_t ifindex = 0; + if (attrs[RTA_OIF]) { + ifindex = nl_attr_get_u32(attrs[RTA_OIF]); + } + if (!route_table_add_nexthop(change, ipv4, attrs[RTA_GATEWAY], + ifindex)) { + return 0; + } + } else if (attrs[RTA_MULTIPATH]) { + if (!route_table_parse_multipath(change, + attrs[RTA_MULTIPATH], ipv4)) { + return 0; + } + } } else { VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message"); return 0; @@ -364,8 +461,8 @@ route_table_handle_msg(const struct route_table_msg *change, const struct route_data *rd = &change->rd; ovs_router_insert(rd->mark, &rd->rta_dst, rd->rtm_dst_len, - rd->local, rd->ifname, &rd->rta_gw, - &rd->rta_prefsrc); + rd->local, rd->nexthops[0].ifname, + &rd->nexthops[0].rta_gw, &rd->rta_prefsrc); } } diff --git a/lib/route-table.h b/lib/route-table.h index 1e9f51a98..5a0e317df 100644 --- a/lib/route-table.h +++ b/lib/route-table.h @@ -26,6 +26,13 @@ #include "openvswitch/ofpbuf.h" +#define MAX_ROUTE_DATA_NEXTHOP 8 + +struct route_data_nexthop { + struct in6_addr rta_gw; + char ifname[IFNAMSIZ]; /* Interface name. */ +}; + struct route_data { /* Copied from struct rtmsg. */ unsigned char rtm_dst_len; @@ -34,13 +41,14 @@ struct route_data { /* Extracted from Netlink attributes. */ struct in6_addr rta_dst; /* 0 if missing. */ struct in6_addr rta_prefsrc; /* 0 if missing. */ - struct in6_addr rta_gw; - char ifname[IFNAMSIZ]; /* Interface name. */ uint32_t mark; uint32_t rta_table_id; /* 0 if missing. */ unsigned char plen; unsigned char rtm_protocol; uint32_t rta_priority; + + size_t n_nexthops; + struct route_data_nexthop nexthops[MAX_ROUTE_DATA_NEXTHOP]; }; /* A digested version of a route message sent down by the kernel to indicate
Multipath routes have separate structures for each of the nexthops. Previously if a multipath route was received it was treated as a route without a nexthop. Now these nexthops are parsed (up to a limit) and the ovs router uses the first route in the list. OVN will use the other routes. Signed-off-by: Felix Huettner <felix.huettner@stackit.cloud> --- lib/route-table.c | 151 +++++++++++++++++++++++++++++++++++++--------- lib/route-table.h | 12 +++- 2 files changed, 134 insertions(+), 29 deletions(-)