diff mbox series

[ovs-dev,v2,5/6] route-table: Support parsing multipath routes.

Message ID 8c6c957ddd81ecd7a1242a3b5c8daed8527732bc.1732630344.git.felix.huettner@stackit.cloud
State Changes Requested
Delegated to: Eelco Chaudron
Headers show
Series OVS Patches for OVN Fabric Integration | expand

Checks

Context Check Description
ovsrobot/apply-robot warning apply and check: warning

Commit Message

Felix Huettner Nov. 26, 2024, 2:37 p.m. UTC
Multipath routes have separate structures for each of the nexthops.
Previously if a multipath route was received it was treated as a route
without a nexthop.
Now these nexthops are parsed (up to a limit) and the ovs router uses
the first route in the list.
OVN will use the other routes.

Signed-off-by: Felix Huettner <felix.huettner@stackit.cloud>
---
 lib/route-table.c | 151 +++++++++++++++++++++++++++++++++++++---------
 lib/route-table.h |  12 +++-
 2 files changed, 134 insertions(+), 29 deletions(-)

Comments

Frode Nordahl Nov. 29, 2024, 8:53 a.m. UTC | #1
Hello, Felix,

On Tue, Nov 26, 2024 at 3:37 PM Felix Huettner via dev
<ovs-dev@openvswitch.org> wrote:
>
> Multipath routes have separate structures for each of the nexthops.
> Previously if a multipath route was received it was treated as a route
> without a nexthop.

Support for multipath routes is indeed useful, thank you for spotting
the lack of support for it!

> Now these nexthops are parsed (up to a limit) and the ovs router uses
> the first route in the list.

> OVN will use the other routes.

While it is true that we will use this in OVN, remember that this is a
base library, and this might be generally useful for any consumer of
this library, no?

> Signed-off-by: Felix Huettner <felix.huettner@stackit.cloud>
> ---
>  lib/route-table.c | 151 +++++++++++++++++++++++++++++++++++++---------
>  lib/route-table.h |  12 +++-
>  2 files changed, 134 insertions(+), 29 deletions(-)
>
> diff --git a/lib/route-table.c b/lib/route-table.c
> index a9a9c6b85..06e391184 100644
> --- a/lib/route-table.c
> +++ b/lib/route-table.c
> @@ -203,6 +203,110 @@ route_table_reset(void)
>      }
>  }
>
> +static bool
> +route_table_add_nexthop(struct route_table_msg *change,
> +                        bool ipv4,
> +                        struct nlattr *nl_gw,
> +                        uint32_t ifindex)
> +{
> +    if (change->rd.n_nexthops >= MAX_ROUTE_DATA_NEXTHOP) {
> +        VLOG_DBG("tried to add more nexthops to a route than possible");
> +        return false;
> +    }
> +
> +    struct route_data_nexthop *nh = &change->rd.nexthops[
> +            change->rd.n_nexthops];
> +
> +    if (nl_gw) {
> +        if (ipv4) {
> +            ovs_be32 gw;
> +            gw = nl_attr_get_be32(nl_gw);
> +            in6_addr_set_mapped_ipv4(&nh->rta_gw, gw);
> +        } else {
> +            nh->rta_gw = nl_attr_get_in6_addr(nl_gw);
> +        }
> +    }
> +
> +    if (ifindex && !if_indextoname(ifindex, nh->ifname)) {
> +        int error = errno;
> +
> +        VLOG_DBG_RL(&rl, "Could not find interface name[%u]: %s",
> +                    ifindex, ovs_strerror(error));
> +        if (error == ENXIO) {
> +            change->relevant = false;
> +        } else {
> +            return false;
> +        }
> +    }
> +    change->rd.n_nexthops++;
> +    return true;
> +}
> +
> +
> +/* The following function uses the RTNH defines of rtnetlink.h
> + * As these rely on casts from char * to struct rtnexthop * clang will issue
> + * a warning about alignment even though the defines will ensure that the data
> + * is alligned. */
> +#ifdef __clang__
> +#pragma clang diagnostic push
> +#pragma clang diagnostic ignored "-Wcast-align"
> +#endif

This will not fly and needs to be reworked.

Open vSwitch has a very good netlink library, including alternatives
to the RTNH macros you have used here. Please have a look at how
parsing of nested attributes is handled elsewhere in the code base.

> +static bool
> +route_table_parse_multipath(struct route_table_msg *change,
> +                            struct nlattr *multipath,
> +                            bool ipv4)
> +{
> +    static const struct nl_policy policy[] = {
> +        [RTA_GATEWAY] = { .type = NL_A_U32, .optional = true },
> +    };
> +
> +    static const struct nl_policy policy6[] = {
> +        [RTA_GATEWAY] = { .type = NL_A_IPV6, .optional = true },
> +    };
> +
> +    struct nlattr *attrs[ARRAY_SIZE(policy)];
> +
> +    size_t len = RTA_PAYLOAD((struct rtattr *) multipath);
> +    struct rtnexthop *rtnh = RTA_DATA(multipath);
> +
> +    while (len >= sizeof(*rtnh) && len >= rtnh->rtnh_len &&
> +           change->rd.n_nexthops < MAX_ROUTE_DATA_NEXTHOP) {
> +
> +        struct ofpbuf buf;
> +        bool parsed;
> +
> +        ofpbuf_use_const(&buf, RTNH_DATA(rtnh),
> +                         rtnh->rtnh_len - sizeof(*rtnh));
> +        if (ipv4) {
> +            parsed = nl_policy_parse(&buf, 0, policy, attrs,
> +                                     ARRAY_SIZE(policy));
> +        } else {
> +            parsed = nl_policy_parse(&buf, 0, policy6, attrs,
> +                                     ARRAY_SIZE(policy6));
> +        }
> +
> +        if (!parsed) {
> +            VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message");
> +            return false;
> +        }
> +
> +        if (!route_table_add_nexthop(change, ipv4, attrs[RTA_GATEWAY],
> +                                     rtnh->rtnh_ifindex)) {
> +            return false;
> +        }
> +
> +        len -= RTNH_ALIGN(rtnh->rtnh_len);
> +        rtnh = RTNH_NEXT(rtnh);
> +    }
> +
> +    return true;
> +}
> +
> +#ifdef __clang__
> +#pragma clang diagnostic pop
> +#endif
> +
>  /* Return RTNLGRP_IPV4_ROUTE or RTNLGRP_IPV6_ROUTE on success, 0 on parse
>   * error. */
>  int
> @@ -220,6 +324,7 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_,
>          [RTA_PREFSRC] = { .type = NL_A_U32, .optional = true },
>          [RTA_TABLE] = { .type = NL_A_U32, .optional = true },
>          [RTA_PRIORITY] = { .type = NL_A_U32, .optional = true },
> +        [RTA_MULTIPATH] = { .type = NL_A_NESTED, .optional = true },
>      };
>
>      static const struct nl_policy policy6[] = {
> @@ -230,6 +335,7 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_,
>          [RTA_PREFSRC] = { .type = NL_A_IPV6, .optional = true },
>          [RTA_TABLE] = { .type = NL_A_U32, .optional = true },
>          [RTA_PRIORITY] = { .type = NL_A_U32, .optional = true },
> +        [RTA_MULTIPATH] = { .type = NL_A_NESTED, .optional = true },
>      };
>
>      struct nlattr *attrs[ARRAY_SIZE(policy)];
> @@ -252,7 +358,6 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_,
>      if (parsed) {
>          const struct nlmsghdr *nlmsg;
>          uint32_t table_id;
> -        int rta_oif;      /* Output interface index. */
>
>          nlmsg = buf->data;
>
> @@ -286,21 +391,6 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_,
>          change->rd.local = rtm->rtm_type == RTN_LOCAL;
>          change->rd.plen = rtm->rtm_dst_len;
>          change->rd.rtm_protocol = rtm->rtm_protocol;
> -        if (attrs[RTA_OIF]) {
> -            rta_oif = nl_attr_get_u32(attrs[RTA_OIF]);
> -
> -            if (!if_indextoname(rta_oif, change->rd.ifname)) {
> -                int error = errno;
> -
> -                VLOG_DBG_RL(&rl, "Could not find interface name[%u]: %s",
> -                            rta_oif, ovs_strerror(error));
> -                if (error == ENXIO) {
> -                    change->relevant = false;
> -                } else {
> -                    return 0;
> -                }
> -            }
> -        }
>
>          if (attrs[RTA_DST]) {
>              if (ipv4) {
> @@ -323,21 +413,28 @@ route_table_parse_ns(struct ofpbuf *buf, void *change_,
>                      nl_attr_get_in6_addr(attrs[RTA_PREFSRC]);
>              }
>          }
> -        if (attrs[RTA_GATEWAY]) {
> -            if (ipv4) {
> -                ovs_be32 gw;
> -                gw = nl_attr_get_be32(attrs[RTA_GATEWAY]);
> -                in6_addr_set_mapped_ipv4(&change->rd.rta_gw, gw);
> -            } else {
> -                change->rd.rta_gw = nl_attr_get_in6_addr(attrs[RTA_GATEWAY]);
> -            }
> -        }
>          if (attrs[RTA_MARK]) {
>              change->rd.mark = nl_attr_get_u32(attrs[RTA_MARK]);
>          }
>          if (attrs[RTA_PRIORITY]) {
>              change->rd.rta_priority = nl_attr_get_u32(attrs[RTA_PRIORITY]);
>          }
> +
> +        if (attrs[RTA_GATEWAY] || attrs[RTA_OIF]) {
> +            uint32_t ifindex = 0;
> +            if (attrs[RTA_OIF]) {
> +                ifindex = nl_attr_get_u32(attrs[RTA_OIF]);
> +            }
> +            if (!route_table_add_nexthop(change, ipv4, attrs[RTA_GATEWAY],
> +                                         ifindex)) {
> +                return 0;
> +            }

In the act of collapsing the handling of RTA_GATEWAY and RTA_OIF you
are removing existing error handling, and I'm wondering if this is
necessary at all? Would you ever receive a message from the kernel
with RTA_GATEWAY, RTA_OIF and RTA_MULTIPATH set at the same time?

A less intrusive change might be to leave the existing handling as is,
and handle the RTA_MULTIPATH attribute if you get one.

Looking at the iproute2 source code [0], there is no branching between
these attributes, they are just handled in sequence.

0: https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/ip/iproute.c#n1005


> +        } else if (attrs[RTA_MULTIPATH]) {
> +            if (!route_table_parse_multipath(change,
> +                                             attrs[RTA_MULTIPATH], ipv4)) {
> +                return 0;
> +            }
> +        }
>      } else {
>          VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message");
>          return 0;
> @@ -364,8 +461,8 @@ route_table_handle_msg(const struct route_table_msg *change,
>          const struct route_data *rd = &change->rd;
>
>          ovs_router_insert(rd->mark, &rd->rta_dst, rd->rtm_dst_len,
> -                          rd->local, rd->ifname, &rd->rta_gw,
> -                          &rd->rta_prefsrc);
> +                          rd->local, rd->nexthops[0].ifname,
> +                          &rd->nexthops[0].rta_gw, &rd->rta_prefsrc);
>      }
>  }
>
> diff --git a/lib/route-table.h b/lib/route-table.h
> index 1e9f51a98..5a0e317df 100644
> --- a/lib/route-table.h
> +++ b/lib/route-table.h
> @@ -26,6 +26,13 @@
>
>  #include "openvswitch/ofpbuf.h"
>
> +#define MAX_ROUTE_DATA_NEXTHOP 8

Do you have a reference for the choice of this number?

> +
> +struct route_data_nexthop {
> +    struct in6_addr rta_gw;
> +    char ifname[IFNAMSIZ]; /* Interface name. */
> +};
> +
>  struct route_data {
>      /* Copied from struct rtmsg. */
>      unsigned char rtm_dst_len;
> @@ -34,13 +41,14 @@ struct route_data {
>      /* Extracted from Netlink attributes. */
>      struct in6_addr rta_dst; /* 0 if missing. */
>      struct in6_addr rta_prefsrc; /* 0 if missing. */
> -    struct in6_addr rta_gw;
> -    char ifname[IFNAMSIZ]; /* Interface name. */

Now that this header is exposed, technically this is an API change,
since this used to be a private to route-table.c struct, I guess
changing this is fine.

However, it would be better if you reordered the patches so that this
change was made prior to exposing the interface. That way you would
avoid having a breaking API change in the git history.

--
Frode Nordahl

>      uint32_t mark;
>      uint32_t rta_table_id; /* 0 if missing. */
>      unsigned char plen;
>      unsigned char rtm_protocol;
>      uint32_t rta_priority;
> +
> +    size_t n_nexthops;
> +    struct route_data_nexthop nexthops[MAX_ROUTE_DATA_NEXTHOP];
>  };
>
>  /* A digested version of a route message sent down by the kernel to indicate
> --
> 2.47.0
>
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff mbox series

Patch

diff --git a/lib/route-table.c b/lib/route-table.c
index a9a9c6b85..06e391184 100644
--- a/lib/route-table.c
+++ b/lib/route-table.c
@@ -203,6 +203,110 @@  route_table_reset(void)
     }
 }
 
+static bool
+route_table_add_nexthop(struct route_table_msg *change,
+                        bool ipv4,
+                        struct nlattr *nl_gw,
+                        uint32_t ifindex)
+{
+    if (change->rd.n_nexthops >= MAX_ROUTE_DATA_NEXTHOP) {
+        VLOG_DBG("tried to add more nexthops to a route than possible");
+        return false;
+    }
+
+    struct route_data_nexthop *nh = &change->rd.nexthops[
+            change->rd.n_nexthops];
+
+    if (nl_gw) {
+        if (ipv4) {
+            ovs_be32 gw;
+            gw = nl_attr_get_be32(nl_gw);
+            in6_addr_set_mapped_ipv4(&nh->rta_gw, gw);
+        } else {
+            nh->rta_gw = nl_attr_get_in6_addr(nl_gw);
+        }
+    }
+
+    if (ifindex && !if_indextoname(ifindex, nh->ifname)) {
+        int error = errno;
+
+        VLOG_DBG_RL(&rl, "Could not find interface name[%u]: %s",
+                    ifindex, ovs_strerror(error));
+        if (error == ENXIO) {
+            change->relevant = false;
+        } else {
+            return false;
+        }
+    }
+    change->rd.n_nexthops++;
+    return true;
+}
+
+
+/* The following function uses the RTNH defines of rtnetlink.h
+ * As these rely on casts from char * to struct rtnexthop * clang will issue
+ * a warning about alignment even though the defines will ensure that the data
+ * is alligned. */
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wcast-align"
+#endif
+
+static bool
+route_table_parse_multipath(struct route_table_msg *change,
+                            struct nlattr *multipath,
+                            bool ipv4)
+{
+    static const struct nl_policy policy[] = {
+        [RTA_GATEWAY] = { .type = NL_A_U32, .optional = true },
+    };
+
+    static const struct nl_policy policy6[] = {
+        [RTA_GATEWAY] = { .type = NL_A_IPV6, .optional = true },
+    };
+
+    struct nlattr *attrs[ARRAY_SIZE(policy)];
+
+    size_t len = RTA_PAYLOAD((struct rtattr *) multipath);
+    struct rtnexthop *rtnh = RTA_DATA(multipath);
+
+    while (len >= sizeof(*rtnh) && len >= rtnh->rtnh_len &&
+           change->rd.n_nexthops < MAX_ROUTE_DATA_NEXTHOP) {
+
+        struct ofpbuf buf;
+        bool parsed;
+
+        ofpbuf_use_const(&buf, RTNH_DATA(rtnh),
+                         rtnh->rtnh_len - sizeof(*rtnh));
+        if (ipv4) {
+            parsed = nl_policy_parse(&buf, 0, policy, attrs,
+                                     ARRAY_SIZE(policy));
+        } else {
+            parsed = nl_policy_parse(&buf, 0, policy6, attrs,
+                                     ARRAY_SIZE(policy6));
+        }
+
+        if (!parsed) {
+            VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message");
+            return false;
+        }
+
+        if (!route_table_add_nexthop(change, ipv4, attrs[RTA_GATEWAY],
+                                     rtnh->rtnh_ifindex)) {
+            return false;
+        }
+
+        len -= RTNH_ALIGN(rtnh->rtnh_len);
+        rtnh = RTNH_NEXT(rtnh);
+    }
+
+    return true;
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
 /* Return RTNLGRP_IPV4_ROUTE or RTNLGRP_IPV6_ROUTE on success, 0 on parse
  * error. */
 int
@@ -220,6 +324,7 @@  route_table_parse_ns(struct ofpbuf *buf, void *change_,
         [RTA_PREFSRC] = { .type = NL_A_U32, .optional = true },
         [RTA_TABLE] = { .type = NL_A_U32, .optional = true },
         [RTA_PRIORITY] = { .type = NL_A_U32, .optional = true },
+        [RTA_MULTIPATH] = { .type = NL_A_NESTED, .optional = true },
     };
 
     static const struct nl_policy policy6[] = {
@@ -230,6 +335,7 @@  route_table_parse_ns(struct ofpbuf *buf, void *change_,
         [RTA_PREFSRC] = { .type = NL_A_IPV6, .optional = true },
         [RTA_TABLE] = { .type = NL_A_U32, .optional = true },
         [RTA_PRIORITY] = { .type = NL_A_U32, .optional = true },
+        [RTA_MULTIPATH] = { .type = NL_A_NESTED, .optional = true },
     };
 
     struct nlattr *attrs[ARRAY_SIZE(policy)];
@@ -252,7 +358,6 @@  route_table_parse_ns(struct ofpbuf *buf, void *change_,
     if (parsed) {
         const struct nlmsghdr *nlmsg;
         uint32_t table_id;
-        int rta_oif;      /* Output interface index. */
 
         nlmsg = buf->data;
 
@@ -286,21 +391,6 @@  route_table_parse_ns(struct ofpbuf *buf, void *change_,
         change->rd.local = rtm->rtm_type == RTN_LOCAL;
         change->rd.plen = rtm->rtm_dst_len;
         change->rd.rtm_protocol = rtm->rtm_protocol;
-        if (attrs[RTA_OIF]) {
-            rta_oif = nl_attr_get_u32(attrs[RTA_OIF]);
-
-            if (!if_indextoname(rta_oif, change->rd.ifname)) {
-                int error = errno;
-
-                VLOG_DBG_RL(&rl, "Could not find interface name[%u]: %s",
-                            rta_oif, ovs_strerror(error));
-                if (error == ENXIO) {
-                    change->relevant = false;
-                } else {
-                    return 0;
-                }
-            }
-        }
 
         if (attrs[RTA_DST]) {
             if (ipv4) {
@@ -323,21 +413,28 @@  route_table_parse_ns(struct ofpbuf *buf, void *change_,
                     nl_attr_get_in6_addr(attrs[RTA_PREFSRC]);
             }
         }
-        if (attrs[RTA_GATEWAY]) {
-            if (ipv4) {
-                ovs_be32 gw;
-                gw = nl_attr_get_be32(attrs[RTA_GATEWAY]);
-                in6_addr_set_mapped_ipv4(&change->rd.rta_gw, gw);
-            } else {
-                change->rd.rta_gw = nl_attr_get_in6_addr(attrs[RTA_GATEWAY]);
-            }
-        }
         if (attrs[RTA_MARK]) {
             change->rd.mark = nl_attr_get_u32(attrs[RTA_MARK]);
         }
         if (attrs[RTA_PRIORITY]) {
             change->rd.rta_priority = nl_attr_get_u32(attrs[RTA_PRIORITY]);
         }
+
+        if (attrs[RTA_GATEWAY] || attrs[RTA_OIF]) {
+            uint32_t ifindex = 0;
+            if (attrs[RTA_OIF]) {
+                ifindex = nl_attr_get_u32(attrs[RTA_OIF]);
+            }
+            if (!route_table_add_nexthop(change, ipv4, attrs[RTA_GATEWAY],
+                                         ifindex)) {
+                return 0;
+            }
+        } else if (attrs[RTA_MULTIPATH]) {
+            if (!route_table_parse_multipath(change,
+                                             attrs[RTA_MULTIPATH], ipv4)) {
+                return 0;
+            }
+        }
     } else {
         VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message");
         return 0;
@@ -364,8 +461,8 @@  route_table_handle_msg(const struct route_table_msg *change,
         const struct route_data *rd = &change->rd;
 
         ovs_router_insert(rd->mark, &rd->rta_dst, rd->rtm_dst_len,
-                          rd->local, rd->ifname, &rd->rta_gw,
-                          &rd->rta_prefsrc);
+                          rd->local, rd->nexthops[0].ifname,
+                          &rd->nexthops[0].rta_gw, &rd->rta_prefsrc);
     }
 }
 
diff --git a/lib/route-table.h b/lib/route-table.h
index 1e9f51a98..5a0e317df 100644
--- a/lib/route-table.h
+++ b/lib/route-table.h
@@ -26,6 +26,13 @@ 
 
 #include "openvswitch/ofpbuf.h"
 
+#define MAX_ROUTE_DATA_NEXTHOP 8
+
+struct route_data_nexthop {
+    struct in6_addr rta_gw;
+    char ifname[IFNAMSIZ]; /* Interface name. */
+};
+
 struct route_data {
     /* Copied from struct rtmsg. */
     unsigned char rtm_dst_len;
@@ -34,13 +41,14 @@  struct route_data {
     /* Extracted from Netlink attributes. */
     struct in6_addr rta_dst; /* 0 if missing. */
     struct in6_addr rta_prefsrc; /* 0 if missing. */
-    struct in6_addr rta_gw;
-    char ifname[IFNAMSIZ]; /* Interface name. */
     uint32_t mark;
     uint32_t rta_table_id; /* 0 if missing. */
     unsigned char plen;
     unsigned char rtm_protocol;
     uint32_t rta_priority;
+
+    size_t n_nexthops;
+    struct route_data_nexthop nexthops[MAX_ROUTE_DATA_NEXTHOP];
 };
 
 /* A digested version of a route message sent down by the kernel to indicate