diff mbox series

[ovs-dev,2/8] ovs-router: Add infrastructure for multi-table routing.

Message ID 20250619125016.2660985-3-dchumak@nvidia.com
State Changes Requested
Delegated to: Ilya Maximets
Headers show
Series ovs-router: Multi-table routing infrastructure. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed

Commit Message

Dima Chumak June 19, 2025, 12:50 p.m. UTC
For route lookup based on the destination address a single routing table
is sufficient. For a more advanced routing when a lookup needs to take
into consideration other parameters, such as a source address, a
multi-table lookup is needed.

This change introduces infrastructure for using multiple routing
tables that can be added dynamically, as a pre-step towards importing
non-default routing tables from kernel.

Signed-off-by: Dima Chumak <dchumak@nvidia.com>
---
 lib/netdev-dummy.c |   6 +--
 lib/ovs-router.c   | 131 +++++++++++++++++++++++++++++++++++----------
 lib/ovs-router.h   |  20 ++++++-
 lib/route-table.c  |   2 +-
 4 files changed, 124 insertions(+), 35 deletions(-)
diff mbox series

Patch

diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index 7d5abb87e33b..0c51e2ea22c1 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -2238,8 +2238,8 @@  netdev_dummy_ip4addr(struct unixctl_conn *conn, int argc OVS_UNUSED,
 
             /* Insert local route entry for the new address. */
             in6_addr_set_mapped_ipv4(&ip6, ip.s_addr);
-            ovs_router_force_insert(0, &ip6, plen + 96, true, argv[1],
-                                    &in6addr_any, &ip6);
+            ovs_router_force_insert(CLS_DEFAULT, 0, &ip6, plen + 96, true,
+                                    argv[1], &in6addr_any, &ip6);
 
             unixctl_command_reply(conn, "OK");
         } else {
@@ -2272,7 +2272,7 @@  netdev_dummy_ip6addr(struct unixctl_conn *conn, int argc OVS_UNUSED,
             netdev_dummy_add_in6(netdev, &ip6, &mask);
 
             /* Insert local route entry for the new address. */
-            ovs_router_force_insert(0, &ip6, plen, true, argv[1],
+            ovs_router_force_insert(CLS_DEFAULT, 0, &ip6, plen, true, argv[1],
                                     &in6addr_any, &ip6);
 
             unixctl_command_reply(conn, "OK");
diff --git a/lib/ovs-router.c b/lib/ovs-router.c
index 2827a6e43574..6b47e7c533e5 100644
--- a/lib/ovs-router.c
+++ b/lib/ovs-router.c
@@ -50,10 +50,17 @@ 
 
 VLOG_DEFINE_THIS_MODULE(ovs_router);
 
+struct clsmap_node {
+    struct hmap_node hash_node;
+    struct classifier cls;
+    uint32_t table;
+};
+
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 
 static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
-static struct classifier cls;
+static struct hmap clsmap = HMAP_INITIALIZER(&clsmap);
+static struct classifier default_cls;
 
 /* By default, use the system routing table.  For system-independent testing,
  * the unit tests disable using the system routing table. */
@@ -71,6 +78,59 @@  struct ovs_router_entry {
     uint32_t mark;
 };
 
+static void
+rt_entry_delete__(const struct cls_rule *cr, struct classifier *cls);
+
+static struct classifier *
+cls_find(uint32_t table)
+{
+    struct clsmap_node *node;
+
+    if (ovs_router_is_standard_table_id(table)) {
+        return &default_cls;
+    }
+
+    ovs_mutex_lock(&mutex);
+    HMAP_FOR_EACH_IN_BUCKET (node, hash_node, hash_int(table, 0), &clsmap) {
+        if (node->table == table) {
+            ovs_mutex_unlock(&mutex);
+            return &node->cls;
+        }
+    }
+    ovs_mutex_unlock(&mutex);
+
+    return NULL;
+}
+
+static struct classifier *
+cls_create(uint32_t table)
+{
+    struct clsmap_node *node;
+
+    node = xmalloc(sizeof *node);
+    classifier_init(&node->cls, NULL);
+    node->table = table;
+    ovs_mutex_lock(&mutex);
+    hmap_insert(&clsmap, &node->hash_node, hash_int(table, 0));
+    ovs_mutex_unlock(&mutex);
+
+    return &node->cls;
+}
+
+static void
+cls_destroy(struct classifier *cls)
+{
+    struct ovs_router_entry *rt;
+
+    classifier_defer(cls);
+    CLS_FOR_EACH (rt, cr, cls) {
+        if (rt->priority == rt->plen || rt->local) {
+            rt_entry_delete__(&rt->cr, cls);
+        }
+    }
+    classifier_publish(cls);
+}
+
 static struct ovs_router_entry *
 ovs_router_entry_cast(const struct cls_rule *cr)
 {
@@ -117,8 +177,8 @@  ovs_router_lookup(uint32_t mark, const struct in6_addr *ip6_dst,
         const struct cls_rule *cr_src;
         struct flow flow_src = {.ipv6_dst = *src, .pkt_mark = mark};
 
-        cr_src = classifier_lookup(&cls, OVS_VERSION_MAX, &flow_src, NULL,
-                                   NULL);
+        cr_src = classifier_lookup(&default_cls, OVS_VERSION_MAX, &flow_src,
+                                   NULL, NULL);
         if (cr_src) {
             struct ovs_router_entry *p_src = ovs_router_entry_cast(cr_src);
             if (!p_src->local) {
@@ -129,7 +189,7 @@  ovs_router_lookup(uint32_t mark, const struct in6_addr *ip6_dst,
         }
     }
 
-    cr = classifier_lookup(&cls, OVS_VERSION_MAX, &flow, NULL, NULL);
+    cr = classifier_lookup(&default_cls, OVS_VERSION_MAX, &flow, NULL, NULL);
     if (cr) {
         struct ovs_router_entry *p = ovs_router_entry_cast(cr);
 
@@ -257,8 +317,8 @@  out:
 }
 
 static int
-ovs_router_insert__(uint32_t mark, uint8_t priority, bool local,
-                    const struct in6_addr *ip6_dst,
+ovs_router_insert__(uint32_t table, uint32_t mark, uint8_t priority,
+                    bool local, const struct in6_addr *ip6_dst,
                     uint8_t plen, const char output_netdev[],
                     const struct in6_addr *gw,
                     const struct in6_addr *ip6_src)
@@ -268,6 +328,7 @@  ovs_router_insert__(uint32_t mark, uint8_t priority, bool local,
                         struct in6_addr *prefsrc);
     const struct cls_rule *cr;
     struct ovs_router_entry *p;
+    struct classifier *cls;
     struct match match;
     int err;
 
@@ -307,8 +368,12 @@  ovs_router_insert__(uint32_t mark, uint8_t priority, bool local,
     /* Longest prefix matches first. */
     cls_rule_init(&p->cr, &match, priority);
 
+    cls = cls_find(table);
+    if (!cls) {
+        cls = cls_create(table);
+    }
     ovs_mutex_lock(&mutex);
-    cr = classifier_replace(&cls, &p->cr, OVS_VERSION_MIN, NULL, 0);
+    cr = classifier_replace(cls, &p->cr, OVS_VERSION_MIN, NULL, 0);
     ovs_mutex_unlock(&mutex);
 
     if (cr) {
@@ -321,13 +386,13 @@  ovs_router_insert__(uint32_t mark, uint8_t priority, bool local,
 }
 
 void
-ovs_router_insert(uint32_t mark, const struct in6_addr *ip_dst, uint8_t plen,
-                  bool local, const char output_netdev[],
+ovs_router_insert(uint32_t table, uint32_t mark, const struct in6_addr *ip_dst,
+                  uint8_t plen, bool local, const char output_netdev[],
                   const struct in6_addr *gw, const struct in6_addr *prefsrc)
 {
     if (use_system_routing_table) {
         uint8_t priority = local ? plen + 64 : plen;
-        ovs_router_insert__(mark, priority, local, ip_dst, plen,
+        ovs_router_insert__(table, mark, priority, local, ip_dst, plen,
                             output_netdev, gw, prefsrc);
     }
 }
@@ -335,24 +400,25 @@  ovs_router_insert(uint32_t mark, const struct in6_addr *ip_dst, uint8_t plen,
 /* The same as 'ovs_router_insert', but it adds the route even if updates
  * from the system routing table are disabled.  Used for unit tests. */
 void
-ovs_router_force_insert(uint32_t mark, const struct in6_addr *ip_dst,
+ovs_router_force_insert(uint32_t table, uint32_t mark,
+                        const struct in6_addr *ip_dst,
                         uint8_t plen, bool local, const char output_netdev[],
                         const struct in6_addr *gw,
                         const struct in6_addr *prefsrc)
 {
     uint8_t priority = local ? plen + 64 : plen;
 
-    ovs_router_insert__(mark, priority, local, ip_dst, plen,
+    ovs_router_insert__(table, mark, priority, local, ip_dst, plen,
                         output_netdev, gw, prefsrc);
 }
 
 static void
-rt_entry_delete__(const struct cls_rule *cr)
+rt_entry_delete__(const struct cls_rule *cr, struct classifier *cls)
 {
     struct ovs_router_entry *p = ovs_router_entry_cast(cr);
 
     tnl_port_map_delete_ipdev(p->output_netdev);
-    classifier_remove_assert(&cls, cr);
+    classifier_remove_assert(cls, cr);
     ovsrcu_postpone(rt_entry_free, ovs_router_entry_cast(cr));
 }
 
@@ -370,10 +436,10 @@  rt_entry_delete(uint32_t mark, uint8_t priority,
     cls_rule_init(&rule, &match, priority);
 
     /* Find the exact rule. */
-    cr = classifier_find_rule_exactly(&cls, &rule, OVS_VERSION_MAX);
+    cr = classifier_find_rule_exactly(&default_cls, &rule, OVS_VERSION_MAX);
     if (cr) {
         ovs_mutex_lock(&mutex);
-        rt_entry_delete__(cr);
+        rt_entry_delete__(cr, &default_cls);
         ovs_mutex_unlock(&mutex);
 
         res = true;
@@ -469,8 +535,8 @@  ovs_router_add(struct unixctl_conn *conn, int argc,
         in6_addr_set_mapped_ipv4(&src6, src);
     }
 
-    err = ovs_router_insert__(mark, plen + 32, false, &ip6, plen, argv[2],
-                              &gw6, &src6);
+    err = ovs_router_insert__(CLS_DEFAULT, mark, plen + 32, false, &ip6, plen,
+                              argv[2], &gw6, &src6);
     if (err) {
         unixctl_command_reply_error(conn, "Error while inserting route.");
     } else {
@@ -512,7 +578,7 @@  ovs_router_del(struct unixctl_conn *conn, int argc OVS_UNUSED,
 static void
 ovs_router_show_json(struct json **routes)
 {
-    int n_rules = classifier_count(&cls);
+    int n_rules = classifier_count(&default_cls);
     struct json **json_entries = NULL;
     struct ovs_router_entry *rt;
     struct ds ds;
@@ -525,7 +591,7 @@  ovs_router_show_json(struct json **routes)
     json_entries = xmalloc(n_rules * sizeof *json_entries);
     ds_init(&ds);
 
-    CLS_FOR_EACH (rt, cr, &cls) {
+    CLS_FOR_EACH (rt, cr, &default_cls) {
         bool user = rt->priority != rt->plen && !rt->local;
         uint8_t plen = rt->plen;
         struct json *json, *nh;
@@ -581,7 +647,7 @@  ovs_router_show_text(struct ds *ds)
     struct ovs_router_entry *rt;
 
     ds_put_format(ds, "Route Table:\n");
-    CLS_FOR_EACH (rt, cr, &cls) {
+    CLS_FOR_EACH (rt, cr, &default_cls) {
         uint8_t plen;
         if (rt->priority == rt->plen || rt->local) {
             ds_put_format(ds, "Cached: ");
@@ -671,16 +737,15 @@  ovs_router_lookup_cmd(struct unixctl_conn *conn, int argc,
 void
 ovs_router_flush(void)
 {
-    struct ovs_router_entry *rt;
+    struct clsmap_node *node;
 
     ovs_mutex_lock(&mutex);
-    classifier_defer(&cls);
-    CLS_FOR_EACH(rt, cr, &cls) {
-        if (rt->priority == rt->plen || rt->local) {
-            rt_entry_delete__(&rt->cr);
-        }
+    HMAP_FOR_EACH_POP (node, hash_node, &clsmap) {
+        cls_destroy(&node->cls);
+        classifier_destroy(&node->cls);
+        free(node);
     }
-    classifier_publish(&cls);
+    cls_destroy(&default_cls);
     ovs_mutex_unlock(&mutex);
     seq_change(tnl_conf_seq);
 }
@@ -689,6 +754,11 @@  static void
 ovs_router_flush_handler(void *aux OVS_UNUSED)
 {
     ovs_router_flush();
+
+    ovs_mutex_lock(&mutex);
+    hmap_destroy(&clsmap);
+    hmap_init(&clsmap);
+    ovs_mutex_unlock(&mutex);
 }
 
 void
@@ -697,8 +767,11 @@  ovs_router_init(void)
     static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
 
     if (ovsthread_once_start(&once)) {
+        ovs_mutex_lock(&mutex);
+        hmap_init(&clsmap);
+        classifier_init(&default_cls, NULL);
+        ovs_mutex_unlock(&mutex);
         fatal_signal_add_hook(ovs_router_flush_handler, NULL, NULL, true);
-        classifier_init(&cls, NULL);
         unixctl_command_register("ovs/route/add",
                                  "ip/plen dev [gw] "
                                  "[pkt_mark=mark] [src=src_ip]",
diff --git a/lib/ovs-router.h b/lib/ovs-router.h
index c2a92324b6da..256ac1d420ab 100644
--- a/lib/ovs-router.h
+++ b/lib/ovs-router.h
@@ -29,6 +29,15 @@ 
 extern "C" {
 #endif
 
+enum {
+#ifdef HAVE_NETLINK
+    CLS_DEFAULT = RT_TABLE_MAIN,
+#else
+    CLS_DEFAULT = 254, /* RT_TABLE_MAIN */
+#endif
+    CLS_ALL = UINT_MAX,
+};
+
 #ifdef HAVE_NETLINK
 static inline bool ovs_router_is_standard_table_id(uint32_t table_id)
 {
@@ -37,17 +46,24 @@  static inline bool ovs_router_is_standard_table_id(uint32_t table_id)
            || table_id == RT_TABLE_MAIN
            || table_id == RT_TABLE_LOCAL;
 }
+#else
+static inline bool ovs_router_is_standard_table_id(uint32_t table_id)
+{
+    return !table_id || table_id == CLS_DEFAULT;
+}
 #endif
 
 bool ovs_router_lookup(uint32_t mark, const struct in6_addr *ip_dst,
                        char output_netdev[],
                        struct in6_addr *src, struct in6_addr *gw);
 void ovs_router_init(void);
-void ovs_router_insert(uint32_t mark, const struct in6_addr *ip_dst,
+void ovs_router_insert(uint32_t table, uint32_t mark,
+                       const struct in6_addr *ip_dst,
                        uint8_t plen, bool local,
                        const char output_netdev[], const struct in6_addr *gw,
                        const struct in6_addr *prefsrc);
-void ovs_router_force_insert(uint32_t mark, const struct in6_addr *ip_dst,
+void ovs_router_force_insert(uint32_t table, uint32_t mark,
+                             const struct in6_addr *ip_dst,
                              uint8_t plen, bool local,
                              const char output_netdev[],
                              const struct in6_addr *gw,
diff --git a/lib/route-table.c b/lib/route-table.c
index 23e43bd02541..a6117da9cc1b 100644
--- a/lib/route-table.c
+++ b/lib/route-table.c
@@ -547,7 +547,7 @@  route_table_handle_msg(const struct route_table_msg *change,
         rdnh = CONTAINER_OF(ovs_list_front(&change->rd.nexthops),
                             const struct route_data_nexthop, nexthop_node);
 
-        ovs_router_insert(rd->rta_mark, &rd->rta_dst,
+        ovs_router_insert(CLS_DEFAULT, rd->rta_mark, &rd->rta_dst,
                           IN6_IS_ADDR_V4MAPPED(&rd->rta_dst)
                           ? rd->rtm_dst_len + 96 : rd->rtm_dst_len,
                           rd->rtn_local, rdnh->ifname, &rdnh->addr,