diff mbox series

[ovs-dev,8/8] route-table: Add back-off to periodic router reset.

Message ID 20250619125016.2660985-9-dchumak@nvidia.com
State Changes Requested
Delegated to: Ilya Maximets
Headers show
Series ovs-router: Multi-table routing infrastructure. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/cirrus-robot success cirrus build: passed
ovsrobot/github-robot-_Build_and_Test success github build: passed

Commit Message

Dima Chumak June 19, 2025, 12:50 p.m. UTC
OVS periodically polls the netlink socket for route and rule updates
from the kernel, and it triggers a full router table reset upon any
relevant change. In the event of a high volume of notification updates,
combined with a large overall number of routes and rules, the control
thread may become starved, spending most of its cycles parsing router
updates:

  wakeup due to [POLLIN] on fd 91 (NETLINK_ROUTE<->NETLINK_ROUTE) at lib/netlink-socket.c:1418 (96% CPU usage)
  wakeup due to [POLLIN] on fd 96 (FIFO pipe:[140158448]) at lib/ovs-rcu.c:259 (96% CPU usage)
  wakeup due to [POLLIN] on fd 96 (FIFO pipe:[140158448]) at lib/ovs-rcu.c:259 (99% CPU usage)
  wakeup due to [POLLIN] on fd 14 (<->/var/run/openvswitch/db.sock) at lib/stream-fd.c:157 (99% CPU usage)
  wakeup due to [POLLIN] on fd 96 (FIFO pipe:[140158448]) at vswitchd/bridge.c:431 (99% CPU usage)
  wakeup due to 105-ms timeout at vswitchd/bridge.c:3195 (99% CPU usage)

Such behavior was triggered, for example, when adding 500 rules with a
table lookup action, where each rule referenced a unique table and each
table had 100 routes. In total, this amounted to 50k routes in the
custom tables.

To reduce CPU usage under such conditions, a back-off mechanism is
applied before doing a router reset, lowering the frequency of full
dumps of router tables and rules. The back-off mechanism is dynamic in
the sense that it takes into account the duration of the last router
reset operation, so the back-off delay also increases with a higher
total volume of routes and rules.

Signed-off-by: Dima Chumak <dchumak@nvidia.com>
---
 lib/route-table.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
diff mbox series

Patch

diff --git a/lib/route-table.c b/lib/route-table.c
index 3b12e7fa92ec..7b32d5de3c9e 100644
--- a/lib/route-table.c
+++ b/lib/route-table.c
@@ -45,6 +45,9 @@ 
  * old headers.  (We can't test for it with #ifdef because it's an enum.) */
 #define RTA_MARK 16
 
+#define ROUTE_TABLE_RESET_BACKOFF_MIN 1
+#define ROUTE_TABLE_RESET_BACKOFF_MAX 32
+
 VLOG_DEFINE_THIS_MODULE(route_table);
 
 COVERAGE_DEFINE(route_table_dump);
@@ -66,6 +69,7 @@  static struct nln_notifier *name_notifier = NULL;
 
 static bool route_table_valid = false;
 static bool rules_valid = false;
+static long long route_table_reset_last_ms;
 
 static int route_nln_parse(struct ofpbuf *, void *change);
 
@@ -119,6 +123,8 @@  route_table_init(void)
     ovs_assert(!rule_notifier);
     ovs_assert(!rule6_notifier);
 
+    route_table_reset_last_ms = time_msec();
+
     ovs_router_init();
     nln = nln_create(NETLINK_ROUTE, route_nln_parse, &nln_rtmsg_change);
 
@@ -147,15 +153,51 @@  void
 route_table_run(void)
     OVS_EXCLUDED(route_table_mutex)
 {
+    static uint64_t backoff = ROUTE_TABLE_RESET_BACKOFF_MIN;
+    static long long last_reset_duration_ms = 1;
+    static long long last_backoff_ms;
+
     ovs_mutex_lock(&route_table_mutex);
     if (nln) {
+        long long prev_reset_duration_ms = last_reset_duration_ms;
+        long long ms_since_backoff;
+        long long ms_since_reset;
+
         rtnetlink_run();
         nln_run(nln);
 
+        ms_since_reset = time_msec() - route_table_reset_last_ms;
         if (!route_table_valid || !rules_valid) {
+            struct timeval start, end;
+
+            if (ms_since_reset < backoff * last_reset_duration_ms) {
+                goto out;
+            }
+
+            if (ms_since_reset < 2 * backoff * last_reset_duration_ms) {
+                if (backoff < ROUTE_TABLE_RESET_BACKOFF_MAX) {
+                    last_backoff_ms = time_msec();
+                    backoff <<= 1;
+                }
+            }
+
+            xgettimeofday(&start);
             route_table_reset();
+            xgettimeofday(&end);
+
+            last_reset_duration_ms =
+                timeval_to_msec(&end) - timeval_to_msec(&start);
+        }
+
+        ms_since_backoff = time_msec() - last_backoff_ms ;
+        if (ms_since_backoff > 2 * backoff * prev_reset_duration_ms) {
+            if (backoff > ROUTE_TABLE_RESET_BACKOFF_MIN) {
+                last_backoff_ms = time_msec();
+                backoff >>= 1;
+            }
         }
     }
+out:
     ovs_mutex_unlock(&route_table_mutex);
 }
 
@@ -276,6 +318,7 @@  route_table_reset(void)
         }
     }
     rules_dump();
+    route_table_reset_last_ms = time_msec();
 }
 
 static void