@@ -45,6 +45,9 @@
* old headers. (We can't test for it with #ifdef because it's an enum.) */
#define RTA_MARK 16
+#define ROUTE_TABLE_RESET_BACKOFF_MIN 1
+#define ROUTE_TABLE_RESET_BACKOFF_MAX 32
+
VLOG_DEFINE_THIS_MODULE(route_table);
COVERAGE_DEFINE(route_table_dump);
@@ -66,6 +69,7 @@ static struct nln_notifier *name_notifier = NULL;
static bool route_table_valid = false;
static bool rules_valid = false;
+static long long route_table_reset_last_ms;
static int route_nln_parse(struct ofpbuf *, void *change);
@@ -119,6 +123,8 @@ route_table_init(void)
ovs_assert(!rule_notifier);
ovs_assert(!rule6_notifier);
+ route_table_reset_last_ms = time_msec();
+
ovs_router_init();
nln = nln_create(NETLINK_ROUTE, route_nln_parse, &nln_rtmsg_change);
@@ -147,15 +153,51 @@ void
route_table_run(void)
OVS_EXCLUDED(route_table_mutex)
{
+ static uint64_t backoff = ROUTE_TABLE_RESET_BACKOFF_MIN;
+ static long long last_reset_duration_ms = 1;
+ static long long last_backoff_ms;
+
ovs_mutex_lock(&route_table_mutex);
if (nln) {
+ long long prev_reset_duration_ms = last_reset_duration_ms;
+ long long ms_since_backoff;
+ long long ms_since_reset;
+
rtnetlink_run();
nln_run(nln);
+ ms_since_reset = time_msec() - route_table_reset_last_ms;
if (!route_table_valid || !rules_valid) {
+ struct timeval start, end;
+
+ if (ms_since_reset < backoff * last_reset_duration_ms) {
+ goto out;
+ }
+
+ if (ms_since_reset < 2 * backoff * last_reset_duration_ms) {
+ if (backoff < ROUTE_TABLE_RESET_BACKOFF_MAX) {
+ last_backoff_ms = time_msec();
+ backoff <<= 1;
+ }
+ }
+
+ xgettimeofday(&start);
route_table_reset();
+ xgettimeofday(&end);
+
+ last_reset_duration_ms =
+ timeval_to_msec(&end) - timeval_to_msec(&start);
+ }
+
+        ms_since_backoff = time_msec() - last_backoff_ms;
+ if (ms_since_backoff > 2 * backoff * prev_reset_duration_ms) {
+ if (backoff > ROUTE_TABLE_RESET_BACKOFF_MIN) {
+ last_backoff_ms = time_msec();
+ backoff >>= 1;
+ }
}
}
+out:
ovs_mutex_unlock(&route_table_mutex);
}
@@ -276,6 +318,7 @@ route_table_reset(void)
}
}
rules_dump();
+ route_table_reset_last_ms = time_msec();
}
static void
OVS periodically polls the netlink socket for route and rule updates from the kernel, and it will trigger a full route table reset upon any relevant change. In the event of a high volume of notification updates, combined with a big overall number of routes and rules, the control thread may start getting starved, spending most of the cycles parsing route updates: wakeup due to [POLLIN] on fd 91 (NETLINK_ROUTE<->NETLINK_ROUTE) at lib/netlink-socket.c:1418 (96% CPU usage) wakeup due to [POLLIN] on fd 96 (FIFO pipe:[140158448]) at lib/ovs-rcu.c:259 (96% CPU usage) wakeup due to [POLLIN] on fd 96 (FIFO pipe:[140158448]) at lib/ovs-rcu.c:259 (99% CPU usage) wakeup due to [POLLIN] on fd 14 (<->/var/run/openvswitch/db.sock) at lib/stream-fd.c:157 (99% CPU usage) wakeup due to [POLLIN] on fd 96 (FIFO pipe:[140158448]) at vswitchd/bridge.c:431 (99% CPU usage) wakeup due to 105-ms timeout at vswitchd/bridge.c:3195 (99% CPU usage) Such behavior was triggered, for example, when adding 500 rules with a table lookup action, where each rule referenced a unique table. And each table had 100 routes. In total it amounted to 50k routes in the custom tables. To optimize CPU usage under such conditions, a back-off mechanism is used before doing a route table reset, to reduce the frequency of a full dump of route tables and rules. The back-off mechanism is dynamic in the sense that it takes into consideration the duration of the last route table reset operation, so the back-off delay also increases with a higher volume of total routes and rules. Signed-off-by: Dima Chumak <dchumak@nvidia.com> --- lib/route-table.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+)