diff mbox series

[ovs-dev,v2,6/6] pinctrl.c: Add delay after ARP packet

Message ID 20220617090759.513730-7-amusil@redhat.com
State Changes Requested
Headers show
Series MAC binding aging mechanism | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/github-robot-_ovn-kubernetes fail github build: failed

Commit Message

Ales Musil June 17, 2022, 9:07 a.m. UTC
The ovn-controller had a race condition over MAC
binding table with other controllers. When multiple
controllers received GARP from single source usually
the one who was able to win the race put it into SB.
The others got transaction error which triggered
full recompute even if it's not needed.

In order to reduce the chance of multiple controllers
trying to insert same row at the same time add slight
delay to the MAC binding processing. This delay
is random interval between 0-49 in ms. This greatly
reduces the chance that multiple controllers will try
to add MAC binding at exactly the same time.

Local testing with this delay vs without show significantly
reduced chance of hitting the transaction error.

During the testing 10 GARPs was sent to two controllers
at the same time. Without proposed fix at least one controller
had multiple transaction errors present every test iteration.

With the proposed fix the transaction error was reduced to a
single one when it happened which was usually in less than
10% of the iterations.

As was mentioned before the race condition can still happen,
but the chance is greatly reduced.

Suggested-by: Daniel Alvarez Sanchez <dalvarez@redhat.com>
Reported-at: https://bugzilla.redhat.com/2084668
Signed-off-by: Ales Musil <amusil@redhat.com>
---
 controller/pinctrl.c | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index b95336ec0..d075d900b 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -4081,6 +4081,31 @@  pinctrl_destroy(void)
 /* Contains "struct mac_binding"s. */
 static struct hmap put_mac_bindings;
 
+#define MAX_PUT_MAC_BINDINGS_DELAY 50
+
+struct put_mac_bindings_delay {
+    long long last_insert;
+    uint32_t delay;
+};
+
+static struct put_mac_bindings_delay put_mac_bindings_delay;
+
+static bool
+put_mac_bindings_delay_has_expired(void)
+    OVS_REQUIRES(pinctrl_mutex)
+{
+    return time_msec() - put_mac_bindings_delay.last_insert >=
+           put_mac_bindings_delay.delay;
+}
+
+static void
+put_mac_binding_delay_renew(void)
+    OVS_REQUIRES(pinctrl_mutex)
+{
+    put_mac_bindings_delay.delay = random_range(MAX_PUT_MAC_BINDINGS_DELAY);
+    put_mac_bindings_delay.last_insert = time_msec();
+}
+
 static void
 init_put_mac_bindings(void)
 {
@@ -4119,6 +4144,11 @@  pinctrl_handle_put_mac_binding(const struct flow *md,
         return;
     }
 
+    /* Add slight delay to the packet processing if we are past the old one. */
+    if (put_mac_bindings_delay_has_expired()) {
+        put_mac_binding_delay_renew();
+    }
+
     /* We can send the buffered packet once the main ovn-controller
      * thread calls pinctrl_run() and it writes the mac_bindings stored
      * in 'put_mac_bindings' hmap into the Southbound MAC_Binding table. */
@@ -4280,6 +4310,11 @@  run_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn,
         return;
     }
 
+    /* We didn't go over the random delay yet. */
+    if (!put_mac_bindings_delay_has_expired()) {
+        return;
+    }
+
     const struct mac_binding *mb;
     HMAP_FOR_EACH (mb, hmap_node, &put_mac_bindings) {
         run_put_mac_binding(ovnsb_idl_txn, sbrec_datapath_binding_by_key,
@@ -4336,9 +4371,10 @@  run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
 
 static void
 wait_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn)
+    OVS_REQUIRES(pinctrl_mutex)
 {
     if (ovnsb_idl_txn && !hmap_is_empty(&put_mac_bindings)) {
-        poll_immediate_wake();
+        poll_timer_wait_until(put_mac_bindings_delay.delay);
     }
 }