diff mbox series

[ovs-dev,v2,3/5] ovsdb raft: Enable cooperative multitasking.

Message ID 20240112230018.34044-4-frode.nordahl@canonical.com
State Changes Requested
Delegated to: Ilya Maximets
Headers show
Series Introduce cooperative multitasking to improve OVSDB RAFT cluster operation. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Frode Nordahl Jan. 12, 2024, 11 p.m. UTC
The OVSDB server is mostly synchronous and single threaded.  The
OVSDB RAFT storage engine operate under strict deadlines with
operational impact should the deadline be overrun.

Register for cooperative multitasking so that long running
processing elsewhere in the program may yield to allow stable
maintenance of the cluster.

Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
---
 ovsdb/raft.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

Comments

Ilya Maximets Jan. 16, 2024, 8:33 p.m. UTC | #1
On 1/13/24 00:00, Frode Nordahl wrote:
> The OVSDB server is mostly synchronous and single threaded.  The
> OVSDB RAFT storage engine operate under strict deadlines with
> operational impact should the deadline be overrun.
> 
> Register for cooperative multitasking so that long running
> processing elsewhere in the program may yield to allow stable
> maintenance of the cluster.
> 
> Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
> ---
>  ovsdb/raft.c | 22 +++++++++++++++++++++-
>  1 file changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/ovsdb/raft.c b/ovsdb/raft.c
> index 8effd9ad1..10c2ef106 100644
> --- a/ovsdb/raft.c
> +++ b/ovsdb/raft.c
> @@ -22,6 +22,7 @@
>  #include <errno.h>
>  #include <unistd.h>
>  
> +#include "cooperative-multitasking.h"
>  #include "hash.h"
>  #include "jsonrpc.h"
>  #include "lockfile.h"
> @@ -993,10 +994,13 @@ raft_reset_election_timer(struct raft *raft)
>      raft->election_timeout = raft->election_base + duration;
>  }
>  
> +#define RAFT_TIMER_THRESHOLD(t) t / 3

Nit: Better to parenthesize the body of the macro, just in case.

> +
>  static void
>  raft_reset_ping_timer(struct raft *raft)
>  {
> -    raft->ping_timeout = time_msec() + raft->election_timer / 3;
> +    raft->ping_timeout =
> +        time_msec() + RAFT_TIMER_THRESHOLD(raft->election_timer);
>  }
>  
>  static void
> @@ -1371,6 +1375,8 @@ raft_take_leadership(struct raft *raft)
>      }
>  }
>  
> +static void raft_run_cb(void *arg);
> +
>  /* Closes everything owned by 'raft' that might be visible outside the process:
>   * network connections, commands, etc.  This is part of closing 'raft'; it is
>   * also used if 'raft' has failed in an unrecoverable way. */
> @@ -1397,6 +1403,8 @@ raft_close__(struct raft *raft)
>      LIST_FOR_EACH_SAFE (conn, list_node, &raft->conns) {
>          raft_conn_close(conn);
>      }
> +
> +    cooperative_multitasking_remove(&raft_run_cb, raft);
>  }
>  
>  /* Closes and frees 'raft'.
> @@ -2114,6 +2122,10 @@ raft_run(struct raft *raft)
>          raft_reset_ping_timer(raft);
>      }
>  
> +    cooperative_multitasking_set(
> +        &raft_run_cb, (void *) raft, time_msec(),
> +        RAFT_TIMER_THRESHOLD(raft->election_timer), "raft_run");

I was running tests with ovn-heater and every time we have an idle
state, ovsdb-server yields, because it waits for ping_timer exactly,
wakes up and it's already time to call raft_run, so it always yields
in jsonrpc server run.
If we shift the time here just a little bit we can avoid almost all
the yields in the idle state, since the server will have a little
time to get to the raft_run in a normal way.

I tested with ovn-heater 500-density-heavy run and I see almost no
yields in idle state if I increase the threshold 10% over the ping
timer, i.e.:

     cooperative_multitasking_set(
         &raft_run_cb, (void *) raft, time_msec(),
-        RAFT_TIMER_THRESHOLD(raft->election_timer), "raft_run");
+        RAFT_TIMER_THRESHOLD(raft->election_timer)
+        + RAFT_TIMER_THRESHOLD(raft->election_timer) / 10, "raft_run");

It's not super important, but saves a lot of yield logging, if debug
logs are enabled.   Seems easier to debug if those are filtered out.

What do you think?

> +
>      /* Do this only at the end; if we did it as soon as we set raft->left or
>       * raft->failed in handling the RemoveServerReply, then it could easily
>       * cause references to freed memory in RPC sessions, etc. */
> @@ -2122,6 +2134,14 @@ raft_run(struct raft *raft)
>      }
>  }
>  
> +static void
> +raft_run_cb(void *arg)
> +{
> +    struct raft *raft = (struct raft *) arg;
> +
> +    raft_run(raft);
> +}
> +
>  static void
>  raft_wait_session(struct jsonrpc_session *js)
>  {
Frode Nordahl Jan. 16, 2024, 9:55 p.m. UTC | #2
On Tue, Jan 16, 2024 at 9:33 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 1/13/24 00:00, Frode Nordahl wrote:
> > The OVSDB server is mostly synchronous and single threaded.  The
> > OVSDB RAFT storage engine operate under strict deadlines with
> > operational impact should the deadline be overrun.
> >
> > Register for cooperative multitasking so that long running
> > processing elsewhere in the program may yield to allow stable
> > maintenance of the cluster.
> >
> > Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
> > ---
> >  ovsdb/raft.c | 22 +++++++++++++++++++++-
> >  1 file changed, 21 insertions(+), 1 deletion(-)
> >
> > diff --git a/ovsdb/raft.c b/ovsdb/raft.c
> > index 8effd9ad1..10c2ef106 100644
> > --- a/ovsdb/raft.c
> > +++ b/ovsdb/raft.c
> > @@ -22,6 +22,7 @@
> >  #include <errno.h>
> >  #include <unistd.h>
> >
> > +#include "cooperative-multitasking.h"
> >  #include "hash.h"
> >  #include "jsonrpc.h"
> >  #include "lockfile.h"
> > @@ -993,10 +994,13 @@ raft_reset_election_timer(struct raft *raft)
> >      raft->election_timeout = raft->election_base + duration;
> >  }
> >
> > +#define RAFT_TIMER_THRESHOLD(t) t / 3
>
> Nit: Better to parenthesize the body of the macro, just in case.

Ack.

> > +
> >  static void
> >  raft_reset_ping_timer(struct raft *raft)
> >  {
> > -    raft->ping_timeout = time_msec() + raft->election_timer / 3;
> > +    raft->ping_timeout =
> > +        time_msec() + RAFT_TIMER_THRESHOLD(raft->election_timer);
> >  }
> >
> >  static void
> > @@ -1371,6 +1375,8 @@ raft_take_leadership(struct raft *raft)
> >      }
> >  }
> >
> > +static void raft_run_cb(void *arg);
> > +
> >  /* Closes everything owned by 'raft' that might be visible outside the process:
> >   * network connections, commands, etc.  This is part of closing 'raft'; it is
> >   * also used if 'raft' has failed in an unrecoverable way. */
> > @@ -1397,6 +1403,8 @@ raft_close__(struct raft *raft)
> >      LIST_FOR_EACH_SAFE (conn, list_node, &raft->conns) {
> >          raft_conn_close(conn);
> >      }
> > +
> > +    cooperative_multitasking_remove(&raft_run_cb, raft);
> >  }
> >
> >  /* Closes and frees 'raft'.
> > @@ -2114,6 +2122,10 @@ raft_run(struct raft *raft)
> >          raft_reset_ping_timer(raft);
> >      }
> >
> > +    cooperative_multitasking_set(
> > +        &raft_run_cb, (void *) raft, time_msec(),
> > +        RAFT_TIMER_THRESHOLD(raft->election_timer), "raft_run");
>
> I was running tests with ovn-heater and every time we have an idle
> state, ovsdb-server yields, because it waits for ping_timer exactly,
> wakes up and it's already time to call raft_run, so it always yields
> in jsonrpc server run.
> If we shift the time here just a little bit we can avoid almost all
> the yields in the idle state, since the server will have a little
> time to get to the raft_run in a normal way.
>
> I tested with ovn-heater 500-density-heavy run and I see almost no
> yields in idle state if I increase the threshold 10% over the ping
> timer, i.e.:
>
>      cooperative_multitasking_set(
>          &raft_run_cb, (void *) raft, time_msec(),
> -        RAFT_TIMER_THRESHOLD(raft->election_timer), "raft_run");
> +        RAFT_TIMER_THRESHOLD(raft->election_timer)
> +        + RAFT_TIMER_THRESHOLD(raft->election_timer) / 10, "raft_run");
>
> It's not super important, but saves a lot of yield logging, if debug
> logs are enabled.   Seems easier to debug if those are filtered out.
>
> What do you think?

The yield firing in idle state was bothering me as well and this looks
like a much simpler fix than changing the actual timers. Thanks!
diff mbox series

Patch

diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 8effd9ad1..10c2ef106 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -22,6 +22,7 @@ 
 #include <errno.h>
 #include <unistd.h>
 
+#include "cooperative-multitasking.h"
 #include "hash.h"
 #include "jsonrpc.h"
 #include "lockfile.h"
@@ -993,10 +994,13 @@  raft_reset_election_timer(struct raft *raft)
     raft->election_timeout = raft->election_base + duration;
 }
 
+#define RAFT_TIMER_THRESHOLD(t) t / 3
+
 static void
 raft_reset_ping_timer(struct raft *raft)
 {
-    raft->ping_timeout = time_msec() + raft->election_timer / 3;
+    raft->ping_timeout =
+        time_msec() + RAFT_TIMER_THRESHOLD(raft->election_timer);
 }
 
 static void
@@ -1371,6 +1375,8 @@  raft_take_leadership(struct raft *raft)
     }
 }
 
+static void raft_run_cb(void *arg);
+
 /* Closes everything owned by 'raft' that might be visible outside the process:
  * network connections, commands, etc.  This is part of closing 'raft'; it is
  * also used if 'raft' has failed in an unrecoverable way. */
@@ -1397,6 +1403,8 @@  raft_close__(struct raft *raft)
     LIST_FOR_EACH_SAFE (conn, list_node, &raft->conns) {
         raft_conn_close(conn);
     }
+
+    cooperative_multitasking_remove(&raft_run_cb, raft);
 }
 
 /* Closes and frees 'raft'.
@@ -2114,6 +2122,10 @@  raft_run(struct raft *raft)
         raft_reset_ping_timer(raft);
     }
 
+    cooperative_multitasking_set(
+        &raft_run_cb, (void *) raft, time_msec(),
+        RAFT_TIMER_THRESHOLD(raft->election_timer), "raft_run");
+
     /* Do this only at the end; if we did it as soon as we set raft->left or
      * raft->failed in handling the RemoveServerReply, then it could easily
      * cause references to freed memory in RPC sessions, etc. */
@@ -2122,6 +2134,14 @@  raft_run(struct raft *raft)
     }
 }
 
+static void
+raft_run_cb(void *arg)
+{
+    struct raft *raft = (struct raft *) arg;
+
+    raft_run(raft);
+}
+
 static void
 raft_wait_session(struct jsonrpc_session *js)
 {