
[ovs-dev,5/6] ovsdb-server: Do storage run at top of main loop.

Message ID 20240110192939.15220-6-frode.nordahl@canonical.com
State Changes Requested
Delegated to: Ilya Maximets
Series Introduce cooperative multitasking to improve OVSDB RAFT cluster operation.

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Frode Nordahl Jan. 10, 2024, 7:29 p.m. UTC
During testing of the implementation of cooperative multitasking
in the ovsdb-server we noticed that the very first yield call
in the jsonrpc-server always fired.

This indicates that the operations performed after/before the
storage run currently take too long.

Moving the storage run section to the top of the main loop led
to successful results with fewer yield calls, as documented in
the commit message of the next commit in this series.

Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
---
 ovsdb/ovsdb-server.c | 56 +++++++++++++++++++++++---------------------
 1 file changed, 29 insertions(+), 27 deletions(-)

Comments

Ilya Maximets Jan. 12, 2024, 12:27 a.m. UTC | #1
On 1/10/24 20:29, Frode Nordahl wrote:
> During testing of the implementation of cooperative multitasking
> in the ovsdb-server we noticed that the very first yield call
> in the jsonrpc-server always fired.
> 
> This indicates that the operations performed after/before the
> storage run currently take too long.
> 
> Moving the storage run section to the top of the main loop led
> to successful results with fewer yield calls, as documented in
> the commit message of the next commit in this series.

I don't really understand what exactly is taking so much time and
why moving this part of the code helps.  Could you elaborate?

Best regards, Ilya Maximets.

> 
> Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
> ---
>  ovsdb/ovsdb-server.c | 56 +++++++++++++++++++++++---------------------
>  1 file changed, 29 insertions(+), 27 deletions(-)
> 
> diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
> index 4d29043f4..e1d4b1125 100644
> --- a/ovsdb/ovsdb-server.c
> +++ b/ovsdb/ovsdb-server.c
> @@ -195,6 +195,34 @@ main_loop(struct server_config *config,
>      ssl_error = NULL;
>      remotes_error = NULL;
>      while (!*exiting) {
> +        SHASH_FOR_EACH_SAFE (node, all_dbs) {
> +            struct db *db = node->data;
> +
> +            ovsdb_storage_run(db->db->storage);
> +            read_db(config, db);
> +            /* Run triggers after storage_run and read_db to make sure new raft
> +             * updates are utilized in current iteration. */
> +            if (ovsdb_trigger_run(db->db, time_msec())) {
> +                /* The message below is currently the only reason to disconnect
> +                 * all clients. */
> +                ovsdb_jsonrpc_server_reconnect(
> +                    jsonrpc, false,
> +                    xasprintf("committed %s database schema conversion",
> +                              db->db->name));
> +            }
> +            if (ovsdb_storage_is_dead(db->db->storage)) {
> +                VLOG_INFO("%s: removing database because storage disconnected "
> +                          "permanently", node->name);
> +                remove_db(config, node,
> +                          xasprintf("removing database %s because storage "
> +                                    "disconnected permanently", node->name));
> +            } else if (!ovsdb_snapshot_in_progress(db->db)
> +                       && (ovsdb_storage_should_snapshot(db->db->storage) ||
> +                           ovsdb_snapshot_ready(db->db))) {
> +                log_and_free_error(ovsdb_snapshot(db->db, trim_memory));
> +            }
> +        }
> +
>          memory_run();
>          if (memory_should_report()) {
>              struct simap usage;
> @@ -221,6 +249,7 @@ main_loop(struct server_config *config,
>              reconfigure_remotes(jsonrpc, all_dbs, remotes),
>              &remotes_error);
>          report_error_if_changed(reconfigure_ssl(all_dbs), &ssl_error);
> +
>          ovsdb_jsonrpc_server_run(jsonrpc);
>  
>          if (*is_backup) {
> @@ -233,33 +262,6 @@ main_loop(struct server_config *config,
>  
>          ovsdb_relay_run();
>  
> -        SHASH_FOR_EACH_SAFE (node, all_dbs) {
> -            struct db *db = node->data;
> -
> -            ovsdb_storage_run(db->db->storage);
> -            read_db(config, db);
> -            /* Run triggers after storage_run and read_db to make sure new raft
> -             * updates are utilized in current iteration. */
> -            if (ovsdb_trigger_run(db->db, time_msec())) {
> -                /* The message below is currently the only reason to disconnect
> -                 * all clients. */
> -                ovsdb_jsonrpc_server_reconnect(
> -                    jsonrpc, false,
> -                    xasprintf("committed %s database schema conversion",
> -                              db->db->name));
> -            }
> -            if (ovsdb_storage_is_dead(db->db->storage)) {
> -                VLOG_INFO("%s: removing database because storage disconnected "
> -                          "permanently", node->name);
> -                remove_db(config, node,
> -                          xasprintf("removing database %s because storage "
> -                                    "disconnected permanently", node->name));
> -            } else if (!ovsdb_snapshot_in_progress(db->db)
> -                       && (ovsdb_storage_should_snapshot(db->db->storage) ||
> -                           ovsdb_snapshot_ready(db->db))) {
> -                log_and_free_error(ovsdb_snapshot(db->db, trim_memory));
> -            }
> -        }
>          if (run_process) {
>              process_run();
>              if (process_exited(run_process)) {
Ilya Maximets Jan. 12, 2024, 4:20 p.m. UTC | #2
On 1/12/24 01:27, Ilya Maximets wrote:
> On 1/10/24 20:29, Frode Nordahl wrote:
>> During testing of the implementation of cooperative multitasking
>> in the ovsdb-server we noticed that the very first yield call
>> in the jsonrpc-server always fired.
>>
>> This indicates that the operations performed after/before the
>> storage run currently take too long.
>>
>> Moving the storage run section to the top of the main loop led
>> to successful results with fewer yield calls, as documented in
>> the commit message of the next commit in this series.
> 
> I don't really understand what exactly is taking so much time and
> why moving this part of the code helps.  Could you elaborate?

I think, I got it.  In the current version of a patch set we have:

main():
0.    open_db()
          raft_alloc()  <-- register callback
          read_db()     <-- takes a lot of time
      main_loop()
1.        run jsonrpc   <-- yields and warns because read_db
                            already took a lot of time.
              yield()
                  raft_run()  <-- updates the callback
2.        storage_run() 
              raft_run()      <-- updates the callback

The solution proposed in this patch is to swap 1 and 2,
so we call raft_run() directly before we yield for the
first time.

I suppose my suggestion to not have a _register() function would
solve that problem, because the callback will not be registered
until the first call to raft_run().

Is that correct?

> 
> Best regards, Ilya Maximets.
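To make the call trace above concrete, here is a minimal sketch of such a
time-based yield check.  The names (cm_set(), cm_yield(), raft_run_stub())
and the way the threshold is tracked are illustrative assumptions, not the
actual API introduced by this series:

    #include <stdio.h>
    #include <time.h>

    /* Illustrative stand-ins for the cooperative multitasking API being
     * discussed; names, signatures and threshold handling are assumptions. */
    static void (*cm_cb)(void *);
    static void *cm_arg;
    static long long cm_last_run;    /* Time the callback last ran, in ms. */
    static long long cm_threshold;   /* Max allowed gap between runs, in ms. */

    static long long
    now_msec(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000LL;
    }

    /* Steps 0/2 in the trace: (re)register the callback and record when it
     * last ran. */
    static void
    cm_set(void (*cb)(void *), void *arg, long long last_run,
           long long threshold)
    {
        cm_cb = cb;
        cm_arg = arg;
        cm_last_run = last_run;
        cm_threshold = threshold;
    }

    /* Step 1 in the trace: called from a long-running code path such as the
     * jsonrpc server; runs the callback if it is overdue.  This is the check
     * behind the "elapsed(...) >= threshold(...)" log lines quoted below. */
    static void
    cm_yield(void)
    {
        long long elapsed = now_msec() - cm_last_run;

        if (cm_cb && elapsed >= cm_threshold) {
            printf("yield: elapsed(%lld) >= threshold(%lld), overrun: %lld\n",
                   elapsed, cm_threshold, elapsed - cm_threshold);
            cm_cb(cm_arg);
            cm_last_run = now_msec();
        }
    }

    static void
    raft_run_stub(void *arg)
    {
        (void) arg;
        /* Stand-in for raft_run(): the work that must not be starved. */
    }

    int
    main(void)
    {
        cm_set(raft_run_stub, NULL, now_msec(), 166);  /* 166 ms as in the log. */
        cm_yield();    /* Too early: does nothing. */
        return 0;
    }

With this shape, whichever code path calls cm_yield() first after a long
stretch of work is the one that ends up running the callback, which is why
the placement of the storage run relative to the jsonrpc run matters.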
Frode Nordahl Jan. 12, 2024, 10:10 p.m. UTC | #3
On Fri, Jan 12, 2024 at 5:20 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 1/12/24 01:27, Ilya Maximets wrote:
> > On 1/10/24 20:29, Frode Nordahl wrote:
> >> During testing of the implementation of cooperative multitasking
> >> in the ovsdb-server we noticed that the very first yield call
> >> in the jsonrpc-server always fired.
> >>
> >> This indicates that the operations performed after/before the
> >> storage run currently take too long.
> >>
> >> Moving the storage run section to the top of the main loop led
> >> to successful results with fewer yield calls, as documented in
> >> the commit message of the next commit in this series.
> >
> > I don't really understand what exactly is taking so much time and
> > why moving this part of the code helps.  Could you elaborate?
>
> I think, I got it.  In the current version of a patch set we have:
>
> main():
> 0.    open_db()
>           raft_alloc()  <-- register callback
>           read_db()     <-- takes a lot of time
>       main_loop()
> 1.        run jsonrpc   <-- yields and warns because read_db
>                             already took a lot of time.
>               yield()
>                   raft_run()  <-- updates the callback
> 2.        storage_run()
>               raft_run()      <-- updates the callback
>
> The solution proposed in this patch is to swap 1 and 2,
> so we call raft_run() directly before we yield for the
> first time.
>
> I suppose my suggestion to not have a _register() function would
> solve that problem, because the callback will not be registered
> until the first call to raft_run().
>
> Is that correct?

Unfortunately, even when only having the _set() function, the first
yield always fires.

Putting log lines before/after every major function call in the
main_loop provides something like this:
2024-01-12T22:01:46.968Z|00204|ovsdb_server|DBG|HELLO 0
2024-01-12T22:01:46.968Z|00205|ovsdb_server|DBG|HELLO 1
2024-01-12T22:01:46.968Z|00206|ovsdb_server|DBG|HELLO 2
2024-01-12T22:01:46.968Z|00207|ovsdb_server|DBG|HELLO 3
2024-01-12T22:01:46.968Z|00208|ovsdb_server|DBG|HELLO 4
2024-01-12T22:01:46.968Z|00209|ovsdb_server|DBG|HELLO 5
2024-01-12T22:01:46.968Z|00210|ovsdb_server|DBG|HELLO 6
2024-01-12T22:01:47.134Z|00211|ovsdb_server|DBG|HELLO 0
2024-01-12T22:01:47.134Z|00212|ovsdb_server|DBG|HELLO 1
2024-01-12T22:01:47.134Z|00213|ovsdb_server|DBG|HELLO 2
2024-01-12T22:01:47.134Z|00214|ovsdb_server|DBG|HELLO 3
2024-01-12T22:01:47.134Z|00215|ovsdb_server|DBG|HELLO 4
2024-01-12T22:01:47.134Z|00216|ovsdb_server|DBG|HELLO 5
2024-01-12T22:01:47.135Z|00217|cooperative_multitasking|DBG|ovsdb/jsonrpc-server.c:604:
yield for raft_run(0x560b47e225e0): elapsed(167) >= threshold(166),
overrun: 1

So I actually wonder if the wait timers set up in raft_wait() are too
optimistic? Then again, those timers can't ever take into account the
number of clients serviced by for example the call to
ovsdb_jsonrpc_server_run().

I know that reordering the whole thing is a bit drastic, and I kind of
regret proposing that.

Would an alternative be to slip in a yield at the top of the main_loop
function or a SHASH_FOR_EACH_SAFE (node, all_dbs) that just does
ovsdb_storage_run()?
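
For reference, a rough sketch of that second alternative (a fragment that
would sit at the very top of main_loop(), reusing the identifiers from the
diff above; not a tested change):

    /* Run only the storage layer for each database at the very top of the
     * loop so that raft_run() executes before the first yield check of the
     * iteration, while keeping the rest of the main loop order as-is. */
    SHASH_FOR_EACH_SAFE (node, all_dbs) {
        struct db *db = node->data;

        ovsdb_storage_run(db->db->storage);
    }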
Ilya Maximets Jan. 12, 2024, 10:47 p.m. UTC | #4
On 1/12/24 23:10, Frode Nordahl wrote:
> On Fri, Jan 12, 2024 at 5:20 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>>
>> On 1/12/24 01:27, Ilya Maximets wrote:
>>> On 1/10/24 20:29, Frode Nordahl wrote:
>>>> During testing of the implementation of cooperative multitasking
>>>> in the ovsdb-server we noticed that the very first yield call
>>>> in the jsonrpc-server always fired.
>>>>
>>>> This indicates that the operations performed after/before the
>>>> storage run currently take too long.
>>>>
>>>> Moving the storage run section to the top of the main loop led
>>>> to successful results with fewer yield calls, as documented in
>>>> the commit message of the next commit in this series.
>>>
>>> I don't really understand what exactly is taking so much time and
>>> why moving this part of the code helps.  Could you elaborate?
>>
>> I think, I got it.  In the current version of a patch set we have:
>>
>> main():
>> 0.    open_db()
>>           raft_alloc()  <-- register callback
>>           read_db()     <-- takes a lot of time
>>       main_loop()
>> 1.        run jsonrpc   <-- yields and warns because read_db
>>                             already took a lot of time.
>>               yield()
>>                   raft_run()  <-- updates the callback
>> 2.        storage_run()
>>               raft_run()      <-- updates the callback
>>
>> The solution proposed in this patch is to swap 1 and 2,
>> so we call raft_run() directly before we yield for the
>> first time.
>>
>> I suppose my suggestion to not have a _register() function would
>> solve that problem, because the callback will not be registered
>> until the first call to raft_run().
>>
>> Is that correct?
> 
> Unfortunately, even when only having the _set() function, the first
> yield always fires.
> 
> Putting log lines before/after every major function call in the
> main_loop provides something like this:
> 2024-01-12T22:01:46.968Z|00204|ovsdb_server|DBG|HELLO 0
> 2024-01-12T22:01:46.968Z|00205|ovsdb_server|DBG|HELLO 1
> 2024-01-12T22:01:46.968Z|00206|ovsdb_server|DBG|HELLO 2
> 2024-01-12T22:01:46.968Z|00207|ovsdb_server|DBG|HELLO 3
> 2024-01-12T22:01:46.968Z|00208|ovsdb_server|DBG|HELLO 4
> 2024-01-12T22:01:46.968Z|00209|ovsdb_server|DBG|HELLO 5
> 2024-01-12T22:01:46.968Z|00210|ovsdb_server|DBG|HELLO 6
> 2024-01-12T22:01:47.134Z|00211|ovsdb_server|DBG|HELLO 0
> 2024-01-12T22:01:47.134Z|00212|ovsdb_server|DBG|HELLO 1
> 2024-01-12T22:01:47.134Z|00213|ovsdb_server|DBG|HELLO 2
> 2024-01-12T22:01:47.134Z|00214|ovsdb_server|DBG|HELLO 3
> 2024-01-12T22:01:47.134Z|00215|ovsdb_server|DBG|HELLO 4
> 2024-01-12T22:01:47.134Z|00216|ovsdb_server|DBG|HELLO 5
> 2024-01-12T22:01:47.135Z|00217|cooperative_multitasking|DBG|ovsdb/jsonrpc-server.c:604:
> yield for raft_run(0x560b47e225e0): elapsed(167) >= threshold(166),
> overrun: 1

But this is a debug log, i.e. it doesn't actually fire.
The overrun here is just 1ms, which should be fine.
Or am I missing something?

> 
> So I actually wonder if the wait timers set up in raft_wait() are too
> optimistic? Then again, those timers can't ever take into account the
> number of clients serviced by for example the call to
> ovsdb_jsonrpc_server_run().

You seem to use 500ms as a RAFT election timer here, which sounds like
a small number to use.  I'd not recommend anything smaller than the
default 1 second.  A 300ms ping cadence should be fine.  You can't
predict the total workload due to the unknown number of clients, but
hundreds of milliseconds is a lot of time for a modern CPU, so it should
generally be fine.

If I understand the log above correctly, it shows 2 iterations of the
loop, right?  And between them ovsdb-server sleeps for 168-ish ms
in poll_block, likely because that is the timeout for the ping timer.

If we want the yield to not fire when poll_block wakes up exactly at the
time the ping timer should fire (it will always wake up at that time), we
may increase the threshold, e.g. add something like 10-100 ms to it,
i.e. make yielding slightly less frequent than the scheduled wakeups set
up by raft_wait.

Does that make sense?
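
As an illustration of that suggestion (the names and the exact derivation
are assumptions, not what the series implements), the yield threshold could
be computed from the RAFT timers with a small margin on top:

    /* Sketch of the suggested mitigation.  The division by 3 matches the
     * 166 ms interval observed with the 500 ms election timer in the log
     * above; the exact derivation used by the series may differ. */
    #define CM_YIELD_MARGIN_MSEC 50     /* Within the suggested 10-100 ms. */

    static long long
    yield_threshold_msec(long long election_timer_msec)
    {
        long long ping_interval_msec = election_timer_msec / 3;

        /* Make yielding slightly less frequent than the scheduled wakeups,
         * so a poll_block() wakeup exactly at the ping timeout does not
         * immediately count as an overrun. */
        return ping_interval_msec + CM_YIELD_MARGIN_MSEC;
    }

With a margin like this, a wakeup that lands exactly on the ping timeout
still leaves a few tens of milliseconds of real work before the yield fires.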

> 
> I know that reordering the whole thing is a bit drastic, and I kind of
> regret proposing that.
> 
> Would an alternative be to slip in a yield at the top of the main_loop
> function or a SHASH_FOR_EACH_SAFE (node, all_dbs) that just does
> ovsdb_storage_run()?
>
Ilya Maximets Jan. 12, 2024, 10:51 p.m. UTC | #5
On 1/12/24 23:47, Ilya Maximets wrote:
> On 1/12/24 23:10, Frode Nordahl wrote:
>> On Fri, Jan 12, 2024 at 5:20 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>>>
>>> On 1/12/24 01:27, Ilya Maximets wrote:
>>>> On 1/10/24 20:29, Frode Nordahl wrote:
>>>>> During testing of the implementation of cooperative multitasking
>>>>> in the ovsdb-server we noticed that the very first yield call
>>>>> in the jsonrpc-server always fired.
>>>>>
>>>>> This indicates that the operations performed after/before the
>>>>> storage run currently take too long.
>>>>>
>>>>> Moving the storage run section to the top of the main loop led
>>>>> to successful results with fewer yield calls, as documented in
>>>>> the commit message of the next commit in this series.
>>>>
>>>> I don't really understand what exactly is taking so much time and
>>>> why moving this part of the code helps.  Could you elaborate?
>>>
>>> I think, I got it.  In the current version of a patch set we have:
>>>
>>> main():
>>> 0.    open_db()
>>>           raft_alloc()  <-- register callback
>>>           read_db()     <-- takes a lot of time
>>>       main_loop()
>>> 1.        run jsonrpc   <-- yields and warns because read_db
>>>                             already took a lot of time.
>>>               yield()
>>>                   raft_run()  <-- updates the callback
>>> 2.        storage_run()
>>>               raft_run()      <-- updates the callback
>>>
>>> The solution proposed in this patch is to swap 1 and 2,
>>> so we call raft_run() directly before we yield for the
>>> first time.
>>>
>>> I suppose my suggestion to not have a _register() function would
>>> solve that problem, because the callback will not be registered
>>> until the first call to raft_run().
>>>
>>> Is that correct?
>>
>> Unfortunately, even when only having the _set() function, the first
>> yield always fires.
>>
>> Putting log lines before/after every major function call in the
>> main_loop provides something like this:
>> 2024-01-12T22:01:46.968Z|00204|ovsdb_server|DBG|HELLO 0
>> 2024-01-12T22:01:46.968Z|00205|ovsdb_server|DBG|HELLO 1
>> 2024-01-12T22:01:46.968Z|00206|ovsdb_server|DBG|HELLO 2
>> 2024-01-12T22:01:46.968Z|00207|ovsdb_server|DBG|HELLO 3
>> 2024-01-12T22:01:46.968Z|00208|ovsdb_server|DBG|HELLO 4
>> 2024-01-12T22:01:46.968Z|00209|ovsdb_server|DBG|HELLO 5
>> 2024-01-12T22:01:46.968Z|00210|ovsdb_server|DBG|HELLO 6
>> 2024-01-12T22:01:47.134Z|00211|ovsdb_server|DBG|HELLO 0
>> 2024-01-12T22:01:47.134Z|00212|ovsdb_server|DBG|HELLO 1
>> 2024-01-12T22:01:47.134Z|00213|ovsdb_server|DBG|HELLO 2
>> 2024-01-12T22:01:47.134Z|00214|ovsdb_server|DBG|HELLO 3
>> 2024-01-12T22:01:47.134Z|00215|ovsdb_server|DBG|HELLO 4
>> 2024-01-12T22:01:47.134Z|00216|ovsdb_server|DBG|HELLO 5
>> 2024-01-12T22:01:47.135Z|00217|cooperative_multitasking|DBG|ovsdb/jsonrpc-server.c:604:
>> yield for raft_run(0x560b47e225e0): elapsed(167) >= threshold(166),
>> overrun: 1
> 
> But this is a debug log, i.e. it doesn't actually fire.
> The overrun here is just 1ms, which should be fine.
> Or am I missing something?

Sorry, it is firing, it's just not a warning at this point.
But the logic below about poll_timer_wait_until(raft->ping_timeout)
waking the thread up at exactly the ping timeout and causing the next
possible yield to fire still stands.
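
In code terms, the interaction being described is roughly the following (a
simplified sketch, not the actual raft_wait()/main_loop() code; only
poll_timer_wait_until() and poll_block() are the real OVS poll-loop API):

    #include "openvswitch/poll-loop.h"  /* poll_timer_wait_until(), poll_block(). */

    /* The poll loop is armed to wake up exactly at the ping timeout, so a
     * yield check that runs before raft_run() in the next iteration already
     * sees elapsed >= threshold. */
    static void
    sleep_until_ping_timeout(long long ping_timeout)
    {
        poll_timer_wait_until(ping_timeout);  /* Arm wakeup at the ping timer. */
        poll_block();                         /* Sleeps ~167 ms in the log above. */
        /* Next iteration starts here: the time since the last raft_run() is
         * now at least the ping interval, i.e. right at the yield threshold
         * unless the threshold includes some extra margin. */
    }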

> 
>>
>> So I actually wonder if the wait timers set up in raft_wait() are too
>> optimistic? Then again, those timers can't ever take into account the
>> number of clients serviced by for example the call to
>> ovsdb_jsonrpc_server_run().
> 
> You seem to use 500ms as a RAFT election timer here, which sounds like
> a small number to use.  I'd not recommend anything smaller than the
> default 1 second.  A 300ms ping cadence should be fine.  You can't
> predict the total workload due to the unknown number of clients, but
> hundreds of milliseconds is a lot of time for a modern CPU, so it should
> generally be fine.
> 
> If I understand the log above correctly, it shows 2 iterations of the
> loop, right?  And between them ovsdb-server sleeps for 168-ish ms
> in poll_block, likely because that is the timeout for the ping timer.
> 
> If we want the yield to not fire when poll_block wakes up exactly at the
> time the ping timer should fire (it will always wake up at that time), we
> may increase the threshold, e.g. add something like 10-100 ms to it,
> i.e. make yielding slightly less frequent than the scheduled wakeups set
> up by raft_wait.
> 
> Does that make sense?
> 
>>
>> I know that reordering the whole thing is a bit drastic, and I kind of
>> regret proposing that.
>>
>> Would an alternative be to slip in a yield at the top of the main_loop
>> function or a SHASH_FOR_EACH_SAFE (node, all_dbs) that just does
>> ovsdb_storage_run()?
>>
>

Patch

diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
index 4d29043f4..e1d4b1125 100644
--- a/ovsdb/ovsdb-server.c
+++ b/ovsdb/ovsdb-server.c
@@ -195,6 +195,34 @@  main_loop(struct server_config *config,
     ssl_error = NULL;
     remotes_error = NULL;
     while (!*exiting) {
+        SHASH_FOR_EACH_SAFE (node, all_dbs) {
+            struct db *db = node->data;
+
+            ovsdb_storage_run(db->db->storage);
+            read_db(config, db);
+            /* Run triggers after storage_run and read_db to make sure new raft
+             * updates are utilized in current iteration. */
+            if (ovsdb_trigger_run(db->db, time_msec())) {
+                /* The message below is currently the only reason to disconnect
+                 * all clients. */
+                ovsdb_jsonrpc_server_reconnect(
+                    jsonrpc, false,
+                    xasprintf("committed %s database schema conversion",
+                              db->db->name));
+            }
+            if (ovsdb_storage_is_dead(db->db->storage)) {
+                VLOG_INFO("%s: removing database because storage disconnected "
+                          "permanently", node->name);
+                remove_db(config, node,
+                          xasprintf("removing database %s because storage "
+                                    "disconnected permanently", node->name));
+            } else if (!ovsdb_snapshot_in_progress(db->db)
+                       && (ovsdb_storage_should_snapshot(db->db->storage) ||
+                           ovsdb_snapshot_ready(db->db))) {
+                log_and_free_error(ovsdb_snapshot(db->db, trim_memory));
+            }
+        }
+
         memory_run();
         if (memory_should_report()) {
             struct simap usage;
@@ -221,6 +249,7 @@  main_loop(struct server_config *config,
             reconfigure_remotes(jsonrpc, all_dbs, remotes),
             &remotes_error);
         report_error_if_changed(reconfigure_ssl(all_dbs), &ssl_error);
+
         ovsdb_jsonrpc_server_run(jsonrpc);
 
         if (*is_backup) {
@@ -233,33 +262,6 @@  main_loop(struct server_config *config,
 
         ovsdb_relay_run();
 
-        SHASH_FOR_EACH_SAFE (node, all_dbs) {
-            struct db *db = node->data;
-
-            ovsdb_storage_run(db->db->storage);
-            read_db(config, db);
-            /* Run triggers after storage_run and read_db to make sure new raft
-             * updates are utilized in current iteration. */
-            if (ovsdb_trigger_run(db->db, time_msec())) {
-                /* The message below is currently the only reason to disconnect
-                 * all clients. */
-                ovsdb_jsonrpc_server_reconnect(
-                    jsonrpc, false,
-                    xasprintf("committed %s database schema conversion",
-                              db->db->name));
-            }
-            if (ovsdb_storage_is_dead(db->db->storage)) {
-                VLOG_INFO("%s: removing database because storage disconnected "
-                          "permanently", node->name);
-                remove_db(config, node,
-                          xasprintf("removing database %s because storage "
-                                    "disconnected permanently", node->name));
-            } else if (!ovsdb_snapshot_in_progress(db->db)
-                       && (ovsdb_storage_should_snapshot(db->db->storage) ||
-                           ovsdb_snapshot_ready(db->db))) {
-                log_and_free_error(ovsdb_snapshot(db->db, trim_memory));
-            }
-        }
         if (run_process) {
             process_run();
             if (process_exited(run_process)) {