diff mbox series

[ovs-dev] ovsdb: Add raft memory usage to memory report.

Message ID 20200522173630.417106-1-i.maximets@ovn.org
State Accepted
Delegated to: Ilya Maximets
Headers show
Series [ovs-dev] ovsdb: Add raft memory usage to memory report. | expand

Commit Message

Ilya Maximets May 22, 2020, 5:36 p.m. UTC
Memory reports could be found in logs or by calling 'memory/show'
appctl command.  For ovsdb-server it includes information about db
cells, monitor connections with their backlog size, etc.  But it
doesn't contain any information about memory consumed by raft.
Backlogs of raft connections could be insanely large because of
snapshot installation requests that simply contains the whole database.
In not that healthy clusters where one of ovsdb servers is not able to
timely handle all the incoming raft traffic, backlog on a sender's side
could cause significant memory consumption issues.

Adding new 'raft-connections' and 'raft-backlog' counters to the
memory report to better track such conditions.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
---
 ovsdb/ovsdb.c   |  4 ++++
 ovsdb/raft.c    | 16 ++++++++++++++++
 ovsdb/raft.h    |  2 ++
 ovsdb/storage.c | 10 ++++++++++
 ovsdb/storage.h |  3 +++
 5 files changed, 35 insertions(+)

Comments

Han Zhou May 22, 2020, 8:36 p.m. UTC | #1
On Fri, May 22, 2020 at 10:36 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> Memory reports could be found in logs or by calling 'memory/show'
> appctl command.  For ovsdb-server it includes information about db
> cells, monitor connections with their backlog size, etc.  But it
> doesn't contain any information about memory consumed by raft.
> Backlogs of raft connections could be insanely large because of
> snapshot installation requests that simply contains the whole database.
> In not that healthy clusters where one of ovsdb servers is not able to
> timely handle all the incoming raft traffic, backlog on a sender's side
> could cause significant memory consumption issues.
>
> Adding new 'raft-connections' and 'raft-backlog' counters to the
> memory report to better track such conditions.
>
> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
> ---
>  ovsdb/ovsdb.c   |  4 ++++
>  ovsdb/raft.c    | 16 ++++++++++++++++
>  ovsdb/raft.h    |  2 ++
>  ovsdb/storage.c | 10 ++++++++++
>  ovsdb/storage.h |  3 +++
>  5 files changed, 35 insertions(+)
>
> diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
> index 7e683e681..2da117cb3 100644
> --- a/ovsdb/ovsdb.c
> +++ b/ovsdb/ovsdb.c
> @@ -502,6 +502,10 @@ ovsdb_get_memory_usage(const struct ovsdb *db,
struct simap *usage)
>      }
>
>      simap_increase(usage, "cells", cells);
> +
> +    if (db->storage) {
> +        ovsdb_storage_get_memory_usage(db->storage, usage);
> +    }
>  }
>
>  struct ovsdb_table *
> diff --git a/ovsdb/raft.c b/ovsdb/raft.c
> index 18f29973e..515eadab3 100644
> --- a/ovsdb/raft.c
> +++ b/ovsdb/raft.c
> @@ -36,6 +36,7 @@
>  #include "ovsdb/log.h"
>  #include "raft-rpc.h"
>  #include "random.h"
> +#include "simap.h"
>  #include "socket-util.h"
>  #include "stream.h"
>  #include "timeval.h"
> @@ -1014,6 +1015,21 @@ raft_get_sid(const struct raft *raft)
>      return &raft->sid;
>  }
>
> +/* Adds memory consumption info to 'usage' for later use by
memory_report(). */
> +void
> +raft_get_memory_usage(const struct raft *raft, struct simap *usage)
> +{
> +    struct raft_conn *conn;
> +    int cnt = 0;
> +
> +    LIST_FOR_EACH (conn, list_node, &raft->conns) {
> +        simap_increase(usage, "raft-backlog",
> +                       jsonrpc_session_get_backlog(conn->js));
> +        cnt++;
> +    }
> +    simap_increase(usage, "raft-connections", cnt);
> +}
> +
>  /* Returns true if 'raft' has completed joining its cluster, has not
left or
>   * initiated leaving the cluster, does not have failed disk storage, and
is
>   * apparently connected to the leader in a healthy way (or is itself the
> diff --git a/ovsdb/raft.h b/ovsdb/raft.h
> index 3d448995a..99d5307e5 100644
> --- a/ovsdb/raft.h
> +++ b/ovsdb/raft.h
> @@ -67,6 +67,7 @@
>  struct json;
>  struct ovsdb_log;
>  struct raft;
> +struct simap;
>  struct sset;
>
>  #define RAFT_MAGIC "CLUSTER"
> @@ -113,6 +114,7 @@ const struct uuid *raft_get_cid(const struct raft *);
>  const struct uuid *raft_get_sid(const struct raft *);
>  bool raft_is_connected(const struct raft *);
>  bool raft_is_leader(const struct raft *);
> +void raft_get_memory_usage(const struct raft *, struct simap *usage);
>
>  /* Joining a cluster. */
>  bool raft_is_joining(const struct raft *);
> diff --git a/ovsdb/storage.c b/ovsdb/storage.c
> index e26252b06..7b4ad16f6 100644
> --- a/ovsdb/storage.c
> +++ b/ovsdb/storage.c
> @@ -26,6 +26,7 @@
>  #include "ovsdb.h"
>  #include "raft.h"
>  #include "random.h"
> +#include "simap.h"
>  #include "timeval.h"
>  #include "util.h"
>
> @@ -188,6 +189,15 @@ ovsdb_storage_get_applied_index(const struct
ovsdb_storage *storage)
>      return storage->raft ? raft_get_applied_index(storage->raft) : 0;
>  }
>
> +void
> +ovsdb_storage_get_memory_usage(const struct ovsdb_storage *storage,
> +                               struct simap *usage)
> +{
> +    if (storage->raft) {
> +        raft_get_memory_usage(storage->raft, usage);
> +    }
> +}
> +
>  void
>  ovsdb_storage_run(struct ovsdb_storage *storage)
>  {
> diff --git a/ovsdb/storage.h b/ovsdb/storage.h
> index 8a9bbab70..a22396891 100644
> --- a/ovsdb/storage.h
> +++ b/ovsdb/storage.h
> @@ -23,6 +23,7 @@
>  struct json;
>  struct ovsdb_schema;
>  struct ovsdb_storage;
> +struct simap;
>  struct uuid;
>
>  struct ovsdb_error *ovsdb_storage_open(const char *filename, bool rw,
> @@ -39,6 +40,8 @@ bool ovsdb_storage_is_leader(const struct ovsdb_storage
*);
>  const struct uuid *ovsdb_storage_get_cid(const struct ovsdb_storage *);
>  const struct uuid *ovsdb_storage_get_sid(const struct ovsdb_storage *);
>  uint64_t ovsdb_storage_get_applied_index(const struct ovsdb_storage *);
> +void ovsdb_storage_get_memory_usage(const struct ovsdb_storage *,
> +                                    struct simap *usage);
>
>  void ovsdb_storage_run(struct ovsdb_storage *);
>  void ovsdb_storage_wait(struct ovsdb_storage *);
> --
> 2.25.4
>

Acked-by: Han Zhou <hzhou@ovn.org>
Ilya Maximets May 26, 2020, 8:10 p.m. UTC | #2
On 5/22/20 10:36 PM, Han Zhou wrote:
> 
> 
> On Fri, May 22, 2020 at 10:36 AM Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>> wrote:
>>
>> Memory reports could be found in logs or by calling 'memory/show'
>> appctl command.  For ovsdb-server it includes information about db
>> cells, monitor connections with their backlog size, etc.  But it
>> doesn't contain any information about memory consumed by raft.
>> Backlogs of raft connections could be insanely large because of
>> snapshot installation requests that simply contains the whole database.
>> In not that healthy clusters where one of ovsdb servers is not able to
>> timely handle all the incoming raft traffic, backlog on a sender's side
>> could cause significant memory consumption issues.
>>
>> Adding new 'raft-connections' and 'raft-backlog' counters to the
>> memory report to better track such conditions.
>>
>> Signed-off-by: Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>>
>> ---
>>  ovsdb/ovsdb.c   |  4 ++++
>>  ovsdb/raft.c    | 16 ++++++++++++++++
>>  ovsdb/raft.h    |  2 ++
>>  ovsdb/storage.c | 10 ++++++++++
>>  ovsdb/storage.h |  3 +++
>>  5 files changed, 35 insertions(+)
>>
> Acked-by: Han Zhou <hzhou@ovn.org <mailto:hzhou@ovn.org>>

Applied to master. Thanks!

Best regards, Ilya Maximets.
diff mbox series

Patch

diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
index 7e683e681..2da117cb3 100644
--- a/ovsdb/ovsdb.c
+++ b/ovsdb/ovsdb.c
@@ -502,6 +502,10 @@  ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage)
     }
 
     simap_increase(usage, "cells", cells);
+
+    if (db->storage) {
+        ovsdb_storage_get_memory_usage(db->storage, usage);
+    }
 }
 
 struct ovsdb_table *
diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 18f29973e..515eadab3 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -36,6 +36,7 @@ 
 #include "ovsdb/log.h"
 #include "raft-rpc.h"
 #include "random.h"
+#include "simap.h"
 #include "socket-util.h"
 #include "stream.h"
 #include "timeval.h"
@@ -1014,6 +1015,21 @@  raft_get_sid(const struct raft *raft)
     return &raft->sid;
 }
 
+/* Adds memory consumption info to 'usage' for later use by memory_report(). */
+void
+raft_get_memory_usage(const struct raft *raft, struct simap *usage)
+{
+    struct raft_conn *conn;
+    int cnt = 0;
+
+    LIST_FOR_EACH (conn, list_node, &raft->conns) {
+        simap_increase(usage, "raft-backlog",
+                       jsonrpc_session_get_backlog(conn->js));
+        cnt++;
+    }
+    simap_increase(usage, "raft-connections", cnt);
+}
+
 /* Returns true if 'raft' has completed joining its cluster, has not left or
  * initiated leaving the cluster, does not have failed disk storage, and is
  * apparently connected to the leader in a healthy way (or is itself the
diff --git a/ovsdb/raft.h b/ovsdb/raft.h
index 3d448995a..99d5307e5 100644
--- a/ovsdb/raft.h
+++ b/ovsdb/raft.h
@@ -67,6 +67,7 @@ 
 struct json;
 struct ovsdb_log;
 struct raft;
+struct simap;
 struct sset;
 
 #define RAFT_MAGIC "CLUSTER"
@@ -113,6 +114,7 @@  const struct uuid *raft_get_cid(const struct raft *);
 const struct uuid *raft_get_sid(const struct raft *);
 bool raft_is_connected(const struct raft *);
 bool raft_is_leader(const struct raft *);
+void raft_get_memory_usage(const struct raft *, struct simap *usage);
 
 /* Joining a cluster. */
 bool raft_is_joining(const struct raft *);
diff --git a/ovsdb/storage.c b/ovsdb/storage.c
index e26252b06..7b4ad16f6 100644
--- a/ovsdb/storage.c
+++ b/ovsdb/storage.c
@@ -26,6 +26,7 @@ 
 #include "ovsdb.h"
 #include "raft.h"
 #include "random.h"
+#include "simap.h"
 #include "timeval.h"
 #include "util.h"
 
@@ -188,6 +189,15 @@  ovsdb_storage_get_applied_index(const struct ovsdb_storage *storage)
     return storage->raft ? raft_get_applied_index(storage->raft) : 0;
 }
 
+void
+ovsdb_storage_get_memory_usage(const struct ovsdb_storage *storage,
+                               struct simap *usage)
+{
+    if (storage->raft) {
+        raft_get_memory_usage(storage->raft, usage);
+    }
+}
+
 void
 ovsdb_storage_run(struct ovsdb_storage *storage)
 {
diff --git a/ovsdb/storage.h b/ovsdb/storage.h
index 8a9bbab70..a22396891 100644
--- a/ovsdb/storage.h
+++ b/ovsdb/storage.h
@@ -23,6 +23,7 @@ 
 struct json;
 struct ovsdb_schema;
 struct ovsdb_storage;
+struct simap;
 struct uuid;
 
 struct ovsdb_error *ovsdb_storage_open(const char *filename, bool rw,
@@ -39,6 +40,8 @@  bool ovsdb_storage_is_leader(const struct ovsdb_storage *);
 const struct uuid *ovsdb_storage_get_cid(const struct ovsdb_storage *);
 const struct uuid *ovsdb_storage_get_sid(const struct ovsdb_storage *);
 uint64_t ovsdb_storage_get_applied_index(const struct ovsdb_storage *);
+void ovsdb_storage_get_memory_usage(const struct ovsdb_storage *,
+                                    struct simap *usage);
 
 void ovsdb_storage_run(struct ovsdb_storage *);
 void ovsdb_storage_wait(struct ovsdb_storage *);