diff mbox series

[ovs-dev,v2] utilities: increase OVSDB inactivity probe interval for ovn-*ctl

Message ID 20230503005531.4005891-1-odivlad@gmail.com
State Changes Requested
Headers show
Series [ovs-dev,v2] utilities: increase OVSDB inactivity probe interval for ovn-*ctl | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_ovn-kubernetes success github build: passed
ovsrobot/github-robot-_Build_and_Test fail github build: failed

Commit Message

Vladislav Odintsov May 3, 2023, 12:55 a.m. UTC
For large OVN_Southbound (or other) databases the default interval of 5000 ms
could be not sufficient to run.  This patch adds configuration of OVSDB
inactivity probes for ovn-*ctl utilities.

Initially, on the utility start the hardcoded value of 120000 ms is set.
For daemon-mode it is possible to configure intervals in OVN Northbound
database:
OVN_Northbound.NB_Global.options.dbctl_inactivity_probe for ovn-nbctl and
ovn-sbctl utilities.

If no DB configuration option was provided, the initial (120000 ms) interval
is left.

Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
---
 lib/ovn-util.h           |  2 ++
 ovn-nb.xml               | 20 ++++++++++++++++++++
 utilities/ovn-dbctl.c    | 18 ++++++++++++++++++
 utilities/ovn-dbctl.h    |  1 +
 utilities/ovn-ic-nbctl.c |  4 ++++
 utilities/ovn-ic-sbctl.c |  4 ++++
 utilities/ovn-nbctl.c    | 15 +++++++++++++++
 utilities/ovn-sbctl.c    | 15 +++++++++++++++
 8 files changed, 79 insertions(+)

Comments

Dumitru Ceara May 3, 2023, 1:03 p.m. UTC | #1
Hi Vladislav,

On 5/3/23 02:55, Vladislav Odintsov wrote:
> For large OVN_Southbound (or other) databases the default interval of 5000 ms
> could be not sufficient to run.  This patch adds configuration of OVSDB
> inactivity probes for ovn-*ctl utilities.
> 
> Initially, on the utility start the hardcoded value of 120000 ms is set.
> For daemon-mode it is possible to configure intervals in OVN Northbound
> database:
> OVN_Northbound.NB_Global.options.dbctl_inactivity_probe for ovn-nbctl and
> ovn-sbctl utilities.
> 
> If no DB configuration option was provided, the initial (120000 ms) interval
> is left.

Nit: I would add here something like "A non-zero inactivity probe
interval is required in order to allow DB clients to detect connection
failures due to network issues."

> 
> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
> ---
>  lib/ovn-util.h           |  2 ++
>  ovn-nb.xml               | 20 ++++++++++++++++++++
>  utilities/ovn-dbctl.c    | 18 ++++++++++++++++++
>  utilities/ovn-dbctl.h    |  1 +
>  utilities/ovn-ic-nbctl.c |  4 ++++
>  utilities/ovn-ic-sbctl.c |  4 ++++
>  utilities/ovn-nbctl.c    | 15 +++++++++++++++
>  utilities/ovn-sbctl.c    | 15 +++++++++++++++

I'm not sure whether we should we mention this user visible change in
the NEWS file.

>  8 files changed, 79 insertions(+)
> 
> diff --git a/lib/ovn-util.h b/lib/ovn-util.h
> index 7cf861dbc..47e53ca74 100644
> --- a/lib/ovn-util.h
> +++ b/lib/ovn-util.h
> @@ -154,6 +154,8 @@ void set_idl_probe_interval(struct ovsdb_idl *idl, const char *remote,
>  #define OVN_MAX_DP_VXLAN_KEY ((1u << 12) - 1)
>  #define OVN_MAX_DP_VXLAN_KEY_LOCAL (OVN_MAX_DP_KEY - OVN_MAX_DP_GLOBAL_NUM)
>  
> +#define DEFAULT_UTILS_PROBE_INTERVAL_MSEC 120000

I think this should go to ovn-dbctl.h instead but whoever applies the
patch can move this.  So I don't think there's a need for v3, therefore:

Acked-by: Dumitru Ceara <dceara@redhat.com>

Ilya, Mark, Numan, Han, what do you guys think?

Regards,
Dumitru

> +
>  struct hmap;
>  void ovn_destroy_tnlids(struct hmap *tnlids);
>  bool ovn_add_tnlid(struct hmap *set, uint32_t tnlid);
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index fd32070f2..a82f31a92 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -215,6 +215,26 @@
>          </p>
>        </column>
>  
> +      <column name="options" key="dbctl_probe_interval">
> +        <p>
> +          The inactivity probe interval of the connection to the OVN Northbound
> +          and Southbound databases from <code>ovn-nbctl</code> and
> +          <code>ovn-sbctl</code> utilities respectively, in milliseconds.
> +          If the value is zero, it disables the connection keepalive feature.
> +        </p>
> +
> +        <p>
> +          If the value is nonzero, then it will be forced to a value of
> +          at least 1000 ms.
> +        </p>
> +
> +        <p>
> +          If the value less then zero, then the default inactivity probe
> +          interval for <code>ovn-nbctl</code> and <code>ovn-sbctl</code> would
> +          be left intact (120000 ms).
> +        </p>
> +      </column>
> +
>        <column name="options" key="northd_trim_timeout">
>          <p>
>            When used, this configuration value specifies the time, in
> diff --git a/utilities/ovn-dbctl.c b/utilities/ovn-dbctl.c
> index 369a6a663..c6aebcbbb 100644
> --- a/utilities/ovn-dbctl.c
> +++ b/utilities/ovn-dbctl.c
> @@ -205,6 +205,9 @@ ovn_dbctl_main(int argc, char *argv[],
>      ovsdb_idl_set_remote(idl, db, daemon_mode);
>      ovsdb_idl_set_leader_only(idl, leader_only);
>  
> +    /* Set reasonable high probe interval. */
> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
> +
>      if (daemon_mode) {
>          server_loop(dbctl_options, idl, argc, argv_);
>      } else {
> @@ -1094,6 +1097,17 @@ out:
>      free(argv);
>  }
>  
> +static void
> +update_inactivity_probe(struct server_cmd_run_ctx *ctx)
> +{
> +    int inactivity_probe = ctx->dbctl_options->get_inactivity_probe(ctx->idl);
> +    if (inactivity_probe < 0) {
> +        inactivity_probe = DEFAULT_UTILS_PROBE_INTERVAL_MSEC;
> +    }
> +
> +    set_idl_probe_interval(ctx->idl, db, inactivity_probe);
> +}
> +
>  static void
>  server_loop(const struct ovn_dbctl_options *dbctl_options,
>              struct ovsdb_idl *idl, int argc, char *argv[])
> @@ -1125,6 +1139,10 @@ server_loop(const struct ovn_dbctl_options *dbctl_options,
>  
>      for (;;) {
>          update_ssl_config();
> +
> +        /* Configure inactivity probe from connected DB. */
> +        update_inactivity_probe(&server_cmd_run_ctx);
> +
>          memory_run();
>          if (memory_should_report()) {
>              struct simap usage = SIMAP_INITIALIZER(&usage);
> diff --git a/utilities/ovn-dbctl.h b/utilities/ovn-dbctl.h
> index a1fbede6b..54c232dd6 100644
> --- a/utilities/ovn-dbctl.h
> +++ b/utilities/ovn-dbctl.h
> @@ -52,6 +52,7 @@ struct ovn_dbctl_options {
>                            const struct timer *wait_timeout,
>                            long long int start_time, bool print_wait_time);
>  
> +    int (*get_inactivity_probe)(struct ovsdb_idl *);
>      struct ctl_context *(*ctx_create)(void);
>      void (*ctx_destroy)(struct ctl_context *);
>  };
> diff --git a/utilities/ovn-ic-nbctl.c b/utilities/ovn-ic-nbctl.c
> index f3d8039a8..19d8b054f 100644
> --- a/utilities/ovn-ic-nbctl.c
> +++ b/utilities/ovn-ic-nbctl.c
> @@ -116,6 +116,10 @@ main(int argc, char *argv[])
>      ovsdb_idl_set_remote(idl, db, false);
>      ovsdb_idl_set_db_change_aware(idl, false);
>      ovsdb_idl_set_leader_only(idl, leader_only);
> +
> +    /* Set reasonable high probe interval. */
> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
> +
>      run_prerequisites(commands, n_commands, idl);
>  
>      /* Execute the commands.
> diff --git a/utilities/ovn-ic-sbctl.c b/utilities/ovn-ic-sbctl.c
> index 3060b48b9..215d09d30 100644
> --- a/utilities/ovn-ic-sbctl.c
> +++ b/utilities/ovn-ic-sbctl.c
> @@ -115,6 +115,10 @@ main(int argc, char *argv[])
>      ovsdb_idl_set_remote(idl, db, false);
>      ovsdb_idl_set_db_change_aware(idl, false);
>      ovsdb_idl_set_leader_only(idl, leader_only);
> +
> +    /* Set reasonable high probe interval. */
> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
> +
>      run_prerequisites(commands, n_commands, idl);
>  
>      /* Execute the commands.
> diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
> index 45572fd30..49c5b294a 100644
> --- a/utilities/ovn-nbctl.c
> +++ b/utilities/ovn-nbctl.c
> @@ -153,6 +153,20 @@ nbctl_post_execute(struct ovsdb_idl *idl, struct ovsdb_idl_txn *txn,
>      }
>  }
>  
> +static int
> +get_inactivity_probe(struct ovsdb_idl *idl)
> +{
> +    const struct nbrec_nb_global *nb = nbrec_nb_global_first(idl);
> +    int interval = -1;
> +
> +    if (nb) {
> +        interval = smap_get_int(&nb->options, "dbctl_probe_interval",
> +                                interval);
> +    }
> +
> +    return interval;
> +}
> +
>  static char * OVS_WARN_UNUSED_RESULT dhcp_options_get(
>      struct ctl_context *ctx, const char *id, bool must_exist,
>      const struct nbrec_dhcp_options **);
> @@ -7935,6 +7949,7 @@ main(int argc, char *argv[])
>          .add_base_prerequisites = nbctl_add_base_prerequisites,
>          .pre_execute = nbctl_pre_execute,
>          .post_execute = nbctl_post_execute,
> +        .get_inactivity_probe = get_inactivity_probe,
>  
>          .ctx_create = nbctl_ctx_create,
>          .ctx_destroy = nbctl_ctx_destroy,
> diff --git a/utilities/ovn-sbctl.c b/utilities/ovn-sbctl.c
> index 542ab9ffa..76465fe7e 100644
> --- a/utilities/ovn-sbctl.c
> +++ b/utilities/ovn-sbctl.c
> @@ -150,6 +150,20 @@ Other options:\n\
>   * gracefully.  */
>  #define ctl_fatal dont_use_ctl_fatal_use_ctl_error_and_return
>  
> +static int
> +get_inactivity_probe(struct ovsdb_idl *idl)
> +{
> +    const struct sbrec_sb_global *sb = sbrec_sb_global_first(idl);
> +    int interval = -1;
> +
> +    if (sb) {
> +        interval = smap_get_int(&sb->options, "dbctl_probe_interval",
> +                                interval);
> +    }
> +
> +    return interval;
> +}
> +
>  /* ovs-sbctl specific context.  Inherits the 'struct ctl_context' as base. */
>  struct sbctl_context {
>      struct ctl_context base;
> @@ -1590,6 +1604,7 @@ main(int argc, char *argv[])
>          .add_base_prerequisites = sbctl_add_base_prerequisites,
>          .pre_execute = sbctl_pre_execute,
>          .post_execute = NULL,
> +        .get_inactivity_probe = get_inactivity_probe,
>  
>          .ctx_create = sbctl_ctx_create,
>          .ctx_destroy = sbctl_ctx_destroy,
Mark Michelson May 3, 2023, 1:47 p.m. UTC | #2
On 5/3/23 09:03, Dumitru Ceara wrote:
> Hi Vladislav,
> 
> On 5/3/23 02:55, Vladislav Odintsov wrote:
>> For large OVN_Southbound (or other) databases the default interval of 5000 ms
>> could be not sufficient to run.  This patch adds configuration of OVSDB
>> inactivity probes for ovn-*ctl utilities.
>>
>> Initially, on the utility start the hardcoded value of 120000 ms is set.
>> For daemon-mode it is possible to configure intervals in OVN Northbound
>> database:
>> OVN_Northbound.NB_Global.options.dbctl_inactivity_probe for ovn-nbctl and
>> ovn-sbctl utilities.
>>
>> If no DB configuration option was provided, the initial (120000 ms) interval
>> is left.
> 
> Nit: I would add here something like "A non-zero inactivity probe
> interval is required in order to allow DB clients to detect connection
> failures due to network issues."
> 
>>
>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
>> ---
>>   lib/ovn-util.h           |  2 ++
>>   ovn-nb.xml               | 20 ++++++++++++++++++++
>>   utilities/ovn-dbctl.c    | 18 ++++++++++++++++++
>>   utilities/ovn-dbctl.h    |  1 +
>>   utilities/ovn-ic-nbctl.c |  4 ++++
>>   utilities/ovn-ic-sbctl.c |  4 ++++
>>   utilities/ovn-nbctl.c    | 15 +++++++++++++++
>>   utilities/ovn-sbctl.c    | 15 +++++++++++++++
> 
> I'm not sure whether we should we mention this user visible change in
> the NEWS file.

I think it's worth mentioning in the NEWS file.

> 
>>   8 files changed, 79 insertions(+)
>>
>> diff --git a/lib/ovn-util.h b/lib/ovn-util.h
>> index 7cf861dbc..47e53ca74 100644
>> --- a/lib/ovn-util.h
>> +++ b/lib/ovn-util.h
>> @@ -154,6 +154,8 @@ void set_idl_probe_interval(struct ovsdb_idl *idl, const char *remote,
>>   #define OVN_MAX_DP_VXLAN_KEY ((1u << 12) - 1)
>>   #define OVN_MAX_DP_VXLAN_KEY_LOCAL (OVN_MAX_DP_KEY - OVN_MAX_DP_GLOBAL_NUM)
>>   
>> +#define DEFAULT_UTILS_PROBE_INTERVAL_MSEC 120000
> 
> I think this should go to ovn-dbctl.h instead but whoever applies the
> patch can move this.  So I don't think there's a need for v3, therefore:

I also agree with this suggestion.

> 
> Acked-by: Dumitru Ceara <dceara@redhat.com>
> 
> Ilya, Mark, Numan, Han, what do you guys think?

Before I can ack this patch, I want to bring up two points:

1) This patch updates ovn-nb.xml, but it does not update ovn-sb.xml to 
document the new SB option.

2) I'm not a big fan of the name "dbctl_probe_interval", because the 
term "dbctl" is not well-known to most OVN users. My suggestion would be 
to call the NB option "nbctl_probe_interval" or 
"ovn_nbctl_probe_interval" and to call the SB option 
"sbctl_probe_interval" or "ovn_sbctl_probe_interval". This makes it more 
clear that the option pertains to the "ovn-nbctl" and "ovn-sbctl" 
utilities. However, if others disagree with me on this, then we can use 
"dbctl_probe_interval" .

I also have a couple of small findings below.

> 
> Regards,
> Dumitru
> 
>> +
>>   struct hmap;
>>   void ovn_destroy_tnlids(struct hmap *tnlids);
>>   bool ovn_add_tnlid(struct hmap *set, uint32_t tnlid);
>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>> index fd32070f2..a82f31a92 100644
>> --- a/ovn-nb.xml
>> +++ b/ovn-nb.xml
>> @@ -215,6 +215,26 @@
>>           </p>
>>         </column>
>>   
>> +      <column name="options" key="dbctl_probe_interval">
>> +        <p>
>> +          The inactivity probe interval of the connection to the OVN Northbound
>> +          and Southbound databases from <code>ovn-nbctl</code> and
>> +          <code>ovn-sbctl</code> utilities respectively, in milliseconds.
>> +          If the value is zero, it disables the connection keepalive feature.
>> +        </p>

The documentation makes it sound as though the NB dbctl_probe_interval 
affects *both* ovn-nbctl and ovn-sbctl. However, this only affects 
ovn-nbctl. ovn-sbctl is controlled by the SB database.

>> +
>> +        <p>
>> +          If the value is nonzero, then it will be forced to a value of
>> +          at least 1000 ms.
>> +        </p>
>> +
>> +        <p>
>> +          If the value less then zero, then the default inactivity probe

"If the value is less than zero"

>> +          interval for <code>ovn-nbctl</code> and <code>ovn-sbctl</code> would
>> +          be left intact (120000 ms).
>> +        </p>
>> +      </column>
>> +
>>         <column name="options" key="northd_trim_timeout">
>>           <p>
>>             When used, this configuration value specifies the time, in
>> diff --git a/utilities/ovn-dbctl.c b/utilities/ovn-dbctl.c
>> index 369a6a663..c6aebcbbb 100644
>> --- a/utilities/ovn-dbctl.c
>> +++ b/utilities/ovn-dbctl.c
>> @@ -205,6 +205,9 @@ ovn_dbctl_main(int argc, char *argv[],
>>       ovsdb_idl_set_remote(idl, db, daemon_mode);
>>       ovsdb_idl_set_leader_only(idl, leader_only);
>>   
>> +    /* Set reasonable high probe interval. */
>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>> +
>>       if (daemon_mode) {
>>           server_loop(dbctl_options, idl, argc, argv_);
>>       } else {
>> @@ -1094,6 +1097,17 @@ out:
>>       free(argv);
>>   }
>>   
>> +static void
>> +update_inactivity_probe(struct server_cmd_run_ctx *ctx)
>> +{
>> +    int inactivity_probe = ctx->dbctl_options->get_inactivity_probe(ctx->idl);
>> +    if (inactivity_probe < 0) {
>> +        inactivity_probe = DEFAULT_UTILS_PROBE_INTERVAL_MSEC;
>> +    }
>> +
>> +    set_idl_probe_interval(ctx->idl, db, inactivity_probe);
>> +}
>> +
>>   static void
>>   server_loop(const struct ovn_dbctl_options *dbctl_options,
>>               struct ovsdb_idl *idl, int argc, char *argv[])
>> @@ -1125,6 +1139,10 @@ server_loop(const struct ovn_dbctl_options *dbctl_options,
>>   
>>       for (;;) {
>>           update_ssl_config();
>> +
>> +        /* Configure inactivity probe from connected DB. */
>> +        update_inactivity_probe(&server_cmd_run_ctx);
>> +
>>           memory_run();
>>           if (memory_should_report()) {
>>               struct simap usage = SIMAP_INITIALIZER(&usage);
>> diff --git a/utilities/ovn-dbctl.h b/utilities/ovn-dbctl.h
>> index a1fbede6b..54c232dd6 100644
>> --- a/utilities/ovn-dbctl.h
>> +++ b/utilities/ovn-dbctl.h
>> @@ -52,6 +52,7 @@ struct ovn_dbctl_options {
>>                             const struct timer *wait_timeout,
>>                             long long int start_time, bool print_wait_time);
>>   
>> +    int (*get_inactivity_probe)(struct ovsdb_idl *);
>>       struct ctl_context *(*ctx_create)(void);
>>       void (*ctx_destroy)(struct ctl_context *);
>>   };
>> diff --git a/utilities/ovn-ic-nbctl.c b/utilities/ovn-ic-nbctl.c
>> index f3d8039a8..19d8b054f 100644
>> --- a/utilities/ovn-ic-nbctl.c
>> +++ b/utilities/ovn-ic-nbctl.c
>> @@ -116,6 +116,10 @@ main(int argc, char *argv[])
>>       ovsdb_idl_set_remote(idl, db, false);
>>       ovsdb_idl_set_db_change_aware(idl, false);
>>       ovsdb_idl_set_leader_only(idl, leader_only);
>> +
>> +    /* Set reasonable high probe interval. */
>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>> +
>>       run_prerequisites(commands, n_commands, idl);
>>   
>>       /* Execute the commands.
>> diff --git a/utilities/ovn-ic-sbctl.c b/utilities/ovn-ic-sbctl.c
>> index 3060b48b9..215d09d30 100644
>> --- a/utilities/ovn-ic-sbctl.c
>> +++ b/utilities/ovn-ic-sbctl.c
>> @@ -115,6 +115,10 @@ main(int argc, char *argv[])
>>       ovsdb_idl_set_remote(idl, db, false);
>>       ovsdb_idl_set_db_change_aware(idl, false);
>>       ovsdb_idl_set_leader_only(idl, leader_only);
>> +
>> +    /* Set reasonable high probe interval. */
>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>> +
>>       run_prerequisites(commands, n_commands, idl);
>>   
>>       /* Execute the commands.
>> diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
>> index 45572fd30..49c5b294a 100644
>> --- a/utilities/ovn-nbctl.c
>> +++ b/utilities/ovn-nbctl.c
>> @@ -153,6 +153,20 @@ nbctl_post_execute(struct ovsdb_idl *idl, struct ovsdb_idl_txn *txn,
>>       }
>>   }
>>   
>> +static int
>> +get_inactivity_probe(struct ovsdb_idl *idl)
>> +{
>> +    const struct nbrec_nb_global *nb = nbrec_nb_global_first(idl);
>> +    int interval = -1;

Why does this default to -1? Wouldn't it make more sense to use 
DEFAULT_UTILS_PROBE_INTERVAL_MSEC here?

>> +
>> +    if (nb) {
>> +        interval = smap_get_int(&nb->options, "dbctl_probe_interval",
>> +                                interval);
>> +    }
>> +
>> +    return interval;
>> +}
>> +
>>   static char * OVS_WARN_UNUSED_RESULT dhcp_options_get(
>>       struct ctl_context *ctx, const char *id, bool must_exist,
>>       const struct nbrec_dhcp_options **);
>> @@ -7935,6 +7949,7 @@ main(int argc, char *argv[])
>>           .add_base_prerequisites = nbctl_add_base_prerequisites,
>>           .pre_execute = nbctl_pre_execute,
>>           .post_execute = nbctl_post_execute,
>> +        .get_inactivity_probe = get_inactivity_probe,
>>   
>>           .ctx_create = nbctl_ctx_create,
>>           .ctx_destroy = nbctl_ctx_destroy,
>> diff --git a/utilities/ovn-sbctl.c b/utilities/ovn-sbctl.c
>> index 542ab9ffa..76465fe7e 100644
>> --- a/utilities/ovn-sbctl.c
>> +++ b/utilities/ovn-sbctl.c
>> @@ -150,6 +150,20 @@ Other options:\n\
>>    * gracefully.  */
>>   #define ctl_fatal dont_use_ctl_fatal_use_ctl_error_and_return
>>   
>> +static int
>> +get_inactivity_probe(struct ovsdb_idl *idl)
>> +{
>> +    const struct sbrec_sb_global *sb = sbrec_sb_global_first(idl);
>> +    int interval = -1;

Same comment here about using -1 as the default.

>> +
>> +    if (sb) {
>> +        interval = smap_get_int(&sb->options, "dbctl_probe_interval",
>> +                                interval);
>> +    }
>> +
>> +    return interval;
>> +}
>> +
>>   /* ovs-sbctl specific context.  Inherits the 'struct ctl_context' as base. */
>>   struct sbctl_context {
>>       struct ctl_context base;
>> @@ -1590,6 +1604,7 @@ main(int argc, char *argv[])
>>           .add_base_prerequisites = sbctl_add_base_prerequisites,
>>           .pre_execute = sbctl_pre_execute,
>>           .post_execute = NULL,
>> +        .get_inactivity_probe = get_inactivity_probe,
>>   
>>           .ctx_create = sbctl_ctx_create,
>>           .ctx_destroy = sbctl_ctx_destroy,
>
Dumitru Ceara May 3, 2023, 1:56 p.m. UTC | #3
On 5/3/23 15:47, Mark Michelson wrote:
> On 5/3/23 09:03, Dumitru Ceara wrote:
>> Hi Vladislav,
>>
>> On 5/3/23 02:55, Vladislav Odintsov wrote:
>>> For large OVN_Southbound (or other) databases the default interval of
>>> 5000 ms
>>> could be not sufficient to run.  This patch adds configuration of OVSDB
>>> inactivity probes for ovn-*ctl utilities.
>>>
>>> Initially, on the utility start the hardcoded value of 120000 ms is set.
>>> For daemon-mode it is possible to configure intervals in OVN Northbound
>>> database:
>>> OVN_Northbound.NB_Global.options.dbctl_inactivity_probe for ovn-nbctl
>>> and
>>> ovn-sbctl utilities.
>>>
>>> If no DB configuration option was provided, the initial (120000 ms)
>>> interval
>>> is left.
>>
>> Nit: I would add here something like "A non-zero inactivity probe
>> interval is required in order to allow DB clients to detect connection
>> failures due to network issues."
>>
>>>
>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
>>> ---
>>>   lib/ovn-util.h           |  2 ++
>>>   ovn-nb.xml               | 20 ++++++++++++++++++++
>>>   utilities/ovn-dbctl.c    | 18 ++++++++++++++++++
>>>   utilities/ovn-dbctl.h    |  1 +
>>>   utilities/ovn-ic-nbctl.c |  4 ++++
>>>   utilities/ovn-ic-sbctl.c |  4 ++++
>>>   utilities/ovn-nbctl.c    | 15 +++++++++++++++
>>>   utilities/ovn-sbctl.c    | 15 +++++++++++++++
>>
>> I'm not sure whether we should we mention this user visible change in
>> the NEWS file.
> 
> I think it's worth mentioning in the NEWS file.
> 
>>
>>>   8 files changed, 79 insertions(+)
>>>
>>> diff --git a/lib/ovn-util.h b/lib/ovn-util.h
>>> index 7cf861dbc..47e53ca74 100644
>>> --- a/lib/ovn-util.h
>>> +++ b/lib/ovn-util.h
>>> @@ -154,6 +154,8 @@ void set_idl_probe_interval(struct ovsdb_idl
>>> *idl, const char *remote,
>>>   #define OVN_MAX_DP_VXLAN_KEY ((1u << 12) - 1)
>>>   #define OVN_MAX_DP_VXLAN_KEY_LOCAL (OVN_MAX_DP_KEY -
>>> OVN_MAX_DP_GLOBAL_NUM)
>>>   +#define DEFAULT_UTILS_PROBE_INTERVAL_MSEC 120000
>>
>> I think this should go to ovn-dbctl.h instead but whoever applies the
>> patch can move this.  So I don't think there's a need for v3, therefore:
> 
> I also agree with this suggestion.
> 
>>
>> Acked-by: Dumitru Ceara <dceara@redhat.com>
>>
>> Ilya, Mark, Numan, Han, what do you guys think?
> 
> Before I can ack this patch, I want to bring up two points:
> 
> 1) This patch updates ovn-nb.xml, but it does not update ovn-sb.xml to
> document the new SB option.
> 

We already don't document everything that applies to both.

> 2) I'm not a big fan of the name "dbctl_probe_interval", because the
> term "dbctl" is not well-known to most OVN users. My suggestion would be
> to call the NB option "nbctl_probe_interval" or
> "ovn_nbctl_probe_interval" and to call the SB option
> "sbctl_probe_interval" or "ovn_sbctl_probe_interval". This makes it more
> clear that the option pertains to the "ovn-nbctl" and "ovn-sbctl"
> utilities. However, if others disagree with me on this, then we can use
> "dbctl_probe_interval" .
> 

The problem is northd propagates all NB.NB_Global.options to
SB.SB_Global.options.

> I also have a couple of small findings below.
> 
>>
>> Regards,
>> Dumitru
>>
>>> +
>>>   struct hmap;
>>>   void ovn_destroy_tnlids(struct hmap *tnlids);
>>>   bool ovn_add_tnlid(struct hmap *set, uint32_t tnlid);
>>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>>> index fd32070f2..a82f31a92 100644
>>> --- a/ovn-nb.xml
>>> +++ b/ovn-nb.xml
>>> @@ -215,6 +215,26 @@
>>>           </p>
>>>         </column>
>>>   +      <column name="options" key="dbctl_probe_interval">
>>> +        <p>
>>> +          The inactivity probe interval of the connection to the OVN
>>> Northbound
>>> +          and Southbound databases from <code>ovn-nbctl</code> and
>>> +          <code>ovn-sbctl</code> utilities respectively, in
>>> milliseconds.
>>> +          If the value is zero, it disables the connection keepalive
>>> feature.
>>> +        </p>
> 
> The documentation makes it sound as though the NB dbctl_probe_interval
> affects *both* ovn-nbctl and ovn-sbctl. However, this only affects
> ovn-nbctl. ovn-sbctl is controlled by the SB database.
> 

Due to the fact that the option is propagated by northd it eventually
applies to both.  But thinking some more about it, you're right, it's
probably better to add explicit nb and sb options.

>>> +
>>> +        <p>
>>> +          If the value is nonzero, then it will be forced to a value of
>>> +          at least 1000 ms.
>>> +        </p>
>>> +
>>> +        <p>
>>> +          If the value less then zero, then the default inactivity
>>> probe
> 
> "If the value is less than zero"
> 
>>> +          interval for <code>ovn-nbctl</code> and
>>> <code>ovn-sbctl</code> would
>>> +          be left intact (120000 ms).
>>> +        </p>
>>> +      </column>
>>> +
>>>         <column name="options" key="northd_trim_timeout">
>>>           <p>
>>>             When used, this configuration value specifies the time, in
>>> diff --git a/utilities/ovn-dbctl.c b/utilities/ovn-dbctl.c
>>> index 369a6a663..c6aebcbbb 100644
>>> --- a/utilities/ovn-dbctl.c
>>> +++ b/utilities/ovn-dbctl.c
>>> @@ -205,6 +205,9 @@ ovn_dbctl_main(int argc, char *argv[],
>>>       ovsdb_idl_set_remote(idl, db, daemon_mode);
>>>       ovsdb_idl_set_leader_only(idl, leader_only);
>>>   +    /* Set reasonable high probe interval. */
>>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>>> +
>>>       if (daemon_mode) {
>>>           server_loop(dbctl_options, idl, argc, argv_);
>>>       } else {
>>> @@ -1094,6 +1097,17 @@ out:
>>>       free(argv);
>>>   }
>>>   +static void
>>> +update_inactivity_probe(struct server_cmd_run_ctx *ctx)
>>> +{
>>> +    int inactivity_probe =
>>> ctx->dbctl_options->get_inactivity_probe(ctx->idl);
>>> +    if (inactivity_probe < 0) {
>>> +        inactivity_probe = DEFAULT_UTILS_PROBE_INTERVAL_MSEC;
>>> +    }
>>> +
>>> +    set_idl_probe_interval(ctx->idl, db, inactivity_probe);
>>> +}
>>> +
>>>   static void
>>>   server_loop(const struct ovn_dbctl_options *dbctl_options,
>>>               struct ovsdb_idl *idl, int argc, char *argv[])
>>> @@ -1125,6 +1139,10 @@ server_loop(const struct ovn_dbctl_options
>>> *dbctl_options,
>>>         for (;;) {
>>>           update_ssl_config();
>>> +
>>> +        /* Configure inactivity probe from connected DB. */
>>> +        update_inactivity_probe(&server_cmd_run_ctx);
>>> +
>>>           memory_run();
>>>           if (memory_should_report()) {
>>>               struct simap usage = SIMAP_INITIALIZER(&usage);
>>> diff --git a/utilities/ovn-dbctl.h b/utilities/ovn-dbctl.h
>>> index a1fbede6b..54c232dd6 100644
>>> --- a/utilities/ovn-dbctl.h
>>> +++ b/utilities/ovn-dbctl.h
>>> @@ -52,6 +52,7 @@ struct ovn_dbctl_options {
>>>                             const struct timer *wait_timeout,
>>>                             long long int start_time, bool
>>> print_wait_time);
>>>   +    int (*get_inactivity_probe)(struct ovsdb_idl *);
>>>       struct ctl_context *(*ctx_create)(void);
>>>       void (*ctx_destroy)(struct ctl_context *);
>>>   };
>>> diff --git a/utilities/ovn-ic-nbctl.c b/utilities/ovn-ic-nbctl.c
>>> index f3d8039a8..19d8b054f 100644
>>> --- a/utilities/ovn-ic-nbctl.c
>>> +++ b/utilities/ovn-ic-nbctl.c
>>> @@ -116,6 +116,10 @@ main(int argc, char *argv[])
>>>       ovsdb_idl_set_remote(idl, db, false);
>>>       ovsdb_idl_set_db_change_aware(idl, false);
>>>       ovsdb_idl_set_leader_only(idl, leader_only);
>>> +
>>> +    /* Set reasonable high probe interval. */
>>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>>> +
>>>       run_prerequisites(commands, n_commands, idl);
>>>         /* Execute the commands.
>>> diff --git a/utilities/ovn-ic-sbctl.c b/utilities/ovn-ic-sbctl.c
>>> index 3060b48b9..215d09d30 100644
>>> --- a/utilities/ovn-ic-sbctl.c
>>> +++ b/utilities/ovn-ic-sbctl.c
>>> @@ -115,6 +115,10 @@ main(int argc, char *argv[])
>>>       ovsdb_idl_set_remote(idl, db, false);
>>>       ovsdb_idl_set_db_change_aware(idl, false);
>>>       ovsdb_idl_set_leader_only(idl, leader_only);
>>> +
>>> +    /* Set reasonable high probe interval. */
>>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>>> +
>>>       run_prerequisites(commands, n_commands, idl);
>>>         /* Execute the commands.
>>> diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
>>> index 45572fd30..49c5b294a 100644
>>> --- a/utilities/ovn-nbctl.c
>>> +++ b/utilities/ovn-nbctl.c
>>> @@ -153,6 +153,20 @@ nbctl_post_execute(struct ovsdb_idl *idl, struct
>>> ovsdb_idl_txn *txn,
>>>       }
>>>   }
>>>   +static int
>>> +get_inactivity_probe(struct ovsdb_idl *idl)
>>> +{
>>> +    const struct nbrec_nb_global *nb = nbrec_nb_global_first(idl);
>>> +    int interval = -1;
> 
> Why does this default to -1? Wouldn't it make more sense to use
> DEFAULT_UTILS_PROBE_INTERVAL_MSEC here?
> 
>>> +
>>> +    if (nb) {
>>> +        interval = smap_get_int(&nb->options, "dbctl_probe_interval",
>>> +                                interval);
>>> +    }
>>> +
>>> +    return interval;
>>> +}
>>> +
>>>   static char * OVS_WARN_UNUSED_RESULT dhcp_options_get(
>>>       struct ctl_context *ctx, const char *id, bool must_exist,
>>>       const struct nbrec_dhcp_options **);
>>> @@ -7935,6 +7949,7 @@ main(int argc, char *argv[])
>>>           .add_base_prerequisites = nbctl_add_base_prerequisites,
>>>           .pre_execute = nbctl_pre_execute,
>>>           .post_execute = nbctl_post_execute,
>>> +        .get_inactivity_probe = get_inactivity_probe,
>>>             .ctx_create = nbctl_ctx_create,
>>>           .ctx_destroy = nbctl_ctx_destroy,
>>> diff --git a/utilities/ovn-sbctl.c b/utilities/ovn-sbctl.c
>>> index 542ab9ffa..76465fe7e 100644
>>> --- a/utilities/ovn-sbctl.c
>>> +++ b/utilities/ovn-sbctl.c
>>> @@ -150,6 +150,20 @@ Other options:\n\
>>>    * gracefully.  */
>>>   #define ctl_fatal dont_use_ctl_fatal_use_ctl_error_and_return
>>>   +static int
>>> +get_inactivity_probe(struct ovsdb_idl *idl)
>>> +{
>>> +    const struct sbrec_sb_global *sb = sbrec_sb_global_first(idl);
>>> +    int interval = -1;
> 
> Same comment here about using -1 as the default.
> 
>>> +
>>> +    if (sb) {
>>> +        interval = smap_get_int(&sb->options, "dbctl_probe_interval",
>>> +                                interval);
>>> +    }
>>> +
>>> +    return interval;
>>> +}
>>> +
>>>   /* ovs-sbctl specific context.  Inherits the 'struct ctl_context'
>>> as base. */
>>>   struct sbctl_context {
>>>       struct ctl_context base;
>>> @@ -1590,6 +1604,7 @@ main(int argc, char *argv[])
>>>           .add_base_prerequisites = sbctl_add_base_prerequisites,
>>>           .pre_execute = sbctl_pre_execute,
>>>           .post_execute = NULL,
>>> +        .get_inactivity_probe = get_inactivity_probe,
>>>             .ctx_create = sbctl_ctx_create,
>>>           .ctx_destroy = sbctl_ctx_destroy,
>>
>
Vladislav Odintsov May 3, 2023, 2:12 p.m. UTC | #4
Hi Dumitru and Mark,

thanks for the review!

> On 3 May 2023, at 16:56, Dumitru Ceara <dceara@redhat.com> wrote:
> 
> On 5/3/23 15:47, Mark Michelson wrote:
>> On 5/3/23 09:03, Dumitru Ceara wrote:
>>> Hi Vladislav,
>>> 
>>> On 5/3/23 02:55, Vladislav Odintsov wrote:
>>>> For large OVN_Southbound (or other) databases the default interval of
>>>> 5000 ms
>>>> could be not sufficient to run.  This patch adds configuration of OVSDB
>>>> inactivity probes for ovn-*ctl utilities.
>>>> 
>>>> Initially, on the utility start the hardcoded value of 120000 ms is set.
>>>> For daemon-mode it is possible to configure intervals in OVN Northbound
>>>> database:
>>>> OVN_Northbound.NB_Global.options.dbctl_inactivity_probe for ovn-nbctl
>>>> and
>>>> ovn-sbctl utilities.
>>>> 
>>>> If no DB configuration option was provided, the initial (120000 ms)
>>>> interval
>>>> is left.
>>> 
>>> Nit: I would add here something like "A non-zero inactivity probe
>>> interval is required in order to allow DB clients to detect connection
>>> failures due to network issues."
>>> 
>>>> 
>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
>>>> ---
>>>>   lib/ovn-util.h           |  2 ++
>>>>   ovn-nb.xml               | 20 ++++++++++++++++++++
>>>>   utilities/ovn-dbctl.c    | 18 ++++++++++++++++++
>>>>   utilities/ovn-dbctl.h    |  1 +
>>>>   utilities/ovn-ic-nbctl.c |  4 ++++
>>>>   utilities/ovn-ic-sbctl.c |  4 ++++
>>>>   utilities/ovn-nbctl.c    | 15 +++++++++++++++
>>>>   utilities/ovn-sbctl.c    | 15 +++++++++++++++
>>> 
>>> I'm not sure whether we should we mention this user visible change in
>>> the NEWS file.
>> 
>> I think it's worth mentioning in the NEWS file.
>> 
>>> 
>>>>   8 files changed, 79 insertions(+)
>>>> 
>>>> diff --git a/lib/ovn-util.h b/lib/ovn-util.h
>>>> index 7cf861dbc..47e53ca74 100644
>>>> --- a/lib/ovn-util.h
>>>> +++ b/lib/ovn-util.h
>>>> @@ -154,6 +154,8 @@ void set_idl_probe_interval(struct ovsdb_idl
>>>> *idl, const char *remote,
>>>>   #define OVN_MAX_DP_VXLAN_KEY ((1u << 12) - 1)
>>>>   #define OVN_MAX_DP_VXLAN_KEY_LOCAL (OVN_MAX_DP_KEY -
>>>> OVN_MAX_DP_GLOBAL_NUM)
>>>>   +#define DEFAULT_UTILS_PROBE_INTERVAL_MSEC 120000
>>> 
>>> I think this should go to ovn-dbctl.h instead but whoever applies the
>>> patch can move this.  So I don't think there's a need for v3, therefore:
>> 
>> I also agree with this suggestion.

I’m also fine with this change and have no objection to fix this on the apply time if needed.

>> 
>>> 
>>> Acked-by: Dumitru Ceara <dceara@redhat.com>
>>> 
>>> Ilya, Mark, Numan, Han, what do you guys think?
>> 
>> Before I can ack this patch, I want to bring up two points:
>> 
>> 1) This patch updates ovn-nb.xml, but it does not update ovn-sb.xml to
>> document the new SB option.
>> 
> 
> We already don't document everything that applies to both.
> 
>> 2) I'm not a big fan of the name "dbctl_probe_interval", because the
>> term "dbctl" is not well-known to most OVN users. My suggestion would be
>> to call the NB option "nbctl_probe_interval" or
>> "ovn_nbctl_probe_interval" and to call the SB option
>> "sbctl_probe_interval" or "ovn_sbctl_probe_interval". This makes it more
>> clear that the option pertains to the "ovn-nbctl" and "ovn-sbctl"
>> utilities. However, if others disagree with me on this, then we can use
>> "dbctl_probe_interval" .
>> 
> 
> The problem is northd propagates all NB.NB_Global.options to
> SB.SB_Global.options.

Yes, this is due to the facts of northd logic for propagation options from NB_Global to SB_Global table.

> 
>> I also have a couple of small findings below.
>> 
>>> 
>>> Regards,
>>> Dumitru
>>> 
>>>> +
>>>>   struct hmap;
>>>>   void ovn_destroy_tnlids(struct hmap *tnlids);
>>>>   bool ovn_add_tnlid(struct hmap *set, uint32_t tnlid);
>>>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>>>> index fd32070f2..a82f31a92 100644
>>>> --- a/ovn-nb.xml
>>>> +++ b/ovn-nb.xml
>>>> @@ -215,6 +215,26 @@
>>>>           </p>
>>>>         </column>
>>>>   +      <column name="options" key="dbctl_probe_interval">
>>>> +        <p>
>>>> +          The inactivity probe interval of the connection to the OVN
>>>> Northbound
>>>> +          and Southbound databases from <code>ovn-nbctl</code> and
>>>> +          <code>ovn-sbctl</code> utilities respectively, in
>>>> milliseconds.
>>>> +          If the value is zero, it disables the connection keepalive
>>>> feature.
>>>> +        </p>
>> 
>> The documentation makes it sound as though the NB dbctl_probe_interval
>> affects *both* ovn-nbctl and ovn-sbctl. However, this only affects
>> ovn-nbctl. ovn-sbctl is controlled by the SB database.
>> 
> 
> Due to the fact that the option is propagated by northd it eventually
> applies to both.  But thinking some more about it, you're right, it's
> probably better to add explicit nb and sb options.

This was my first approach, where I tried to read nbctl_… and sbctl_… options from appropriate databases, but faced with northd syncing (and deletion option from SB). 
I decided to make a common option thinking that for northd there is similar logic where NB and SB connections are configured from one option (northd_probe_interval).

You you think it’s worth to split options, so I need some time find out how to prevent northd from syncing custom option from NB to SB and how to make it not to delete new option from SB.

> 
>>>> +
>>>> +        <p>
>>>> +          If the value is nonzero, then it will be forced to a value of
>>>> +          at least 1000 ms.
>>>> +        </p>
>>>> +
>>>> +        <p>
>>>> +          If the value less then zero, then the default inactivity
>>>> probe
>> 
>> "If the value is less than zero"
>> 
>>>> +          interval for <code>ovn-nbctl</code> and
>>>> <code>ovn-sbctl</code> would
>>>> +          be left intact (120000 ms).
>>>> +        </p>
>>>> +      </column>
>>>> +
>>>>         <column name="options" key="northd_trim_timeout">
>>>>           <p>
>>>>             When used, this configuration value specifies the time, in
>>>> diff --git a/utilities/ovn-dbctl.c b/utilities/ovn-dbctl.c
>>>> index 369a6a663..c6aebcbbb 100644
>>>> --- a/utilities/ovn-dbctl.c
>>>> +++ b/utilities/ovn-dbctl.c
>>>> @@ -205,6 +205,9 @@ ovn_dbctl_main(int argc, char *argv[],
>>>>       ovsdb_idl_set_remote(idl, db, daemon_mode);
>>>>       ovsdb_idl_set_leader_only(idl, leader_only);
>>>>   +    /* Set reasonable high probe interval. */
>>>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>>>> +
>>>>       if (daemon_mode) {
>>>>           server_loop(dbctl_options, idl, argc, argv_);
>>>>       } else {
>>>> @@ -1094,6 +1097,17 @@ out:
>>>>       free(argv);
>>>>   }
>>>>   +static void
>>>> +update_inactivity_probe(struct server_cmd_run_ctx *ctx)
>>>> +{
>>>> +    int inactivity_probe =
>>>> ctx->dbctl_options->get_inactivity_probe(ctx->idl);
>>>> +    if (inactivity_probe < 0) {
>>>> +        inactivity_probe = DEFAULT_UTILS_PROBE_INTERVAL_MSEC;
>>>> +    }
>>>> +
>>>> +    set_idl_probe_interval(ctx->idl, db, inactivity_probe);
>>>> +}
>>>> +
>>>>   static void
>>>>   server_loop(const struct ovn_dbctl_options *dbctl_options,
>>>>               struct ovsdb_idl *idl, int argc, char *argv[])
>>>> @@ -1125,6 +1139,10 @@ server_loop(const struct ovn_dbctl_options
>>>> *dbctl_options,
>>>>         for (;;) {
>>>>           update_ssl_config();
>>>> +
>>>> +        /* Configure inactivity probe from connected DB. */
>>>> +        update_inactivity_probe(&server_cmd_run_ctx);
>>>> +
>>>>           memory_run();
>>>>           if (memory_should_report()) {
>>>>               struct simap usage = SIMAP_INITIALIZER(&usage);
>>>> diff --git a/utilities/ovn-dbctl.h b/utilities/ovn-dbctl.h
>>>> index a1fbede6b..54c232dd6 100644
>>>> --- a/utilities/ovn-dbctl.h
>>>> +++ b/utilities/ovn-dbctl.h
>>>> @@ -52,6 +52,7 @@ struct ovn_dbctl_options {
>>>>                             const struct timer *wait_timeout,
>>>>                             long long int start_time, bool
>>>> print_wait_time);
>>>>   +    int (*get_inactivity_probe)(struct ovsdb_idl *);
>>>>       struct ctl_context *(*ctx_create)(void);
>>>>       void (*ctx_destroy)(struct ctl_context *);
>>>>   };
>>>> diff --git a/utilities/ovn-ic-nbctl.c b/utilities/ovn-ic-nbctl.c
>>>> index f3d8039a8..19d8b054f 100644
>>>> --- a/utilities/ovn-ic-nbctl.c
>>>> +++ b/utilities/ovn-ic-nbctl.c
>>>> @@ -116,6 +116,10 @@ main(int argc, char *argv[])
>>>>       ovsdb_idl_set_remote(idl, db, false);
>>>>       ovsdb_idl_set_db_change_aware(idl, false);
>>>>       ovsdb_idl_set_leader_only(idl, leader_only);
>>>> +
>>>> +    /* Set reasonable high probe interval. */
>>>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>>>> +
>>>>       run_prerequisites(commands, n_commands, idl);
>>>>         /* Execute the commands.
>>>> diff --git a/utilities/ovn-ic-sbctl.c b/utilities/ovn-ic-sbctl.c
>>>> index 3060b48b9..215d09d30 100644
>>>> --- a/utilities/ovn-ic-sbctl.c
>>>> +++ b/utilities/ovn-ic-sbctl.c
>>>> @@ -115,6 +115,10 @@ main(int argc, char *argv[])
>>>>       ovsdb_idl_set_remote(idl, db, false);
>>>>       ovsdb_idl_set_db_change_aware(idl, false);
>>>>       ovsdb_idl_set_leader_only(idl, leader_only);
>>>> +
>>>> +    /* Set reasonable high probe interval. */
>>>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>>>> +
>>>>       run_prerequisites(commands, n_commands, idl);
>>>>         /* Execute the commands.
>>>> diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
>>>> index 45572fd30..49c5b294a 100644
>>>> --- a/utilities/ovn-nbctl.c
>>>> +++ b/utilities/ovn-nbctl.c
>>>> @@ -153,6 +153,20 @@ nbctl_post_execute(struct ovsdb_idl *idl, struct
>>>> ovsdb_idl_txn *txn,
>>>>       }
>>>>   }
>>>>   +static int
>>>> +get_inactivity_probe(struct ovsdb_idl *idl)
>>>> +{
>>>> +    const struct nbrec_nb_global *nb = nbrec_nb_global_first(idl);
>>>> +    int interval = -1;
>> 
>> Why does this default to -1? Wouldn't it make more sense to use
>> DEFAULT_UTILS_PROBE_INTERVAL_MSEC here?

Yeap, you’re right.

>> 
>>>> +
>>>> +    if (nb) {
>>>> +        interval = smap_get_int(&nb->options, "dbctl_probe_interval",
>>>> +                                interval);
>>>> +    }
>>>> +
>>>> +    return interval;
>>>> +}
>>>> +
>>>>   static char * OVS_WARN_UNUSED_RESULT dhcp_options_get(
>>>>       struct ctl_context *ctx, const char *id, bool must_exist,
>>>>       const struct nbrec_dhcp_options **);
>>>> @@ -7935,6 +7949,7 @@ main(int argc, char *argv[])
>>>>           .add_base_prerequisites = nbctl_add_base_prerequisites,
>>>>           .pre_execute = nbctl_pre_execute,
>>>>           .post_execute = nbctl_post_execute,
>>>> +        .get_inactivity_probe = get_inactivity_probe,
>>>>             .ctx_create = nbctl_ctx_create,
>>>>           .ctx_destroy = nbctl_ctx_destroy,
>>>> diff --git a/utilities/ovn-sbctl.c b/utilities/ovn-sbctl.c
>>>> index 542ab9ffa..76465fe7e 100644
>>>> --- a/utilities/ovn-sbctl.c
>>>> +++ b/utilities/ovn-sbctl.c
>>>> @@ -150,6 +150,20 @@ Other options:\n\
>>>>    * gracefully.  */
>>>>   #define ctl_fatal dont_use_ctl_fatal_use_ctl_error_and_return
>>>>   +static int
>>>> +get_inactivity_probe(struct ovsdb_idl *idl)
>>>> +{
>>>> +    const struct sbrec_sb_global *sb = sbrec_sb_global_first(idl);
>>>> +    int interval = -1;
>> 
>> Same comment here about using -1 as the default.
>> 
>>>> +
>>>> +    if (sb) {
>>>> +        interval = smap_get_int(&sb->options, "dbctl_probe_interval",
>>>> +                                interval);
>>>> +    }
>>>> +
>>>> +    return interval;
>>>> +}
>>>> +
>>>>   /* ovs-sbctl specific context.  Inherits the 'struct ctl_context'
>>>> as base. */
>>>>   struct sbctl_context {
>>>>       struct ctl_context base;
>>>> @@ -1590,6 +1604,7 @@ main(int argc, char *argv[])
>>>>           .add_base_prerequisites = sbctl_add_base_prerequisites,
>>>>           .pre_execute = sbctl_pre_execute,
>>>>           .post_execute = NULL,
>>>> +        .get_inactivity_probe = get_inactivity_probe,
>>>>             .ctx_create = sbctl_ctx_create,
>>>>           .ctx_destroy = sbctl_ctx_destroy,
>>> 
>> 
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Regards,
Vladislav Odintsov
Dumitru Ceara May 3, 2023, 2:18 p.m. UTC | #5
On 5/3/23 16:12, Vladislav Odintsov wrote:
> Hi Dumitru and Mark,
> 
> thanks for the review!
> 
>> On 3 May 2023, at 16:56, Dumitru Ceara <dceara@redhat.com> wrote:
>>
>> On 5/3/23 15:47, Mark Michelson wrote:
>>> On 5/3/23 09:03, Dumitru Ceara wrote:
>>>> Hi Vladislav,
>>>>
>>>> On 5/3/23 02:55, Vladislav Odintsov wrote:
>>>>> For large OVN_Southbound (or other) databases the default interval of
>>>>> 5000 ms
>>>>> could be not sufficient to run.  This patch adds configuration of OVSDB
>>>>> inactivity probes for ovn-*ctl utilities.
>>>>>
>>>>> Initially, on the utility start the hardcoded value of 120000 ms is set.
>>>>> For daemon-mode it is possible to configure intervals in OVN Northbound
>>>>> database:
>>>>> OVN_Northbound.NB_Global.options.dbctl_inactivity_probe for ovn-nbctl
>>>>> and
>>>>> ovn-sbctl utilities.
>>>>>
>>>>> If no DB configuration option was provided, the initial (120000 ms)
>>>>> interval
>>>>> is left.
>>>>
>>>> Nit: I would add here something like "A non-zero inactivity probe
>>>> interval is required in order to allow DB clients to detect connection
>>>> failures due to network issues."
>>>>
>>>>>
>>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
>>>>> ---
>>>>>   lib/ovn-util.h           |  2 ++
>>>>>   ovn-nb.xml               | 20 ++++++++++++++++++++
>>>>>   utilities/ovn-dbctl.c    | 18 ++++++++++++++++++
>>>>>   utilities/ovn-dbctl.h    |  1 +
>>>>>   utilities/ovn-ic-nbctl.c |  4 ++++
>>>>>   utilities/ovn-ic-sbctl.c |  4 ++++
>>>>>   utilities/ovn-nbctl.c    | 15 +++++++++++++++
>>>>>   utilities/ovn-sbctl.c    | 15 +++++++++++++++
>>>>
>>>> I'm not sure whether we should we mention this user visible change in
>>>> the NEWS file.
>>>
>>> I think it's worth mentioning in the NEWS file.
>>>
>>>>
>>>>>   8 files changed, 79 insertions(+)
>>>>>
>>>>> diff --git a/lib/ovn-util.h b/lib/ovn-util.h
>>>>> index 7cf861dbc..47e53ca74 100644
>>>>> --- a/lib/ovn-util.h
>>>>> +++ b/lib/ovn-util.h
>>>>> @@ -154,6 +154,8 @@ void set_idl_probe_interval(struct ovsdb_idl
>>>>> *idl, const char *remote,
>>>>>   #define OVN_MAX_DP_VXLAN_KEY ((1u << 12) - 1)
>>>>>   #define OVN_MAX_DP_VXLAN_KEY_LOCAL (OVN_MAX_DP_KEY -
>>>>> OVN_MAX_DP_GLOBAL_NUM)
>>>>>   +#define DEFAULT_UTILS_PROBE_INTERVAL_MSEC 120000
>>>>
>>>> I think this should go to ovn-dbctl.h instead but whoever applies the
>>>> patch can move this.  So I don't think there's a need for v3, therefore:
>>>
>>> I also agree with this suggestion.
> 
> I’m also fine with this change and have no objection to fix this on the apply time if needed.
> 
>>>
>>>>
>>>> Acked-by: Dumitru Ceara <dceara@redhat.com>
>>>>
>>>> Ilya, Mark, Numan, Han, what do you guys think?
>>>
>>> Before I can ack this patch, I want to bring up two points:
>>>
>>> 1) This patch updates ovn-nb.xml, but it does not update ovn-sb.xml to
>>> document the new SB option.
>>>
>>
>> We already don't document everything that applies to both.
>>
>>> 2) I'm not a big fan of the name "dbctl_probe_interval", because the
>>> term "dbctl" is not well-known to most OVN users. My suggestion would be
>>> to call the NB option "nbctl_probe_interval" or
>>> "ovn_nbctl_probe_interval" and to call the SB option
>>> "sbctl_probe_interval" or "ovn_sbctl_probe_interval". This makes it more
>>> clear that the option pertains to the "ovn-nbctl" and "ovn-sbctl"
>>> utilities. However, if others disagree with me on this, then we can use
>>> "dbctl_probe_interval" .
>>>
>>
>> The problem is northd propagates all NB.NB_Global.options to
>> SB.SB_Global.options.
> 
> Yes, this is due to the facts of northd logic for propagation options from NB_Global to SB_Global table.
> 
>>
>>> I also have a couple of small findings below.
>>>
>>>>
>>>> Regards,
>>>> Dumitru
>>>>
>>>>> +
>>>>>   struct hmap;
>>>>>   void ovn_destroy_tnlids(struct hmap *tnlids);
>>>>>   bool ovn_add_tnlid(struct hmap *set, uint32_t tnlid);
>>>>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>>>>> index fd32070f2..a82f31a92 100644
>>>>> --- a/ovn-nb.xml
>>>>> +++ b/ovn-nb.xml
>>>>> @@ -215,6 +215,26 @@
>>>>>           </p>
>>>>>         </column>
>>>>>   +      <column name="options" key="dbctl_probe_interval">
>>>>> +        <p>
>>>>> +          The inactivity probe interval of the connection to the OVN
>>>>> Northbound
>>>>> +          and Southbound databases from <code>ovn-nbctl</code> and
>>>>> +          <code>ovn-sbctl</code> utilities respectively, in
>>>>> milliseconds.
>>>>> +          If the value is zero, it disables the connection keepalive
>>>>> feature.
>>>>> +        </p>
>>>
>>> The documentation makes it sound as though the NB dbctl_probe_interval
>>> affects *both* ovn-nbctl and ovn-sbctl. However, this only affects
>>> ovn-nbctl. ovn-sbctl is controlled by the SB database.
>>>
>>
>> Due to the fact that the option is propagated by northd it eventually
>> applies to both.  But thinking some more about it, you're right, it's
>> probably better to add explicit nb and sb options.
> 
> This was my first approach, where I tried to read nbctl_… and sbctl_… options from appropriate databases, but faced with northd syncing (and deletion option from SB). 
> I decided to make a common option thinking that for northd there is similar logic where NB and SB connections are configured from one option (northd_probe_interval).
> 
> You you think it’s worth to split options, so I need some time find out how to prevent northd from syncing custom option from NB to SB and how to make it not to delete new option from SB.
> 

It's not really great but we already do that for
"lb_hairpin_use_ct_mark".  Maybe something similar?

>>
>>>>> +
>>>>> +        <p>
>>>>> +          If the value is nonzero, then it will be forced to a value of
>>>>> +          at least 1000 ms.
>>>>> +        </p>
>>>>> +
>>>>> +        <p>
>>>>> +          If the value less then zero, then the default inactivity
>>>>> probe
>>>
>>> "If the value is less than zero"
>>>
>>>>> +          interval for <code>ovn-nbctl</code> and
>>>>> <code>ovn-sbctl</code> would
>>>>> +          be left intact (120000 ms).
>>>>> +        </p>
>>>>> +      </column>
>>>>> +
>>>>>         <column name="options" key="northd_trim_timeout">
>>>>>           <p>
>>>>>             When used, this configuration value specifies the time, in
>>>>> diff --git a/utilities/ovn-dbctl.c b/utilities/ovn-dbctl.c
>>>>> index 369a6a663..c6aebcbbb 100644
>>>>> --- a/utilities/ovn-dbctl.c
>>>>> +++ b/utilities/ovn-dbctl.c
>>>>> @@ -205,6 +205,9 @@ ovn_dbctl_main(int argc, char *argv[],
>>>>>       ovsdb_idl_set_remote(idl, db, daemon_mode);
>>>>>       ovsdb_idl_set_leader_only(idl, leader_only);
>>>>>   +    /* Set reasonable high probe interval. */
>>>>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>>>>> +
>>>>>       if (daemon_mode) {
>>>>>           server_loop(dbctl_options, idl, argc, argv_);
>>>>>       } else {
>>>>> @@ -1094,6 +1097,17 @@ out:
>>>>>       free(argv);
>>>>>   }
>>>>>   +static void
>>>>> +update_inactivity_probe(struct server_cmd_run_ctx *ctx)
>>>>> +{
>>>>> +    int inactivity_probe =
>>>>> ctx->dbctl_options->get_inactivity_probe(ctx->idl);
>>>>> +    if (inactivity_probe < 0) {
>>>>> +        inactivity_probe = DEFAULT_UTILS_PROBE_INTERVAL_MSEC;
>>>>> +    }
>>>>> +
>>>>> +    set_idl_probe_interval(ctx->idl, db, inactivity_probe);
>>>>> +}
>>>>> +
>>>>>   static void
>>>>>   server_loop(const struct ovn_dbctl_options *dbctl_options,
>>>>>               struct ovsdb_idl *idl, int argc, char *argv[])
>>>>> @@ -1125,6 +1139,10 @@ server_loop(const struct ovn_dbctl_options
>>>>> *dbctl_options,
>>>>>         for (;;) {
>>>>>           update_ssl_config();
>>>>> +
>>>>> +        /* Configure inactivity probe from connected DB. */
>>>>> +        update_inactivity_probe(&server_cmd_run_ctx);
>>>>> +
>>>>>           memory_run();
>>>>>           if (memory_should_report()) {
>>>>>               struct simap usage = SIMAP_INITIALIZER(&usage);
>>>>> diff --git a/utilities/ovn-dbctl.h b/utilities/ovn-dbctl.h
>>>>> index a1fbede6b..54c232dd6 100644
>>>>> --- a/utilities/ovn-dbctl.h
>>>>> +++ b/utilities/ovn-dbctl.h
>>>>> @@ -52,6 +52,7 @@ struct ovn_dbctl_options {
>>>>>                             const struct timer *wait_timeout,
>>>>>                             long long int start_time, bool
>>>>> print_wait_time);
>>>>>   +    int (*get_inactivity_probe)(struct ovsdb_idl *);
>>>>>       struct ctl_context *(*ctx_create)(void);
>>>>>       void (*ctx_destroy)(struct ctl_context *);
>>>>>   };
>>>>> diff --git a/utilities/ovn-ic-nbctl.c b/utilities/ovn-ic-nbctl.c
>>>>> index f3d8039a8..19d8b054f 100644
>>>>> --- a/utilities/ovn-ic-nbctl.c
>>>>> +++ b/utilities/ovn-ic-nbctl.c
>>>>> @@ -116,6 +116,10 @@ main(int argc, char *argv[])
>>>>>       ovsdb_idl_set_remote(idl, db, false);
>>>>>       ovsdb_idl_set_db_change_aware(idl, false);
>>>>>       ovsdb_idl_set_leader_only(idl, leader_only);
>>>>> +
>>>>> +    /* Set reasonable high probe interval. */
>>>>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>>>>> +
>>>>>       run_prerequisites(commands, n_commands, idl);
>>>>>         /* Execute the commands.
>>>>> diff --git a/utilities/ovn-ic-sbctl.c b/utilities/ovn-ic-sbctl.c
>>>>> index 3060b48b9..215d09d30 100644
>>>>> --- a/utilities/ovn-ic-sbctl.c
>>>>> +++ b/utilities/ovn-ic-sbctl.c
>>>>> @@ -115,6 +115,10 @@ main(int argc, char *argv[])
>>>>>       ovsdb_idl_set_remote(idl, db, false);
>>>>>       ovsdb_idl_set_db_change_aware(idl, false);
>>>>>       ovsdb_idl_set_leader_only(idl, leader_only);
>>>>> +
>>>>> +    /* Set reasonable high probe interval. */
>>>>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>>>>> +
>>>>>       run_prerequisites(commands, n_commands, idl);
>>>>>         /* Execute the commands.
>>>>> diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
>>>>> index 45572fd30..49c5b294a 100644
>>>>> --- a/utilities/ovn-nbctl.c
>>>>> +++ b/utilities/ovn-nbctl.c
>>>>> @@ -153,6 +153,20 @@ nbctl_post_execute(struct ovsdb_idl *idl, struct
>>>>> ovsdb_idl_txn *txn,
>>>>>       }
>>>>>   }
>>>>>   +static int
>>>>> +get_inactivity_probe(struct ovsdb_idl *idl)
>>>>> +{
>>>>> +    const struct nbrec_nb_global *nb = nbrec_nb_global_first(idl);
>>>>> +    int interval = -1;
>>>
>>> Why does this default to -1? Wouldn't it make more sense to use
>>> DEFAULT_UTILS_PROBE_INTERVAL_MSEC here?
> 
> Yeap, you’re right.
> 
>>>
>>>>> +
>>>>> +    if (nb) {
>>>>> +        interval = smap_get_int(&nb->options, "dbctl_probe_interval",
>>>>> +                                interval);
>>>>> +    }
>>>>> +
>>>>> +    return interval;
>>>>> +}
>>>>> +
>>>>>   static char * OVS_WARN_UNUSED_RESULT dhcp_options_get(
>>>>>       struct ctl_context *ctx, const char *id, bool must_exist,
>>>>>       const struct nbrec_dhcp_options **);
>>>>> @@ -7935,6 +7949,7 @@ main(int argc, char *argv[])
>>>>>           .add_base_prerequisites = nbctl_add_base_prerequisites,
>>>>>           .pre_execute = nbctl_pre_execute,
>>>>>           .post_execute = nbctl_post_execute,
>>>>> +        .get_inactivity_probe = get_inactivity_probe,
>>>>>             .ctx_create = nbctl_ctx_create,
>>>>>           .ctx_destroy = nbctl_ctx_destroy,
>>>>> diff --git a/utilities/ovn-sbctl.c b/utilities/ovn-sbctl.c
>>>>> index 542ab9ffa..76465fe7e 100644
>>>>> --- a/utilities/ovn-sbctl.c
>>>>> +++ b/utilities/ovn-sbctl.c
>>>>> @@ -150,6 +150,20 @@ Other options:\n\
>>>>>    * gracefully.  */
>>>>>   #define ctl_fatal dont_use_ctl_fatal_use_ctl_error_and_return
>>>>>   +static int
>>>>> +get_inactivity_probe(struct ovsdb_idl *idl)
>>>>> +{
>>>>> +    const struct sbrec_sb_global *sb = sbrec_sb_global_first(idl);
>>>>> +    int interval = -1;
>>>
>>> Same comment here about using -1 as the default.
>>>
>>>>> +
>>>>> +    if (sb) {
>>>>> +        interval = smap_get_int(&sb->options, "dbctl_probe_interval",
>>>>> +                                interval);
>>>>> +    }
>>>>> +
>>>>> +    return interval;
>>>>> +}
>>>>> +
>>>>>   /* ovs-sbctl specific context.  Inherits the 'struct ctl_context'
>>>>> as base. */
>>>>>   struct sbctl_context {
>>>>>       struct ctl_context base;
>>>>> @@ -1590,6 +1604,7 @@ main(int argc, char *argv[])
>>>>>           .add_base_prerequisites = sbctl_add_base_prerequisites,
>>>>>           .pre_execute = sbctl_pre_execute,
>>>>>           .post_execute = NULL,
>>>>> +        .get_inactivity_probe = get_inactivity_probe,
>>>>>             .ctx_create = sbctl_ctx_create,
>>>>>           .ctx_destroy = sbctl_ctx_destroy,
>>>>
>>>
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> 
> 
> Regards,
> Vladislav Odintsov
> 
>
Vladislav Odintsov May 4, 2023, 5:27 p.m. UTC | #6
Hi Dumitru, Mark,

The new version (v4) has been submitted at https://patchwork.ozlabs.org/project/ovn/patch/20230504165510.4026066-1-odivlad@gmail.com/

> On 3 May 2023, at 17:18, Dumitru Ceara <dceara@redhat.com> wrote:
> 
> On 5/3/23 16:12, Vladislav Odintsov wrote:
>> Hi Dumitru and Mark,
>> 
>> thanks for the review!
>> 
>>> On 3 May 2023, at 16:56, Dumitru Ceara <dceara@redhat.com> wrote:
>>> 
>>> On 5/3/23 15:47, Mark Michelson wrote:
>>>> On 5/3/23 09:03, Dumitru Ceara wrote:
>>>>> Hi Vladislav,
>>>>> 
>>>>> On 5/3/23 02:55, Vladislav Odintsov wrote:
>>>>>> For large OVN_Southbound (or other) databases the default interval of
>>>>>> 5000 ms
>>>>>> could be not sufficient to run.  This patch adds configuration of OVSDB
>>>>>> inactivity probes for ovn-*ctl utilities.
>>>>>> 
>>>>>> Initially, on the utility start the hardcoded value of 120000 ms is set.
>>>>>> For daemon-mode it is possible to configure intervals in OVN Northbound
>>>>>> database:
>>>>>> OVN_Northbound.NB_Global.options.dbctl_inactivity_probe for ovn-nbctl
>>>>>> and
>>>>>> ovn-sbctl utilities.
>>>>>> 
>>>>>> If no DB configuration option was provided, the initial (120000 ms)
>>>>>> interval
>>>>>> is left.
>>>>> 
>>>>> Nit: I would add here something like "A non-zero inactivity probe
>>>>> interval is required in order to allow DB clients to detect connection
>>>>> failures due to network issues."
>>>>> 
>>>>>> 
>>>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
>>>>>> ---
>>>>>>  lib/ovn-util.h           |  2 ++
>>>>>>  ovn-nb.xml               | 20 ++++++++++++++++++++
>>>>>>  utilities/ovn-dbctl.c    | 18 ++++++++++++++++++
>>>>>>  utilities/ovn-dbctl.h    |  1 +
>>>>>>  utilities/ovn-ic-nbctl.c |  4 ++++
>>>>>>  utilities/ovn-ic-sbctl.c |  4 ++++
>>>>>>  utilities/ovn-nbctl.c    | 15 +++++++++++++++
>>>>>>  utilities/ovn-sbctl.c    | 15 +++++++++++++++
>>>>> 
>>>>> I'm not sure whether we should we mention this user visible change in
>>>>> the NEWS file.
>>>> 
>>>> I think it's worth mentioning in the NEWS file.
>>>> 
>>>>> 
>>>>>>  8 files changed, 79 insertions(+)
>>>>>> 
>>>>>> diff --git a/lib/ovn-util.h b/lib/ovn-util.h
>>>>>> index 7cf861dbc..47e53ca74 100644
>>>>>> --- a/lib/ovn-util.h
>>>>>> +++ b/lib/ovn-util.h
>>>>>> @@ -154,6 +154,8 @@ void set_idl_probe_interval(struct ovsdb_idl
>>>>>> *idl, const char *remote,
>>>>>>  #define OVN_MAX_DP_VXLAN_KEY ((1u << 12) - 1)
>>>>>>  #define OVN_MAX_DP_VXLAN_KEY_LOCAL (OVN_MAX_DP_KEY -
>>>>>> OVN_MAX_DP_GLOBAL_NUM)
>>>>>>  +#define DEFAULT_UTILS_PROBE_INTERVAL_MSEC 120000
>>>>> 
>>>>> I think this should go to ovn-dbctl.h instead but whoever applies the
>>>>> patch can move this.  So I don't think there's a need for v3, therefore:
>>>> 
>>>> I also agree with this suggestion.
>> 
>> I’m also fine with this change and have no objection to fix this on the apply time if needed.
>> 
>>>> 
>>>>> 
>>>>> Acked-by: Dumitru Ceara <dceara@redhat.com>
>>>>> 
>>>>> Ilya, Mark, Numan, Han, what do you guys think?
>>>> 
>>>> Before I can ack this patch, I want to bring up two points:
>>>> 
>>>> 1) This patch updates ovn-nb.xml, but it does not update ovn-sb.xml to
>>>> document the new SB option.
>>>> 
>>> 
>>> We already don't document everything that applies to both.
>>> 
>>>> 2) I'm not a big fan of the name "dbctl_probe_interval", because the
>>>> term "dbctl" is not well-known to most OVN users. My suggestion would be
>>>> to call the NB option "nbctl_probe_interval" or
>>>> "ovn_nbctl_probe_interval" and to call the SB option
>>>> "sbctl_probe_interval" or "ovn_sbctl_probe_interval". This makes it more
>>>> clear that the option pertains to the "ovn-nbctl" and "ovn-sbctl"
>>>> utilities. However, if others disagree with me on this, then we can use
>>>> "dbctl_probe_interval" .
>>>> 
>>> 
>>> The problem is northd propagates all NB.NB_Global.options to
>>> SB.SB_Global.options.
>> 
>> Yes, this is due to the facts of northd logic for propagation options from NB_Global to SB_Global table.
>> 
>>> 
>>>> I also have a couple of small findings below.
>>>> 
>>>>> 
>>>>> Regards,
>>>>> Dumitru
>>>>> 
>>>>>> +
>>>>>>  struct hmap;
>>>>>>  void ovn_destroy_tnlids(struct hmap *tnlids);
>>>>>>  bool ovn_add_tnlid(struct hmap *set, uint32_t tnlid);
>>>>>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>>>>>> index fd32070f2..a82f31a92 100644
>>>>>> --- a/ovn-nb.xml
>>>>>> +++ b/ovn-nb.xml
>>>>>> @@ -215,6 +215,26 @@
>>>>>>          </p>
>>>>>>        </column>
>>>>>>  +      <column name="options" key="dbctl_probe_interval">
>>>>>> +        <p>
>>>>>> +          The inactivity probe interval of the connection to the OVN
>>>>>> Northbound
>>>>>> +          and Southbound databases from <code>ovn-nbctl</code> and
>>>>>> +          <code>ovn-sbctl</code> utilities respectively, in
>>>>>> milliseconds.
>>>>>> +          If the value is zero, it disables the connection keepalive
>>>>>> feature.
>>>>>> +        </p>
>>>> 
>>>> The documentation makes it sound as though the NB dbctl_probe_interval
>>>> affects *both* ovn-nbctl and ovn-sbctl. However, this only affects
>>>> ovn-nbctl. ovn-sbctl is controlled by the SB database.
>>>> 
>>> 
>>> Due to the fact that the option is propagated by northd it eventually
>>> applies to both.  But thinking some more about it, you're right, it's
>>> probably better to add explicit nb and sb options.
>> 
>> This was my first approach, where I tried to read nbctl_… and sbctl_… options from appropriate databases, but faced with northd syncing (and deletion option from SB). 
>> I decided to make a common option thinking that for northd there is similar logic where NB and SB connections are configured from one option (northd_probe_interval).
>> 
>> You you think it’s worth to split options, so I need some time find out how to prevent northd from syncing custom option from NB to SB and how to make it not to delete new option from SB.
>> 
> 
> It's not really great but we already do that for
> "lb_hairpin_use_ct_mark".  Maybe something similar?
> 
>>> 
>>>>>> +
>>>>>> +        <p>
>>>>>> +          If the value is nonzero, then it will be forced to a value of
>>>>>> +          at least 1000 ms.
>>>>>> +        </p>
>>>>>> +
>>>>>> +        <p>
>>>>>> +          If the value less then zero, then the default inactivity
>>>>>> probe
>>>> 
>>>> "If the value is less than zero"
>>>> 
>>>>>> +          interval for <code>ovn-nbctl</code> and
>>>>>> <code>ovn-sbctl</code> would
>>>>>> +          be left intact (120000 ms).
>>>>>> +        </p>
>>>>>> +      </column>
>>>>>> +
>>>>>>        <column name="options" key="northd_trim_timeout">
>>>>>>          <p>
>>>>>>            When used, this configuration value specifies the time, in
>>>>>> diff --git a/utilities/ovn-dbctl.c b/utilities/ovn-dbctl.c
>>>>>> index 369a6a663..c6aebcbbb 100644
>>>>>> --- a/utilities/ovn-dbctl.c
>>>>>> +++ b/utilities/ovn-dbctl.c
>>>>>> @@ -205,6 +205,9 @@ ovn_dbctl_main(int argc, char *argv[],
>>>>>>      ovsdb_idl_set_remote(idl, db, daemon_mode);
>>>>>>      ovsdb_idl_set_leader_only(idl, leader_only);
>>>>>>  +    /* Set reasonable high probe interval. */
>>>>>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>>>>>> +
>>>>>>      if (daemon_mode) {
>>>>>>          server_loop(dbctl_options, idl, argc, argv_);
>>>>>>      } else {
>>>>>> @@ -1094,6 +1097,17 @@ out:
>>>>>>      free(argv);
>>>>>>  }
>>>>>>  +static void
>>>>>> +update_inactivity_probe(struct server_cmd_run_ctx *ctx)
>>>>>> +{
>>>>>> +    int inactivity_probe =
>>>>>> ctx->dbctl_options->get_inactivity_probe(ctx->idl);
>>>>>> +    if (inactivity_probe < 0) {
>>>>>> +        inactivity_probe = DEFAULT_UTILS_PROBE_INTERVAL_MSEC;
>>>>>> +    }
>>>>>> +
>>>>>> +    set_idl_probe_interval(ctx->idl, db, inactivity_probe);
>>>>>> +}
>>>>>> +
>>>>>>  static void
>>>>>>  server_loop(const struct ovn_dbctl_options *dbctl_options,
>>>>>>              struct ovsdb_idl *idl, int argc, char *argv[])
>>>>>> @@ -1125,6 +1139,10 @@ server_loop(const struct ovn_dbctl_options
>>>>>> *dbctl_options,
>>>>>>        for (;;) {
>>>>>>          update_ssl_config();
>>>>>> +
>>>>>> +        /* Configure inactivity probe from connected DB. */
>>>>>> +        update_inactivity_probe(&server_cmd_run_ctx);
>>>>>> +
>>>>>>          memory_run();
>>>>>>          if (memory_should_report()) {
>>>>>>              struct simap usage = SIMAP_INITIALIZER(&usage);
>>>>>> diff --git a/utilities/ovn-dbctl.h b/utilities/ovn-dbctl.h
>>>>>> index a1fbede6b..54c232dd6 100644
>>>>>> --- a/utilities/ovn-dbctl.h
>>>>>> +++ b/utilities/ovn-dbctl.h
>>>>>> @@ -52,6 +52,7 @@ struct ovn_dbctl_options {
>>>>>>                            const struct timer *wait_timeout,
>>>>>>                            long long int start_time, bool
>>>>>> print_wait_time);
>>>>>>  +    int (*get_inactivity_probe)(struct ovsdb_idl *);
>>>>>>      struct ctl_context *(*ctx_create)(void);
>>>>>>      void (*ctx_destroy)(struct ctl_context *);
>>>>>>  };
>>>>>> diff --git a/utilities/ovn-ic-nbctl.c b/utilities/ovn-ic-nbctl.c
>>>>>> index f3d8039a8..19d8b054f 100644
>>>>>> --- a/utilities/ovn-ic-nbctl.c
>>>>>> +++ b/utilities/ovn-ic-nbctl.c
>>>>>> @@ -116,6 +116,10 @@ main(int argc, char *argv[])
>>>>>>      ovsdb_idl_set_remote(idl, db, false);
>>>>>>      ovsdb_idl_set_db_change_aware(idl, false);
>>>>>>      ovsdb_idl_set_leader_only(idl, leader_only);
>>>>>> +
>>>>>> +    /* Set reasonable high probe interval. */
>>>>>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>>>>>> +
>>>>>>      run_prerequisites(commands, n_commands, idl);
>>>>>>        /* Execute the commands.
>>>>>> diff --git a/utilities/ovn-ic-sbctl.c b/utilities/ovn-ic-sbctl.c
>>>>>> index 3060b48b9..215d09d30 100644
>>>>>> --- a/utilities/ovn-ic-sbctl.c
>>>>>> +++ b/utilities/ovn-ic-sbctl.c
>>>>>> @@ -115,6 +115,10 @@ main(int argc, char *argv[])
>>>>>>      ovsdb_idl_set_remote(idl, db, false);
>>>>>>      ovsdb_idl_set_db_change_aware(idl, false);
>>>>>>      ovsdb_idl_set_leader_only(idl, leader_only);
>>>>>> +
>>>>>> +    /* Set reasonable high probe interval. */
>>>>>> +    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
>>>>>> +
>>>>>>      run_prerequisites(commands, n_commands, idl);
>>>>>>        /* Execute the commands.
>>>>>> diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
>>>>>> index 45572fd30..49c5b294a 100644
>>>>>> --- a/utilities/ovn-nbctl.c
>>>>>> +++ b/utilities/ovn-nbctl.c
>>>>>> @@ -153,6 +153,20 @@ nbctl_post_execute(struct ovsdb_idl *idl, struct
>>>>>> ovsdb_idl_txn *txn,
>>>>>>      }
>>>>>>  }
>>>>>>  +static int
>>>>>> +get_inactivity_probe(struct ovsdb_idl *idl)
>>>>>> +{
>>>>>> +    const struct nbrec_nb_global *nb = nbrec_nb_global_first(idl);
>>>>>> +    int interval = -1;
>>>> 
>>>> Why does this default to -1? Wouldn't it make more sense to use
>>>> DEFAULT_UTILS_PROBE_INTERVAL_MSEC here?
>> 
>> Yeap, you’re right.
>> 
>>>> 
>>>>>> +
>>>>>> +    if (nb) {
>>>>>> +        interval = smap_get_int(&nb->options, "dbctl_probe_interval",
>>>>>> +                                interval);
>>>>>> +    }
>>>>>> +
>>>>>> +    return interval;
>>>>>> +}
>>>>>> +
>>>>>>  static char * OVS_WARN_UNUSED_RESULT dhcp_options_get(
>>>>>>      struct ctl_context *ctx, const char *id, bool must_exist,
>>>>>>      const struct nbrec_dhcp_options **);
>>>>>> @@ -7935,6 +7949,7 @@ main(int argc, char *argv[])
>>>>>>          .add_base_prerequisites = nbctl_add_base_prerequisites,
>>>>>>          .pre_execute = nbctl_pre_execute,
>>>>>>          .post_execute = nbctl_post_execute,
>>>>>> +        .get_inactivity_probe = get_inactivity_probe,
>>>>>>            .ctx_create = nbctl_ctx_create,
>>>>>>          .ctx_destroy = nbctl_ctx_destroy,
>>>>>> diff --git a/utilities/ovn-sbctl.c b/utilities/ovn-sbctl.c
>>>>>> index 542ab9ffa..76465fe7e 100644
>>>>>> --- a/utilities/ovn-sbctl.c
>>>>>> +++ b/utilities/ovn-sbctl.c
>>>>>> @@ -150,6 +150,20 @@ Other options:\n\
>>>>>>   * gracefully.  */
>>>>>>  #define ctl_fatal dont_use_ctl_fatal_use_ctl_error_and_return
>>>>>>  +static int
>>>>>> +get_inactivity_probe(struct ovsdb_idl *idl)
>>>>>> +{
>>>>>> +    const struct sbrec_sb_global *sb = sbrec_sb_global_first(idl);
>>>>>> +    int interval = -1;
>>>> 
>>>> Same comment here about using -1 as the default.
>>>> 
>>>>>> +
>>>>>> +    if (sb) {
>>>>>> +        interval = smap_get_int(&sb->options, "dbctl_probe_interval",
>>>>>> +                                interval);
>>>>>> +    }
>>>>>> +
>>>>>> +    return interval;
>>>>>> +}
>>>>>> +
>>>>>>  /* ovs-sbctl specific context.  Inherits the 'struct ctl_context'
>>>>>> as base. */
>>>>>>  struct sbctl_context {
>>>>>>      struct ctl_context base;
>>>>>> @@ -1590,6 +1604,7 @@ main(int argc, char *argv[])
>>>>>>          .add_base_prerequisites = sbctl_add_base_prerequisites,
>>>>>>          .pre_execute = sbctl_pre_execute,
>>>>>>          .post_execute = NULL,
>>>>>> +        .get_inactivity_probe = get_inactivity_probe,
>>>>>>            .ctx_create = sbctl_ctx_create,
>>>>>>          .ctx_destroy = sbctl_ctx_destroy,
>>>>> 
>>>> 
>>> 
>>> _______________________________________________
>>> dev mailing list
>>> dev@openvswitch.org
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> 
>> 
>> Regards,
>> Vladislav Odintsov
>> 
>> 
> 


Regards,
Vladislav Odintsov
diff mbox series

Patch

diff --git a/lib/ovn-util.h b/lib/ovn-util.h
index 7cf861dbc..47e53ca74 100644
--- a/lib/ovn-util.h
+++ b/lib/ovn-util.h
@@ -154,6 +154,8 @@  void set_idl_probe_interval(struct ovsdb_idl *idl, const char *remote,
 #define OVN_MAX_DP_VXLAN_KEY ((1u << 12) - 1)
 #define OVN_MAX_DP_VXLAN_KEY_LOCAL (OVN_MAX_DP_KEY - OVN_MAX_DP_GLOBAL_NUM)
 
+#define DEFAULT_UTILS_PROBE_INTERVAL_MSEC 120000
+
 struct hmap;
 void ovn_destroy_tnlids(struct hmap *tnlids);
 bool ovn_add_tnlid(struct hmap *set, uint32_t tnlid);
diff --git a/ovn-nb.xml b/ovn-nb.xml
index fd32070f2..a82f31a92 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -215,6 +215,26 @@ 
         </p>
       </column>
 
+      <column name="options" key="dbctl_probe_interval">
+        <p>
+          The inactivity probe interval of the connection to the OVN Northbound
+          and Southbound databases from <code>ovn-nbctl</code> and
+          <code>ovn-sbctl</code> utilities respectively, in milliseconds.
+          If the value is zero, it disables the connection keepalive feature.
+        </p>
+
+        <p>
+          If the value is nonzero, then it will be forced to a value of
+          at least 1000 ms.
+        </p>
+
+        <p>
+          If the value less then zero, then the default inactivity probe
+          interval for <code>ovn-nbctl</code> and <code>ovn-sbctl</code> would
+          be left intact (120000 ms).
+        </p>
+      </column>
+
       <column name="options" key="northd_trim_timeout">
         <p>
           When used, this configuration value specifies the time, in
diff --git a/utilities/ovn-dbctl.c b/utilities/ovn-dbctl.c
index 369a6a663..c6aebcbbb 100644
--- a/utilities/ovn-dbctl.c
+++ b/utilities/ovn-dbctl.c
@@ -205,6 +205,9 @@  ovn_dbctl_main(int argc, char *argv[],
     ovsdb_idl_set_remote(idl, db, daemon_mode);
     ovsdb_idl_set_leader_only(idl, leader_only);
 
+    /* Set reasonable high probe interval. */
+    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
+
     if (daemon_mode) {
         server_loop(dbctl_options, idl, argc, argv_);
     } else {
@@ -1094,6 +1097,17 @@  out:
     free(argv);
 }
 
+static void
+update_inactivity_probe(struct server_cmd_run_ctx *ctx)
+{
+    int inactivity_probe = ctx->dbctl_options->get_inactivity_probe(ctx->idl);
+    if (inactivity_probe < 0) {
+        inactivity_probe = DEFAULT_UTILS_PROBE_INTERVAL_MSEC;
+    }
+
+    set_idl_probe_interval(ctx->idl, db, inactivity_probe);
+}
+
 static void
 server_loop(const struct ovn_dbctl_options *dbctl_options,
             struct ovsdb_idl *idl, int argc, char *argv[])
@@ -1125,6 +1139,10 @@  server_loop(const struct ovn_dbctl_options *dbctl_options,
 
     for (;;) {
         update_ssl_config();
+
+        /* Configure inactivity probe from connected DB. */
+        update_inactivity_probe(&server_cmd_run_ctx);
+
         memory_run();
         if (memory_should_report()) {
             struct simap usage = SIMAP_INITIALIZER(&usage);
diff --git a/utilities/ovn-dbctl.h b/utilities/ovn-dbctl.h
index a1fbede6b..54c232dd6 100644
--- a/utilities/ovn-dbctl.h
+++ b/utilities/ovn-dbctl.h
@@ -52,6 +52,7 @@  struct ovn_dbctl_options {
                           const struct timer *wait_timeout,
                           long long int start_time, bool print_wait_time);
 
+    int (*get_inactivity_probe)(struct ovsdb_idl *);
     struct ctl_context *(*ctx_create)(void);
     void (*ctx_destroy)(struct ctl_context *);
 };
diff --git a/utilities/ovn-ic-nbctl.c b/utilities/ovn-ic-nbctl.c
index f3d8039a8..19d8b054f 100644
--- a/utilities/ovn-ic-nbctl.c
+++ b/utilities/ovn-ic-nbctl.c
@@ -116,6 +116,10 @@  main(int argc, char *argv[])
     ovsdb_idl_set_remote(idl, db, false);
     ovsdb_idl_set_db_change_aware(idl, false);
     ovsdb_idl_set_leader_only(idl, leader_only);
+
+    /* Set reasonable high probe interval. */
+    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
+
     run_prerequisites(commands, n_commands, idl);
 
     /* Execute the commands.
diff --git a/utilities/ovn-ic-sbctl.c b/utilities/ovn-ic-sbctl.c
index 3060b48b9..215d09d30 100644
--- a/utilities/ovn-ic-sbctl.c
+++ b/utilities/ovn-ic-sbctl.c
@@ -115,6 +115,10 @@  main(int argc, char *argv[])
     ovsdb_idl_set_remote(idl, db, false);
     ovsdb_idl_set_db_change_aware(idl, false);
     ovsdb_idl_set_leader_only(idl, leader_only);
+
+    /* Set reasonable high probe interval. */
+    set_idl_probe_interval(idl, db, DEFAULT_UTILS_PROBE_INTERVAL_MSEC);
+
     run_prerequisites(commands, n_commands, idl);
 
     /* Execute the commands.
diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
index 45572fd30..49c5b294a 100644
--- a/utilities/ovn-nbctl.c
+++ b/utilities/ovn-nbctl.c
@@ -153,6 +153,20 @@  nbctl_post_execute(struct ovsdb_idl *idl, struct ovsdb_idl_txn *txn,
     }
 }
 
+static int
+get_inactivity_probe(struct ovsdb_idl *idl)
+{
+    const struct nbrec_nb_global *nb = nbrec_nb_global_first(idl);
+    int interval = -1;
+
+    if (nb) {
+        interval = smap_get_int(&nb->options, "dbctl_probe_interval",
+                                interval);
+    }
+
+    return interval;
+}
+
 static char * OVS_WARN_UNUSED_RESULT dhcp_options_get(
     struct ctl_context *ctx, const char *id, bool must_exist,
     const struct nbrec_dhcp_options **);
@@ -7935,6 +7949,7 @@  main(int argc, char *argv[])
         .add_base_prerequisites = nbctl_add_base_prerequisites,
         .pre_execute = nbctl_pre_execute,
         .post_execute = nbctl_post_execute,
+        .get_inactivity_probe = get_inactivity_probe,
 
         .ctx_create = nbctl_ctx_create,
         .ctx_destroy = nbctl_ctx_destroy,
diff --git a/utilities/ovn-sbctl.c b/utilities/ovn-sbctl.c
index 542ab9ffa..76465fe7e 100644
--- a/utilities/ovn-sbctl.c
+++ b/utilities/ovn-sbctl.c
@@ -150,6 +150,20 @@  Other options:\n\
  * gracefully.  */
 #define ctl_fatal dont_use_ctl_fatal_use_ctl_error_and_return
 
+static int
+get_inactivity_probe(struct ovsdb_idl *idl)
+{
+    const struct sbrec_sb_global *sb = sbrec_sb_global_first(idl);
+    int interval = -1;
+
+    if (sb) {
+        interval = smap_get_int(&sb->options, "dbctl_probe_interval",
+                                interval);
+    }
+
+    return interval;
+}
+
 /* ovs-sbctl specific context.  Inherits the 'struct ctl_context' as base. */
 struct sbctl_context {
     struct ctl_context base;
@@ -1590,6 +1604,7 @@  main(int argc, char *argv[])
         .add_base_prerequisites = sbctl_add_base_prerequisites,
         .pre_execute = sbctl_pre_execute,
         .post_execute = NULL,
+        .get_inactivity_probe = get_inactivity_probe,
 
         .ctx_create = sbctl_ctx_create,
         .ctx_destroy = sbctl_ctx_destroy,