diff mbox series

[ovs-dev,v5,3/3] ovsdb-idl: Force IDL retry when missing updates encountered.

Message ID 20200507112055.31976.98059.stgit@dceara.remote.csb
State Superseded
Headers show
Series Avoid ovsdb-idl IDL inconsistencies. | expand

Commit Message

Dumitru Ceara May 7, 2020, 11:21 a.m. UTC
Adds a generic recovery mechanism which triggers an IDL retry with fast
resync disabled in case the IDL has detected that it ended up in an
inconsistent state due to other bugs in the ovsdb-server/ovsdb-idl
implementation.

CC: Andy Zhou <azhou@ovn.org>
Fixes: db2b5757328c ("lib: add monitor2 support in ovsdb-idl.")
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
---
 lib/ovsdb-idl.c |   37 +++++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

Comments

Han Zhou May 28, 2020, 7:08 a.m. UTC | #1
Hi Dumitru, please see my comments below.

On Thu, May 7, 2020 at 4:21 AM Dumitru Ceara <dceara@redhat.com> wrote:
>
> Adds a generic recovery mechanism which triggers an IDL retry with fast
> resync disabled in case the IDL has detected that it ended up in an
> inconsistent state due to other bugs in the ovsdb-server/ovsdb-idl
> implementation.
>
> CC: Andy Zhou <azhou@ovn.org>
> Fixes: db2b5757328c ("lib: add monitor2 support in ovsdb-idl.")
> Signed-off-by: Dumitru Ceara <dceara@redhat.com>
> ---
>  lib/ovsdb-idl.c |   37 +++++++++++++++++++++++++++++++++----
>  1 file changed, 33 insertions(+), 4 deletions(-)
>
> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> index 557f61c..076f0a1 100644
> --- a/lib/ovsdb-idl.c
> +++ b/lib/ovsdb-idl.c
> @@ -328,7 +328,8 @@ static bool ovsdb_idl_process_update(struct
ovsdb_idl_table *,
>  static bool ovsdb_idl_process_update2(struct ovsdb_idl_table *,
>                                        const struct uuid *,
>                                        const char *operation,
> -                                      const struct json *row);
> +                                      const struct json *row,
> +                                      bool *inconsistent);
>  static void ovsdb_idl_insert_row(struct ovsdb_idl_row *, const struct
json *);
>  static void ovsdb_idl_delete_row(struct ovsdb_idl_row *);
>  static bool ovsdb_idl_modify_row(struct ovsdb_idl_row *, const struct
json *);
> @@ -2420,10 +2421,26 @@ ovsdb_idl_db_parse_update__(struct ovsdb_idl_db
*db,
>                      row = shash_find_data(json_object(row_update),
operation);
>
>                      if (row)  {
> +                        bool inconsistent = false;
> +
>                          if (ovsdb_idl_process_update2(table, &uuid,
operation,
> -                                                      row)) {
> +                                                      row,
&inconsistent)) {
>                              db->change_seqno++;
>                          }
> +
> +                        /* If the db is in an inconsistent state, clear
the
> +                         * db->last_id and retry to get the complete
view of
> +                         * the database.
> +                         */
> +                        if (inconsistent) {
> +                            memset(&db->last_id, 0, sizeof db->last_id);
> +                            ovsdb_idl_retry(db->idl);

Just to confirm, we want to retry regardless of the monitor version, right?
I.e. even if we are not using monitor_cond_since (e.g. the server doesn't
support it), but if there is an inconsistency, retry would still help.
Right?

> +                            return ovsdb_error(NULL,
> +                                               "<row_update2> received
for "
> +                                               "inconsistent IDL: "
> +                                               "reconnecting IDL with "
> +                                               "fast resync disabled");

The message could be revised to "... reconnecting IDL and resync all
data.", because I feel "with fast resync disabled" sounds like we are not
using monitor_cond_since.

Thanks,
Han

> +                        }
>                          break;
>                      }
>                  }
> @@ -2547,16 +2564,26 @@ ovsdb_idl_process_update(struct ovsdb_idl_table
*table,
>  }
>
>  /* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false
> - * otherwise. */
> + * otherwise.
> + *
> + * NOTE: When processing the "modify" updates, the IDL can determine that
> + * previous updates were missed (e.g., due to bugs) and that rows that
don't
> + * exist locally should be updated. This indicates that the
> + * IDL is in an inconsistent state and, unlike in
ovsdb_idl_process_update(),
> + * the missing rows cannot be inserted. If this is the case,
'inconsistent'
> + * is set to true to indicate the catastrophic failure.
> + */
>  static bool
>  ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
>                            const struct uuid *uuid,
>                            const char *operation,
> -                          const struct json *json_row)
> +                          const struct json *json_row,
> +                          bool *inconsistent)
>  {
>      struct ovsdb_idl_row *row;
>
>      row = ovsdb_idl_get_row(table, uuid);
> +    *inconsistent = false;
>      if (!strcmp(operation, "delete")) {
>          /* Delete row. */
>          if (row && !ovsdb_idl_row_is_orphan(row)) {
> @@ -2588,11 +2615,13 @@ ovsdb_idl_process_update2(struct ovsdb_idl_table
*table,
>                  VLOG_WARN_RL(&semantic_rl, "cannot modify missing but "
>                               "referenced row "UUID_FMT" in table %s",
>                               UUID_ARGS(uuid), table->class_->name);
> +                *inconsistent = true;
>                  return false;
>              }
>          } else {
>              VLOG_WARN_RL(&semantic_rl, "cannot modify missing row
"UUID_FMT" "
>                           "in table %s", UUID_ARGS(uuid),
table->class_->name);
> +            *inconsistent = true;
>              return false;
>          }
>      } else {
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Dumitru Ceara May 28, 2020, 8:24 a.m. UTC | #2
On 5/28/20 9:08 AM, Han Zhou wrote:
> Hi Dumitru, please see my comments below.
> 
> On Thu, May 7, 2020 at 4:21 AM Dumitru Ceara <dceara@redhat.com
> <mailto:dceara@redhat.com>> wrote:
>>
>> Adds a generic recovery mechanism which triggers an IDL retry with fast
>> resync disabled in case the IDL has detected that it ended up in an
>> inconsistent state due to other bugs in the ovsdb-server/ovsdb-idl
>> implementation.
>>
>> CC: Andy Zhou <azhou@ovn.org <mailto:azhou@ovn.org>>
>> Fixes: db2b5757328c ("lib: add monitor2 support in ovsdb-idl.")
>> Signed-off-by: Dumitru Ceara <dceara@redhat.com
> <mailto:dceara@redhat.com>>
>> ---
>>  lib/ovsdb-idl.c |   37 +++++++++++++++++++++++++++++++++----
>>  1 file changed, 33 insertions(+), 4 deletions(-)
>>
>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>> index 557f61c..076f0a1 100644
>> --- a/lib/ovsdb-idl.c
>> +++ b/lib/ovsdb-idl.c
>> @@ -328,7 +328,8 @@ static bool ovsdb_idl_process_update(struct
> ovsdb_idl_table *,
>>  static bool ovsdb_idl_process_update2(struct ovsdb_idl_table *,
>>                                        const struct uuid *,
>>                                        const char *operation,
>> -                                      const struct json *row);
>> +                                      const struct json *row,
>> +                                      bool *inconsistent);
>>  static void ovsdb_idl_insert_row(struct ovsdb_idl_row *, const struct
> json *);
>>  static void ovsdb_idl_delete_row(struct ovsdb_idl_row *);
>>  static bool ovsdb_idl_modify_row(struct ovsdb_idl_row *, const struct
> json *);
>> @@ -2420,10 +2421,26 @@ ovsdb_idl_db_parse_update__(struct
> ovsdb_idl_db *db,
>>                      row = shash_find_data(json_object(row_update),
> operation);
>>
>>                      if (row)  {
>> +                        bool inconsistent = false;
>> +
>>                          if (ovsdb_idl_process_update2(table, &uuid,
> operation,
>> -                                                      row)) {
>> +                                                      row,
> &inconsistent)) {
>>                              db->change_seqno++;
>>                          }
>> +
>> +                        /* If the db is in an inconsistent state,
> clear the
>> +                         * db->last_id and retry to get the complete
> view of
>> +                         * the database.
>> +                         */
>> +                        if (inconsistent) {
>> +                            memset(&db->last_id, 0, sizeof db->last_id);
>> +                            ovsdb_idl_retry(db->idl);
> 
> Just to confirm, we want to retry regardless of the monitor version,
> right? I.e. even if we are not using monitor_cond_since (e.g. the server
> doesn't support it), but if there is an inconsistency, retry would still
> help. Right?
> 

Right, even if we're just using monitor_cond, if we detect an
inconsistency we need to retry. In that case last_id would've already
been 0 but it doesn't hurt to reset it.

>> +                            return ovsdb_error(NULL,
>> +                                               "<row_update2>
> received for "
>> +                                               "inconsistent IDL: "
>> +                                               "reconnecting IDL with "
>> +                                               "fast resync disabled");
> 
> The message could be revised to "... reconnecting IDL and resync all
> data.", because I feel "with fast resync disabled" sounds like we are
> not using monitor_cond_since.

Sounds better indeed, I'll change it.

Thanks,
Dumitru

> 
> Thanks,
> Han
> 
>> +                        }
>>                          break;
>>                      }
>>                  }
>> @@ -2547,16 +2564,26 @@ ovsdb_idl_process_update(struct
> ovsdb_idl_table *table,
>>  }
>>
>>  /* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false
>> - * otherwise. */
>> + * otherwise.
>> + *
>> + * NOTE: When processing the "modify" updates, the IDL can determine that
>> + * previous updates were missed (e.g., due to bugs) and that rows
> that don't
>> + * exist locally should be updated. This indicates that the
>> + * IDL is in an inconsistent state and, unlike in
> ovsdb_idl_process_update(),
>> + * the missing rows cannot be inserted. If this is the case,
> 'inconsistent'
>> + * is set to true to indicate the catastrophic failure.
>> + */
>>  static bool
>>  ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
>>                            const struct uuid *uuid,
>>                            const char *operation,
>> -                          const struct json *json_row)
>> +                          const struct json *json_row,
>> +                          bool *inconsistent)
>>  {
>>      struct ovsdb_idl_row *row;
>>
>>      row = ovsdb_idl_get_row(table, uuid);
>> +    *inconsistent = false;
>>      if (!strcmp(operation, "delete")) {
>>          /* Delete row. */
>>          if (row && !ovsdb_idl_row_is_orphan(row)) {
>> @@ -2588,11 +2615,13 @@ ovsdb_idl_process_update2(struct
> ovsdb_idl_table *table,
>>                  VLOG_WARN_RL(&semantic_rl, "cannot modify missing but "
>>                               "referenced row "UUID_FMT" in table %s",
>>                               UUID_ARGS(uuid), table->class_->name);
>> +                *inconsistent = true;
>>                  return false;
>>              }
>>          } else {
>>              VLOG_WARN_RL(&semantic_rl, "cannot modify missing row
> "UUID_FMT" "
>>                           "in table %s", UUID_ARGS(uuid),
> table->class_->name);
>> +            *inconsistent = true;
>>              return false;
>>          }
>>      } else {
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org <mailto:dev@openvswitch.org>
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff mbox series

Patch

diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 557f61c..076f0a1 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -328,7 +328,8 @@  static bool ovsdb_idl_process_update(struct ovsdb_idl_table *,
 static bool ovsdb_idl_process_update2(struct ovsdb_idl_table *,
                                       const struct uuid *,
                                       const char *operation,
-                                      const struct json *row);
+                                      const struct json *row,
+                                      bool *inconsistent);
 static void ovsdb_idl_insert_row(struct ovsdb_idl_row *, const struct json *);
 static void ovsdb_idl_delete_row(struct ovsdb_idl_row *);
 static bool ovsdb_idl_modify_row(struct ovsdb_idl_row *, const struct json *);
@@ -2420,10 +2421,26 @@  ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db,
                     row = shash_find_data(json_object(row_update), operation);
 
                     if (row)  {
+                        bool inconsistent = false;
+
                         if (ovsdb_idl_process_update2(table, &uuid, operation,
-                                                      row)) {
+                                                      row, &inconsistent)) {
                             db->change_seqno++;
                         }
+
+                        /* If the db is in an inconsistent state, clear the
+                         * db->last_id and retry to get the complete view of
+                         * the database.
+                         */
+                        if (inconsistent) {
+                            memset(&db->last_id, 0, sizeof db->last_id);
+                            ovsdb_idl_retry(db->idl);
+                            return ovsdb_error(NULL,
+                                               "<row_update2> received for "
+                                               "inconsistent IDL: "
+                                               "reconnecting IDL with "
+                                               "fast resync disabled");
+                        }
                         break;
                     }
                 }
@@ -2547,16 +2564,26 @@  ovsdb_idl_process_update(struct ovsdb_idl_table *table,
 }
 
 /* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false
- * otherwise. */
+ * otherwise.
+ *
+ * NOTE: When processing the "modify" updates, the IDL can determine that
+ * previous updates were missed (e.g., due to bugs) and that rows that don't
+ * exist locally should be updated. This indicates that the
+ * IDL is in an inconsistent state and, unlike in ovsdb_idl_process_update(),
+ * the missing rows cannot be inserted. If this is the case, 'inconsistent'
+ * is set to true to indicate the catastrophic failure.
+ */
 static bool
 ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
                           const struct uuid *uuid,
                           const char *operation,
-                          const struct json *json_row)
+                          const struct json *json_row,
+                          bool *inconsistent)
 {
     struct ovsdb_idl_row *row;
 
     row = ovsdb_idl_get_row(table, uuid);
+    *inconsistent = false;
     if (!strcmp(operation, "delete")) {
         /* Delete row. */
         if (row && !ovsdb_idl_row_is_orphan(row)) {
@@ -2588,11 +2615,13 @@  ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
                 VLOG_WARN_RL(&semantic_rl, "cannot modify missing but "
                              "referenced row "UUID_FMT" in table %s",
                              UUID_ARGS(uuid), table->class_->name);
+                *inconsistent = true;
                 return false;
             }
         } else {
             VLOG_WARN_RL(&semantic_rl, "cannot modify missing row "UUID_FMT" "
                          "in table %s", UUID_ARGS(uuid), table->class_->name);
+            *inconsistent = true;
             return false;
         }
     } else {