diff mbox series

[ovs-dev] ovsdb-idl: Force reconnect when missing updates encountered.

Message ID 1585257749-29702-1-git-send-email-dceara@redhat.com
State Superseded
Headers show
Series [ovs-dev] ovsdb-idl: Force reconnect when missing updates encountered. | expand

Commit Message

Dumitru Ceara March 26, 2020, 9:22 p.m. UTC
When processing update2 messages, if the IDL detects that previous
updates were missed, clear db->last_id and trigger a reconnect such that
the IDL refreshes its complete view of the database.

Such scenarios can happen for example when bugs in
ovsdb-server/ovsdb-idl are encountered. One such situation is reported
in https://bugzilla.redhat.com/show_bug.cgi?id=1808580.

This commit doesn't fix the root cause of the bug above but adds the
recovery mechanism to avoid staying in an inconsistent state for ever.

CC: Andy Zhou <azhou@ovn.org>
Fixes: db2b5757328c ("lib: add monitor2 support in ovsdb-idl.")
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
---
 lib/ovsdb-idl.c | 37 +++++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

Comments

Mark Michelson April 2, 2020, 8:44 p.m. UTC | #1
On 3/26/20 5:22 PM, Dumitru Ceara wrote:
> When processing update2 messages, if the IDL detects that previous
> updates were missed, clear db->last_id and trigger a reconnect such that
> the IDL refreshes its complete view of the database.
> 
> Such scenarios can happen for example when bugs in
> ovsdb-server/ovsdb-idl are encountered. One such situation is reported
> in https://bugzilla.redhat.com/show_bug.cgi?id=1808580.
> 
> This commit doesn't fix the root cause of the bug above but adds the
> recovery mechanism to avoid staying in an inconsistent state for ever.
> 
> CC: Andy Zhou <azhou@ovn.org>
> Fixes: db2b5757328c ("lib: add monitor2 support in ovsdb-idl.")
> Signed-off-by: Dumitru Ceara <dceara@redhat.com>
> ---
>   lib/ovsdb-idl.c | 37 +++++++++++++++++++++++++++++++++----
>   1 file changed, 33 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> index 190143f..b47c0c8 100644
> --- a/lib/ovsdb-idl.c
> +++ b/lib/ovsdb-idl.c
> @@ -324,7 +324,8 @@ static bool ovsdb_idl_process_update(struct ovsdb_idl_table *,
>   static bool ovsdb_idl_process_update2(struct ovsdb_idl_table *,
>                                         const struct uuid *,
>                                         const char *operation,
> -                                      const struct json *row);
> +                                      const struct json *row,
> +                                      bool *inconsistent);
>   static void ovsdb_idl_insert_row(struct ovsdb_idl_row *, const struct json *);
>   static void ovsdb_idl_delete_row(struct ovsdb_idl_row *);
>   static bool ovsdb_idl_modify_row(struct ovsdb_idl_row *, const struct json *);
> @@ -2320,10 +2321,26 @@ ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db,
>                       row = shash_find_data(json_object(row_update), operation);
>   
>                       if (row)  {
> +                        bool inconsistent = false;
> +
>                           if (ovsdb_idl_process_update2(table, &uuid, operation,
> -                                                      row)) {
> +                                                      row, &inconsistent)) {
>                               db->change_seqno++;
>                           }
> +
> +                        /* If the db is in an inconsistent state, clear the
> +                         * db->last_id and retry to retrieve the complete DB.
> +                         */
> +                        if (inconsistent) {
> +                            memset(&db->last_id, 0, sizeof db->last_id);
> +                            ovsdb_idl_retry(db->idl);
> +                            return ovsdb_syntax_error(row_update, NULL,
> +                                                      "<row_update2> "
> +                                                      "for table \"%s\" "
> +                                                      "cannot be processed: "
> +                                                      "missed updates",
> +                                                      table->class_->name);

A couple of notes on this error:

1) The error message here is a bit redundant since there's already a 
warning printed in each of the cases that "inconsistent" gets set true. 
It may make more sense to state what's being done as a result of the 
inconsistent state instead of the fact that there's a problem.

2) I don't think this is a syntax error. It's probably better to use 
ovsdb_error() or ovsdb_internal_error(). ovsdb_internal_error() prints a 
backtrace, and doesn't actually appear to be used anywhere, so it may be 
better to just use ovsdb_error().

> +                        }
>                           break;
>                       }
>                   }
> @@ -2447,16 +2464,26 @@ ovsdb_idl_process_update(struct ovsdb_idl_table *table,
>   }
>   
>   /* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false
> - * otherwise. */
> + * otherwise.
> + *
> + * NOTE: When processing the "modify" updates, the IDL can determine that
> + * previous updates were missed (e.g., due to bugs) and that rows that don't
> + * exist locally should be updated. This indicates that the
> + * IDL is in an inconsistent state and, unlike in ovsdb_idl_process_update(),
> + * the missing rows cannot be inserted. If this is the case, 'inconsistent'
> + * is set to true to indicate the catastrophic failure.
> + */
>   static bool
>   ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
>                             const struct uuid *uuid,
>                             const char *operation,
> -                          const struct json *json_row)
> +                          const struct json *json_row,
> +                          bool *inconsistent)
>   {
>       struct ovsdb_idl_row *row;
>   
>       row = ovsdb_idl_get_row(table, uuid);
> +    *inconsistent = false;
>       if (!strcmp(operation, "delete")) {
>           /* Delete row. */
>           if (row && !ovsdb_idl_row_is_orphan(row)) {
> @@ -2488,11 +2515,13 @@ ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
>                   VLOG_WARN_RL(&semantic_rl, "cannot modify missing but "
>                                "referenced row "UUID_FMT" in table %s",
>                                UUID_ARGS(uuid), table->class_->name);
> +                *inconsistent = true;
>                   return false;
>               }
>           } else {
>               VLOG_WARN_RL(&semantic_rl, "cannot modify missing row "UUID_FMT" "
>                            "in table %s", UUID_ARGS(uuid), table->class_->name);
> +            *inconsistent = true;
>               return false;
>           }
>       } else {
>
Dumitru Ceara April 3, 2020, 7:03 a.m. UTC | #2
On 4/2/20 10:44 PM, Mark Michelson wrote:
> On 3/26/20 5:22 PM, Dumitru Ceara wrote:
>> When processing update2 messages, if the IDL detects that previous
>> updates were missed, clear db->last_id and trigger a reconnect such that
>> the IDL refreshes its complete view of the database.
>>
>> Such scenarios can happen for example when bugs in
>> ovsdb-server/ovsdb-idl are encountered. One such situation is reported
>> in https://bugzilla.redhat.com/show_bug.cgi?id=1808580.
>>
>> This commit doesn't fix the root cause of the bug above but adds the
>> recovery mechanism to avoid staying in an inconsistent state for ever.
>>
>> CC: Andy Zhou <azhou@ovn.org>
>> Fixes: db2b5757328c ("lib: add monitor2 support in ovsdb-idl.")
>> Signed-off-by: Dumitru Ceara <dceara@redhat.com>
>> ---
>>   lib/ovsdb-idl.c | 37 +++++++++++++++++++++++++++++++++----
>>   1 file changed, 33 insertions(+), 4 deletions(-)
>>
>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>> index 190143f..b47c0c8 100644
>> --- a/lib/ovsdb-idl.c
>> +++ b/lib/ovsdb-idl.c
>> @@ -324,7 +324,8 @@ static bool ovsdb_idl_process_update(struct
>> ovsdb_idl_table *,
>>   static bool ovsdb_idl_process_update2(struct ovsdb_idl_table *,
>>                                         const struct uuid *,
>>                                         const char *operation,
>> -                                      const struct json *row);
>> +                                      const struct json *row,
>> +                                      bool *inconsistent);
>>   static void ovsdb_idl_insert_row(struct ovsdb_idl_row *, const
>> struct json *);
>>   static void ovsdb_idl_delete_row(struct ovsdb_idl_row *);
>>   static bool ovsdb_idl_modify_row(struct ovsdb_idl_row *, const
>> struct json *);
>> @@ -2320,10 +2321,26 @@ ovsdb_idl_db_parse_update__(struct
>> ovsdb_idl_db *db,
>>                       row = shash_find_data(json_object(row_update),
>> operation);
>>                         if (row)  {
>> +                        bool inconsistent = false;
>> +
>>                           if (ovsdb_idl_process_update2(table, &uuid,
>> operation,
>> -                                                      row)) {
>> +                                                      row,
>> &inconsistent)) {
>>                               db->change_seqno++;
>>                           }
>> +
>> +                        /* If the db is in an inconsistent state,
>> clear the
>> +                         * db->last_id and retry to retrieve the
>> complete DB.
>> +                         */
>> +                        if (inconsistent) {
>> +                            memset(&db->last_id, 0, sizeof db->last_id);
>> +                            ovsdb_idl_retry(db->idl);
>> +                            return ovsdb_syntax_error(row_update, NULL,
>> +                                                      "<row_update2> "
>> +                                                      "for table
>> \"%s\" "
>> +                                                      "cannot be
>> processed: "
>> +                                                      "missed updates",
>> +                                                     
>> table->class_->name);
> 
> A couple of notes on this error:
> 
> 1) The error message here is a bit redundant since there's already a
> warning printed in each of the cases that "inconsistent" gets set true.
> It may make more sense to state what's being done as a result of the
> inconsistent state instead of the fact that there's a problem.
> 
> 2) I don't think this is a syntax error. It's probably better to use
> ovsdb_error() or ovsdb_internal_error(). ovsdb_internal_error() prints a
> backtrace, and doesn't actually appear to be used anywhere, so it may be
> better to just use ovsdb_error().
> 

Hi Mark,

Thanks for the review. Using ovsdb_error() sounds better indeed. I'll
send an updated v2.

Regards,
Dumitru

>> +                        }
>>                           break;
>>                       }
>>                   }
>> @@ -2447,16 +2464,26 @@ ovsdb_idl_process_update(struct
>> ovsdb_idl_table *table,
>>   }
>>     /* Returns true if a column with mode OVSDB_IDL_MODE_RW changed,
>> false
>> - * otherwise. */
>> + * otherwise.
>> + *
>> + * NOTE: When processing the "modify" updates, the IDL can determine
>> that
>> + * previous updates were missed (e.g., due to bugs) and that rows
>> that don't
>> + * exist locally should be updated. This indicates that the
>> + * IDL is in an inconsistent state and, unlike in
>> ovsdb_idl_process_update(),
>> + * the missing rows cannot be inserted. If this is the case,
>> 'inconsistent'
>> + * is set to true to indicate the catastrophic failure.
>> + */
>>   static bool
>>   ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
>>                             const struct uuid *uuid,
>>                             const char *operation,
>> -                          const struct json *json_row)
>> +                          const struct json *json_row,
>> +                          bool *inconsistent)
>>   {
>>       struct ovsdb_idl_row *row;
>>         row = ovsdb_idl_get_row(table, uuid);
>> +    *inconsistent = false;
>>       if (!strcmp(operation, "delete")) {
>>           /* Delete row. */
>>           if (row && !ovsdb_idl_row_is_orphan(row)) {
>> @@ -2488,11 +2515,13 @@ ovsdb_idl_process_update2(struct
>> ovsdb_idl_table *table,
>>                   VLOG_WARN_RL(&semantic_rl, "cannot modify missing but "
>>                                "referenced row "UUID_FMT" in table %s",
>>                                UUID_ARGS(uuid), table->class_->name);
>> +                *inconsistent = true;
>>                   return false;
>>               }
>>           } else {
>>               VLOG_WARN_RL(&semantic_rl, "cannot modify missing row
>> "UUID_FMT" "
>>                            "in table %s", UUID_ARGS(uuid),
>> table->class_->name);
>> +            *inconsistent = true;
>>               return false;
>>           }
>>       } else {
>>
>
diff mbox series

Patch

diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 190143f..b47c0c8 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -324,7 +324,8 @@  static bool ovsdb_idl_process_update(struct ovsdb_idl_table *,
 static bool ovsdb_idl_process_update2(struct ovsdb_idl_table *,
                                       const struct uuid *,
                                       const char *operation,
-                                      const struct json *row);
+                                      const struct json *row,
+                                      bool *inconsistent);
 static void ovsdb_idl_insert_row(struct ovsdb_idl_row *, const struct json *);
 static void ovsdb_idl_delete_row(struct ovsdb_idl_row *);
 static bool ovsdb_idl_modify_row(struct ovsdb_idl_row *, const struct json *);
@@ -2320,10 +2321,26 @@  ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db,
                     row = shash_find_data(json_object(row_update), operation);
 
                     if (row)  {
+                        bool inconsistent = false;
+
                         if (ovsdb_idl_process_update2(table, &uuid, operation,
-                                                      row)) {
+                                                      row, &inconsistent)) {
                             db->change_seqno++;
                         }
+
+                        /* If the db is in an inconsistent state, clear the
+                         * db->last_id and retry to retrieve the complete DB.
+                         */
+                        if (inconsistent) {
+                            memset(&db->last_id, 0, sizeof db->last_id);
+                            ovsdb_idl_retry(db->idl);
+                            return ovsdb_syntax_error(row_update, NULL,
+                                                      "<row_update2> "
+                                                      "for table \"%s\" "
+                                                      "cannot be processed: "
+                                                      "missed updates",
+                                                      table->class_->name);
+                        }
                         break;
                     }
                 }
@@ -2447,16 +2464,26 @@  ovsdb_idl_process_update(struct ovsdb_idl_table *table,
 }
 
 /* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false
- * otherwise. */
+ * otherwise.
+ *
+ * NOTE: When processing the "modify" updates, the IDL can determine that
+ * previous updates were missed (e.g., due to bugs) and that rows that don't
+ * exist locally should be updated. This indicates that the
+ * IDL is in an inconsistent state and, unlike in ovsdb_idl_process_update(),
+ * the missing rows cannot be inserted. If this is the case, 'inconsistent'
+ * is set to true to indicate the catastrophic failure.
+ */
 static bool
 ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
                           const struct uuid *uuid,
                           const char *operation,
-                          const struct json *json_row)
+                          const struct json *json_row,
+                          bool *inconsistent)
 {
     struct ovsdb_idl_row *row;
 
     row = ovsdb_idl_get_row(table, uuid);
+    *inconsistent = false;
     if (!strcmp(operation, "delete")) {
         /* Delete row. */
         if (row && !ovsdb_idl_row_is_orphan(row)) {
@@ -2488,11 +2515,13 @@  ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
                 VLOG_WARN_RL(&semantic_rl, "cannot modify missing but "
                              "referenced row "UUID_FMT" in table %s",
                              UUID_ARGS(uuid), table->class_->name);
+                *inconsistent = true;
                 return false;
             }
         } else {
             VLOG_WARN_RL(&semantic_rl, "cannot modify missing row "UUID_FMT" "
                          "in table %s", UUID_ARGS(uuid), table->class_->name);
+            *inconsistent = true;
             return false;
         }
     } else {