diff mbox series

[ovs-dev] ovsdb-idl: Fix *_is_new() IDL functions

Message ID 20200929183433.925570-1-mark.d.gray@redhat.com
State Superseded
Headers show
Series [ovs-dev] ovsdb-idl: Fix *_is_new() IDL functions | expand

Commit Message

Mark Gray Sept. 29, 2020, 6:34 p.m. UTC
Currently all functions of the type *_is_new() always return
'false'. This patch resolves this issue by using the
'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
'change_seqno' on clear.

Further to this, the code is also updated to match this
behaviour:

When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
'change_seqno' is updated to match the new database
change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
is not set for inserted rows (only for updated rows).

At the end of a run, ovsdb_idl_db_track_clear() should be
called to clear all tracking information, this includes
resetting all row 'change_seqno' to zero. This will ensure
that subsequent runs will not see a previously 'new' row.

add_tracked_change_for_references() is updated to only
track rows that reference the current row.

Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Suggested-by: Dumitru Ceara <dceara@redhat.com>
Reported-at: https://bugzilla.redhat.com/1883562
---
 lib/ovsdb-idl.c     | 39 +++++++++++++++++++++++++++------------
 ovsdb/ovsdb-idlc.in |  2 +-
 2 files changed, 28 insertions(+), 13 deletions(-)

Comments

Dumitru Ceara Sept. 30, 2020, 9:21 a.m. UTC | #1
On 9/29/20 8:34 PM, Mark Gray wrote:
> Currently all functions of the type *_is_new() always return
> 'false'. This patch resolves this issue by using the
> 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
> 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
> is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
> 'change_seqno' on clear.
> 
> Further to this, the code is also updated to match this
> behaviour:
> 
> When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
> 'change_seqno' is updated to match the new database
> change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
> is not set for inserted rows (only for updated rows).
> 
> At the end of a run, ovsdb_idl_db_track_clear() should be
> called to clear all tracking information, this includes
> resetting all row 'change_seqno' to zero. This will ensure
> that subsequent runs will not see a previously 'new' row.
> 
> add_tracked_change_for_references() is updated to only
> track rows that reference the current row.
> 
> Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
> Suggested-by: Dumitru Ceara <dceara@redhat.com>
> Reported-at: https://bugzilla.redhat.com/1883562

Hi Mark,

Thanks for working on this, it definitely looks like a nasty bug to me!

We were lucky to not hit it in ovn-controller before. That's just
because all the "update" operations were handled there as "delete" +
"insert" but that is not mandatory and might change.

> ---
>  lib/ovsdb-idl.c     | 39 +++++++++++++++++++++++++++------------
>  ovsdb/ovsdb-idlc.in |  2 +-
>  2 files changed, 28 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> index d8f221ca6..3cfd73871 100644
> --- a/lib/ovsdb-idl.c
> +++ b/lib/ovsdb-idl.c
> @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
>                      free(row->updated);
>                      row->updated = NULL;
>                  }
> +
> +                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
> +                    row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
> +                    row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
> +
>                  ovs_list_remove(&row->track_node);
>                  ovs_list_init(&row->track_node);
>                  if (ovsdb_idl_row_is_orphan(row) && row->tracked_old_datum) {
> @@ -2684,24 +2689,27 @@ ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
>      return OVSDB_IDL_UPDATE_DB_CHANGED;
>  }
>  
> -/* Recursively add rows to tracked change lists for current row
> - * and the rows that reference this row. */
> +/* Recursively add rows to tracked change lists for all rows that reference
> +   'row'. */
>  static void
>  add_tracked_change_for_references(struct ovsdb_idl_row *row)
>  {
> -    if (ovs_list_is_empty(&row->track_node) &&
> -            ovsdb_idl_track_is_set(row->table)) {
> -        ovs_list_push_back(&row->table->track_list,
> -                           &row->track_node);
> -        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> -            = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> -            = row->table->db->change_seqno + 1;
> -
>          const struct ovsdb_idl_arc *arc;
>          LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
> -            add_tracked_change_for_references(arc->src);
> +            struct ovsdb_idl_row *ref = arc->src;
> +
> +            if (ovs_list_is_empty(&ref->track_node) &&
> +                ovsdb_idl_track_is_set(ref->table)) {
> +                    ovs_list_push_back(&ref->table->track_list,
> +                                       &ref->track_node);
> +
> +                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> +                    = ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> +                    = ref->table->db->change_seqno + 1;
> +
> +                add_tracked_change_for_references(ref);
> +            }
>          }
> -    }
>  }
>  
>  
> @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row *row, const struct json *row_json,
>                      row->change_seqno[change]
>                          = row->table->change_seqno[change]
>                          = row->table->db->change_seqno + 1;
> +
>                      if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
> +                        if (ovs_list_is_empty(&row->track_node) &&
> +                            ovsdb_idl_track_is_set(row->table)) {
> +                            ovs_list_push_back(&row->table->track_list,
> +                                               &row->track_node);
> +                        }
> +
>                          add_tracked_change_for_references(row);
>                          if (!row->updated) {
>                              row->updated = bitmap_allocate(class->n_columns);
> diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in
> index 698fe25f3..89c3eb682 100755
> --- a/ovsdb/ovsdb-idlc.in
> +++ b/ovsdb/ovsdb-idlc.in
> @@ -282,7 +282,7 @@ const struct %(s)s *%(s)s_table_track_get_first(const struct %(s)s_table *);
>  /* Returns true if 'row' was inserted since the last change tracking reset. */
>  static inline bool %(s)s_is_new(const struct %(s)s *row)
>  {
> -    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0;
> +    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;

This causes some issues with records created by the IDL client (e.g.,
ovn-controller). Those are created with ovsdb_idl_txn_insert() and we
don't set the OVSDB_IDL_CHANGE_INSERT change_seqno in that case.

Essentially *_is_new() will never return true for rows inserted locally.

Regards,
Dumitru

>  }
>  
>  /* Returns true if 'row' was deleted since the last change tracking reset. */
>
Dumitru Ceara Sept. 30, 2020, 9:32 a.m. UTC | #2
On 9/30/20 11:21 AM, Dumitru Ceara wrote:
> On 9/29/20 8:34 PM, Mark Gray wrote:
>> Currently all functions of the type *_is_new() always return
>> 'false'. This patch resolves this issue by using the
>> 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
>> 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
>> is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
>> 'change_seqno' on clear.
>>
>> Further to this, the code is also updated to match this
>> behaviour:
>>
>> When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
>> 'change_seqno' is updated to match the new database
>> change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
>> is not set for inserted rows (only for updated rows).
>>
>> At the end of a run, ovsdb_idl_db_track_clear() should be
>> called to clear all tracking information, this includes
>> resetting all row 'change_seqno' to zero. This will ensure
>> that subsequent runs will not see a previously 'new' row.
>>
>> add_tracked_change_for_references() is updated to only
>> track rows that reference the current row.
>>
>> Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
>> Suggested-by: Dumitru Ceara <dceara@redhat.com>
>> Reported-at: https://bugzilla.redhat.com/1883562
> 
> Hi Mark,
> 
> Thanks for working on this, it definitely looks like a nasty bug to me!
> 
> We were lucky to not hit it in ovn-controller before. That's just
> because all the "update" operations were handled there as "delete" +
> "insert" but that is not mandatory and might change.
> 
>> ---
>>  lib/ovsdb-idl.c     | 39 +++++++++++++++++++++++++++------------
>>  ovsdb/ovsdb-idlc.in |  2 +-
>>  2 files changed, 28 insertions(+), 13 deletions(-)
>>
>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>> index d8f221ca6..3cfd73871 100644
>> --- a/lib/ovsdb-idl.c
>> +++ b/lib/ovsdb-idl.c
>> @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
>>                      free(row->updated);
>>                      row->updated = NULL;
>>                  }
>> +
>> +                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
>> +                    row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
>> +                    row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
>> +
>>                  ovs_list_remove(&row->track_node);
>>                  ovs_list_init(&row->track_node);
>>                  if (ovsdb_idl_row_is_orphan(row) && row->tracked_old_datum) {
>> @@ -2684,24 +2689,27 @@ ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
>>      return OVSDB_IDL_UPDATE_DB_CHANGED;
>>  }
>>  
>> -/* Recursively add rows to tracked change lists for current row
>> - * and the rows that reference this row. */
>> +/* Recursively add rows to tracked change lists for all rows that reference
>> +   'row'. */
>>  static void
>>  add_tracked_change_for_references(struct ovsdb_idl_row *row)
>>  {
>> -    if (ovs_list_is_empty(&row->track_node) &&
>> -            ovsdb_idl_track_is_set(row->table)) {
>> -        ovs_list_push_back(&row->table->track_list,
>> -                           &row->track_node);
>> -        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>> -            = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>> -            = row->table->db->change_seqno + 1;
>> -
>>          const struct ovsdb_idl_arc *arc;
>>          LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
>> -            add_tracked_change_for_references(arc->src);
>> +            struct ovsdb_idl_row *ref = arc->src;
>> +
>> +            if (ovs_list_is_empty(&ref->track_node) &&
>> +                ovsdb_idl_track_is_set(ref->table)) {
>> +                    ovs_list_push_back(&ref->table->track_list,
>> +                                       &ref->track_node);
>> +
>> +                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>> +                    = ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>> +                    = ref->table->db->change_seqno + 1;
>> +
>> +                add_tracked_change_for_references(ref);
>> +            }
>>          }
>> -    }
>>  }
>>  
>>  
>> @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row *row, const struct json *row_json,
>>                      row->change_seqno[change]
>>                          = row->table->change_seqno[change]
>>                          = row->table->db->change_seqno + 1;
>> +
>>                      if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
>> +                        if (ovs_list_is_empty(&row->track_node) &&
>> +                            ovsdb_idl_track_is_set(row->table)) {
>> +                            ovs_list_push_back(&row->table->track_list,
>> +                                               &row->track_node);
>> +                        }
>> +
>>                          add_tracked_change_for_references(row);
>>                          if (!row->updated) {
>>                              row->updated = bitmap_allocate(class->n_columns);
>> diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in
>> index 698fe25f3..89c3eb682 100755
>> --- a/ovsdb/ovsdb-idlc.in
>> +++ b/ovsdb/ovsdb-idlc.in
>> @@ -282,7 +282,7 @@ const struct %(s)s *%(s)s_table_track_get_first(const struct %(s)s_table *);
>>  /* Returns true if 'row' was inserted since the last change tracking reset. */
>>  static inline bool %(s)s_is_new(const struct %(s)s *row)
>>  {
>> -    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0;
>> +    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;
> 
> This causes some issues with records created by the IDL client (e.g.,
> ovn-controller). Those are created with ovsdb_idl_txn_insert() and we
> don't set the OVSDB_IDL_CHANGE_INSERT change_seqno in that case.
> 
> Essentially *_is_new() will never return true for rows inserted locally.
> 

It would also be nice if we could add some OVS self tests in
ovsdb-idl.at for all these changes.

Thanks,
Dumitru
Han Zhou Sept. 30, 2020, 8:04 p.m. UTC | #3
On Wed, Sep 30, 2020 at 2:21 AM Dumitru Ceara <dceara@redhat.com> wrote:
>
> On 9/29/20 8:34 PM, Mark Gray wrote:
> > Currently all functions of the type *_is_new() always return
> > 'false'. This patch resolves this issue by using the
> > 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
> > 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
> > is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
> > 'change_seqno' on clear.
> >
> > Further to this, the code is also updated to match this
> > behaviour:
> >
> > When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
> > 'change_seqno' is updated to match the new database
> > change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
> > is not set for inserted rows (only for updated rows).
> >
> > At the end of a run, ovsdb_idl_db_track_clear() should be
> > called to clear all tracking information, this includes
> > resetting all row 'change_seqno' to zero. This will ensure
> > that subsequent runs will not see a previously 'new' row.
> >
> > add_tracked_change_for_references() is updated to only
> > track rows that reference the current row.
> >
> > Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
> > Suggested-by: Dumitru Ceara <dceara@redhat.com>
> > Reported-at: https://bugzilla.redhat.com/1883562
>
> Hi Mark,
>
> Thanks for working on this, it definitely looks like a nasty bug to me!
>
> We were lucky to not hit it in ovn-controller before. That's just
> because all the "update" operations were handled there as "delete" +
> "insert" but that is not mandatory and might change.
>

Agree. Thanks for this critical finding! It was my bad - commit ca545a787ac
introduced this bug, which was fixing another bug related to is_new. I
should have added test cases to avoid the miss. Apologize.

For this fix, I don't understand why we need to check
OVSDB_IDL_CHANGE_INSERT. I think the root cause is that in
add_tracked_change_for_references() it updated the OVSDB_IDL_CHANGE_MODIFY
seqno for the initial inserted *new* row, while it shouldn't. So should the
fix only include the changes in add_tracked_change_for_references() and
ovsdb_idl_row_change__()? Why do we need to change the implementation of
_is_new() itself? Could you explain?

> > ---
> >  lib/ovsdb-idl.c     | 39 +++++++++++++++++++++++++++------------
> >  ovsdb/ovsdb-idlc.in |  2 +-
> >  2 files changed, 28 insertions(+), 13 deletions(-)
> >
> > diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> > index d8f221ca6..3cfd73871 100644
> > --- a/lib/ovsdb-idl.c
> > +++ b/lib/ovsdb-idl.c
> > @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
> >                      free(row->updated);
> >                      row->updated = NULL;
> >                  }
> > +
> > +                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
> > +                    row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
> > +                    row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
> > +
> >                  ovs_list_remove(&row->track_node);
> >                  ovs_list_init(&row->track_node);
> >                  if (ovsdb_idl_row_is_orphan(row) &&
row->tracked_old_datum) {
> > @@ -2684,24 +2689,27 @@ ovsdb_idl_process_update2(struct
ovsdb_idl_table *table,
> >      return OVSDB_IDL_UPDATE_DB_CHANGED;
> >  }
> >
> > -/* Recursively add rows to tracked change lists for current row
> > - * and the rows that reference this row. */
> > +/* Recursively add rows to tracked change lists for all rows that
reference
> > +   'row'. */
> >  static void
> >  add_tracked_change_for_references(struct ovsdb_idl_row *row)
> >  {
> > -    if (ovs_list_is_empty(&row->track_node) &&
> > -            ovsdb_idl_track_is_set(row->table)) {
> > -        ovs_list_push_back(&row->table->track_list,
> > -                           &row->track_node);
> > -        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> > -            = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> > -            = row->table->db->change_seqno + 1;
> > -
> >          const struct ovsdb_idl_arc *arc;
> >          LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
> > -            add_tracked_change_for_references(arc->src);
> > +            struct ovsdb_idl_row *ref = arc->src;
> > +
> > +            if (ovs_list_is_empty(&ref->track_node) &&
> > +                ovsdb_idl_track_is_set(ref->table)) {
> > +                    ovs_list_push_back(&ref->table->track_list,
> > +                                       &ref->track_node);
> > +
> > +                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> > +                    = ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> > +                    = ref->table->db->change_seqno + 1;
> > +
> > +                add_tracked_change_for_references(ref);
> > +            }
> >          }
> > -    }
> >  }
> >
> >
> > @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row
*row, const struct json *row_json,
> >                      row->change_seqno[change]
> >                          = row->table->change_seqno[change]
> >                          = row->table->db->change_seqno + 1;
> > +
> >                      if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
> > +                        if (ovs_list_is_empty(&row->track_node) &&
> > +                            ovsdb_idl_track_is_set(row->table)) {
> > +                            ovs_list_push_back(&row->table->track_list,
> > +                                               &row->track_node);
> > +                        }
> > +
> >                          add_tracked_change_for_references(row);
> >                          if (!row->updated) {
> >                              row->updated =
bitmap_allocate(class->n_columns);
> > diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in
> > index 698fe25f3..89c3eb682 100755
> > --- a/ovsdb/ovsdb-idlc.in
> > +++ b/ovsdb/ovsdb-idlc.in
> > @@ -282,7 +282,7 @@ const struct %(s)s
*%(s)s_table_track_get_first(const struct %(s)s_table *);
> >  /* Returns true if 'row' was inserted since the last change tracking
reset. */
> >  static inline bool %(s)s_is_new(const struct %(s)s *row)
> >  {
> > -    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0;
> > +    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;
>
> This causes some issues with records created by the IDL client (e.g.,
> ovn-controller). Those are created with ovsdb_idl_txn_insert() and we
> don't set the OVSDB_IDL_CHANGE_INSERT change_seqno in that case.
>
> Essentially *_is_new() will never return true for rows inserted locally.
>
Hi Dumitru, I have a different understanding here. I think this is not a
problem. Whatever changes made locally by the IDL will be discarded when
committing to server and then rely on the notifications from the server to
finally update the IDL data, which updates the change_seqno accordingly.

> Regards,
> Dumitru
>
> >  }
> >
> >  /* Returns true if 'row' was deleted since the last change tracking
reset. */
> >
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Mark Gray Oct. 1, 2020, 9:30 a.m. UTC | #4
On 30/09/2020 21:04, Han Zhou wrote:
> On Wed, Sep 30, 2020 at 2:21 AM Dumitru Ceara <dceara@redhat.com> wrote:
>>
>> On 9/29/20 8:34 PM, Mark Gray wrote:
>>> Currently all functions of the type *_is_new() always return
>>> 'false'. This patch resolves this issue by using the
>>> 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
>>> 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
>>> is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
>>> 'change_seqno' on clear.
>>>
>>> Further to this, the code is also updated to match this
>>> behaviour:
>>>
>>> When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
>>> 'change_seqno' is updated to match the new database
>>> change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
>>> is not set for inserted rows (only for updated rows).
>>>
>>> At the end of a run, ovsdb_idl_db_track_clear() should be
>>> called to clear all tracking information, this includes
>>> resetting all row 'change_seqno' to zero. This will ensure
>>> that subsequent runs will not see a previously 'new' row.
>>>
>>> add_tracked_change_for_references() is updated to only
>>> track rows that reference the current row.
>>>
>>> Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
>>> Suggested-by: Dumitru Ceara <dceara@redhat.com>
>>> Reported-at: https://bugzilla.redhat.com/1883562
>>
>> Hi Mark,
>>
>> Thanks for working on this, it definitely looks like a nasty bug to me!
>>
>> We were lucky to not hit it in ovn-controller before. That's just
>> because all the "update" operations were handled there as "delete" +
>> "insert" but that is not mandatory and might change.>>
> 
> Agree. Thanks for this critical finding! It was my bad - commit ca545a787ac
> introduced this bug, which was fixing another bug related to is_new. I
> should have added test cases to avoid the miss. Apologize.

No worries, I will reference this commit in the commit message.
> 
> For this fix, I don't understand why we need to check
> OVSDB_IDL_CHANGE_INSERT. I think the root cause is that in
> add_tracked_change_for_references() it updated the OVSDB_IDL_CHANGE_MODIFY
> seqno for the initial inserted *new* row, while it shouldn't. So should the
> fix only include the changes in add_tracked_change_for_references() and
> ovsdb_idl_row_change__()? Why do we need to change the implementation of
> _is_new() itself? Could you explain?

In the case of a newly inserted row, the change_seqno triplet will still
be {X, 0, 0}. It will remain as such until the row is modified, at which
point it will become {X, Y, 0}. This means that the _*is_new() function
will continue to return true until the row is modified which is not
correct behaviour. If we reset the change_seqno triplet on each run, it
will still not work. In the end, it is probably better to use the
OVSDB_IDL_CHANGE_INSERT element to track this.

On a seperate point, I don't expect add_tracked_change_for_references()
will ever get called on insert as nothing will be referenced at that
stage? Am I correct in my understanding?

>>> ---
>>>  lib/ovsdb-idl.c     | 39 +++++++++++++++++++++++++++------------
>>>  ovsdb/ovsdb-idlc.in |  2 +-
>>>  2 files changed, 28 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>>> index d8f221ca6..3cfd73871 100644
>>> --- a/lib/ovsdb-idl.c
>>> +++ b/lib/ovsdb-idl.c
>>> @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
>>>                      free(row->updated);
>>>                      row->updated = NULL;
>>>                  }
>>> +
>>> +                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
>>> +
>>>                  ovs_list_remove(&row->track_node);
>>>                  ovs_list_init(&row->track_node);
>>>                  if (ovsdb_idl_row_is_orphan(row) &&
> row->tracked_old_datum) {
>>> @@ -2684,24 +2689,27 @@ ovsdb_idl_process_update2(struct
> ovsdb_idl_table *table,
>>>      return OVSDB_IDL_UPDATE_DB_CHANGED;
>>>  }
>>>
>>> -/* Recursively add rows to tracked change lists for current row
>>> - * and the rows that reference this row. */
>>> +/* Recursively add rows to tracked change lists for all rows that
> reference
>>> +   'row'. */
>>>  static void
>>>  add_tracked_change_for_references(struct ovsdb_idl_row *row)
>>>  {
>>> -    if (ovs_list_is_empty(&row->track_node) &&
>>> -            ovsdb_idl_track_is_set(row->table)) {
>>> -        ovs_list_push_back(&row->table->track_list,
>>> -                           &row->track_node);
>>> -        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>> -            = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>> -            = row->table->db->change_seqno + 1;
>>> -
>>>          const struct ovsdb_idl_arc *arc;
>>>          LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
>>> -            add_tracked_change_for_references(arc->src);
>>> +            struct ovsdb_idl_row *ref = arc->src;
>>> +
>>> +            if (ovs_list_is_empty(&ref->track_node) &&
>>> +                ovsdb_idl_track_is_set(ref->table)) {
>>> +                    ovs_list_push_back(&ref->table->track_list,
>>> +                                       &ref->track_node);
>>> +
>>> +                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>> +                    = ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>> +                    = ref->table->db->change_seqno + 1;
>>> +
>>> +                add_tracked_change_for_references(ref);
>>> +            }
>>>          }
>>> -    }
>>>  }
>>>
>>>
>>> @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row
> *row, const struct json *row_json,
>>>                      row->change_seqno[change]
>>>                          = row->table->change_seqno[change]
>>>                          = row->table->db->change_seqno + 1;
>>> +
>>>                      if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
>>> +                        if (ovs_list_is_empty(&row->track_node) &&
>>> +                            ovsdb_idl_track_is_set(row->table)) {
>>> +                            ovs_list_push_back(&row->table->track_list,
>>> +                                               &row->track_node);
>>> +                        }
>>> +
>>>                          add_tracked_change_for_references(row);
>>>                          if (!row->updated) {
>>>                              row->updated =
> bitmap_allocate(class->n_columns);
>>> diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in
>>> index 698fe25f3..89c3eb682 100755
>>> --- a/ovsdb/ovsdb-idlc.in
>>> +++ b/ovsdb/ovsdb-idlc.in
>>> @@ -282,7 +282,7 @@ const struct %(s)s
> *%(s)s_table_track_get_first(const struct %(s)s_table *);
>>>  /* Returns true if 'row' was inserted since the last change tracking
> reset. */
>>>  static inline bool %(s)s_is_new(const struct %(s)s *row)
>>>  {
>>> -    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0;
>>> +    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;
>>
>> This causes some issues with records created by the IDL client (e.g.,
>> ovn-controller). Those are created with ovsdb_idl_txn_insert() and we
>> don't set the OVSDB_IDL_CHANGE_INSERT change_seqno in that case.
>>
>> Essentially *_is_new() will never return true for rows inserted locally.
>>
> Hi Dumitru, I have a different understanding here. I think this is not a
> problem. Whatever changes made locally by the IDL will be discarded when
> committing to server and then rely on the notifications from the server to
> finally update the IDL data, which updates the change_seqno accordingly.

Dumitru appears to be right. There is a failure on one ovn UT with this
change. It gets resolved by the fix that Dumitru suggests.

> 
>> Regards,
>> Dumitru
>>
>>>  }
>>>
>>>  /* Returns true if 'row' was deleted since the last change tracking
> reset. */
>>>
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Mark Gray Oct. 1, 2020, 9:32 a.m. UTC | #5
On 30/09/2020 10:32, Dumitru Ceara wrote:
> On 9/30/20 11:21 AM, Dumitru Ceara wrote:
>> On 9/29/20 8:34 PM, Mark Gray wrote:
>>> Currently all functions of the type *_is_new() always return
>>> 'false'. This patch resolves this issue by using the
>>> 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
>>> 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
>>> is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
>>> 'change_seqno' on clear.
>>>
>>> Further to this, the code is also updated to match this
>>> behaviour:
>>>
>>> When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
>>> 'change_seqno' is updated to match the new database
>>> change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
>>> is not set for inserted rows (only for updated rows).
>>>
>>> At the end of a run, ovsdb_idl_db_track_clear() should be
>>> called to clear all tracking information, this includes
>>> resetting all row 'change_seqno' to zero. This will ensure
>>> that subsequent runs will not see a previously 'new' row.
>>>
>>> add_tracked_change_for_references() is updated to only
>>> track rows that reference the current row.
>>>
>>> Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
>>> Suggested-by: Dumitru Ceara <dceara@redhat.com>
>>> Reported-at: https://bugzilla.redhat.com/1883562
>>
>> Hi Mark,
>>
>> Thanks for working on this, it definitely looks like a nasty bug to me!
>>
>> We were lucky to not hit it in ovn-controller before. That's just
>> because all the "update" operations were handled there as "delete" +
>> "insert" but that is not mandatory and might change.

Very lucky!

>>
>>> ---
>>>  lib/ovsdb-idl.c     | 39 +++++++++++++++++++++++++++------------
>>>  ovsdb/ovsdb-idlc.in |  2 +-
>>>  2 files changed, 28 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>>> index d8f221ca6..3cfd73871 100644
>>> --- a/lib/ovsdb-idl.c
>>> +++ b/lib/ovsdb-idl.c
>>> @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
>>>                      free(row->updated);
>>>                      row->updated = NULL;
>>>                  }
>>> +
>>> +                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
>>> +
>>>                  ovs_list_remove(&row->track_node);
>>>                  ovs_list_init(&row->track_node);
>>>                  if (ovsdb_idl_row_is_orphan(row) && row->tracked_old_datum) {
>>> @@ -2684,24 +2689,27 @@ ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
>>>      return OVSDB_IDL_UPDATE_DB_CHANGED;
>>>  }
>>>  
>>> -/* Recursively add rows to tracked change lists for current row
>>> - * and the rows that reference this row. */
>>> +/* Recursively add rows to tracked change lists for all rows that reference
>>> +   'row'. */
>>>  static void
>>>  add_tracked_change_for_references(struct ovsdb_idl_row *row)
>>>  {
>>> -    if (ovs_list_is_empty(&row->track_node) &&
>>> -            ovsdb_idl_track_is_set(row->table)) {
>>> -        ovs_list_push_back(&row->table->track_list,
>>> -                           &row->track_node);
>>> -        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>> -            = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>> -            = row->table->db->change_seqno + 1;
>>> -
>>>          const struct ovsdb_idl_arc *arc;
>>>          LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
>>> -            add_tracked_change_for_references(arc->src);
>>> +            struct ovsdb_idl_row *ref = arc->src;
>>> +
>>> +            if (ovs_list_is_empty(&ref->track_node) &&
>>> +                ovsdb_idl_track_is_set(ref->table)) {
>>> +                    ovs_list_push_back(&ref->table->track_list,
>>> +                                       &ref->track_node);
>>> +
>>> +                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>> +                    = ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>> +                    = ref->table->db->change_seqno + 1;
>>> +
>>> +                add_tracked_change_for_references(ref);
>>> +            }
>>>          }
>>> -    }
>>>  }
>>>  
>>>  
>>> @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row *row, const struct json *row_json,
>>>                      row->change_seqno[change]
>>>                          = row->table->change_seqno[change]
>>>                          = row->table->db->change_seqno + 1;
>>> +
>>>                      if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
>>> +                        if (ovs_list_is_empty(&row->track_node) &&
>>> +                            ovsdb_idl_track_is_set(row->table)) {
>>> +                            ovs_list_push_back(&row->table->track_list,
>>> +                                               &row->track_node);
>>> +                        }
>>> +
>>>                          add_tracked_change_for_references(row);
>>>                          if (!row->updated) {
>>>                              row->updated = bitmap_allocate(class->n_columns);
>>> diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in
>>> index 698fe25f3..89c3eb682 100755
>>> --- a/ovsdb/ovsdb-idlc.in
>>> +++ b/ovsdb/ovsdb-idlc.in
>>> @@ -282,7 +282,7 @@ const struct %(s)s *%(s)s_table_track_get_first(const struct %(s)s_table *);
>>>  /* Returns true if 'row' was inserted since the last change tracking reset. */
>>>  static inline bool %(s)s_is_new(const struct %(s)s *row)
>>>  {
>>> -    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0;
>>> +    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;
>>
>> This causes some issues with records created by the IDL client (e.g.,
>> ovn-controller). Those are created with ovsdb_idl_txn_insert() and we
>> don't set the OVSDB_IDL_CHANGE_INSERT change_seqno in that case.
>>
>> Essentially *_is_new() will never return true for rows inserted locally.
>>
> 
> It would also be nice if we could add some OVS self tests in
> ovsdb-idl.at for all these changes.

Good idea, I will have a look at this suite of tests and see what can be
done. Thanks for the suggestion and review.
> 
> Thanks,
> Dumitru
>
Han Zhou Oct. 1, 2020, 11:13 p.m. UTC | #6
On Thu, Oct 1, 2020 at 2:30 AM Mark Gray <mark.d.gray@redhat.com> wrote:
>
> On 30/09/2020 21:04, Han Zhou wrote:
> > On Wed, Sep 30, 2020 at 2:21 AM Dumitru Ceara <dceara@redhat.com> wrote:
> >>
> >> On 9/29/20 8:34 PM, Mark Gray wrote:
> >>> Currently all functions of the type *_is_new() always return
> >>> 'false'. This patch resolves this issue by using the
> >>> 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
> >>> 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
> >>> is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
> >>> 'change_seqno' on clear.
> >>>
> >>> Further to this, the code is also updated to match this
> >>> behaviour:
> >>>
> >>> When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
> >>> 'change_seqno' is updated to match the new database
> >>> change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
> >>> is not set for inserted rows (only for updated rows).
> >>>
> >>> At the end of a run, ovsdb_idl_db_track_clear() should be
> >>> called to clear all tracking information, this includes
> >>> resetting all row 'change_seqno' to zero. This will ensure
> >>> that subsequent runs will not see a previously 'new' row.
> >>>
> >>> add_tracked_change_for_references() is updated to only
> >>> track rows that reference the current row.
> >>>
> >>> Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
> >>> Suggested-by: Dumitru Ceara <dceara@redhat.com>
> >>> Reported-at: https://bugzilla.redhat.com/1883562
> >>
> >> Hi Mark,
> >>
> >> Thanks for working on this, it definitely looks like a nasty bug to me!
> >>
> >> We were lucky to not hit it in ovn-controller before. That's just
> >> because all the "update" operations were handled there as "delete" +
> >> "insert" but that is not mandatory and might change.>>
> >
> > Agree. Thanks for this critical finding! It was my bad - commit
ca545a787ac
> > introduced this bug, which was fixing another bug related to is_new. I
> > should have added test cases to avoid the miss. Apologize.
>
> No worries, I will reference this commit in the commit message.
> >
> > For this fix, I don't understand why we need to check
> > OVSDB_IDL_CHANGE_INSERT. I think the root cause is that in
> > add_tracked_change_for_references() it updated the
OVSDB_IDL_CHANGE_MODIFY
> > seqno for the initial inserted *new* row, while it shouldn't. So should
the
> > fix only include the changes in add_tracked_change_for_references() and
> > ovsdb_idl_row_change__()? Why do we need to change the implementation of
> > _is_new() itself? Could you explain?
>
> In the case of a newly inserted row, the change_seqno triplet will still
> be {X, 0, 0}. It will remain as such until the row is modified, at which
> point it will become {X, Y, 0}. This means that the _*is_new() function
> will continue to return true until the row is modified which is not
> correct behaviour.

Hmm, this is not a problem because if a row inserted in a previous run is
not modified/deleted in the current run, it won't be added to tracking so
no one would call _is_new() for the row. However, there will be a problem
when a row is deleted (see below).

> If we reset the change_seqno triplet on each run, it
> will still not work. In the end, it is probably better to use the
> OVSDB_IDL_CHANGE_INSERT element to track this.
>
Indeed, we should use OVSDB_IDL_CHANGE_INSERT to check _is_new() because if
row is deleted without any modification after insertion, the
OVSDB_IDL_CHANGE_MODIFY seqno would still be zero when it is being deleted.
In that case calling the _is_new() function would return true for a deleted
row, which is wrong. So, your change LGTM.

> On a seperate point, I don't expect add_tracked_change_for_references()
> will ever get called on insert as nothing will be referenced at that
> stage? Am I correct in my understanding?
>
Yes, I think so. More accurately, it will get called once on the inserted
row but will not trigger any recursive calls.

> >>> ---
> >>>  lib/ovsdb-idl.c     | 39 +++++++++++++++++++++++++++------------
> >>>  ovsdb/ovsdb-idlc.in |  2 +-
> >>>  2 files changed, 28 insertions(+), 13 deletions(-)
> >>>
> >>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> >>> index d8f221ca6..3cfd73871 100644
> >>> --- a/lib/ovsdb-idl.c
> >>> +++ b/lib/ovsdb-idl.c
> >>> @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db
*db)
> >>>                      free(row->updated);
> >>>                      row->updated = NULL;
> >>>                  }
> >>> +
> >>> +                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
> >>> +                    row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
> >>> +                    row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
> >>> +
> >>>                  ovs_list_remove(&row->track_node);
> >>>                  ovs_list_init(&row->track_node);
> >>>                  if (ovsdb_idl_row_is_orphan(row) &&
> > row->tracked_old_datum) {
> >>> @@ -2684,24 +2689,27 @@ ovsdb_idl_process_update2(struct
> > ovsdb_idl_table *table,
> >>>      return OVSDB_IDL_UPDATE_DB_CHANGED;
> >>>  }
> >>>
> >>> -/* Recursively add rows to tracked change lists for current row
> >>> - * and the rows that reference this row. */
> >>> +/* Recursively add rows to tracked change lists for all rows that
> > reference
> >>> +   'row'. */
> >>>  static void
> >>>  add_tracked_change_for_references(struct ovsdb_idl_row *row)
> >>>  {
> >>> -    if (ovs_list_is_empty(&row->track_node) &&
> >>> -            ovsdb_idl_track_is_set(row->table)) {
> >>> -        ovs_list_push_back(&row->table->track_list,
> >>> -                           &row->track_node);
> >>> -        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> >>> -            = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> >>> -            = row->table->db->change_seqno + 1;
> >>> -
> >>>          const struct ovsdb_idl_arc *arc;
> >>>          LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
> >>> -            add_tracked_change_for_references(arc->src);
> >>> +            struct ovsdb_idl_row *ref = arc->src;
> >>> +
> >>> +            if (ovs_list_is_empty(&ref->track_node) &&
> >>> +                ovsdb_idl_track_is_set(ref->table)) {
> >>> +                    ovs_list_push_back(&ref->table->track_list,
> >>> +                                       &ref->track_node);
> >>> +
> >>> +                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> >>> +                    =
ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> >>> +                    = ref->table->db->change_seqno + 1;
> >>> +
> >>> +                add_tracked_change_for_references(ref);
> >>> +            }
> >>>          }
> >>> -    }
> >>>  }
> >>>
> >>>
> >>> @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row
> > *row, const struct json *row_json,
> >>>                      row->change_seqno[change]
> >>>                          = row->table->change_seqno[change]
> >>>                          = row->table->db->change_seqno + 1;
> >>> +
> >>>                      if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
> >>> +                        if (ovs_list_is_empty(&row->track_node) &&
> >>> +                            ovsdb_idl_track_is_set(row->table)) {
> >>> +
 ovs_list_push_back(&row->table->track_list,
> >>> +                                               &row->track_node);
> >>> +                        }
> >>> +
> >>>                          add_tracked_change_for_references(row);
> >>>                          if (!row->updated) {
> >>>                              row->updated =
> > bitmap_allocate(class->n_columns);
> >>> diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in
> >>> index 698fe25f3..89c3eb682 100755
> >>> --- a/ovsdb/ovsdb-idlc.in
> >>> +++ b/ovsdb/ovsdb-idlc.in
> >>> @@ -282,7 +282,7 @@ const struct %(s)s
> > *%(s)s_table_track_get_first(const struct %(s)s_table *);
> >>>  /* Returns true if 'row' was inserted since the last change tracking
> > reset. */
> >>>  static inline bool %(s)s_is_new(const struct %(s)s *row)
> >>>  {
> >>> -    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0;
> >>> +    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;
> >>
> >> This causes some issues with records created by the IDL client (e.g.,
> >> ovn-controller). Those are created with ovsdb_idl_txn_insert() and we
> >> don't set the OVSDB_IDL_CHANGE_INSERT change_seqno in that case.
> >>
> >> Essentially *_is_new() will never return true for rows inserted
locally.
> >>
> > Hi Dumitru, I have a different understanding here. I think this is not a
> > problem. Whatever changes made locally by the IDL will be discarded when
> > committing to server and then rely on the notifications from the server
to
> > finally update the IDL data, which updates the change_seqno accordingly.
>
> Dumitru appears to be right. There is a failure on one ovn UT with this
> change. It gets resolved by the fix that Dumitru suggests.
>
I am confused here. Which UT fails and what's the suggested fix? If the row
is added locally, I don't expect it to be tracked at all in the current
iteration (while it will be in the iteration after the transaction commit).
Did I miss anything?

Thanks,
Han

> >
> >> Regards,
> >> Dumitru
> >>
> >>>  }
> >>>
> >>>  /* Returns true if 'row' was deleted since the last change tracking
> > reset. */
> >>>
> >>
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
>
Dumitru Ceara Oct. 8, 2020, 6:57 p.m. UTC | #7
On 10/2/20 1:13 AM, Han Zhou wrote:
> 
> 
> On Thu, Oct 1, 2020 at 2:30 AM Mark Gray <mark.d.gray@redhat.com
> <mailto:mark.d.gray@redhat.com>> wrote:
>>
>> On 30/09/2020 21:04, Han Zhou wrote:
>> > On Wed, Sep 30, 2020 at 2:21 AM Dumitru Ceara <dceara@redhat.com
> <mailto:dceara@redhat.com>> wrote:
>> >>
>> >> On 9/29/20 8:34 PM, Mark Gray wrote:
>> >>> Currently all functions of the type *_is_new() always return
>> >>> 'false'. This patch resolves this issue by using the
>> >>> 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
>> >>> 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
>> >>> is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
>> >>> 'change_seqno' on clear.
>> >>>
>> >>> Further to this, the code is also updated to match this
>> >>> behaviour:
>> >>>
>> >>> When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
>> >>> 'change_seqno' is updated to match the new database
>> >>> change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
>> >>> is not set for inserted rows (only for updated rows).
>> >>>
>> >>> At the end of a run, ovsdb_idl_db_track_clear() should be
>> >>> called to clear all tracking information, this includes
>> >>> resetting all row 'change_seqno' to zero. This will ensure
>> >>> that subsequent runs will not see a previously 'new' row.
>> >>>
>> >>> add_tracked_change_for_references() is updated to only
>> >>> track rows that reference the current row.
>> >>>
>> >>> Signed-off-by: Mark Gray <mark.d.gray@redhat.com
> <mailto:mark.d.gray@redhat.com>>
>> >>> Suggested-by: Dumitru Ceara <dceara@redhat.com
> <mailto:dceara@redhat.com>>
>> >>> Reported-at: https://bugzilla.redhat.com/1883562
>> >>
>> >> Hi Mark,
>> >>
>> >> Thanks for working on this, it definitely looks like a nasty bug to me!
>> >>
>> >> We were lucky to not hit it in ovn-controller before. That's just
>> >> because all the "update" operations were handled there as "delete" +
>> >> "insert" but that is not mandatory and might change.>>
>> >
>> > Agree. Thanks for this critical finding! It was my bad - commit
> ca545a787ac
>> > introduced this bug, which was fixing another bug related to is_new. I
>> > should have added test cases to avoid the miss. Apologize.
>>
>> No worries, I will reference this commit in the commit message.
>> >
>> > For this fix, I don't understand why we need to check
>> > OVSDB_IDL_CHANGE_INSERT. I think the root cause is that in
>> > add_tracked_change_for_references() it updated the
> OVSDB_IDL_CHANGE_MODIFY
>> > seqno for the initial inserted *new* row, while it shouldn't. So
> should the
>> > fix only include the changes in add_tracked_change_for_references() and
>> > ovsdb_idl_row_change__()? Why do we need to change the implementation of
>> > _is_new() itself? Could you explain?
>>
>> In the case of a newly inserted row, the change_seqno triplet will still
>> be {X, 0, 0}. It will remain as such until the row is modified, at which
>> point it will become {X, Y, 0}. This means that the _*is_new() function
>> will continue to return true until the row is modified which is not
>> correct behaviour.
> 
> Hmm, this is not a problem because if a row inserted in a previous run
> is not modified/deleted in the current run, it won't be added to
> tracking so no one would call _is_new() for the row. However, there will
> be a problem when a row is deleted (see below).
> 
>> If we reset the change_seqno triplet on each run, it
>> will still not work. In the end, it is probably better to use the
>> OVSDB_IDL_CHANGE_INSERT element to track this.
>>
> Indeed, we should use OVSDB_IDL_CHANGE_INSERT to check _is_new() because
> if row is deleted without any modification after insertion, the
> OVSDB_IDL_CHANGE_MODIFY seqno would still be zero when it is being
> deleted. In that case calling the _is_new() function would return true
> for a deleted row, which is wrong. So, your change LGTM.
> 
>> On a seperate point, I don't expect add_tracked_change_for_references()
>> will ever get called on insert as nothing will be referenced at that
>> stage? Am I correct in my understanding?
>>
> Yes, I think so. More accurately, it will get called once on the
> inserted row but will not trigger any recursive calls.
> 
>> >>> ---
>> >>>  lib/ovsdb-idl.c     | 39 +++++++++++++++++++++++++++------------
>> >>>  ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> |  2 +-
>> >>>  2 files changed, 28 insertions(+), 13 deletions(-)
>> >>>
>> >>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>> >>> index d8f221ca6..3cfd73871 100644
>> >>> --- a/lib/ovsdb-idl.c
>> >>> +++ b/lib/ovsdb-idl.c
>> >>> @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct
> ovsdb_idl_db *db)
>> >>>                      free(row->updated);
>> >>>                      row->updated = NULL;
>> >>>                  }
>> >>> +
>> >>> +                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
>> >>> +                    row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
>> >>> +                    row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
>> >>> +
>> >>>                  ovs_list_remove(&row->track_node);
>> >>>                  ovs_list_init(&row->track_node);
>> >>>                  if (ovsdb_idl_row_is_orphan(row) &&
>> > row->tracked_old_datum) {
>> >>> @@ -2684,24 +2689,27 @@ ovsdb_idl_process_update2(struct
>> > ovsdb_idl_table *table,
>> >>>      return OVSDB_IDL_UPDATE_DB_CHANGED;
>> >>>  }
>> >>>
>> >>> -/* Recursively add rows to tracked change lists for current row
>> >>> - * and the rows that reference this row. */
>> >>> +/* Recursively add rows to tracked change lists for all rows that
>> > reference
>> >>> +   'row'. */
>> >>>  static void
>> >>>  add_tracked_change_for_references(struct ovsdb_idl_row *row)
>> >>>  {
>> >>> -    if (ovs_list_is_empty(&row->track_node) &&
>> >>> -            ovsdb_idl_track_is_set(row->table)) {
>> >>> -        ovs_list_push_back(&row->table->track_list,
>> >>> -                           &row->track_node);
>> >>> -        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>> >>> -            = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>> >>> -            = row->table->db->change_seqno + 1;
>> >>> -
>> >>>          const struct ovsdb_idl_arc *arc;
>> >>>          LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
>> >>> -            add_tracked_change_for_references(arc->src);
>> >>> +            struct ovsdb_idl_row *ref = arc->src;
>> >>> +
>> >>> +            if (ovs_list_is_empty(&ref->track_node) &&
>> >>> +                ovsdb_idl_track_is_set(ref->table)) {
>> >>> +                    ovs_list_push_back(&ref->table->track_list,
>> >>> +                                       &ref->track_node);
>> >>> +
>> >>> +                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>> >>> +                    =
> ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>> >>> +                    = ref->table->db->change_seqno + 1;
>> >>> +
>> >>> +                add_tracked_change_for_references(ref);
>> >>> +            }
>> >>>          }
>> >>> -    }
>> >>>  }
>> >>>
>> >>>
>> >>> @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row
>> > *row, const struct json *row_json,
>> >>>                      row->change_seqno[change]
>> >>>                          = row->table->change_seqno[change]
>> >>>                          = row->table->db->change_seqno + 1;
>> >>> +
>> >>>                      if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
>> >>> +                        if (ovs_list_is_empty(&row->track_node) &&
>> >>> +                            ovsdb_idl_track_is_set(row->table)) {
>> >>> +                          
>  ovs_list_push_back(&row->table->track_list,
>> >>> +                                               &row->track_node);
>> >>> +                        }
>> >>> +
>> >>>                          add_tracked_change_for_references(row);
>> >>>                          if (!row->updated) {
>> >>>                              row->updated =
>> > bitmap_allocate(class->n_columns);
>> >>> diff --git a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
> b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
>> >>> index 698fe25f3..89c3eb682 100755
>> >>> --- a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
>> >>> +++ b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
>> >>> @@ -282,7 +282,7 @@ const struct %(s)s
>> > *%(s)s_table_track_get_first(const struct %(s)s_table *);
>> >>>  /* Returns true if 'row' was inserted since the last change tracking
>> > reset. */
>> >>>  static inline bool %(s)s_is_new(const struct %(s)s *row)
>> >>>  {
>> >>> -    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0;
>> >>> +    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;
>> >>
>> >> This causes some issues with records created by the IDL client (e.g.,
>> >> ovn-controller). Those are created with ovsdb_idl_txn_insert() and we
>> >> don't set the OVSDB_IDL_CHANGE_INSERT change_seqno in that case.
>> >>
>> >> Essentially *_is_new() will never return true for rows inserted
> locally.
>> >>
>> > Hi Dumitru, I have a different understanding here. I think this is not a
>> > problem. Whatever changes made locally by the IDL will be discarded when
>> > committing to server and then rely on the notifications from the
> server to
>> > finally update the IDL data, which updates the change_seqno accordingly.
>>
>> Dumitru appears to be right. There is a failure on one ovn UT with this
>> change. It gets resolved by the fix that Dumitru suggests.
>>
> I am confused here. Which UT fails and what's the suggested fix? If the
> row is added locally, I don't expect it to be tracked at all in the
> current iteration (while it will be in the iteration after the
> transaction commit). Did I miss anything?
> 
> Thanks,
> Han
> 

Hi Han,

One way to see the issue is:

make sandbox
ovn-sbctl destroy chassis .

At this point ovn-controller continuously tries to transact insert
Chassis_Private even though Chassis_Private was only updated:

2020-10-08T18:42:27.784Z|00024|jsonrpc|DBG|unix:sb1.ovsdb: send request,
method="transact",
params=["OVN_Southbound",{"uuid-name":"row3e499288_5f8c_4859_b82a_adcc474eb114","row":{"name":"chassis-1","chassis":["named-uuid","row6534b1df_bcbb_4b91_8eb1_5bc823f1673e"]},"op":"insert","table":"Chassis_Private"},{"uuid-name":"row9d86c159_0be5_4d13_a8d6_a3414a855511","row":{"ip":"127.0.0.1","options":["map",[["csum","true"]]],"chassis_name":"chassis-1","type":"geneve"},"op":"insert","table":"Encap"},{"uuid-name":"row6534b1df_bcbb_4b91_8eb1_5bc823f1673e","row":{"name":"chassis-1","hostname":"sandbox","other_config":["map",[["datapath-type",""],["iface-types","dummy,dummy-internal,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]],"encaps":["named-uuid","row9d86c159_0be5_4d13_a8d6_a3414a855511"],"external_ids":["map",[["datapath-type",""],["iface-types","dummy,dummy-internal,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]]},"op":"insert","table":"Chassis"},{"comment":"ovn-controller:
registering chassis 'chassis-1'","op":"comment"}], id=14

While, if I set OVSDB_IDL_CHANGE_INSERT in ovsdb_idl_txn_insert() and
redo the same steps as above, ovn-controller doesn't try to insert
Chassis_Private but instead it updates the record.

The only use of _is_new() for potentially locally created records is
here afaik:
https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184

I didn't dig further though.

Regards,
Dumitru
Mark Gray Oct. 10, 2020, 3:42 p.m. UTC | #8
On 08/10/2020 19:57, Dumitru Ceara wrote:
> On 10/2/20 1:13 AM, Han Zhou wrote:
>>
>>
>> On Thu, Oct 1, 2020 at 2:30 AM Mark Gray <mark.d.gray@redhat.com
>> <mailto:mark.d.gray@redhat.com>> wrote:
>>>
>>> On 30/09/2020 21:04, Han Zhou wrote:
>>>> On Wed, Sep 30, 2020 at 2:21 AM Dumitru Ceara <dceara@redhat.com
>> <mailto:dceara@redhat.com>> wrote:
>>>>>
>>>>> On 9/29/20 8:34 PM, Mark Gray wrote:
>>>>>> Currently all functions of the type *_is_new() always return
>>>>>> 'false'. This patch resolves this issue by using the
>>>>>> 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
>>>>>> 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
>>>>>> is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
>>>>>> 'change_seqno' on clear.
>>>>>>
>>>>>> Further to this, the code is also updated to match this
>>>>>> behaviour:
>>>>>>
>>>>>> When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
>>>>>> 'change_seqno' is updated to match the new database
>>>>>> change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
>>>>>> is not set for inserted rows (only for updated rows).
>>>>>>
>>>>>> At the end of a run, ovsdb_idl_db_track_clear() should be
>>>>>> called to clear all tracking information, this includes
>>>>>> resetting all row 'change_seqno' to zero. This will ensure
>>>>>> that subsequent runs will not see a previously 'new' row.
>>>>>>
>>>>>> add_tracked_change_for_references() is updated to only
>>>>>> track rows that reference the current row.
>>>>>>
>>>>>> Signed-off-by: Mark Gray <mark.d.gray@redhat.com
>> <mailto:mark.d.gray@redhat.com>>
>>>>>> Suggested-by: Dumitru Ceara <dceara@redhat.com
>> <mailto:dceara@redhat.com>>
>>>>>> Reported-at: https://bugzilla.redhat.com/1883562
>>>>>
>>>>> Hi Mark,
>>>>>
>>>>> Thanks for working on this, it definitely looks like a nasty bug to me!
>>>>>
>>>>> We were lucky to not hit it in ovn-controller before. That's just
>>>>> because all the "update" operations were handled there as "delete" +
>>>>> "insert" but that is not mandatory and might change.>>
>>>>
>>>> Agree. Thanks for this critical finding! It was my bad - commit
>> ca545a787ac
>>>> introduced this bug, which was fixing another bug related to is_new. I
>>>> should have added test cases to avoid the miss. Apologize.
>>>
>>> No worries, I will reference this commit in the commit message.
>>>>
>>>> For this fix, I don't understand why we need to check
>>>> OVSDB_IDL_CHANGE_INSERT. I think the root cause is that in
>>>> add_tracked_change_for_references() it updated the
>> OVSDB_IDL_CHANGE_MODIFY
>>>> seqno for the initial inserted *new* row, while it shouldn't. So
>> should the
>>>> fix only include the changes in add_tracked_change_for_references() and
>>>> ovsdb_idl_row_change__()? Why do we need to change the implementation of
>>>> _is_new() itself? Could you explain?
>>>
>>> In the case of a newly inserted row, the change_seqno triplet will still
>>> be {X, 0, 0}. It will remain as such until the row is modified, at which
>>> point it will become {X, Y, 0}. This means that the _*is_new() function
>>> will continue to return true until the row is modified which is not
>>> correct behaviour.
>>
>> Hmm, this is not a problem because if a row inserted in a previous run
>> is not modified/deleted in the current run, it won't be added to
>> tracking so no one would call _is_new() for the row. However, there will
>> be a problem when a row is deleted (see below).
>>
>>> If we reset the change_seqno triplet on each run, it
>>> will still not work. In the end, it is probably better to use the
>>> OVSDB_IDL_CHANGE_INSERT element to track this.
>>>
>> Indeed, we should use OVSDB_IDL_CHANGE_INSERT to check _is_new() because
>> if row is deleted without any modification after insertion, the
>> OVSDB_IDL_CHANGE_MODIFY seqno would still be zero when it is being
>> deleted. In that case calling the _is_new() function would return true
>> for a deleted row, which is wrong. So, your change LGTM.
>>
>>> On a seperate point, I don't expect add_tracked_change_for_references()
>>> will ever get called on insert as nothing will be referenced at that
>>> stage? Am I correct in my understanding?
>>>
>> Yes, I think so. More accurately, it will get called once on the
>> inserted row but will not trigger any recursive calls.
>>
>>>>>> ---
>>>>>>  lib/ovsdb-idl.c     | 39 +++++++++++++++++++++++++++------------
>>>>>>  ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> |  2 +-
>>>>>>  2 files changed, 28 insertions(+), 13 deletions(-)
>>>>>>
>>>>>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>>>>>> index d8f221ca6..3cfd73871 100644
>>>>>> --- a/lib/ovsdb-idl.c
>>>>>> +++ b/lib/ovsdb-idl.c
>>>>>> @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct
>> ovsdb_idl_db *db)
>>>>>>                      free(row->updated);
>>>>>>                      row->updated = NULL;
>>>>>>                  }
>>>>>> +
>>>>>> +                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
>>>>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
>>>>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
>>>>>> +
>>>>>>                  ovs_list_remove(&row->track_node);
>>>>>>                  ovs_list_init(&row->track_node);
>>>>>>                  if (ovsdb_idl_row_is_orphan(row) &&
>>>> row->tracked_old_datum) {
>>>>>> @@ -2684,24 +2689,27 @@ ovsdb_idl_process_update2(struct
>>>> ovsdb_idl_table *table,
>>>>>>      return OVSDB_IDL_UPDATE_DB_CHANGED;
>>>>>>  }
>>>>>>
>>>>>> -/* Recursively add rows to tracked change lists for current row
>>>>>> - * and the rows that reference this row. */
>>>>>> +/* Recursively add rows to tracked change lists for all rows that
>>>> reference
>>>>>> +   'row'. */
>>>>>>  static void
>>>>>>  add_tracked_change_for_references(struct ovsdb_idl_row *row)
>>>>>>  {
>>>>>> -    if (ovs_list_is_empty(&row->track_node) &&
>>>>>> -            ovsdb_idl_track_is_set(row->table)) {
>>>>>> -        ovs_list_push_back(&row->table->track_list,
>>>>>> -                           &row->track_node);
>>>>>> -        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>>>>> -            = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>>>>> -            = row->table->db->change_seqno + 1;
>>>>>> -
>>>>>>          const struct ovsdb_idl_arc *arc;
>>>>>>          LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
>>>>>> -            add_tracked_change_for_references(arc->src);
>>>>>> +            struct ovsdb_idl_row *ref = arc->src;
>>>>>> +
>>>>>> +            if (ovs_list_is_empty(&ref->track_node) &&
>>>>>> +                ovsdb_idl_track_is_set(ref->table)) {
>>>>>> +                    ovs_list_push_back(&ref->table->track_list,
>>>>>> +                                       &ref->track_node);
>>>>>> +
>>>>>> +                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>>>>> +                    =
>> ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>>>>> +                    = ref->table->db->change_seqno + 1;
>>>>>> +
>>>>>> +                add_tracked_change_for_references(ref);
>>>>>> +            }
>>>>>>          }
>>>>>> -    }
>>>>>>  }
>>>>>>
>>>>>>
>>>>>> @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row
>>>> *row, const struct json *row_json,
>>>>>>                      row->change_seqno[change]
>>>>>>                          = row->table->change_seqno[change]
>>>>>>                          = row->table->db->change_seqno + 1;
>>>>>> +
>>>>>>                      if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
>>>>>> +                        if (ovs_list_is_empty(&row->track_node) &&
>>>>>> +                            ovsdb_idl_track_is_set(row->table)) {
>>>>>> +                          
>>  ovs_list_push_back(&row->table->track_list,
>>>>>> +                                               &row->track_node);
>>>>>> +                        }
>>>>>> +
>>>>>>                          add_tracked_change_for_references(row);
>>>>>>                          if (!row->updated) {
>>>>>>                              row->updated =
>>>> bitmap_allocate(class->n_columns);
>>>>>> diff --git a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
>> b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
>>>>>> index 698fe25f3..89c3eb682 100755
>>>>>> --- a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
>>>>>> +++ b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
>>>>>> @@ -282,7 +282,7 @@ const struct %(s)s
>>>> *%(s)s_table_track_get_first(const struct %(s)s_table *);
>>>>>>  /* Returns true if 'row' was inserted since the last change tracking
>>>> reset. */
>>>>>>  static inline bool %(s)s_is_new(const struct %(s)s *row)
>>>>>>  {
>>>>>> -    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0;
>>>>>> +    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;
>>>>>
>>>>> This causes some issues with records created by the IDL client (e.g.,
>>>>> ovn-controller). Those are created with ovsdb_idl_txn_insert() and we
>>>>> don't set the OVSDB_IDL_CHANGE_INSERT change_seqno in that case.
>>>>>
>>>>> Essentially *_is_new() will never return true for rows inserted
>> locally.
>>>>>
>>>> Hi Dumitru, I have a different understanding here. I think this is not a
>>>> problem. Whatever changes made locally by the IDL will be discarded when
>>>> committing to server and then rely on the notifications from the
>> server to
>>>> finally update the IDL data, which updates the change_seqno accordingly.
>>>
>>> Dumitru appears to be right. There is a failure on one ovn UT with this
>>> change. It gets resolved by the fix that Dumitru suggests.
>>>
>> I am confused here. Which UT fails and what's the suggested fix? If the
>> row is added locally, I don't expect it to be tracked at all in the
>> current iteration (while it will be in the iteration after the
>> transaction commit). Did I miss anything?
>>
>> Thanks,
>> Han
>>
> 
> Hi Han,
> 
> One way to see the issue is:
> 
> make sandbox
> ovn-sbctl destroy chassis .
> 
> At this point ovn-controller continuously tries to transact insert
> Chassis_Private even though Chassis_Private was only updated:
> 
> 2020-10-08T18:42:27.784Z|00024|jsonrpc|DBG|unix:sb1.ovsdb: send request,
> method="transact",
> params=["OVN_Southbound",{"uuid-name":"row3e499288_5f8c_4859_b82a_adcc474eb114","row":{"name":"chassis-1","chassis":["named-uuid","row6534b1df_bcbb_4b91_8eb1_5bc823f1673e"]},"op":"insert","table":"Chassis_Private"},{"uuid-name":"row9d86c159_0be5_4d13_a8d6_a3414a855511","row":{"ip":"127.0.0.1","options":["map",[["csum","true"]]],"chassis_name":"chassis-1","type":"geneve"},"op":"insert","table":"Encap"},{"uuid-name":"row6534b1df_bcbb_4b91_8eb1_5bc823f1673e","row":{"name":"chassis-1","hostname":"sandbox","other_config":["map",[["datapath-type",""],["iface-types","dummy,dummy-internal,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]],"encaps":["named-uuid","row9d86c159_0be5_4d13_a8d6_a3414a855511"],"external_ids":["map",[["datapath-type",""],["iface-types","dummy,dummy-internal,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]]},"op":"insert","table":"Chassis"},{"comment":"ovn-controller:
> registering chassis 'chassis-1'","op":"comment"}], id=14
> 
> While, if I set OVSDB_IDL_CHANGE_INSERT in ovsdb_idl_txn_insert() and
> redo the same steps as above, ovn-controller doesn't try to insert
> Chassis_Private but instead it updates the record.
> 
> The only use of _is_new() for potentially locally created records is
> here afaik:
> https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184
> 
> I didn't dig further though.

Sorry for the delay. The following test fails:

 90: ovn -- ensure one gw controller restart in HA doesn't bounce the
master FAILED (ovs-macros.at:220)

<snip>
ovn.at:12213: waiting until test 1 = `ovn-sbctl --bare --columns=_uuid
find Chassis name=gw2 | wc -l`...
ovn.at:12213: wait failed after 30 seconds
./ovs-macros.at:220: hard failure
90. ovn.at:12139: 90. ovn -- ensure one gw controller restart in HA
doesn't bounce the master (ovn.at:12139): FAILED (ovs-macros.at:220)
<snip>

This is similar failure as the repoducer highlighted by Dumitru above.
It is resolved by:

+++ b/lib/ovsdb-idl.c
@@ -4858,6 +4858,11 @@ ovsdb_idl_txn_insert(struct ovsdb_idl_txn *txn,
     hmap_insert(&row->table->rows, &row->hmap_node, uuid_hash(&row->uuid));
     hmap_insert(&txn->txn_rows, &row->txn_node, uuid_hash(&row->uuid));
     ovsdb_idl_add_to_indexes(row);
+
+    row->change_seqno[OVSDB_IDL_CHANGE_INSERT]
+        = row->table->change_seqno[OVSDB_IDL_CHANGE_INSERT]
+        = row->table->db->change_seqno + 1;
+


> 
> Regards,
> Dumitru
>
Mark Gray Oct. 12, 2020, 2:32 p.m. UTC | #9
On 10/10/2020 16:42, Mark Gray wrote:
> On 08/10/2020 19:57, Dumitru Ceara wrote:
>> On 10/2/20 1:13 AM, Han Zhou wrote:
>>>
>>>
>>> On Thu, Oct 1, 2020 at 2:30 AM Mark Gray <mark.d.gray@redhat.com
>>> <mailto:mark.d.gray@redhat.com>> wrote:
>>>>
>>>> On 30/09/2020 21:04, Han Zhou wrote:
>>>>> On Wed, Sep 30, 2020 at 2:21 AM Dumitru Ceara <dceara@redhat.com
>>> <mailto:dceara@redhat.com>> wrote:
>>>>>>
>>>>>> On 9/29/20 8:34 PM, Mark Gray wrote:
>>>>>>> Currently all functions of the type *_is_new() always return
>>>>>>> 'false'. This patch resolves this issue by using the
>>>>>>> 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
>>>>>>> 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
>>>>>>> is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
>>>>>>> 'change_seqno' on clear.
>>>>>>>
>>>>>>> Further to this, the code is also updated to match this
>>>>>>> behaviour:
>>>>>>>
>>>>>>> When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
>>>>>>> 'change_seqno' is updated to match the new database
>>>>>>> change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
>>>>>>> is not set for inserted rows (only for updated rows).
>>>>>>>
>>>>>>> At the end of a run, ovsdb_idl_db_track_clear() should be
>>>>>>> called to clear all tracking information, this includes
>>>>>>> resetting all row 'change_seqno' to zero. This will ensure
>>>>>>> that subsequent runs will not see a previously 'new' row.
>>>>>>>
>>>>>>> add_tracked_change_for_references() is updated to only
>>>>>>> track rows that reference the current row.
>>>>>>>
>>>>>>> Signed-off-by: Mark Gray <mark.d.gray@redhat.com
>>> <mailto:mark.d.gray@redhat.com>>
>>>>>>> Suggested-by: Dumitru Ceara <dceara@redhat.com
>>> <mailto:dceara@redhat.com>>
>>>>>>> Reported-at: https://bugzilla.redhat.com/1883562
>>>>>>
>>>>>> Hi Mark,
>>>>>>
>>>>>> Thanks for working on this, it definitely looks like a nasty bug to me!
>>>>>>
>>>>>> We were lucky to not hit it in ovn-controller before. That's just
>>>>>> because all the "update" operations were handled there as "delete" +
>>>>>> "insert" but that is not mandatory and might change.>>
>>>>>
>>>>> Agree. Thanks for this critical finding! It was my bad - commit
>>> ca545a787ac
>>>>> introduced this bug, which was fixing another bug related to is_new. I
>>>>> should have added test cases to avoid the miss. Apologize.
>>>>
>>>> No worries, I will reference this commit in the commit message.
>>>>>
>>>>> For this fix, I don't understand why we need to check
>>>>> OVSDB_IDL_CHANGE_INSERT. I think the root cause is that in
>>>>> add_tracked_change_for_references() it updated the
>>> OVSDB_IDL_CHANGE_MODIFY
>>>>> seqno for the initial inserted *new* row, while it shouldn't. So
>>> should the
>>>>> fix only include the changes in add_tracked_change_for_references() and
>>>>> ovsdb_idl_row_change__()? Why do we need to change the implementation of
>>>>> _is_new() itself? Could you explain?
>>>>
>>>> In the case of a newly inserted row, the change_seqno triplet will still
>>>> be {X, 0, 0}. It will remain as such until the row is modified, at which
>>>> point it will become {X, Y, 0}. This means that the _*is_new() function
>>>> will continue to return true until the row is modified which is not
>>>> correct behaviour.
>>>
>>> Hmm, this is not a problem because if a row inserted in a previous run
>>> is not modified/deleted in the current run, it won't be added to
>>> tracking so no one would call _is_new() for the row. However, there will
>>> be a problem when a row is deleted (see below).
>>>
>>>> If we reset the change_seqno triplet on each run, it
>>>> will still not work. In the end, it is probably better to use the
>>>> OVSDB_IDL_CHANGE_INSERT element to track this.
>>>>
>>> Indeed, we should use OVSDB_IDL_CHANGE_INSERT to check _is_new() because
>>> if row is deleted without any modification after insertion, the
>>> OVSDB_IDL_CHANGE_MODIFY seqno would still be zero when it is being
>>> deleted. In that case calling the _is_new() function would return true
>>> for a deleted row, which is wrong. So, your change LGTM.
>>>
>>>> On a seperate point, I don't expect add_tracked_change_for_references()
>>>> will ever get called on insert as nothing will be referenced at that
>>>> stage? Am I correct in my understanding?
>>>>
>>> Yes, I think so. More accurately, it will get called once on the
>>> inserted row but will not trigger any recursive calls.
>>>
>>>>>>> ---
>>>>>>>  lib/ovsdb-idl.c     | 39 +++++++++++++++++++++++++++------------
>>>>>>>  ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> |  2 +-
>>>>>>>  2 files changed, 28 insertions(+), 13 deletions(-)
>>>>>>>
>>>>>>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>>>>>>> index d8f221ca6..3cfd73871 100644
>>>>>>> --- a/lib/ovsdb-idl.c
>>>>>>> +++ b/lib/ovsdb-idl.c
>>>>>>> @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct
>>> ovsdb_idl_db *db)
>>>>>>>                      free(row->updated);
>>>>>>>                      row->updated = NULL;
>>>>>>>                  }
>>>>>>> +
>>>>>>> +                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
>>>>>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
>>>>>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
>>>>>>> +
>>>>>>>                  ovs_list_remove(&row->track_node);
>>>>>>>                  ovs_list_init(&row->track_node);
>>>>>>>                  if (ovsdb_idl_row_is_orphan(row) &&
>>>>> row->tracked_old_datum) {
>>>>>>> @@ -2684,24 +2689,27 @@ ovsdb_idl_process_update2(struct
>>>>> ovsdb_idl_table *table,
>>>>>>>      return OVSDB_IDL_UPDATE_DB_CHANGED;
>>>>>>>  }
>>>>>>>
>>>>>>> -/* Recursively add rows to tracked change lists for current row
>>>>>>> - * and the rows that reference this row. */
>>>>>>> +/* Recursively add rows to tracked change lists for all rows that
>>>>> reference
>>>>>>> +   'row'. */
>>>>>>>  static void
>>>>>>>  add_tracked_change_for_references(struct ovsdb_idl_row *row)
>>>>>>>  {
>>>>>>> -    if (ovs_list_is_empty(&row->track_node) &&
>>>>>>> -            ovsdb_idl_track_is_set(row->table)) {
>>>>>>> -        ovs_list_push_back(&row->table->track_list,
>>>>>>> -                           &row->track_node);
>>>>>>> -        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>>>>>> -            = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>>>>>> -            = row->table->db->change_seqno + 1;
>>>>>>> -
>>>>>>>          const struct ovsdb_idl_arc *arc;
>>>>>>>          LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
>>>>>>> -            add_tracked_change_for_references(arc->src);
>>>>>>> +            struct ovsdb_idl_row *ref = arc->src;
>>>>>>> +
>>>>>>> +            if (ovs_list_is_empty(&ref->track_node) &&
>>>>>>> +                ovsdb_idl_track_is_set(ref->table)) {
>>>>>>> +                    ovs_list_push_back(&ref->table->track_list,
>>>>>>> +                                       &ref->track_node);
>>>>>>> +
>>>>>>> +                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>>>>>> +                    =
>>> ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>>>>>> +                    = ref->table->db->change_seqno + 1;
>>>>>>> +
>>>>>>> +                add_tracked_change_for_references(ref);
>>>>>>> +            }
>>>>>>>          }
>>>>>>> -    }
>>>>>>>  }
>>>>>>>
>>>>>>>
>>>>>>> @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row
>>>>> *row, const struct json *row_json,
>>>>>>>                      row->change_seqno[change]
>>>>>>>                          = row->table->change_seqno[change]
>>>>>>>                          = row->table->db->change_seqno + 1;
>>>>>>> +
>>>>>>>                      if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
>>>>>>> +                        if (ovs_list_is_empty(&row->track_node) &&
>>>>>>> +                            ovsdb_idl_track_is_set(row->table)) {
>>>>>>> +                          
>>>  ovs_list_push_back(&row->table->track_list,
>>>>>>> +                                               &row->track_node);
>>>>>>> +                        }
>>>>>>> +
>>>>>>>                          add_tracked_change_for_references(row);
>>>>>>>                          if (!row->updated) {
>>>>>>>                              row->updated =
>>>>> bitmap_allocate(class->n_columns);
>>>>>>> diff --git a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
>>> b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
>>>>>>> index 698fe25f3..89c3eb682 100755
>>>>>>> --- a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
>>>>>>> +++ b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
>>>>>>> @@ -282,7 +282,7 @@ const struct %(s)s
>>>>> *%(s)s_table_track_get_first(const struct %(s)s_table *);
>>>>>>>  /* Returns true if 'row' was inserted since the last change tracking
>>>>> reset. */
>>>>>>>  static inline bool %(s)s_is_new(const struct %(s)s *row)
>>>>>>>  {
>>>>>>> -    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0;
>>>>>>> +    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;
>>>>>>
>>>>>> This causes some issues with records created by the IDL client (e.g.,
>>>>>> ovn-controller). Those are created with ovsdb_idl_txn_insert() and we
>>>>>> don't set the OVSDB_IDL_CHANGE_INSERT change_seqno in that case.
>>>>>>
>>>>>> Essentially *_is_new() will never return true for rows inserted
>>> locally.
>>>>>>
>>>>> Hi Dumitru, I have a different understanding here. I think this is not a
>>>>> problem. Whatever changes made locally by the IDL will be discarded when
>>>>> committing to server and then rely on the notifications from the
>>> server to
>>>>> finally update the IDL data, which updates the change_seqno accordingly.
>>>>
>>>> Dumitru appears to be right. There is a failure on one ovn UT with this
>>>> change. It gets resolved by the fix that Dumitru suggests.
>>>>
>>> I am confused here. Which UT fails and what's the suggested fix? If the
>>> row is added locally, I don't expect it to be tracked at all in the
>>> current iteration (while it will be in the iteration after the
>>> transaction commit). Did I miss anything?
>>>
>>> Thanks,
>>> Han
>>>
>>
>> Hi Han,
>>
>> One way to see the issue is:
>>
>> make sandbox
>> ovn-sbctl destroy chassis .
>>
>> At this point ovn-controller continuously tries to transact insert
>> Chassis_Private even though Chassis_Private was only updated:
>>
>> 2020-10-08T18:42:27.784Z|00024|jsonrpc|DBG|unix:sb1.ovsdb: send request,
>> method="transact",
>> params=["OVN_Southbound",{"uuid-name":"row3e499288_5f8c_4859_b82a_adcc474eb114","row":{"name":"chassis-1","chassis":["named-uuid","row6534b1df_bcbb_4b91_8eb1_5bc823f1673e"]},"op":"insert","table":"Chassis_Private"},{"uuid-name":"row9d86c159_0be5_4d13_a8d6_a3414a855511","row":{"ip":"127.0.0.1","options":["map",[["csum","true"]]],"chassis_name":"chassis-1","type":"geneve"},"op":"insert","table":"Encap"},{"uuid-name":"row6534b1df_bcbb_4b91_8eb1_5bc823f1673e","row":{"name":"chassis-1","hostname":"sandbox","other_config":["map",[["datapath-type",""],["iface-types","dummy,dummy-internal,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]],"encaps":["named-uuid","row9d86c159_0be5_4d13_a8d6_a3414a855511"],"external_ids":["map",[["datapath-type",""],["iface-types","dummy,dummy-internal,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]]},"op":"insert","table":"Chassis"},{"comment":"ovn-controller:
>> registering chassis 'chassis-1'","op":"comment"}], id=14
>>
>> While, if I set OVSDB_IDL_CHANGE_INSERT in ovsdb_idl_txn_insert() and
>> redo the same steps as above, ovn-controller doesn't try to insert
>> Chassis_Private but instead it updates the record.
>>
>> The only use of _is_new() for potentially locally created records is
>> here afaik:
>> https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184
>>
>> I didn't dig further though.
> 
> Sorry for the delay. The following test fails:
> 
>  90: ovn -- ensure one gw controller restart in HA doesn't bounce the
> master FAILED (ovs-macros.at:220)
> 
> <snip>
> ovn.at:12213: waiting until test 1 = `ovn-sbctl --bare --columns=_uuid
> find Chassis name=gw2 | wc -l`...
> ovn.at:12213: wait failed after 30 seconds
> ./ovs-macros.at:220: hard failure
> 90. ovn.at:12139: 90. ovn -- ensure one gw controller restart in HA
> doesn't bounce the master (ovn.at:12139): FAILED (ovs-macros.at:220)
> <snip>
> 
> This is similar failure as the repoducer highlighted by Dumitru above.
> It is resolved by:
> 
> +++ b/lib/ovsdb-idl.c
> @@ -4858,6 +4858,11 @@ ovsdb_idl_txn_insert(struct ovsdb_idl_txn *txn,
>      hmap_insert(&row->table->rows, &row->hmap_node, uuid_hash(&row->uuid));
>      hmap_insert(&txn->txn_rows, &row->txn_node, uuid_hash(&row->uuid));
>      ovsdb_idl_add_to_indexes(row);
> +
> +    row->change_seqno[OVSDB_IDL_CHANGE_INSERT]
> +        = row->table->change_seqno[OVSDB_IDL_CHANGE_INSERT]
> +        = row->table->db->change_seqno + 1;
> +
> 
> 
>>
>> Regards,
>> Dumitru
>>
> 

v2 published at
https://mail.openvswitch.org/pipermail/ovs-dev/2020-October/375962.html
Han Zhou Oct. 19, 2020, 6:22 p.m. UTC | #10
On Mon, Oct 12, 2020 at 7:32 AM Mark Gray <mark.d.gray@redhat.com> wrote:
>
> On 10/10/2020 16:42, Mark Gray wrote:
> > On 08/10/2020 19:57, Dumitru Ceara wrote:
> >> On 10/2/20 1:13 AM, Han Zhou wrote:
> >>>
> >>>
> >>> On Thu, Oct 1, 2020 at 2:30 AM Mark Gray <mark.d.gray@redhat.com
> >>> <mailto:mark.d.gray@redhat.com>> wrote:
> >>>>
> >>>> On 30/09/2020 21:04, Han Zhou wrote:
> >>>>> On Wed, Sep 30, 2020 at 2:21 AM Dumitru Ceara <dceara@redhat.com
> >>> <mailto:dceara@redhat.com>> wrote:
> >>>>>>
> >>>>>> On 9/29/20 8:34 PM, Mark Gray wrote:
> >>>>>>> Currently all functions of the type *_is_new() always return
> >>>>>>> 'false'. This patch resolves this issue by using the
> >>>>>>> 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
> >>>>>>> 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
> >>>>>>> is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
> >>>>>>> 'change_seqno' on clear.
> >>>>>>>
> >>>>>>> Further to this, the code is also updated to match this
> >>>>>>> behaviour:
> >>>>>>>
> >>>>>>> When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
> >>>>>>> 'change_seqno' is updated to match the new database
> >>>>>>> change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
> >>>>>>> is not set for inserted rows (only for updated rows).
> >>>>>>>
> >>>>>>> At the end of a run, ovsdb_idl_db_track_clear() should be
> >>>>>>> called to clear all tracking information, this includes
> >>>>>>> resetting all row 'change_seqno' to zero. This will ensure
> >>>>>>> that subsequent runs will not see a previously 'new' row.
> >>>>>>>
> >>>>>>> add_tracked_change_for_references() is updated to only
> >>>>>>> track rows that reference the current row.
> >>>>>>>
> >>>>>>> Signed-off-by: Mark Gray <mark.d.gray@redhat.com
> >>> <mailto:mark.d.gray@redhat.com>>
> >>>>>>> Suggested-by: Dumitru Ceara <dceara@redhat.com
> >>> <mailto:dceara@redhat.com>>
> >>>>>>> Reported-at: https://bugzilla.redhat.com/1883562
> >>>>>>
> >>>>>> Hi Mark,
> >>>>>>
> >>>>>> Thanks for working on this, it definitely looks like a nasty bug
to me!
> >>>>>>
> >>>>>> We were lucky to not hit it in ovn-controller before. That's just
> >>>>>> because all the "update" operations were handled there as "delete"
+
> >>>>>> "insert" but that is not mandatory and might change.>>
> >>>>>
> >>>>> Agree. Thanks for this critical finding! It was my bad - commit
> >>> ca545a787ac
> >>>>> introduced this bug, which was fixing another bug related to
is_new. I
> >>>>> should have added test cases to avoid the miss. Apologize.
> >>>>
> >>>> No worries, I will reference this commit in the commit message.
> >>>>>
> >>>>> For this fix, I don't understand why we need to check
> >>>>> OVSDB_IDL_CHANGE_INSERT. I think the root cause is that in
> >>>>> add_tracked_change_for_references() it updated the
> >>> OVSDB_IDL_CHANGE_MODIFY
> >>>>> seqno for the initial inserted *new* row, while it shouldn't. So
> >>> should the
> >>>>> fix only include the changes in add_tracked_change_for_references()
and
> >>>>> ovsdb_idl_row_change__()? Why do we need to change the
implementation of
> >>>>> _is_new() itself? Could you explain?
> >>>>
> >>>> In the case of a newly inserted row, the change_seqno triplet will
still
> >>>> be {X, 0, 0}. It will remain as such until the row is modified, at
which
> >>>> point it will become {X, Y, 0}. This means that the _*is_new()
function
> >>>> will continue to return true until the row is modified which is not
> >>>> correct behaviour.
> >>>
> >>> Hmm, this is not a problem because if a row inserted in a previous run
> >>> is not modified/deleted in the current run, it won't be added to
> >>> tracking so no one would call _is_new() for the row. However, there
will
> >>> be a problem when a row is deleted (see below).
> >>>
> >>>> If we reset the change_seqno triplet on each run, it
> >>>> will still not work. In the end, it is probably better to use the
> >>>> OVSDB_IDL_CHANGE_INSERT element to track this.
> >>>>
> >>> Indeed, we should use OVSDB_IDL_CHANGE_INSERT to check _is_new()
because
> >>> if row is deleted without any modification after insertion, the
> >>> OVSDB_IDL_CHANGE_MODIFY seqno would still be zero when it is being
> >>> deleted. In that case calling the _is_new() function would return true
> >>> for a deleted row, which is wrong. So, your change LGTM.
> >>>
> >>>> On a seperate point, I don't expect
add_tracked_change_for_references()
> >>>> will ever get called on insert as nothing will be referenced at that
> >>>> stage? Am I correct in my understanding?
> >>>>
> >>> Yes, I think so. More accurately, it will get called once on the
> >>> inserted row but will not trigger any recursive calls.
> >>>
> >>>>>>> ---
> >>>>>>>  lib/ovsdb-idl.c     | 39 +++++++++++++++++++++++++++------------
> >>>>>>>  ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> |  2 +-
> >>>>>>>  2 files changed, 28 insertions(+), 13 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> >>>>>>> index d8f221ca6..3cfd73871 100644
> >>>>>>> --- a/lib/ovsdb-idl.c
> >>>>>>> +++ b/lib/ovsdb-idl.c
> >>>>>>> @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct
> >>> ovsdb_idl_db *db)
> >>>>>>>                      free(row->updated);
> >>>>>>>                      row->updated = NULL;
> >>>>>>>                  }
> >>>>>>> +
> >>>>>>> +                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
> >>>>>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
> >>>>>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_DELETE] =
0;
> >>>>>>> +
> >>>>>>>                  ovs_list_remove(&row->track_node);
> >>>>>>>                  ovs_list_init(&row->track_node);
> >>>>>>>                  if (ovsdb_idl_row_is_orphan(row) &&
> >>>>> row->tracked_old_datum) {
> >>>>>>> @@ -2684,24 +2689,27 @@ ovsdb_idl_process_update2(struct
> >>>>> ovsdb_idl_table *table,
> >>>>>>>      return OVSDB_IDL_UPDATE_DB_CHANGED;
> >>>>>>>  }
> >>>>>>>
> >>>>>>> -/* Recursively add rows to tracked change lists for current row
> >>>>>>> - * and the rows that reference this row. */
> >>>>>>> +/* Recursively add rows to tracked change lists for all rows that
> >>>>> reference
> >>>>>>> +   'row'. */
> >>>>>>>  static void
> >>>>>>>  add_tracked_change_for_references(struct ovsdb_idl_row *row)
> >>>>>>>  {
> >>>>>>> -    if (ovs_list_is_empty(&row->track_node) &&
> >>>>>>> -            ovsdb_idl_track_is_set(row->table)) {
> >>>>>>> -        ovs_list_push_back(&row->table->track_list,
> >>>>>>> -                           &row->track_node);
> >>>>>>> -        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> >>>>>>> -            = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> >>>>>>> -            = row->table->db->change_seqno + 1;
> >>>>>>> -
> >>>>>>>          const struct ovsdb_idl_arc *arc;
> >>>>>>>          LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
> >>>>>>> -            add_tracked_change_for_references(arc->src);
> >>>>>>> +            struct ovsdb_idl_row *ref = arc->src;
> >>>>>>> +
> >>>>>>> +            if (ovs_list_is_empty(&ref->track_node) &&
> >>>>>>> +                ovsdb_idl_track_is_set(ref->table)) {
> >>>>>>> +                    ovs_list_push_back(&ref->table->track_list,
> >>>>>>> +                                       &ref->track_node);
> >>>>>>> +
> >>>>>>> +                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> >>>>>>> +                    =
> >>> ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> >>>>>>> +                    = ref->table->db->change_seqno + 1;
> >>>>>>> +
> >>>>>>> +                add_tracked_change_for_references(ref);
> >>>>>>> +            }
> >>>>>>>          }
> >>>>>>> -    }
> >>>>>>>  }
> >>>>>>>
> >>>>>>>
> >>>>>>> @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row
> >>>>> *row, const struct json *row_json,
> >>>>>>>                      row->change_seqno[change]
> >>>>>>>                          = row->table->change_seqno[change]
> >>>>>>>                          = row->table->db->change_seqno + 1;
> >>>>>>> +
> >>>>>>>                      if (table->modes[column_idx] &
OVSDB_IDL_TRACK) {
> >>>>>>> +                        if (ovs_list_is_empty(&row->track_node)
&&
> >>>>>>> +                            ovsdb_idl_track_is_set(row->table)) {
> >>>>>>> +
> >>>  ovs_list_push_back(&row->table->track_list,
> >>>>>>> +                                               &row->track_node);
> >>>>>>> +                        }
> >>>>>>> +
> >>>>>>>                          add_tracked_change_for_references(row);
> >>>>>>>                          if (!row->updated) {
> >>>>>>>                              row->updated =
> >>>>> bitmap_allocate(class->n_columns);
> >>>>>>> diff --git a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
> >>> b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
> >>>>>>> index 698fe25f3..89c3eb682 100755
> >>>>>>> --- a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
> >>>>>>> +++ b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in>
> >>>>>>> @@ -282,7 +282,7 @@ const struct %(s)s
> >>>>> *%(s)s_table_track_get_first(const struct %(s)s_table *);
> >>>>>>>  /* Returns true if 'row' was inserted since the last change
tracking
> >>>>> reset. */
> >>>>>>>  static inline bool %(s)s_is_new(const struct %(s)s *row)
> >>>>>>>  {
> >>>>>>> -    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) ==
0;
> >>>>>>> +    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;
> >>>>>>
> >>>>>> This causes some issues with records created by the IDL client
(e.g.,
> >>>>>> ovn-controller). Those are created with ovsdb_idl_txn_insert() and
we
> >>>>>> don't set the OVSDB_IDL_CHANGE_INSERT change_seqno in that case.
> >>>>>>
> >>>>>> Essentially *_is_new() will never return true for rows inserted
> >>> locally.
> >>>>>>
> >>>>> Hi Dumitru, I have a different understanding here. I think this is
not a
> >>>>> problem. Whatever changes made locally by the IDL will be discarded
when
> >>>>> committing to server and then rely on the notifications from the
> >>> server to
> >>>>> finally update the IDL data, which updates the change_seqno
accordingly.
> >>>>
> >>>> Dumitru appears to be right. There is a failure on one ovn UT with
this
> >>>> change. It gets resolved by the fix that Dumitru suggests.
> >>>>
> >>> I am confused here. Which UT fails and what's the suggested fix? If
the
> >>> row is added locally, I don't expect it to be tracked at all in the
> >>> current iteration (while it will be in the iteration after the
> >>> transaction commit). Did I miss anything?
> >>>
> >>> Thanks,
> >>> Han
> >>>
> >>
> >> Hi Han,
> >>
> >> One way to see the issue is:
> >>
> >> make sandbox
> >> ovn-sbctl destroy chassis .
> >>
> >> At this point ovn-controller continuously tries to transact insert
> >> Chassis_Private even though Chassis_Private was only updated:
> >>
> >> 2020-10-08T18:42:27.784Z|00024|jsonrpc|DBG|unix:sb1.ovsdb: send
request,
> >> method="transact",
> >>
params=["OVN_Southbound",{"uuid-name":"row3e499288_5f8c_4859_b82a_adcc474eb114","row":{"name":"chassis-1","chassis":["named-uuid","row6534b1df_bcbb_4b91_8eb1_5bc823f1673e"]},"op":"insert","table":"Chassis_Private"},{"uuid-name":"row9d86c159_0be5_4d13_a8d6_a3414a855511","row":{"ip":"127.0.0.1","options":["map",[["csum","true"]]],"chassis_name":"chassis-1","type":"geneve"},"op":"insert","table":"Encap"},{"uuid-name":"row6534b1df_bcbb_4b91_8eb1_5bc823f1673e","row":{"name":"chassis-1","hostname":"sandbox","other_config":["map",[["datapath-type",""],["iface-types","dummy,dummy-internal,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]],"encaps":["named-uuid","row9d86c159_0be5_4d13_a8d6_a3414a855511"],"external_ids":["map",[["datapath-type",""],["iface-types","dummy,dummy-
 internal
 ,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]]},"op":"insert","table":"Chassis"},{"comment":"ovn-controller:
> >> registering chassis 'chassis-1'","op":"comment"}], id=14
> >>
> >> While, if I set OVSDB_IDL_CHANGE_INSERT in ovsdb_idl_txn_insert() and
> >> redo the same steps as above, ovn-controller doesn't try to insert
> >> Chassis_Private but instead it updates the record.
> >>
> >> The only use of _is_new() for potentially locally created records is
> >> here afaik:
> >>
https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184
> >>

Hi Dumitru and Mark,

Thanks for this information and sorry for the delayed response. As
discussed in the last OVN meeting, I think the original design of _is_new
and _is_deleted were only for evaluating against tracked changes, which
always come from server updates.

If we make the change so that _is_new() can be used to evaluate against
local (uncommitted) changes, it may work but can be confusing. If a row R
is inserted by current transaction locally, _is_new(R) returns true, but
when the transaction is committed the local copy will be discarded, and if
the commit is successful the IDL will finally receive a server notification
and the _is_new(R) evaluation will return true again. I think this could
cause problems for the code that relies on _is_new(R). Moreover, if we
update the OVSDB_IDL_CHANGE_INSERT seq for locally inserted rows, we may
need to do the same for locally modified/deleted rows, to be consistent,
which needs more careful considerations for the implications of the change.

Because of the above concerns, I'd rather suggest to stick to the original
design (maybe document it more clearly) that the functions used only for
tracked changes.

For the only place the _is_new() currently used for evaluating non-tracked
change [0], it was introduced in [1], which we have discussed earlier that
it is better to avoid the complex logic for handling system-id change, but
let the operator to follow the steps of completely deleting and re-adding a
chassis [2]. So I guess this kind of usage would not be necessary at least
for existing use cases.

P.S. Ilya found an old patch that I submitted a year ago [3] but never
merged for fixing the _is_new() problem (that I've forgotten myself), which
is similar to v1 but didn't use the "INSERT" seqno. I think the current
patch (v1) from Mark is better, so @Ilya Maximets <i.maximets@ovn.org>
please ignore my old patch.

[0]
https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184
[1] commit dce1af31b5 ("chassis: Fix chassis_private record updates when
the system-id changes.")
[2]
https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374801.html
[3]
https://patchwork.ozlabs.org/project/openvswitch/patch/1564100775-80657-1-git-send-email-hzhou8@ebay.com/

Thanks,
Han

> >> I didn't dig further though.
> >
> > Sorry for the delay. The following test fails:
> >
> >  90: ovn -- ensure one gw controller restart in HA doesn't bounce the
> > master FAILED (ovs-macros.at:220)
> >
> > <snip>
> > ovn.at:12213: waiting until test 1 = `ovn-sbctl --bare --columns=_uuid
> > find Chassis name=gw2 | wc -l`...
> > ovn.at:12213: wait failed after 30 seconds
> > ./ovs-macros.at:220: hard failure
> > 90. ovn.at:12139: 90. ovn -- ensure one gw controller restart in HA
> > doesn't bounce the master (ovn.at:12139): FAILED (ovs-macros.at:220)
> > <snip>
> >
> > This is similar failure as the repoducer highlighted by Dumitru above.
> > It is resolved by:
> >
> > +++ b/lib/ovsdb-idl.c
> > @@ -4858,6 +4858,11 @@ ovsdb_idl_txn_insert(struct ovsdb_idl_txn *txn,
> >      hmap_insert(&row->table->rows, &row->hmap_node,
uuid_hash(&row->uuid));
> >      hmap_insert(&txn->txn_rows, &row->txn_node, uuid_hash(&row->uuid));
> >      ovsdb_idl_add_to_indexes(row);
> > +
> > +    row->change_seqno[OVSDB_IDL_CHANGE_INSERT]
> > +        = row->table->change_seqno[OVSDB_IDL_CHANGE_INSERT]
> > +        = row->table->db->change_seqno + 1;
> > +
> >
> >
> >>
> >> Regards,
> >> Dumitru
> >>
> >
>
> v2 published at
> https://mail.openvswitch.org/pipermail/ovs-dev/2020-October/375962.html
Ilya Maximets Oct. 19, 2020, 6:42 p.m. UTC | #11
On 10/19/20 8:22 PM, Han Zhou wrote:
> 
> 
> On Mon, Oct 12, 2020 at 7:32 AM Mark Gray <mark.d.gray@redhat.com <mailto:mark.d.gray@redhat.com>> wrote:
>>
>> On 10/10/2020 16:42, Mark Gray wrote:
>> > On 08/10/2020 19:57, Dumitru Ceara wrote:
>> >> On 10/2/20 1:13 AM, Han Zhou wrote:
>> >>>
>> >>>
>> >>> On Thu, Oct 1, 2020 at 2:30 AM Mark Gray <mark.d.gray@redhat.com <mailto:mark.d.gray@redhat.com>
>> >>> <mailto:mark.d.gray@redhat.com <mailto:mark.d.gray@redhat.com>>> wrote:
>> >>>>
>> >>>> On 30/09/2020 21:04, Han Zhou wrote:
>> >>>>> On Wed, Sep 30, 2020 at 2:21 AM Dumitru Ceara <dceara@redhat.com <mailto:dceara@redhat.com>
>> >>> <mailto:dceara@redhat.com <mailto:dceara@redhat.com>>> wrote:
>> >>>>>>
>> >>>>>> On 9/29/20 8:34 PM, Mark Gray wrote:
>> >>>>>>> Currently all functions of the type *_is_new() always return
>> >>>>>>> 'false'. This patch resolves this issue by using the
>> >>>>>>> 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
>> >>>>>>> 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
>> >>>>>>> is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
>> >>>>>>> 'change_seqno' on clear.
>> >>>>>>>
>> >>>>>>> Further to this, the code is also updated to match this
>> >>>>>>> behaviour:
>> >>>>>>>
>> >>>>>>> When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
>> >>>>>>> 'change_seqno' is updated to match the new database
>> >>>>>>> change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
>> >>>>>>> is not set for inserted rows (only for updated rows).
>> >>>>>>>
>> >>>>>>> At the end of a run, ovsdb_idl_db_track_clear() should be
>> >>>>>>> called to clear all tracking information, this includes
>> >>>>>>> resetting all row 'change_seqno' to zero. This will ensure
>> >>>>>>> that subsequent runs will not see a previously 'new' row.
>> >>>>>>>
>> >>>>>>> add_tracked_change_for_references() is updated to only
>> >>>>>>> track rows that reference the current row.
>> >>>>>>>
>> >>>>>>> Signed-off-by: Mark Gray <mark.d.gray@redhat.com <mailto:mark.d.gray@redhat.com>
>> >>> <mailto:mark.d.gray@redhat.com <mailto:mark.d.gray@redhat.com>>>
>> >>>>>>> Suggested-by: Dumitru Ceara <dceara@redhat.com <mailto:dceara@redhat.com>
>> >>> <mailto:dceara@redhat.com <mailto:dceara@redhat.com>>>
>> >>>>>>> Reported-at: https://bugzilla.redhat.com/1883562
>> >>>>>>
>> >>>>>> Hi Mark,
>> >>>>>>
>> >>>>>> Thanks for working on this, it definitely looks like a nasty bug to me!
>> >>>>>>
>> >>>>>> We were lucky to not hit it in ovn-controller before. That's just
>> >>>>>> because all the "update" operations were handled there as "delete" +
>> >>>>>> "insert" but that is not mandatory and might change.>>
>> >>>>>
>> >>>>> Agree. Thanks for this critical finding! It was my bad - commit
>> >>> ca545a787ac
>> >>>>> introduced this bug, which was fixing another bug related to is_new. I
>> >>>>> should have added test cases to avoid the miss. Apologize.
>> >>>>
>> >>>> No worries, I will reference this commit in the commit message.
>> >>>>>
>> >>>>> For this fix, I don't understand why we need to check
>> >>>>> OVSDB_IDL_CHANGE_INSERT. I think the root cause is that in
>> >>>>> add_tracked_change_for_references() it updated the
>> >>> OVSDB_IDL_CHANGE_MODIFY
>> >>>>> seqno for the initial inserted *new* row, while it shouldn't. So
>> >>> should the
>> >>>>> fix only include the changes in add_tracked_change_for_references() and
>> >>>>> ovsdb_idl_row_change__()? Why do we need to change the implementation of
>> >>>>> _is_new() itself? Could you explain?
>> >>>>
>> >>>> In the case of a newly inserted row, the change_seqno triplet will still
>> >>>> be {X, 0, 0}. It will remain as such until the row is modified, at which
>> >>>> point it will become {X, Y, 0}. This means that the _*is_new() function
>> >>>> will continue to return true until the row is modified which is not
>> >>>> correct behaviour.
>> >>>
>> >>> Hmm, this is not a problem because if a row inserted in a previous run
>> >>> is not modified/deleted in the current run, it won't be added to
>> >>> tracking so no one would call _is_new() for the row. However, there will
>> >>> be a problem when a row is deleted (see below).
>> >>>
>> >>>> If we reset the change_seqno triplet on each run, it
>> >>>> will still not work. In the end, it is probably better to use the
>> >>>> OVSDB_IDL_CHANGE_INSERT element to track this.
>> >>>>
>> >>> Indeed, we should use OVSDB_IDL_CHANGE_INSERT to check _is_new() because
>> >>> if row is deleted without any modification after insertion, the
>> >>> OVSDB_IDL_CHANGE_MODIFY seqno would still be zero when it is being
>> >>> deleted. In that case calling the _is_new() function would return true
>> >>> for a deleted row, which is wrong. So, your change LGTM.
>> >>>
>> >>>> On a seperate point, I don't expect add_tracked_change_for_references()
>> >>>> will ever get called on insert as nothing will be referenced at that
>> >>>> stage? Am I correct in my understanding?
>> >>>>
>> >>> Yes, I think so. More accurately, it will get called once on the
>> >>> inserted row but will not trigger any recursive calls.
>> >>>
>> >>>>>>> ---
>> >>>>>>>  lib/ovsdb-idl.c     | 39 +++++++++++++++++++++++++++------------
>> >>>>>>>  ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <http://ovsdb-idlc.in> |  2 +-
>> >>>>>>>  2 files changed, 28 insertions(+), 13 deletions(-)
>> >>>>>>>
>> >>>>>>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>> >>>>>>> index d8f221ca6..3cfd73871 100644
>> >>>>>>> --- a/lib/ovsdb-idl.c
>> >>>>>>> +++ b/lib/ovsdb-idl.c
>> >>>>>>> @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct
>> >>> ovsdb_idl_db *db)
>> >>>>>>>                      free(row->updated);
>> >>>>>>>                      row->updated = NULL;
>> >>>>>>>                  }
>> >>>>>>> +
>> >>>>>>> +                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
>> >>>>>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
>> >>>>>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
>> >>>>>>> +
>> >>>>>>>                  ovs_list_remove(&row->track_node);
>> >>>>>>>                  ovs_list_init(&row->track_node);
>> >>>>>>>                  if (ovsdb_idl_row_is_orphan(row) &&
>> >>>>> row->tracked_old_datum) {
>> >>>>>>> @@ -2684,24 +2689,27 @@ ovsdb_idl_process_update2(struct
>> >>>>> ovsdb_idl_table *table,
>> >>>>>>>      return OVSDB_IDL_UPDATE_DB_CHANGED;
>> >>>>>>>  }
>> >>>>>>>
>> >>>>>>> -/* Recursively add rows to tracked change lists for current row
>> >>>>>>> - * and the rows that reference this row. */
>> >>>>>>> +/* Recursively add rows to tracked change lists for all rows that
>> >>>>> reference
>> >>>>>>> +   'row'. */
>> >>>>>>>  static void
>> >>>>>>>  add_tracked_change_for_references(struct ovsdb_idl_row *row)
>> >>>>>>>  {
>> >>>>>>> -    if (ovs_list_is_empty(&row->track_node) &&
>> >>>>>>> -            ovsdb_idl_track_is_set(row->table)) {
>> >>>>>>> -        ovs_list_push_back(&row->table->track_list,
>> >>>>>>> -                           &row->track_node);
>> >>>>>>> -        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>> >>>>>>> -            = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>> >>>>>>> -            = row->table->db->change_seqno + 1;
>> >>>>>>> -
>> >>>>>>>          const struct ovsdb_idl_arc *arc;
>> >>>>>>>          LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
>> >>>>>>> -            add_tracked_change_for_references(arc->src);
>> >>>>>>> +            struct ovsdb_idl_row *ref = arc->src;
>> >>>>>>> +
>> >>>>>>> +            if (ovs_list_is_empty(&ref->track_node) &&
>> >>>>>>> +                ovsdb_idl_track_is_set(ref->table)) {
>> >>>>>>> +                    ovs_list_push_back(&ref->table->track_list,
>> >>>>>>> +                                       &ref->track_node);
>> >>>>>>> +
>> >>>>>>> +                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>> >>>>>>> +                    =
>> >>> ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>> >>>>>>> +                    = ref->table->db->change_seqno + 1;
>> >>>>>>> +
>> >>>>>>> +                add_tracked_change_for_references(ref);
>> >>>>>>> +            }
>> >>>>>>>          }
>> >>>>>>> -    }
>> >>>>>>>  }
>> >>>>>>>
>> >>>>>>>
>> >>>>>>> @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row
>> >>>>> *row, const struct json *row_json,
>> >>>>>>>                      row->change_seqno[change]
>> >>>>>>>                          = row->table->change_seqno[change]
>> >>>>>>>                          = row->table->db->change_seqno + 1;
>> >>>>>>> +
>> >>>>>>>                      if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
>> >>>>>>> +                        if (ovs_list_is_empty(&row->track_node) &&
>> >>>>>>> +                            ovsdb_idl_track_is_set(row->table)) {
>> >>>>>>> +                          
>> >>>  ovs_list_push_back(&row->table->track_list,
>> >>>>>>> +                                               &row->track_node);
>> >>>>>>> +                        }
>> >>>>>>> +
>> >>>>>>>                          add_tracked_change_for_references(row);
>> >>>>>>>                          if (!row->updated) {
>> >>>>>>>                              row->updated =
>> >>>>> bitmap_allocate(class->n_columns);
>> >>>>>>> diff --git a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <http://ovsdb-idlc.in>
>> >>> b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <http://ovsdb-idlc.in>
>> >>>>>>> index 698fe25f3..89c3eb682 100755
>> >>>>>>> --- a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <http://ovsdb-idlc.in>
>> >>>>>>> +++ b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <http://ovsdb-idlc.in>
>> >>>>>>> @@ -282,7 +282,7 @@ const struct %(s)s
>> >>>>> *%(s)s_table_track_get_first(const struct %(s)s_table *);
>> >>>>>>>  /* Returns true if 'row' was inserted since the last change tracking
>> >>>>> reset. */
>> >>>>>>>  static inline bool %(s)s_is_new(const struct %(s)s *row)
>> >>>>>>>  {
>> >>>>>>> -    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0;
>> >>>>>>> +    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;
>> >>>>>>
>> >>>>>> This causes some issues with records created by the IDL client (e.g.,
>> >>>>>> ovn-controller). Those are created with ovsdb_idl_txn_insert() and we
>> >>>>>> don't set the OVSDB_IDL_CHANGE_INSERT change_seqno in that case.
>> >>>>>>
>> >>>>>> Essentially *_is_new() will never return true for rows inserted
>> >>> locally.
>> >>>>>>
>> >>>>> Hi Dumitru, I have a different understanding here. I think this is not a
>> >>>>> problem. Whatever changes made locally by the IDL will be discarded when
>> >>>>> committing to server and then rely on the notifications from the
>> >>> server to
>> >>>>> finally update the IDL data, which updates the change_seqno accordingly.
>> >>>>
>> >>>> Dumitru appears to be right. There is a failure on one ovn UT with this
>> >>>> change. It gets resolved by the fix that Dumitru suggests.
>> >>>>
>> >>> I am confused here. Which UT fails and what's the suggested fix? If the
>> >>> row is added locally, I don't expect it to be tracked at all in the
>> >>> current iteration (while it will be in the iteration after the
>> >>> transaction commit). Did I miss anything?
>> >>>
>> >>> Thanks,
>> >>> Han
>> >>>
>> >>
>> >> Hi Han,
>> >>
>> >> One way to see the issue is:
>> >>
>> >> make sandbox
>> >> ovn-sbctl destroy chassis .
>> >>
>> >> At this point ovn-controller continuously tries to transact insert
>> >> Chassis_Private even though Chassis_Private was only updated:
>> >>
>> >> 2020-10-08T18:42:27.784Z|00024|jsonrpc|DBG|unix:sb1.ovsdb: send request,
>> >> method="transact",
>> >>
> params=["OVN_Southbound",{"uuid-name":"row3e499288_5f8c_4859_b82a_adcc474eb114","row":{"name":"chassis-1","chassis":["named-uuid","row6534b1df_bcbb_4b91_8eb1_5bc823f1673e"]},"op":"insert","table":"Chassis_Private"},{"uuid-name":"row9d86c159_0be5_4d13_a8d6_a3414a855511","row":{"ip":"127.0.0.1","options":["map",[["csum","true"]]],"chassis_name":"chassis-1","type":"geneve"},"op":"insert","table":"Encap"},{"uuid-name":"row6534b1df_bcbb_4b91_8eb1_5bc823f1673e","row":{"name":"chassis-1","hostname":"sandbox","other_config":["map",[["datapath-type",""],["iface-types","dummy,dummy-internal,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]],"encaps":["named-uuid","row9d86c159_0be5_4d13_a8d6_a3414a855511"],"external_ids":["map",[["datapath-type",""],["iface-types","dummy,dummy-internal,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]]},"op":"insert","table":"Chassis"},{"comment":"ovn-controller:
>> >> registering chassis 'chassis-1'","op":"comment"}], id=14
>> >>
>> >> While, if I set OVSDB_IDL_CHANGE_INSERT in ovsdb_idl_txn_insert() and
>> >> redo the same steps as above, ovn-controller doesn't try to insert
>> >> Chassis_Private but instead it updates the record.
>> >>
>> >> The only use of _is_new() for potentially locally created records is
>> >> here afaik:
>> >> https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184
>> >>
> 
> Hi Dumitru and Mark,
> 
> Thanks for this information and sorry for the delayed response. As discussed in the last OVN meeting, I think the original design of _is_new and _is_deleted were only for evaluating against tracked changes, which always come from server updates.
> 
> If we make the change so that _is_new() can be used to evaluate against local (uncommitted) changes, it may work but can be confusing. If a row R is inserted by current transaction locally, _is_new(R) returns true, but when the transaction is committed the local copy will be discarded, and if the commit is successful the IDL will finally receive a server notification and the _is_new(R) evaluation will return true again. I think this could cause problems for the code that relies on _is_new(R). Moreover, if we update the OVSDB_IDL_CHANGE_INSERT seq for locally inserted rows, we may need to do the same for locally modified/deleted rows, to be consistent, which needs more careful considerations for the implications of the change.
> 
> Because of the above concerns, I'd rather suggest to stick to the original design (maybe document it more clearly) that the functions used only for tracked changes.

One even bigger concern from my side is what process should not take any actions
until ovsdb-server accepts the change.  Otherwise it might be very tricky to
handle rejected transactions.  Even if it might be not a likely event in OVN case.

> 
> For the only place the _is_new() currently used for evaluating non-tracked change [0], it was introduced in [1], which we have discussed earlier that it is better to avoid the complex logic for handling system-id change, but let the operator to follow the steps of completely deleting and re-adding a chassis [2]. So I guess this kind of usage would not be necessary at least for existing use cases.
> 
> P.S. Ilya found an old patch that I submitted a year ago [3] but never merged for fixing the _is_new() problem (that I've forgotten myself), which is similar to v1 but didn't use the "INSERT" seqno. I think the current patch (v1) from Mark is better, so @Ilya Maximets <mailto:i.maximets@ovn.org> please ignore my old patch.

OK.  I'll mark it as superseded.

> 
> [0] https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184
> [1] commit dce1af31b5 ("chassis: Fix chassis_private record updates when the system-id changes.")
> [2] https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374801.html
> [3] https://patchwork.ozlabs.org/project/openvswitch/patch/1564100775-80657-1-git-send-email-hzhou8@ebay.com/
> 
> Thanks,
> Han
> 
>> >> I didn't dig further though.
>> >
>> > Sorry for the delay. The following test fails:
>> >
>> >  90: ovn -- ensure one gw controller restart in HA doesn't bounce the
>> > master FAILED (ovs-macros.at:220 <http://ovs-macros.at:220>)
>> >
>> > <snip>
>> > ovn.at:12213 <http://ovn.at:12213>: waiting until test 1 = `ovn-sbctl --bare --columns=_uuid
>> > find Chassis name=gw2 | wc -l`...
>> > ovn.at:12213 <http://ovn.at:12213>: wait failed after 30 seconds
>> > ./ovs-macros.at:220 <http://ovs-macros.at:220>: hard failure
>> > 90. ovn.at:12139 <http://ovn.at:12139>: 90. ovn -- ensure one gw controller restart in HA
>> > doesn't bounce the master (ovn.at:12139 <http://ovn.at:12139>): FAILED (ovs-macros.at:220 <http://ovs-macros.at:220>)
>> > <snip>
>> >
>> > This is similar failure as the repoducer highlighted by Dumitru above.
>> > It is resolved by:
>> >
>> > +++ b/lib/ovsdb-idl.c
>> > @@ -4858,6 +4858,11 @@ ovsdb_idl_txn_insert(struct ovsdb_idl_txn *txn,
>> >      hmap_insert(&row->table->rows, &row->hmap_node, uuid_hash(&row->uuid));
>> >      hmap_insert(&txn->txn_rows, &row->txn_node, uuid_hash(&row->uuid));
>> >      ovsdb_idl_add_to_indexes(row);
>> > +
>> > +    row->change_seqno[OVSDB_IDL_CHANGE_INSERT]
>> > +        = row->table->change_seqno[OVSDB_IDL_CHANGE_INSERT]
>> > +        = row->table->db->change_seqno + 1;
>> > +
>> >
>> >
>> >>
>> >> Regards,
>> >> Dumitru
>> >>
>> >
>>
>> v2 published at
>> https://mail.openvswitch.org/pipermail/ovs-dev/2020-October/375962.html
Dumitru Ceara Oct. 19, 2020, 6:59 p.m. UTC | #12
On 10/19/20 8:42 PM, Ilya Maximets wrote:
> On 10/19/20 8:22 PM, Han Zhou wrote:
>>
>>
>> On Mon, Oct 12, 2020 at 7:32 AM Mark Gray <mark.d.gray@redhat.com <mailto:mark.d.gray@redhat.com>> wrote:
>>>
>>> On 10/10/2020 16:42, Mark Gray wrote:
>>>> On 08/10/2020 19:57, Dumitru Ceara wrote:
>>>>> On 10/2/20 1:13 AM, Han Zhou wrote:
>>>>>>
>>>>>>
>>>>>> On Thu, Oct 1, 2020 at 2:30 AM Mark Gray <mark.d.gray@redhat.com <mailto:mark.d.gray@redhat.com>
>>>>>> <mailto:mark.d.gray@redhat.com <mailto:mark.d.gray@redhat.com>>> wrote:
>>>>>>>
>>>>>>> On 30/09/2020 21:04, Han Zhou wrote:
>>>>>>>> On Wed, Sep 30, 2020 at 2:21 AM Dumitru Ceara <dceara@redhat.com <mailto:dceara@redhat.com>
>>>>>> <mailto:dceara@redhat.com <mailto:dceara@redhat.com>>> wrote:
>>>>>>>>>
>>>>>>>>> On 9/29/20 8:34 PM, Mark Gray wrote:
>>>>>>>>>> Currently all functions of the type *_is_new() always return
>>>>>>>>>> 'false'. This patch resolves this issue by using the
>>>>>>>>>> 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
>>>>>>>>>> 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
>>>>>>>>>> is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
>>>>>>>>>> 'change_seqno' on clear.
>>>>>>>>>>
>>>>>>>>>> Further to this, the code is also updated to match this
>>>>>>>>>> behaviour:
>>>>>>>>>>
>>>>>>>>>> When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
>>>>>>>>>> 'change_seqno' is updated to match the new database
>>>>>>>>>> change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
>>>>>>>>>> is not set for inserted rows (only for updated rows).
>>>>>>>>>>
>>>>>>>>>> At the end of a run, ovsdb_idl_db_track_clear() should be
>>>>>>>>>> called to clear all tracking information, this includes
>>>>>>>>>> resetting all row 'change_seqno' to zero. This will ensure
>>>>>>>>>> that subsequent runs will not see a previously 'new' row.
>>>>>>>>>>
>>>>>>>>>> add_tracked_change_for_references() is updated to only
>>>>>>>>>> track rows that reference the current row.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Mark Gray <mark.d.gray@redhat.com <mailto:mark.d.gray@redhat.com>
>>>>>> <mailto:mark.d.gray@redhat.com <mailto:mark.d.gray@redhat.com>>>
>>>>>>>>>> Suggested-by: Dumitru Ceara <dceara@redhat.com <mailto:dceara@redhat.com>
>>>>>> <mailto:dceara@redhat.com <mailto:dceara@redhat.com>>>
>>>>>>>>>> Reported-at: https://bugzilla.redhat.com/1883562
>>>>>>>>>
>>>>>>>>> Hi Mark,
>>>>>>>>>
>>>>>>>>> Thanks for working on this, it definitely looks like a nasty bug to me!
>>>>>>>>>
>>>>>>>>> We were lucky to not hit it in ovn-controller before. That's just
>>>>>>>>> because all the "update" operations were handled there as "delete" +
>>>>>>>>> "insert" but that is not mandatory and might change.>>
>>>>>>>>
>>>>>>>> Agree. Thanks for this critical finding! It was my bad - commit
>>>>>> ca545a787ac
>>>>>>>> introduced this bug, which was fixing another bug related to is_new. I
>>>>>>>> should have added test cases to avoid the miss. Apologize.
>>>>>>>
>>>>>>> No worries, I will reference this commit in the commit message.
>>>>>>>>
>>>>>>>> For this fix, I don't understand why we need to check
>>>>>>>> OVSDB_IDL_CHANGE_INSERT. I think the root cause is that in
>>>>>>>> add_tracked_change_for_references() it updated the
>>>>>> OVSDB_IDL_CHANGE_MODIFY
>>>>>>>> seqno for the initial inserted *new* row, while it shouldn't. So
>>>>>> should the
>>>>>>>> fix only include the changes in add_tracked_change_for_references() and
>>>>>>>> ovsdb_idl_row_change__()? Why do we need to change the implementation of
>>>>>>>> _is_new() itself? Could you explain?
>>>>>>>
>>>>>>> In the case of a newly inserted row, the change_seqno triplet will still
>>>>>>> be {X, 0, 0}. It will remain as such until the row is modified, at which
>>>>>>> point it will become {X, Y, 0}. This means that the _*is_new() function
>>>>>>> will continue to return true until the row is modified which is not
>>>>>>> correct behaviour.
>>>>>>
>>>>>> Hmm, this is not a problem because if a row inserted in a previous run
>>>>>> is not modified/deleted in the current run, it won't be added to
>>>>>> tracking so no one would call _is_new() for the row. However, there will
>>>>>> be a problem when a row is deleted (see below).
>>>>>>
>>>>>>> If we reset the change_seqno triplet on each run, it
>>>>>>> will still not work. In the end, it is probably better to use the
>>>>>>> OVSDB_IDL_CHANGE_INSERT element to track this.
>>>>>>>
>>>>>> Indeed, we should use OVSDB_IDL_CHANGE_INSERT to check _is_new() because
>>>>>> if row is deleted without any modification after insertion, the
>>>>>> OVSDB_IDL_CHANGE_MODIFY seqno would still be zero when it is being
>>>>>> deleted. In that case calling the _is_new() function would return true
>>>>>> for a deleted row, which is wrong. So, your change LGTM.
>>>>>>
>>>>>>> On a seperate point, I don't expect add_tracked_change_for_references()
>>>>>>> will ever get called on insert as nothing will be referenced at that
>>>>>>> stage? Am I correct in my understanding?
>>>>>>>
>>>>>> Yes, I think so. More accurately, it will get called once on the
>>>>>> inserted row but will not trigger any recursive calls.
>>>>>>
>>>>>>>>>> ---
>>>>>>>>>>  lib/ovsdb-idl.c     | 39 +++++++++++++++++++++++++++------------
>>>>>>>>>>  ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <http://ovsdb-idlc.in> |  2 +-
>>>>>>>>>>  2 files changed, 28 insertions(+), 13 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>>>>>>>>>> index d8f221ca6..3cfd73871 100644
>>>>>>>>>> --- a/lib/ovsdb-idl.c
>>>>>>>>>> +++ b/lib/ovsdb-idl.c
>>>>>>>>>> @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct
>>>>>> ovsdb_idl_db *db)
>>>>>>>>>>                      free(row->updated);
>>>>>>>>>>                      row->updated = NULL;
>>>>>>>>>>                  }
>>>>>>>>>> +
>>>>>>>>>> +                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
>>>>>>>>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
>>>>>>>>>> +                    row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
>>>>>>>>>> +
>>>>>>>>>>                  ovs_list_remove(&row->track_node);
>>>>>>>>>>                  ovs_list_init(&row->track_node);
>>>>>>>>>>                  if (ovsdb_idl_row_is_orphan(row) &&
>>>>>>>> row->tracked_old_datum) {
>>>>>>>>>> @@ -2684,24 +2689,27 @@ ovsdb_idl_process_update2(struct
>>>>>>>> ovsdb_idl_table *table,
>>>>>>>>>>      return OVSDB_IDL_UPDATE_DB_CHANGED;
>>>>>>>>>>  }
>>>>>>>>>>
>>>>>>>>>> -/* Recursively add rows to tracked change lists for current row
>>>>>>>>>> - * and the rows that reference this row. */
>>>>>>>>>> +/* Recursively add rows to tracked change lists for all rows that
>>>>>>>> reference
>>>>>>>>>> +   'row'. */
>>>>>>>>>>  static void
>>>>>>>>>>  add_tracked_change_for_references(struct ovsdb_idl_row *row)
>>>>>>>>>>  {
>>>>>>>>>> -    if (ovs_list_is_empty(&row->track_node) &&
>>>>>>>>>> -            ovsdb_idl_track_is_set(row->table)) {
>>>>>>>>>> -        ovs_list_push_back(&row->table->track_list,
>>>>>>>>>> -                           &row->track_node);
>>>>>>>>>> -        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>>>>>>>>> -            = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>>>>>>>>> -            = row->table->db->change_seqno + 1;
>>>>>>>>>> -
>>>>>>>>>>          const struct ovsdb_idl_arc *arc;
>>>>>>>>>>          LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
>>>>>>>>>> -            add_tracked_change_for_references(arc->src);
>>>>>>>>>> +            struct ovsdb_idl_row *ref = arc->src;
>>>>>>>>>> +
>>>>>>>>>> +            if (ovs_list_is_empty(&ref->track_node) &&
>>>>>>>>>> +                ovsdb_idl_track_is_set(ref->table)) {
>>>>>>>>>> +                    ovs_list_push_back(&ref->table->track_list,
>>>>>>>>>> +                                       &ref->track_node);
>>>>>>>>>> +
>>>>>>>>>> +                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>>>>>>>>> +                    =
>>>>>> ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
>>>>>>>>>> +                    = ref->table->db->change_seqno + 1;
>>>>>>>>>> +
>>>>>>>>>> +                add_tracked_change_for_references(ref);
>>>>>>>>>> +            }
>>>>>>>>>>          }
>>>>>>>>>> -    }
>>>>>>>>>>  }
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row
>>>>>>>> *row, const struct json *row_json,
>>>>>>>>>>                      row->change_seqno[change]
>>>>>>>>>>                          = row->table->change_seqno[change]
>>>>>>>>>>                          = row->table->db->change_seqno + 1;
>>>>>>>>>> +
>>>>>>>>>>                      if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
>>>>>>>>>> +                        if (ovs_list_is_empty(&row->track_node) &&
>>>>>>>>>> +                            ovsdb_idl_track_is_set(row->table)) {
>>>>>>>>>> +                          
>>>>>>  ovs_list_push_back(&row->table->track_list,
>>>>>>>>>> +                                               &row->track_node);
>>>>>>>>>> +                        }
>>>>>>>>>> +
>>>>>>>>>>                          add_tracked_change_for_references(row);
>>>>>>>>>>                          if (!row->updated) {
>>>>>>>>>>                              row->updated =
>>>>>>>> bitmap_allocate(class->n_columns);
>>>>>>>>>> diff --git a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <http://ovsdb-idlc.in>
>>>>>> b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <http://ovsdb-idlc.in>
>>>>>>>>>> index 698fe25f3..89c3eb682 100755
>>>>>>>>>> --- a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <http://ovsdb-idlc.in>
>>>>>>>>>> +++ b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <http://ovsdb-idlc.in>
>>>>>>>>>> @@ -282,7 +282,7 @@ const struct %(s)s
>>>>>>>> *%(s)s_table_track_get_first(const struct %(s)s_table *);
>>>>>>>>>>  /* Returns true if 'row' was inserted since the last change tracking
>>>>>>>> reset. */
>>>>>>>>>>  static inline bool %(s)s_is_new(const struct %(s)s *row)
>>>>>>>>>>  {
>>>>>>>>>> -    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0;
>>>>>>>>>> +    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;
>>>>>>>>>
>>>>>>>>> This causes some issues with records created by the IDL client (e.g.,
>>>>>>>>> ovn-controller). Those are created with ovsdb_idl_txn_insert() and we
>>>>>>>>> don't set the OVSDB_IDL_CHANGE_INSERT change_seqno in that case.
>>>>>>>>>
>>>>>>>>> Essentially *_is_new() will never return true for rows inserted
>>>>>> locally.
>>>>>>>>>
>>>>>>>> Hi Dumitru, I have a different understanding here. I think this is not a
>>>>>>>> problem. Whatever changes made locally by the IDL will be discarded when
>>>>>>>> committing to server and then rely on the notifications from the
>>>>>> server to
>>>>>>>> finally update the IDL data, which updates the change_seqno accordingly.
>>>>>>>
>>>>>>> Dumitru appears to be right. There is a failure on one ovn UT with this
>>>>>>> change. It gets resolved by the fix that Dumitru suggests.
>>>>>>>
>>>>>> I am confused here. Which UT fails and what's the suggested fix? If the
>>>>>> row is added locally, I don't expect it to be tracked at all in the
>>>>>> current iteration (while it will be in the iteration after the
>>>>>> transaction commit). Did I miss anything?
>>>>>>
>>>>>> Thanks,
>>>>>> Han
>>>>>>
>>>>>
>>>>> Hi Han,
>>>>>
>>>>> One way to see the issue is:
>>>>>
>>>>> make sandbox
>>>>> ovn-sbctl destroy chassis .
>>>>>
>>>>> At this point ovn-controller continuously tries to transact insert
>>>>> Chassis_Private even though Chassis_Private was only updated:
>>>>>
>>>>> 2020-10-08T18:42:27.784Z|00024|jsonrpc|DBG|unix:sb1.ovsdb: send request,
>>>>> method="transact",
>>>>>
>> params=["OVN_Southbound",{"uuid-name":"row3e499288_5f8c_4859_b82a_adcc474eb114","row":{"name":"chassis-1","chassis":["named-uuid","row6534b1df_bcbb_4b91_8eb1_5bc823f1673e"]},"op":"insert","table":"Chassis_Private"},{"uuid-name":"row9d86c159_0be5_4d13_a8d6_a3414a855511","row":{"ip":"127.0.0.1","options":["map",[["csum","true"]]],"chassis_name":"chassis-1","type":"geneve"},"op":"insert","table":"Encap"},{"uuid-name":"row6534b1df_bcbb_4b91_8eb1_5bc823f1673e","row":{"name":"chassis-1","hostname":"sandbox","other_config":["map",[["datapath-type",""],["iface-types","dummy,dummy-internal,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]],"encaps":["named-uuid","row9d86c159_0be5_4d13_a8d6_a3414a855511"],"external_ids":["map",[["datapath-type",""],["iface-types","dummy,dummy-internal,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]]},"op":"insert","table":"Chassis"},{"comment":"ovn-controller:
>>>>> registering chassis 'chassis-1'","op":"comment"}], id=14
>>>>>
>>>>> While, if I set OVSDB_IDL_CHANGE_INSERT in ovsdb_idl_txn_insert() and
>>>>> redo the same steps as above, ovn-controller doesn't try to insert
>>>>> Chassis_Private but instead it updates the record.
>>>>>
>>>>> The only use of _is_new() for potentially locally created records is
>>>>> here afaik:
>>>>> https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184
>>>>>
>>
>> Hi Dumitru and Mark,
>>
>> Thanks for this information and sorry for the delayed response. As discussed in the last OVN meeting, I think the original design of _is_new and _is_deleted were only for evaluating against tracked changes, which always come from server updates.
>>
>> If we make the change so that _is_new() can be used to evaluate against local (uncommitted) changes, it may work but can be confusing. If a row R is inserted by current transaction locally, _is_new(R) returns true, but when the transaction is committed the local copy will be discarded, and if the commit is successful the IDL will finally receive a server notification and the _is_new(R) evaluation will return true again. I think this could cause problems for the code that relies on _is_new(R). Moreover, if we update the OVSDB_IDL_CHANGE_INSERT seq for locally inserted rows, we may need to do the same for locally modified/deleted rows, to be consistent, which needs more careful considerations for the implications of the change.
>>
>> Because of the above concerns, I'd rather suggest to stick to the original design (maybe document it more clearly) that the functions used only for tracked changes.
> 
> One even bigger concern from my side is what process should not take any actions
> until ovsdb-server accepts the change.  Otherwise it might be very tricky to
> handle rejected transactions.  Even if it might be not a likely event in OVN case.
> 

Actually, the single use of _is_new() on potentially locally inserted records
in ovn-controller is exactly to avoid taking actions on records that have just
been created locally without ovsdb-server accepting the transaction [0]:

https://github.com/ovn-org/ovn/blob/master/controller/ovn-controller.c#L184

We avoid using chassis->header_.uuid as a clause in the monitor condition if
chassis is not yet committed to the DB.

>>
>> For the only place the _is_new() currently used for evaluating non-tracked change [0], it was introduced in [1], which we have discussed earlier that it is better to avoid the complex logic for handling system-id change, but let the operator to follow the steps of completely deleting and re-adding a chassis [2]. So I guess this kind of usage would not be necessary at least for existing use cases.

As mentioned above, even if we refactor [1] by implementing [2] I'm not sure if
we get rid of the need of checking sbrec_chassis_is_new(chassis) in
update_sb_monitors().  I have to look again closely at that part.

Regards,
Dumitru

>>
>> P.S. Ilya found an old patch that I submitted a year ago [3] but never merged for fixing the _is_new() problem (that I've forgotten myself), which is similar to v1 but didn't use the "INSERT" seqno. I think the current patch (v1) from Mark is better, so @Ilya Maximets <mailto:i.maximets@ovn.org> please ignore my old patch.
> 
> OK.  I'll mark it as superseded.
> 
>>
>> [0] https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184
>> [1] commit dce1af31b5 ("chassis: Fix chassis_private record updates when the system-id changes.")
>> [2] https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374801.html
>> [3] https://patchwork.ozlabs.org/project/openvswitch/patch/1564100775-80657-1-git-send-email-hzhou8@ebay.com/
>>
>> Thanks,
>> Han
>>
>>>>> I didn't dig further though.
>>>>
>>>> Sorry for the delay. The following test fails:
>>>>
>>>>  90: ovn -- ensure one gw controller restart in HA doesn't bounce the
>>>> master FAILED (ovs-macros.at:220 <http://ovs-macros.at:220>)
>>>>
>>>> <snip>
>>>> ovn.at:12213 <http://ovn.at:12213>: waiting until test 1 = `ovn-sbctl --bare --columns=_uuid
>>>> find Chassis name=gw2 | wc -l`...
>>>> ovn.at:12213 <http://ovn.at:12213>: wait failed after 30 seconds
>>>> ./ovs-macros.at:220 <http://ovs-macros.at:220>: hard failure
>>>> 90. ovn.at:12139 <http://ovn.at:12139>: 90. ovn -- ensure one gw controller restart in HA
>>>> doesn't bounce the master (ovn.at:12139 <http://ovn.at:12139>): FAILED (ovs-macros.at:220 <http://ovs-macros.at:220>)
>>>> <snip>
>>>>
>>>> This is similar failure as the repoducer highlighted by Dumitru above.
>>>> It is resolved by:
>>>>
>>>> +++ b/lib/ovsdb-idl.c
>>>> @@ -4858,6 +4858,11 @@ ovsdb_idl_txn_insert(struct ovsdb_idl_txn *txn,
>>>>      hmap_insert(&row->table->rows, &row->hmap_node, uuid_hash(&row->uuid));
>>>>      hmap_insert(&txn->txn_rows, &row->txn_node, uuid_hash(&row->uuid));
>>>>      ovsdb_idl_add_to_indexes(row);
>>>> +
>>>> +    row->change_seqno[OVSDB_IDL_CHANGE_INSERT]
>>>> +        = row->table->change_seqno[OVSDB_IDL_CHANGE_INSERT]
>>>> +        = row->table->db->change_seqno + 1;
>>>> +
>>>>
>>>>
>>>>>
>>>>> Regards,
>>>>> Dumitru
>>>>>
>>>>
>>>
>>> v2 published at
>>> https://mail.openvswitch.org/pipermail/ovs-dev/2020-October/375962.html
>
Han Zhou Oct. 19, 2020, 9:21 p.m. UTC | #13
On Mon, Oct 19, 2020 at 12:00 PM Dumitru Ceara <dceara@redhat.com> wrote:
>
> On 10/19/20 8:42 PM, Ilya Maximets wrote:
> > On 10/19/20 8:22 PM, Han Zhou wrote:
> >>
> >>
> >> On Mon, Oct 12, 2020 at 7:32 AM Mark Gray <mark.d.gray@redhat.com
<mailto:mark.d.gray@redhat.com>> wrote:
> >>>
> >>> On 10/10/2020 16:42, Mark Gray wrote:
> >>>> On 08/10/2020 19:57, Dumitru Ceara wrote:
> >>>>> On 10/2/20 1:13 AM, Han Zhou wrote:
> >>>>>>
> >>>>>>
> >>>>>> On Thu, Oct 1, 2020 at 2:30 AM Mark Gray <mark.d.gray@redhat.com
<mailto:mark.d.gray@redhat.com>
> >>>>>> <mailto:mark.d.gray@redhat.com <mailto:mark.d.gray@redhat.com>>>
wrote:
> >>>>>>>
> >>>>>>> On 30/09/2020 21:04, Han Zhou wrote:
> >>>>>>>> On Wed, Sep 30, 2020 at 2:21 AM Dumitru Ceara <dceara@redhat.com
<mailto:dceara@redhat.com>
> >>>>>> <mailto:dceara@redhat.com <mailto:dceara@redhat.com>>> wrote:
> >>>>>>>>>
> >>>>>>>>> On 9/29/20 8:34 PM, Mark Gray wrote:
> >>>>>>>>>> Currently all functions of the type *_is_new() always return
> >>>>>>>>>> 'false'. This patch resolves this issue by using the
> >>>>>>>>>> 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the
> >>>>>>>>>> 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row
> >>>>>>>>>> is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT'
> >>>>>>>>>> 'change_seqno' on clear.
> >>>>>>>>>>
> >>>>>>>>>> Further to this, the code is also updated to match this
> >>>>>>>>>> behaviour:
> >>>>>>>>>>
> >>>>>>>>>> When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT'
> >>>>>>>>>> 'change_seqno' is updated to match the new database
> >>>>>>>>>> change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno'
> >>>>>>>>>> is not set for inserted rows (only for updated rows).
> >>>>>>>>>>
> >>>>>>>>>> At the end of a run, ovsdb_idl_db_track_clear() should be
> >>>>>>>>>> called to clear all tracking information, this includes
> >>>>>>>>>> resetting all row 'change_seqno' to zero. This will ensure
> >>>>>>>>>> that subsequent runs will not see a previously 'new' row.
> >>>>>>>>>>
> >>>>>>>>>> add_tracked_change_for_references() is updated to only
> >>>>>>>>>> track rows that reference the current row.
> >>>>>>>>>>
> >>>>>>>>>> Signed-off-by: Mark Gray <mark.d.gray@redhat.com <mailto:
mark.d.gray@redhat.com>
> >>>>>> <mailto:mark.d.gray@redhat.com <mailto:mark.d.gray@redhat.com>>>
> >>>>>>>>>> Suggested-by: Dumitru Ceara <dceara@redhat.com <mailto:
dceara@redhat.com>
> >>>>>> <mailto:dceara@redhat.com <mailto:dceara@redhat.com>>>
> >>>>>>>>>> Reported-at: https://bugzilla.redhat.com/1883562
> >>>>>>>>>
> >>>>>>>>> Hi Mark,
> >>>>>>>>>
> >>>>>>>>> Thanks for working on this, it definitely looks like a nasty
bug to me!
> >>>>>>>>>
> >>>>>>>>> We were lucky to not hit it in ovn-controller before. That's
just
> >>>>>>>>> because all the "update" operations were handled there as
"delete" +
> >>>>>>>>> "insert" but that is not mandatory and might change.>>
> >>>>>>>>
> >>>>>>>> Agree. Thanks for this critical finding! It was my bad - commit
> >>>>>> ca545a787ac
> >>>>>>>> introduced this bug, which was fixing another bug related to
is_new. I
> >>>>>>>> should have added test cases to avoid the miss. Apologize.
> >>>>>>>
> >>>>>>> No worries, I will reference this commit in the commit message.
> >>>>>>>>
> >>>>>>>> For this fix, I don't understand why we need to check
> >>>>>>>> OVSDB_IDL_CHANGE_INSERT. I think the root cause is that in
> >>>>>>>> add_tracked_change_for_references() it updated the
> >>>>>> OVSDB_IDL_CHANGE_MODIFY
> >>>>>>>> seqno for the initial inserted *new* row, while it shouldn't. So
> >>>>>> should the
> >>>>>>>> fix only include the changes in
add_tracked_change_for_references() and
> >>>>>>>> ovsdb_idl_row_change__()? Why do we need to change the
implementation of
> >>>>>>>> _is_new() itself? Could you explain?
> >>>>>>>
> >>>>>>> In the case of a newly inserted row, the change_seqno triplet
will still
> >>>>>>> be {X, 0, 0}. It will remain as such until the row is modified,
at which
> >>>>>>> point it will become {X, Y, 0}. This means that the _*is_new()
function
> >>>>>>> will continue to return true until the row is modified which is
not
> >>>>>>> correct behaviour.
> >>>>>>
> >>>>>> Hmm, this is not a problem because if a row inserted in a previous
run
> >>>>>> is not modified/deleted in the current run, it won't be added to
> >>>>>> tracking so no one would call _is_new() for the row. However,
there will
> >>>>>> be a problem when a row is deleted (see below).
> >>>>>>
> >>>>>>> If we reset the change_seqno triplet on each run, it
> >>>>>>> will still not work. In the end, it is probably better to use the
> >>>>>>> OVSDB_IDL_CHANGE_INSERT element to track this.
> >>>>>>>
> >>>>>> Indeed, we should use OVSDB_IDL_CHANGE_INSERT to check _is_new()
because
> >>>>>> if row is deleted without any modification after insertion, the
> >>>>>> OVSDB_IDL_CHANGE_MODIFY seqno would still be zero when it is being
> >>>>>> deleted. In that case calling the _is_new() function would return
true
> >>>>>> for a deleted row, which is wrong. So, your change LGTM.
> >>>>>>
> >>>>>>> On a seperate point, I don't expect
add_tracked_change_for_references()
> >>>>>>> will ever get called on insert as nothing will be referenced at
that
> >>>>>>> stage? Am I correct in my understanding?
> >>>>>>>
> >>>>>> Yes, I think so. More accurately, it will get called once on the
> >>>>>> inserted row but will not trigger any recursive calls.
> >>>>>>
> >>>>>>>>>> ---
> >>>>>>>>>>  lib/ovsdb-idl.c     | 39
+++++++++++++++++++++++++++------------
> >>>>>>>>>>  ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <
http://ovsdb-idlc.in> |  2 +-
> >>>>>>>>>>  2 files changed, 28 insertions(+), 13 deletions(-)
> >>>>>>>>>>
> >>>>>>>>>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> >>>>>>>>>> index d8f221ca6..3cfd73871 100644
> >>>>>>>>>> --- a/lib/ovsdb-idl.c
> >>>>>>>>>> +++ b/lib/ovsdb-idl.c
> >>>>>>>>>> @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct
> >>>>>> ovsdb_idl_db *db)
> >>>>>>>>>>                      free(row->updated);
> >>>>>>>>>>                      row->updated = NULL;
> >>>>>>>>>>                  }
> >>>>>>>>>> +
> >>>>>>>>>> +                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
> >>>>>>>>>> +
 row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
> >>>>>>>>>> +
 row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
> >>>>>>>>>> +
> >>>>>>>>>>                  ovs_list_remove(&row->track_node);
> >>>>>>>>>>                  ovs_list_init(&row->track_node);
> >>>>>>>>>>                  if (ovsdb_idl_row_is_orphan(row) &&
> >>>>>>>> row->tracked_old_datum) {
> >>>>>>>>>> @@ -2684,24 +2689,27 @@ ovsdb_idl_process_update2(struct
> >>>>>>>> ovsdb_idl_table *table,
> >>>>>>>>>>      return OVSDB_IDL_UPDATE_DB_CHANGED;
> >>>>>>>>>>  }
> >>>>>>>>>>
> >>>>>>>>>> -/* Recursively add rows to tracked change lists for current
row
> >>>>>>>>>> - * and the rows that reference this row. */
> >>>>>>>>>> +/* Recursively add rows to tracked change lists for all rows
that
> >>>>>>>> reference
> >>>>>>>>>> +   'row'. */
> >>>>>>>>>>  static void
> >>>>>>>>>>  add_tracked_change_for_references(struct ovsdb_idl_row *row)
> >>>>>>>>>>  {
> >>>>>>>>>> -    if (ovs_list_is_empty(&row->track_node) &&
> >>>>>>>>>> -            ovsdb_idl_track_is_set(row->table)) {
> >>>>>>>>>> -        ovs_list_push_back(&row->table->track_list,
> >>>>>>>>>> -                           &row->track_node);
> >>>>>>>>>> -        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> >>>>>>>>>> -            =
row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> >>>>>>>>>> -            = row->table->db->change_seqno + 1;
> >>>>>>>>>> -
> >>>>>>>>>>          const struct ovsdb_idl_arc *arc;
> >>>>>>>>>>          LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
> >>>>>>>>>> -            add_tracked_change_for_references(arc->src);
> >>>>>>>>>> +            struct ovsdb_idl_row *ref = arc->src;
> >>>>>>>>>> +
> >>>>>>>>>> +            if (ovs_list_is_empty(&ref->track_node) &&
> >>>>>>>>>> +                ovsdb_idl_track_is_set(ref->table)) {
> >>>>>>>>>> +
 ovs_list_push_back(&ref->table->track_list,
> >>>>>>>>>> +                                       &ref->track_node);
> >>>>>>>>>> +
> >>>>>>>>>> +                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> >>>>>>>>>> +                    =
> >>>>>> ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
> >>>>>>>>>> +                    = ref->table->db->change_seqno + 1;
> >>>>>>>>>> +
> >>>>>>>>>> +                add_tracked_change_for_references(ref);
> >>>>>>>>>> +            }
> >>>>>>>>>>          }
> >>>>>>>>>> -    }
> >>>>>>>>>>  }
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>> @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct
ovsdb_idl_row
> >>>>>>>> *row, const struct json *row_json,
> >>>>>>>>>>                      row->change_seqno[change]
> >>>>>>>>>>                          = row->table->change_seqno[change]
> >>>>>>>>>>                          = row->table->db->change_seqno + 1;
> >>>>>>>>>> +
> >>>>>>>>>>                      if (table->modes[column_idx] &
OVSDB_IDL_TRACK) {
> >>>>>>>>>> +                        if
(ovs_list_is_empty(&row->track_node) &&
> >>>>>>>>>> +
 ovsdb_idl_track_is_set(row->table)) {
> >>>>>>>>>> +
> >>>>>>  ovs_list_push_back(&row->table->track_list,
> >>>>>>>>>> +
&row->track_node);
> >>>>>>>>>> +                        }
> >>>>>>>>>> +
> >>>>>>>>>>
 add_tracked_change_for_references(row);
> >>>>>>>>>>                          if (!row->updated) {
> >>>>>>>>>>                              row->updated =
> >>>>>>>> bitmap_allocate(class->n_columns);
> >>>>>>>>>> diff --git a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <
http://ovsdb-idlc.in>
> >>>>>> b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <http://ovsdb-idlc.in>
> >>>>>>>>>> index 698fe25f3..89c3eb682 100755
> >>>>>>>>>> --- a/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <
http://ovsdb-idlc.in>
> >>>>>>>>>> +++ b/ovsdb/ovsdb-idlc.in <http://ovsdb-idlc.in> <
http://ovsdb-idlc.in>
> >>>>>>>>>> @@ -282,7 +282,7 @@ const struct %(s)s
> >>>>>>>> *%(s)s_table_track_get_first(const struct %(s)s_table *);
> >>>>>>>>>>  /* Returns true if 'row' was inserted since the last change
tracking
> >>>>>>>> reset. */
> >>>>>>>>>>  static inline bool %(s)s_is_new(const struct %(s)s *row)
> >>>>>>>>>>  {
> >>>>>>>>>> -    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY)
== 0;
> >>>>>>>>>> +    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT)
> 0;
> >>>>>>>>>
> >>>>>>>>> This causes some issues with records created by the IDL client
(e.g.,
> >>>>>>>>> ovn-controller). Those are created with ovsdb_idl_txn_insert()
and we
> >>>>>>>>> don't set the OVSDB_IDL_CHANGE_INSERT change_seqno in that case.
> >>>>>>>>>
> >>>>>>>>> Essentially *_is_new() will never return true for rows inserted
> >>>>>> locally.
> >>>>>>>>>
> >>>>>>>> Hi Dumitru, I have a different understanding here. I think this
is not a
> >>>>>>>> problem. Whatever changes made locally by the IDL will be
discarded when
> >>>>>>>> committing to server and then rely on the notifications from the
> >>>>>> server to
> >>>>>>>> finally update the IDL data, which updates the change_seqno
accordingly.
> >>>>>>>
> >>>>>>> Dumitru appears to be right. There is a failure on one ovn UT
with this
> >>>>>>> change. It gets resolved by the fix that Dumitru suggests.
> >>>>>>>
> >>>>>> I am confused here. Which UT fails and what's the suggested fix?
If the
> >>>>>> row is added locally, I don't expect it to be tracked at all in the
> >>>>>> current iteration (while it will be in the iteration after the
> >>>>>> transaction commit). Did I miss anything?
> >>>>>>
> >>>>>> Thanks,
> >>>>>> Han
> >>>>>>
> >>>>>
> >>>>> Hi Han,
> >>>>>
> >>>>> One way to see the issue is:
> >>>>>
> >>>>> make sandbox
> >>>>> ovn-sbctl destroy chassis .
> >>>>>
> >>>>> At this point ovn-controller continuously tries to transact insert
> >>>>> Chassis_Private even though Chassis_Private was only updated:
> >>>>>
> >>>>> 2020-10-08T18:42:27.784Z|00024|jsonrpc|DBG|unix:sb1.ovsdb: send
request,
> >>>>> method="transact",
> >>>>>
> >>
params=["OVN_Southbound",{"uuid-name":"row3e499288_5f8c_4859_b82a_adcc474eb114","row":{"name":"chassis-1","chassis":["named-uuid","row6534b1df_bcbb_4b91_8eb1_5bc823f1673e"]},"op":"insert","table":"Chassis_Private"},{"uuid-name":"row9d86c159_0be5_4d13_a8d6_a3414a855511","row":{"ip":"127.0.0.1","options":["map",[["csum","true"]]],"chassis_name":"chassis-1","type":"geneve"},"op":"insert","table":"Encap"},{"uuid-name":"row6534b1df_bcbb_4b91_8eb1_5bc823f1673e","row":{"name":"chassis-1","hostname":"sandbox","other_config":["map",[["datapath-type",""],["iface-types","dummy,dummy-internal,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]],"encaps":["named-uuid","row9d86c159_0be5_4d13_a8d6_a3414a855511"],"external_ids":["map",[["datapath-type",""],["iface-types","dummy,dummy-
 internal
 ,dummy-pmd,erspan,geneve,gre,gtpu,internal,ip6erspan,ip6gre,lisp,patch,stt,system,tap,vxlan"],["is-interconn","false"],["ovn-bridge-mappings",""],["ovn-chassis-mac-mappings",""],["ovn-cms-options",""],["ovn-enable-lflow-cache","true"],["ovn-monitor-all","false"]]]},"op":"insert","table":"Chassis"},{"comment":"ovn-controller:
> >>>>> registering chassis 'chassis-1'","op":"comment"}], id=14
> >>>>>
> >>>>> While, if I set OVSDB_IDL_CHANGE_INSERT in ovsdb_idl_txn_insert()
and
> >>>>> redo the same steps as above, ovn-controller doesn't try to insert
> >>>>> Chassis_Private but instead it updates the record.
> >>>>>
> >>>>> The only use of _is_new() for potentially locally created records is
> >>>>> here afaik:
> >>>>>
https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184
> >>>>>
> >>
> >> Hi Dumitru and Mark,
> >>
> >> Thanks for this information and sorry for the delayed response. As
discussed in the last OVN meeting, I think the original design of _is_new
and _is_deleted were only for evaluating against tracked changes, which
always come from server updates.
> >>
> >> If we make the change so that _is_new() can be used to evaluate
against local (uncommitted) changes, it may work but can be confusing. If a
row R is inserted by current transaction locally, _is_new(R) returns true,
but when the transaction is committed the local copy will be discarded, and
if the commit is successful the IDL will finally receive a server
notification and the _is_new(R) evaluation will return true again. I think
this could cause problems for the code that relies on _is_new(R). Moreover,
if we update the OVSDB_IDL_CHANGE_INSERT seq for locally inserted rows, we
may need to do the same for locally modified/deleted rows, to be
consistent, which needs more careful considerations for the implications of
the change.
> >>
> >> Because of the above concerns, I'd rather suggest to stick to the
original design (maybe document it more clearly) that the functions used
only for tracked changes.
> >
> > One even bigger concern from my side is what process should not take
any actions
> > until ovsdb-server accepts the change.  Otherwise it might be very
tricky to
> > handle rejected transactions.  Even if it might be not a likely event
in OVN case.
> >
This is a valid concern, which I've thought about before. It seems OVSDB
IDL was supposed to be used in a declarative way (ideally), i.e. level
triggered instead of edge triggered. The IDL client typically runs in a
loop that takes current status of the IDL as input and computes a desired
output within the transaction and commits back to the server. This way, it
doesn't matter if a DB change is made in current transaction.

However, there are use cases when we do need to handle changes (edge
triggered), typically, in ovn-controller incremental processing.
Fundamental question is, how to handle the OVSDB changes made in current
I-P iteration. Fortunately, ovn-controller doesn't really modify too much
data in OVSDB (SB and local conf DB). For SB DB, they are only chassis
(which we don't include in incremental processing), MAC_binding and
Port_binding, and I don't see any side-effect yet, because those local
changes are not handled and they will be handled in the next iteration when
updates received from SB server, and handled through the change-tracking
(which _is_new is also used to evaluate the type of change for the tracked
records). For OVS_IDL (local conf DB), the first version of I-P doesn't do
any incremental processing (always recompute), but the recent version does
some I-P for interface changes. I think it is similar to the SB I-P, since
we don't really handle the data changed by current iteration (maybe need
more careful check for exceptions).

Other than ovn-controller I-P, I don't see any use case where we have to
rely on the edge-trigger yet. (For the current only use case for the
chassis regarding chassis-private monitoring, please see below)

>
> Actually, the single use of _is_new() on potentially locally inserted
records
> in ovn-controller is exactly to avoid taking actions on records that have
just
> been created locally without ovsdb-server accepting the transaction [0]:
>
>
https://github.com/ovn-org/ovn/blob/master/controller/ovn-controller.c#L184
>
> We avoid using chassis->header_.uuid as a clause in the monitor condition
if
> chassis is not yet committed to the DB.
>
I think using the chassis name in monitor condition should have avoided the
problem (if not considering supporting the system-id change on-the-fly).

> >>
> >> For the only place the _is_new() currently used for evaluating
non-tracked change [0], it was introduced in [1], which we have discussed
earlier that it is better to avoid the complex logic for handling system-id
change, but let the operator to follow the steps of completely deleting and
re-adding a chassis [2]. So I guess this kind of usage would not be
necessary at least for existing use cases.
>
> As mentioned above, even if we refactor [1] by implementing [2] I'm not
sure if
> we get rid of the need of checking sbrec_chassis_is_new(chassis) in
> update_sb_monitors().  I have to look again closely at that part.
>
> Regards,
> Dumitru
>
> >>
> >> P.S. Ilya found an old patch that I submitted a year ago [3] but never
merged for fixing the _is_new() problem (that I've forgotten myself), which
is similar to v1 but didn't use the "INSERT" seqno. I think the current
patch (v1) from Mark is better, so @Ilya Maximets <mailto:i.maximets@ovn.org>
please ignore my old patch.
> >
> > OK.  I'll mark it as superseded.
> >
> >>
> >> [0]
https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184
> >> [1] commit dce1af31b5 ("chassis: Fix chassis_private record updates
when the system-id changes.")
> >> [2]
https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374801.html
> >> [3]
https://patchwork.ozlabs.org/project/openvswitch/patch/1564100775-80657-1-git-send-email-hzhou8@ebay.com/
> >>
> >> Thanks,
> >> Han
> >>
> >>>>> I didn't dig further though.
> >>>>
> >>>> Sorry for the delay. The following test fails:
> >>>>
> >>>>  90: ovn -- ensure one gw controller restart in HA doesn't bounce the
> >>>> master FAILED (ovs-macros.at:220 <http://ovs-macros.at:220>)
> >>>>
> >>>> <snip>
> >>>> ovn.at:12213 <http://ovn.at:12213>: waiting until test 1 =
`ovn-sbctl --bare --columns=_uuid
> >>>> find Chassis name=gw2 | wc -l`...
> >>>> ovn.at:12213 <http://ovn.at:12213>: wait failed after 30 seconds
> >>>> ./ovs-macros.at:220 <http://ovs-macros.at:220>: hard failure
> >>>> 90. ovn.at:12139 <http://ovn.at:12139>: 90. ovn -- ensure one gw
controller restart in HA
> >>>> doesn't bounce the master (ovn.at:12139 <http://ovn.at:12139>):
FAILED (ovs-macros.at:220 <http://ovs-macros.at:220>)
> >>>> <snip>
> >>>>
> >>>> This is similar failure as the repoducer highlighted by Dumitru
above.
> >>>> It is resolved by:
> >>>>
> >>>> +++ b/lib/ovsdb-idl.c
> >>>> @@ -4858,6 +4858,11 @@ ovsdb_idl_txn_insert(struct ovsdb_idl_txn
*txn,
> >>>>      hmap_insert(&row->table->rows, &row->hmap_node,
uuid_hash(&row->uuid));
> >>>>      hmap_insert(&txn->txn_rows, &row->txn_node,
uuid_hash(&row->uuid));
> >>>>      ovsdb_idl_add_to_indexes(row);
> >>>> +
> >>>> +    row->change_seqno[OVSDB_IDL_CHANGE_INSERT]
> >>>> +        = row->table->change_seqno[OVSDB_IDL_CHANGE_INSERT]
> >>>> +        = row->table->db->change_seqno + 1;
> >>>> +
> >>>>
> >>>>
> >>>>>
> >>>>> Regards,
> >>>>> Dumitru
> >>>>>
> >>>>
> >>>
> >>> v2 published at
> >>>
https://mail.openvswitch.org/pipermail/ovs-dev/2020-October/375962.html
> >
>
Mark Gray Oct. 20, 2020, 8:28 a.m. UTC | #14
On 19/10/2020 22:21, Han Zhou wrote:

>>>> If we make the change so that _is_new() can be used to evaluate
> against local (uncommitted) changes, it may work but can be confusing. If a
> row R is inserted by current transaction locally, _is_new(R) returns true,
> but when the transaction is committed the local copy will be discarded, and
> if the commit is successful the IDL will finally receive a server
> notification and the _is_new(R) evaluation will return true again. I think
> this could cause problems for the code that relies on _is_new(R). Moreover,
> if we update the OVSDB_IDL_CHANGE_INSERT seq for locally inserted rows, we
> may need to do the same for locally modified/deleted rows, to be
> consistent, which needs more careful considerations for the implications of
> the change.
>>>>>>>> Because of the above concerns, I'd rather suggest to stick to the
> original design (maybe document it more clearly) that the functions used
> only for tracked changes.

Thanks for the feedback.

I think I agree with your points here and Ilya's below. It is something
to keep in mind as we may need this type of functionality in the future.
However, we should be able to resolve all our issues without it at this
stage.

I will refactor this patch to remove the change that allows the use of
_is_new() on potentially locally inserted records but I will wait until
Dumitru implements the change to disallow changing the system-id
on-the-fly. Dumitru, if you need any help here, please let me know.

When this is all done, I will then resubmit the change in OVN which I
originally started on which was fixing I-P for changes in
requested-tnl-key!

>>>
>>> One even bigger concern from my side is what process should not take
> any actions
>>> until ovsdb-server accepts the change.  Otherwise it might be very
> tricky to
>>> handle rejected transactions.  Even if it might be not a likely event
> in OVN case.
>>>
> This is a valid concern, which I've thought about before. It seems OVSDB
> IDL was supposed to be used in a declarative way (ideally), i.e. level
> triggered instead of edge triggered. The IDL client typically runs in a
> loop that takes current status of the IDL as input and computes a desired
> output within the transaction and commits back to the server. This way, it
> doesn't matter if a DB change is made in current transaction.
> 
> However, there are use cases when we do need to handle changes (edge
> triggered), typically, in ovn-controller incremental processing.
> Fundamental question is, how to handle the OVSDB changes made in current
> I-P iteration. Fortunately, ovn-controller doesn't really modify too much
> data in OVSDB (SB and local conf DB). For SB DB, they are only chassis
> (which we don't include in incremental processing), MAC_binding and
> Port_binding, and I don't see any side-effect yet, because those local
> changes are not handled and they will be handled in the next iteration when
> updates received from SB server, and handled through the change-tracking
> (which _is_new is also used to evaluate the type of change for the tracked
> records). For OVS_IDL (local conf DB), the first version of I-P doesn't do
> any incremental processing (always recompute), but the recent version does
> some I-P for interface changes. I think it is similar to the SB I-P, since
> we don't really handle the data changed by current iteration (maybe need
> more careful check for exceptions).
> 
> Other than ovn-controller I-P, I don't see any use case where we have to
> rely on the edge-trigger yet. (For the current only use case for the
> chassis regarding chassis-private monitoring, please see below)
> 
>>
>> Actually, the single use of _is_new() on potentially locally inserted
> records
>> in ovn-controller is exactly to avoid taking actions on records that have
> just
>> been created locally without ovsdb-server accepting the transaction [0]:
>>
>>
> https://github.com/ovn-org/ovn/blob/master/controller/ovn-controller.c#L184
>>
>> We avoid using chassis->header_.uuid as a clause in the monitor condition
> if
>> chassis is not yet committed to the DB.
>>
> I think using the chassis name in monitor condition should have avoided the
> problem (if not considering supporting the system-id change on-the-fly).
> 
>>>>
>>>> For the only place the _is_new() currently used for evaluating
> non-tracked change [0], it was introduced in [1], which we have discussed
> earlier that it is better to avoid the complex logic for handling system-id
> change, but let the operator to follow the steps of completely deleting and
> re-adding a chassis [2]. So I guess this kind of usage would not be
> necessary at least for existing use cases.
>>
>> As mentioned above, even if we refactor [1] by implementing [2] I'm not
> sure if
>> we get rid of the need of checking sbrec_chassis_is_new(chassis) in
>> update_sb_monitors().  I have to look again closely at that part.
>>
>> Regards,
>> Dumitru
>>
>>>>
>>>> P.S. Ilya found an old patch that I submitted a year ago [3] but never
> merged for fixing the _is_new() problem (that I've forgotten myself), which
> is similar to v1 but didn't use the "INSERT" seqno. I think the current
> patch (v1) from Mark is better, so @Ilya Maximets <mailto:i.maximets@ovn.org>
> please ignore my old patch.
>>>
>>> OK.  I'll mark it as superseded.
>>>
>>>>
>>>> [0]
> https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184
>>>> [1] commit dce1af31b5 ("chassis: Fix chassis_private record updates
> when the system-id changes.")
>>>> [2]
> https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374801.html
>>>> [3]
> https://patchwork.ozlabs.org/project/openvswitch/patch/1564100775-80657-1-git-send-email-hzhou8@ebay.com/
>>>>
>>>> Thanks,
>>>> Han
>>>>
>>>>>>> I didn't dig further though.
>>>>>>
>>>>>> Sorry for the delay. The following test fails:
>>>>>>
>>>>>>  90: ovn -- ensure one gw controller restart in HA doesn't bounce the
>>>>>> master FAILED (ovs-macros.at:220 <http://ovs-macros.at:220>)
>>>>>>
>>>>>> <snip>
>>>>>> ovn.at:12213 <http://ovn.at:12213>: waiting until test 1 =
> `ovn-sbctl --bare --columns=_uuid
>>>>>> find Chassis name=gw2 | wc -l`...
>>>>>> ovn.at:12213 <http://ovn.at:12213>: wait failed after 30 seconds
>>>>>> ./ovs-macros.at:220 <http://ovs-macros.at:220>: hard failure
>>>>>> 90. ovn.at:12139 <http://ovn.at:12139>: 90. ovn -- ensure one gw
> controller restart in HA
>>>>>> doesn't bounce the master (ovn.at:12139 <http://ovn.at:12139>):
> FAILED (ovs-macros.at:220 <http://ovs-macros.at:220>)
>>>>>> <snip>
>>>>>>
>>>>>> This is similar failure as the repoducer highlighted by Dumitru
> above.
>>>>>> It is resolved by:
>>>>>>
>>>>>> +++ b/lib/ovsdb-idl.c
>>>>>> @@ -4858,6 +4858,11 @@ ovsdb_idl_txn_insert(struct ovsdb_idl_txn
> *txn,
>>>>>>      hmap_insert(&row->table->rows, &row->hmap_node,
> uuid_hash(&row->uuid));
>>>>>>      hmap_insert(&txn->txn_rows, &row->txn_node,
> uuid_hash(&row->uuid));
>>>>>>      ovsdb_idl_add_to_indexes(row);
>>>>>> +
>>>>>> +    row->change_seqno[OVSDB_IDL_CHANGE_INSERT]
>>>>>> +        = row->table->change_seqno[OVSDB_IDL_CHANGE_INSERT]
>>>>>> +        = row->table->db->change_seqno + 1;
>>>>>> +
>>>>>>
>>>>>>
>>>>>>>
>>>>>>> Regards,
>>>>>>> Dumitru
>>>>>>>
>>>>>>
>>>>>
>>>>> v2 published at
>>>>>
> https://mail.openvswitch.org/pipermail/ovs-dev/2020-October/375962.html
>>>
>>
>
Dumitru Ceara Oct. 20, 2020, 9:23 a.m. UTC | #15
On 10/20/20 10:28 AM, Mark Gray wrote:
> On 19/10/2020 22:21, Han Zhou wrote:
> 
>>>>> If we make the change so that _is_new() can be used to evaluate
>> against local (uncommitted) changes, it may work but can be confusing. If a
>> row R is inserted by current transaction locally, _is_new(R) returns true,
>> but when the transaction is committed the local copy will be discarded, and
>> if the commit is successful the IDL will finally receive a server
>> notification and the _is_new(R) evaluation will return true again. I think
>> this could cause problems for the code that relies on _is_new(R). Moreover,
>> if we update the OVSDB_IDL_CHANGE_INSERT seq for locally inserted rows, we
>> may need to do the same for locally modified/deleted rows, to be
>> consistent, which needs more careful considerations for the implications of
>> the change.
>>>>>>>>> Because of the above concerns, I'd rather suggest to stick to the
>> original design (maybe document it more clearly) that the functions used
>> only for tracked changes.
> 
> Thanks for the feedback.
> 
> I think I agree with your points here and Ilya's below. It is something
> to keep in mind as we may need this type of functionality in the future.
> However, we should be able to resolve all our issues without it at this
> stage.
> 
> I will refactor this patch to remove the change that allows the use of
> _is_new() on potentially locally inserted records but I will wait until
> Dumitru implements the change to disallow changing the system-id
> on-the-fly. Dumitru, if you need any help here, please let me know.
> 

Hi Mark,

I sent a patch to remove the need for sbrec_chassis_is_new() in ovn-controller:

http://patchwork.ozlabs.org/project/ovn/patch/1603185512-8070-1-git-send-email-dceara@redhat.com/

I also tested it with the "ovsdb-idl: Fix *_is_new() IDL functions" patch
applied to OVS and OVN tests pass now.

> When this is all done, I will then resubmit the change in OVN which I
> originally started on which was fixing I-P for changes in
> requested-tnl-key!
> 
>>>>
>>>> One even bigger concern from my side is what process should not take
>> any actions
>>>> until ovsdb-server accepts the change.  Otherwise it might be very
>> tricky to
>>>> handle rejected transactions.  Even if it might be not a likely event
>> in OVN case.
>>>>
>> This is a valid concern, which I've thought about before. It seems OVSDB
>> IDL was supposed to be used in a declarative way (ideally), i.e. level
>> triggered instead of edge triggered. The IDL client typically runs in a
>> loop that takes current status of the IDL as input and computes a desired
>> output within the transaction and commits back to the server. This way, it
>> doesn't matter if a DB change is made in current transaction.
>>
>> However, there are use cases when we do need to handle changes (edge
>> triggered), typically, in ovn-controller incremental processing.
>> Fundamental question is, how to handle the OVSDB changes made in current
>> I-P iteration. Fortunately, ovn-controller doesn't really modify too much
>> data in OVSDB (SB and local conf DB). For SB DB, they are only chassis
>> (which we don't include in incremental processing), MAC_binding and
>> Port_binding, and I don't see any side-effect yet, because those local
>> changes are not handled and they will be handled in the next iteration when
>> updates received from SB server, and handled through the change-tracking
>> (which _is_new is also used to evaluate the type of change for the tracked
>> records). For OVS_IDL (local conf DB), the first version of I-P doesn't do
>> any incremental processing (always recompute), but the recent version does
>> some I-P for interface changes. I think it is similar to the SB I-P, since
>> we don't really handle the data changed by current iteration (maybe need
>> more careful check for exceptions).
>>
>> Other than ovn-controller I-P, I don't see any use case where we have to
>> rely on the edge-trigger yet. (For the current only use case for the
>> chassis regarding chassis-private monitoring, please see below)
>>
>>>
>>> Actually, the single use of _is_new() on potentially locally inserted
>> records
>>> in ovn-controller is exactly to avoid taking actions on records that have
>> just
>>> been created locally without ovsdb-server accepting the transaction [0]:
>>>
>>>
>> https://github.com/ovn-org/ovn/blob/master/controller/ovn-controller.c#L184
>>>
>>> We avoid using chassis->header_.uuid as a clause in the monitor condition
>> if
>>> chassis is not yet committed to the DB.
>>>
>> I think using the chassis name in monitor condition should have avoided the
>> problem (if not considering supporting the system-id change on-the-fly).
>>

Hi Han,

You're right, the patch I mentioned above fixes this.  We still need a change
to completely remove support for changing system-id on-the-fly and to document
how an operator can safely change system-id.  It's still on my list but I
didn't get to it yet.

Regards,
Dumitru

>>>>>
>>>>> For the only place the _is_new() currently used for evaluating
>> non-tracked change [0], it was introduced in [1], which we have discussed
>> earlier that it is better to avoid the complex logic for handling system-id
>> change, but let the operator to follow the steps of completely deleting and
>> re-adding a chassis [2]. So I guess this kind of usage would not be
>> necessary at least for existing use cases.
>>>
>>> As mentioned above, even if we refactor [1] by implementing [2] I'm not
>> sure if
>>> we get rid of the need of checking sbrec_chassis_is_new(chassis) in
>>> update_sb_monitors().  I have to look again closely at that part.
>>>
>>> Regards,
>>> Dumitru
>>>
>>>>>
>>>>> P.S. Ilya found an old patch that I submitted a year ago [3] but never
>> merged for fixing the _is_new() problem (that I've forgotten myself), which
>> is similar to v1 but didn't use the "INSERT" seqno. I think the current
>> patch (v1) from Mark is better, so @Ilya Maximets <mailto:i.maximets@ovn.org>
>> please ignore my old patch.
>>>>
>>>> OK.  I'll mark it as superseded.
>>>>
>>>>>
>>>>> [0]
>> https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184
>>>>> [1] commit dce1af31b5 ("chassis: Fix chassis_private record updates
>> when the system-id changes.")
>>>>> [2]
>> https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374801.html
>>>>> [3]
>> https://patchwork.ozlabs.org/project/openvswitch/patch/1564100775-80657-1-git-send-email-hzhou8@ebay.com/
>>>>>
>>>>> Thanks,
>>>>> Han
>>>>>
>>>>>>>> I didn't dig further though.
>>>>>>>
>>>>>>> Sorry for the delay. The following test fails:
>>>>>>>
>>>>>>>  90: ovn -- ensure one gw controller restart in HA doesn't bounce the
>>>>>>> master FAILED (ovs-macros.at:220 <http://ovs-macros.at:220>)
>>>>>>>
>>>>>>> <snip>
>>>>>>> ovn.at:12213 <http://ovn.at:12213>: waiting until test 1 =
>> `ovn-sbctl --bare --columns=_uuid
>>>>>>> find Chassis name=gw2 | wc -l`...
>>>>>>> ovn.at:12213 <http://ovn.at:12213>: wait failed after 30 seconds
>>>>>>> ./ovs-macros.at:220 <http://ovs-macros.at:220>: hard failure
>>>>>>> 90. ovn.at:12139 <http://ovn.at:12139>: 90. ovn -- ensure one gw
>> controller restart in HA
>>>>>>> doesn't bounce the master (ovn.at:12139 <http://ovn.at:12139>):
>> FAILED (ovs-macros.at:220 <http://ovs-macros.at:220>)
>>>>>>> <snip>
>>>>>>>
>>>>>>> This is similar failure as the repoducer highlighted by Dumitru
>> above.
>>>>>>> It is resolved by:
>>>>>>>
>>>>>>> +++ b/lib/ovsdb-idl.c
>>>>>>> @@ -4858,6 +4858,11 @@ ovsdb_idl_txn_insert(struct ovsdb_idl_txn
>> *txn,
>>>>>>>      hmap_insert(&row->table->rows, &row->hmap_node,
>> uuid_hash(&row->uuid));
>>>>>>>      hmap_insert(&txn->txn_rows, &row->txn_node,
>> uuid_hash(&row->uuid));
>>>>>>>      ovsdb_idl_add_to_indexes(row);
>>>>>>> +
>>>>>>> +    row->change_seqno[OVSDB_IDL_CHANGE_INSERT]
>>>>>>> +        = row->table->change_seqno[OVSDB_IDL_CHANGE_INSERT]
>>>>>>> +        = row->table->db->change_seqno + 1;
>>>>>>> +
>>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Dumitru
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>> v2 published at
>>>>>>
>> https://mail.openvswitch.org/pipermail/ovs-dev/2020-October/375962.html
>>>>
>>>
>>
>
Mark Gray Oct. 20, 2020, 11 a.m. UTC | #16
On 20/10/2020 10:23, Dumitru Ceara wrote:
> On 10/20/20 10:28 AM, Mark Gray wrote:
>> On 19/10/2020 22:21, Han Zhou wrote:
>>
>>>>>> If we make the change so that _is_new() can be used to evaluate
>>> against local (uncommitted) changes, it may work but can be confusing. If a
>>> row R is inserted by current transaction locally, _is_new(R) returns true,
>>> but when the transaction is committed the local copy will be discarded, and
>>> if the commit is successful the IDL will finally receive a server
>>> notification and the _is_new(R) evaluation will return true again. I think
>>> this could cause problems for the code that relies on _is_new(R). Moreover,
>>> if we update the OVSDB_IDL_CHANGE_INSERT seq for locally inserted rows, we
>>> may need to do the same for locally modified/deleted rows, to be
>>> consistent, which needs more careful considerations for the implications of
>>> the change.
>>>>>>>>>> Because of the above concerns, I'd rather suggest to stick to the
>>> original design (maybe document it more clearly) that the functions used
>>> only for tracked changes.
>>
>> Thanks for the feedback.
>>
>> I think I agree with your points here and Ilya's below. It is something
>> to keep in mind as we may need this type of functionality in the future.
>> However, we should be able to resolve all our issues without it at this
>> stage.
>>
>> I will refactor this patch to remove the change that allows the use of
>> _is_new() on potentially locally inserted records but I will wait until
>> Dumitru implements the change to disallow changing the system-id
>> on-the-fly. Dumitru, if you need any help here, please let me know.
>>
> 
> Hi Mark,
> 
> I sent a patch to remove the need for sbrec_chassis_is_new() in ovn-controller:
> 
> http://patchwork.ozlabs.org/project/ovn/patch/1603185512-8070-1-git-send-email-dceara@redhat.com/
> 
> I also tested it with the "ovsdb-idl: Fix *_is_new() IDL functions" patch
> applied to OVS and OVN tests pass now.

I tested this against my v1 patch and it resolved those failures. I
think I want to spin a v3 of this patch that will remove the check in
the local case but also update the documentation somewhere stating that
_is_new() is only for tracked changes -just to make it clear - in case
anyone tries to use it in this way again.
> 
>> When this is all done, I will then resubmit the change in OVN which I
>> originally started on which was fixing I-P for changes in
>> requested-tnl-key!
>>
>>>>>
>>>>> One even bigger concern from my side is what process should not take
>>> any actions
>>>>> until ovsdb-server accepts the change.  Otherwise it might be very
>>> tricky to
>>>>> handle rejected transactions.  Even if it might be not a likely event
>>> in OVN case.
>>>>>
>>> This is a valid concern, which I've thought about before. It seems OVSDB
>>> IDL was supposed to be used in a declarative way (ideally), i.e. level
>>> triggered instead of edge triggered. The IDL client typically runs in a
>>> loop that takes current status of the IDL as input and computes a desired
>>> output within the transaction and commits back to the server. This way, it
>>> doesn't matter if a DB change is made in current transaction.
>>>
>>> However, there are use cases when we do need to handle changes (edge
>>> triggered), typically, in ovn-controller incremental processing.
>>> Fundamental question is, how to handle the OVSDB changes made in current
>>> I-P iteration. Fortunately, ovn-controller doesn't really modify too much
>>> data in OVSDB (SB and local conf DB). For SB DB, they are only chassis
>>> (which we don't include in incremental processing), MAC_binding and
>>> Port_binding, and I don't see any side-effect yet, because those local
>>> changes are not handled and they will be handled in the next iteration when
>>> updates received from SB server, and handled through the change-tracking
>>> (which _is_new is also used to evaluate the type of change for the tracked
>>> records). For OVS_IDL (local conf DB), the first version of I-P doesn't do
>>> any incremental processing (always recompute), but the recent version does
>>> some I-P for interface changes. I think it is similar to the SB I-P, since
>>> we don't really handle the data changed by current iteration (maybe need
>>> more careful check for exceptions).
>>>
>>> Other than ovn-controller I-P, I don't see any use case where we have to
>>> rely on the edge-trigger yet. (For the current only use case for the
>>> chassis regarding chassis-private monitoring, please see below)
>>>
>>>>
>>>> Actually, the single use of _is_new() on potentially locally inserted
>>> records
>>>> in ovn-controller is exactly to avoid taking actions on records that have
>>> just
>>>> been created locally without ovsdb-server accepting the transaction [0]:
>>>>
>>>>
>>> https://github.com/ovn-org/ovn/blob/master/controller/ovn-controller.c#L184
>>>>
>>>> We avoid using chassis->header_.uuid as a clause in the monitor condition
>>> if
>>>> chassis is not yet committed to the DB.
>>>>
>>> I think using the chassis name in monitor condition should have avoided the
>>> problem (if not considering supporting the system-id change on-the-fly).
>>>
> 
> Hi Han,
> 
> You're right, the patch I mentioned above fixes this.  We still need a change
> to completely remove support for changing system-id on-the-fly and to document
> how an operator can safely change system-id.  It's still on my list but I
> didn't get to it yet.
> 
> Regards,
> Dumitru
> 
>>>>>>
>>>>>> For the only place the _is_new() currently used for evaluating
>>> non-tracked change [0], it was introduced in [1], which we have discussed
>>> earlier that it is better to avoid the complex logic for handling system-id
>>> change, but let the operator to follow the steps of completely deleting and
>>> re-adding a chassis [2]. So I guess this kind of usage would not be
>>> necessary at least for existing use cases.
>>>>
>>>> As mentioned above, even if we refactor [1] by implementing [2] I'm not
>>> sure if
>>>> we get rid of the need of checking sbrec_chassis_is_new(chassis) in
>>>> update_sb_monitors().  I have to look again closely at that part.
>>>>
>>>> Regards,
>>>> Dumitru
>>>>
>>>>>>
>>>>>> P.S. Ilya found an old patch that I submitted a year ago [3] but never
>>> merged for fixing the _is_new() problem (that I've forgotten myself), which
>>> is similar to v1 but didn't use the "INSERT" seqno. I think the current
>>> patch (v1) from Mark is better, so @Ilya Maximets <mailto:i.maximets@ovn.org>
>>> please ignore my old patch.
>>>>>
>>>>> OK.  I'll mark it as superseded.
>>>>>
>>>>>>
>>>>>> [0]
>>> https://github.com/ovn-org/ovn/blob/9d2e8d32fb9865513b70408a665184a67564390d/controller/ovn-controller.c#L184
>>>>>> [1] commit dce1af31b5 ("chassis: Fix chassis_private record updates
>>> when the system-id changes.")
>>>>>> [2]
>>> https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374801.html
>>>>>> [3]
>>> https://patchwork.ozlabs.org/project/openvswitch/patch/1564100775-80657-1-git-send-email-hzhou8@ebay.com/
>>>>>>
>>>>>> Thanks,
>>>>>> Han
>>>>>>
>>>>>>>>> I didn't dig further though.
>>>>>>>>
>>>>>>>> Sorry for the delay. The following test fails:
>>>>>>>>
>>>>>>>>  90: ovn -- ensure one gw controller restart in HA doesn't bounce the
>>>>>>>> master FAILED (ovs-macros.at:220 <http://ovs-macros.at:220>)
>>>>>>>>
>>>>>>>> <snip>
>>>>>>>> ovn.at:12213 <http://ovn.at:12213>: waiting until test 1 =
>>> `ovn-sbctl --bare --columns=_uuid
>>>>>>>> find Chassis name=gw2 | wc -l`...
>>>>>>>> ovn.at:12213 <http://ovn.at:12213>: wait failed after 30 seconds
>>>>>>>> ./ovs-macros.at:220 <http://ovs-macros.at:220>: hard failure
>>>>>>>> 90. ovn.at:12139 <http://ovn.at:12139>: 90. ovn -- ensure one gw
>>> controller restart in HA
>>>>>>>> doesn't bounce the master (ovn.at:12139 <http://ovn.at:12139>):
>>> FAILED (ovs-macros.at:220 <http://ovs-macros.at:220>)
>>>>>>>> <snip>
>>>>>>>>
>>>>>>>> This is similar failure as the repoducer highlighted by Dumitru
>>> above.
>>>>>>>> It is resolved by:
>>>>>>>>
>>>>>>>> +++ b/lib/ovsdb-idl.c
>>>>>>>> @@ -4858,6 +4858,11 @@ ovsdb_idl_txn_insert(struct ovsdb_idl_txn
>>> *txn,
>>>>>>>>      hmap_insert(&row->table->rows, &row->hmap_node,
>>> uuid_hash(&row->uuid));
>>>>>>>>      hmap_insert(&txn->txn_rows, &row->txn_node,
>>> uuid_hash(&row->uuid));
>>>>>>>>      ovsdb_idl_add_to_indexes(row);
>>>>>>>> +
>>>>>>>> +    row->change_seqno[OVSDB_IDL_CHANGE_INSERT]
>>>>>>>> +        = row->table->change_seqno[OVSDB_IDL_CHANGE_INSERT]
>>>>>>>> +        = row->table->db->change_seqno + 1;
>>>>>>>> +
>>>>>>>>
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Regards,
>>>>>>>>> Dumitru
>>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>> v2 published at
>>>>>>>
>>> https://mail.openvswitch.org/pipermail/ovs-dev/2020-October/375962.html
>>>>>
>>>>
>>>
>>
>
diff mbox series

Patch

diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index d8f221ca6..3cfd73871 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -1959,6 +1959,11 @@  ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
                     free(row->updated);
                     row->updated = NULL;
                 }
+
+                row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
+                    row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
+                    row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
+
                 ovs_list_remove(&row->track_node);
                 ovs_list_init(&row->track_node);
                 if (ovsdb_idl_row_is_orphan(row) && row->tracked_old_datum) {
@@ -2684,24 +2689,27 @@  ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
     return OVSDB_IDL_UPDATE_DB_CHANGED;
 }
 
-/* Recursively add rows to tracked change lists for current row
- * and the rows that reference this row. */
+/* Recursively add rows to tracked change lists for all rows that reference
+   'row'. */
 static void
 add_tracked_change_for_references(struct ovsdb_idl_row *row)
 {
-    if (ovs_list_is_empty(&row->track_node) &&
-            ovsdb_idl_track_is_set(row->table)) {
-        ovs_list_push_back(&row->table->track_list,
-                           &row->track_node);
-        row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
-            = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
-            = row->table->db->change_seqno + 1;
-
         const struct ovsdb_idl_arc *arc;
         LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
-            add_tracked_change_for_references(arc->src);
+            struct ovsdb_idl_row *ref = arc->src;
+
+            if (ovs_list_is_empty(&ref->track_node) &&
+                ovsdb_idl_track_is_set(ref->table)) {
+                    ovs_list_push_back(&ref->table->track_list,
+                                       &ref->track_node);
+
+                ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
+                    = ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
+                    = ref->table->db->change_seqno + 1;
+
+                add_tracked_change_for_references(ref);
+            }
         }
-    }
 }
 
 
@@ -2767,7 +2775,14 @@  ovsdb_idl_row_change__(struct ovsdb_idl_row *row, const struct json *row_json,
                     row->change_seqno[change]
                         = row->table->change_seqno[change]
                         = row->table->db->change_seqno + 1;
+
                     if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
+                        if (ovs_list_is_empty(&row->track_node) &&
+                            ovsdb_idl_track_is_set(row->table)) {
+                            ovs_list_push_back(&row->table->track_list,
+                                               &row->track_node);
+                        }
+
                         add_tracked_change_for_references(row);
                         if (!row->updated) {
                             row->updated = bitmap_allocate(class->n_columns);
diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in
index 698fe25f3..89c3eb682 100755
--- a/ovsdb/ovsdb-idlc.in
+++ b/ovsdb/ovsdb-idlc.in
@@ -282,7 +282,7 @@  const struct %(s)s *%(s)s_table_track_get_first(const struct %(s)s_table *);
 /* Returns true if 'row' was inserted since the last change tracking reset. */
 static inline bool %(s)s_is_new(const struct %(s)s *row)
 {
-    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0;
+    return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;
 }
 
 /* Returns true if 'row' was deleted since the last change tracking reset. */