diff mbox series

[ovs-dev,v2,3/3] ovsdb-idl: Fix use-after-free when deleting orphaned rows.

Message ID 20201130164138.13982.39450.stgit@dceara.remote.csb
State Accepted
Headers show
Series ovsdb-idl: Fix IDL use-after-free and memory leak issues. | expand

Commit Message

Dumitru Ceara Nov. 30, 2020, 4:41 p.m. UTC
It's possible that the IDL client processes multiple jsonrpc updates
in a single ovsdb_idl_run().

Considering the following updates processed in a single IDL run:
1. Update row R1 from table A while R1 is also referenced by row R2 from
   table B:
   - this adds R1 to table A's track_list.
2. Delete row R1 from table A while R1 is also referenced by row R2 from
   table B:
   - because row R2 still refers to row R1, this will create an orphan
     R1.
   - at this point R1 is still in table A's hmap.

When the IDL client calls ovsdb_idl_track_clear() after it has finished
processing the tracked changes, row R1 gets freed leaving a dangling
pointer in table A's hmap.

To fix this we don't free rows in ovsdb_idl_track_clear() if they are
orphan and still referenced by other rows, i.e., the row's 'dst_arcs'
list is not empty.  Later, when all arc sources (e.g., R2) are
deleted, the orphan R1 will be cleaned up as well.

The only exception is when the whole contents of the IDL are flushed,
in ovsdb_idl_db_clear(), in which case it's safe to free all rows.

Reported-by: Ilya Maximets <i.maximets@ovn.org>
Fixes: 932104f483ef ("ovsdb-idl: Add support for change tracking.")
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
---
 lib/ovsdb-idl.c |   21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

Comments

Han Zhou Nov. 30, 2020, 7:45 p.m. UTC | #1
On Mon, Nov 30, 2020 at 8:41 AM Dumitru Ceara <dceara@redhat.com> wrote:
>
> It's possible that the IDL client processes multiple jsonrpc updates
> in a single ovsdb_idl_run().
>
> Considering the following updates processed in a single IDL run:
> 1. Update row R1 from table A while R1 is also referenced by row R2 from
>    table B:
>    - this adds R1 to table A's track_list.
> 2. Delete row R1 from table A while R1 is also referenced by row R2 from
>    table B:
>    - because row R2 still refers to row R1, this will create an orphan
>      R1.
>    - at this point R1 is still in table A's hmap.
>
> When the IDL client calls ovsdb_idl_track_clear() after it has finished
> processing the tracked changes, row R1 gets freed leaving a dangling
> pointer in table A's hmap.
>
> To fix this we don't free rows in ovsdb_idl_track_clear() if they are
> orphan and still referenced by other rows, i.e., the row's 'dst_arcs'
> list is not empty.  Later, when all arc sources (e.g., R2) are
> deleted, the orphan R1 will be cleaned up as well.
>
> The only exception is when the whole contents of the IDL are flushed,
> in ovsdb_idl_db_clear(), in which case it's safe to free all rows.
>
> Reported-by: Ilya Maximets <i.maximets@ovn.org>
> Fixes: 932104f483ef ("ovsdb-idl: Add support for change tracking.")
> Signed-off-by: Dumitru Ceara <dceara@redhat.com>
> ---
>  lib/ovsdb-idl.c |   21 ++++++++++++++++-----
>  1 file changed, 16 insertions(+), 5 deletions(-)
>
> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> index e0c9833..0841a2e 100644
> --- a/lib/ovsdb-idl.c
> +++ b/lib/ovsdb-idl.c
> @@ -221,7 +221,7 @@ struct ovsdb_idl_db {
>      struct uuid last_id;
>  };
>
> -static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *);
> +static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *, bool
flush_all);
>  static void ovsdb_idl_db_add_column(struct ovsdb_idl_db *,
>                                      const struct ovsdb_idl_column *);
>  static void ovsdb_idl_db_omit(struct ovsdb_idl_db *,
> @@ -660,7 +660,7 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db)
>      ovsdb_idl_row_destroy_postprocess(db);
>
>      db->cond_seqno = 0;
> -    ovsdb_idl_db_track_clear(db);
> +    ovsdb_idl_db_track_clear(db, true);
>
>      if (changed) {
>          db->change_seqno++;
> @@ -1955,7 +1955,7 @@ ovsdb_idl_track_is_updated(const struct
ovsdb_idl_row *row,
>   * loop when it is ready to do ovsdb_idl_run() again.
>   */
>  static void
> -ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
> +ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db, bool flush_all)
>  {
>      size_t i;
>
> @@ -1989,7 +1989,18 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
>                          free(row->tracked_old_datum);
>                          row->tracked_old_datum = NULL;
>                      }
> -                    free(row);
> +
> +                    /* Rows that were reused as orphan after being
processed
> +                     * for deletion are still in the table hmap and will
be
> +                     * cleaned up when their src arcs are removed.
> +                     *
> +                     * The exception is when 'destroy' is explicitly set
to
> +                     * 'true' which usually happens when the complete IDL
> +                     * contents are being flushed.
> +                     */
> +                    if (flush_all || ovs_list_is_empty(&row->dst_arcs)) {
> +                        free(row);
> +                    }

Here is a problem. The row R1 is now unparsed and datum freed. However,
when R2 is deleted, R1 will be pushed to the tracked change list again and
when the user examines it in tracked changes and accesses the old data, it
will crash.

>                  }
>              }
>          }
> @@ -2004,7 +2015,7 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
>  void
>  ovsdb_idl_track_clear(struct ovsdb_idl *idl)
>  {
> -    ovsdb_idl_db_track_clear(&idl->data);
> +    ovsdb_idl_db_track_clear(&idl->data, false);
>  }
>
>  static void
>
Ilya Maximets Nov. 30, 2020, 7:56 p.m. UTC | #2
On 11/30/20 8:45 PM, Han Zhou wrote:
> 
> 
> On Mon, Nov 30, 2020 at 8:41 AM Dumitru Ceara <dceara@redhat.com <mailto:dceara@redhat.com>> wrote:
>>
>> It's possible that the IDL client processes multiple jsonrpc updates
>> in a single ovsdb_idl_run().
>>
>> Considering the following updates processed in a single IDL run:
>> 1. Update row R1 from table A while R1 is also referenced by row R2 from
>>    table B:
>>    - this adds R1 to table A's track_list.
>> 2. Delete row R1 from table A while R1 is also referenced by row R2 from
>>    table B:
>>    - because row R2 still refers to row R1, this will create an orphan
>>      R1.
>>    - at this point R1 is still in table A's hmap.
>>
>> When the IDL client calls ovsdb_idl_track_clear() after it has finished
>> processing the tracked changes, row R1 gets freed leaving a dangling
>> pointer in table A's hmap.
>>
>> To fix this we don't free rows in ovsdb_idl_track_clear() if they are
>> orphan and still referenced by other rows, i.e., the row's 'dst_arcs'
>> list is not empty.  Later, when all arc sources (e.g., R2) are
>> deleted, the orphan R1 will be cleaned up as well.
>>
>> The only exception is when the whole contents of the IDL are flushed,
>> in ovsdb_idl_db_clear(), in which case it's safe to free all rows.
>>
>> Reported-by: Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>>
>> Fixes: 932104f483ef ("ovsdb-idl: Add support for change tracking.")
>> Signed-off-by: Dumitru Ceara <dceara@redhat.com <mailto:dceara@redhat.com>>
>> ---
>>  lib/ovsdb-idl.c |   21 ++++++++++++++++-----
>>  1 file changed, 16 insertions(+), 5 deletions(-)
>>
>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>> index e0c9833..0841a2e 100644
>> --- a/lib/ovsdb-idl.c
>> +++ b/lib/ovsdb-idl.c
>> @@ -221,7 +221,7 @@ struct ovsdb_idl_db {
>>      struct uuid last_id;
>>  };
>>
>> -static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *);
>> +static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *, bool flush_all);
>>  static void ovsdb_idl_db_add_column(struct ovsdb_idl_db *,
>>                                      const struct ovsdb_idl_column *);
>>  static void ovsdb_idl_db_omit(struct ovsdb_idl_db *,
>> @@ -660,7 +660,7 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db)
>>      ovsdb_idl_row_destroy_postprocess(db);
>>
>>      db->cond_seqno = 0;
>> -    ovsdb_idl_db_track_clear(db);
>> +    ovsdb_idl_db_track_clear(db, true);
>>
>>      if (changed) {
>>          db->change_seqno++;
>> @@ -1955,7 +1955,7 @@ ovsdb_idl_track_is_updated(const struct ovsdb_idl_row *row,
>>   * loop when it is ready to do ovsdb_idl_run() again.
>>   */
>>  static void
>> -ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
>> +ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db, bool flush_all)
>>  {
>>      size_t i;
>>
>> @@ -1989,7 +1989,18 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
>>                          free(row->tracked_old_datum);
>>                          row->tracked_old_datum = NULL;
>>                      }
>> -                    free(row);
>> +
>> +                    /* Rows that were reused as orphan after being processed
>> +                     * for deletion are still in the table hmap and will be
>> +                     * cleaned up when their src arcs are removed.
>> +                     *
>> +                     * The exception is when 'destroy' is explicitly set to
>> +                     * 'true' which usually happens when the complete IDL
>> +                     * contents are being flushed.
>> +                     */
>> +                    if (flush_all || ovs_list_is_empty(&row->dst_arcs)) {
>> +                        free(row);
>> +                    }
> 
> Here is a problem. The row R1 is now unparsed and datum freed. However, when R2 is deleted, R1 will be pushed to the tracked change list again and when the user examines it in tracked changes and accesses the old data, it will crash.

It is an ophan row and, also, tracked_old_datum already freed,
so it should not be iterable.  See:
f0d23f67954c ("ovsdb-idl: Fix iteration over tracked rows with no actual data.")

> 
>>                  }
>>              }
>>          }
>> @@ -2004,7 +2015,7 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
>>  void
>>  ovsdb_idl_track_clear(struct ovsdb_idl *idl)
>>  {
>> -    ovsdb_idl_db_track_clear(&idl->data);
>> +    ovsdb_idl_db_track_clear(&idl->data, false);
>>  }
>>
>>  static void
>>
Han Zhou Nov. 30, 2020, 8:18 p.m. UTC | #3
On Mon, Nov 30, 2020 at 11:56 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 11/30/20 8:45 PM, Han Zhou wrote:
> >
> >
> > On Mon, Nov 30, 2020 at 8:41 AM Dumitru Ceara <dceara@redhat.com
<mailto:dceara@redhat.com>> wrote:
> >>
> >> It's possible that the IDL client processes multiple jsonrpc updates
> >> in a single ovsdb_idl_run().
> >>
> >> Considering the following updates processed in a single IDL run:
> >> 1. Update row R1 from table A while R1 is also referenced by row R2
from
> >>    table B:
> >>    - this adds R1 to table A's track_list.
> >> 2. Delete row R1 from table A while R1 is also referenced by row R2
from
> >>    table B:
> >>    - because row R2 still refers to row R1, this will create an orphan
> >>      R1.
> >>    - at this point R1 is still in table A's hmap.
> >>
> >> When the IDL client calls ovsdb_idl_track_clear() after it has finished
> >> processing the tracked changes, row R1 gets freed leaving a dangling
> >> pointer in table A's hmap.
> >>
> >> To fix this we don't free rows in ovsdb_idl_track_clear() if they are
> >> orphan and still referenced by other rows, i.e., the row's 'dst_arcs'
> >> list is not empty.  Later, when all arc sources (e.g., R2) are
> >> deleted, the orphan R1 will be cleaned up as well.
> >>
> >> The only exception is when the whole contents of the IDL are flushed,
> >> in ovsdb_idl_db_clear(), in which case it's safe to free all rows.
> >>
> >> Reported-by: Ilya Maximets <i.maximets@ovn.org <mailto:
i.maximets@ovn.org>>
> >> Fixes: 932104f483ef ("ovsdb-idl: Add support for change tracking.")
> >> Signed-off-by: Dumitru Ceara <dceara@redhat.com <mailto:
dceara@redhat.com>>
> >> ---
> >>  lib/ovsdb-idl.c |   21 ++++++++++++++++-----
> >>  1 file changed, 16 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> >> index e0c9833..0841a2e 100644
> >> --- a/lib/ovsdb-idl.c
> >> +++ b/lib/ovsdb-idl.c
> >> @@ -221,7 +221,7 @@ struct ovsdb_idl_db {
> >>      struct uuid last_id;
> >>  };
> >>
> >> -static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *);
> >> +static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *, bool
flush_all);
> >>  static void ovsdb_idl_db_add_column(struct ovsdb_idl_db *,
> >>                                      const struct ovsdb_idl_column *);
> >>  static void ovsdb_idl_db_omit(struct ovsdb_idl_db *,
> >> @@ -660,7 +660,7 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db)
> >>      ovsdb_idl_row_destroy_postprocess(db);
> >>
> >>      db->cond_seqno = 0;
> >> -    ovsdb_idl_db_track_clear(db);
> >> +    ovsdb_idl_db_track_clear(db, true);
> >>
> >>      if (changed) {
> >>          db->change_seqno++;
> >> @@ -1955,7 +1955,7 @@ ovsdb_idl_track_is_updated(const struct
ovsdb_idl_row *row,
> >>   * loop when it is ready to do ovsdb_idl_run() again.
> >>   */
> >>  static void
> >> -ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
> >> +ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db, bool flush_all)
> >>  {
> >>      size_t i;
> >>
> >> @@ -1989,7 +1989,18 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db
*db)
> >>                          free(row->tracked_old_datum);
> >>                          row->tracked_old_datum = NULL;
> >>                      }
> >> -                    free(row);
> >> +
> >> +                    /* Rows that were reused as orphan after being
processed
> >> +                     * for deletion are still in the table hmap and
will be
> >> +                     * cleaned up when their src arcs are removed.
> >> +                     *
> >> +                     * The exception is when 'destroy' is explicitly
set to
> >> +                     * 'true' which usually happens when the complete
IDL
> >> +                     * contents are being flushed.
> >> +                     */
> >> +                    if (flush_all ||
ovs_list_is_empty(&row->dst_arcs)) {
> >> +                        free(row);
> >> +                    }
> >
> > Here is a problem. The row R1 is now unparsed and datum freed. However,
when R2 is deleted, R1 will be pushed to the tracked change list again and
when the user examines it in tracked changes and accesses the old data, it
will crash.
>
> It is an ophan row and, also, tracked_old_datum already freed,
> so it should not be iterable.  See:
> f0d23f67954c ("ovsdb-idl: Fix iteration over tracked rows with no actual
data.")
>
Thanks Ilya for the explanation. This makes sense. So the row becomes a
"pure orphaned row" after being handled here in ovsdb_idl_db_track_clear().
I wonder if some comments should be added to the code to clarify these
tricky scenarios.

Acked-by: Han Zhou <hzhou@ovn.org>

> >
> >>                  }
> >>              }
> >>          }
> >> @@ -2004,7 +2015,7 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
> >>  void
> >>  ovsdb_idl_track_clear(struct ovsdb_idl *idl)
> >>  {
> >> -    ovsdb_idl_db_track_clear(&idl->data);
> >> +    ovsdb_idl_db_track_clear(&idl->data, false);
> >>  }
> >>
> >>  static void
> >>
>
Dumitru Ceara Dec. 1, 2020, 8:36 a.m. UTC | #4
On Mon, Nov 30, 2020 at 9:18 PM Han Zhou <hzhou@ovn.org> wrote:
>
>
>
> On Mon, Nov 30, 2020 at 11:56 AM Ilya Maximets <i.maximets@ovn.org> wrote:
> >
> > On 11/30/20 8:45 PM, Han Zhou wrote:
> > >
> > >
> > > On Mon, Nov 30, 2020 at 8:41 AM Dumitru Ceara <dceara@redhat.com <mailto:dceara@redhat.com>> wrote:
> > >>
> > >> It's possible that the IDL client processes multiple jsonrpc updates
> > >> in a single ovsdb_idl_run().
> > >>
> > >> Considering the following updates processed in a single IDL run:
> > >> 1. Update row R1 from table A while R1 is also referenced by row R2 from
> > >>    table B:
> > >>    - this adds R1 to table A's track_list.
> > >> 2. Delete row R1 from table A while R1 is also referenced by row R2 from
> > >>    table B:
> > >>    - because row R2 still refers to row R1, this will create an orphan
> > >>      R1.
> > >>    - at this point R1 is still in table A's hmap.
> > >>
> > >> When the IDL client calls ovsdb_idl_track_clear() after it has finished
> > >> processing the tracked changes, row R1 gets freed leaving a dangling
> > >> pointer in table A's hmap.
> > >>
> > >> To fix this we don't free rows in ovsdb_idl_track_clear() if they are
> > >> orphan and still referenced by other rows, i.e., the row's 'dst_arcs'
> > >> list is not empty.  Later, when all arc sources (e.g., R2) are
> > >> deleted, the orphan R1 will be cleaned up as well.
> > >>
> > >> The only exception is when the whole contents of the IDL are flushed,
> > >> in ovsdb_idl_db_clear(), in which case it's safe to free all rows.
> > >>
> > >> Reported-by: Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>>
> > >> Fixes: 932104f483ef ("ovsdb-idl: Add support for change tracking.")
> > >> Signed-off-by: Dumitru Ceara <dceara@redhat.com <mailto:dceara@redhat.com>>
> > >> ---
> > >>  lib/ovsdb-idl.c |   21 ++++++++++++++++-----
> > >>  1 file changed, 16 insertions(+), 5 deletions(-)
> > >>
> > >> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> > >> index e0c9833..0841a2e 100644
> > >> --- a/lib/ovsdb-idl.c
> > >> +++ b/lib/ovsdb-idl.c
> > >> @@ -221,7 +221,7 @@ struct ovsdb_idl_db {
> > >>      struct uuid last_id;
> > >>  };
> > >>
> > >> -static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *);
> > >> +static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *, bool flush_all);
> > >>  static void ovsdb_idl_db_add_column(struct ovsdb_idl_db *,
> > >>                                      const struct ovsdb_idl_column *);
> > >>  static void ovsdb_idl_db_omit(struct ovsdb_idl_db *,
> > >> @@ -660,7 +660,7 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db)
> > >>      ovsdb_idl_row_destroy_postprocess(db);
> > >>
> > >>      db->cond_seqno = 0;
> > >> -    ovsdb_idl_db_track_clear(db);
> > >> +    ovsdb_idl_db_track_clear(db, true);
> > >>
> > >>      if (changed) {
> > >>          db->change_seqno++;
> > >> @@ -1955,7 +1955,7 @@ ovsdb_idl_track_is_updated(const struct ovsdb_idl_row *row,
> > >>   * loop when it is ready to do ovsdb_idl_run() again.
> > >>   */
> > >>  static void
> > >> -ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
> > >> +ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db, bool flush_all)
> > >>  {
> > >>      size_t i;
> > >>
> > >> @@ -1989,7 +1989,18 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
> > >>                          free(row->tracked_old_datum);
> > >>                          row->tracked_old_datum = NULL;
> > >>                      }
> > >> -                    free(row);
> > >> +
> > >> +                    /* Rows that were reused as orphan after being processed
> > >> +                     * for deletion are still in the table hmap and will be
> > >> +                     * cleaned up when their src arcs are removed.
> > >> +                     *
> > >> +                     * The exception is when 'destroy' is explicitly set to
> > >> +                     * 'true' which usually happens when the complete IDL
> > >> +                     * contents are being flushed.
> > >> +                     */
> > >> +                    if (flush_all || ovs_list_is_empty(&row->dst_arcs)) {
> > >> +                        free(row);
> > >> +                    }
> > >
> > > Here is a problem. The row R1 is now unparsed and datum freed. However, when R2 is deleted, R1 will be pushed to the tracked change list again and when the user examines it in tracked changes and accesses the old data, it will crash.
> >
> > It is an ophan row and, also, tracked_old_datum already freed,
> > so it should not be iterable.  See:
> > f0d23f67954c ("ovsdb-idl: Fix iteration over tracked rows with no actual data.")
> >
> Thanks Ilya for the explanation. This makes sense. So the row becomes a "pure orphaned row" after being handled here in ovsdb_idl_db_track_clear(). I wonder if some comments should be added to the code to clarify these tricky scenarios.
>

Hi Han, Ilya,

Thanks for the reviews!
Would the following incremental work for you to better document the behavior?

Thanks,
Dumitru

diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 0841a2e..50d790f 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -1992,7 +1992,9 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db
*db, bool flush_all)

                     /* Rows that were reused as orphan after being processed
                      * for deletion are still in the table hmap and will be
-                     * cleaned up when their src arcs are removed.
+                     * cleaned up when their src arcs are removed.  These rows
+                     * will not be reported anymore as "deleted" to IDL
+                     * clients.
                      *
                      * The exception is when 'destroy' is explicitly set to
                      * 'true' which usually happens when the complete IDL
Han Zhou Dec. 2, 2020, 7:59 a.m. UTC | #5
On Tue, Dec 1, 2020 at 12:36 AM Dumitru Ceara <dceara@redhat.com> wrote:
>
> On Mon, Nov 30, 2020 at 9:18 PM Han Zhou <hzhou@ovn.org> wrote:
> >
> >
> >
> > On Mon, Nov 30, 2020 at 11:56 AM Ilya Maximets <i.maximets@ovn.org>
wrote:
> > >
> > > On 11/30/20 8:45 PM, Han Zhou wrote:
> > > >
> > > >
> > > > On Mon, Nov 30, 2020 at 8:41 AM Dumitru Ceara <dceara@redhat.com
<mailto:dceara@redhat.com>> wrote:
> > > >>
> > > >> It's possible that the IDL client processes multiple jsonrpc
updates
> > > >> in a single ovsdb_idl_run().
> > > >>
> > > >> Considering the following updates processed in a single IDL run:
> > > >> 1. Update row R1 from table A while R1 is also referenced by row
R2 from
> > > >>    table B:
> > > >>    - this adds R1 to table A's track_list.
> > > >> 2. Delete row R1 from table A while R1 is also referenced by row
R2 from
> > > >>    table B:
> > > >>    - because row R2 still refers to row R1, this will create an
orphan
> > > >>      R1.
> > > >>    - at this point R1 is still in table A's hmap.
> > > >>
> > > >> When the IDL client calls ovsdb_idl_track_clear() after it has
finished
> > > >> processing the tracked changes, row R1 gets freed leaving a
dangling
> > > >> pointer in table A's hmap.
> > > >>
> > > >> To fix this we don't free rows in ovsdb_idl_track_clear() if they
are
> > > >> orphan and still referenced by other rows, i.e., the row's
'dst_arcs'
> > > >> list is not empty.  Later, when all arc sources (e.g., R2) are
> > > >> deleted, the orphan R1 will be cleaned up as well.
> > > >>
> > > >> The only exception is when the whole contents of the IDL are
flushed,
> > > >> in ovsdb_idl_db_clear(), in which case it's safe to free all rows.
> > > >>
> > > >> Reported-by: Ilya Maximets <i.maximets@ovn.org <mailto:
i.maximets@ovn.org>>
> > > >> Fixes: 932104f483ef ("ovsdb-idl: Add support for change tracking.")
> > > >> Signed-off-by: Dumitru Ceara <dceara@redhat.com <mailto:
dceara@redhat.com>>
> > > >> ---
> > > >>  lib/ovsdb-idl.c |   21 ++++++++++++++++-----
> > > >>  1 file changed, 16 insertions(+), 5 deletions(-)
> > > >>
> > > >> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> > > >> index e0c9833..0841a2e 100644
> > > >> --- a/lib/ovsdb-idl.c
> > > >> +++ b/lib/ovsdb-idl.c
> > > >> @@ -221,7 +221,7 @@ struct ovsdb_idl_db {
> > > >>      struct uuid last_id;
> > > >>  };
> > > >>
> > > >> -static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *);
> > > >> +static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *, bool
flush_all);
> > > >>  static void ovsdb_idl_db_add_column(struct ovsdb_idl_db *,
> > > >>                                      const struct ovsdb_idl_column
*);
> > > >>  static void ovsdb_idl_db_omit(struct ovsdb_idl_db *,
> > > >> @@ -660,7 +660,7 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db)
> > > >>      ovsdb_idl_row_destroy_postprocess(db);
> > > >>
> > > >>      db->cond_seqno = 0;
> > > >> -    ovsdb_idl_db_track_clear(db);
> > > >> +    ovsdb_idl_db_track_clear(db, true);
> > > >>
> > > >>      if (changed) {
> > > >>          db->change_seqno++;
> > > >> @@ -1955,7 +1955,7 @@ ovsdb_idl_track_is_updated(const struct
ovsdb_idl_row *row,
> > > >>   * loop when it is ready to do ovsdb_idl_run() again.
> > > >>   */
> > > >>  static void
> > > >> -ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
> > > >> +ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db, bool flush_all)
> > > >>  {
> > > >>      size_t i;
> > > >>
> > > >> @@ -1989,7 +1989,18 @@ ovsdb_idl_db_track_clear(struct
ovsdb_idl_db *db)
> > > >>                          free(row->tracked_old_datum);
> > > >>                          row->tracked_old_datum = NULL;
> > > >>                      }
> > > >> -                    free(row);
> > > >> +
> > > >> +                    /* Rows that were reused as orphan after
being processed
> > > >> +                     * for deletion are still in the table hmap
and will be
> > > >> +                     * cleaned up when their src arcs are removed.
> > > >> +                     *
> > > >> +                     * The exception is when 'destroy' is
explicitly set to
> > > >> +                     * 'true' which usually happens when the
complete IDL
> > > >> +                     * contents are being flushed.
> > > >> +                     */
> > > >> +                    if (flush_all ||
ovs_list_is_empty(&row->dst_arcs)) {
> > > >> +                        free(row);
> > > >> +                    }
> > > >
> > > > Here is a problem. The row R1 is now unparsed and datum freed.
However, when R2 is deleted, R1 will be pushed to the tracked change list
again and when the user examines it in tracked changes and accesses the old
data, it will crash.
> > >
> > > It is an ophan row and, also, tracked_old_datum already freed,
> > > so it should not be iterable.  See:
> > > f0d23f67954c ("ovsdb-idl: Fix iteration over tracked rows with no
actual data.")
> > >
> > Thanks Ilya for the explanation. This makes sense. So the row becomes a
"pure orphaned row" after being handled here in ovsdb_idl_db_track_clear().
I wonder if some comments should be added to the code to clarify these
tricky scenarios.
> >
>
> Hi Han, Ilya,
>
> Thanks for the reviews!
> Would the following incremental work for you to better document the
behavior?
>
> Thanks,
> Dumitru
>
> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> index 0841a2e..50d790f 100644
> --- a/lib/ovsdb-idl.c
> +++ b/lib/ovsdb-idl.c
> @@ -1992,7 +1992,9 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db
> *db, bool flush_all)
>
>                      /* Rows that were reused as orphan after being
processed
>                       * for deletion are still in the table hmap and will
be
> -                     * cleaned up when their src arcs are removed.
> +                     * cleaned up when their src arcs are removed.
These rows
> +                     * will not be reported anymore as "deleted" to IDL
> +                     * clients.
>                       *
>                       * The exception is when 'destroy' is explicitly set
to
>                       * 'true' which usually happens when the complete IDL
>

Thanks Dumitru. Yes this comment is helpful. In addition, I was indeed
talking about documenting the "Pure orphaned row" somewhere. This is a
tricky scenario and not easy to capture by reading the code. I only see
some explanation in commit messages. (Of course, it should be a separate
patch). I think I have acked all the patches of the series, but just in
case I forgot:

Acked-by: Han Zhou <hzhou@ovn.org>
Dumitru Ceara Dec. 2, 2020, 8:42 a.m. UTC | #6
On 12/2/20 8:59 AM, Han Zhou wrote:
> 
> 
> On Tue, Dec 1, 2020 at 12:36 AM Dumitru Ceara <dceara@redhat.com
> <mailto:dceara@redhat.com>> wrote:
>>
>> On Mon, Nov 30, 2020 at 9:18 PM Han Zhou <hzhou@ovn.org
> <mailto:hzhou@ovn.org>> wrote:
>> >
>> >
>> >
>> > On Mon, Nov 30, 2020 at 11:56 AM Ilya Maximets <i.maximets@ovn.org
> <mailto:i.maximets@ovn.org>> wrote:
>> > >
>> > > On 11/30/20 8:45 PM, Han Zhou wrote:
>> > > >
>> > > >
>> > > > On Mon, Nov 30, 2020 at 8:41 AM Dumitru Ceara <dceara@redhat.com
> <mailto:dceara@redhat.com> <mailto:dceara@redhat.com
> <mailto:dceara@redhat.com>>> wrote:
>> > > >>
>> > > >> It's possible that the IDL client processes multiple jsonrpc
> updates
>> > > >> in a single ovsdb_idl_run().
>> > > >>
>> > > >> Considering the following updates processed in a single IDL run:
>> > > >> 1. Update row R1 from table A while R1 is also referenced by
> row R2 from
>> > > >>    table B:
>> > > >>    - this adds R1 to table A's track_list.
>> > > >> 2. Delete row R1 from table A while R1 is also referenced by
> row R2 from
>> > > >>    table B:
>> > > >>    - because row R2 still refers to row R1, this will create an
> orphan
>> > > >>      R1.
>> > > >>    - at this point R1 is still in table A's hmap.
>> > > >>
>> > > >> When the IDL client calls ovsdb_idl_track_clear() after it has
> finished
>> > > >> processing the tracked changes, row R1 gets freed leaving a
> dangling
>> > > >> pointer in table A's hmap.
>> > > >>
>> > > >> To fix this we don't free rows in ovsdb_idl_track_clear() if
> they are
>> > > >> orphan and still referenced by other rows, i.e., the row's
> 'dst_arcs'
>> > > >> list is not empty.  Later, when all arc sources (e.g., R2) are
>> > > >> deleted, the orphan R1 will be cleaned up as well.
>> > > >>
>> > > >> The only exception is when the whole contents of the IDL are
> flushed,
>> > > >> in ovsdb_idl_db_clear(), in which case it's safe to free all rows.
>> > > >>
>> > > >> Reported-by: Ilya Maximets <i.maximets@ovn.org
> <mailto:i.maximets@ovn.org> <mailto:i.maximets@ovn.org
> <mailto:i.maximets@ovn.org>>>
>> > > >> Fixes: 932104f483ef ("ovsdb-idl: Add support for change tracking.")
>> > > >> Signed-off-by: Dumitru Ceara <dceara@redhat.com
> <mailto:dceara@redhat.com> <mailto:dceara@redhat.com
> <mailto:dceara@redhat.com>>>
>> > > >> ---
>> > > >>  lib/ovsdb-idl.c |   21 ++++++++++++++++-----
>> > > >>  1 file changed, 16 insertions(+), 5 deletions(-)
>> > > >>
>> > > >> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>> > > >> index e0c9833..0841a2e 100644
>> > > >> --- a/lib/ovsdb-idl.c
>> > > >> +++ b/lib/ovsdb-idl.c
>> > > >> @@ -221,7 +221,7 @@ struct ovsdb_idl_db {
>> > > >>      struct uuid last_id;
>> > > >>  };
>> > > >>
>> > > >> -static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *);
>> > > >> +static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *,
> bool flush_all);
>> > > >>  static void ovsdb_idl_db_add_column(struct ovsdb_idl_db *,
>> > > >>                                      const struct
> ovsdb_idl_column *);
>> > > >>  static void ovsdb_idl_db_omit(struct ovsdb_idl_db *,
>> > > >> @@ -660,7 +660,7 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db)
>> > > >>      ovsdb_idl_row_destroy_postprocess(db);
>> > > >>
>> > > >>      db->cond_seqno = 0;
>> > > >> -    ovsdb_idl_db_track_clear(db);
>> > > >> +    ovsdb_idl_db_track_clear(db, true);
>> > > >>
>> > > >>      if (changed) {
>> > > >>          db->change_seqno++;
>> > > >> @@ -1955,7 +1955,7 @@ ovsdb_idl_track_is_updated(const struct
> ovsdb_idl_row *row,
>> > > >>   * loop when it is ready to do ovsdb_idl_run() again.
>> > > >>   */
>> > > >>  static void
>> > > >> -ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
>> > > >> +ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db, bool flush_all)
>> > > >>  {
>> > > >>      size_t i;
>> > > >>
>> > > >> @@ -1989,7 +1989,18 @@ ovsdb_idl_db_track_clear(struct
> ovsdb_idl_db *db)
>> > > >>                          free(row->tracked_old_datum);
>> > > >>                          row->tracked_old_datum = NULL;
>> > > >>                      }
>> > > >> -                    free(row);
>> > > >> +
>> > > >> +                    /* Rows that were reused as orphan after
> being processed
>> > > >> +                     * for deletion are still in the table
> hmap and will be
>> > > >> +                     * cleaned up when their src arcs are removed.
>> > > >> +                     *
>> > > >> +                     * The exception is when 'destroy' is
> explicitly set to
>> > > >> +                     * 'true' which usually happens when the
> complete IDL
>> > > >> +                     * contents are being flushed.
>> > > >> +                     */
>> > > >> +                    if (flush_all ||
> ovs_list_is_empty(&row->dst_arcs)) {
>> > > >> +                        free(row);
>> > > >> +                    }
>> > > >
>> > > > Here is a problem. The row R1 is now unparsed and datum freed.
> However, when R2 is deleted, R1 will be pushed to the tracked change
> list again and when the user examines it in tracked changes and accesses
> the old data, it will crash.
>> > >
>> > > It is an ophan row and, also, tracked_old_datum already freed,
>> > > so it should not be iterable.  See:
>> > > f0d23f67954c ("ovsdb-idl: Fix iteration over tracked rows with no
> actual data.")
>> > >
>> > Thanks Ilya for the explanation. This makes sense. So the row
> becomes a "pure orphaned row" after being handled here in
> ovsdb_idl_db_track_clear(). I wonder if some comments should be added to
> the code to clarify these tricky scenarios.
>> >
>>
>> Hi Han, Ilya,
>>
>> Thanks for the reviews!
>> Would the following incremental work for you to better document the
> behavior?
>>
>> Thanks,
>> Dumitru
>>
>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>> index 0841a2e..50d790f 100644
>> --- a/lib/ovsdb-idl.c
>> +++ b/lib/ovsdb-idl.c
>> @@ -1992,7 +1992,9 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db
>> *db, bool flush_all)
>>
>>                      /* Rows that were reused as orphan after being
> processed
>>                       * for deletion are still in the table hmap and
> will be
>> -                     * cleaned up when their src arcs are removed.
>> +                     * cleaned up when their src arcs are removed. 
> These rows
>> +                     * will not be reported anymore as "deleted" to IDL
>> +                     * clients.
>>                       *
>>                       * The exception is when 'destroy' is explicitly
> set to
>>                       * 'true' which usually happens when the complete IDL
>>
> 
> Thanks Dumitru. Yes this comment is helpful. In addition, I was indeed
> talking about documenting the "Pure orphaned row" somewhere. This is a
> tricky scenario and not easy to capture by reading the code. I only see
> some explanation in commit messages. (Of course, it should be a separate
> patch). I think I have acked all the patches of the series, but just in
> case I forgot:
> 
> Acked-by: Han Zhou <hzhou@ovn.org <mailto:hzhou@ovn.org>>

Thanks!

There's an ongoing effort from Ben to refactor the IDL in two layers
[1].  I can try to work on a doc patch once Ben's changes are merged.

Regards,
Dumitru

[1] https://patchwork.ozlabs.org/project/openvswitch/list/?series=217954
diff mbox series

Patch

diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index e0c9833..0841a2e 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -221,7 +221,7 @@  struct ovsdb_idl_db {
     struct uuid last_id;
 };
 
-static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *);
+static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *, bool flush_all);
 static void ovsdb_idl_db_add_column(struct ovsdb_idl_db *,
                                     const struct ovsdb_idl_column *);
 static void ovsdb_idl_db_omit(struct ovsdb_idl_db *,
@@ -660,7 +660,7 @@  ovsdb_idl_db_clear(struct ovsdb_idl_db *db)
     ovsdb_idl_row_destroy_postprocess(db);
 
     db->cond_seqno = 0;
-    ovsdb_idl_db_track_clear(db);
+    ovsdb_idl_db_track_clear(db, true);
 
     if (changed) {
         db->change_seqno++;
@@ -1955,7 +1955,7 @@  ovsdb_idl_track_is_updated(const struct ovsdb_idl_row *row,
  * loop when it is ready to do ovsdb_idl_run() again.
  */
 static void
-ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
+ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db, bool flush_all)
 {
     size_t i;
 
@@ -1989,7 +1989,18 @@  ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
                         free(row->tracked_old_datum);
                         row->tracked_old_datum = NULL;
                     }
-                    free(row);
+
+                    /* Rows that were reused as orphan after being processed
+                     * for deletion are still in the table hmap and will be
+                     * cleaned up when their src arcs are removed.
+                     *
+                     * The exception is when 'destroy' is explicitly set to
+                     * 'true' which usually happens when the complete IDL
+                     * contents are being flushed.
+                     */
+                    if (flush_all || ovs_list_is_empty(&row->dst_arcs)) {
+                        free(row);
+                    }
                 }
             }
         }
@@ -2004,7 +2015,7 @@  ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
 void
 ovsdb_idl_track_clear(struct ovsdb_idl *idl)
 {
-    ovsdb_idl_db_track_clear(&idl->data);
+    ovsdb_idl_db_track_clear(&idl->data, false);
 }
 
 static void