diff mbox series

[ovs-dev,5/5] ovsdb: transaction: Calculate added/removed from diff.

Message ID 20231218020249.3447973-6-i.maximets@ovn.org
State Superseded
Delegated to: Ilya Maximets
Headers show
Series ovsdb: transaction: Bug Fixes + Reuse of column diffs. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Ilya Maximets Dec. 18, 2023, 2:02 a.m. UTC
In case the difference between 'old' and 'new' rows is readily
available, it can be used to construct added/removed datums
instead.  Diffs are typically much smaller than the column
itself.  This change more than doubles the performance of a
transaction replay.

For example, with this change applied, initial read of OVSDB
file containing 136K small transactions for large OVN port
groups and address sets on my laptop takes 11 seconds vs 24
seconds without.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
---
 lib/ovsdb-data.c    | 26 ++++++++++++++++++++++++++
 lib/ovsdb-data.h    |  1 +
 ovsdb/transaction.c | 15 ++++++++++++---
 3 files changed, 39 insertions(+), 3 deletions(-)

Comments

Mike Pattrick Jan. 1, 2024, 6:37 a.m. UTC | #1
On Sun, Dec 17, 2023 at 9:03 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> In case the difference between 'old' and 'new' rows is readily
> available, it can be used to construct added/removed datums
> instead.  Diffs are typically much smaller than the column
> itself.  This change more than doubles the performance of a
> transaction replay.
>
> For example, with this change applied, initial read of OVSDB
> file containing 136K small transactions for large OVN port
> groups and address sets on my laptop takes 11 seconds vs 24
> seconds without.
>
> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
> ---
>  lib/ovsdb-data.c    | 26 ++++++++++++++++++++++++++
>  lib/ovsdb-data.h    |  1 +
>  ovsdb/transaction.c | 15 ++++++++++++---
>  3 files changed, 39 insertions(+), 3 deletions(-)
>
> diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
> index f18f74298..f0d7bee23 100644
> --- a/lib/ovsdb-data.c
> +++ b/lib/ovsdb-data.c
> @@ -2238,6 +2238,8 @@ ovsdb_symbol_table_insert(struct ovsdb_symbol_table *symtab,
>  /* APIs for Generating and apply diffs.  */
>
>  /* Find what needs to be added to and removed from 'old' to construct 'new'.
> + * If the optional 'diff' is porvided, it can be used to speed up processing,

Provided

> + * in case it is smaller than the original 'old' and 'new'.
>   *
>   * The 'added' and 'removed' datums are always safe; the orders of keys are
>   * maintained since they are added in order.   */
> @@ -2246,6 +2248,7 @@ ovsdb_datum_added_removed(struct ovsdb_datum *added,
>                            struct ovsdb_datum *removed,
>                            const struct ovsdb_datum *old,
>                            const struct ovsdb_datum *new,
> +                          const struct ovsdb_datum *diff,
>                            const struct ovsdb_type *type)
>  {
>      size_t oi, ni;
> @@ -2258,6 +2261,29 @@ ovsdb_datum_added_removed(struct ovsdb_datum *added,
>          return;
>      }
>
> +    /* Use diff, if provided, unless it's comparable in size. */
> +    if (diff && diff->n * 2 < old->n + new->n) {

I guess the motivation for the size check is that the old algorithm is
O(n) and the diff algorithm is O(n lg n)? If so it may be worthwhile
to note that in the comment, it wasn't initially clear to me. Or am I
misunderstanding this?

> +        unsigned int idx;
> +
> +        for (size_t di = 0; di < diff->n; di++) {
> +            bool found = ovsdb_datum_find_key(old, &diff->keys[di],
> +                                              type->key.type, &idx);
> +
> +            if (!found) {
> +                ovsdb_datum_add_from_index_unsafe(added, diff, di, type);
> +            } else {
> +                if (type->value.type != OVSDB_TYPE_VOID
> +                    && !ovsdb_atom_equals(&diff->values[di],
> +                                          &old->values[idx],
> +                                          type->value.type)) {
> +                    ovsdb_datum_add_from_index_unsafe(added, diff, di, type);
> +                }
> +                ovsdb_datum_add_from_index_unsafe(removed, old, idx, type);
> +            }
> +        }
> +        return;
> +    }
> +
>      /* Generate the diff in O(n) time. */
>      for (oi = ni = 0; oi < old->n && ni < new->n;) {
>          int c = ovsdb_atom_compare_3way(&old->keys[oi], &new->keys[ni],
> diff --git a/lib/ovsdb-data.h b/lib/ovsdb-data.h
> index f048a8cb0..c0408ee49 100644
> --- a/lib/ovsdb-data.h
> +++ b/lib/ovsdb-data.h
> @@ -256,6 +256,7 @@ void ovsdb_datum_added_removed(struct ovsdb_datum *added,
>                                 struct ovsdb_datum *removed,
>                                 const struct ovsdb_datum *old,
>                                 const struct ovsdb_datum *new,
> +                               const struct ovsdb_datum *diff,
>                                 const struct ovsdb_type *type);
>
>  void ovsdb_datum_diff(struct ovsdb_datum *diff,
> diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
> index b69d03b5a..e1a87bb74 100644
> --- a/ovsdb/transaction.c
> +++ b/ovsdb/transaction.c
> @@ -347,12 +347,13 @@ update_row_ref_count(struct ovsdb_txn *txn, struct ovsdb_txn_row *r)
>                      return error;
>                  }
>              } else if (r->old && r->new) {
> -                struct ovsdb_datum added, removed;
> +                struct ovsdb_datum added, removed, *diff;
>
> +                diff = (r->diff) ? &r->diff->fields[column->index] : NULL;
>                  ovsdb_datum_added_removed(&added, &removed,
>                                            &r->old->fields[column->index],
>                                            &r->new->fields[column->index],
> -                                          &column->type);
> +                                          diff, &column->type);
>
>                  error = ovsdb_txn_adjust_row_refs(
>                              txn, r->old, column, &removed, -1);
> @@ -760,9 +761,13 @@ assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
>          if (datum->n != orig_n
>              || bitmap_is_set(txn_row->changed, column->index)) {
>              if (txn_row->old) {
> +                struct ovsdb_datum *diff;
> +
> +                diff = (txn_row->diff && datum->n == orig_n)
> +                       ? &txn_row->diff->fields[column->index] : NULL;
>                  ovsdb_datum_added_removed(&added, &removed,
>                                            &txn_row->old->fields[column->index],
> -                                          datum, &column->type);
> +                                          datum, diff, &column->type);
>              } else {
>                  ovsdb_datum_clone(&added, datum);
>              }
> @@ -790,6 +795,10 @@ assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
>
>          if (datum->n != orig_n) {
>              bitmap_set1(txn_row->changed, column->index);
> +            /* Can no longer rely on the previous diff. */
> +            ovsdb_row_destroy(txn_row->diff);
> +            txn_row->diff = NULL;
> +
>              if (datum->n < column->type.n_min) {
>                  const struct uuid *row_uuid = ovsdb_row_get_uuid(txn_row->new);
>                  if (zero && !txn_row->old) {
> --
> 2.43.0
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Ilya Maximets Jan. 5, 2024, 2:40 p.m. UTC | #2
On 1/1/24 07:37, Mike Pattrick wrote:
> On Sun, Dec 17, 2023 at 9:03 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>>
>> In case the difference between 'old' and 'new' rows is readily
>> available, it can be used to construct added/removed datums
>> instead.  Diffs are typically much smaller than the column
>> itself.  This change more than doubles the performance of a
>> transaction replay.
>>
>> For example, with this change applied, initial read of OVSDB
>> file containing 136K small transactions for large OVN port
>> groups and address sets on my laptop takes 11 seconds vs 24
>> seconds without.
>>
>> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
>> ---
>>  lib/ovsdb-data.c    | 26 ++++++++++++++++++++++++++
>>  lib/ovsdb-data.h    |  1 +
>>  ovsdb/transaction.c | 15 ++++++++++++---
>>  3 files changed, 39 insertions(+), 3 deletions(-)
>>
>> diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
>> index f18f74298..f0d7bee23 100644
>> --- a/lib/ovsdb-data.c
>> +++ b/lib/ovsdb-data.c
>> @@ -2238,6 +2238,8 @@ ovsdb_symbol_table_insert(struct ovsdb_symbol_table *symtab,
>>  /* APIs for Generating and apply diffs.  */
>>
>>  /* Find what needs to be added to and removed from 'old' to construct 'new'.
>> + * If the optional 'diff' is porvided, it can be used to speed up processing,
> 
> Provided
> 
>> + * in case it is smaller than the original 'old' and 'new'.
>>   *
>>   * The 'added' and 'removed' datums are always safe; the orders of keys are
>>   * maintained since they are added in order.   */
>> @@ -2246,6 +2248,7 @@ ovsdb_datum_added_removed(struct ovsdb_datum *added,
>>                            struct ovsdb_datum *removed,
>>                            const struct ovsdb_datum *old,
>>                            const struct ovsdb_datum *new,
>> +                          const struct ovsdb_datum *diff,
>>                            const struct ovsdb_type *type)
>>  {
>>      size_t oi, ni;
>> @@ -2258,6 +2261,29 @@ ovsdb_datum_added_removed(struct ovsdb_datum *added,
>>          return;
>>      }
>>
>> +    /* Use diff, if provided, unless it's comparable in size. */
>> +    if (diff && diff->n * 2 < old->n + new->n) {
> 
> I guess the motivation for the size check is that the old algorithm is
> O(n) and the diff algorithm is O(n lg n)? If so it may be worthwhile
> to note that in the comment, it wasn't initially clear to me. Or am I
> misunderstanding this?

That's correct.  If the size is comparable, the linear comparison
should be faster.  I can update the comment like this:

    /* Use diff, if provided, unless it's comparable in size.  With a large
     * diff, the O(n log n) binary search of each element may be slower than
     * a simple O(n) comparison between old and new. */

What do you think?

Best regards, Ilya Maximets.
Mike Pattrick Jan. 5, 2024, 3:20 p.m. UTC | #3
On Fri, Jan 5, 2024 at 9:40 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 1/1/24 07:37, Mike Pattrick wrote:
> > On Sun, Dec 17, 2023 at 9:03 PM Ilya Maximets <i.maximets@ovn.org> wrote:
> >>
> >> In case the difference between 'old' and 'new' rows is readily
> >> available, it can be used to construct added/removed datums
> >> instead.  Diffs are typically much smaller than the column
> >> itself.  This change more than doubles the performance of a
> >> transaction replay.
> >>
> >> For example, with this change applied, initial read of OVSDB
> >> file containing 136K small transactions for large OVN port
> >> groups and address sets on my laptop takes 11 seconds vs 24
> >> seconds without.
> >>
> >> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
> >> ---
> >>  lib/ovsdb-data.c    | 26 ++++++++++++++++++++++++++
> >>  lib/ovsdb-data.h    |  1 +
> >>  ovsdb/transaction.c | 15 ++++++++++++---
> >>  3 files changed, 39 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
> >> index f18f74298..f0d7bee23 100644
> >> --- a/lib/ovsdb-data.c
> >> +++ b/lib/ovsdb-data.c
> >> @@ -2238,6 +2238,8 @@ ovsdb_symbol_table_insert(struct ovsdb_symbol_table *symtab,
> >>  /* APIs for Generating and apply diffs.  */
> >>
> >>  /* Find what needs to be added to and removed from 'old' to construct 'new'.
> >> + * If the optional 'diff' is porvided, it can be used to speed up processing,
> >
> > Provided
> >
> >> + * in case it is smaller than the original 'old' and 'new'.
> >>   *
> >>   * The 'added' and 'removed' datums are always safe; the orders of keys are
> >>   * maintained since they are added in order.   */
> >> @@ -2246,6 +2248,7 @@ ovsdb_datum_added_removed(struct ovsdb_datum *added,
> >>                            struct ovsdb_datum *removed,
> >>                            const struct ovsdb_datum *old,
> >>                            const struct ovsdb_datum *new,
> >> +                          const struct ovsdb_datum *diff,
> >>                            const struct ovsdb_type *type)
> >>  {
> >>      size_t oi, ni;
> >> @@ -2258,6 +2261,29 @@ ovsdb_datum_added_removed(struct ovsdb_datum *added,
> >>          return;
> >>      }
> >>
> >> +    /* Use diff, if provided, unless it's comparable in size. */
> >> +    if (diff && diff->n * 2 < old->n + new->n) {
> >
> > I guess the motivation for the size check is that the old algorithm is
> > O(n) and the diff algorithm is O(n lg n)? If so it may be worthwhile
> > to note that in the comment, it wasn't initially clear to me. Or am I
> > misunderstanding this?
>
> That's correct.  If the size is comparable, the linear comparison
> should be faster.  I can update the comment like this:
>
>     /* Use diff, if provided, unless it's comparable in size.  With a large
>      * diff, the O(n log n) binary search of each element may be slower than
>      * a simple O(n) comparison between old and new. */
>
> What do you think?

I think that's a good comment, thoroughly clarifies the conditional.

-M

>
> Best regards, Ilya Maximets.
>
diff mbox series

Patch

diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
index f18f74298..f0d7bee23 100644
--- a/lib/ovsdb-data.c
+++ b/lib/ovsdb-data.c
@@ -2238,6 +2238,8 @@  ovsdb_symbol_table_insert(struct ovsdb_symbol_table *symtab,
 /* APIs for Generating and apply diffs.  */
 
 /* Find what needs to be added to and removed from 'old' to construct 'new'.
+ * If the optional 'diff' is porvided, it can be used to speed up processing,
+ * in case it is smaller than the original 'old' and 'new'.
  *
  * The 'added' and 'removed' datums are always safe; the orders of keys are
  * maintained since they are added in order.   */
@@ -2246,6 +2248,7 @@  ovsdb_datum_added_removed(struct ovsdb_datum *added,
                           struct ovsdb_datum *removed,
                           const struct ovsdb_datum *old,
                           const struct ovsdb_datum *new,
+                          const struct ovsdb_datum *diff,
                           const struct ovsdb_type *type)
 {
     size_t oi, ni;
@@ -2258,6 +2261,29 @@  ovsdb_datum_added_removed(struct ovsdb_datum *added,
         return;
     }
 
+    /* Use diff, if provided, unless it's comparable in size. */
+    if (diff && diff->n * 2 < old->n + new->n) {
+        unsigned int idx;
+
+        for (size_t di = 0; di < diff->n; di++) {
+            bool found = ovsdb_datum_find_key(old, &diff->keys[di],
+                                              type->key.type, &idx);
+
+            if (!found) {
+                ovsdb_datum_add_from_index_unsafe(added, diff, di, type);
+            } else {
+                if (type->value.type != OVSDB_TYPE_VOID
+                    && !ovsdb_atom_equals(&diff->values[di],
+                                          &old->values[idx],
+                                          type->value.type)) {
+                    ovsdb_datum_add_from_index_unsafe(added, diff, di, type);
+                }
+                ovsdb_datum_add_from_index_unsafe(removed, old, idx, type);
+            }
+        }
+        return;
+    }
+
     /* Generate the diff in O(n) time. */
     for (oi = ni = 0; oi < old->n && ni < new->n;) {
         int c = ovsdb_atom_compare_3way(&old->keys[oi], &new->keys[ni],
diff --git a/lib/ovsdb-data.h b/lib/ovsdb-data.h
index f048a8cb0..c0408ee49 100644
--- a/lib/ovsdb-data.h
+++ b/lib/ovsdb-data.h
@@ -256,6 +256,7 @@  void ovsdb_datum_added_removed(struct ovsdb_datum *added,
                                struct ovsdb_datum *removed,
                                const struct ovsdb_datum *old,
                                const struct ovsdb_datum *new,
+                               const struct ovsdb_datum *diff,
                                const struct ovsdb_type *type);
 
 void ovsdb_datum_diff(struct ovsdb_datum *diff,
diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
index b69d03b5a..e1a87bb74 100644
--- a/ovsdb/transaction.c
+++ b/ovsdb/transaction.c
@@ -347,12 +347,13 @@  update_row_ref_count(struct ovsdb_txn *txn, struct ovsdb_txn_row *r)
                     return error;
                 }
             } else if (r->old && r->new) {
-                struct ovsdb_datum added, removed;
+                struct ovsdb_datum added, removed, *diff;
 
+                diff = (r->diff) ? &r->diff->fields[column->index] : NULL;
                 ovsdb_datum_added_removed(&added, &removed,
                                           &r->old->fields[column->index],
                                           &r->new->fields[column->index],
-                                          &column->type);
+                                          diff, &column->type);
 
                 error = ovsdb_txn_adjust_row_refs(
                             txn, r->old, column, &removed, -1);
@@ -760,9 +761,13 @@  assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
         if (datum->n != orig_n
             || bitmap_is_set(txn_row->changed, column->index)) {
             if (txn_row->old) {
+                struct ovsdb_datum *diff;
+
+                diff = (txn_row->diff && datum->n == orig_n)
+                       ? &txn_row->diff->fields[column->index] : NULL;
                 ovsdb_datum_added_removed(&added, &removed,
                                           &txn_row->old->fields[column->index],
-                                          datum, &column->type);
+                                          datum, diff, &column->type);
             } else {
                 ovsdb_datum_clone(&added, datum);
             }
@@ -790,6 +795,10 @@  assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
 
         if (datum->n != orig_n) {
             bitmap_set1(txn_row->changed, column->index);
+            /* Can no longer rely on the previous diff. */
+            ovsdb_row_destroy(txn_row->diff);
+            txn_row->diff = NULL;
+
             if (datum->n < column->type.n_min) {
                 const struct uuid *row_uuid = ovsdb_row_get_uuid(txn_row->new);
                 if (zero && !txn_row->old) {