diff mbox series

[ovs-dev,v3,2/3] ovsdb-data: Optimize subtraction of sets.

Message ID 20210922234724.3697532-3-i.maximets@ovn.org
State Accepted
Headers show
Series ovsdb: Optimize set operations. | expand

Checks

Context Check Description
ovsrobot/apply-robot warning apply and check: warning

Commit Message

Ilya Maximets Sept. 22, 2021, 11:47 p.m. UTC
Current algorithm for ovsdb_datum_subtract looks like this:

  for-each atom in a:
      if atom in b:
          swap(atom, <last atom in 'a'>)
          destroy(atom)
  quicksort(a)

Complexity:

  Na * log2(Nb)  +  (Na - Nb) * log2(Na - Nb)
    Search          Comparisons for quicksort

It's not optimal, especially because Nb << Na in a vast majority of
cases.

Reversing the search phase to look up atoms from 'b' in 'a', and
closing gaps from deleted elements in 'a' by plain memory copy to
avoid quicksort.

Resulted complexity:

  Nb * log2(Na)  +    (Na - Nb)
    Search          Memory copies

Subtraction is heavily used while executing database transactions.
For example, to remove one port from a logical switch in OVN.
Complexity of such operation if original logical switch had 100 ports
goes down from

 100 * log2(1)   = 100 comparisons for search and
  99 * log2(99)  = 656 comparisons for quicksort
                   ------------------------------
                   756 comparisons in total
to only

   1 * log2(100) = 7 comparisons for search
   + memory copy of 99 * sizeof (union ovsdb_atom) bytes.

We could use memmove to close the gaps after removing atoms, but
it will lead to 2 memory copies inside the call, while we can perform
only one to the temporary 'result' and swap pointers.

Performance in cases, where sizes of 'a' and 'b' are comparable,
should not change.  Cases with Nb >> Na should not happen in practice.

All in all, this change allows ovsdb-server to perform several times
more transactions, that removes elements from sets, per second.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
---
 lib/ovsdb-data.c | 53 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 41 insertions(+), 12 deletions(-)

Comments

Han Zhou Sept. 23, 2021, 7:37 a.m. UTC | #1
On Wed, Sep 22, 2021 at 4:47 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> Current algorithm for ovsdb_datum_subtract looks like this:
>
>   for-each atom in a:
>       if atom in b:
>           swap(atom, <last atom in 'a'>)
>           destroy(atom)
>   quicksort(a)
>
> Complexity:
>
>   Na * log2(Nb)  +  (Na - Nb) * log2(Na - Nb)
>     Search          Comparisons for quicksort
>
> It's not optimal, especially because Nb << Na in a vast majority of
> cases.
>
> Reversing the search phase to look up atoms from 'b' in 'a', and
> closing gaps from deleted elements in 'a' by plain memory copy to
> avoid quicksort.
>
> Resulted complexity:
>
>   Nb * log2(Na)  +    (Na - Nb)
>     Search          Memory copies
>
> Subtraction is heavily used while executing database transactions.
> For example, to remove one port from a logical switch in OVN.
> Complexity of such operation if original logical switch had 100 ports
> goes down from
>
>  100 * log2(1)   = 100 comparisons for search and
>   99 * log2(99)  = 656 comparisons for quicksort
>                    ------------------------------
>                    756 comparisons in total
> to only
>
>    1 * log2(100) = 7 comparisons for search
>    + memory copy of 99 * sizeof (union ovsdb_atom) bytes.
>
> We could use memmove to close the gaps after removing atoms, but
> it will lead to 2 memory copies inside the call, while we can perform
> only one to the temporary 'result' and swap pointers.
>
> Performance in cases, where sizes of 'a' and 'b' are comparable,
> should not change.  Cases with Nb >> Na should not happen in practice.
>
> All in all, this change allows ovsdb-server to perform several times
> more transactions, that removes elements from sets, per second.
>
> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
> ---
>  lib/ovsdb-data.c | 53 +++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 41 insertions(+), 12 deletions(-)
>
> diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
> index 7f9ff83f4..5297f91ed 100644
> --- a/lib/ovsdb-data.c
> +++ b/lib/ovsdb-data.c
> @@ -2020,26 +2020,55 @@ ovsdb_datum_subtract(struct ovsdb_datum *a, const
struct ovsdb_type *a_type,
>                       const struct ovsdb_datum *b,
>                       const struct ovsdb_type *b_type)
>  {
> -    bool changed = false;
> -    size_t i;
> +    unsigned int *idx, ai;
> +    size_t n_idx;
>
>      ovs_assert(a_type->key.type == b_type->key.type);
>      ovs_assert(a_type->value.type == b_type->value.type
>                 || b_type->value.type == OVSDB_TYPE_VOID);
>
> -    /* XXX The big-O of this could easily be improved. */
> -    for (i = 0; i < a->n; ) {
> -        unsigned int idx = ovsdb_datum_find(a, i, b, b_type);
> -        if (idx != UINT_MAX) {
> -            changed = true;
> -            ovsdb_datum_remove_unsafe(a, i, a_type);
> -        } else {
> -            i++;
> +    idx = xmalloc(b->n * sizeof *idx);
> +    n_idx = 0;
> +    for (size_t bi = 0; bi < b->n; bi++) {
> +        ai = ovsdb_datum_find(b, bi, a, b_type);
> +        if (ai == UINT_MAX) {
> +            /* No such atom in 'a'. */
> +            continue;
>          }
> +        /* Not destroying right away since ovsdb_datum_find() will use
them. */
> +        idx[n_idx++] = ai;
>      }
> -    if (changed) {
> -        ovsdb_datum_sort_assert(a, a_type->key.type);
> +    if (!n_idx) {
> +        free(idx);
> +        return;
>      }
> +
> +    struct ovsdb_datum result;
> +
> +    ovsdb_datum_init_empty(&result);
> +    ovsdb_datum_reallocate(&result, a_type, a->n - n_idx);
> +
> +    unsigned int start_idx = 0;
> +    for (size_t i = 0; i < n_idx; i++) {
> +        ai = idx[i];
> +
> +        /* Destroying atom. */
> +        ovsdb_atom_destroy(&a->keys[ai], a_type->key.type);
> +        if (a_type->value.type != OVSDB_TYPE_VOID) {
> +            ovsdb_atom_destroy(&a->values[ai], a_type->value.type);
> +        }
> +
> +        /* Copy non-removed atoms from 'a' to result. */
> +        ovsdb_datum_push_unsafe(&result, a, start_idx, ai - start_idx,
a_type);
> +        start_idx = idx[i] + 1;
> +    }
> +    /* Copying remaining atoms. */
> +    ovsdb_datum_push_unsafe(&result, a, start_idx, a->n - start_idx,
a_type);
> +    a->n = 0;
> +
> +    ovsdb_datum_swap(&result, a);
> +    ovsdb_datum_destroy(&result, a_type);
> +    free(idx);
>  }
>
>  struct ovsdb_symbol_table *
> --
> 2.31.1
>

Acked-by: Han Zhou <hzhou@ovn.org>
Mark Gray Sept. 23, 2021, 8:59 a.m. UTC | #2
On 23/09/2021 00:47, Ilya Maximets wrote:
> Current algorithm for ovsdb_datum_subtract looks like this:
> 
>   for-each atom in a:
>       if atom in b:
>           swap(atom, <last atom in 'a'>)
>           destroy(atom)
>   quicksort(a)
> 
> Complexity:
> 
>   Na * log2(Nb)  +  (Na - Nb) * log2(Na - Nb)
>     Search          Comparisons for quicksort
> 
> It's not optimal, especially because Nb << Na in a vast majority of
> cases.
> 
> Reversing the search phase to look up atoms from 'b' in 'a', and
> closing gaps from deleted elements in 'a' by plain memory copy to
> avoid quicksort.
> 
> Resulted complexity:
> 
>   Nb * log2(Na)  +    (Na - Nb)
>     Search          Memory copies
> 
> Subtraction is heavily used while executing database transactions.
> For example, to remove one port from a logical switch in OVN.
> Complexity of such operation if original logical switch had 100 ports
> goes down from
> 
>  100 * log2(1)   = 100 comparisons for search and
>   99 * log2(99)  = 656 comparisons for quicksort
>                    ------------------------------
>                    756 comparisons in total
> to only
> 
>    1 * log2(100) = 7 comparisons for search
>    + memory copy of 99 * sizeof (union ovsdb_atom) bytes.
> 
> We could use memmove to close the gaps after removing atoms, but
> it will lead to 2 memory copies inside the call, while we can perform
> only one to the temporary 'result' and swap pointers.
> 
> Performance in cases, where sizes of 'a' and 'b' are comparable,
> should not change.  Cases with Nb >> Na should not happen in practice.
> 
> All in all, this change allows ovsdb-server to perform several times
> more transactions, that removes elements from sets, per second.
> 
> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
> ---
>  lib/ovsdb-data.c | 53 +++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 41 insertions(+), 12 deletions(-)
> 
> diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
> index 7f9ff83f4..5297f91ed 100644
> --- a/lib/ovsdb-data.c
> +++ b/lib/ovsdb-data.c
> @@ -2020,26 +2020,55 @@ ovsdb_datum_subtract(struct ovsdb_datum *a, const struct ovsdb_type *a_type,
>                       const struct ovsdb_datum *b,
>                       const struct ovsdb_type *b_type)
>  {
> -    bool changed = false;
> -    size_t i;
> +    unsigned int *idx, ai;
> +    size_t n_idx;
>  
>      ovs_assert(a_type->key.type == b_type->key.type);
>      ovs_assert(a_type->value.type == b_type->value.type
>                 || b_type->value.type == OVSDB_TYPE_VOID);
>  
> -    /* XXX The big-O of this could easily be improved. */
> -    for (i = 0; i < a->n; ) {
> -        unsigned int idx = ovsdb_datum_find(a, i, b, b_type);
> -        if (idx != UINT_MAX) {
> -            changed = true;
> -            ovsdb_datum_remove_unsafe(a, i, a_type);
> -        } else {
> -            i++;
> +    idx = xmalloc(b->n * sizeof *idx);
> +    n_idx = 0;
> +    for (size_t bi = 0; bi < b->n; bi++) {
> +        ai = ovsdb_datum_find(b, bi, a, b_type);
> +        if (ai == UINT_MAX) {
> +            /* No such atom in 'a'. */
> +            continue;
>          }
> +        /* Not destroying right away since ovsdb_datum_find() will use them. */
> +        idx[n_idx++] = ai;
>      }
> -    if (changed) {
> -        ovsdb_datum_sort_assert(a, a_type->key.type);
> +    if (!n_idx) {
> +        free(idx);
> +        return;
>      }
> +
> +    struct ovsdb_datum result;
> +
> +    ovsdb_datum_init_empty(&result);
> +    ovsdb_datum_reallocate(&result, a_type, a->n - n_idx);
> +
> +    unsigned int start_idx = 0;
> +    for (size_t i = 0; i < n_idx; i++) {
> +        ai = idx[i];
> +
> +        /* Destroying atom. */
> +        ovsdb_atom_destroy(&a->keys[ai], a_type->key.type);
> +        if (a_type->value.type != OVSDB_TYPE_VOID) {
> +            ovsdb_atom_destroy(&a->values[ai], a_type->value.type);
> +        }
> +
> +        /* Copy non-removed atoms from 'a' to result. */
> +        ovsdb_datum_push_unsafe(&result, a, start_idx, ai - start_idx, a_type);
> +        start_idx = idx[i] + 1;
> +    }
> +    /* Copying remaining atoms. */
> +    ovsdb_datum_push_unsafe(&result, a, start_idx, a->n - start_idx, a_type);
> +    a->n = 0;
> +
> +    ovsdb_datum_swap(&result, a);
> +    ovsdb_datum_destroy(&result, a_type);
> +    free(idx);
>  }
>  
>  struct ovsdb_symbol_table *
> 


Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
0-day Robot Sept. 27, 2021, 6:50 p.m. UTC | #3
Bleep bloop.  Greetings Ilya Maximets, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


Patch skipped due to previous failure.

Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
diff mbox series

Patch

diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
index 7f9ff83f4..5297f91ed 100644
--- a/lib/ovsdb-data.c
+++ b/lib/ovsdb-data.c
@@ -2020,26 +2020,55 @@  ovsdb_datum_subtract(struct ovsdb_datum *a, const struct ovsdb_type *a_type,
                      const struct ovsdb_datum *b,
                      const struct ovsdb_type *b_type)
 {
-    bool changed = false;
-    size_t i;
+    unsigned int *idx, ai;
+    size_t n_idx;
 
     ovs_assert(a_type->key.type == b_type->key.type);
     ovs_assert(a_type->value.type == b_type->value.type
                || b_type->value.type == OVSDB_TYPE_VOID);
 
-    /* XXX The big-O of this could easily be improved. */
-    for (i = 0; i < a->n; ) {
-        unsigned int idx = ovsdb_datum_find(a, i, b, b_type);
-        if (idx != UINT_MAX) {
-            changed = true;
-            ovsdb_datum_remove_unsafe(a, i, a_type);
-        } else {
-            i++;
+    idx = xmalloc(b->n * sizeof *idx);
+    n_idx = 0;
+    for (size_t bi = 0; bi < b->n; bi++) {
+        ai = ovsdb_datum_find(b, bi, a, b_type);
+        if (ai == UINT_MAX) {
+            /* No such atom in 'a'. */
+            continue;
         }
+        /* Not destroying right away since ovsdb_datum_find() will use them. */
+        idx[n_idx++] = ai;
     }
-    if (changed) {
-        ovsdb_datum_sort_assert(a, a_type->key.type);
+    if (!n_idx) {
+        free(idx);
+        return;
     }
+
+    struct ovsdb_datum result;
+
+    ovsdb_datum_init_empty(&result);
+    ovsdb_datum_reallocate(&result, a_type, a->n - n_idx);
+
+    unsigned int start_idx = 0;
+    for (size_t i = 0; i < n_idx; i++) {
+        ai = idx[i];
+
+        /* Destroying atom. */
+        ovsdb_atom_destroy(&a->keys[ai], a_type->key.type);
+        if (a_type->value.type != OVSDB_TYPE_VOID) {
+            ovsdb_atom_destroy(&a->values[ai], a_type->value.type);
+        }
+
+        /* Copy non-removed atoms from 'a' to result. */
+        ovsdb_datum_push_unsafe(&result, a, start_idx, ai - start_idx, a_type);
+        start_idx = idx[i] + 1;
+    }
+    /* Copying remaining atoms. */
+    ovsdb_datum_push_unsafe(&result, a, start_idx, a->n - start_idx, a_type);
+    a->n = 0;
+
+    ovsdb_datum_swap(&result, a);
+    ovsdb_datum_destroy(&result, a_type);
+    free(idx);
 }
 
 struct ovsdb_symbol_table *