diff mbox series

[ovs-dev,3/3] ovn-northd: Avoid recompute caused by in-flight transactions.

Message ID 20230706100140.699587-3-hzhou@ovn.org
State Accepted
Headers show
Series [ovs-dev,1/3] northd: Incremental processing of VIF updates and deletions in 'lflow' node. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Han Zhou July 6, 2023, 10:01 a.m. UTC
Although each individual VIF port related changes are handled
incrementally, it still triggers recompute if there are in-flight
transactions (either to NB or SB) when a change comes, which is very
common in real production environment if change happens frequently.
It is also easy to hit such situation in test cases where nb_cfg
mechanism is heavily used, which makes it difficult to write reliable
and stable tests, such as what the commit 8c30ba1386 was trying to work
around.

This patch skips the I-P engine execution until the NB & SB transaction
handles are available (no in-flight transactions), and when skippiing
the runs it keeps the tracked changes in IDL across main loop
iterations. This way we avoid recompute without worrying about missing
any changes.

Signed-off-by: Han Zhou <hzhou@ovn.org>
---
 northd/inc-proc-northd.c |   6 +--
 northd/ovn-northd.c      |  22 +++++----
 tests/ovn-northd.at      | 104 +++++++++++++++++++--------------------
 3 files changed, 66 insertions(+), 66 deletions(-)

Comments

Numan Siddique July 6, 2023, 1:54 p.m. UTC | #1
On Thu, Jul 6, 2023 at 3:32 PM Han Zhou <hzhou@ovn.org> wrote:
>
> Although each individual VIF port related changes are handled
> incrementally, it still triggers recompute if there are in-flight
> transactions (either to NB or SB) when a change comes, which is very
> common in real production environment if change happens frequently.
> It is also easy to hit such situation in test cases where nb_cfg
> mechanism is heavily used, which makes it difficult to write reliable
> and stable tests, such as what the commit 8c30ba1386 was trying to work
> around.
>
> This patch skips the I-P engine execution until the NB & SB transaction
> handles are available (no in-flight transactions), and when skippiing
> the runs it keeps the tracked changes in IDL across main loop
> iterations. This way we avoid recompute without worrying about missing
> any changes.
>
> Signed-off-by: Han Zhou <hzhou@ovn.org>

Acked-by: Numan Siddique <numans@ovn.org>

Numan

> ---
>  northd/inc-proc-northd.c |   6 +--
>  northd/ovn-northd.c      |  22 +++++----
>  tests/ovn-northd.at      | 104 +++++++++++++++++++--------------------
>  3 files changed, 66 insertions(+), 66 deletions(-)
>
> diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> index 19fc67795643..d328deb222e6 100644
> --- a/northd/inc-proc-northd.c
> +++ b/northd/inc-proc-northd.c
> @@ -296,6 +296,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>  bool inc_proc_northd_run(struct ovsdb_idl_txn *ovnnb_txn,
>                           struct ovsdb_idl_txn *ovnsb_txn,
>                           bool recompute) {
> +    ovs_assert(ovnnb_txn && ovnsb_txn);
>      engine_init_run();
>
>      /* Force a full recompute if instructed to, for example, after a NB/SB
> @@ -312,10 +313,7 @@ bool inc_proc_northd_run(struct ovsdb_idl_txn *ovnnb_txn,
>      };
>
>      engine_set_context(&eng_ctx);
> -
> -    if (ovnnb_txn && ovnsb_txn) {
> -        engine_run(true);
> -    }
> +    engine_run(true);
>
>      if (!engine_has_run()) {
>          if (engine_need_run()) {
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 3e0848e41b8e..4fa1b039ea32 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -881,6 +881,7 @@ main(int argc, char *argv[])
>              simap_destroy(&usage);
>          }
>
> +        bool clear_idl_track = true;
>          if (!state.paused) {
>              if (!ovsdb_idl_has_lock(ovnsb_idl_loop.idl) &&
>                  !ovsdb_idl_is_lock_contended(ovnsb_idl_loop.idl))
> @@ -930,25 +931,26 @@ main(int argc, char *argv[])
>              }
>
>              if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
> -                int64_t loop_start_time = time_wall_msec();
> -                bool activity = inc_proc_northd_run(ovnnb_txn, ovnsb_txn,
> -                                                    recompute);
> -                recompute = false;
> -                if (ovnsb_txn) {
> +                bool activity = false;
> +                if (ovnnb_txn && ovnsb_txn) {
> +                    int64_t loop_start_time = time_wall_msec();
> +                    activity = inc_proc_northd_run(ovnnb_txn, ovnsb_txn,
> +                                                        recompute);
> +                    recompute = false;
>                      check_and_add_supported_dhcp_opts_to_sb_db(
>                                   ovnsb_txn, ovnsb_idl_loop.idl);
>                      check_and_add_supported_dhcpv6_opts_to_sb_db(
>                                   ovnsb_txn, ovnsb_idl_loop.idl);
>                      check_and_update_rbac(
>                                   ovnsb_txn, ovnsb_idl_loop.idl);
> -                }
>
> -                if (ovnnb_txn && ovnsb_txn) {
>                      update_sequence_numbers(loop_start_time,
>                                              ovnnb_idl_loop.idl,
>                                              ovnsb_idl_loop.idl,
>                                              ovnnb_txn, ovnsb_txn,
>                                              &ovnsb_idl_loop);
> +                } else if (!recompute) {
> +                    clear_idl_track = false;
>                  }
>
>                  /* If there are any errors, we force a full recompute in order
> @@ -998,8 +1000,10 @@ main(int argc, char *argv[])
>              recompute = true;
>          }
>
> -        ovsdb_idl_track_clear(ovnnb_idl_loop.idl);
> -        ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
> +        if (clear_idl_track) {
> +            ovsdb_idl_track_clear(ovnnb_idl_loop.idl);
> +            ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
> +        }
>
>          unixctl_server_run(unixctl);
>          unixctl_server_wait(unixctl);
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index f1bf9092eeb7..e79d33b2aec5 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -9522,67 +9522,65 @@ as hv1
>  ovs-vsctl add-br br-phys
>  ovn_attach n1 br-phys 192.168.0.11
>
> -fail_count=0
>  check_recompute_counter() {
>      northd_recomp=$(as northd ovn-appctl -t NORTHD_TYPE inc-engine/show-stats northd recompute)
> -    if test x$northd_recomp != x$1; then
> -        fail_count=$(($fail_count + 1))
> -        echo check northd recompute failed: expected $1, got $northd_recomp
> -        return 1
> -    fi
> +    AT_CHECK([test x$northd_recomp = x$1])
> +
>      lflow_recomp=$(as northd ovn-appctl -t NORTHD_TYPE inc-engine/show-stats lflow recompute)
> -    if test x$lflow_recomp != x$2; then
> -        fail_count=$(($fail_count + 1))
> -        echo check lflow recompute failed: expected $2, got $lflow_recomp
> -        return 1
> -    fi
> -    return 0
> +    AT_CHECK([test x$lflow_recomp = x$2])
>  }
>
> -# Depending on order of responses from NB and SB, the number of recompute may
> -# be different. This test case only verifies the best case scenario, which
> -# should have the expected recompute count at least 50% of the time.
> +check ovn-nbctl --wait=hv ls-add ls0
> +
> +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-0 -- lsp-set-addresses lsp0-0 "unknown"
> +ovs-vsctl add-port br-int lsp0-0 -- set interface lsp0-0 external_ids:iface-id=lsp0-0
> +wait_for_ports_up
> +check ovn-nbctl --wait=hv sync
> +check_recompute_counter 5 5
> +
> +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-1 -- lsp-set-addresses lsp0-1 "aa:aa:aa:00:00:01 192.168.0.11"
> +ovs-vsctl add-port br-int lsp0-1 -- set interface lsp0-1 external_ids:iface-id=lsp0-1
> +wait_for_ports_up
> +check ovn-nbctl --wait=hv sync
> +check_recompute_counter 0 0
> +
> +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-2 -- lsp-set-addresses lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12"
> +ovs-vsctl add-port br-int lsp0-2 -- set interface lsp0-2 external_ids:iface-id=lsp0-2
> +wait_for_ports_up
> +check ovn-nbctl --wait=hv sync
> +check_recompute_counter 0 0
> +
> +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> +check ovn-nbctl --wait=hv lsp-del lsp0-1
> +check_recompute_counter 0 0
> +
> +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> +check ovn-nbctl --wait=hv lsp-set-addresses lsp0-2 "aa:aa:aa:00:00:88 192.168.0.88"
> +check_recompute_counter 0 0
> +
> +# Delete and re-add a LSP for several times continuously, to ensure
> +# frequent operations do not trigger recompute when there are in-flight
> +# transcations.
>  for i in $(seq 10); do
> -    check ovn-nbctl --wait=hv ls-add ls$i
> -
> -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> -    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-0 -- lsp-set-addresses lsp${i}-0 "unknown"
> -    ovs-vsctl add-port br-int lsp${i}-0 -- set interface lsp${i}-0 external_ids:iface-id=lsp${i}-0
> -    wait_for_ports_up
> -    check ovn-nbctl --wait=hv sync
> -    check_recompute_counter 5 5 || continue
> -
> -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> -    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-1 -- lsp-set-addresses lsp${i}-1 "aa:aa:aa:00:00:01 192.168.0.11"
> -    ovs-vsctl add-port br-int lsp${i}-1 -- set interface lsp${i}-1 external_ids:iface-id=lsp${i}-1
> -    wait_for_ports_up
> -    check ovn-nbctl --wait=hv sync
> -    check_recompute_counter 0 0 || continue
> -
> -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> -    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-2 -- lsp-set-addresses lsp${i}-2 "aa:aa:aa:00:00:02 192.168.0.12"
> -    ovs-vsctl add-port br-int lsp${i}-2 -- set interface lsp${i}-2 external_ids:iface-id=lsp${i}-2
> -    wait_for_ports_up
> -    check ovn-nbctl --wait=hv sync
> -    check_recompute_counter 0 0 || continue
> -
> -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> -    check ovn-nbctl --wait=hv lsp-del lsp${i}-1
> -    check_recompute_counter 0 0 || continue
> -
> -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> -    check ovn-nbctl --wait=hv lsp-set-addresses lsp${i}-2 "aa:aa:aa:00:00:88 192.168.0.88"
> -    check_recompute_counter 0 0 || continue
> -
> -    # No change, no recompute
> -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> -    check ovn-nbctl --wait=sb sync
> -    check_recompute_counter 0 0 || continue
> +    # wait for sb but not wait for hv
> +    check ovn-nbctl --wait=sb lsp-del lsp0-2
> +    check ovn-nbctl --wait=sb lsp-add ls0 lsp0-2 -- lsp-set-addresses lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12"
>
> -    CHECK_NO_CHANGE_AFTER_RECOMPUTE
> +    # even without waiting for sb
> +    check ovn-nbctl lsp-del lsp0-2
> +    check ovn-nbctl lsp-add ls0 lsp0-2 -- lsp-set-addresses lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12"
>  done
> -echo Test failed $fail_count in 10.
> -AT_CHECK([test $fail_count -le 5])
> +check_recompute_counter 0 0
> +
> +# No change, no recompute
> +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> +check ovn-nbctl --wait=sb sync
> +check_recompute_counter 0 0
> +
> +CHECK_NO_CHANGE_AFTER_RECOMPUTE
>
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
> --
> 2.30.2
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Han Zhou July 7, 2023, 1:09 a.m. UTC | #2
On Thu, Jul 6, 2023 at 9:55 PM Numan Siddique <numans@ovn.org> wrote:
>
> On Thu, Jul 6, 2023 at 3:32 PM Han Zhou <hzhou@ovn.org> wrote:
> >
> > Although each individual VIF port related changes are handled
> > incrementally, it still triggers recompute if there are in-flight
> > transactions (either to NB or SB) when a change comes, which is very
> > common in real production environment if change happens frequently.
> > It is also easy to hit such situation in test cases where nb_cfg
> > mechanism is heavily used, which makes it difficult to write reliable
> > and stable tests, such as what the commit 8c30ba1386 was trying to work
> > around.
> >
> > This patch skips the I-P engine execution until the NB & SB transaction
> > handles are available (no in-flight transactions), and when skippiing
> > the runs it keeps the tracked changes in IDL across main loop
> > iterations. This way we avoid recompute without worrying about missing
> > any changes.
> >
> > Signed-off-by: Han Zhou <hzhou@ovn.org>
>
> Acked-by: Numan Siddique <numans@ovn.org>
>
> Numan

Thanks Numan! I applied the series to main.

Han
>
> > ---
> >  northd/inc-proc-northd.c |   6 +--
> >  northd/ovn-northd.c      |  22 +++++----
> >  tests/ovn-northd.at      | 104 +++++++++++++++++++--------------------
> >  3 files changed, 66 insertions(+), 66 deletions(-)
> >
> > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> > index 19fc67795643..d328deb222e6 100644
> > --- a/northd/inc-proc-northd.c
> > +++ b/northd/inc-proc-northd.c
> > @@ -296,6 +296,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >  bool inc_proc_northd_run(struct ovsdb_idl_txn *ovnnb_txn,
> >                           struct ovsdb_idl_txn *ovnsb_txn,
> >                           bool recompute) {
> > +    ovs_assert(ovnnb_txn && ovnsb_txn);
> >      engine_init_run();
> >
> >      /* Force a full recompute if instructed to, for example, after a
NB/SB
> > @@ -312,10 +313,7 @@ bool inc_proc_northd_run(struct ovsdb_idl_txn
*ovnnb_txn,
> >      };
> >
> >      engine_set_context(&eng_ctx);
> > -
> > -    if (ovnnb_txn && ovnsb_txn) {
> > -        engine_run(true);
> > -    }
> > +    engine_run(true);
> >
> >      if (!engine_has_run()) {
> >          if (engine_need_run()) {
> > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> > index 3e0848e41b8e..4fa1b039ea32 100644
> > --- a/northd/ovn-northd.c
> > +++ b/northd/ovn-northd.c
> > @@ -881,6 +881,7 @@ main(int argc, char *argv[])
> >              simap_destroy(&usage);
> >          }
> >
> > +        bool clear_idl_track = true;
> >          if (!state.paused) {
> >              if (!ovsdb_idl_has_lock(ovnsb_idl_loop.idl) &&
> >                  !ovsdb_idl_is_lock_contended(ovnsb_idl_loop.idl))
> > @@ -930,25 +931,26 @@ main(int argc, char *argv[])
> >              }
> >
> >              if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
> > -                int64_t loop_start_time = time_wall_msec();
> > -                bool activity = inc_proc_northd_run(ovnnb_txn,
ovnsb_txn,
> > -                                                    recompute);
> > -                recompute = false;
> > -                if (ovnsb_txn) {
> > +                bool activity = false;
> > +                if (ovnnb_txn && ovnsb_txn) {
> > +                    int64_t loop_start_time = time_wall_msec();
> > +                    activity = inc_proc_northd_run(ovnnb_txn,
ovnsb_txn,
> > +                                                        recompute);
> > +                    recompute = false;
> >                      check_and_add_supported_dhcp_opts_to_sb_db(
> >                                   ovnsb_txn, ovnsb_idl_loop.idl);
> >                      check_and_add_supported_dhcpv6_opts_to_sb_db(
> >                                   ovnsb_txn, ovnsb_idl_loop.idl);
> >                      check_and_update_rbac(
> >                                   ovnsb_txn, ovnsb_idl_loop.idl);
> > -                }
> >
> > -                if (ovnnb_txn && ovnsb_txn) {
> >                      update_sequence_numbers(loop_start_time,
> >                                              ovnnb_idl_loop.idl,
> >                                              ovnsb_idl_loop.idl,
> >                                              ovnnb_txn, ovnsb_txn,
> >                                              &ovnsb_idl_loop);
> > +                } else if (!recompute) {
> > +                    clear_idl_track = false;
> >                  }
> >
> >                  /* If there are any errors, we force a full recompute
in order
> > @@ -998,8 +1000,10 @@ main(int argc, char *argv[])
> >              recompute = true;
> >          }
> >
> > -        ovsdb_idl_track_clear(ovnnb_idl_loop.idl);
> > -        ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
> > +        if (clear_idl_track) {
> > +            ovsdb_idl_track_clear(ovnnb_idl_loop.idl);
> > +            ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
> > +        }
> >
> >          unixctl_server_run(unixctl);
> >          unixctl_server_wait(unixctl);
> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > index f1bf9092eeb7..e79d33b2aec5 100644
> > --- a/tests/ovn-northd.at
> > +++ b/tests/ovn-northd.at
> > @@ -9522,67 +9522,65 @@ as hv1
> >  ovs-vsctl add-br br-phys
> >  ovn_attach n1 br-phys 192.168.0.11
> >
> > -fail_count=0
> >  check_recompute_counter() {
> >      northd_recomp=$(as northd ovn-appctl -t NORTHD_TYPE
inc-engine/show-stats northd recompute)
> > -    if test x$northd_recomp != x$1; then
> > -        fail_count=$(($fail_count + 1))
> > -        echo check northd recompute failed: expected $1, got
$northd_recomp
> > -        return 1
> > -    fi
> > +    AT_CHECK([test x$northd_recomp = x$1])
> > +
> >      lflow_recomp=$(as northd ovn-appctl -t NORTHD_TYPE
inc-engine/show-stats lflow recompute)
> > -    if test x$lflow_recomp != x$2; then
> > -        fail_count=$(($fail_count + 1))
> > -        echo check lflow recompute failed: expected $2, got
$lflow_recomp
> > -        return 1
> > -    fi
> > -    return 0
> > +    AT_CHECK([test x$lflow_recomp = x$2])
> >  }
> >
> > -# Depending on order of responses from NB and SB, the number of
recompute may
> > -# be different. This test case only verifies the best case scenario,
which
> > -# should have the expected recompute count at least 50% of the time.
> > +check ovn-nbctl --wait=hv ls-add ls0
> > +
> > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-0 -- lsp-set-addresses
lsp0-0 "unknown"
> > +ovs-vsctl add-port br-int lsp0-0 -- set interface lsp0-0
external_ids:iface-id=lsp0-0
> > +wait_for_ports_up
> > +check ovn-nbctl --wait=hv sync
> > +check_recompute_counter 5 5
> > +
> > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-1 -- lsp-set-addresses
lsp0-1 "aa:aa:aa:00:00:01 192.168.0.11"
> > +ovs-vsctl add-port br-int lsp0-1 -- set interface lsp0-1
external_ids:iface-id=lsp0-1
> > +wait_for_ports_up
> > +check ovn-nbctl --wait=hv sync
> > +check_recompute_counter 0 0
> > +
> > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-2 -- lsp-set-addresses
lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12"
> > +ovs-vsctl add-port br-int lsp0-2 -- set interface lsp0-2
external_ids:iface-id=lsp0-2
> > +wait_for_ports_up
> > +check ovn-nbctl --wait=hv sync
> > +check_recompute_counter 0 0
> > +
> > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > +check ovn-nbctl --wait=hv lsp-del lsp0-1
> > +check_recompute_counter 0 0
> > +
> > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > +check ovn-nbctl --wait=hv lsp-set-addresses lsp0-2 "aa:aa:aa:00:00:88
192.168.0.88"
> > +check_recompute_counter 0 0
> > +
> > +# Delete and re-add a LSP for several times continuously, to ensure
> > +# frequent operations do not trigger recompute when there are in-flight
> > +# transcations.
> >  for i in $(seq 10); do
> > -    check ovn-nbctl --wait=hv ls-add ls$i
> > -
> > -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > -    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-0 --
lsp-set-addresses lsp${i}-0 "unknown"
> > -    ovs-vsctl add-port br-int lsp${i}-0 -- set interface lsp${i}-0
external_ids:iface-id=lsp${i}-0
> > -    wait_for_ports_up
> > -    check ovn-nbctl --wait=hv sync
> > -    check_recompute_counter 5 5 || continue
> > -
> > -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > -    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-1 --
lsp-set-addresses lsp${i}-1 "aa:aa:aa:00:00:01 192.168.0.11"
> > -    ovs-vsctl add-port br-int lsp${i}-1 -- set interface lsp${i}-1
external_ids:iface-id=lsp${i}-1
> > -    wait_for_ports_up
> > -    check ovn-nbctl --wait=hv sync
> > -    check_recompute_counter 0 0 || continue
> > -
> > -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > -    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-2 --
lsp-set-addresses lsp${i}-2 "aa:aa:aa:00:00:02 192.168.0.12"
> > -    ovs-vsctl add-port br-int lsp${i}-2 -- set interface lsp${i}-2
external_ids:iface-id=lsp${i}-2
> > -    wait_for_ports_up
> > -    check ovn-nbctl --wait=hv sync
> > -    check_recompute_counter 0 0 || continue
> > -
> > -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > -    check ovn-nbctl --wait=hv lsp-del lsp${i}-1
> > -    check_recompute_counter 0 0 || continue
> > -
> > -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > -    check ovn-nbctl --wait=hv lsp-set-addresses lsp${i}-2
"aa:aa:aa:00:00:88 192.168.0.88"
> > -    check_recompute_counter 0 0 || continue
> > -
> > -    # No change, no recompute
> > -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > -    check ovn-nbctl --wait=sb sync
> > -    check_recompute_counter 0 0 || continue
> > +    # wait for sb but not wait for hv
> > +    check ovn-nbctl --wait=sb lsp-del lsp0-2
> > +    check ovn-nbctl --wait=sb lsp-add ls0 lsp0-2 -- lsp-set-addresses
lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12"
> >
> > -    CHECK_NO_CHANGE_AFTER_RECOMPUTE
> > +    # even without waiting for sb
> > +    check ovn-nbctl lsp-del lsp0-2
> > +    check ovn-nbctl lsp-add ls0 lsp0-2 -- lsp-set-addresses lsp0-2
"aa:aa:aa:00:00:02 192.168.0.12"
> >  done
> > -echo Test failed $fail_count in 10.
> > -AT_CHECK([test $fail_count -le 5])
> > +check_recompute_counter 0 0
> > +
> > +# No change, no recompute
> > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > +check ovn-nbctl --wait=sb sync
> > +check_recompute_counter 0 0
> > +
> > +CHECK_NO_CHANGE_AFTER_RECOMPUTE
> >
> >  OVN_CLEANUP([hv1])
> >  AT_CLEANUP
> > --
> > 2.30.2
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
Numan Siddique July 7, 2023, 6:01 a.m. UTC | #3
On Fri, Jul 7, 2023 at 6:40 AM Han Zhou <hzhou@ovn.org> wrote:
>
> On Thu, Jul 6, 2023 at 9:55 PM Numan Siddique <numans@ovn.org> wrote:
> >
> > On Thu, Jul 6, 2023 at 3:32 PM Han Zhou <hzhou@ovn.org> wrote:
> > >
> > > Although each individual VIF port related changes are handled
> > > incrementally, it still triggers recompute if there are in-flight
> > > transactions (either to NB or SB) when a change comes, which is very
> > > common in real production environment if change happens frequently.
> > > It is also easy to hit such situation in test cases where nb_cfg
> > > mechanism is heavily used, which makes it difficult to write reliable
> > > and stable tests, such as what the commit 8c30ba1386 was trying to work
> > > around.
> > >
> > > This patch skips the I-P engine execution until the NB & SB transaction
> > > handles are available (no in-flight transactions), and when skippiing
> > > the runs it keeps the tracked changes in IDL across main loop
> > > iterations. This way we avoid recompute without worrying about missing
> > > any changes.
> > >
> > > Signed-off-by: Han Zhou <hzhou@ovn.org>
> >
> > Acked-by: Numan Siddique <numans@ovn.org>
> >
> > Numan
>
> Thanks Numan! I applied the series to main.

Hi Han,

Looks like the test case - "LSP incremental processing" is a little bit flaky.

If you see the latest CI run,  this test is failing for ARM -
https://github.com/ovn-org/ovn/runs/14845008643
It also failed for the load balancer I-P patch series
(http://patchwork.ozlabs.org/project/ovn/list/?series=362800)
in my local repo and it passed in a re-run.


------
as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
../../../tests/ovn-macros.at:419: "$@"
ovn-nbctl --wait=hv lsp-add ls0 lsp0-0 -- lsp-set-addresses lsp0-0 unknown
../../../tests/ovn-macros.at:419: "$@"
Waiting until 0 rows in nb Logical_Switch_Port with up!=true type=""...
ovn-macros.at:470: waiting until test $count = $(count_rows $db:$table
$a $b $c $d $e)...
ovn-macros.at:470: wait succeeded immediately
Waiting until 0 rows in nb Logical_Switch_Port with up!=true type=router...
ovn-macros.at:470: waiting until test $count = $(count_rows $db:$table
$a $b $c $d $e)...
ovn-macros.at:470: wait succeeded immediately
ovn-nbctl --wait=hv sync
../../../tests/ovn-macros.at:419: "$@"
../../../tests/ovn-northd.at:9515: test x$northd_recomp = x$1
../../../tests/ovn-northd.at:9515: exit code was 1, expected 0
----
Looks like its failing here -
https://github.com/ovn-org/ovn/blob/main/tests/ovn-northd.at#L9540

Maybe we shouldn't do exact matches on the counters ?

Thanks
Numan


>
> Han
> >
> > > ---
> > >  northd/inc-proc-northd.c |   6 +--
> > >  northd/ovn-northd.c      |  22 +++++----
> > >  tests/ovn-northd.at      | 104 +++++++++++++++++++--------------------
> > >  3 files changed, 66 insertions(+), 66 deletions(-)
> > >
> > > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> > > index 19fc67795643..d328deb222e6 100644
> > > --- a/northd/inc-proc-northd.c
> > > +++ b/northd/inc-proc-northd.c
> > > @@ -296,6 +296,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> > >  bool inc_proc_northd_run(struct ovsdb_idl_txn *ovnnb_txn,
> > >                           struct ovsdb_idl_txn *ovnsb_txn,
> > >                           bool recompute) {
> > > +    ovs_assert(ovnnb_txn && ovnsb_txn);
> > >      engine_init_run();
> > >
> > >      /* Force a full recompute if instructed to, for example, after a
> NB/SB
> > > @@ -312,10 +313,7 @@ bool inc_proc_northd_run(struct ovsdb_idl_txn
> *ovnnb_txn,
> > >      };
> > >
> > >      engine_set_context(&eng_ctx);
> > > -
> > > -    if (ovnnb_txn && ovnsb_txn) {
> > > -        engine_run(true);
> > > -    }
> > > +    engine_run(true);
> > >
> > >      if (!engine_has_run()) {
> > >          if (engine_need_run()) {
> > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> > > index 3e0848e41b8e..4fa1b039ea32 100644
> > > --- a/northd/ovn-northd.c
> > > +++ b/northd/ovn-northd.c
> > > @@ -881,6 +881,7 @@ main(int argc, char *argv[])
> > >              simap_destroy(&usage);
> > >          }
> > >
> > > +        bool clear_idl_track = true;
> > >          if (!state.paused) {
> > >              if (!ovsdb_idl_has_lock(ovnsb_idl_loop.idl) &&
> > >                  !ovsdb_idl_is_lock_contended(ovnsb_idl_loop.idl))
> > > @@ -930,25 +931,26 @@ main(int argc, char *argv[])
> > >              }
> > >
> > >              if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
> > > -                int64_t loop_start_time = time_wall_msec();
> > > -                bool activity = inc_proc_northd_run(ovnnb_txn,
> ovnsb_txn,
> > > -                                                    recompute);
> > > -                recompute = false;
> > > -                if (ovnsb_txn) {
> > > +                bool activity = false;
> > > +                if (ovnnb_txn && ovnsb_txn) {
> > > +                    int64_t loop_start_time = time_wall_msec();
> > > +                    activity = inc_proc_northd_run(ovnnb_txn,
> ovnsb_txn,
> > > +                                                        recompute);
> > > +                    recompute = false;
> > >                      check_and_add_supported_dhcp_opts_to_sb_db(
> > >                                   ovnsb_txn, ovnsb_idl_loop.idl);
> > >                      check_and_add_supported_dhcpv6_opts_to_sb_db(
> > >                                   ovnsb_txn, ovnsb_idl_loop.idl);
> > >                      check_and_update_rbac(
> > >                                   ovnsb_txn, ovnsb_idl_loop.idl);
> > > -                }
> > >
> > > -                if (ovnnb_txn && ovnsb_txn) {
> > >                      update_sequence_numbers(loop_start_time,
> > >                                              ovnnb_idl_loop.idl,
> > >                                              ovnsb_idl_loop.idl,
> > >                                              ovnnb_txn, ovnsb_txn,
> > >                                              &ovnsb_idl_loop);
> > > +                } else if (!recompute) {
> > > +                    clear_idl_track = false;
> > >                  }
> > >
> > >                  /* If there are any errors, we force a full recompute
> in order
> > > @@ -998,8 +1000,10 @@ main(int argc, char *argv[])
> > >              recompute = true;
> > >          }
> > >
> > > -        ovsdb_idl_track_clear(ovnnb_idl_loop.idl);
> > > -        ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
> > > +        if (clear_idl_track) {
> > > +            ovsdb_idl_track_clear(ovnnb_idl_loop.idl);
> > > +            ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
> > > +        }
> > >
> > >          unixctl_server_run(unixctl);
> > >          unixctl_server_wait(unixctl);
> > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > > index f1bf9092eeb7..e79d33b2aec5 100644
> > > --- a/tests/ovn-northd.at
> > > +++ b/tests/ovn-northd.at
> > > @@ -9522,67 +9522,65 @@ as hv1
> > >  ovs-vsctl add-br br-phys
> > >  ovn_attach n1 br-phys 192.168.0.11
> > >
> > > -fail_count=0
> > >  check_recompute_counter() {
> > >      northd_recomp=$(as northd ovn-appctl -t NORTHD_TYPE
> inc-engine/show-stats northd recompute)
> > > -    if test x$northd_recomp != x$1; then
> > > -        fail_count=$(($fail_count + 1))
> > > -        echo check northd recompute failed: expected $1, got
> $northd_recomp
> > > -        return 1
> > > -    fi
> > > +    AT_CHECK([test x$northd_recomp = x$1])
> > > +
> > >      lflow_recomp=$(as northd ovn-appctl -t NORTHD_TYPE
> inc-engine/show-stats lflow recompute)
> > > -    if test x$lflow_recomp != x$2; then
> > > -        fail_count=$(($fail_count + 1))
> > > -        echo check lflow recompute failed: expected $2, got
> $lflow_recomp
> > > -        return 1
> > > -    fi
> > > -    return 0
> > > +    AT_CHECK([test x$lflow_recomp = x$2])
> > >  }
> > >
> > > -# Depending on order of responses from NB and SB, the number of
> recompute may
> > > -# be different. This test case only verifies the best case scenario,
> which
> > > -# should have the expected recompute count at least 50% of the time.
> > > +check ovn-nbctl --wait=hv ls-add ls0
> > > +
> > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-0 -- lsp-set-addresses
> lsp0-0 "unknown"
> > > +ovs-vsctl add-port br-int lsp0-0 -- set interface lsp0-0
> external_ids:iface-id=lsp0-0
> > > +wait_for_ports_up
> > > +check ovn-nbctl --wait=hv sync
> > > +check_recompute_counter 5 5
> > > +
> > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-1 -- lsp-set-addresses
> lsp0-1 "aa:aa:aa:00:00:01 192.168.0.11"
> > > +ovs-vsctl add-port br-int lsp0-1 -- set interface lsp0-1
> external_ids:iface-id=lsp0-1
> > > +wait_for_ports_up
> > > +check ovn-nbctl --wait=hv sync
> > > +check_recompute_counter 0 0
> > > +
> > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-2 -- lsp-set-addresses
> lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12"
> > > +ovs-vsctl add-port br-int lsp0-2 -- set interface lsp0-2
> external_ids:iface-id=lsp0-2
> > > +wait_for_ports_up
> > > +check ovn-nbctl --wait=hv sync
> > > +check_recompute_counter 0 0
> > > +
> > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > +check ovn-nbctl --wait=hv lsp-del lsp0-1
> > > +check_recompute_counter 0 0
> > > +
> > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > +check ovn-nbctl --wait=hv lsp-set-addresses lsp0-2 "aa:aa:aa:00:00:88
> 192.168.0.88"
> > > +check_recompute_counter 0 0
> > > +
> > > +# Delete and re-add a LSP for several times continuously, to ensure
> > > +# frequent operations do not trigger recompute when there are in-flight
> > > +# transcations.
> > >  for i in $(seq 10); do
> > > -    check ovn-nbctl --wait=hv ls-add ls$i
> > > -
> > > -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > -    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-0 --
> lsp-set-addresses lsp${i}-0 "unknown"
> > > -    ovs-vsctl add-port br-int lsp${i}-0 -- set interface lsp${i}-0
> external_ids:iface-id=lsp${i}-0
> > > -    wait_for_ports_up
> > > -    check ovn-nbctl --wait=hv sync
> > > -    check_recompute_counter 5 5 || continue
> > > -
> > > -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > -    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-1 --
> lsp-set-addresses lsp${i}-1 "aa:aa:aa:00:00:01 192.168.0.11"
> > > -    ovs-vsctl add-port br-int lsp${i}-1 -- set interface lsp${i}-1
> external_ids:iface-id=lsp${i}-1
> > > -    wait_for_ports_up
> > > -    check ovn-nbctl --wait=hv sync
> > > -    check_recompute_counter 0 0 || continue
> > > -
> > > -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > -    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-2 --
> lsp-set-addresses lsp${i}-2 "aa:aa:aa:00:00:02 192.168.0.12"
> > > -    ovs-vsctl add-port br-int lsp${i}-2 -- set interface lsp${i}-2
> external_ids:iface-id=lsp${i}-2
> > > -    wait_for_ports_up
> > > -    check ovn-nbctl --wait=hv sync
> > > -    check_recompute_counter 0 0 || continue
> > > -
> > > -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > -    check ovn-nbctl --wait=hv lsp-del lsp${i}-1
> > > -    check_recompute_counter 0 0 || continue
> > > -
> > > -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > -    check ovn-nbctl --wait=hv lsp-set-addresses lsp${i}-2
> "aa:aa:aa:00:00:88 192.168.0.88"
> > > -    check_recompute_counter 0 0 || continue
> > > -
> > > -    # No change, no recompute
> > > -    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > -    check ovn-nbctl --wait=sb sync
> > > -    check_recompute_counter 0 0 || continue
> > > +    # wait for sb but not wait for hv
> > > +    check ovn-nbctl --wait=sb lsp-del lsp0-2
> > > +    check ovn-nbctl --wait=sb lsp-add ls0 lsp0-2 -- lsp-set-addresses
> lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12"
> > >
> > > -    CHECK_NO_CHANGE_AFTER_RECOMPUTE
> > > +    # even without waiting for sb
> > > +    check ovn-nbctl lsp-del lsp0-2
> > > +    check ovn-nbctl lsp-add ls0 lsp0-2 -- lsp-set-addresses lsp0-2
> "aa:aa:aa:00:00:02 192.168.0.12"
> > >  done
> > > -echo Test failed $fail_count in 10.
> > > -AT_CHECK([test $fail_count -le 5])
> > > +check_recompute_counter 0 0
> > > +
> > > +# No change, no recompute
> > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > +check ovn-nbctl --wait=sb sync
> > > +check_recompute_counter 0 0
> > > +
> > > +CHECK_NO_CHANGE_AFTER_RECOMPUTE
> > >
> > >  OVN_CLEANUP([hv1])
> > >  AT_CLEANUP
> > > --
> > > 2.30.2
> > >
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > >
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Han Zhou July 7, 2023, 6:32 a.m. UTC | #4
On Fri, Jul 7, 2023 at 2:02 PM Numan Siddique <numans@ovn.org> wrote:
>
> On Fri, Jul 7, 2023 at 6:40 AM Han Zhou <hzhou@ovn.org> wrote:
> >
> > On Thu, Jul 6, 2023 at 9:55 PM Numan Siddique <numans@ovn.org> wrote:
> > >
> > > On Thu, Jul 6, 2023 at 3:32 PM Han Zhou <hzhou@ovn.org> wrote:
> > > >
> > > > Although each individual VIF port related changes are handled
> > > > incrementally, it still triggers recompute if there are in-flight
> > > > transactions (either to NB or SB) when a change comes, which is very
> > > > common in real production environment if change happens frequently.
> > > > It is also easy to hit such situation in test cases where nb_cfg
> > > > mechanism is heavily used, which makes it difficult to write
reliable
> > > > and stable tests, such as what the commit 8c30ba1386 was trying to
work
> > > > around.
> > > >
> > > > This patch skips the I-P engine execution until the NB & SB
transaction
> > > > handles are available (no in-flight transactions), and when
skippiing
> > > > the runs it keeps the tracked changes in IDL across main loop
> > > > iterations. This way we avoid recompute without worrying about
missing
> > > > any changes.
> > > >
> > > > Signed-off-by: Han Zhou <hzhou@ovn.org>
> > >
> > > Acked-by: Numan Siddique <numans@ovn.org>
> > >
> > > Numan
> >
> > Thanks Numan! I applied the series to main.
>
> Hi Han,
>
> Looks like the test case - "LSP incremental processing" is a little bit
flaky.
>
> If you see the latest CI run,  this test is failing for ARM -
> https://github.com/ovn-org/ovn/runs/14845008643
> It also failed for the load balancer I-P patch series
> (http://patchwork.ozlabs.org/project/ovn/list/?series=362800)
> in my local repo and it passed in a re-run.
>
>
> ------
> as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
> ../../../tests/ovn-macros.at:419: "$@"
> ovn-nbctl --wait=hv lsp-add ls0 lsp0-0 -- lsp-set-addresses lsp0-0 unknown
> ../../../tests/ovn-macros.at:419: "$@"
> Waiting until 0 rows in nb Logical_Switch_Port with up!=true type=""...
> ovn-macros.at:470: waiting until test $count = $(count_rows $db:$table
> $a $b $c $d $e)...
> ovn-macros.at:470: wait succeeded immediately
> Waiting until 0 rows in nb Logical_Switch_Port with up!=true
type=router...
> ovn-macros.at:470: waiting until test $count = $(count_rows $db:$table
> $a $b $c $d $e)...
> ovn-macros.at:470: wait succeeded immediately
> ovn-nbctl --wait=hv sync
> ../../../tests/ovn-macros.at:419: "$@"
> ../../../tests/ovn-northd.at:9515: test x$northd_recomp = x$1
> ../../../tests/ovn-northd.at:9515: exit code was 1, expected 0
> ----
> Looks like its failing here -
> https://github.com/ovn-org/ovn/blob/main/tests/ovn-northd.at#L9540
>
> Maybe we shouldn't do exact matches on the counters ?
>
Sorry for this. I think the below patch should fix it:
https://patchwork.ozlabs.org/project/ovn/patch/20230707062938.761299-1-hzhou@ovn.org/

Thanks,
Han

> Thanks
> Numan
>
>
> >
> > Han
> > >
> > > > ---
> > > >  northd/inc-proc-northd.c |   6 +--
> > > >  northd/ovn-northd.c      |  22 +++++----
> > > >  tests/ovn-northd.at      | 104
+++++++++++++++++++--------------------
> > > >  3 files changed, 66 insertions(+), 66 deletions(-)
> > > >
> > > > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> > > > index 19fc67795643..d328deb222e6 100644
> > > > --- a/northd/inc-proc-northd.c
> > > > +++ b/northd/inc-proc-northd.c
> > > > @@ -296,6 +296,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop
*nb,
> > > >  bool inc_proc_northd_run(struct ovsdb_idl_txn *ovnnb_txn,
> > > >                           struct ovsdb_idl_txn *ovnsb_txn,
> > > >                           bool recompute) {
> > > > +    ovs_assert(ovnnb_txn && ovnsb_txn);
> > > >      engine_init_run();
> > > >
> > > >      /* Force a full recompute if instructed to, for example, after
a
> > NB/SB
> > > > @@ -312,10 +313,7 @@ bool inc_proc_northd_run(struct ovsdb_idl_txn
> > *ovnnb_txn,
> > > >      };
> > > >
> > > >      engine_set_context(&eng_ctx);
> > > > -
> > > > -    if (ovnnb_txn && ovnsb_txn) {
> > > > -        engine_run(true);
> > > > -    }
> > > > +    engine_run(true);
> > > >
> > > >      if (!engine_has_run()) {
> > > >          if (engine_need_run()) {
> > > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> > > > index 3e0848e41b8e..4fa1b039ea32 100644
> > > > --- a/northd/ovn-northd.c
> > > > +++ b/northd/ovn-northd.c
> > > > @@ -881,6 +881,7 @@ main(int argc, char *argv[])
> > > >              simap_destroy(&usage);
> > > >          }
> > > >
> > > > +        bool clear_idl_track = true;
> > > >          if (!state.paused) {
> > > >              if (!ovsdb_idl_has_lock(ovnsb_idl_loop.idl) &&
> > > >                  !ovsdb_idl_is_lock_contended(ovnsb_idl_loop.idl))
> > > > @@ -930,25 +931,26 @@ main(int argc, char *argv[])
> > > >              }
> > > >
> > > >              if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
> > > > -                int64_t loop_start_time = time_wall_msec();
> > > > -                bool activity = inc_proc_northd_run(ovnnb_txn,
> > ovnsb_txn,
> > > > -                                                    recompute);
> > > > -                recompute = false;
> > > > -                if (ovnsb_txn) {
> > > > +                bool activity = false;
> > > > +                if (ovnnb_txn && ovnsb_txn) {
> > > > +                    int64_t loop_start_time = time_wall_msec();
> > > > +                    activity = inc_proc_northd_run(ovnnb_txn,
> > ovnsb_txn,
> > > > +                                                        recompute);
> > > > +                    recompute = false;
> > > >                      check_and_add_supported_dhcp_opts_to_sb_db(
> > > >                                   ovnsb_txn, ovnsb_idl_loop.idl);
> > > >                      check_and_add_supported_dhcpv6_opts_to_sb_db(
> > > >                                   ovnsb_txn, ovnsb_idl_loop.idl);
> > > >                      check_and_update_rbac(
> > > >                                   ovnsb_txn, ovnsb_idl_loop.idl);
> > > > -                }
> > > >
> > > > -                if (ovnnb_txn && ovnsb_txn) {
> > > >                      update_sequence_numbers(loop_start_time,
> > > >                                              ovnnb_idl_loop.idl,
> > > >                                              ovnsb_idl_loop.idl,
> > > >                                              ovnnb_txn, ovnsb_txn,
> > > >                                              &ovnsb_idl_loop);
> > > > +                } else if (!recompute) {
> > > > +                    clear_idl_track = false;
> > > >                  }
> > > >
> > > >                  /* If there are any errors, we force a full
recompute
> > in order
> > > > @@ -998,8 +1000,10 @@ main(int argc, char *argv[])
> > > >              recompute = true;
> > > >          }
> > > >
> > > > -        ovsdb_idl_track_clear(ovnnb_idl_loop.idl);
> > > > -        ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
> > > > +        if (clear_idl_track) {
> > > > +            ovsdb_idl_track_clear(ovnnb_idl_loop.idl);
> > > > +            ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
> > > > +        }
> > > >
> > > >          unixctl_server_run(unixctl);
> > > >          unixctl_server_wait(unixctl);
> > > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > > > index f1bf9092eeb7..e79d33b2aec5 100644
> > > > --- a/tests/ovn-northd.at
> > > > +++ b/tests/ovn-northd.at
> > > > @@ -9522,67 +9522,65 @@ as hv1
> > > >  ovs-vsctl add-br br-phys
> > > >  ovn_attach n1 br-phys 192.168.0.11
> > > >
> > > > -fail_count=0
> > > >  check_recompute_counter() {
> > > >      northd_recomp=$(as northd ovn-appctl -t NORTHD_TYPE
> > inc-engine/show-stats northd recompute)
> > > > -    if test x$northd_recomp != x$1; then
> > > > -        fail_count=$(($fail_count + 1))
> > > > -        echo check northd recompute failed: expected $1, got
> > $northd_recomp
> > > > -        return 1
> > > > -    fi
> > > > +    AT_CHECK([test x$northd_recomp = x$1])
> > > > +
> > > >      lflow_recomp=$(as northd ovn-appctl -t NORTHD_TYPE
> > inc-engine/show-stats lflow recompute)
> > > > -    if test x$lflow_recomp != x$2; then
> > > > -        fail_count=$(($fail_count + 1))
> > > > -        echo check lflow recompute failed: expected $2, got
> > $lflow_recomp
> > > > -        return 1
> > > > -    fi
> > > > -    return 0
> > > > +    AT_CHECK([test x$lflow_recomp = x$2])
> > > >  }
> > > >
> > > > -# Depending on order of responses from NB and SB, the number of
> > recompute may
> > > > -# be different. This test case only verifies the best case
scenario,
> > which
> > > > -# should have the expected recompute count at least 50% of the
time.
> > > > +check ovn-nbctl --wait=hv ls-add ls0
> > > > +
> > > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > > +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-0 -- lsp-set-addresses
> > lsp0-0 "unknown"
> > > > +ovs-vsctl add-port br-int lsp0-0 -- set interface lsp0-0
> > external_ids:iface-id=lsp0-0
> > > > +wait_for_ports_up
> > > > +check ovn-nbctl --wait=hv sync
> > > > +check_recompute_counter 5 5
> > > > +
> > > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > > +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-1 -- lsp-set-addresses
> > lsp0-1 "aa:aa:aa:00:00:01 192.168.0.11"
> > > > +ovs-vsctl add-port br-int lsp0-1 -- set interface lsp0-1
> > external_ids:iface-id=lsp0-1
> > > > +wait_for_ports_up
> > > > +check ovn-nbctl --wait=hv sync
> > > > +check_recompute_counter 0 0
> > > > +
> > > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > > +check ovn-nbctl --wait=hv lsp-add ls0 lsp0-2 -- lsp-set-addresses
> > lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12"
> > > > +ovs-vsctl add-port br-int lsp0-2 -- set interface lsp0-2
> > external_ids:iface-id=lsp0-2
> > > > +wait_for_ports_up
> > > > +check ovn-nbctl --wait=hv sync
> > > > +check_recompute_counter 0 0
> > > > +
> > > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > > +check ovn-nbctl --wait=hv lsp-del lsp0-1
> > > > +check_recompute_counter 0 0
> > > > +
> > > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > > +check ovn-nbctl --wait=hv lsp-set-addresses lsp0-2
"aa:aa:aa:00:00:88
> > 192.168.0.88"
> > > > +check_recompute_counter 0 0
> > > > +
> > > > +# Delete and re-add a LSP for several times continuously, to ensure
> > > > +# frequent operations do not trigger recompute when there are
in-flight
> > > > +# transcations.
> > > >  for i in $(seq 10); do
> > > > -    check ovn-nbctl --wait=hv ls-add ls$i
> > > > -
> > > > -    check as northd ovn-appctl -t NORTHD_TYPE
inc-engine/clear-stats
> > > > -    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-0 --
> > lsp-set-addresses lsp${i}-0 "unknown"
> > > > -    ovs-vsctl add-port br-int lsp${i}-0 -- set interface lsp${i}-0
> > external_ids:iface-id=lsp${i}-0
> > > > -    wait_for_ports_up
> > > > -    check ovn-nbctl --wait=hv sync
> > > > -    check_recompute_counter 5 5 || continue
> > > > -
> > > > -    check as northd ovn-appctl -t NORTHD_TYPE
inc-engine/clear-stats
> > > > -    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-1 --
> > lsp-set-addresses lsp${i}-1 "aa:aa:aa:00:00:01 192.168.0.11"
> > > > -    ovs-vsctl add-port br-int lsp${i}-1 -- set interface lsp${i}-1
> > external_ids:iface-id=lsp${i}-1
> > > > -    wait_for_ports_up
> > > > -    check ovn-nbctl --wait=hv sync
> > > > -    check_recompute_counter 0 0 || continue
> > > > -
> > > > -    check as northd ovn-appctl -t NORTHD_TYPE
inc-engine/clear-stats
> > > > -    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-2 --
> > lsp-set-addresses lsp${i}-2 "aa:aa:aa:00:00:02 192.168.0.12"
> > > > -    ovs-vsctl add-port br-int lsp${i}-2 -- set interface lsp${i}-2
> > external_ids:iface-id=lsp${i}-2
> > > > -    wait_for_ports_up
> > > > -    check ovn-nbctl --wait=hv sync
> > > > -    check_recompute_counter 0 0 || continue
> > > > -
> > > > -    check as northd ovn-appctl -t NORTHD_TYPE
inc-engine/clear-stats
> > > > -    check ovn-nbctl --wait=hv lsp-del lsp${i}-1
> > > > -    check_recompute_counter 0 0 || continue
> > > > -
> > > > -    check as northd ovn-appctl -t NORTHD_TYPE
inc-engine/clear-stats
> > > > -    check ovn-nbctl --wait=hv lsp-set-addresses lsp${i}-2
> > "aa:aa:aa:00:00:88 192.168.0.88"
> > > > -    check_recompute_counter 0 0 || continue
> > > > -
> > > > -    # No change, no recompute
> > > > -    check as northd ovn-appctl -t NORTHD_TYPE
inc-engine/clear-stats
> > > > -    check ovn-nbctl --wait=sb sync
> > > > -    check_recompute_counter 0 0 || continue
> > > > +    # wait for sb but not wait for hv
> > > > +    check ovn-nbctl --wait=sb lsp-del lsp0-2
> > > > +    check ovn-nbctl --wait=sb lsp-add ls0 lsp0-2 --
lsp-set-addresses
> > lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12"
> > > >
> > > > -    CHECK_NO_CHANGE_AFTER_RECOMPUTE
> > > > +    # even without waiting for sb
> > > > +    check ovn-nbctl lsp-del lsp0-2
> > > > +    check ovn-nbctl lsp-add ls0 lsp0-2 -- lsp-set-addresses lsp0-2
> > "aa:aa:aa:00:00:02 192.168.0.12"
> > > >  done
> > > > -echo Test failed $fail_count in 10.
> > > > -AT_CHECK([test $fail_count -le 5])
> > > > +check_recompute_counter 0 0
> > > > +
> > > > +# No change, no recompute
> > > > +check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
> > > > +check ovn-nbctl --wait=sb sync
> > > > +check_recompute_counter 0 0
> > > > +
> > > > +CHECK_NO_CHANGE_AFTER_RECOMPUTE
> > > >
> > > >  OVN_CLEANUP([hv1])
> > > >  AT_CLEANUP
> > > > --
> > > > 2.30.2
> > > >
> > > > _______________________________________________
> > > > dev mailing list
> > > > dev@openvswitch.org
> > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > > >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff mbox series

Patch

diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index 19fc67795643..d328deb222e6 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -296,6 +296,7 @@  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
 bool inc_proc_northd_run(struct ovsdb_idl_txn *ovnnb_txn,
                          struct ovsdb_idl_txn *ovnsb_txn,
                          bool recompute) {
+    ovs_assert(ovnnb_txn && ovnsb_txn);
     engine_init_run();
 
     /* Force a full recompute if instructed to, for example, after a NB/SB
@@ -312,10 +313,7 @@  bool inc_proc_northd_run(struct ovsdb_idl_txn *ovnnb_txn,
     };
 
     engine_set_context(&eng_ctx);
-
-    if (ovnnb_txn && ovnsb_txn) {
-        engine_run(true);
-    }
+    engine_run(true);
 
     if (!engine_has_run()) {
         if (engine_need_run()) {
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 3e0848e41b8e..4fa1b039ea32 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -881,6 +881,7 @@  main(int argc, char *argv[])
             simap_destroy(&usage);
         }
 
+        bool clear_idl_track = true;
         if (!state.paused) {
             if (!ovsdb_idl_has_lock(ovnsb_idl_loop.idl) &&
                 !ovsdb_idl_is_lock_contended(ovnsb_idl_loop.idl))
@@ -930,25 +931,26 @@  main(int argc, char *argv[])
             }
 
             if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
-                int64_t loop_start_time = time_wall_msec();
-                bool activity = inc_proc_northd_run(ovnnb_txn, ovnsb_txn,
-                                                    recompute);
-                recompute = false;
-                if (ovnsb_txn) {
+                bool activity = false;
+                if (ovnnb_txn && ovnsb_txn) {
+                    int64_t loop_start_time = time_wall_msec();
+                    activity = inc_proc_northd_run(ovnnb_txn, ovnsb_txn,
+                                                        recompute);
+                    recompute = false;
                     check_and_add_supported_dhcp_opts_to_sb_db(
                                  ovnsb_txn, ovnsb_idl_loop.idl);
                     check_and_add_supported_dhcpv6_opts_to_sb_db(
                                  ovnsb_txn, ovnsb_idl_loop.idl);
                     check_and_update_rbac(
                                  ovnsb_txn, ovnsb_idl_loop.idl);
-                }
 
-                if (ovnnb_txn && ovnsb_txn) {
                     update_sequence_numbers(loop_start_time,
                                             ovnnb_idl_loop.idl,
                                             ovnsb_idl_loop.idl,
                                             ovnnb_txn, ovnsb_txn,
                                             &ovnsb_idl_loop);
+                } else if (!recompute) {
+                    clear_idl_track = false;
                 }
 
                 /* If there are any errors, we force a full recompute in order
@@ -998,8 +1000,10 @@  main(int argc, char *argv[])
             recompute = true;
         }
 
-        ovsdb_idl_track_clear(ovnnb_idl_loop.idl);
-        ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
+        if (clear_idl_track) {
+            ovsdb_idl_track_clear(ovnnb_idl_loop.idl);
+            ovsdb_idl_track_clear(ovnsb_idl_loop.idl);
+        }
 
         unixctl_server_run(unixctl);
         unixctl_server_wait(unixctl);
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index f1bf9092eeb7..e79d33b2aec5 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -9522,67 +9522,65 @@  as hv1
 ovs-vsctl add-br br-phys
 ovn_attach n1 br-phys 192.168.0.11
 
-fail_count=0
 check_recompute_counter() {
     northd_recomp=$(as northd ovn-appctl -t NORTHD_TYPE inc-engine/show-stats northd recompute)
-    if test x$northd_recomp != x$1; then
-        fail_count=$(($fail_count + 1))
-        echo check northd recompute failed: expected $1, got $northd_recomp
-        return 1
-    fi
+    AT_CHECK([test x$northd_recomp = x$1])
+
     lflow_recomp=$(as northd ovn-appctl -t NORTHD_TYPE inc-engine/show-stats lflow recompute)
-    if test x$lflow_recomp != x$2; then
-        fail_count=$(($fail_count + 1))
-        echo check lflow recompute failed: expected $2, got $lflow_recomp
-        return 1
-    fi
-    return 0
+    AT_CHECK([test x$lflow_recomp = x$2])
 }
 
-# Depending on order of responses from NB and SB, the number of recompute may
-# be different. This test case only verifies the best case scenario, which
-# should have the expected recompute count at least 50% of the time.
+check ovn-nbctl --wait=hv ls-add ls0
+
+check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
+check ovn-nbctl --wait=hv lsp-add ls0 lsp0-0 -- lsp-set-addresses lsp0-0 "unknown"
+ovs-vsctl add-port br-int lsp0-0 -- set interface lsp0-0 external_ids:iface-id=lsp0-0
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+check_recompute_counter 5 5
+
+check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
+check ovn-nbctl --wait=hv lsp-add ls0 lsp0-1 -- lsp-set-addresses lsp0-1 "aa:aa:aa:00:00:01 192.168.0.11"
+ovs-vsctl add-port br-int lsp0-1 -- set interface lsp0-1 external_ids:iface-id=lsp0-1
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+check_recompute_counter 0 0
+
+check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
+check ovn-nbctl --wait=hv lsp-add ls0 lsp0-2 -- lsp-set-addresses lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12"
+ovs-vsctl add-port br-int lsp0-2 -- set interface lsp0-2 external_ids:iface-id=lsp0-2
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+check_recompute_counter 0 0
+
+check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
+check ovn-nbctl --wait=hv lsp-del lsp0-1
+check_recompute_counter 0 0
+
+check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
+check ovn-nbctl --wait=hv lsp-set-addresses lsp0-2 "aa:aa:aa:00:00:88 192.168.0.88"
+check_recompute_counter 0 0
+
+# Delete and re-add a LSP for several times continuously, to ensure
+# frequent operations do not trigger recompute when there are in-flight
+# transcations.
 for i in $(seq 10); do
-    check ovn-nbctl --wait=hv ls-add ls$i
-
-    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
-    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-0 -- lsp-set-addresses lsp${i}-0 "unknown"
-    ovs-vsctl add-port br-int lsp${i}-0 -- set interface lsp${i}-0 external_ids:iface-id=lsp${i}-0
-    wait_for_ports_up
-    check ovn-nbctl --wait=hv sync
-    check_recompute_counter 5 5 || continue
-
-    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
-    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-1 -- lsp-set-addresses lsp${i}-1 "aa:aa:aa:00:00:01 192.168.0.11"
-    ovs-vsctl add-port br-int lsp${i}-1 -- set interface lsp${i}-1 external_ids:iface-id=lsp${i}-1
-    wait_for_ports_up
-    check ovn-nbctl --wait=hv sync
-    check_recompute_counter 0 0 || continue
-
-    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
-    check ovn-nbctl --wait=hv lsp-add ls$i lsp${i}-2 -- lsp-set-addresses lsp${i}-2 "aa:aa:aa:00:00:02 192.168.0.12"
-    ovs-vsctl add-port br-int lsp${i}-2 -- set interface lsp${i}-2 external_ids:iface-id=lsp${i}-2
-    wait_for_ports_up
-    check ovn-nbctl --wait=hv sync
-    check_recompute_counter 0 0 || continue
-
-    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
-    check ovn-nbctl --wait=hv lsp-del lsp${i}-1
-    check_recompute_counter 0 0 || continue
-
-    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
-    check ovn-nbctl --wait=hv lsp-set-addresses lsp${i}-2 "aa:aa:aa:00:00:88 192.168.0.88"
-    check_recompute_counter 0 0 || continue
-
-    # No change, no recompute
-    check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
-    check ovn-nbctl --wait=sb sync
-    check_recompute_counter 0 0 || continue
+    # wait for sb but not wait for hv
+    check ovn-nbctl --wait=sb lsp-del lsp0-2
+    check ovn-nbctl --wait=sb lsp-add ls0 lsp0-2 -- lsp-set-addresses lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12"
 
-    CHECK_NO_CHANGE_AFTER_RECOMPUTE
+    # even without waiting for sb
+    check ovn-nbctl lsp-del lsp0-2
+    check ovn-nbctl lsp-add ls0 lsp0-2 -- lsp-set-addresses lsp0-2 "aa:aa:aa:00:00:02 192.168.0.12"
 done
-echo Test failed $fail_count in 10.
-AT_CHECK([test $fail_count -le 5])
+check_recompute_counter 0 0
+
+# No change, no recompute
+check as northd ovn-appctl -t NORTHD_TYPE inc-engine/clear-stats
+check ovn-nbctl --wait=sb sync
+check_recompute_counter 0 0
+
+CHECK_NO_CHANGE_AFTER_RECOMPUTE
 
 OVN_CLEANUP([hv1])
 AT_CLEANUP