Message ID | 20220725213443.646243-5-hzhou@ovn.org |
---|---|
State | Changes Requested |
Headers | show |
Series | Avoid unnecessary deletion & recreation during restart. | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | fail | github build: failed |
ovsrobot/github-robot-_ovn-kubernetes | fail | github build: failed |
On 7/25/22 23:34, Han Zhou wrote: > Also remove the reset mechanism when DB is reconnected, because at DB > reconnection the data in IDL would not reset. > > Signed-off-by: Han Zhou <hzhou@ovn.org> > --- Hi Han, I noticed that with this change the "VIF plugging" test starts failing when running with conditional monitoring disabled: 777: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no -- ovn_monitor_all=yes ok 775: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes -- ovn_monitor_all=yes ok 778: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no -- ovn_monitor_all=no FAILED (ovs-macros.at:255) 776: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes -- ovn_monitor_all=no FAILED (ovs-macros.at:255) > controller/ovn-controller.c | 1 - > controller/vif-plug.c | 20 ++++++-------------- > 2 files changed, 6 insertions(+), 15 deletions(-) > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > index 8fc554201..8bb18fc23 100644 > --- a/controller/ovn-controller.c > +++ b/controller/ovn-controller.c > @@ -3874,7 +3874,6 @@ main(int argc, char *argv[]) > if (!new_ovnsb_cond_seqno) { > VLOG_INFO("OVNSB IDL reconnected, force recompute."); > engine_set_force_recompute(true); > - vif_plug_reset_idl_prime_counter(); > } > ovnsb_cond_seqno = new_ovnsb_cond_seqno; > } > diff --git a/controller/vif-plug.c b/controller/vif-plug.c > index c6fbe7e59..38348bf54 100644 > --- a/controller/vif-plug.c > +++ b/controller/vif-plug.c > @@ -532,22 +532,14 @@ vif_plug_handle_iface(const struct ovsrec_interface *iface_rec, > * completeness of the initial data downloading we need this counter so that we > * do not erronously unplug ports because the data is just not loaded yet. > */ > -#define VIF_PLUG_PRIME_IDL_COUNT_SEEED 10 > -static int vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED; > - > -void > -vif_plug_reset_idl_prime_counter(void) > -{ > - vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED; > -} > - We should remove this from the .h file too. > void > vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, > struct vif_plug_ctx_out *vif_plug_ctx_out) > { > - if (vif_plug_prime_idl_count && --vif_plug_prime_idl_count > 0) { > - VLOG_DBG("vif_plug_run: vif_plug_prime_idl_count=%d, will not unplug " > - "ports in this iteration.", vif_plug_prime_idl_count); > + bool delay_plug = daemon_started_recently(); > + if (delay_plug) { > + VLOG_DBG("vif_plug_run: daemon started recently, will not unplug " > + "ports in this iteration."); > } > > if (!vif_plug_ctx_in->chassis_rec) { > @@ -557,7 +549,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, > OVSREC_INTERFACE_TABLE_FOR_EACH (iface_rec, > vif_plug_ctx_in->iface_table) { > vif_plug_handle_iface(iface_rec, vif_plug_ctx_in, vif_plug_ctx_out, > - !vif_plug_prime_idl_count); > + !delay_plug); > } > > struct sbrec_port_binding *target = > @@ -573,7 +565,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, > enum en_lport_type lport_type = get_lport_type(pb); > if (lport_type == LP_VIF) { > vif_plug_handle_lport_vif(pb, vif_plug_ctx_in, vif_plug_ctx_out, > - !vif_plug_prime_idl_count); > + !delay_plug); > } > } > sbrec_port_binding_index_destroy_row(target); Regards, Dumitru
On Thu, Aug 4, 2022 at 7:09 AM Dumitru Ceara <dceara@redhat.com> wrote: > > On 7/25/22 23:34, Han Zhou wrote: > > Also remove the reset mechanism when DB is reconnected, because at DB > > reconnection the data in IDL would not reset. > > > > Signed-off-by: Han Zhou <hzhou@ovn.org> > > --- > > Hi Han, > > I noticed that with this change the "VIF plugging" test starts failing > when running with conditional monitoring disabled: > > 777: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no > -- ovn_monitor_all=yes ok > 775: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes > -- ovn_monitor_all=yes ok > 778: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no > -- ovn_monitor_all=no FAILED (ovs-macros.at:255) > 776: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes > -- ovn_monitor_all=no FAILED (ovs-macros.at:255) I am terribly sorry for this. I guess I must have forgotten to run the test or check the test result after adding this patch. No idea how that could have happened :( This failure is because the new approach is more strict than before, which requires 20 delay countdowns with real engine-updates instead of just 10 main loop iterations. Adding the ignore-startup-delay command fixes it: --- 8>< ---------------------------------------------------- ><8 -------------- diff --git a/tests/ovn.at b/tests/ovn.at index 3ba6ced4e..e2b523b67 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -31606,6 +31606,7 @@ OVS_WAIT_UNTIL([ test x43 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid} mtu_request) ]) +as hv1 check ovn-appctl -t ovn-controller debug/ignore-startup-delay # Check that pointing requested-chassis somewhere else will unplug the port check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 \ options:requested-chassis=non-existent-chassis @@ -31613,6 +31614,7 @@ OVS_WAIT_UNTIL([ ! as hv1 ovs-vsctl get Interface ${iface1_uuid} _uuid ]) +as hv2 check ovn-appctl -t ovn-controller debug/ignore-startup-delay # Check that removing an lport will unplug it AT_CHECK([test x${iface2_uuid} = x$(as hv2 ovs-vsctl get Interface ${iface2_uuid} _uuid)], [0], []) check ovn-nbctl --wait=hv lsp-del ${lsp2_uuid} ------------------------------------------------------------------------------------ I will wait for complete review comments before sending v3 for this fix. Thanks, Han > > > controller/ovn-controller.c | 1 - > > controller/vif-plug.c | 20 ++++++-------------- > > 2 files changed, 6 insertions(+), 15 deletions(-) > > > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > index 8fc554201..8bb18fc23 100644 > > --- a/controller/ovn-controller.c > > +++ b/controller/ovn-controller.c > > @@ -3874,7 +3874,6 @@ main(int argc, char *argv[]) > > if (!new_ovnsb_cond_seqno) { > > VLOG_INFO("OVNSB IDL reconnected, force recompute."); > > engine_set_force_recompute(true); > > - vif_plug_reset_idl_prime_counter(); > > } > > ovnsb_cond_seqno = new_ovnsb_cond_seqno; > > } > > diff --git a/controller/vif-plug.c b/controller/vif-plug.c > > index c6fbe7e59..38348bf54 100644 > > --- a/controller/vif-plug.c > > +++ b/controller/vif-plug.c > > @@ -532,22 +532,14 @@ vif_plug_handle_iface(const struct ovsrec_interface *iface_rec, > > * completeness of the initial data downloading we need this counter so that we > > * do not erronously unplug ports because the data is just not loaded yet. > > */ > > -#define VIF_PLUG_PRIME_IDL_COUNT_SEEED 10 > > -static int vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED; > > - > > -void > > -vif_plug_reset_idl_prime_counter(void) > > -{ > > - vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED; > > -} > > - > > We should remove this from the .h file too. > > > void > > vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, > > struct vif_plug_ctx_out *vif_plug_ctx_out) > > { > > - if (vif_plug_prime_idl_count && --vif_plug_prime_idl_count > 0) { > > - VLOG_DBG("vif_plug_run: vif_plug_prime_idl_count=%d, will not unplug " > > - "ports in this iteration.", vif_plug_prime_idl_count); > > + bool delay_plug = daemon_started_recently(); > > + if (delay_plug) { > > + VLOG_DBG("vif_plug_run: daemon started recently, will not unplug " > > + "ports in this iteration."); > > } > > > > if (!vif_plug_ctx_in->chassis_rec) { > > @@ -557,7 +549,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, > > OVSREC_INTERFACE_TABLE_FOR_EACH (iface_rec, > > vif_plug_ctx_in->iface_table) { > > vif_plug_handle_iface(iface_rec, vif_plug_ctx_in, vif_plug_ctx_out, > > - !vif_plug_prime_idl_count); > > + !delay_plug); > > } > > > > struct sbrec_port_binding *target = > > @@ -573,7 +565,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, > > enum en_lport_type lport_type = get_lport_type(pb); > > if (lport_type == LP_VIF) { > > vif_plug_handle_lport_vif(pb, vif_plug_ctx_in, vif_plug_ctx_out, > > - !vif_plug_prime_idl_count); > > + !delay_plug); > > } > > } > > sbrec_port_binding_index_destroy_row(target); > > Regards, > Dumitru >
On Sat, Aug 6, 2022 at 1:49 AM Han Zhou <hzhou@ovn.org> wrote: > > > > On Thu, Aug 4, 2022 at 7:09 AM Dumitru Ceara <dceara@redhat.com> wrote: > > > > On 7/25/22 23:34, Han Zhou wrote: > > > Also remove the reset mechanism when DB is reconnected, because at DB > > > reconnection the data in IDL would not reset. > > > > > > Signed-off-by: Han Zhou <hzhou@ovn.org> > > > --- > > > > Hi Han, > > > > I noticed that with this change the "VIF plugging" test starts failing > > when running with conditional monitoring disabled: > > > > 777: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no > > -- ovn_monitor_all=yes ok > > 775: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes > > -- ovn_monitor_all=yes ok > > 778: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no > > -- ovn_monitor_all=no FAILED (ovs-macros.at:255) > > 776: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes > > -- ovn_monitor_all=no FAILED (ovs-macros.at:255) > > I am terribly sorry for this. I guess I must have forgotten to run the test or check the test result after adding this patch. No idea how that could have happened :( > This failure is because the new approach is more strict than before, which requires 20 delay countdowns with real engine-updates instead of just 10 main loop iterations. Adding the ignore-startup-delay command fixes it: > > --- 8>< ---------------------------------------------------- ><8 -------------- > diff --git a/tests/ovn.at b/tests/ovn.at > index 3ba6ced4e..e2b523b67 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -31606,6 +31606,7 @@ OVS_WAIT_UNTIL([ > test x43 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid} mtu_request) > ]) > > +as hv1 check ovn-appctl -t ovn-controller debug/ignore-startup-delay > # Check that pointing requested-chassis somewhere else will unplug the port > check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 \ > options:requested-chassis=non-existent-chassis > @@ -31613,6 +31614,7 @@ OVS_WAIT_UNTIL([ > ! as hv1 ovs-vsctl get Interface ${iface1_uuid} _uuid > ]) > > +as hv2 check ovn-appctl -t ovn-controller debug/ignore-startup-delay > # Check that removing an lport will unplug it > AT_CHECK([test x${iface2_uuid} = x$(as hv2 ovs-vsctl get Interface ${iface2_uuid} _uuid)], [0], []) > check ovn-nbctl --wait=hv lsp-del ${lsp2_uuid} > ------------------------------------------------------------------------------------ > I will wait for complete review comments before sending v3 for this fix. I can confirm that this fixes the failing test. For rigor I also ran a version of the test that artificially generated 20 updates prior to attempting to unplug anything and that was also successful, and I see that both the countdown and timer works as intended. > Thanks, > Han > > > > > > controller/ovn-controller.c | 1 - > > > controller/vif-plug.c | 20 ++++++-------------- > > > 2 files changed, 6 insertions(+), 15 deletions(-) > > > > > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > > index 8fc554201..8bb18fc23 100644 > > > --- a/controller/ovn-controller.c > > > +++ b/controller/ovn-controller.c > > > @@ -3874,7 +3874,6 @@ main(int argc, char *argv[]) > > > if (!new_ovnsb_cond_seqno) { > > > VLOG_INFO("OVNSB IDL reconnected, force recompute."); > > > engine_set_force_recompute(true); > > > - vif_plug_reset_idl_prime_counter(); > > > } > > > ovnsb_cond_seqno = new_ovnsb_cond_seqno; > > > } > > > diff --git a/controller/vif-plug.c b/controller/vif-plug.c > > > index c6fbe7e59..38348bf54 100644 > > > --- a/controller/vif-plug.c > > > +++ b/controller/vif-plug.c > > > @@ -532,22 +532,14 @@ vif_plug_handle_iface(const struct ovsrec_interface *iface_rec, > > > * completeness of the initial data downloading we need this counter so that we > > > * do not erronously unplug ports because the data is just not loaded yet. > > > */ > > > -#define VIF_PLUG_PRIME_IDL_COUNT_SEEED 10 > > > -static int vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED; > > > - > > > -void > > > -vif_plug_reset_idl_prime_counter(void) > > > -{ > > > - vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED; > > > -} > > > - > > > > We should remove this from the .h file too. +1 > > > > > void > > > vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, > > > struct vif_plug_ctx_out *vif_plug_ctx_out) > > > { > > > - if (vif_plug_prime_idl_count && --vif_plug_prime_idl_count > 0) { > > > - VLOG_DBG("vif_plug_run: vif_plug_prime_idl_count=%d, will not unplug " > > > - "ports in this iteration.", vif_plug_prime_idl_count); > > > + bool delay_plug = daemon_started_recently(); > > > + if (delay_plug) { > > > + VLOG_DBG("vif_plug_run: daemon started recently, will not unplug " > > > + "ports in this iteration."); > > > } > > > > > > if (!vif_plug_ctx_in->chassis_rec) { > > > @@ -557,7 +549,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, > > > OVSREC_INTERFACE_TABLE_FOR_EACH (iface_rec, > > > vif_plug_ctx_in->iface_table) { > > > vif_plug_handle_iface(iface_rec, vif_plug_ctx_in, vif_plug_ctx_out, > > > - !vif_plug_prime_idl_count); > > > + !delay_plug); > > > } > > > > > > struct sbrec_port_binding *target = > > > @@ -573,7 +565,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, > > > enum en_lport_type lport_type = get_lport_type(pb); > > > if (lport_type == LP_VIF) { > > > vif_plug_handle_lport_vif(pb, vif_plug_ctx_in, vif_plug_ctx_out, > > > - !vif_plug_prime_idl_count); > > > + !delay_plug); > > > } > > > } > > > sbrec_port_binding_index_destroy_row(target); > > > > Regards, > > Dumitru > > Otherwise this LGTM
On 8/9/22 08:46, Frode Nordahl wrote: > On Sat, Aug 6, 2022 at 1:49 AM Han Zhou <hzhou@ovn.org> wrote: >> >> >> >> On Thu, Aug 4, 2022 at 7:09 AM Dumitru Ceara <dceara@redhat.com> wrote: >>> >>> On 7/25/22 23:34, Han Zhou wrote: >>>> Also remove the reset mechanism when DB is reconnected, because at DB >>>> reconnection the data in IDL would not reset. >>>> >>>> Signed-off-by: Han Zhou <hzhou@ovn.org> >>>> --- >>> >>> Hi Han, >>> >>> I noticed that with this change the "VIF plugging" test starts failing >>> when running with conditional monitoring disabled: >>> >>> 777: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no >>> -- ovn_monitor_all=yes ok >>> 775: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes >>> -- ovn_monitor_all=yes ok >>> 778: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no >>> -- ovn_monitor_all=no FAILED (ovs-macros.at:255) >>> 776: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes >>> -- ovn_monitor_all=no FAILED (ovs-macros.at:255) >> >> I am terribly sorry for this. I guess I must have forgotten to run the test or check the test result after adding this patch. No idea how that could have happened :( >> This failure is because the new approach is more strict than before, which requires 20 delay countdowns with real engine-updates instead of just 10 main loop iterations. Adding the ignore-startup-delay command fixes it: >> >> --- 8>< ---------------------------------------------------- ><8 -------------- >> diff --git a/tests/ovn.at b/tests/ovn.at >> index 3ba6ced4e..e2b523b67 100644 >> --- a/tests/ovn.at >> +++ b/tests/ovn.at >> @@ -31606,6 +31606,7 @@ OVS_WAIT_UNTIL([ >> test x43 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid} mtu_request) >> ]) >> >> +as hv1 check ovn-appctl -t ovn-controller debug/ignore-startup-delay >> # Check that pointing requested-chassis somewhere else will unplug the port >> check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 \ >> options:requested-chassis=non-existent-chassis >> @@ -31613,6 +31614,7 @@ OVS_WAIT_UNTIL([ >> ! as hv1 ovs-vsctl get Interface ${iface1_uuid} _uuid >> ]) >> >> +as hv2 check ovn-appctl -t ovn-controller debug/ignore-startup-delay >> # Check that removing an lport will unplug it >> AT_CHECK([test x${iface2_uuid} = x$(as hv2 ovs-vsctl get Interface ${iface2_uuid} _uuid)], [0], []) >> check ovn-nbctl --wait=hv lsp-del ${lsp2_uuid} >> ------------------------------------------------------------------------------------ >> I will wait for complete review comments before sending v3 for this fix. > > I can confirm that this fixes the failing test. For rigor I also ran a > version of the test that artificially generated 20 updates prior to > attempting to unplug anything and that was also successful, and I see > that both the countdown and timer works as intended. > >> Thanks, >> Han >> >>> >>>> controller/ovn-controller.c | 1 - >>>> controller/vif-plug.c | 20 ++++++-------------- >>>> 2 files changed, 6 insertions(+), 15 deletions(-) >>>> >>>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c >>>> index 8fc554201..8bb18fc23 100644 >>>> --- a/controller/ovn-controller.c >>>> +++ b/controller/ovn-controller.c >>>> @@ -3874,7 +3874,6 @@ main(int argc, char *argv[]) >>>> if (!new_ovnsb_cond_seqno) { >>>> VLOG_INFO("OVNSB IDL reconnected, force recompute."); >>>> engine_set_force_recompute(true); >>>> - vif_plug_reset_idl_prime_counter(); >>>> } >>>> ovnsb_cond_seqno = new_ovnsb_cond_seqno; >>>> } >>>> diff --git a/controller/vif-plug.c b/controller/vif-plug.c >>>> index c6fbe7e59..38348bf54 100644 >>>> --- a/controller/vif-plug.c >>>> +++ b/controller/vif-plug.c >>>> @@ -532,22 +532,14 @@ vif_plug_handle_iface(const struct ovsrec_interface *iface_rec, >>>> * completeness of the initial data downloading we need this counter so that we >>>> * do not erronously unplug ports because the data is just not loaded yet. >>>> */ >>>> -#define VIF_PLUG_PRIME_IDL_COUNT_SEEED 10 >>>> -static int vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED; >>>> - >>>> -void >>>> -vif_plug_reset_idl_prime_counter(void) >>>> -{ >>>> - vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED; >>>> -} >>>> - >>> >>> We should remove this from the .h file too. > > +1 > >>> >>>> void >>>> vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, >>>> struct vif_plug_ctx_out *vif_plug_ctx_out) >>>> { >>>> - if (vif_plug_prime_idl_count && --vif_plug_prime_idl_count > 0) { >>>> - VLOG_DBG("vif_plug_run: vif_plug_prime_idl_count=%d, will not unplug " >>>> - "ports in this iteration.", vif_plug_prime_idl_count); >>>> + bool delay_plug = daemon_started_recently(); >>>> + if (delay_plug) { >>>> + VLOG_DBG("vif_plug_run: daemon started recently, will not unplug " >>>> + "ports in this iteration."); >>>> } >>>> >>>> if (!vif_plug_ctx_in->chassis_rec) { >>>> @@ -557,7 +549,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, >>>> OVSREC_INTERFACE_TABLE_FOR_EACH (iface_rec, >>>> vif_plug_ctx_in->iface_table) { >>>> vif_plug_handle_iface(iface_rec, vif_plug_ctx_in, vif_plug_ctx_out, >>>> - !vif_plug_prime_idl_count); >>>> + !delay_plug); >>>> } >>>> >>>> struct sbrec_port_binding *target = >>>> @@ -573,7 +565,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, >>>> enum en_lport_type lport_type = get_lport_type(pb); >>>> if (lport_type == LP_VIF) { >>>> vif_plug_handle_lport_vif(pb, vif_plug_ctx_in, vif_plug_ctx_out, >>>> - !vif_plug_prime_idl_count); >>>> + !delay_plug); >>>> } >>>> } >>>> sbrec_port_binding_index_destroy_row(target); >>> >>> Regards, >>> Dumitru >>> > > Otherwise this LGTM > Same here, if this is the only change to the patch, feel free to add my ack to v3: Acked-by: Dumitru Ceara <dceara@redhat.com>
On Wed, Aug 17, 2022 at 8:27 AM Dumitru Ceara <dceara@redhat.com> wrote: > > On 8/9/22 08:46, Frode Nordahl wrote: > > On Sat, Aug 6, 2022 at 1:49 AM Han Zhou <hzhou@ovn.org> wrote: > >> > >> > >> > >> On Thu, Aug 4, 2022 at 7:09 AM Dumitru Ceara <dceara@redhat.com> wrote: > >>> > >>> On 7/25/22 23:34, Han Zhou wrote: > >>>> Also remove the reset mechanism when DB is reconnected, because at DB > >>>> reconnection the data in IDL would not reset. > >>>> > >>>> Signed-off-by: Han Zhou <hzhou@ovn.org> > >>>> --- > >>> > >>> Hi Han, > >>> > >>> I noticed that with this change the "VIF plugging" test starts failing > >>> when running with conditional monitoring disabled: > >>> > >>> 777: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no > >>> -- ovn_monitor_all=yes ok > >>> 775: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes > >>> -- ovn_monitor_all=yes ok > >>> 778: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no > >>> -- ovn_monitor_all=no FAILED (ovs-macros.at:255) > >>> 776: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes > >>> -- ovn_monitor_all=no FAILED (ovs-macros.at:255) > >> > >> I am terribly sorry for this. I guess I must have forgotten to run the test or check the test result after adding this patch. No idea how that could have happened :( > >> This failure is because the new approach is more strict than before, which requires 20 delay countdowns with real engine-updates instead of just 10 main loop iterations. Adding the ignore-startup-delay command fixes it: > >> > >> --- 8>< ---------------------------------------------------- ><8 -------------- > >> diff --git a/tests/ovn.at b/tests/ovn.at > >> index 3ba6ced4e..e2b523b67 100644 > >> --- a/tests/ovn.at > >> +++ b/tests/ovn.at > >> @@ -31606,6 +31606,7 @@ OVS_WAIT_UNTIL([ > >> test x43 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid} mtu_request) > >> ]) > >> > >> +as hv1 check ovn-appctl -t ovn-controller debug/ignore-startup-delay > >> # Check that pointing requested-chassis somewhere else will unplug the port > >> check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 \ > >> options:requested-chassis=non-existent-chassis > >> @@ -31613,6 +31614,7 @@ OVS_WAIT_UNTIL([ > >> ! as hv1 ovs-vsctl get Interface ${iface1_uuid} _uuid > >> ]) > >> > >> +as hv2 check ovn-appctl -t ovn-controller debug/ignore-startup-delay > >> # Check that removing an lport will unplug it > >> AT_CHECK([test x${iface2_uuid} = x$(as hv2 ovs-vsctl get Interface ${iface2_uuid} _uuid)], [0], []) > >> check ovn-nbctl --wait=hv lsp-del ${lsp2_uuid} > >> ------------------------------------------------------------------------------------ > >> I will wait for complete review comments before sending v3 for this fix. > > > > I can confirm that this fixes the failing test. For rigor I also ran a > > version of the test that artificially generated 20 updates prior to > > attempting to unplug anything and that was also successful, and I see > > that both the countdown and timer works as intended. > > > >> Thanks, > >> Han > >> > >>> > >>>> controller/ovn-controller.c | 1 - > >>>> controller/vif-plug.c | 20 ++++++-------------- > >>>> 2 files changed, 6 insertions(+), 15 deletions(-) > >>>> > >>>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > >>>> index 8fc554201..8bb18fc23 100644 > >>>> --- a/controller/ovn-controller.c > >>>> +++ b/controller/ovn-controller.c > >>>> @@ -3874,7 +3874,6 @@ main(int argc, char *argv[]) > >>>> if (!new_ovnsb_cond_seqno) { > >>>> VLOG_INFO("OVNSB IDL reconnected, force recompute."); > >>>> engine_set_force_recompute(true); > >>>> - vif_plug_reset_idl_prime_counter(); > >>>> } > >>>> ovnsb_cond_seqno = new_ovnsb_cond_seqno; > >>>> } > >>>> diff --git a/controller/vif-plug.c b/controller/vif-plug.c > >>>> index c6fbe7e59..38348bf54 100644 > >>>> --- a/controller/vif-plug.c > >>>> +++ b/controller/vif-plug.c > >>>> @@ -532,22 +532,14 @@ vif_plug_handle_iface(const struct ovsrec_interface *iface_rec, > >>>> * completeness of the initial data downloading we need this counter so that we > >>>> * do not erronously unplug ports because the data is just not loaded yet. > >>>> */ > >>>> -#define VIF_PLUG_PRIME_IDL_COUNT_SEEED 10 > >>>> -static int vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED; > >>>> - > >>>> -void > >>>> -vif_plug_reset_idl_prime_counter(void) > >>>> -{ > >>>> - vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED; > >>>> -} > >>>> - > >>> > >>> We should remove this from the .h file too. > > > > +1 > > > >>> > >>>> void > >>>> vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, > >>>> struct vif_plug_ctx_out *vif_plug_ctx_out) > >>>> { > >>>> - if (vif_plug_prime_idl_count && --vif_plug_prime_idl_count > 0) { > >>>> - VLOG_DBG("vif_plug_run: vif_plug_prime_idl_count=%d, will not unplug " > >>>> - "ports in this iteration.", vif_plug_prime_idl_count); > >>>> + bool delay_plug = daemon_started_recently(); > >>>> + if (delay_plug) { > >>>> + VLOG_DBG("vif_plug_run: daemon started recently, will not unplug " > >>>> + "ports in this iteration."); > >>>> } > >>>> > >>>> if (!vif_plug_ctx_in->chassis_rec) { > >>>> @@ -557,7 +549,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, > >>>> OVSREC_INTERFACE_TABLE_FOR_EACH (iface_rec, > >>>> vif_plug_ctx_in->iface_table) { > >>>> vif_plug_handle_iface(iface_rec, vif_plug_ctx_in, vif_plug_ctx_out, > >>>> - !vif_plug_prime_idl_count); > >>>> + !delay_plug); > >>>> } > >>>> > >>>> struct sbrec_port_binding *target = > >>>> @@ -573,7 +565,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, > >>>> enum en_lport_type lport_type = get_lport_type(pb); > >>>> if (lport_type == LP_VIF) { > >>>> vif_plug_handle_lport_vif(pb, vif_plug_ctx_in, vif_plug_ctx_out, > >>>> - !vif_plug_prime_idl_count); > >>>> + !delay_plug); > >>>> } > >>>> } > >>>> sbrec_port_binding_index_destroy_row(target); > >>> > >>> Regards, > >>> Dumitru > >>> > > > > Otherwise this LGTM > > > > Same here, if this is the only change to the patch, feel free to add my > ack to v3: > > Acked-by: Dumitru Ceara <dceara@redhat.com> > Thanks! Added ACK from you both in v3. Han
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 8fc554201..8bb18fc23 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -3874,7 +3874,6 @@ main(int argc, char *argv[]) if (!new_ovnsb_cond_seqno) { VLOG_INFO("OVNSB IDL reconnected, force recompute."); engine_set_force_recompute(true); - vif_plug_reset_idl_prime_counter(); } ovnsb_cond_seqno = new_ovnsb_cond_seqno; } diff --git a/controller/vif-plug.c b/controller/vif-plug.c index c6fbe7e59..38348bf54 100644 --- a/controller/vif-plug.c +++ b/controller/vif-plug.c @@ -532,22 +532,14 @@ vif_plug_handle_iface(const struct ovsrec_interface *iface_rec, * completeness of the initial data downloading we need this counter so that we * do not erronously unplug ports because the data is just not loaded yet. */ -#define VIF_PLUG_PRIME_IDL_COUNT_SEEED 10 -static int vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED; - -void -vif_plug_reset_idl_prime_counter(void) -{ - vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED; -} - void vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, struct vif_plug_ctx_out *vif_plug_ctx_out) { - if (vif_plug_prime_idl_count && --vif_plug_prime_idl_count > 0) { - VLOG_DBG("vif_plug_run: vif_plug_prime_idl_count=%d, will not unplug " - "ports in this iteration.", vif_plug_prime_idl_count); + bool delay_plug = daemon_started_recently(); + if (delay_plug) { + VLOG_DBG("vif_plug_run: daemon started recently, will not unplug " + "ports in this iteration."); } if (!vif_plug_ctx_in->chassis_rec) { @@ -557,7 +549,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, OVSREC_INTERFACE_TABLE_FOR_EACH (iface_rec, vif_plug_ctx_in->iface_table) { vif_plug_handle_iface(iface_rec, vif_plug_ctx_in, vif_plug_ctx_out, - !vif_plug_prime_idl_count); + !delay_plug); } struct sbrec_port_binding *target = @@ -573,7 +565,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in, enum en_lport_type lport_type = get_lport_type(pb); if (lport_type == LP_VIF) { vif_plug_handle_lport_vif(pb, vif_plug_ctx_in, vif_plug_ctx_out, - !vif_plug_prime_idl_count); + !delay_plug); } } sbrec_port_binding_index_destroy_row(target);
Also remove the reset mechanism when DB is reconnected, because at DB reconnection the data in IDL would not reset. Signed-off-by: Han Zhou <hzhou@ovn.org> --- controller/ovn-controller.c | 1 - controller/vif-plug.c | 20 ++++++-------------- 2 files changed, 6 insertions(+), 15 deletions(-)