diff mbox series

[ovs-dev,v2,4/4] vif-plug.c: Use daemon_started_recently() to replace the prime counter.

Message ID 20220725213443.646243-5-hzhou@ovn.org
State Changes Requested
Headers show
Series Avoid unnecessary deletion & recreation during restart. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/github-robot-_ovn-kubernetes fail github build: failed

Commit Message

Han Zhou July 25, 2022, 9:34 p.m. UTC
Also remove the reset mechanism when DB is reconnected, because at DB
reconnection the data in IDL would not reset.

Signed-off-by: Han Zhou <hzhou@ovn.org>
---
 controller/ovn-controller.c |  1 -
 controller/vif-plug.c       | 20 ++++++--------------
 2 files changed, 6 insertions(+), 15 deletions(-)

Comments

Dumitru Ceara Aug. 4, 2022, 2:09 p.m. UTC | #1
On 7/25/22 23:34, Han Zhou wrote:
> Also remove the reset mechanism when DB is reconnected, because at DB
> reconnection the data in IDL would not reset.
> 
> Signed-off-by: Han Zhou <hzhou@ovn.org>
> ---

Hi Han,

I noticed that with this change the "VIF plugging" test starts failing
when running with conditional monitoring disabled:

777: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no
-- ovn_monitor_all=yes ok
775: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes
-- ovn_monitor_all=yes ok
778: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no
-- ovn_monitor_all=no FAILED (ovs-macros.at:255)
776: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes
-- ovn_monitor_all=no FAILED (ovs-macros.at:255)

>  controller/ovn-controller.c |  1 -
>  controller/vif-plug.c       | 20 ++++++--------------
>  2 files changed, 6 insertions(+), 15 deletions(-)
> 
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 8fc554201..8bb18fc23 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -3874,7 +3874,6 @@ main(int argc, char *argv[])
>              if (!new_ovnsb_cond_seqno) {
>                  VLOG_INFO("OVNSB IDL reconnected, force recompute.");
>                  engine_set_force_recompute(true);
> -                vif_plug_reset_idl_prime_counter();
>              }
>              ovnsb_cond_seqno = new_ovnsb_cond_seqno;
>          }
> diff --git a/controller/vif-plug.c b/controller/vif-plug.c
> index c6fbe7e59..38348bf54 100644
> --- a/controller/vif-plug.c
> +++ b/controller/vif-plug.c
> @@ -532,22 +532,14 @@ vif_plug_handle_iface(const struct ovsrec_interface *iface_rec,
>   * completeness of the initial data downloading we need this counter so that we
>   * do not erronously unplug ports because the data is just not loaded yet.
>   */
> -#define VIF_PLUG_PRIME_IDL_COUNT_SEEED 10
> -static int vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED;
> -
> -void
> -vif_plug_reset_idl_prime_counter(void)
> -{
> -    vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED;
> -}
> -

We should remove this from the .h file too.

>  void
>  vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
>               struct vif_plug_ctx_out *vif_plug_ctx_out)
>  {
> -    if (vif_plug_prime_idl_count && --vif_plug_prime_idl_count > 0) {
> -        VLOG_DBG("vif_plug_run: vif_plug_prime_idl_count=%d, will not unplug "
> -                 "ports in this iteration.", vif_plug_prime_idl_count);
> +    bool delay_plug = daemon_started_recently();
> +    if (delay_plug) {
> +        VLOG_DBG("vif_plug_run: daemon started recently, will not unplug "
> +                 "ports in this iteration.");
>      }
>  
>      if (!vif_plug_ctx_in->chassis_rec) {
> @@ -557,7 +549,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
>      OVSREC_INTERFACE_TABLE_FOR_EACH (iface_rec,
>                                       vif_plug_ctx_in->iface_table) {
>          vif_plug_handle_iface(iface_rec, vif_plug_ctx_in, vif_plug_ctx_out,
> -                              !vif_plug_prime_idl_count);
> +                              !delay_plug);
>      }
>  
>      struct sbrec_port_binding *target =
> @@ -573,7 +565,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
>          enum en_lport_type lport_type = get_lport_type(pb);
>          if (lport_type == LP_VIF) {
>              vif_plug_handle_lport_vif(pb, vif_plug_ctx_in, vif_plug_ctx_out,
> -                                      !vif_plug_prime_idl_count);
> +                                      !delay_plug);
>          }
>      }
>      sbrec_port_binding_index_destroy_row(target);

Regards,
Dumitru
Han Zhou Aug. 5, 2022, 11:49 p.m. UTC | #2
On Thu, Aug 4, 2022 at 7:09 AM Dumitru Ceara <dceara@redhat.com> wrote:
>
> On 7/25/22 23:34, Han Zhou wrote:
> > Also remove the reset mechanism when DB is reconnected, because at DB
> > reconnection the data in IDL would not reset.
> >
> > Signed-off-by: Han Zhou <hzhou@ovn.org>
> > ---
>
> Hi Han,
>
> I noticed that with this change the "VIF plugging" test starts failing
> when running with conditional monitoring disabled:
>
> 777: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no
> -- ovn_monitor_all=yes ok
> 775: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes
> -- ovn_monitor_all=yes ok
> 778: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no
> -- ovn_monitor_all=no FAILED (ovs-macros.at:255)
> 776: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes
> -- ovn_monitor_all=no FAILED (ovs-macros.at:255)

I am terribly sorry for this. I guess I must have forgotten to run the test
or check the test result after adding this patch. No idea how that could
have happened :(
This failure is because the new approach is more strict than before, which
requires 20 delay countdowns with real engine-updates instead of just 10
main loop iterations. Adding the ignore-startup-delay command fixes it:

--- 8>< ---------------------------------------------------- ><8
--------------
diff --git a/tests/ovn.at b/tests/ovn.at
index 3ba6ced4e..e2b523b67 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -31606,6 +31606,7 @@ OVS_WAIT_UNTIL([
     test x43 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid}
mtu_request)
 ])

+as hv1 check ovn-appctl -t ovn-controller debug/ignore-startup-delay
 # Check that pointing requested-chassis somewhere else will unplug the port
 check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 \
     options:requested-chassis=non-existent-chassis
@@ -31613,6 +31614,7 @@ OVS_WAIT_UNTIL([
     ! as hv1 ovs-vsctl get Interface ${iface1_uuid} _uuid
 ])

+as hv2 check ovn-appctl -t ovn-controller debug/ignore-startup-delay
 # Check that removing an lport will unplug it
 AT_CHECK([test x${iface2_uuid} = x$(as hv2 ovs-vsctl get Interface
${iface2_uuid} _uuid)], [0], [])
 check ovn-nbctl --wait=hv lsp-del ${lsp2_uuid}
------------------------------------------------------------------------------------
I will wait for complete review comments before sending v3 for this fix.

Thanks,
Han

>
> >  controller/ovn-controller.c |  1 -
> >  controller/vif-plug.c       | 20 ++++++--------------
> >  2 files changed, 6 insertions(+), 15 deletions(-)
> >
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index 8fc554201..8bb18fc23 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -3874,7 +3874,6 @@ main(int argc, char *argv[])
> >              if (!new_ovnsb_cond_seqno) {
> >                  VLOG_INFO("OVNSB IDL reconnected, force recompute.");
> >                  engine_set_force_recompute(true);
> > -                vif_plug_reset_idl_prime_counter();
> >              }
> >              ovnsb_cond_seqno = new_ovnsb_cond_seqno;
> >          }
> > diff --git a/controller/vif-plug.c b/controller/vif-plug.c
> > index c6fbe7e59..38348bf54 100644
> > --- a/controller/vif-plug.c
> > +++ b/controller/vif-plug.c
> > @@ -532,22 +532,14 @@ vif_plug_handle_iface(const struct
ovsrec_interface *iface_rec,
> >   * completeness of the initial data downloading we need this counter
so that we
> >   * do not erronously unplug ports because the data is just not loaded
yet.
> >   */
> > -#define VIF_PLUG_PRIME_IDL_COUNT_SEEED 10
> > -static int vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED;
> > -
> > -void
> > -vif_plug_reset_idl_prime_counter(void)
> > -{
> > -    vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED;
> > -}
> > -
>
> We should remove this from the .h file too.
>
> >  void
> >  vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
> >               struct vif_plug_ctx_out *vif_plug_ctx_out)
> >  {
> > -    if (vif_plug_prime_idl_count && --vif_plug_prime_idl_count > 0) {
> > -        VLOG_DBG("vif_plug_run: vif_plug_prime_idl_count=%d, will not
unplug "
> > -                 "ports in this iteration.", vif_plug_prime_idl_count);
> > +    bool delay_plug = daemon_started_recently();
> > +    if (delay_plug) {
> > +        VLOG_DBG("vif_plug_run: daemon started recently, will not
unplug "
> > +                 "ports in this iteration.");
> >      }
> >
> >      if (!vif_plug_ctx_in->chassis_rec) {
> > @@ -557,7 +549,7 @@ vif_plug_run(struct vif_plug_ctx_in
*vif_plug_ctx_in,
> >      OVSREC_INTERFACE_TABLE_FOR_EACH (iface_rec,
> >                                       vif_plug_ctx_in->iface_table) {
> >          vif_plug_handle_iface(iface_rec, vif_plug_ctx_in,
vif_plug_ctx_out,
> > -                              !vif_plug_prime_idl_count);
> > +                              !delay_plug);
> >      }
> >
> >      struct sbrec_port_binding *target =
> > @@ -573,7 +565,7 @@ vif_plug_run(struct vif_plug_ctx_in
*vif_plug_ctx_in,
> >          enum en_lport_type lport_type = get_lport_type(pb);
> >          if (lport_type == LP_VIF) {
> >              vif_plug_handle_lport_vif(pb, vif_plug_ctx_in,
vif_plug_ctx_out,
> > -                                      !vif_plug_prime_idl_count);
> > +                                      !delay_plug);
> >          }
> >      }
> >      sbrec_port_binding_index_destroy_row(target);
>
> Regards,
> Dumitru
>
Frode Nordahl Aug. 9, 2022, 6:46 a.m. UTC | #3
On Sat, Aug 6, 2022 at 1:49 AM Han Zhou <hzhou@ovn.org> wrote:
>
>
>
> On Thu, Aug 4, 2022 at 7:09 AM Dumitru Ceara <dceara@redhat.com> wrote:
> >
> > On 7/25/22 23:34, Han Zhou wrote:
> > > Also remove the reset mechanism when DB is reconnected, because at DB
> > > reconnection the data in IDL would not reset.
> > >
> > > Signed-off-by: Han Zhou <hzhou@ovn.org>
> > > ---
> >
> > Hi Han,
> >
> > I noticed that with this change the "VIF plugging" test starts failing
> > when running with conditional monitoring disabled:
> >
> > 777: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no
> > -- ovn_monitor_all=yes ok
> > 775: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes
> > -- ovn_monitor_all=yes ok
> > 778: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no
> > -- ovn_monitor_all=no FAILED (ovs-macros.at:255)
> > 776: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes
> > -- ovn_monitor_all=no FAILED (ovs-macros.at:255)
>
> I am terribly sorry for this. I guess I must have forgotten to run the test or check the test result after adding this patch. No idea how that could have happened :(
> This failure is because the new approach is more strict than before, which requires 20 delay countdowns with real engine-updates instead of just 10 main loop iterations. Adding the ignore-startup-delay command fixes it:
>
> --- 8>< ---------------------------------------------------- ><8 --------------
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 3ba6ced4e..e2b523b67 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -31606,6 +31606,7 @@ OVS_WAIT_UNTIL([
>      test x43 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid} mtu_request)
>  ])
>
> +as hv1 check ovn-appctl -t ovn-controller debug/ignore-startup-delay
>  # Check that pointing requested-chassis somewhere else will unplug the port
>  check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 \
>      options:requested-chassis=non-existent-chassis
> @@ -31613,6 +31614,7 @@ OVS_WAIT_UNTIL([
>      ! as hv1 ovs-vsctl get Interface ${iface1_uuid} _uuid
>  ])
>
> +as hv2 check ovn-appctl -t ovn-controller debug/ignore-startup-delay
>  # Check that removing an lport will unplug it
>  AT_CHECK([test x${iface2_uuid} = x$(as hv2 ovs-vsctl get Interface ${iface2_uuid} _uuid)], [0], [])
>  check ovn-nbctl --wait=hv lsp-del ${lsp2_uuid}
> ------------------------------------------------------------------------------------
> I will wait for complete review comments before sending v3 for this fix.

I can confirm that this fixes the failing test. For rigor I also ran a
version of the test that artificially generated 20 updates prior to
attempting to unplug anything and that was also successful, and I see
that both the countdown and timer works as intended.

> Thanks,
> Han
>
> >
> > >  controller/ovn-controller.c |  1 -
> > >  controller/vif-plug.c       | 20 ++++++--------------
> > >  2 files changed, 6 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > > index 8fc554201..8bb18fc23 100644
> > > --- a/controller/ovn-controller.c
> > > +++ b/controller/ovn-controller.c
> > > @@ -3874,7 +3874,6 @@ main(int argc, char *argv[])
> > >              if (!new_ovnsb_cond_seqno) {
> > >                  VLOG_INFO("OVNSB IDL reconnected, force recompute.");
> > >                  engine_set_force_recompute(true);
> > > -                vif_plug_reset_idl_prime_counter();
> > >              }
> > >              ovnsb_cond_seqno = new_ovnsb_cond_seqno;
> > >          }
> > > diff --git a/controller/vif-plug.c b/controller/vif-plug.c
> > > index c6fbe7e59..38348bf54 100644
> > > --- a/controller/vif-plug.c
> > > +++ b/controller/vif-plug.c
> > > @@ -532,22 +532,14 @@ vif_plug_handle_iface(const struct ovsrec_interface *iface_rec,
> > >   * completeness of the initial data downloading we need this counter so that we
> > >   * do not erronously unplug ports because the data is just not loaded yet.
> > >   */
> > > -#define VIF_PLUG_PRIME_IDL_COUNT_SEEED 10
> > > -static int vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED;
> > > -
> > > -void
> > > -vif_plug_reset_idl_prime_counter(void)
> > > -{
> > > -    vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED;
> > > -}
> > > -
> >
> > We should remove this from the .h file too.

+1

> >
> > >  void
> > >  vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
> > >               struct vif_plug_ctx_out *vif_plug_ctx_out)
> > >  {
> > > -    if (vif_plug_prime_idl_count && --vif_plug_prime_idl_count > 0) {
> > > -        VLOG_DBG("vif_plug_run: vif_plug_prime_idl_count=%d, will not unplug "
> > > -                 "ports in this iteration.", vif_plug_prime_idl_count);
> > > +    bool delay_plug = daemon_started_recently();
> > > +    if (delay_plug) {
> > > +        VLOG_DBG("vif_plug_run: daemon started recently, will not unplug "
> > > +                 "ports in this iteration.");
> > >      }
> > >
> > >      if (!vif_plug_ctx_in->chassis_rec) {
> > > @@ -557,7 +549,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
> > >      OVSREC_INTERFACE_TABLE_FOR_EACH (iface_rec,
> > >                                       vif_plug_ctx_in->iface_table) {
> > >          vif_plug_handle_iface(iface_rec, vif_plug_ctx_in, vif_plug_ctx_out,
> > > -                              !vif_plug_prime_idl_count);
> > > +                              !delay_plug);
> > >      }
> > >
> > >      struct sbrec_port_binding *target =
> > > @@ -573,7 +565,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
> > >          enum en_lport_type lport_type = get_lport_type(pb);
> > >          if (lport_type == LP_VIF) {
> > >              vif_plug_handle_lport_vif(pb, vif_plug_ctx_in, vif_plug_ctx_out,
> > > -                                      !vif_plug_prime_idl_count);
> > > +                                      !delay_plug);
> > >          }
> > >      }
> > >      sbrec_port_binding_index_destroy_row(target);
> >
> > Regards,
> > Dumitru
> >

Otherwise this LGTM
Dumitru Ceara Aug. 17, 2022, 3:27 p.m. UTC | #4
On 8/9/22 08:46, Frode Nordahl wrote:
> On Sat, Aug 6, 2022 at 1:49 AM Han Zhou <hzhou@ovn.org> wrote:
>>
>>
>>
>> On Thu, Aug 4, 2022 at 7:09 AM Dumitru Ceara <dceara@redhat.com> wrote:
>>>
>>> On 7/25/22 23:34, Han Zhou wrote:
>>>> Also remove the reset mechanism when DB is reconnected, because at DB
>>>> reconnection the data in IDL would not reset.
>>>>
>>>> Signed-off-by: Han Zhou <hzhou@ovn.org>
>>>> ---
>>>
>>> Hi Han,
>>>
>>> I noticed that with this change the "VIF plugging" test starts failing
>>> when running with conditional monitoring disabled:
>>>
>>> 777: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no
>>> -- ovn_monitor_all=yes ok
>>> 775: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes
>>> -- ovn_monitor_all=yes ok
>>> 778: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no
>>> -- ovn_monitor_all=no FAILED (ovs-macros.at:255)
>>> 776: ovn-controller - VIF plugging -- ovn-northd -- parallelization=yes
>>> -- ovn_monitor_all=no FAILED (ovs-macros.at:255)
>>
>> I am terribly sorry for this. I guess I must have forgotten to run the test or check the test result after adding this patch. No idea how that could have happened :(
>> This failure is because the new approach is more strict than before, which requires 20 delay countdowns with real engine-updates instead of just 10 main loop iterations. Adding the ignore-startup-delay command fixes it:
>>
>> --- 8>< ---------------------------------------------------- ><8 --------------
>> diff --git a/tests/ovn.at b/tests/ovn.at
>> index 3ba6ced4e..e2b523b67 100644
>> --- a/tests/ovn.at
>> +++ b/tests/ovn.at
>> @@ -31606,6 +31606,7 @@ OVS_WAIT_UNTIL([
>>      test x43 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid} mtu_request)
>>  ])
>>
>> +as hv1 check ovn-appctl -t ovn-controller debug/ignore-startup-delay
>>  # Check that pointing requested-chassis somewhere else will unplug the port
>>  check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 \
>>      options:requested-chassis=non-existent-chassis
>> @@ -31613,6 +31614,7 @@ OVS_WAIT_UNTIL([
>>      ! as hv1 ovs-vsctl get Interface ${iface1_uuid} _uuid
>>  ])
>>
>> +as hv2 check ovn-appctl -t ovn-controller debug/ignore-startup-delay
>>  # Check that removing an lport will unplug it
>>  AT_CHECK([test x${iface2_uuid} = x$(as hv2 ovs-vsctl get Interface ${iface2_uuid} _uuid)], [0], [])
>>  check ovn-nbctl --wait=hv lsp-del ${lsp2_uuid}
>> ------------------------------------------------------------------------------------
>> I will wait for complete review comments before sending v3 for this fix.
> 
> I can confirm that this fixes the failing test. For rigor I also ran a
> version of the test that artificially generated 20 updates prior to
> attempting to unplug anything and that was also successful, and I see
> that both the countdown and timer works as intended.
> 
>> Thanks,
>> Han
>>
>>>
>>>>  controller/ovn-controller.c |  1 -
>>>>  controller/vif-plug.c       | 20 ++++++--------------
>>>>  2 files changed, 6 insertions(+), 15 deletions(-)
>>>>
>>>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>>>> index 8fc554201..8bb18fc23 100644
>>>> --- a/controller/ovn-controller.c
>>>> +++ b/controller/ovn-controller.c
>>>> @@ -3874,7 +3874,6 @@ main(int argc, char *argv[])
>>>>              if (!new_ovnsb_cond_seqno) {
>>>>                  VLOG_INFO("OVNSB IDL reconnected, force recompute.");
>>>>                  engine_set_force_recompute(true);
>>>> -                vif_plug_reset_idl_prime_counter();
>>>>              }
>>>>              ovnsb_cond_seqno = new_ovnsb_cond_seqno;
>>>>          }
>>>> diff --git a/controller/vif-plug.c b/controller/vif-plug.c
>>>> index c6fbe7e59..38348bf54 100644
>>>> --- a/controller/vif-plug.c
>>>> +++ b/controller/vif-plug.c
>>>> @@ -532,22 +532,14 @@ vif_plug_handle_iface(const struct ovsrec_interface *iface_rec,
>>>>   * completeness of the initial data downloading we need this counter so that we
>>>>   * do not erronously unplug ports because the data is just not loaded yet.
>>>>   */
>>>> -#define VIF_PLUG_PRIME_IDL_COUNT_SEEED 10
>>>> -static int vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED;
>>>> -
>>>> -void
>>>> -vif_plug_reset_idl_prime_counter(void)
>>>> -{
>>>> -    vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED;
>>>> -}
>>>> -
>>>
>>> We should remove this from the .h file too.
> 
> +1
> 
>>>
>>>>  void
>>>>  vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
>>>>               struct vif_plug_ctx_out *vif_plug_ctx_out)
>>>>  {
>>>> -    if (vif_plug_prime_idl_count && --vif_plug_prime_idl_count > 0) {
>>>> -        VLOG_DBG("vif_plug_run: vif_plug_prime_idl_count=%d, will not unplug "
>>>> -                 "ports in this iteration.", vif_plug_prime_idl_count);
>>>> +    bool delay_plug = daemon_started_recently();
>>>> +    if (delay_plug) {
>>>> +        VLOG_DBG("vif_plug_run: daemon started recently, will not unplug "
>>>> +                 "ports in this iteration.");
>>>>      }
>>>>
>>>>      if (!vif_plug_ctx_in->chassis_rec) {
>>>> @@ -557,7 +549,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
>>>>      OVSREC_INTERFACE_TABLE_FOR_EACH (iface_rec,
>>>>                                       vif_plug_ctx_in->iface_table) {
>>>>          vif_plug_handle_iface(iface_rec, vif_plug_ctx_in, vif_plug_ctx_out,
>>>> -                              !vif_plug_prime_idl_count);
>>>> +                              !delay_plug);
>>>>      }
>>>>
>>>>      struct sbrec_port_binding *target =
>>>> @@ -573,7 +565,7 @@ vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
>>>>          enum en_lport_type lport_type = get_lport_type(pb);
>>>>          if (lport_type == LP_VIF) {
>>>>              vif_plug_handle_lport_vif(pb, vif_plug_ctx_in, vif_plug_ctx_out,
>>>> -                                      !vif_plug_prime_idl_count);
>>>> +                                      !delay_plug);
>>>>          }
>>>>      }
>>>>      sbrec_port_binding_index_destroy_row(target);
>>>
>>> Regards,
>>> Dumitru
>>>
> 
> Otherwise this LGTM
> 

Same here, if this is the only change to the patch, feel free to add my
ack to v3:

Acked-by: Dumitru Ceara <dceara@redhat.com>
Han Zhou Aug. 19, 2022, 12:22 a.m. UTC | #5
On Wed, Aug 17, 2022 at 8:27 AM Dumitru Ceara <dceara@redhat.com> wrote:
>
> On 8/9/22 08:46, Frode Nordahl wrote:
> > On Sat, Aug 6, 2022 at 1:49 AM Han Zhou <hzhou@ovn.org> wrote:
> >>
> >>
> >>
> >> On Thu, Aug 4, 2022 at 7:09 AM Dumitru Ceara <dceara@redhat.com> wrote:
> >>>
> >>> On 7/25/22 23:34, Han Zhou wrote:
> >>>> Also remove the reset mechanism when DB is reconnected, because at DB
> >>>> reconnection the data in IDL would not reset.
> >>>>
> >>>> Signed-off-by: Han Zhou <hzhou@ovn.org>
> >>>> ---
> >>>
> >>> Hi Han,
> >>>
> >>> I noticed that with this change the "VIF plugging" test starts failing
> >>> when running with conditional monitoring disabled:
> >>>
> >>> 777: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no
> >>> -- ovn_monitor_all=yes ok
> >>> 775: ovn-controller - VIF plugging -- ovn-northd --
parallelization=yes
> >>> -- ovn_monitor_all=yes ok
> >>> 778: ovn-controller - VIF plugging -- ovn-northd -- parallelization=no
> >>> -- ovn_monitor_all=no FAILED (ovs-macros.at:255)
> >>> 776: ovn-controller - VIF plugging -- ovn-northd --
parallelization=yes
> >>> -- ovn_monitor_all=no FAILED (ovs-macros.at:255)
> >>
> >> I am terribly sorry for this. I guess I must have forgotten to run the
test or check the test result after adding this patch. No idea how that
could have happened :(
> >> This failure is because the new approach is more strict than before,
which requires 20 delay countdowns with real engine-updates instead of just
10 main loop iterations. Adding the ignore-startup-delay command fixes it:
> >>
> >> --- 8>< ---------------------------------------------------- ><8
--------------
> >> diff --git a/tests/ovn.at b/tests/ovn.at
> >> index 3ba6ced4e..e2b523b67 100644
> >> --- a/tests/ovn.at
> >> +++ b/tests/ovn.at
> >> @@ -31606,6 +31606,7 @@ OVS_WAIT_UNTIL([
> >>      test x43 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid}
mtu_request)
> >>  ])
> >>
> >> +as hv1 check ovn-appctl -t ovn-controller debug/ignore-startup-delay
> >>  # Check that pointing requested-chassis somewhere else will unplug
the port
> >>  check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 \
> >>      options:requested-chassis=non-existent-chassis
> >> @@ -31613,6 +31614,7 @@ OVS_WAIT_UNTIL([
> >>      ! as hv1 ovs-vsctl get Interface ${iface1_uuid} _uuid
> >>  ])
> >>
> >> +as hv2 check ovn-appctl -t ovn-controller debug/ignore-startup-delay
> >>  # Check that removing an lport will unplug it
> >>  AT_CHECK([test x${iface2_uuid} = x$(as hv2 ovs-vsctl get Interface
${iface2_uuid} _uuid)], [0], [])
> >>  check ovn-nbctl --wait=hv lsp-del ${lsp2_uuid}
> >>
------------------------------------------------------------------------------------
> >> I will wait for complete review comments before sending v3 for this
fix.
> >
> > I can confirm that this fixes the failing test. For rigor I also ran a
> > version of the test that artificially generated 20 updates prior to
> > attempting to unplug anything and that was also successful, and I see
> > that both the countdown and timer works as intended.
> >
> >> Thanks,
> >> Han
> >>
> >>>
> >>>>  controller/ovn-controller.c |  1 -
> >>>>  controller/vif-plug.c       | 20 ++++++--------------
> >>>>  2 files changed, 6 insertions(+), 15 deletions(-)
> >>>>
> >>>> diff --git a/controller/ovn-controller.c
b/controller/ovn-controller.c
> >>>> index 8fc554201..8bb18fc23 100644
> >>>> --- a/controller/ovn-controller.c
> >>>> +++ b/controller/ovn-controller.c
> >>>> @@ -3874,7 +3874,6 @@ main(int argc, char *argv[])
> >>>>              if (!new_ovnsb_cond_seqno) {
> >>>>                  VLOG_INFO("OVNSB IDL reconnected, force
recompute.");
> >>>>                  engine_set_force_recompute(true);
> >>>> -                vif_plug_reset_idl_prime_counter();
> >>>>              }
> >>>>              ovnsb_cond_seqno = new_ovnsb_cond_seqno;
> >>>>          }
> >>>> diff --git a/controller/vif-plug.c b/controller/vif-plug.c
> >>>> index c6fbe7e59..38348bf54 100644
> >>>> --- a/controller/vif-plug.c
> >>>> +++ b/controller/vif-plug.c
> >>>> @@ -532,22 +532,14 @@ vif_plug_handle_iface(const struct
ovsrec_interface *iface_rec,
> >>>>   * completeness of the initial data downloading we need this
counter so that we
> >>>>   * do not erronously unplug ports because the data is just not
loaded yet.
> >>>>   */
> >>>> -#define VIF_PLUG_PRIME_IDL_COUNT_SEEED 10
> >>>> -static int vif_plug_prime_idl_count =
VIF_PLUG_PRIME_IDL_COUNT_SEEED;
> >>>> -
> >>>> -void
> >>>> -vif_plug_reset_idl_prime_counter(void)
> >>>> -{
> >>>> -    vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED;
> >>>> -}
> >>>> -
> >>>
> >>> We should remove this from the .h file too.
> >
> > +1
> >
> >>>
> >>>>  void
> >>>>  vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
> >>>>               struct vif_plug_ctx_out *vif_plug_ctx_out)
> >>>>  {
> >>>> -    if (vif_plug_prime_idl_count && --vif_plug_prime_idl_count > 0)
{
> >>>> -        VLOG_DBG("vif_plug_run: vif_plug_prime_idl_count=%d, will
not unplug "
> >>>> -                 "ports in this iteration.",
vif_plug_prime_idl_count);
> >>>> +    bool delay_plug = daemon_started_recently();
> >>>> +    if (delay_plug) {
> >>>> +        VLOG_DBG("vif_plug_run: daemon started recently, will not
unplug "
> >>>> +                 "ports in this iteration.");
> >>>>      }
> >>>>
> >>>>      if (!vif_plug_ctx_in->chassis_rec) {
> >>>> @@ -557,7 +549,7 @@ vif_plug_run(struct vif_plug_ctx_in
*vif_plug_ctx_in,
> >>>>      OVSREC_INTERFACE_TABLE_FOR_EACH (iface_rec,
> >>>>                                       vif_plug_ctx_in->iface_table) {
> >>>>          vif_plug_handle_iface(iface_rec, vif_plug_ctx_in,
vif_plug_ctx_out,
> >>>> -                              !vif_plug_prime_idl_count);
> >>>> +                              !delay_plug);
> >>>>      }
> >>>>
> >>>>      struct sbrec_port_binding *target =
> >>>> @@ -573,7 +565,7 @@ vif_plug_run(struct vif_plug_ctx_in
*vif_plug_ctx_in,
> >>>>          enum en_lport_type lport_type = get_lport_type(pb);
> >>>>          if (lport_type == LP_VIF) {
> >>>>              vif_plug_handle_lport_vif(pb, vif_plug_ctx_in,
vif_plug_ctx_out,
> >>>> -                                      !vif_plug_prime_idl_count);
> >>>> +                                      !delay_plug);
> >>>>          }
> >>>>      }
> >>>>      sbrec_port_binding_index_destroy_row(target);
> >>>
> >>> Regards,
> >>> Dumitru
> >>>
> >
> > Otherwise this LGTM
> >
>
> Same here, if this is the only change to the patch, feel free to add my
> ack to v3:
>
> Acked-by: Dumitru Ceara <dceara@redhat.com>
>
Thanks! Added ACK from you both in v3.

Han
diff mbox series

Patch

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 8fc554201..8bb18fc23 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -3874,7 +3874,6 @@  main(int argc, char *argv[])
             if (!new_ovnsb_cond_seqno) {
                 VLOG_INFO("OVNSB IDL reconnected, force recompute.");
                 engine_set_force_recompute(true);
-                vif_plug_reset_idl_prime_counter();
             }
             ovnsb_cond_seqno = new_ovnsb_cond_seqno;
         }
diff --git a/controller/vif-plug.c b/controller/vif-plug.c
index c6fbe7e59..38348bf54 100644
--- a/controller/vif-plug.c
+++ b/controller/vif-plug.c
@@ -532,22 +532,14 @@  vif_plug_handle_iface(const struct ovsrec_interface *iface_rec,
  * completeness of the initial data downloading we need this counter so that we
  * do not erronously unplug ports because the data is just not loaded yet.
  */
-#define VIF_PLUG_PRIME_IDL_COUNT_SEEED 10
-static int vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED;
-
-void
-vif_plug_reset_idl_prime_counter(void)
-{
-    vif_plug_prime_idl_count = VIF_PLUG_PRIME_IDL_COUNT_SEEED;
-}
-
 void
 vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
              struct vif_plug_ctx_out *vif_plug_ctx_out)
 {
-    if (vif_plug_prime_idl_count && --vif_plug_prime_idl_count > 0) {
-        VLOG_DBG("vif_plug_run: vif_plug_prime_idl_count=%d, will not unplug "
-                 "ports in this iteration.", vif_plug_prime_idl_count);
+    bool delay_plug = daemon_started_recently();
+    if (delay_plug) {
+        VLOG_DBG("vif_plug_run: daemon started recently, will not unplug "
+                 "ports in this iteration.");
     }
 
     if (!vif_plug_ctx_in->chassis_rec) {
@@ -557,7 +549,7 @@  vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
     OVSREC_INTERFACE_TABLE_FOR_EACH (iface_rec,
                                      vif_plug_ctx_in->iface_table) {
         vif_plug_handle_iface(iface_rec, vif_plug_ctx_in, vif_plug_ctx_out,
-                              !vif_plug_prime_idl_count);
+                              !delay_plug);
     }
 
     struct sbrec_port_binding *target =
@@ -573,7 +565,7 @@  vif_plug_run(struct vif_plug_ctx_in *vif_plug_ctx_in,
         enum en_lport_type lport_type = get_lport_type(pb);
         if (lport_type == LP_VIF) {
             vif_plug_handle_lport_vif(pb, vif_plug_ctx_in, vif_plug_ctx_out,
-                                      !vif_plug_prime_idl_count);
+                                      !delay_plug);
         }
     }
     sbrec_port_binding_index_destroy_row(target);