
[ovs-dev,v2] ovn-controller: eliminate stall in ofctrl state machine

Message ID 1467937868-1864-1-git-send-email-lrichard@redhat.com
State Superseded
Headers show

Commit Message

Lance Richardson July 8, 2016, 12:31 a.m. UTC
The "ovn -- 2 HVs, 3 LRs connected via LS, static routes"
test case currently exhibits frequent failures. These failures
occur because, at the time that the test packets are sent to
verify forwarding, no flows have been installed in the vswitch
for one of the hypervisors.

Investigation shows that, in the failing case, the ofctrl state
machine has not yet transitioned to the S_UPDATE_FLOWS state.
This occurs when ofctrl_run() is called and:
   1) The state is S_TLV_TABLE_MOD_SENT.
   2) An OFPTYPE_NXT_TLV_TABLE_REPLY message is queued for reception.
   3) No event (other than SB probe timer expiration) is expected
      that would unblock poll_block() in the main ovn-controller
      loop.

In this scenario, ofctrl_run() will move the state to S_CLEAR_FLOWS
and return without having executed run_S_CLEAR_FLOWS(), which
would have immediately transitioned the state to S_UPDATE_FLOWS,
the state needed for ovn-controller to configure flows in
ovs-vswitchd. After a delay of about 5 seconds (the default
SB probe timer interval), ofctrl_run() is called again and makes
the transition to S_UPDATE_FLOWS, but by that time the test case
has already failed.
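
To make the failure mode concrete, here is a small, self-contained model
of the pre-patch control flow. It is illustrative only: the enum, the
run_state()/recv_state() handlers, and the reply_queued flag are toy
stand-ins, not the real ofctrl.c code.

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy stand-ins for the ofctrl state machine. */
    enum state { TLV_TABLE_MOD_SENT, CLEAR_FLOWS, UPDATE_FLOWS };
    static enum state state = TLV_TABLE_MOD_SENT;
    static bool reply_queued = true;   /* models the queued TLV_TABLE_REPLY */

    static void run_state(void)
    {
        if (state == CLEAR_FLOWS) {
            state = UPDATE_FLOWS;      /* what run_S_CLEAR_FLOWS() would do */
        }
    }

    static void recv_state(void)
    {
        if (state == TLV_TABLE_MOD_SENT) {
            state = CLEAR_FLOWS;       /* recv handler consumes the reply */
        }
    }

    /* Pre-patch shape of ofctrl_run(): run loop first, then receive loop. */
    static void ofctrl_run_model(void)
    {
        enum state old_state;
        do {
            old_state = state;
            run_state();
        } while (state != old_state);

        while (state == old_state && reply_queued) {
            reply_queued = false;
            recv_state();              /* moves state to CLEAR_FLOWS... */
        }                              /* ...but run_state() is not re-run */
    }

    int main(void)
    {
        ofctrl_run_model();
        /* Prints 1 (CLEAR_FLOWS): stuck there until the next poll wakeup. */
        printf("state after one call: %d\n", state);
        return 0;
    }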

Fix by expanding the state machine's "while state != old_state"
loop to include processing of receive events, with a maximum
iteration limit to prevent excessive looping in pathological
cases. Without this fix, around 40 failures are seen in
100 attempts; with it, no failures have been observed in
several hundred attempts.

Signed-off-by: Lance Richardson <lrichard@redhat.com>
---
 v2: Added maximum iteration limit to state machine loop.

 ovn/controller/ofctrl.c | 50 ++++++++++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 23 deletions(-)

Comments

Lance Richardson July 8, 2016, 12:33 a.m. UTC | #1
Oops, had intended to cc: Justin.
Lance Richardson July 9, 2016, 4:29 p.m. UTC | #2
+ Ryan Moats
Ryan Moats July 11, 2016, 5:44 p.m. UTC | #3
I've run this through the same testing that I did with v1 (i.e.,
using Rally to create 10 ports on each of 15 networks on a four-node
devstack cloud running tip-of-tree master everywhere):

- When I only have 3 projects (tenants), I get a statistically
  significant 6% reduction in the port_create API time measured
  by rally.

- When I have 15 projects, then the reduction in API time is
  almost (but not quite) statistically significant.

I consider the first result to be a good enough reason for an
ack and tested-by, because I view it as helping under scale
(for OpenStack, the port density per project/tenant is a key
driver, so patches that help when that is higher are good
things).

Acked-by: Ryan Moats <rmoats@us.ibm.com>
Tested-by: Ryan Moats <rmoats@us.ibm.com>


Having said that
Lance Richardson July 12, 2016, 2:10 a.m. UTC | #4
Thanks for reviewing and testing (again), Ryan.

Justin, did you have any thoughts on this second approach?

Thanks,

   Lance
Ben Pfaff July 22, 2016, 10:47 p.m. UTC | #5
Thanks for investigating this.

I understand why your patch fixes a problem, but I don't yet understand
the root cause of the problem, and I'd like to know more before I commit
the patch.  I have two questions:

First, I don't understand why, if there's a message queued for reception
as you describe in 2), the main loop would wait for a timer expiration.
Before poll_block(), the main loop should call ofctrl_wait(), which
calls rconn_recv_wait(), which should cause poll_block() to wake up if
there's anything incoming from the OpenFlow connection.

Second, I don't know why the test "state == old_state" is here in
ofctrl_run().  I think that we can delete it.  It could be a culprit,
but on the other hand it seems like it's not a big deal if poll_block()
properly wakes up when a message is received:

    for (int i = 0; state == old_state && i < 50; i++) {
        struct ofpbuf *msg = rconn_recv(swconn);
        if (!msg) {
            break;
        }

Thanks,

Ben.
Lance Richardson July 23, 2016, 3:04 a.m. UTC | #6
As currently implemented, when ofctrl_run() is called, the run_*()
functions are called to transition through states that don't depend on
external events until the state no longer changes. Then any pending
receive events are processed until the arbitrary limit of 50 events is
reached, all receive events have been processed, or the state changes
(which does seem odd).

<This is the important bit:>
If the receive event handling happens to transition to a state for which
receive events are not expected, i.e. where run_*() needs to be called
in order to transition to the next state, then ofctrl_run() will return
and the transition will not be made until another event (timer expiration
or receive) occurs.

So the "message queued for reception" I referred to in the description is the
one that transitions into S_CLEAR_FLOWS state.  ofctrl_run() returns after
making this transition, despite the fact there is no receive event expected which would
cause ofctrl_run() to be called again. The state machine is stuck
in this state until the next event (timer expiration) occurs to unblock
the poll loop and cause ofctrl_run() to be called again.  When ofctrl_run()
is called, the run_S_CLEAR_FLOWS() function immediately transitions into
S_UPDATE_FLOWS state.

Another way to describe this: if a receive event causes a state transition,
the run_*() function for the new state needs to be called in order to
execute the actions for the new state (including possibly transitioning
into another state).  If this doesn't occur, the state machine stalls
until an event occurs to unblock the poll loop.
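
For illustration, here is the same toy model as in the commit message,
but with the patched loop shape: receive processing moves inside the
do/while, capped the way OFCTRL_SM_ITER_MAX caps it in the patch, so a
recv-driven state change is followed by the new state's run handler
within the same ofctrl_run() call. Again, the names below are stand-ins,
not the real dispatch macros.

    #include <stdbool.h>
    #include <stdio.h>

    enum state { TLV_TABLE_MOD_SENT, CLEAR_FLOWS, UPDATE_FLOWS };
    static enum state state = TLV_TABLE_MOD_SENT;
    static bool reply_queued = true;

    static void run_state(void)
    {
        if (state == CLEAR_FLOWS) {
            state = UPDATE_FLOWS;
        }
    }

    static void recv_state(void)
    {
        if (state == TLV_TABLE_MOD_SENT) {
            state = CLEAR_FLOWS;
        }
    }

    /* Patched shape: the receive loop sits inside the do/while, with a cap
     * so a pathological ping-pong between states cannot spin forever. */
    static void ofctrl_run_model(void)
    {
        enum state old_state;
        int count = 0;
        do {
            old_state = state;
            run_state();
            while (state == old_state && reply_queued) {
                reply_queued = false;
                recv_state();
            }
        } while (state != old_state && count++ < 10);
    }

    int main(void)
    {
        ofctrl_run_model();
        /* Prints 2 (UPDATE_FLOWS): the recv-driven move to CLEAR_FLOWS is
         * followed by its run handler in the same call. */
        printf("state after one call: %d\n", state);
        return 0;
    }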

I can't say I'm particularly happy with the solution proposed in my patch;
there's probably a better way. If the above explanation still doesn't help,
let me know and I'll make another attempt when I'm a little less tired ;-)

   Lance

Patch

diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
index 4c410da..fa23af0 100644
--- a/ovn/controller/ofctrl.c
+++ b/ovn/controller/ofctrl.c
@@ -88,6 +88,9 @@  enum ofctrl_state {
 /* Current state. */
 static enum ofctrl_state state;
 
+/* Maximum number of state machine iterations per invocation*/
+#define OFCTRL_SM_ITER_MAX 10
+
 /* Transaction IDs for messages in flight to the switch. */
 static ovs_be32 xid, xid2;
 
@@ -401,6 +404,7 @@  ofctrl_run(const struct ovsrec_bridge *br_int)
     }
 
     enum ofctrl_state old_state;
+    int count = 0;
     do {
         old_state = state;
         switch (state) {
@@ -410,36 +414,36 @@  ofctrl_run(const struct ovsrec_bridge *br_int)
         default:
             OVS_NOT_REACHED();
         }
-    } while (state != old_state);
 
-    for (int i = 0; state == old_state && i < 50; i++) {
-        struct ofpbuf *msg = rconn_recv(swconn);
-        if (!msg) {
-            break;
-        }
+        for (int i = 0; state == old_state && i < 50; i++) {
+            struct ofpbuf *msg = rconn_recv(swconn);
+            if (!msg) {
+                break;
+            }
 
-        const struct ofp_header *oh = msg->data;
-        enum ofptype type;
-        enum ofperr error;
+            const struct ofp_header *oh = msg->data;
+            enum ofptype type;
+            enum ofperr error;
 
-        error = ofptype_decode(&type, oh);
-        if (!error) {
-            switch (state) {
+            error = ofptype_decode(&type, oh);
+            if (!error) {
+                switch (state) {
 #define STATE(NAME) case NAME: recv_##NAME(oh, type); break;
-                STATES
+                    STATES
 #undef STATE
-            default:
-                OVS_NOT_REACHED();
+                default:
+                    OVS_NOT_REACHED();
+                }
+            } else {
+                char *s = ofp_to_string(oh, ntohs(oh->length), 1);
+                VLOG_WARN("could not decode OpenFlow message (%s): %s",
+                          ofperr_to_string(error), s);
+                free(s);
             }
-        } else {
-            char *s = ofp_to_string(oh, ntohs(oh->length), 1);
-            VLOG_WARN("could not decode OpenFlow message (%s): %s",
-                      ofperr_to_string(error), s);
-            free(s);
-        }
 
-        ofpbuf_delete(msg);
-    }
+            ofpbuf_delete(msg);
+        }
+    } while (state != old_state && count++ < OFCTRL_SM_ITER_MAX);
 
     return (state == S_CLEAR_FLOWS || state == S_UPDATE_FLOWS
             ? mff_ovn_geneve : 0);