Message ID | 1606225883-17192-1-git-send-email-dceara@redhat.com |
---|---|
State | Superseded |
Headers | show |
Series | [ovs-dev,v3] ovn-controller: Fix nb_cfg update with monitor_cond_change in flight. | expand |
On Tue, Nov 24, 2020 at 5:51 AM Dumitru Ceara <dceara@redhat.com> wrote: > > It is not correct for ovn-controller to pass the current SB_Global.nb_cfg > value to ofctrl_put() if there are pending changes to conditional > monitoring clauses (local or in flight). It might be that after the > monitor condition is acked by the SB, records that were added to the SB > before SB_Global.nb_cfg was set are now sent as updates to > ovn-controller. These should be first installed in OVS before > ovn-controller reports that it caught up with the current > SB_Global.nb_cfg value. > > Also: > - ofctrl_put should not advance cur_cfg if there are flow updates in > flight. > - ofctrl_put should be called after SB monitor conditions are updated. > > Fixes: ca278d98a4f5 ("ovn-controller: Initial use of incremental engine - quiet mode.") For my understanding, the above commit didn't introduce the problem. The monitor condition change was never considered in nb_cfg update before, so I think it is a day-one issue. I was aware of the problem/limitation of the nb_cfg mechanism but I didn't think of such a solution. I like the way this patch is addressing it! Or maybe the "Fixes" refers to this part of the patch: "ofctrl_put should not advance cur_cfg if there are flow updates in flight". That commit is the culprit for this. I'd suggest using a separate patch because these are totally different problems. > Acked-by: Ben Pfaff <blp@ovn.org> > Signed-off-by: Dumitru Ceara <dceara@redhat.com> > > --- > V3: > - move ofctrl_put() call after updating SB monitor conditions. > - added Ben's ack. > v2: > - use new IDL *set_condition() return value. > - fix ofctrl_put to not advance cur_cfg if there are flow updates in > flight. > --- > controller/ofctrl.c | 2 +- > controller/ovn-controller.c | 91 ++++++++++++++++++++++++++++++--------------- > 2 files changed, 62 insertions(+), 31 deletions(-) > > diff --git a/controller/ofctrl.c b/controller/ofctrl.c > index c1bbc58..eac5305 100644 > --- a/controller/ofctrl.c > +++ b/controller/ofctrl.c > @@ -2034,7 +2034,7 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table, > need_put = true; > } else if (nb_cfg != old_nb_cfg) { > /* nb_cfg changed since last ofctrl_put() call */ > - if (cur_cfg == old_nb_cfg) { > + if (cur_cfg == old_nb_cfg && ovs_list_is_empty(&flow_updates)) { Good catch! However, in the "else" branch it also sets the need_put to true, which shouldn't be affected by the condition ovs_list_is_empty(&flow_updates). Could you revise it to make the nb_cfg update accurate while not impacting the need_put logic? Consider my conditional ack for the in-flight monitor condition change part of this patch if you would split the in-flight flow-updates as a separate patch. Acked-by: Han Zhou <hzhou@ovn.org> Thanks, Han > /* we were up-to-date already, so just update with the > * new nb_cfg */ > cur_cfg = nb_cfg; > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > index b0f1975..46589e4 100644 > --- a/controller/ovn-controller.c > +++ b/controller/ovn-controller.c > @@ -133,7 +133,7 @@ get_bridge(const struct ovsrec_bridge_table *bridge_table, const char *br_name) > return NULL; > } > > -static void > +static unsigned int > update_sb_monitors(struct ovsdb_idl *ovnsb_idl, > const struct sbrec_chassis *chassis, > const struct sset *local_ifaces, > @@ -239,16 +239,24 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, > } > } > > -out: > - sbrec_port_binding_set_condition(ovnsb_idl, &pb); > - sbrec_logical_flow_set_condition(ovnsb_idl, &lf); > - sbrec_mac_binding_set_condition(ovnsb_idl, &mb); > - sbrec_multicast_group_set_condition(ovnsb_idl, &mg); > - sbrec_dns_set_condition(ovnsb_idl, &dns); > - sbrec_controller_event_set_condition(ovnsb_idl, &ce); > - sbrec_ip_multicast_set_condition(ovnsb_idl, &ip_mcast); > - sbrec_igmp_group_set_condition(ovnsb_idl, &igmp); > - sbrec_chassis_private_set_condition(ovnsb_idl, &chprv); > +out:; > + unsigned int cond_seqnos[] = { > + sbrec_port_binding_set_condition(ovnsb_idl, &pb), > + sbrec_logical_flow_set_condition(ovnsb_idl, &lf), > + sbrec_mac_binding_set_condition(ovnsb_idl, &mb), > + sbrec_multicast_group_set_condition(ovnsb_idl, &mg), > + sbrec_dns_set_condition(ovnsb_idl, &dns), > + sbrec_controller_event_set_condition(ovnsb_idl, &ce), > + sbrec_ip_multicast_set_condition(ovnsb_idl, &ip_mcast), > + sbrec_igmp_group_set_condition(ovnsb_idl, &igmp), > + sbrec_chassis_private_set_condition(ovnsb_idl, &chprv), > + }; > + > + unsigned int expected_cond_seqno = 0; > + for (size_t i = 0; i < ARRAY_SIZE(cond_seqnos); i++) { > + expected_cond_seqno = MAX(expected_cond_seqno, cond_seqnos[i]); > + } > + > ovsdb_idl_condition_destroy(&pb); > ovsdb_idl_condition_destroy(&lf); > ovsdb_idl_condition_destroy(&mb); > @@ -258,6 +266,7 @@ out: > ovsdb_idl_condition_destroy(&ip_mcast); > ovsdb_idl_condition_destroy(&igmp); > ovsdb_idl_condition_destroy(&chprv); > + return expected_cond_seqno; > } > > static const char * > @@ -491,7 +500,7 @@ get_ofctrl_probe_interval(struct ovsdb_idl *ovs_idl) > static void > update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl, > bool *monitor_all_p, bool *reset_ovnsb_idl_min_index, > - bool *enable_lflow_cache) > + bool *enable_lflow_cache, unsigned int *sb_cond_seqno) > { > const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(ovs_idl); > if (!cfg) { > @@ -516,7 +525,11 @@ update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl, > * Otherwise, don't call it here, because there would be unnecessary > * extra cost. Instead, it is called after the engine execution only > * when it is necessary. */ > - update_sb_monitors(ovnsb_idl, NULL, NULL, NULL, true); > + unsigned int next_cond_seqno = > + update_sb_monitors(ovnsb_idl, NULL, NULL, NULL, true); > + if (sb_cond_seqno) { > + *sb_cond_seqno = next_cond_seqno; > + } > } > if (monitor_all_p) { > *monitor_all_p = monitor_all; > @@ -803,11 +816,23 @@ restore_ct_zones(const struct ovsrec_bridge_table *bridge_table, > } > > static int64_t > -get_nb_cfg(const struct sbrec_sb_global_table *sb_global_table) > +get_nb_cfg(const struct sbrec_sb_global_table *sb_global_table, > + unsigned int cond_seqno, unsigned int expected_cond_seqno) > { > + static int64_t nb_cfg = 0; > + > + /* Delay getting nb_cfg if there are monitor condition changes > + * in flight. It might be that those changes would instruct the > + * server to send updates that happened before SB_Global.nb_cfg. > + */ > + if (cond_seqno != expected_cond_seqno) { > + return nb_cfg; > + } > + > const struct sbrec_sb_global *sb > = sbrec_sb_global_table_first(sb_global_table); > - return sb ? sb->nb_cfg : 0; > + nb_cfg = sb ? sb->nb_cfg : 0; > + return nb_cfg; > } > > /* Propagates the local cfg seqno, 'cur_cfg', to the chassis_private record > @@ -2615,6 +2640,7 @@ main(int argc, char *argv[]) > > unsigned int ovs_cond_seqno = UINT_MAX; > unsigned int ovnsb_cond_seqno = UINT_MAX; > + unsigned int ovnsb_expected_cond_seqno = UINT_MAX; > > struct controller_engine_ctx ctrl_engine_ctx = { > .enable_lflow_cache = true > @@ -2652,7 +2678,8 @@ main(int argc, char *argv[]) > > update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, &sb_monitor_all, > &reset_ovnsb_idl_min_index, > - &ctrl_engine_ctx.enable_lflow_cache); > + &ctrl_engine_ctx.enable_lflow_cache, > + &ovnsb_expected_cond_seqno); > update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl)); > ofctrl_set_probe_interval(get_ofctrl_probe_interval(ovs_idl_loop.idl)); > > @@ -2769,15 +2796,6 @@ main(int argc, char *argv[]) > sbrec_sb_global_table_get(ovnsb_idl_loop.idl)); > } > > - flow_output_data = engine_get_data(&en_flow_output); > - if (flow_output_data && ct_zones_data) { > - ofctrl_put(&flow_output_data->flow_table, > - &ct_zones_data->pending, > - sbrec_meter_table_get(ovnsb_idl_loop.idl), > - get_nb_cfg(sbrec_sb_global_table_get( > - ovnsb_idl_loop.idl)), > - engine_node_changed(&en_flow_output)); > - } > runtime_data = engine_get_data(&en_runtime_data); > if (runtime_data) { > patch_run(ovs_idl_txn, > @@ -2803,12 +2821,25 @@ main(int argc, char *argv[]) > &runtime_data->local_datapaths, > &runtime_data->active_tunnels); > if (engine_node_changed(&en_runtime_data)) { > - update_sb_monitors(ovnsb_idl_loop.idl, chassis, > - &runtime_data->local_lports, > - &runtime_data->local_datapaths, > - sb_monitor_all); > + ovnsb_expected_cond_seqno = > + update_sb_monitors( > + ovnsb_idl_loop.idl, chassis, > + &runtime_data->local_lports, > + &runtime_data->local_datapaths, > + sb_monitor_all); > } > } > + flow_output_data = engine_get_data(&en_flow_output); > + if (flow_output_data && ct_zones_data) { > + ofctrl_put(&flow_output_data->flow_table, > + &ct_zones_data->pending, > + sbrec_meter_table_get(ovnsb_idl_loop.idl), > + get_nb_cfg(sbrec_sb_global_table_get( > + ovnsb_idl_loop.idl), > + ovnsb_cond_seqno, > + ovnsb_expected_cond_seqno), > + engine_node_changed(&en_flow_output)); > + } > } > > } > @@ -2920,7 +2951,7 @@ loop_done: > bool done = !ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl); > while (!done) { > update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, > - NULL, NULL, NULL); > + NULL, NULL, NULL, NULL); > update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl)); > > struct ovsdb_idl_txn *ovs_idl_txn > -- > 1.8.3.1 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
On 11/30/20 9:43 AM, Han Zhou wrote: > > > On Tue, Nov 24, 2020 at 5:51 AM Dumitru Ceara <dceara@redhat.com > <mailto:dceara@redhat.com>> wrote: >> >> It is not correct for ovn-controller to pass the current SB_Global.nb_cfg >> value to ofctrl_put() if there are pending changes to conditional >> monitoring clauses (local or in flight). It might be that after the >> monitor condition is acked by the SB, records that were added to the SB >> before SB_Global.nb_cfg was set are now sent as updates to >> ovn-controller. These should be first installed in OVS before >> ovn-controller reports that it caught up with the current >> SB_Global.nb_cfg value. >> >> Also: >> - ofctrl_put should not advance cur_cfg if there are flow updates in >> flight. >> - ofctrl_put should be called after SB monitor conditions are updated. >> >> Fixes: ca278d98a4f5 ("ovn-controller: Initial use of incremental > engine - quiet mode.") > > For my understanding, the above commit didn't introduce the problem. The > monitor condition change was never considered in nb_cfg update before, > so I think it is a day-one issue. > I was aware of the problem/limitation of the nb_cfg mechanism but I > didn't think of such a solution. I like the way this patch is addressing it! > Thanks! > Or maybe the "Fixes" refers to this part of the patch: "ofctrl_put > should not advance cur_cfg if there are flow updates in flight". That > commit is the culprit for this. I'd suggest using a separate patch > because these are totally different problems. Right, the "Fixes" is only for the second part of the patch. I'll split it in a series. > >> Acked-by: Ben Pfaff <blp@ovn.org <mailto:blp@ovn.org>> >> Signed-off-by: Dumitru Ceara <dceara@redhat.com > <mailto:dceara@redhat.com>> >> >> --- >> V3: >> - move ofctrl_put() call after updating SB monitor conditions. >> - added Ben's ack. >> v2: >> - use new IDL *set_condition() return value. >> - fix ofctrl_put to not advance cur_cfg if there are flow updates in >> flight. >> --- >> controller/ofctrl.c | 2 +- >> controller/ovn-controller.c | 91 > ++++++++++++++++++++++++++++++--------------- >> 2 files changed, 62 insertions(+), 31 deletions(-) >> >> diff --git a/controller/ofctrl.c b/controller/ofctrl.c >> index c1bbc58..eac5305 100644 >> --- a/controller/ofctrl.c >> +++ b/controller/ofctrl.c >> @@ -2034,7 +2034,7 @@ ofctrl_put(struct ovn_desired_flow_table > *flow_table, >> need_put = true; >> } else if (nb_cfg != old_nb_cfg) { >> /* nb_cfg changed since last ofctrl_put() call */ >> - if (cur_cfg == old_nb_cfg) { >> + if (cur_cfg == old_nb_cfg && ovs_list_is_empty(&flow_updates)) { > > Good catch! > However, in the "else" branch it also sets the need_put to true, which > shouldn't be affected by the condition ovs_list_is_empty(&flow_updates). > Could you revise it to make the nb_cfg update accurate while not > impacting the need_put logic? > I see, I missed the fact that we used the same logic to update two different things. I'll fix it in v4. > Consider my conditional ack for the in-flight monitor condition change > part of this patch if you would split the in-flight flow-updates as a > separate patch. > Acked-by: Han Zhou <hzhou@ovn.org <mailto:hzhou@ovn.org>> > > Thanks, > Han Thanks, Dumitru
diff --git a/controller/ofctrl.c b/controller/ofctrl.c index c1bbc58..eac5305 100644 --- a/controller/ofctrl.c +++ b/controller/ofctrl.c @@ -2034,7 +2034,7 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table, need_put = true; } else if (nb_cfg != old_nb_cfg) { /* nb_cfg changed since last ofctrl_put() call */ - if (cur_cfg == old_nb_cfg) { + if (cur_cfg == old_nb_cfg && ovs_list_is_empty(&flow_updates)) { /* we were up-to-date already, so just update with the * new nb_cfg */ cur_cfg = nb_cfg; diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index b0f1975..46589e4 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -133,7 +133,7 @@ get_bridge(const struct ovsrec_bridge_table *bridge_table, const char *br_name) return NULL; } -static void +static unsigned int update_sb_monitors(struct ovsdb_idl *ovnsb_idl, const struct sbrec_chassis *chassis, const struct sset *local_ifaces, @@ -239,16 +239,24 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, } } -out: - sbrec_port_binding_set_condition(ovnsb_idl, &pb); - sbrec_logical_flow_set_condition(ovnsb_idl, &lf); - sbrec_mac_binding_set_condition(ovnsb_idl, &mb); - sbrec_multicast_group_set_condition(ovnsb_idl, &mg); - sbrec_dns_set_condition(ovnsb_idl, &dns); - sbrec_controller_event_set_condition(ovnsb_idl, &ce); - sbrec_ip_multicast_set_condition(ovnsb_idl, &ip_mcast); - sbrec_igmp_group_set_condition(ovnsb_idl, &igmp); - sbrec_chassis_private_set_condition(ovnsb_idl, &chprv); +out:; + unsigned int cond_seqnos[] = { + sbrec_port_binding_set_condition(ovnsb_idl, &pb), + sbrec_logical_flow_set_condition(ovnsb_idl, &lf), + sbrec_mac_binding_set_condition(ovnsb_idl, &mb), + sbrec_multicast_group_set_condition(ovnsb_idl, &mg), + sbrec_dns_set_condition(ovnsb_idl, &dns), + sbrec_controller_event_set_condition(ovnsb_idl, &ce), + sbrec_ip_multicast_set_condition(ovnsb_idl, &ip_mcast), + sbrec_igmp_group_set_condition(ovnsb_idl, &igmp), + sbrec_chassis_private_set_condition(ovnsb_idl, &chprv), + }; + + unsigned int expected_cond_seqno = 0; + for (size_t i = 0; i < ARRAY_SIZE(cond_seqnos); i++) { + expected_cond_seqno = MAX(expected_cond_seqno, cond_seqnos[i]); + } + ovsdb_idl_condition_destroy(&pb); ovsdb_idl_condition_destroy(&lf); ovsdb_idl_condition_destroy(&mb); @@ -258,6 +266,7 @@ out: ovsdb_idl_condition_destroy(&ip_mcast); ovsdb_idl_condition_destroy(&igmp); ovsdb_idl_condition_destroy(&chprv); + return expected_cond_seqno; } static const char * @@ -491,7 +500,7 @@ get_ofctrl_probe_interval(struct ovsdb_idl *ovs_idl) static void update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl, bool *monitor_all_p, bool *reset_ovnsb_idl_min_index, - bool *enable_lflow_cache) + bool *enable_lflow_cache, unsigned int *sb_cond_seqno) { const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(ovs_idl); if (!cfg) { @@ -516,7 +525,11 @@ update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl, * Otherwise, don't call it here, because there would be unnecessary * extra cost. Instead, it is called after the engine execution only * when it is necessary. */ - update_sb_monitors(ovnsb_idl, NULL, NULL, NULL, true); + unsigned int next_cond_seqno = + update_sb_monitors(ovnsb_idl, NULL, NULL, NULL, true); + if (sb_cond_seqno) { + *sb_cond_seqno = next_cond_seqno; + } } if (monitor_all_p) { *monitor_all_p = monitor_all; @@ -803,11 +816,23 @@ restore_ct_zones(const struct ovsrec_bridge_table *bridge_table, } static int64_t -get_nb_cfg(const struct sbrec_sb_global_table *sb_global_table) +get_nb_cfg(const struct sbrec_sb_global_table *sb_global_table, + unsigned int cond_seqno, unsigned int expected_cond_seqno) { + static int64_t nb_cfg = 0; + + /* Delay getting nb_cfg if there are monitor condition changes + * in flight. It might be that those changes would instruct the + * server to send updates that happened before SB_Global.nb_cfg. + */ + if (cond_seqno != expected_cond_seqno) { + return nb_cfg; + } + const struct sbrec_sb_global *sb = sbrec_sb_global_table_first(sb_global_table); - return sb ? sb->nb_cfg : 0; + nb_cfg = sb ? sb->nb_cfg : 0; + return nb_cfg; } /* Propagates the local cfg seqno, 'cur_cfg', to the chassis_private record @@ -2615,6 +2640,7 @@ main(int argc, char *argv[]) unsigned int ovs_cond_seqno = UINT_MAX; unsigned int ovnsb_cond_seqno = UINT_MAX; + unsigned int ovnsb_expected_cond_seqno = UINT_MAX; struct controller_engine_ctx ctrl_engine_ctx = { .enable_lflow_cache = true @@ -2652,7 +2678,8 @@ main(int argc, char *argv[]) update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, &sb_monitor_all, &reset_ovnsb_idl_min_index, - &ctrl_engine_ctx.enable_lflow_cache); + &ctrl_engine_ctx.enable_lflow_cache, + &ovnsb_expected_cond_seqno); update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl)); ofctrl_set_probe_interval(get_ofctrl_probe_interval(ovs_idl_loop.idl)); @@ -2769,15 +2796,6 @@ main(int argc, char *argv[]) sbrec_sb_global_table_get(ovnsb_idl_loop.idl)); } - flow_output_data = engine_get_data(&en_flow_output); - if (flow_output_data && ct_zones_data) { - ofctrl_put(&flow_output_data->flow_table, - &ct_zones_data->pending, - sbrec_meter_table_get(ovnsb_idl_loop.idl), - get_nb_cfg(sbrec_sb_global_table_get( - ovnsb_idl_loop.idl)), - engine_node_changed(&en_flow_output)); - } runtime_data = engine_get_data(&en_runtime_data); if (runtime_data) { patch_run(ovs_idl_txn, @@ -2803,12 +2821,25 @@ main(int argc, char *argv[]) &runtime_data->local_datapaths, &runtime_data->active_tunnels); if (engine_node_changed(&en_runtime_data)) { - update_sb_monitors(ovnsb_idl_loop.idl, chassis, - &runtime_data->local_lports, - &runtime_data->local_datapaths, - sb_monitor_all); + ovnsb_expected_cond_seqno = + update_sb_monitors( + ovnsb_idl_loop.idl, chassis, + &runtime_data->local_lports, + &runtime_data->local_datapaths, + sb_monitor_all); } } + flow_output_data = engine_get_data(&en_flow_output); + if (flow_output_data && ct_zones_data) { + ofctrl_put(&flow_output_data->flow_table, + &ct_zones_data->pending, + sbrec_meter_table_get(ovnsb_idl_loop.idl), + get_nb_cfg(sbrec_sb_global_table_get( + ovnsb_idl_loop.idl), + ovnsb_cond_seqno, + ovnsb_expected_cond_seqno), + engine_node_changed(&en_flow_output)); + } } } @@ -2920,7 +2951,7 @@ loop_done: bool done = !ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl); while (!done) { update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, - NULL, NULL, NULL); + NULL, NULL, NULL, NULL); update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl)); struct ovsdb_idl_txn *ovs_idl_txn