Message ID | 20210813225534.2558974-2-hzhou@ovn.org |
---|---|
State | Accepted |
Headers | show |
Series | Use Distributed Gateway Port for ovn-controller scalability. | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
On Fri, Aug 13, 2021 at 6:56 PM Han Zhou <hzhou@ovn.org> wrote: > > When there is a big number of ha_chassis_groups (e.g. for distributed > gateway ports), the calculation of ha_ref_chassis can take the major > part of ovn-northd CPU as shown in perf. > > However, when there is only one chassis in ha_chassis_group, no BFD > sessions are needed, so ha_ref_chassis calculation is unnecessary. > > Signed-off-by: Han Zhou <hzhou@ovn.org> Acked-by: Numan Siddique <numan@ovn.org> Please see below for a couple of small typo nits. Thanks Numan > --- > northd/ovn-northd.c | 62 ++++++++++++++++++++++++++++++++++++++------ > northd/ovn_northd.dl | 8 +++++- > tests/ovn-northd.at | 11 +++++++- > 3 files changed, 71 insertions(+), 10 deletions(-) > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > index 52fc255ae..502fea35c 100644 > --- a/northd/ovn-northd.c > +++ b/northd/ovn-northd.c > @@ -6527,7 +6527,8 @@ build_lrouter_groups__(struct hmap *ports, struct ovn_datapath *od) > * ha chassis group name. */ > for (size_t i = 0; i < od->n_l3dgw_ports; i++) { > struct ovn_port *crp = od->l3dgw_ports[i]->cr_port; > - if (crp->sb->ha_chassis_group) { > + if (crp->sb->ha_chassis_group && > + crp->sb->ha_chassis_group->n_ha_chassis > 1) { > sset_add(&od->lr_group->ha_chassis_groups, > crp->sb->ha_chassis_group->name); > } > @@ -14164,19 +14165,62 @@ add_to_ha_ref_chassis_info(struct ha_ref_chassis_info *ref_ch_info, > ref_ch_info->free_slots--; > } > > +struct ha_chassis_group_node { > + struct hmap_node hmap_node; > + const struct sbrec_ha_chassis_group *ha_ch_grp; > +}; > + > static void > -update_sb_ha_group_ref_chassis(struct shash *ha_ref_chassis_map) > +update_sb_ha_group_ref_chassis(struct northd_context *ctx, > + struct shash *ha_ref_chassis_map) > { > + struct hmap ha_ch_grps = HMAP_INITIALIZER(&ha_ch_grps); > + struct ha_chassis_group_node *ha_ch_grp_node; > + > + /* Initialize a set of all ha_chassis_groups in SB. */ > + const struct sbrec_ha_chassis_group *ha_ch_grp; > + SBREC_HA_CHASSIS_GROUP_FOR_EACH (ha_ch_grp, ctx->ovnsb_idl) { > + ha_ch_grp_node = xzalloc(sizeof *ha_ch_grp_node); > + ha_ch_grp_node->ha_ch_grp = ha_ch_grp; > + hmap_insert(&ha_ch_grps, &ha_ch_grp_node->hmap_node, > + uuid_hash(&ha_ch_grp->header_.uuid)); > + } > + > + /* Update each group and removes it from the set. */ s/removes/remove > struct shash_node *node, *next; > SHASH_FOR_EACH_SAFE (node, next, ha_ref_chassis_map) { > struct ha_ref_chassis_info *ha_ref_info = node->data; > sbrec_ha_chassis_group_set_ref_chassis(ha_ref_info->ha_chassis_group, > ha_ref_info->ref_chassis, > ha_ref_info->n_ref_chassis); > + > + /* Remove the updated group from the set. */ > + HMAP_FOR_EACH_WITH_HASH (ha_ch_grp_node, hmap_node, > + uuid_hash(&ha_ref_info->ha_chassis_group->header_.uuid), > + &ha_ch_grps) { > + if (ha_ch_grp_node->ha_ch_grp == ha_ref_info->ha_chassis_group) { > + hmap_remove(&ha_ch_grps, &ha_ch_grp_node->hmap_node); > + free(ha_ch_grp_node); > + break; > + } > + } > free(ha_ref_info->ref_chassis); > free(ha_ref_info); > shash_delete(ha_ref_chassis_map, node); > } > + > + /* Now the rest of the groups don't have any ref-chassis, so clear the SB > + * field for those records. */ > + struct ha_chassis_group_node *ha_ch_grp_next; > + HMAP_FOR_EACH_SAFE (ha_ch_grp_node, ha_ch_grp_next, hmap_node, > + &ha_ch_grps) { > + sbrec_ha_chassis_group_set_ref_chassis(ha_ch_grp_node->ha_ch_grp, > + NULL, 0); > + hmap_remove(&ha_ch_grps, &ha_ch_grp_node->hmap_node); > + free(ha_ch_grp_node); > + } > + > + hmap_destroy(&ha_ch_grps); > } > > /* This function checks if the port binding 'sb' references > @@ -14248,11 +14292,13 @@ handle_port_binding_changes(struct northd_context *ctx, struct hmap *ports, > if (ctx->ovnsb_txn) { > const struct sbrec_ha_chassis_group *ha_ch_grp; > SBREC_HA_CHASSIS_GROUP_FOR_EACH (ha_ch_grp, ctx->ovnsb_idl) { > - struct ha_ref_chassis_info *ref_ch_info = > - xzalloc(sizeof *ref_ch_info); > - ref_ch_info->ha_chassis_group = ha_ch_grp; > - build_ha_chassis_ref = true; > - shash_add(ha_ref_chassis_map, ha_ch_grp->name, ref_ch_info); > + if (ha_ch_grp->n_ha_chassis > 1) { > + struct ha_ref_chassis_info *ref_ch_info = > + xzalloc(sizeof *ref_ch_info); > + ref_ch_info->ha_chassis_group = ha_ch_grp; > + build_ha_chassis_ref = true; > + shash_add(ha_ref_chassis_map, ha_ch_grp->name, ref_ch_info); > + } > } > } > > @@ -14726,7 +14772,7 @@ ovnsb_db_run(struct northd_context *ctx, > handle_port_binding_changes(ctx, ports, &ha_ref_chassis_map); > update_northbound_cfg(ctx, sb_loop, loop_start_time); > if (ctx->ovnsb_txn) { > - update_sb_ha_group_ref_chassis(&ha_ref_chassis_map); > + update_sb_ha_group_ref_chassis(ctx, &ha_ref_chassis_map); > } > shash_destroy(&ha_ref_chassis_map); > > diff --git a/northd/ovn_northd.dl b/northd/ovn_northd.dl > index a94726351..3693b6fba 100644 > --- a/northd/ovn_northd.dl > +++ b/northd/ovn_northd.dl > @@ -512,11 +512,17 @@ sb::Out_Port_Binding(._uuid = pbinding._uuid, > * router 'lr_uuid' can end up at (or start from). This is used for > * sb::Out_HA_Chassis_Group's ref_chassis column. > * > + * Only HA Chassis Groups with more than 1 chassises need to maintain the > + * referenced chassises. I think the plural of chassis is chassis itself. so - s/chassises/chassis Thanks Numan > + * > * RefChassisSet0 has a row for each logical router that actually references a > * chassis. RefChassisSet has a row for every logical router. */ > relation RefChassis(lr_uuid: uuid, chassis_uuid: uuid) > RefChassis(lr_uuid, chassis_uuid) :- > - DistributedGatewayPortHAChassisGroup(lrp, _), > + DistributedGatewayPortHAChassisGroup(lrp, hacg_uuid), > + HAChassis(.hacg_uuid = hacg_uuid, .hac_uuid = hac_uuid), > + var hacg_size = hac_uuid.group_by(hacg_uuid).to_set().size(), > + hacg_size > 1, > DistributedGatewayPort(lrp, lr_uuid), > ConnectedLogicalRouter[(lr_uuid, set_uuid)], > ConnectedLogicalRouter[(lr2_uuid, set_uuid)], > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index 190f78261..93bb0e1da 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -34,6 +34,8 @@ check_row_count Port_Binding 1 logical_port=cr-alice ha_chassis_group=$ha_chgrp_ > ha_ch=$(fetch_column HA_Chassis_Group ha_chassis name=alice) > check_column "$ha_ch" HA_Chassis _uuid > > +ovn-sbctl list ha_chassis_group > + > # Delete chassis - gw2 in SB DB. > # ovn-northd should not recreate ha_chassis rows > # repeatedly when gw2 is deleted. > @@ -536,6 +538,13 @@ check ovn-nbctl lrp-del lr1-sw0 > > wait_column "$comp2_ch_uuid" HA_Chassis_Group ref_chassis > > +# Delete one of the gateway chassises making the ha_chassis_group has only one > +# chassis. In this case ref_chassis field should be empty for this > +# ha_chassis_group. (ref_chassis is calculated only if there are more than 1 > +# chassises in the ha_chassis_group. > +check ovn-nbctl --wait=sb lrp-del-gateway-chassis lr0-public ch2 > +wait_column "" HA_Chassis_Group ref_chassis > + > # Set redirect-chassis option to lr0-public. It should be ignored > # (because redirect-chassis is obsolete). > check ovn-nbctl set logical_router_port lr0-public options:redirect-chassis=ch1 > @@ -543,7 +552,7 @@ check ovn-nbctl set logical_router_port lr0-public options:redirect-chassis=ch1 > wait_row_count HA_Chassis_Group 1 > wait_row_count HA_Chassis_Group 1 name=lr0-public > > -wait_row_count HA_Chassis 2 > +wait_row_count HA_Chassis 1 > > # Delete the gateway chassis. > check ovn-nbctl clear logical_router_port lr0-public gateway_chassis > -- > 2.30.2 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index 52fc255ae..502fea35c 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -6527,7 +6527,8 @@ build_lrouter_groups__(struct hmap *ports, struct ovn_datapath *od) * ha chassis group name. */ for (size_t i = 0; i < od->n_l3dgw_ports; i++) { struct ovn_port *crp = od->l3dgw_ports[i]->cr_port; - if (crp->sb->ha_chassis_group) { + if (crp->sb->ha_chassis_group && + crp->sb->ha_chassis_group->n_ha_chassis > 1) { sset_add(&od->lr_group->ha_chassis_groups, crp->sb->ha_chassis_group->name); } @@ -14164,19 +14165,62 @@ add_to_ha_ref_chassis_info(struct ha_ref_chassis_info *ref_ch_info, ref_ch_info->free_slots--; } +struct ha_chassis_group_node { + struct hmap_node hmap_node; + const struct sbrec_ha_chassis_group *ha_ch_grp; +}; + static void -update_sb_ha_group_ref_chassis(struct shash *ha_ref_chassis_map) +update_sb_ha_group_ref_chassis(struct northd_context *ctx, + struct shash *ha_ref_chassis_map) { + struct hmap ha_ch_grps = HMAP_INITIALIZER(&ha_ch_grps); + struct ha_chassis_group_node *ha_ch_grp_node; + + /* Initialize a set of all ha_chassis_groups in SB. */ + const struct sbrec_ha_chassis_group *ha_ch_grp; + SBREC_HA_CHASSIS_GROUP_FOR_EACH (ha_ch_grp, ctx->ovnsb_idl) { + ha_ch_grp_node = xzalloc(sizeof *ha_ch_grp_node); + ha_ch_grp_node->ha_ch_grp = ha_ch_grp; + hmap_insert(&ha_ch_grps, &ha_ch_grp_node->hmap_node, + uuid_hash(&ha_ch_grp->header_.uuid)); + } + + /* Update each group and removes it from the set. */ struct shash_node *node, *next; SHASH_FOR_EACH_SAFE (node, next, ha_ref_chassis_map) { struct ha_ref_chassis_info *ha_ref_info = node->data; sbrec_ha_chassis_group_set_ref_chassis(ha_ref_info->ha_chassis_group, ha_ref_info->ref_chassis, ha_ref_info->n_ref_chassis); + + /* Remove the updated group from the set. */ + HMAP_FOR_EACH_WITH_HASH (ha_ch_grp_node, hmap_node, + uuid_hash(&ha_ref_info->ha_chassis_group->header_.uuid), + &ha_ch_grps) { + if (ha_ch_grp_node->ha_ch_grp == ha_ref_info->ha_chassis_group) { + hmap_remove(&ha_ch_grps, &ha_ch_grp_node->hmap_node); + free(ha_ch_grp_node); + break; + } + } free(ha_ref_info->ref_chassis); free(ha_ref_info); shash_delete(ha_ref_chassis_map, node); } + + /* Now the rest of the groups don't have any ref-chassis, so clear the SB + * field for those records. */ + struct ha_chassis_group_node *ha_ch_grp_next; + HMAP_FOR_EACH_SAFE (ha_ch_grp_node, ha_ch_grp_next, hmap_node, + &ha_ch_grps) { + sbrec_ha_chassis_group_set_ref_chassis(ha_ch_grp_node->ha_ch_grp, + NULL, 0); + hmap_remove(&ha_ch_grps, &ha_ch_grp_node->hmap_node); + free(ha_ch_grp_node); + } + + hmap_destroy(&ha_ch_grps); } /* This function checks if the port binding 'sb' references @@ -14248,11 +14292,13 @@ handle_port_binding_changes(struct northd_context *ctx, struct hmap *ports, if (ctx->ovnsb_txn) { const struct sbrec_ha_chassis_group *ha_ch_grp; SBREC_HA_CHASSIS_GROUP_FOR_EACH (ha_ch_grp, ctx->ovnsb_idl) { - struct ha_ref_chassis_info *ref_ch_info = - xzalloc(sizeof *ref_ch_info); - ref_ch_info->ha_chassis_group = ha_ch_grp; - build_ha_chassis_ref = true; - shash_add(ha_ref_chassis_map, ha_ch_grp->name, ref_ch_info); + if (ha_ch_grp->n_ha_chassis > 1) { + struct ha_ref_chassis_info *ref_ch_info = + xzalloc(sizeof *ref_ch_info); + ref_ch_info->ha_chassis_group = ha_ch_grp; + build_ha_chassis_ref = true; + shash_add(ha_ref_chassis_map, ha_ch_grp->name, ref_ch_info); + } } } @@ -14726,7 +14772,7 @@ ovnsb_db_run(struct northd_context *ctx, handle_port_binding_changes(ctx, ports, &ha_ref_chassis_map); update_northbound_cfg(ctx, sb_loop, loop_start_time); if (ctx->ovnsb_txn) { - update_sb_ha_group_ref_chassis(&ha_ref_chassis_map); + update_sb_ha_group_ref_chassis(ctx, &ha_ref_chassis_map); } shash_destroy(&ha_ref_chassis_map); diff --git a/northd/ovn_northd.dl b/northd/ovn_northd.dl index a94726351..3693b6fba 100644 --- a/northd/ovn_northd.dl +++ b/northd/ovn_northd.dl @@ -512,11 +512,17 @@ sb::Out_Port_Binding(._uuid = pbinding._uuid, * router 'lr_uuid' can end up at (or start from). This is used for * sb::Out_HA_Chassis_Group's ref_chassis column. * + * Only HA Chassis Groups with more than 1 chassises need to maintain the + * referenced chassises. + * * RefChassisSet0 has a row for each logical router that actually references a * chassis. RefChassisSet has a row for every logical router. */ relation RefChassis(lr_uuid: uuid, chassis_uuid: uuid) RefChassis(lr_uuid, chassis_uuid) :- - DistributedGatewayPortHAChassisGroup(lrp, _), + DistributedGatewayPortHAChassisGroup(lrp, hacg_uuid), + HAChassis(.hacg_uuid = hacg_uuid, .hac_uuid = hac_uuid), + var hacg_size = hac_uuid.group_by(hacg_uuid).to_set().size(), + hacg_size > 1, DistributedGatewayPort(lrp, lr_uuid), ConnectedLogicalRouter[(lr_uuid, set_uuid)], ConnectedLogicalRouter[(lr2_uuid, set_uuid)], diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 190f78261..93bb0e1da 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -34,6 +34,8 @@ check_row_count Port_Binding 1 logical_port=cr-alice ha_chassis_group=$ha_chgrp_ ha_ch=$(fetch_column HA_Chassis_Group ha_chassis name=alice) check_column "$ha_ch" HA_Chassis _uuid +ovn-sbctl list ha_chassis_group + # Delete chassis - gw2 in SB DB. # ovn-northd should not recreate ha_chassis rows # repeatedly when gw2 is deleted. @@ -536,6 +538,13 @@ check ovn-nbctl lrp-del lr1-sw0 wait_column "$comp2_ch_uuid" HA_Chassis_Group ref_chassis +# Delete one of the gateway chassises making the ha_chassis_group has only one +# chassis. In this case ref_chassis field should be empty for this +# ha_chassis_group. (ref_chassis is calculated only if there are more than 1 +# chassises in the ha_chassis_group. +check ovn-nbctl --wait=sb lrp-del-gateway-chassis lr0-public ch2 +wait_column "" HA_Chassis_Group ref_chassis + # Set redirect-chassis option to lr0-public. It should be ignored # (because redirect-chassis is obsolete). check ovn-nbctl set logical_router_port lr0-public options:redirect-chassis=ch1 @@ -543,7 +552,7 @@ check ovn-nbctl set logical_router_port lr0-public options:redirect-chassis=ch1 wait_row_count HA_Chassis_Group 1 wait_row_count HA_Chassis_Group 1 name=lr0-public -wait_row_count HA_Chassis 2 +wait_row_count HA_Chassis 1 # Delete the gateway chassis. check ovn-nbctl clear logical_router_port lr0-public gateway_chassis
When there is a big number of ha_chassis_groups (e.g. for distributed gateway ports), the calculation of ha_ref_chassis can take the major part of ovn-northd CPU as shown in perf. However, when there is only one chassis in ha_chassis_group, no BFD sessions are needed, so ha_ref_chassis calculation is unnecessary. Signed-off-by: Han Zhou <hzhou@ovn.org> --- northd/ovn-northd.c | 62 ++++++++++++++++++++++++++++++++++++++------ northd/ovn_northd.dl | 8 +++++- tests/ovn-northd.at | 11 +++++++- 3 files changed, 71 insertions(+), 10 deletions(-)