Message ID | 20220909213223.824013-5-i.maximets@ovn.org |
---|---|
State | Accepted |
Headers | show |
Series | northd: Optimize preparation of load balancers. | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | fail | github build: failed |
ovsrobot/github-robot-_ovn-kubernetes | success | github build: passed |
On Fri, Sep 9, 2022 at 2:33 PM Ilya Maximets <i.maximets@ovn.org> wrote: > > If the same load balancer group is attached to multiple routers, > IP sets will be constructed for each of them by sequentially calling > sset_add() for each IP for each load balancer for each router. > Instead of doing that, we can create IP sets for load balancer > groups and clone them. That will avoid extra sset_find__() call > making the construction of IP sets 2x faster. > > Only first load balancer group is cloned, the rest as well as > standalone load balancers are still added one by one, because > there is no more efficient way to merge sets. > > The order of processing changed to make sure that we're actually > optimizing copy of a large group. > > The code can be optimized further by introduction of a reference > counter and not copying at all if the router has no standalone load > balancers and only one group. Also, construction of "reachable" > sets can be optimized for the "neighbor_responder = all" case. But, > for now, only moved to a new structure for better readability. > > Acked-by: Dumitru Ceara <dceara@redhat.com> > Signed-off-by: Ilya Maximets <i.maximets@ovn.org> Thanks Ilya and Dumitru. I merged the 3 previous commits. This last patch looks good to me, too, but please see a minor comment below. 
> --- > lib/lb.c | 47 ++++++++++++++++++++ > lib/lb.h | 16 +++++++ > northd/northd.c | 114 ++++++++++++++++++++++++++---------------------- > northd/northd.h | 13 ++---- > 4 files changed, 129 insertions(+), 61 deletions(-) > > diff --git a/lib/lb.c b/lib/lb.c > index fa1a66d82..477cf8f5e 100644 > --- a/lib/lb.c > +++ b/lib/lb.c > @@ -26,6 +26,51 @@ > > VLOG_DEFINE_THIS_MODULE(lb); > > +struct ovn_lb_ip_set * > +ovn_lb_ip_set_create(void) > +{ > + struct ovn_lb_ip_set *lb_ip_set = xzalloc(sizeof *lb_ip_set); > + > + sset_init(&lb_ip_set->ips_v4); > + sset_init(&lb_ip_set->ips_v4_routable); > + sset_init(&lb_ip_set->ips_v4_reachable); > + sset_init(&lb_ip_set->ips_v6); > + sset_init(&lb_ip_set->ips_v6_routable); > + sset_init(&lb_ip_set->ips_v6_reachable); > + > + return lb_ip_set; > +} > + > +void > +ovn_lb_ip_set_destroy(struct ovn_lb_ip_set *lb_ip_set) > +{ > + if (!lb_ip_set) { > + return; > + } > + sset_destroy(&lb_ip_set->ips_v4); > + sset_destroy(&lb_ip_set->ips_v4_routable); > + sset_destroy(&lb_ip_set->ips_v4_reachable); > + sset_destroy(&lb_ip_set->ips_v6); > + sset_destroy(&lb_ip_set->ips_v6_routable); > + sset_destroy(&lb_ip_set->ips_v6_reachable); > + free(lb_ip_set); > +} > + > +struct ovn_lb_ip_set * > +ovn_lb_ip_set_clone(struct ovn_lb_ip_set *lb_ip_set) > +{ > + struct ovn_lb_ip_set *clone = ovn_lb_ip_set_create(); > + > + sset_clone(&clone->ips_v4, &lb_ip_set->ips_v4); > + sset_clone(&clone->ips_v4_routable, &lb_ip_set->ips_v4_routable); > + sset_clone(&clone->ips_v4_reachable, &lb_ip_set->ips_v4_reachable); > + sset_clone(&clone->ips_v6, &lb_ip_set->ips_v6); > + sset_clone(&clone->ips_v6_routable, &lb_ip_set->ips_v6_routable); > + sset_clone(&clone->ips_v6_reachable, &lb_ip_set->ips_v6_reachable); > + > + return clone; > +} > + > static > bool ovn_lb_vip_init(struct ovn_lb_vip *lb_vip, const char *lb_key, > const char *lb_value) > @@ -303,6 +348,7 @@ ovn_lb_group_create(const struct nbrec_load_balancer_group *nbrec_lb_group, > lb_group->lbs = 
xmalloc(lb_group->n_lbs * sizeof *lb_group->lbs); > lb_group->ls = xmalloc(max_datapaths * sizeof *lb_group->ls); > lb_group->lr = xmalloc(max_datapaths * sizeof *lb_group->lr); > + lb_group->lb_ips = ovn_lb_ip_set_create(); > > for (size_t i = 0; i < nbrec_lb_group->n_load_balancer; i++) { > const struct uuid *lb_uuid = > @@ -320,6 +366,7 @@ ovn_lb_group_destroy(struct ovn_lb_group *lb_group) > return; > } > > + ovn_lb_ip_set_destroy(lb_group->lb_ips); > free(lb_group->lbs); > free(lb_group->ls); > free(lb_group->lr); > diff --git a/lib/lb.h b/lib/lb.h > index e0b153fb3..9b902f005 100644 > --- a/lib/lb.h > +++ b/lib/lb.h > @@ -37,6 +37,21 @@ enum lb_neighbor_responder_mode { > LB_NEIGH_RESPOND_ALL, > }; > > +/* The "routable" ssets are subsets of the load balancer IPs for which IP > + * routes and ARP resolution flows are automatically added. */ > +struct ovn_lb_ip_set { > + struct sset ips_v4; > + struct sset ips_v4_routable; > + struct sset ips_v4_reachable; > + struct sset ips_v6; > + struct sset ips_v6_routable; > + struct sset ips_v6_reachable; > +}; > + > +struct ovn_lb_ip_set *ovn_lb_ip_set_create(void); > +void ovn_lb_ip_set_destroy(struct ovn_lb_ip_set *); > +struct ovn_lb_ip_set *ovn_lb_ip_set_clone(struct ovn_lb_ip_set *); > + > struct ovn_northd_lb { > struct hmap_node hmap_node; > > @@ -112,6 +127,7 @@ struct ovn_lb_group { > struct uuid uuid; > size_t n_lbs; > struct ovn_northd_lb **lbs; > + struct ovn_lb_ip_set *lb_ips; > > /* Datapaths to which this LB group is applied. 
*/ > size_t n_ls; > diff --git a/northd/northd.c b/northd/northd.c > index a4aed7f96..f2c0f5aed 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -821,13 +821,6 @@ lr_lb_address_set_ref(const struct ovn_datapath *od, int addr_family) > static void > init_lb_for_datapath(struct ovn_datapath *od) > { > - sset_init(&od->lb_ips_v4); > - sset_init(&od->lb_ips_v4_routable); > - sset_init(&od->lb_ips_v4_reachable); > - sset_init(&od->lb_ips_v6); > - sset_init(&od->lb_ips_v6_routable); > - sset_init(&od->lb_ips_v6_reachable); > - > if (od->nbs) { > od->has_lb_vip = ls_has_lb_vip(od); > } else { > @@ -838,16 +831,12 @@ init_lb_for_datapath(struct ovn_datapath *od) > static void > destroy_lb_for_datapath(struct ovn_datapath *od) > { > + ovn_lb_ip_set_destroy(od->lb_ips); > + od->lb_ips = NULL; > + > if (!od->nbs && !od->nbr) { > return; > } > - > - sset_destroy(&od->lb_ips_v4); > - sset_destroy(&od->lb_ips_v4_routable); > - sset_destroy(&od->lb_ips_v4_reachable); > - sset_destroy(&od->lb_ips_v6); > - sset_destroy(&od->lb_ips_v6_routable); > - sset_destroy(&od->lb_ips_v6_reachable); > } > > /* A group of logical router datapaths which are connected - either > @@ -2818,20 +2807,20 @@ get_nat_addresses(const struct ovn_port *op, size_t *n, bool routable_only, > if (include_lb_ips) { > const char *ip_address; > if (routable_only) { > - SSET_FOR_EACH (ip_address, &op->od->lb_ips_v4_routable) { > + SSET_FOR_EACH (ip_address, &op->od->lb_ips->ips_v4_routable) { > ds_put_format(&c_addresses, " %s", ip_address); > central_ip_address = true; > } > - SSET_FOR_EACH (ip_address, &op->od->lb_ips_v6_routable) { > + SSET_FOR_EACH (ip_address, &op->od->lb_ips->ips_v6_routable) { > ds_put_format(&c_addresses, " %s", ip_address); > central_ip_address = true; > } > } else { > - SSET_FOR_EACH (ip_address, &op->od->lb_ips_v4) { > + SSET_FOR_EACH (ip_address, &op->od->lb_ips->ips_v4) { > ds_put_format(&c_addresses, " %s", ip_address); > central_ip_address = true; > } > - SSET_FOR_EACH 
(ip_address, &op->od->lb_ips_v6) { > + SSET_FOR_EACH (ip_address, &op->od->lb_ips->ips_v6) { > ds_put_format(&c_addresses, " %s", ip_address); > central_ip_address = true; > } > @@ -3822,20 +3811,21 @@ build_lb_vip_actions(struct ovn_lb_vip *lb_vip, > } > > static void > -build_lrouter_lb_ips(struct ovn_datapath *od, const struct ovn_northd_lb *lb) > +build_lrouter_lb_ips(struct ovn_lb_ip_set *lb_ips, > + const struct ovn_northd_lb *lb) > { > const char *ip_address; > > SSET_FOR_EACH (ip_address, &lb->ips_v4) { > - sset_add(&od->lb_ips_v4, ip_address); > + sset_add(&lb_ips->ips_v4, ip_address); > if (lb->routable) { > - sset_add(&od->lb_ips_v4_routable, ip_address); > + sset_add(&lb_ips->ips_v4_routable, ip_address); > } > } > SSET_FOR_EACH (ip_address, &lb->ips_v6) { > - sset_add(&od->lb_ips_v6, ip_address); > + sset_add(&lb_ips->ips_v6, ip_address); > if (lb->routable) { > - sset_add(&od->lb_ips_v6_routable, ip_address); > + sset_add(&lb_ips->ips_v6_routable, ip_address); > } > } > } > @@ -3863,6 +3853,11 @@ build_lbs(struct northd_input *input_data, struct hmap *datapaths, > input_data->nbrec_load_balancer_group_table) { > lb_group = ovn_lb_group_create(nbrec_lb_group, lbs, > hmap_count(datapaths)); > + > + for (size_t i = 0; i < lb_group->n_lbs; i++) { > + build_lrouter_lb_ips(lb_group->lb_ips, lb_group->lbs[i]); > + } > + > hmap_insert(lb_groups, &lb_group->hmap_node, > uuid_hash(&lb_group->uuid)); > } > @@ -3900,23 +3895,34 @@ build_lbs(struct northd_input *input_data, struct hmap *datapaths, > continue; > } > > - for (size_t i = 0; i < od->nbr->n_load_balancer; i++) { > - const struct uuid *lb_uuid = > - &od->nbr->load_balancer[i]->header_.uuid; > - lb = ovn_northd_lb_find(lbs, lb_uuid); > - ovn_northd_lb_add_lr(lb, 1, &od); > - build_lrouter_lb_ips(od, lb); > - } > - > + /* Checking load balancer groups first to more efficiently copy > + * IP sets for large groups. 
*/ > for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) { > nbrec_lb_group = od->nbr->load_balancer_group[i]; > lb_group = ovn_lb_group_find(lb_groups, > &nbrec_lb_group->header_.uuid); > ovn_lb_group_add_lr(lb_group, od); > - for (size_t j = 0; j < lb_group->n_lbs; j++) { > - build_lrouter_lb_ips(od, lb_group->lbs[j]); > + > + if (!od->lb_ips) { > + od->lb_ips = ovn_lb_ip_set_clone(lb_group->lb_ips); Shall we do this for the lb_group that has the largest number of VIPs instead of the first lb_group? Otherwise, if the first lb_group happens to be a small one, is the optimization of this patch not effective? Since we don't expect a big number lb_groups, I think a simple iteration should be sufficient for this check. Thanks, Han > + } else { > + for (size_t j = 0; j < lb_group->n_lbs; j++) { > + build_lrouter_lb_ips(od->lb_ips, lb_group->lbs[j]); > + } > } > } > + > + if (!od->lb_ips) { > + od->lb_ips = ovn_lb_ip_set_create(); > + } > + > + for (size_t i = 0; i < od->nbr->n_load_balancer; i++) { > + const struct uuid *lb_uuid = > + &od->nbr->load_balancer[i]->header_.uuid; > + lb = ovn_northd_lb_find(lbs, lb_uuid); > + ovn_northd_lb_add_lr(lb, 1, &od); > + build_lrouter_lb_ips(od->lb_ips, lb); > + } > } > > HMAP_FOR_EACH (lb_group, hmap_node, lb_groups) { > @@ -3977,9 +3983,9 @@ build_lrouter_lb_reachable_ips(struct ovn_datapath *od, > if (lb->neigh_mode == LB_NEIGH_RESPOND_ALL) { > for (size_t i = 0; i < lb->n_vips; i++) { > if (IN6_IS_ADDR_V4MAPPED(&lb->vips[i].vip)) { > - sset_add(&od->lb_ips_v4_reachable, lb->vips[i].vip_str); > + sset_add(&od->lb_ips->ips_v4_reachable, lb->vips[i].vip_str); > } else { > - sset_add(&od->lb_ips_v6_reachable, lb->vips[i].vip_str); > + sset_add(&od->lb_ips->ips_v6_reachable, lb->vips[i].vip_str); > } > } > return; > @@ -3995,7 +4001,8 @@ build_lrouter_lb_reachable_ips(struct ovn_datapath *od, > > LIST_FOR_EACH (op, dp_node, &od->port_list) { > if (lrouter_port_ipv4_reachable(op, vip_ip4)) { > - 
sset_add(&od->lb_ips_v4_reachable, lb->vips[i].vip_str); > + sset_add(&od->lb_ips->ips_v4_reachable, > + lb->vips[i].vip_str); > break; > } > } > @@ -4004,7 +4011,8 @@ build_lrouter_lb_reachable_ips(struct ovn_datapath *od, > > LIST_FOR_EACH (op, dp_node, &od->port_list) { > if (lrouter_port_ipv6_reachable(op, &lb->vips[i].vip)) { > - sset_add(&od->lb_ips_v6_reachable, lb->vips[i].vip_str); > + sset_add(&od->lb_ips->ips_v6_reachable, > + lb->vips[i].vip_str); > break; > } > } > @@ -7467,7 +7475,7 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op, > */ > > const char *ip_addr; > - SSET_FOR_EACH (ip_addr, &op->od->lb_ips_v4) { > + SSET_FOR_EACH (ip_addr, &op->od->lb_ips->ips_v4) { > ovs_be32 ipv4_addr; > > /* Check if the ovn port has a network configured on which we could > @@ -7480,7 +7488,7 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op, > stage_hint); > } > } > - SSET_FOR_EACH (ip_addr, &op->od->lb_ips_v6) { > + SSET_FOR_EACH (ip_addr, &op->od->lb_ips->ips_v6) { > struct in6_addr ipv6_addr; > > /* Check if the ovn port has a network configured on which we could > @@ -7510,13 +7518,13 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op, > * expect ARP requests/NS for the DNAT external_ip. 
> */ > if (nat_entry_is_v6(nat_entry)) { > - if (!sset_contains(&op->od->lb_ips_v6, nat->external_ip)) { > + if (!sset_contains(&op->od->lb_ips->ips_v6, nat->external_ip)) { > build_lswitch_rport_arp_req_flow( > nat->external_ip, AF_INET6, sw_op, sw_od, 80, lflows, > stage_hint); > } > } else { > - if (!sset_contains(&op->od->lb_ips_v4, nat->external_ip)) { > + if (!sset_contains(&op->od->lb_ips->ips_v4, nat->external_ip)) { > build_lswitch_rport_arp_req_flow( > nat->external_ip, AF_INET, sw_op, sw_od, 80, lflows, > stage_hint); > @@ -10709,7 +10717,8 @@ build_lrouter_drop_own_dest(struct ovn_port *op, enum ovn_stage stage, > const char *ip = op->lrp_networks.ipv4_addrs[i].addr_s; > > bool router_ip_in_snat_ips = !!shash_find(&op->od->snat_ips, ip); > - bool router_ip_in_lb_ips = !!sset_find(&op->od->lb_ips_v4, ip); > + bool router_ip_in_lb_ips = > + !!sset_find(&op->od->lb_ips->ips_v4, ip); > bool drop_router_ip = (drop_snat_ip == (router_ip_in_snat_ips || > router_ip_in_lb_ips)); > > @@ -10737,7 +10746,8 @@ build_lrouter_drop_own_dest(struct ovn_port *op, enum ovn_stage stage, > const char *ip = op->lrp_networks.ipv6_addrs[i].addr_s; > > bool router_ip_in_snat_ips = !!shash_find(&op->od->snat_ips, ip); > - bool router_ip_in_lb_ips = !!sset_find(&op->od->lb_ips_v6, ip); > + bool router_ip_in_lb_ips = > + !!sset_find(&op->od->lb_ips->ips_v6, ip); > bool drop_router_ip = (drop_snat_ip == (router_ip_in_snat_ips || > router_ip_in_lb_ips)); > > @@ -12804,7 +12814,7 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op, > &op->nbrp->header_, lflows); > } > > - if (sset_count(&op->od->lb_ips_v4_reachable)) { > + if (sset_count(&op->od->lb_ips->ips_v4_reachable)) { > ds_clear(match); > if (is_l3dgw_port(op)) { > ds_put_format(match, "is_chassis_resident(%s)", > @@ -12819,7 +12829,7 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op, > free(lb_ips_v4_as); > } > > - if (sset_count(&op->od->lb_ips_v6_reachable)) { > + if (sset_count(&op->od->lb_ips->ips_v6_reachable)) { > 
ds_clear(match); > > if (is_l3dgw_port(op)) { > @@ -14632,23 +14642,25 @@ sync_address_sets(struct northd_input *input_data, > continue; > } > > - if (sset_count(&od->lb_ips_v4_reachable)) { > + if (sset_count(&od->lb_ips->ips_v4_reachable)) { > char *ipv4_addrs_name = lr_lb_address_set_name(od, AF_INET); > - const char **ipv4_addrs = sset_array(&od->lb_ips_v4_reachable); > + const char **ipv4_addrs = > + sset_array(&od->lb_ips->ips_v4_reachable); > > sync_address_set(ovnsb_txn, ipv4_addrs_name, ipv4_addrs, > - sset_count(&od->lb_ips_v4_reachable), > + sset_count(&od->lb_ips->ips_v4_reachable), > &sb_address_sets); > free(ipv4_addrs_name); > free(ipv4_addrs); > } > > - if (sset_count(&od->lb_ips_v6_reachable)) { > + if (sset_count(&od->lb_ips->ips_v6_reachable)) { > char *ipv6_addrs_name = lr_lb_address_set_name(od, AF_INET6); > - const char **ipv6_addrs = sset_array(&od->lb_ips_v6_reachable); > + const char **ipv6_addrs = > + sset_array(&od->lb_ips->ips_v6_reachable); > > sync_address_set(ovnsb_txn, ipv6_addrs_name, ipv6_addrs, > - sset_count(&od->lb_ips_v6_reachable), > + sset_count(&od->lb_ips->ips_v6_reachable), > &sb_address_sets); > free(ipv6_addrs_name); > free(ipv6_addrs); > diff --git a/northd/northd.h b/northd/northd.h > index 8d299864f..0723d900a 100644 > --- a/northd/northd.h > +++ b/northd/northd.h > @@ -236,16 +236,9 @@ struct ovn_datapath { > struct lport_addresses dnat_force_snat_addrs; > struct lport_addresses lb_force_snat_addrs; > bool lb_force_snat_router_ip; > - /* The "routable" ssets are subsets of the load balancer > - * IPs for which IP routes and ARP resolution flows are automatically > - * added > - */ > - struct sset lb_ips_v4; > - struct sset lb_ips_v4_routable; > - struct sset lb_ips_v4_reachable; > - struct sset lb_ips_v6; > - struct sset lb_ips_v6_routable; > - struct sset lb_ips_v6_reachable; > + > + /* Load Balancer vIPs relevant for this datapath. 
*/ > + struct ovn_lb_ip_set *lb_ips; > > struct ovn_port **localnet_ports; > size_t n_localnet_ports; > -- > 2.34.3 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
On 9/12/22 22:53, Han Zhou wrote: > > > On Fri, Sep 9, 2022 at 2:33 PM Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>> wrote: >> >> If the same load balancer group is attached to multiple routers, >> IP sets will be constructed for each of them by sequentially calling >> sset_add() for each IP for each load balancer for each router. >> Instead of doing that, we can create IP sets for load balancer >> groups and clone them. That will avoid extra ssed_find__() call >> making the construction of IP sets 2x faster. >> >> Only first load balancer group is cloned, the rest as well as >> standalone load balancers are still added one by one, because >> there is no more efficient way to merge sets. >> >> The order of processing changed to make sure that we're actually >> optimizing copy of a large group. >> >> The code can be optimized further by introduction of a reference >> counter and not copying at all if the router has no standalone load >> balancers and only one group. Also, construction of "reachable" >> sets can be optimized for the "neighbor_responder = all" case. But, >> for now, only moved to a new structure for better readability. >> >> Acked-by: Dumitru Ceara <dceara@redhat.com <mailto:dceara@redhat.com>> >> Signed-off-by: Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>> > > Thanks Ilya and Dumitru. I merged the 3 previous commits. This last patch looks good to me, too, but please see a minor comment below. > >> --- >> @@ -3900,23 +3895,34 @@ build_lbs(struct northd_input *input_data, struct hmap *datapaths, >> continue; >> } >> >> - for (size_t i = 0; i < od->nbr->n_load_balancer; i++) { >> - const struct uuid *lb_uuid = >> - &od->nbr->load_balancer[i]->header_.uuid; >> - lb = ovn_northd_lb_find(lbs, lb_uuid); >> - ovn_northd_lb_add_lr(lb, 1, &od); >> - build_lrouter_lb_ips(od, lb); >> - } >> - >> + /* Checking load balancer groups first to more efficiently copy >> + * IP sets for large groups. 
*/ >> for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) { >> nbrec_lb_group = od->nbr->load_balancer_group[i]; >> lb_group = ovn_lb_group_find(lb_groups, >> &nbrec_lb_group->header_.uuid); >> ovn_lb_group_add_lr(lb_group, od); >> - for (size_t j = 0; j < lb_group->n_lbs; j++) { >> - build_lrouter_lb_ips(od, lb_group->lbs[j]); >> + >> + if (!od->lb_ips) { >> + od->lb_ips = ovn_lb_ip_set_clone(lb_group->lb_ips); > > Shall we do this for the lb_group that has the largest number of VIPs instead of the first lb_group? Otherwise, if the first lb_group happens to be a small one, is the optimization of this patch not effective? Since we don't expect a big number lb_groups, I think a simple iteration should be sufficient for this check. I think, in most scenarios that we hit today, there is only one big cluster-wide group. And other load balancers are applied without grouping. So, it's not a big concern for a time being. But I actually thought about trying to find the largest group to optimize better, just didn't want to make the code more complex. In any case, it doesn't hurt to implement this and make the code a bit more future-proof. One note here is that we don't really need the largest group, but a group with a largest number of unique IPs. At the same time, I'd like to avoid extra hash map lookups. So, the size of a group itself sounds like a fair approximations. If that makes sense, the following diff should do the trick: diff --git a/northd/northd.c b/northd/northd.c index f2c0f5aed..a3f0112c8 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -3895,10 +3895,21 @@ build_lbs(struct northd_input *input_data, struct hmap *datapaths, continue; } - /* Checking load balancer groups first to more efficiently copy - * IP sets for large groups. */ + /* Checking load balancer groups first, starting from the largest one, + * to more efficiently copy IP sets. 
*/ + size_t largest_group = 0; + + for (size_t i = 1; i < od->nbr->n_load_balancer_group; i++) { + if (od->nbr->load_balancer_group[i]->n_load_balancer > + od->nbr->load_balancer_group[largest_group]->n_load_balancer) { + largest_group = i; + } + } + for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) { - nbrec_lb_group = od->nbr->load_balancer_group[i]; + size_t idx = (i + largest_group) % od->nbr->n_load_balancer_group; + + nbrec_lb_group = od->nbr->load_balancer_group[idx]; lb_group = ovn_lb_group_find(lb_groups, &nbrec_lb_group->header_.uuid); ovn_lb_group_add_lr(lb_group, od); --- Han, Dumitru, what do you think? I can post v4 with that change, or you may fold it in while applying the change. Either way is fine by me. Best regards, Ilya Maximets.
On Tue, Sep 13, 2022 at 3:18 AM Ilya Maximets <i.maximets@ovn.org> wrote: > > On 9/12/22 22:53, Han Zhou wrote: > > > > > > On Fri, Sep 9, 2022 at 2:33 PM Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>> wrote: > >> > >> If the same load balancer group is attached to multiple routers, > >> IP sets will be constructed for each of them by sequentially calling > >> sset_add() for each IP for each load balancer for each router. > >> Instead of doing that, we can create IP sets for load balancer > >> groups and clone them. That will avoid extra ssed_find__() call > >> making the construction of IP sets 2x faster. > >> > >> Only first load balancer group is cloned, the rest as well as > >> standalone load balancers are still added one by one, because > >> there is no more efficient way to merge sets. > >> > >> The order of processing changed to make sure that we're actually > >> optimizing copy of a large group. > >> > >> The code can be optimized further by introduction of a reference > >> counter and not copying at all if the router has no standalone load > >> balancers and only one group. Also, construction of "reachable" > >> sets can be optimized for the "neighbor_responder = all" case. But, > >> for now, only moved to a new structure for better readability. > >> > >> Acked-by: Dumitru Ceara <dceara@redhat.com <mailto:dceara@redhat.com>> > >> Signed-off-by: Ilya Maximets <i.maximets@ovn.org <mailto: i.maximets@ovn.org>> > > > > Thanks Ilya and Dumitru. I merged the 3 previous commits. This last patch looks good to me, too, but please see a minor comment below. 
> > > >> --- > >> @@ -3900,23 +3895,34 @@ build_lbs(struct northd_input *input_data, struct hmap *datapaths, > >> continue; > >> } > >> > >> - for (size_t i = 0; i < od->nbr->n_load_balancer; i++) { > >> - const struct uuid *lb_uuid = > >> - &od->nbr->load_balancer[i]->header_.uuid; > >> - lb = ovn_northd_lb_find(lbs, lb_uuid); > >> - ovn_northd_lb_add_lr(lb, 1, &od); > >> - build_lrouter_lb_ips(od, lb); > >> - } > >> - > >> + /* Checking load balancer groups first to more efficiently copy > >> + * IP sets for large groups. */ > >> for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) { > >> nbrec_lb_group = od->nbr->load_balancer_group[i]; > >> lb_group = ovn_lb_group_find(lb_groups, > >> &nbrec_lb_group->header_.uuid); > >> ovn_lb_group_add_lr(lb_group, od); > >> - for (size_t j = 0; j < lb_group->n_lbs; j++) { > >> - build_lrouter_lb_ips(od, lb_group->lbs[j]); > >> + > >> + if (!od->lb_ips) { > >> + od->lb_ips = ovn_lb_ip_set_clone(lb_group->lb_ips); > > > > Shall we do this for the lb_group that has the largest number of VIPs instead of the first lb_group? Otherwise, if the first lb_group happens to be a small one, is the optimization of this patch not effective? Since we don't expect a big number lb_groups, I think a simple iteration should be sufficient for this check. > > I think, in most scenarios that we hit today, there is only one > big cluster-wide group. And other load balancers are applied > without grouping. So, it's not a big concern for a time being. > > But I actually thought about trying to find the largest group to > optimize better, just didn't want to make the code more complex. > In any case, it doesn't hurt to implement this and make the code > a bit more future-proof. > > One note here is that we don't really need the largest group, > but a group with a largest number of unique IPs. At the same > time, I'd like to avoid extra hash map lookups. So, the size > of a group itself sounds like a fair approximations. 
> > If that makes sense, the following diff should do the trick: > > diff --git a/northd/northd.c b/northd/northd.c > index f2c0f5aed..a3f0112c8 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -3895,10 +3895,21 @@ build_lbs(struct northd_input *input_data, struct hmap *datapaths, > continue; > } > > - /* Checking load balancer groups first to more efficiently copy > - * IP sets for large groups. */ > + /* Checking load balancer groups first, starting from the largest one, > + * to more efficiently copy IP sets. */ > + size_t largest_group = 0; > + > + for (size_t i = 1; i < od->nbr->n_load_balancer_group; i++) { > + if (od->nbr->load_balancer_group[i]->n_load_balancer > > + od->nbr->load_balancer_group[largest_group]->n_load_balancer) { > + largest_group = i; > + } > + } > + > for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) { > - nbrec_lb_group = od->nbr->load_balancer_group[i]; > + size_t idx = (i + largest_group) % od->nbr->n_load_balancer_group; > + > + nbrec_lb_group = od->nbr->load_balancer_group[idx]; > lb_group = ovn_lb_group_find(lb_groups, > &nbrec_lb_group->header_.uuid); > ovn_lb_group_add_lr(lb_group, od); > --- > > Han, Dumitru, what do you think? > > I can post v4 with that change, or you may fold it in while > applying the change. Either way is fine by me. > > Best regards, Ilya Maximets. Thanks Ilya, I think this is good enough. I applied this diff and pushed to the main branch. Han
diff --git a/lib/lb.c b/lib/lb.c index fa1a66d82..477cf8f5e 100644 --- a/lib/lb.c +++ b/lib/lb.c @@ -26,6 +26,51 @@ VLOG_DEFINE_THIS_MODULE(lb); +struct ovn_lb_ip_set * +ovn_lb_ip_set_create(void) +{ + struct ovn_lb_ip_set *lb_ip_set = xzalloc(sizeof *lb_ip_set); + + sset_init(&lb_ip_set->ips_v4); + sset_init(&lb_ip_set->ips_v4_routable); + sset_init(&lb_ip_set->ips_v4_reachable); + sset_init(&lb_ip_set->ips_v6); + sset_init(&lb_ip_set->ips_v6_routable); + sset_init(&lb_ip_set->ips_v6_reachable); + + return lb_ip_set; +} + +void +ovn_lb_ip_set_destroy(struct ovn_lb_ip_set *lb_ip_set) +{ + if (!lb_ip_set) { + return; + } + sset_destroy(&lb_ip_set->ips_v4); + sset_destroy(&lb_ip_set->ips_v4_routable); + sset_destroy(&lb_ip_set->ips_v4_reachable); + sset_destroy(&lb_ip_set->ips_v6); + sset_destroy(&lb_ip_set->ips_v6_routable); + sset_destroy(&lb_ip_set->ips_v6_reachable); + free(lb_ip_set); +} + +struct ovn_lb_ip_set * +ovn_lb_ip_set_clone(struct ovn_lb_ip_set *lb_ip_set) +{ + struct ovn_lb_ip_set *clone = ovn_lb_ip_set_create(); + + sset_clone(&clone->ips_v4, &lb_ip_set->ips_v4); + sset_clone(&clone->ips_v4_routable, &lb_ip_set->ips_v4_routable); + sset_clone(&clone->ips_v4_reachable, &lb_ip_set->ips_v4_reachable); + sset_clone(&clone->ips_v6, &lb_ip_set->ips_v6); + sset_clone(&clone->ips_v6_routable, &lb_ip_set->ips_v6_routable); + sset_clone(&clone->ips_v6_reachable, &lb_ip_set->ips_v6_reachable); + + return clone; +} + static bool ovn_lb_vip_init(struct ovn_lb_vip *lb_vip, const char *lb_key, const char *lb_value) @@ -303,6 +348,7 @@ ovn_lb_group_create(const struct nbrec_load_balancer_group *nbrec_lb_group, lb_group->lbs = xmalloc(lb_group->n_lbs * sizeof *lb_group->lbs); lb_group->ls = xmalloc(max_datapaths * sizeof *lb_group->ls); lb_group->lr = xmalloc(max_datapaths * sizeof *lb_group->lr); + lb_group->lb_ips = ovn_lb_ip_set_create(); for (size_t i = 0; i < nbrec_lb_group->n_load_balancer; i++) { const struct uuid *lb_uuid = @@ -320,6 +366,7 @@ 
ovn_lb_group_destroy(struct ovn_lb_group *lb_group) return; } + ovn_lb_ip_set_destroy(lb_group->lb_ips); free(lb_group->lbs); free(lb_group->ls); free(lb_group->lr); diff --git a/lib/lb.h b/lib/lb.h index e0b153fb3..9b902f005 100644 --- a/lib/lb.h +++ b/lib/lb.h @@ -37,6 +37,21 @@ enum lb_neighbor_responder_mode { LB_NEIGH_RESPOND_ALL, }; +/* The "routable" ssets are subsets of the load balancer IPs for which IP + * routes and ARP resolution flows are automatically added. */ +struct ovn_lb_ip_set { + struct sset ips_v4; + struct sset ips_v4_routable; + struct sset ips_v4_reachable; + struct sset ips_v6; + struct sset ips_v6_routable; + struct sset ips_v6_reachable; +}; + +struct ovn_lb_ip_set *ovn_lb_ip_set_create(void); +void ovn_lb_ip_set_destroy(struct ovn_lb_ip_set *); +struct ovn_lb_ip_set *ovn_lb_ip_set_clone(struct ovn_lb_ip_set *); + struct ovn_northd_lb { struct hmap_node hmap_node; @@ -112,6 +127,7 @@ struct ovn_lb_group { struct uuid uuid; size_t n_lbs; struct ovn_northd_lb **lbs; + struct ovn_lb_ip_set *lb_ips; /* Datapaths to which this LB group is applied. 
*/ size_t n_ls; diff --git a/northd/northd.c b/northd/northd.c index a4aed7f96..f2c0f5aed 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -821,13 +821,6 @@ lr_lb_address_set_ref(const struct ovn_datapath *od, int addr_family) static void init_lb_for_datapath(struct ovn_datapath *od) { - sset_init(&od->lb_ips_v4); - sset_init(&od->lb_ips_v4_routable); - sset_init(&od->lb_ips_v4_reachable); - sset_init(&od->lb_ips_v6); - sset_init(&od->lb_ips_v6_routable); - sset_init(&od->lb_ips_v6_reachable); - if (od->nbs) { od->has_lb_vip = ls_has_lb_vip(od); } else { @@ -838,16 +831,12 @@ init_lb_for_datapath(struct ovn_datapath *od) static void destroy_lb_for_datapath(struct ovn_datapath *od) { + ovn_lb_ip_set_destroy(od->lb_ips); + od->lb_ips = NULL; + if (!od->nbs && !od->nbr) { return; } - - sset_destroy(&od->lb_ips_v4); - sset_destroy(&od->lb_ips_v4_routable); - sset_destroy(&od->lb_ips_v4_reachable); - sset_destroy(&od->lb_ips_v6); - sset_destroy(&od->lb_ips_v6_routable); - sset_destroy(&od->lb_ips_v6_reachable); } /* A group of logical router datapaths which are connected - either @@ -2818,20 +2807,20 @@ get_nat_addresses(const struct ovn_port *op, size_t *n, bool routable_only, if (include_lb_ips) { const char *ip_address; if (routable_only) { - SSET_FOR_EACH (ip_address, &op->od->lb_ips_v4_routable) { + SSET_FOR_EACH (ip_address, &op->od->lb_ips->ips_v4_routable) { ds_put_format(&c_addresses, " %s", ip_address); central_ip_address = true; } - SSET_FOR_EACH (ip_address, &op->od->lb_ips_v6_routable) { + SSET_FOR_EACH (ip_address, &op->od->lb_ips->ips_v6_routable) { ds_put_format(&c_addresses, " %s", ip_address); central_ip_address = true; } } else { - SSET_FOR_EACH (ip_address, &op->od->lb_ips_v4) { + SSET_FOR_EACH (ip_address, &op->od->lb_ips->ips_v4) { ds_put_format(&c_addresses, " %s", ip_address); central_ip_address = true; } - SSET_FOR_EACH (ip_address, &op->od->lb_ips_v6) { + SSET_FOR_EACH (ip_address, &op->od->lb_ips->ips_v6) { ds_put_format(&c_addresses, " 
%s", ip_address); central_ip_address = true; } @@ -3822,20 +3811,21 @@ build_lb_vip_actions(struct ovn_lb_vip *lb_vip, } static void -build_lrouter_lb_ips(struct ovn_datapath *od, const struct ovn_northd_lb *lb) +build_lrouter_lb_ips(struct ovn_lb_ip_set *lb_ips, + const struct ovn_northd_lb *lb) { const char *ip_address; SSET_FOR_EACH (ip_address, &lb->ips_v4) { - sset_add(&od->lb_ips_v4, ip_address); + sset_add(&lb_ips->ips_v4, ip_address); if (lb->routable) { - sset_add(&od->lb_ips_v4_routable, ip_address); + sset_add(&lb_ips->ips_v4_routable, ip_address); } } SSET_FOR_EACH (ip_address, &lb->ips_v6) { - sset_add(&od->lb_ips_v6, ip_address); + sset_add(&lb_ips->ips_v6, ip_address); if (lb->routable) { - sset_add(&od->lb_ips_v6_routable, ip_address); + sset_add(&lb_ips->ips_v6_routable, ip_address); } } } @@ -3863,6 +3853,11 @@ build_lbs(struct northd_input *input_data, struct hmap *datapaths, input_data->nbrec_load_balancer_group_table) { lb_group = ovn_lb_group_create(nbrec_lb_group, lbs, hmap_count(datapaths)); + + for (size_t i = 0; i < lb_group->n_lbs; i++) { + build_lrouter_lb_ips(lb_group->lb_ips, lb_group->lbs[i]); + } + hmap_insert(lb_groups, &lb_group->hmap_node, uuid_hash(&lb_group->uuid)); } @@ -3900,23 +3895,34 @@ build_lbs(struct northd_input *input_data, struct hmap *datapaths, continue; } - for (size_t i = 0; i < od->nbr->n_load_balancer; i++) { - const struct uuid *lb_uuid = - &od->nbr->load_balancer[i]->header_.uuid; - lb = ovn_northd_lb_find(lbs, lb_uuid); - ovn_northd_lb_add_lr(lb, 1, &od); - build_lrouter_lb_ips(od, lb); - } - + /* Checking load balancer groups first to more efficiently copy + * IP sets for large groups. 
*/ for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) { nbrec_lb_group = od->nbr->load_balancer_group[i]; lb_group = ovn_lb_group_find(lb_groups, &nbrec_lb_group->header_.uuid); ovn_lb_group_add_lr(lb_group, od); - for (size_t j = 0; j < lb_group->n_lbs; j++) { - build_lrouter_lb_ips(od, lb_group->lbs[j]); + + if (!od->lb_ips) { + od->lb_ips = ovn_lb_ip_set_clone(lb_group->lb_ips); + } else { + for (size_t j = 0; j < lb_group->n_lbs; j++) { + build_lrouter_lb_ips(od->lb_ips, lb_group->lbs[j]); + } } } + + if (!od->lb_ips) { + od->lb_ips = ovn_lb_ip_set_create(); + } + + for (size_t i = 0; i < od->nbr->n_load_balancer; i++) { + const struct uuid *lb_uuid = + &od->nbr->load_balancer[i]->header_.uuid; + lb = ovn_northd_lb_find(lbs, lb_uuid); + ovn_northd_lb_add_lr(lb, 1, &od); + build_lrouter_lb_ips(od->lb_ips, lb); + } } HMAP_FOR_EACH (lb_group, hmap_node, lb_groups) { @@ -3977,9 +3983,9 @@ build_lrouter_lb_reachable_ips(struct ovn_datapath *od, if (lb->neigh_mode == LB_NEIGH_RESPOND_ALL) { for (size_t i = 0; i < lb->n_vips; i++) { if (IN6_IS_ADDR_V4MAPPED(&lb->vips[i].vip)) { - sset_add(&od->lb_ips_v4_reachable, lb->vips[i].vip_str); + sset_add(&od->lb_ips->ips_v4_reachable, lb->vips[i].vip_str); } else { - sset_add(&od->lb_ips_v6_reachable, lb->vips[i].vip_str); + sset_add(&od->lb_ips->ips_v6_reachable, lb->vips[i].vip_str); } } return; @@ -3995,7 +4001,8 @@ build_lrouter_lb_reachable_ips(struct ovn_datapath *od, LIST_FOR_EACH (op, dp_node, &od->port_list) { if (lrouter_port_ipv4_reachable(op, vip_ip4)) { - sset_add(&od->lb_ips_v4_reachable, lb->vips[i].vip_str); + sset_add(&od->lb_ips->ips_v4_reachable, + lb->vips[i].vip_str); break; } } @@ -4004,7 +4011,8 @@ build_lrouter_lb_reachable_ips(struct ovn_datapath *od, LIST_FOR_EACH (op, dp_node, &od->port_list) { if (lrouter_port_ipv6_reachable(op, &lb->vips[i].vip)) { - sset_add(&od->lb_ips_v6_reachable, lb->vips[i].vip_str); + sset_add(&od->lb_ips->ips_v6_reachable, + lb->vips[i].vip_str); break; } } @@ 
-7467,7 +7475,7 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op, */ const char *ip_addr; - SSET_FOR_EACH (ip_addr, &op->od->lb_ips_v4) { + SSET_FOR_EACH (ip_addr, &op->od->lb_ips->ips_v4) { ovs_be32 ipv4_addr; /* Check if the ovn port has a network configured on which we could @@ -7480,7 +7488,7 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op, stage_hint); } } - SSET_FOR_EACH (ip_addr, &op->od->lb_ips_v6) { + SSET_FOR_EACH (ip_addr, &op->od->lb_ips->ips_v6) { struct in6_addr ipv6_addr; /* Check if the ovn port has a network configured on which we could @@ -7510,13 +7518,13 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op, * expect ARP requests/NS for the DNAT external_ip. */ if (nat_entry_is_v6(nat_entry)) { - if (!sset_contains(&op->od->lb_ips_v6, nat->external_ip)) { + if (!sset_contains(&op->od->lb_ips->ips_v6, nat->external_ip)) { build_lswitch_rport_arp_req_flow( nat->external_ip, AF_INET6, sw_op, sw_od, 80, lflows, stage_hint); } } else { - if (!sset_contains(&op->od->lb_ips_v4, nat->external_ip)) { + if (!sset_contains(&op->od->lb_ips->ips_v4, nat->external_ip)) { build_lswitch_rport_arp_req_flow( nat->external_ip, AF_INET, sw_op, sw_od, 80, lflows, stage_hint); @@ -10709,7 +10717,8 @@ build_lrouter_drop_own_dest(struct ovn_port *op, enum ovn_stage stage, const char *ip = op->lrp_networks.ipv4_addrs[i].addr_s; bool router_ip_in_snat_ips = !!shash_find(&op->od->snat_ips, ip); - bool router_ip_in_lb_ips = !!sset_find(&op->od->lb_ips_v4, ip); + bool router_ip_in_lb_ips = + !!sset_find(&op->od->lb_ips->ips_v4, ip); bool drop_router_ip = (drop_snat_ip == (router_ip_in_snat_ips || router_ip_in_lb_ips)); @@ -10737,7 +10746,8 @@ build_lrouter_drop_own_dest(struct ovn_port *op, enum ovn_stage stage, const char *ip = op->lrp_networks.ipv6_addrs[i].addr_s; bool router_ip_in_snat_ips = !!shash_find(&op->od->snat_ips, ip); - bool router_ip_in_lb_ips = !!sset_find(&op->od->lb_ips_v6, ip); + bool router_ip_in_lb_ips = + 
!!sset_find(&op->od->lb_ips->ips_v6, ip); bool drop_router_ip = (drop_snat_ip == (router_ip_in_snat_ips || router_ip_in_lb_ips)); @@ -12804,7 +12814,7 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op, &op->nbrp->header_, lflows); } - if (sset_count(&op->od->lb_ips_v4_reachable)) { + if (sset_count(&op->od->lb_ips->ips_v4_reachable)) { ds_clear(match); if (is_l3dgw_port(op)) { ds_put_format(match, "is_chassis_resident(%s)", @@ -12819,7 +12829,7 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op, free(lb_ips_v4_as); } - if (sset_count(&op->od->lb_ips_v6_reachable)) { + if (sset_count(&op->od->lb_ips->ips_v6_reachable)) { ds_clear(match); if (is_l3dgw_port(op)) { @@ -14632,23 +14642,25 @@ sync_address_sets(struct northd_input *input_data, continue; } - if (sset_count(&od->lb_ips_v4_reachable)) { + if (sset_count(&od->lb_ips->ips_v4_reachable)) { char *ipv4_addrs_name = lr_lb_address_set_name(od, AF_INET); - const char **ipv4_addrs = sset_array(&od->lb_ips_v4_reachable); + const char **ipv4_addrs = + sset_array(&od->lb_ips->ips_v4_reachable); sync_address_set(ovnsb_txn, ipv4_addrs_name, ipv4_addrs, - sset_count(&od->lb_ips_v4_reachable), + sset_count(&od->lb_ips->ips_v4_reachable), &sb_address_sets); free(ipv4_addrs_name); free(ipv4_addrs); } - if (sset_count(&od->lb_ips_v6_reachable)) { + if (sset_count(&od->lb_ips->ips_v6_reachable)) { char *ipv6_addrs_name = lr_lb_address_set_name(od, AF_INET6); - const char **ipv6_addrs = sset_array(&od->lb_ips_v6_reachable); + const char **ipv6_addrs = + sset_array(&od->lb_ips->ips_v6_reachable); sync_address_set(ovnsb_txn, ipv6_addrs_name, ipv6_addrs, - sset_count(&od->lb_ips_v6_reachable), + sset_count(&od->lb_ips->ips_v6_reachable), &sb_address_sets); free(ipv6_addrs_name); free(ipv6_addrs); diff --git a/northd/northd.h b/northd/northd.h index 8d299864f..0723d900a 100644 --- a/northd/northd.h +++ b/northd/northd.h @@ -236,16 +236,9 @@ struct ovn_datapath { struct lport_addresses dnat_force_snat_addrs; struct lport_addresses 
lb_force_snat_addrs; bool lb_force_snat_router_ip; - /* The "routable" ssets are subsets of the load balancer - * IPs for which IP routes and ARP resolution flows are automatically - * added - */ - struct sset lb_ips_v4; - struct sset lb_ips_v4_routable; - struct sset lb_ips_v4_reachable; - struct sset lb_ips_v6; - struct sset lb_ips_v6_routable; - struct sset lb_ips_v6_reachable; + + /* Load Balancer vIPs relevant for this datapath. */ + struct ovn_lb_ip_set *lb_ips; struct ovn_port **localnet_ports; size_t n_localnet_ports;