Message ID | 20210603122908.2066838-1-numans@ovn.org |
---|---|
State | Accepted |
Headers | show |
Series | ovn-controller: Split logical flow and physical flow processing | expand |
On Thu, Jun 3, 2021 at 5:29 AM <numans@ovn.org> wrote: > > From: Numan Siddique <numans@ovn.org> > > After the commit [1], we do a full recompute of ct zone I-P engine for > any datapath binding change. This results in physical_flow() getting > called. In a highly scaled environment this can result in costly CPU > cycles. This patch addressed this by handling the datapath binding > changes incrementally in the ct_zone engine node. ct zone recomputation > is required only when a datapath binding is deleted or a logical router > datapath binding changes the snat-ct-zone option value. This patch > handles this. > > [1] - f9cab11d5fab("Allow explicit setting of the SNAT zone on a gateway router.") > > Signed-off-by: Numan Siddique <numans@ovn.org> > --- > controller/ovn-controller.c | 55 ++++++++++++++++++++++++++++++++++++- > controller/physical.c | 5 ++++ > tests/ovn-performance.at | 26 ++++++++++++++++++ > 3 files changed, 85 insertions(+), 1 deletion(-) > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > index b49786441..5b55a45b7 100644 > --- a/controller/ovn-controller.c > +++ b/controller/ovn-controller.c > @@ -1734,6 +1734,58 @@ en_ct_zones_run(struct engine_node *node, void *data) > engine_set_node_state(node, EN_UPDATED); > } > > +/* Handles datapath binding changes for the ct_zones engine. > + * Returns false if the datapath is deleted or if the requested snat > + * ct zone doesn't match with the ct_zones data. */ > +static bool > +ct_zones_datapath_binding_handler(struct engine_node *node, void *data) > +{ > + struct ed_type_ct_zones *ct_zones_data = data; > + const struct sbrec_datapath_binding *dp; > + struct ed_type_runtime_data *rt_data = > + engine_get_input_data("runtime_data", node); > + struct sbrec_datapath_binding_table *dp_table = > + (struct sbrec_datapath_binding_table *)EN_OVSDB_GET( > + engine_get_input("SB_datapath_binding", node)); > + > + SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) { > + if (!get_local_datapath(&rt_data->local_datapaths, > + dp->tunnel_key)) { > + continue; > + } > + > + if (sbrec_datapath_binding_is_deleted(dp)) { > + /* Fall back to full recompute of ct_zones engine. */ > + return false; > + } What if a datapath_binding is created? Should we return false as well? > + > + int req_snat_zone = datapath_snat_ct_zone(dp); > + if (req_snat_zone == -1) { > + /* datapath snat ct zone is not set. This condition will also hit > + * when CMS clears the snat-ct-zone for the logical router. > + * In this case there is no harm in using the previosly specified > + * snat ct zone for this datapath. Also it is hard to know > + * if this option was cleared or if this option is never set. */ > + continue; > + } > + > + /* Check if the requested snat zone has changed for the datapath > + * or not. If so, then fall back to full recompute of > + * ct_zone engine. */ > + char *snat_dp_zone_key = alloc_nat_zone_key(&dp->header_.uuid, "snat"); > + struct simap_node *simap_node = simap_find(&ct_zones_data->current, > + snat_dp_zone_key); > + free(snat_dp_zone_key); > + if (!simap_node || simap_node->data != req_snat_zone) { > + /* There is no entry yet or the requested snat zone has changed. > + * Trigger full recompute of ct_zones engine. */ > + return false; > + } > + } > + > + return true; > +} > + > /* The data in the ct_zones node is always valid (i.e., no stale pointers). */ > static bool > en_ct_zones_is_valid(struct engine_node *node OVS_UNUSED) > @@ -2758,7 +2810,8 @@ main(int argc, char *argv[]) > > engine_add_input(&en_ct_zones, &en_ovs_open_vswitch, NULL); > engine_add_input(&en_ct_zones, &en_ovs_bridge, NULL); > - engine_add_input(&en_ct_zones, &en_sb_datapath_binding, NULL); > + engine_add_input(&en_ct_zones, &en_sb_datapath_binding, > + ct_zones_datapath_binding_handler); > engine_add_input(&en_ct_zones, &en_runtime_data, NULL); > > engine_add_input(&en_runtime_data, &en_ofctrl_is_connected, NULL); > diff --git a/controller/physical.c b/controller/physical.c > index 04259d44a..e70efc71d 100644 > --- a/controller/physical.c > +++ b/controller/physical.c > @@ -15,6 +15,7 @@ > > #include <config.h> > #include "binding.h" > +#include "coverage.h" > #include "byte-order.h" > #include "encaps.h" > #include "flow.h" > @@ -48,6 +49,8 @@ > > VLOG_DEFINE_THIS_MODULE(physical); > > +COVERAGE_DEFINE(physical_run); > + > /* Datapath zone IDs for connection tracking and NAT */ > struct zone_ids { > int ct; /* MFF_LOG_CT_ZONE. */ > @@ -1528,6 +1531,8 @@ void > physical_run(struct physical_ctx *p_ctx, > struct ovn_desired_flow_table *flow_table) > { > + COVERAGE_INC(physical_run); > + > if (!hc_uuid) { > hc_uuid = xmalloc(sizeof(struct uuid)); > uuid_generate(hc_uuid); > diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at > index e510c6cef..61356e16d 100644 > --- a/tests/ovn-performance.at > +++ b/tests/ovn-performance.at > @@ -462,6 +462,32 @@ OVN_CONTROLLER_EXPECT_HIT_COND( > [ovn-nbctl --wait=hv lrp-set-gateway-chassis lr1-public hv3 30 && ovn-nbctl --wait=hv sync] > ) > > +# Test physical_run for logical router and other datapath binding changes. > +OVN_CONTROLLER_EXPECT_HIT_COND( > + [hv1 hv2 hv3 hv4 hv5], [physical_run], [>0 >0 >0 =0 =0], > + [ovn-nbctl --wait=hv set logical_router lr1 options:snat-ct-zone=10 && ovn-nbctl --wait=hv sync] > +) > + > +OVN_CONTROLLER_EXPECT_HIT_COND( > + [hv1 hv2 hv3 hv4 hv5], [physical_run], [>0 >0 >0 =0 =0], > + [ovn-nbctl --wait=hv set logical_router lr1 options:snat-ct-zone=11 && ovn-nbctl --wait=hv sync] > +) > + > +OVN_CONTROLLER_EXPECT_NO_HIT( > + [hv1 hv2 hv3 hv4 hv5], [physical_run], > + [ovn-nbctl --wait=hv remove logical_router lr1 options snat-ct-zone && ovn-nbctl --wait=hv sync] > +) > + > +OVN_CONTROLLER_EXPECT_NO_HIT( > + [hv1 hv2 hv3 hv4 hv5], [physical_run], > + [ovn-sbctl set datapath_binding lr1 external_ids:foo=bar && ovn-nbctl --wait=hv sync] > +) > + > +OVN_CONTROLLER_EXPECT_NO_HIT( > + [hv1 hv2 hv3 hv4 hv5], [physical_run], > + [ovn-sbctl set datapath_binding ls1 external_ids:foo=bar && ovn-nbctl --wait=hv sync] > +) > + > # After this, BFD should be enabled from hv1 and hv2 to hv3. > # So there should be lflow_run hits in hv1, hv2, hv3 and hv4 > OVN_CONTROLLER_EXPECT_HIT_COND( > -- > 2.31.1 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
On Mon, Jun 14, 2021 at 3:28 AM Han Zhou <hzhou@ovn.org> wrote: > > On Thu, Jun 3, 2021 at 5:29 AM <numans@ovn.org> wrote: > > > > From: Numan Siddique <numans@ovn.org> > > > > After the commit [1], we do a full recompute of ct zone I-P engine for > > any datapath binding change. This results in physical_flow() getting > > called. In a highly scaled environment this can result in costly CPU > > cycles. This patch addressed this by handling the datapath binding > > changes incrementally in the ct_zone engine node. ct zone recomputation > > is required only when a datapath binding is deleted or a logical router > > datapath binding changes the snat-ct-zone option value. This patch > > handles this. > > > > [1] - f9cab11d5fab("Allow explicit setting of the SNAT zone on a gateway > router.") > > > > Signed-off-by: Numan Siddique <numans@ovn.org> > > --- > > controller/ovn-controller.c | 55 ++++++++++++++++++++++++++++++++++++- > > controller/physical.c | 5 ++++ > > tests/ovn-performance.at | 26 ++++++++++++++++++ > > 3 files changed, 85 insertions(+), 1 deletion(-) > > > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > index b49786441..5b55a45b7 100644 > > --- a/controller/ovn-controller.c > > +++ b/controller/ovn-controller.c > > @@ -1734,6 +1734,58 @@ en_ct_zones_run(struct engine_node *node, void > *data) > > engine_set_node_state(node, EN_UPDATED); > > } > > > > +/* Handles datapath binding changes for the ct_zones engine. > > + * Returns false if the datapath is deleted or if the requested snat > > + * ct zone doesn't match with the ct_zones data. */ > > +static bool > > +ct_zones_datapath_binding_handler(struct engine_node *node, void *data) > > +{ > > + struct ed_type_ct_zones *ct_zones_data = data; > > + const struct sbrec_datapath_binding *dp; > > + struct ed_type_runtime_data *rt_data = > > + engine_get_input_data("runtime_data", node); > > + struct sbrec_datapath_binding_table *dp_table = > > + (struct sbrec_datapath_binding_table *)EN_OVSDB_GET( > > + engine_get_input("SB_datapath_binding", node)); > > + > > + SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) { > > + if (!get_local_datapath(&rt_data->local_datapaths, > > + dp->tunnel_key)) { > > + continue; > > + } > > + > > + if (sbrec_datapath_binding_is_deleted(dp)) { > > + /* Fall back to full recompute of ct_zones engine. */ > > + return false; > > + } > > What if a datapath_binding is created? Should we return false as well? Hi Han, Thanks for the reviews. I don't think there is a need to trigger a recompute of ct_zones when a new datapath is created. In the patch 0 you mentioned - " I understand it may not affect correctness for now since new datapath always triggers recompute, but I think it's better to be explicit that the ct_zones node cannot handle it incrementally." But we don't trigger a full recompute of either runtime_data engine or flow_output_engine when a new datapath is created. Probably you meant recompute the ct_zone engine node ? Datapath handler for the runtime data (runtime_data_sb_datapath_binding_handler()), returns false only if a datapath is present in the local_datapaths and it is deleted. When a new datapath is added to local_datapaths, there will be a full recompute of ct_zones since the runtime data handler is NULL. With your suggestion we will trigger a recompute of ct_zones when a datapath is created, but not added to the local_datapaths. I think there is no need to trigger recompute. What do you think ? Thanks Numan > > > + > > + int req_snat_zone = datapath_snat_ct_zone(dp); > > + if (req_snat_zone == -1) { > > + /* datapath snat ct zone is not set. This condition will > also hit > > + * when CMS clears the snat-ct-zone for the logical router. > > + * In this case there is no harm in using the previosly > specified > > + * snat ct zone for this datapath. Also it is hard to know > > + * if this option was cleared or if this option is never > set. */ > > + continue; > > + } > > + > > + /* Check if the requested snat zone has changed for the datapath > > + * or not. If so, then fall back to full recompute of > > + * ct_zone engine. */ > > + char *snat_dp_zone_key = alloc_nat_zone_key(&dp->header_.uuid, > "snat"); > > + struct simap_node *simap_node = > simap_find(&ct_zones_data->current, > > + snat_dp_zone_key); > > + free(snat_dp_zone_key); > > + if (!simap_node || simap_node->data != req_snat_zone) { > > + /* There is no entry yet or the requested snat zone has > changed. > > + * Trigger full recompute of ct_zones engine. */ > > + return false; > > + } > > + } > > + > > + return true; > > +} > > + > > /* The data in the ct_zones node is always valid (i.e., no stale > pointers). */ > > static bool > > en_ct_zones_is_valid(struct engine_node *node OVS_UNUSED) > > @@ -2758,7 +2810,8 @@ main(int argc, char *argv[]) > > > > engine_add_input(&en_ct_zones, &en_ovs_open_vswitch, NULL); > > engine_add_input(&en_ct_zones, &en_ovs_bridge, NULL); > > - engine_add_input(&en_ct_zones, &en_sb_datapath_binding, NULL); > > + engine_add_input(&en_ct_zones, &en_sb_datapath_binding, > > + ct_zones_datapath_binding_handler); > > engine_add_input(&en_ct_zones, &en_runtime_data, NULL); > > > > engine_add_input(&en_runtime_data, &en_ofctrl_is_connected, NULL); > > diff --git a/controller/physical.c b/controller/physical.c > > index 04259d44a..e70efc71d 100644 > > --- a/controller/physical.c > > +++ b/controller/physical.c > > @@ -15,6 +15,7 @@ > > > > #include <config.h> > > #include "binding.h" > > +#include "coverage.h" > > #include "byte-order.h" > > #include "encaps.h" > > #include "flow.h" > > @@ -48,6 +49,8 @@ > > > > VLOG_DEFINE_THIS_MODULE(physical); > > > > +COVERAGE_DEFINE(physical_run); > > + > > /* Datapath zone IDs for connection tracking and NAT */ > > struct zone_ids { > > int ct; /* MFF_LOG_CT_ZONE. */ > > @@ -1528,6 +1531,8 @@ void > > physical_run(struct physical_ctx *p_ctx, > > struct ovn_desired_flow_table *flow_table) > > { > > + COVERAGE_INC(physical_run); > > + > > if (!hc_uuid) { > > hc_uuid = xmalloc(sizeof(struct uuid)); > > uuid_generate(hc_uuid); > > diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at > > index e510c6cef..61356e16d 100644 > > --- a/tests/ovn-performance.at > > +++ b/tests/ovn-performance.at > > @@ -462,6 +462,32 @@ OVN_CONTROLLER_EXPECT_HIT_COND( > > [ovn-nbctl --wait=hv lrp-set-gateway-chassis lr1-public hv3 30 && > ovn-nbctl --wait=hv sync] > > ) > > > > +# Test physical_run for logical router and other datapath binding > changes. > > +OVN_CONTROLLER_EXPECT_HIT_COND( > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], [>0 >0 >0 =0 =0], > > + [ovn-nbctl --wait=hv set logical_router lr1 options:snat-ct-zone=10 > && ovn-nbctl --wait=hv sync] > > +) > > + > > +OVN_CONTROLLER_EXPECT_HIT_COND( > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], [>0 >0 >0 =0 =0], > > + [ovn-nbctl --wait=hv set logical_router lr1 options:snat-ct-zone=11 > && ovn-nbctl --wait=hv sync] > > +) > > + > > +OVN_CONTROLLER_EXPECT_NO_HIT( > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], > > + [ovn-nbctl --wait=hv remove logical_router lr1 options snat-ct-zone > && ovn-nbctl --wait=hv sync] > > +) > > + > > +OVN_CONTROLLER_EXPECT_NO_HIT( > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], > > + [ovn-sbctl set datapath_binding lr1 external_ids:foo=bar && > ovn-nbctl --wait=hv sync] > > +) > > + > > +OVN_CONTROLLER_EXPECT_NO_HIT( > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], > > + [ovn-sbctl set datapath_binding ls1 external_ids:foo=bar && > ovn-nbctl --wait=hv sync] > > +) > > + > > # After this, BFD should be enabled from hv1 and hv2 to hv3. > > # So there should be lflow_run hits in hv1, hv2, hv3 and hv4 > > OVN_CONTROLLER_EXPECT_HIT_COND( > > -- > > 2.31.1 > > > > _______________________________________________ > > dev mailing list > > dev@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
On Mon, Jun 14, 2021 at 7:14 AM Numan Siddique <numans@ovn.org> wrote: > > On Mon, Jun 14, 2021 at 3:28 AM Han Zhou <hzhou@ovn.org> wrote: > > > > On Thu, Jun 3, 2021 at 5:29 AM <numans@ovn.org> wrote: > > > > > > From: Numan Siddique <numans@ovn.org> > > > > > > After the commit [1], we do a full recompute of ct zone I-P engine for > > > any datapath binding change. This results in physical_flow() getting > > > called. In a highly scaled environment this can result in costly CPU > > > cycles. This patch addressed this by handling the datapath binding > > > changes incrementally in the ct_zone engine node. ct zone recomputation > > > is required only when a datapath binding is deleted or a logical router > > > datapath binding changes the snat-ct-zone option value. This patch > > > handles this. > > > > > > [1] - f9cab11d5fab("Allow explicit setting of the SNAT zone on a gateway > > router.") > > > > > > Signed-off-by: Numan Siddique <numans@ovn.org> > > > --- > > > controller/ovn-controller.c | 55 ++++++++++++++++++++++++++++++++++++- > > > controller/physical.c | 5 ++++ > > > tests/ovn-performance.at | 26 ++++++++++++++++++ > > > 3 files changed, 85 insertions(+), 1 deletion(-) > > > > > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > > index b49786441..5b55a45b7 100644 > > > --- a/controller/ovn-controller.c > > > +++ b/controller/ovn-controller.c > > > @@ -1734,6 +1734,58 @@ en_ct_zones_run(struct engine_node *node, void > > *data) > > > engine_set_node_state(node, EN_UPDATED); > > > } > > > > > > +/* Handles datapath binding changes for the ct_zones engine. > > > + * Returns false if the datapath is deleted or if the requested snat > > > + * ct zone doesn't match with the ct_zones data. */ > > > +static bool > > > +ct_zones_datapath_binding_handler(struct engine_node *node, void *data) > > > +{ > > > + struct ed_type_ct_zones *ct_zones_data = data; > > > + const struct sbrec_datapath_binding *dp; > > > + struct ed_type_runtime_data *rt_data = > > > + engine_get_input_data("runtime_data", node); > > > + struct sbrec_datapath_binding_table *dp_table = > > > + (struct sbrec_datapath_binding_table *)EN_OVSDB_GET( > > > + engine_get_input("SB_datapath_binding", node)); > > > + > > > + SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) { > > > + if (!get_local_datapath(&rt_data->local_datapaths, > > > + dp->tunnel_key)) { > > > + continue; > > > + } > > > + > > > + if (sbrec_datapath_binding_is_deleted(dp)) { > > > + /* Fall back to full recompute of ct_zones engine. */ > > > + return false; > > > + } > > > > What if a datapath_binding is created? Should we return false as well? > > Hi Han, > > Thanks for the reviews. I don't think there is a need to trigger a recompute of > ct_zones when a new datapath is created. > > In the patch 0 you mentioned - " I understand it may not affect > correctness for now since new datapath always triggers recompute, but I > think it's better to be explicit that the ct_zones node cannot handle it > incrementally." > > But we don't trigger a full recompute of either runtime_data engine > or flow_output_engine > when a new datapath is created. Probably you meant recompute the > ct_zone engine node ? > > Datapath handler for the runtime data > (runtime_data_sb_datapath_binding_handler()), > returns false only if a datapath is present in the local_datapaths and > it is deleted. > > When a new datapath is added to local_datapaths, there will be a full > recompute of > ct_zones since the runtime data handler is NULL. With your suggestion we will > trigger a recompute of ct_zones when a datapath is created, but not added to the > local_datapaths. Patch 5 of the series does add the runtime data handler for ct_zones. I missed out this information in my previous reply. Thanks Numan > > I think there is no need to trigger recompute. What do you think ? > > Thanks > Numan > > > > > > + > > > + int req_snat_zone = datapath_snat_ct_zone(dp); > > > + if (req_snat_zone == -1) { > > > + /* datapath snat ct zone is not set. This condition will > > also hit > > > + * when CMS clears the snat-ct-zone for the logical router. > > > + * In this case there is no harm in using the previosly > > specified > > > + * snat ct zone for this datapath. Also it is hard to know > > > + * if this option was cleared or if this option is never > > set. */ > > > + continue; > > > + } > > > + > > > + /* Check if the requested snat zone has changed for the datapath > > > + * or not. If so, then fall back to full recompute of > > > + * ct_zone engine. */ > > > + char *snat_dp_zone_key = alloc_nat_zone_key(&dp->header_.uuid, > > "snat"); > > > + struct simap_node *simap_node = > > simap_find(&ct_zones_data->current, > > > + snat_dp_zone_key); > > > + free(snat_dp_zone_key); > > > + if (!simap_node || simap_node->data != req_snat_zone) { > > > + /* There is no entry yet or the requested snat zone has > > changed. > > > + * Trigger full recompute of ct_zones engine. */ > > > + return false; > > > + } > > > + } > > > + > > > + return true; > > > +} > > > + > > > /* The data in the ct_zones node is always valid (i.e., no stale > > pointers). */ > > > static bool > > > en_ct_zones_is_valid(struct engine_node *node OVS_UNUSED) > > > @@ -2758,7 +2810,8 @@ main(int argc, char *argv[]) > > > > > > engine_add_input(&en_ct_zones, &en_ovs_open_vswitch, NULL); > > > engine_add_input(&en_ct_zones, &en_ovs_bridge, NULL); > > > - engine_add_input(&en_ct_zones, &en_sb_datapath_binding, NULL); > > > + engine_add_input(&en_ct_zones, &en_sb_datapath_binding, > > > + ct_zones_datapath_binding_handler); > > > engine_add_input(&en_ct_zones, &en_runtime_data, NULL); > > > > > > engine_add_input(&en_runtime_data, &en_ofctrl_is_connected, NULL); > > > diff --git a/controller/physical.c b/controller/physical.c > > > index 04259d44a..e70efc71d 100644 > > > --- a/controller/physical.c > > > +++ b/controller/physical.c > > > @@ -15,6 +15,7 @@ > > > > > > #include <config.h> > > > #include "binding.h" > > > +#include "coverage.h" > > > #include "byte-order.h" > > > #include "encaps.h" > > > #include "flow.h" > > > @@ -48,6 +49,8 @@ > > > > > > VLOG_DEFINE_THIS_MODULE(physical); > > > > > > +COVERAGE_DEFINE(physical_run); > > > + > > > /* Datapath zone IDs for connection tracking and NAT */ > > > struct zone_ids { > > > int ct; /* MFF_LOG_CT_ZONE. */ > > > @@ -1528,6 +1531,8 @@ void > > > physical_run(struct physical_ctx *p_ctx, > > > struct ovn_desired_flow_table *flow_table) > > > { > > > + COVERAGE_INC(physical_run); > > > + > > > if (!hc_uuid) { > > > hc_uuid = xmalloc(sizeof(struct uuid)); > > > uuid_generate(hc_uuid); > > > diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at > > > index e510c6cef..61356e16d 100644 > > > --- a/tests/ovn-performance.at > > > +++ b/tests/ovn-performance.at > > > @@ -462,6 +462,32 @@ OVN_CONTROLLER_EXPECT_HIT_COND( > > > [ovn-nbctl --wait=hv lrp-set-gateway-chassis lr1-public hv3 30 && > > ovn-nbctl --wait=hv sync] > > > ) > > > > > > +# Test physical_run for logical router and other datapath binding > > changes. > > > +OVN_CONTROLLER_EXPECT_HIT_COND( > > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], [>0 >0 >0 =0 =0], > > > + [ovn-nbctl --wait=hv set logical_router lr1 options:snat-ct-zone=10 > > && ovn-nbctl --wait=hv sync] > > > +) > > > + > > > +OVN_CONTROLLER_EXPECT_HIT_COND( > > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], [>0 >0 >0 =0 =0], > > > + [ovn-nbctl --wait=hv set logical_router lr1 options:snat-ct-zone=11 > > && ovn-nbctl --wait=hv sync] > > > +) > > > + > > > +OVN_CONTROLLER_EXPECT_NO_HIT( > > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], > > > + [ovn-nbctl --wait=hv remove logical_router lr1 options snat-ct-zone > > && ovn-nbctl --wait=hv sync] > > > +) > > > + > > > +OVN_CONTROLLER_EXPECT_NO_HIT( > > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], > > > + [ovn-sbctl set datapath_binding lr1 external_ids:foo=bar && > > ovn-nbctl --wait=hv sync] > > > +) > > > + > > > +OVN_CONTROLLER_EXPECT_NO_HIT( > > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], > > > + [ovn-sbctl set datapath_binding ls1 external_ids:foo=bar && > > ovn-nbctl --wait=hv sync] > > > +) > > > + > > > # After this, BFD should be enabled from hv1 and hv2 to hv3. > > > # So there should be lflow_run hits in hv1, hv2, hv3 and hv4 > > > OVN_CONTROLLER_EXPECT_HIT_COND( > > > -- > > > 2.31.1 > > > > > > _______________________________________________ > > > dev mailing list > > > dev@openvswitch.org > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > _______________________________________________ > > dev mailing list > > dev@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > >
On Mon, Jun 14, 2021 at 7:20 AM Numan Siddique <numans@ovn.org> wrote: > > On Mon, Jun 14, 2021 at 7:14 AM Numan Siddique <numans@ovn.org> wrote: > > > > On Mon, Jun 14, 2021 at 3:28 AM Han Zhou <hzhou@ovn.org> wrote: > > > > > > On Thu, Jun 3, 2021 at 5:29 AM <numans@ovn.org> wrote: > > > > > > > > From: Numan Siddique <numans@ovn.org> > > > > > > > > After the commit [1], we do a full recompute of ct zone I-P engine for > > > > any datapath binding change. This results in physical_flow() getting > > > > called. In a highly scaled environment this can result in costly CPU > > > > cycles. This patch addressed this by handling the datapath binding > > > > changes incrementally in the ct_zone engine node. ct zone recomputation > > > > is required only when a datapath binding is deleted or a logical router > > > > datapath binding changes the snat-ct-zone option value. This patch > > > > handles this. > > > > > > > > [1] - f9cab11d5fab("Allow explicit setting of the SNAT zone on a gateway > > > router.") > > > > > > > > Signed-off-by: Numan Siddique <numans@ovn.org> > > > > --- > > > > controller/ovn-controller.c | 55 ++++++++++++++++++++++++++++++++++++- > > > > controller/physical.c | 5 ++++ > > > > tests/ovn-performance.at | 26 ++++++++++++++++++ > > > > 3 files changed, 85 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > > > index b49786441..5b55a45b7 100644 > > > > --- a/controller/ovn-controller.c > > > > +++ b/controller/ovn-controller.c > > > > @@ -1734,6 +1734,58 @@ en_ct_zones_run(struct engine_node *node, void > > > *data) > > > > engine_set_node_state(node, EN_UPDATED); > > > > } > > > > > > > > +/* Handles datapath binding changes for the ct_zones engine. > > > > + * Returns false if the datapath is deleted or if the requested snat > > > > + * ct zone doesn't match with the ct_zones data. */ > > > > +static bool > > > > +ct_zones_datapath_binding_handler(struct engine_node *node, void *data) > > > > +{ > > > > + struct ed_type_ct_zones *ct_zones_data = data; > > > > + const struct sbrec_datapath_binding *dp; > > > > + struct ed_type_runtime_data *rt_data = > > > > + engine_get_input_data("runtime_data", node); > > > > + struct sbrec_datapath_binding_table *dp_table = > > > > + (struct sbrec_datapath_binding_table *)EN_OVSDB_GET( > > > > + engine_get_input("SB_datapath_binding", node)); > > > > + > > > > + SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) { > > > > + if (!get_local_datapath(&rt_data->local_datapaths, > > > > + dp->tunnel_key)) { > > > > + continue; > > > > + } > > > > + > > > > + if (sbrec_datapath_binding_is_deleted(dp)) { > > > > + /* Fall back to full recompute of ct_zones engine. */ > > > > + return false; > > > > + } > > > > > > What if a datapath_binding is created? Should we return false as well? > > > > Hi Han, > > > > Thanks for the reviews. I don't think there is a need to trigger a recompute of > > ct_zones when a new datapath is created. > > > > In the patch 0 you mentioned - " I understand it may not affect > > correctness for now since new datapath always triggers recompute, but I > > think it's better to be explicit that the ct_zones node cannot handle it > > incrementally." > > > > But we don't trigger a full recompute of either runtime_data engine > > or flow_output_engine > > when a new datapath is created. Probably you meant recompute the > > ct_zone engine node ? > > > > Datapath handler for the runtime data > > (runtime_data_sb_datapath_binding_handler()), > > returns false only if a datapath is present in the local_datapaths and > > it is deleted. > > > > When a new datapath is added to local_datapaths, there will be a full > > recompute of > > ct_zones since the runtime data handler is NULL. With your suggestion we will > > trigger a recompute of ct_zones when a datapath is created, but not added to the > > local_datapaths. > > Patch 5 of the series does add the runtime data handler for ct_zones. > I missed out this information > in my previous reply. Oops. My bad. In the ct_zones_datapath_binding_handler(), we continue if the datapath is not in the local_datapaths. Sorry for the noise. I'll update the patch as per your suggestion. Makes sense to me. Thanks Numan > > Thanks > Numan > > > > > I think there is no need to trigger recompute. What do you think ? > > > > Thanks > > Numan > > > > > > > > > + > > > > + int req_snat_zone = datapath_snat_ct_zone(dp); > > > > + if (req_snat_zone == -1) { > > > > + /* datapath snat ct zone is not set. This condition will > > > also hit > > > > + * when CMS clears the snat-ct-zone for the logical router. > > > > + * In this case there is no harm in using the previosly > > > specified > > > > + * snat ct zone for this datapath. Also it is hard to know > > > > + * if this option was cleared or if this option is never > > > set. */ > > > > + continue; > > > > + } > > > > + > > > > + /* Check if the requested snat zone has changed for the datapath > > > > + * or not. If so, then fall back to full recompute of > > > > + * ct_zone engine. */ > > > > + char *snat_dp_zone_key = alloc_nat_zone_key(&dp->header_.uuid, > > > "snat"); > > > > + struct simap_node *simap_node = > > > simap_find(&ct_zones_data->current, > > > > + snat_dp_zone_key); > > > > + free(snat_dp_zone_key); > > > > + if (!simap_node || simap_node->data != req_snat_zone) { > > > > + /* There is no entry yet or the requested snat zone has > > > changed. > > > > + * Trigger full recompute of ct_zones engine. */ > > > > + return false; > > > > + } > > > > + } > > > > + > > > > + return true; > > > > +} > > > > + > > > > /* The data in the ct_zones node is always valid (i.e., no stale > > > pointers). */ > > > > static bool > > > > en_ct_zones_is_valid(struct engine_node *node OVS_UNUSED) > > > > @@ -2758,7 +2810,8 @@ main(int argc, char *argv[]) > > > > > > > > engine_add_input(&en_ct_zones, &en_ovs_open_vswitch, NULL); > > > > engine_add_input(&en_ct_zones, &en_ovs_bridge, NULL); > > > > - engine_add_input(&en_ct_zones, &en_sb_datapath_binding, NULL); > > > > + engine_add_input(&en_ct_zones, &en_sb_datapath_binding, > > > > + ct_zones_datapath_binding_handler); > > > > engine_add_input(&en_ct_zones, &en_runtime_data, NULL); > > > > > > > > engine_add_input(&en_runtime_data, &en_ofctrl_is_connected, NULL); > > > > diff --git a/controller/physical.c b/controller/physical.c > > > > index 04259d44a..e70efc71d 100644 > > > > --- a/controller/physical.c > > > > +++ b/controller/physical.c > > > > @@ -15,6 +15,7 @@ > > > > > > > > #include <config.h> > > > > #include "binding.h" > > > > +#include "coverage.h" > > > > #include "byte-order.h" > > > > #include "encaps.h" > > > > #include "flow.h" > > > > @@ -48,6 +49,8 @@ > > > > > > > > VLOG_DEFINE_THIS_MODULE(physical); > > > > > > > > +COVERAGE_DEFINE(physical_run); > > > > + > > > > /* Datapath zone IDs for connection tracking and NAT */ > > > > struct zone_ids { > > > > int ct; /* MFF_LOG_CT_ZONE. */ > > > > @@ -1528,6 +1531,8 @@ void > > > > physical_run(struct physical_ctx *p_ctx, > > > > struct ovn_desired_flow_table *flow_table) > > > > { > > > > + COVERAGE_INC(physical_run); > > > > + > > > > if (!hc_uuid) { > > > > hc_uuid = xmalloc(sizeof(struct uuid)); > > > > uuid_generate(hc_uuid); > > > > diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at > > > > index e510c6cef..61356e16d 100644 > > > > --- a/tests/ovn-performance.at > > > > +++ b/tests/ovn-performance.at > > > > @@ -462,6 +462,32 @@ OVN_CONTROLLER_EXPECT_HIT_COND( > > > > [ovn-nbctl --wait=hv lrp-set-gateway-chassis lr1-public hv3 30 && > > > ovn-nbctl --wait=hv sync] > > > > ) > > > > > > > > +# Test physical_run for logical router and other datapath binding > > > changes. > > > > +OVN_CONTROLLER_EXPECT_HIT_COND( > > > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], [>0 >0 >0 =0 =0], > > > > + [ovn-nbctl --wait=hv set logical_router lr1 options:snat-ct-zone=10 > > > && ovn-nbctl --wait=hv sync] > > > > +) > > > > + > > > > +OVN_CONTROLLER_EXPECT_HIT_COND( > > > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], [>0 >0 >0 =0 =0], > > > > + [ovn-nbctl --wait=hv set logical_router lr1 options:snat-ct-zone=11 > > > && ovn-nbctl --wait=hv sync] > > > > +) > > > > + > > > > +OVN_CONTROLLER_EXPECT_NO_HIT( > > > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], > > > > + [ovn-nbctl --wait=hv remove logical_router lr1 options snat-ct-zone > > > && ovn-nbctl --wait=hv sync] > > > > +) > > > > + > > > > +OVN_CONTROLLER_EXPECT_NO_HIT( > > > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], > > > > + [ovn-sbctl set datapath_binding lr1 external_ids:foo=bar && > > > ovn-nbctl --wait=hv sync] > > > > +) > > > > + > > > > +OVN_CONTROLLER_EXPECT_NO_HIT( > > > > + [hv1 hv2 hv3 hv4 hv5], [physical_run], > > > > + [ovn-sbctl set datapath_binding ls1 external_ids:foo=bar && > > > ovn-nbctl --wait=hv sync] > > > > +) > > > > + > > > > # After this, BFD should be enabled from hv1 and hv2 to hv3. > > > > # So there should be lflow_run hits in hv1, hv2, hv3 and hv4 > > > > OVN_CONTROLLER_EXPECT_HIT_COND( > > > > -- > > > > 2.31.1 > > > > > > > > _______________________________________________ > > > > dev mailing list > > > > dev@openvswitch.org > > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > > _______________________________________________ > > > dev mailing list > > > dev@openvswitch.org > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > >
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index b49786441..5b55a45b7 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -1734,6 +1734,58 @@ en_ct_zones_run(struct engine_node *node, void *data) engine_set_node_state(node, EN_UPDATED); } +/* Handles datapath binding changes for the ct_zones engine. + * Returns false if the datapath is deleted or if the requested snat + * ct zone doesn't match with the ct_zones data. */ +static bool +ct_zones_datapath_binding_handler(struct engine_node *node, void *data) +{ + struct ed_type_ct_zones *ct_zones_data = data; + const struct sbrec_datapath_binding *dp; + struct ed_type_runtime_data *rt_data = + engine_get_input_data("runtime_data", node); + struct sbrec_datapath_binding_table *dp_table = + (struct sbrec_datapath_binding_table *)EN_OVSDB_GET( + engine_get_input("SB_datapath_binding", node)); + + SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) { + if (!get_local_datapath(&rt_data->local_datapaths, + dp->tunnel_key)) { + continue; + } + + if (sbrec_datapath_binding_is_deleted(dp)) { + /* Fall back to full recompute of ct_zones engine. */ + return false; + } + + int req_snat_zone = datapath_snat_ct_zone(dp); + if (req_snat_zone == -1) { + /* datapath snat ct zone is not set. This condition will also hit + * when CMS clears the snat-ct-zone for the logical router. + * In this case there is no harm in using the previosly specified + * snat ct zone for this datapath. Also it is hard to know + * if this option was cleared or if this option is never set. */ + continue; + } + + /* Check if the requested snat zone has changed for the datapath + * or not. If so, then fall back to full recompute of + * ct_zone engine. */ + char *snat_dp_zone_key = alloc_nat_zone_key(&dp->header_.uuid, "snat"); + struct simap_node *simap_node = simap_find(&ct_zones_data->current, + snat_dp_zone_key); + free(snat_dp_zone_key); + if (!simap_node || simap_node->data != req_snat_zone) { + /* There is no entry yet or the requested snat zone has changed. + * Trigger full recompute of ct_zones engine. */ + return false; + } + } + + return true; +} + /* The data in the ct_zones node is always valid (i.e., no stale pointers). */ static bool en_ct_zones_is_valid(struct engine_node *node OVS_UNUSED) @@ -2758,7 +2810,8 @@ main(int argc, char *argv[]) engine_add_input(&en_ct_zones, &en_ovs_open_vswitch, NULL); engine_add_input(&en_ct_zones, &en_ovs_bridge, NULL); - engine_add_input(&en_ct_zones, &en_sb_datapath_binding, NULL); + engine_add_input(&en_ct_zones, &en_sb_datapath_binding, + ct_zones_datapath_binding_handler); engine_add_input(&en_ct_zones, &en_runtime_data, NULL); engine_add_input(&en_runtime_data, &en_ofctrl_is_connected, NULL); diff --git a/controller/physical.c b/controller/physical.c index 04259d44a..e70efc71d 100644 --- a/controller/physical.c +++ b/controller/physical.c @@ -15,6 +15,7 @@ #include <config.h> #include "binding.h" +#include "coverage.h" #include "byte-order.h" #include "encaps.h" #include "flow.h" @@ -48,6 +49,8 @@ VLOG_DEFINE_THIS_MODULE(physical); +COVERAGE_DEFINE(physical_run); + /* Datapath zone IDs for connection tracking and NAT */ struct zone_ids { int ct; /* MFF_LOG_CT_ZONE. */ @@ -1528,6 +1531,8 @@ void physical_run(struct physical_ctx *p_ctx, struct ovn_desired_flow_table *flow_table) { + COVERAGE_INC(physical_run); + if (!hc_uuid) { hc_uuid = xmalloc(sizeof(struct uuid)); uuid_generate(hc_uuid); diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at index e510c6cef..61356e16d 100644 --- a/tests/ovn-performance.at +++ b/tests/ovn-performance.at @@ -462,6 +462,32 @@ OVN_CONTROLLER_EXPECT_HIT_COND( [ovn-nbctl --wait=hv lrp-set-gateway-chassis lr1-public hv3 30 && ovn-nbctl --wait=hv sync] ) +# Test physical_run for logical router and other datapath binding changes. +OVN_CONTROLLER_EXPECT_HIT_COND( + [hv1 hv2 hv3 hv4 hv5], [physical_run], [>0 >0 >0 =0 =0], + [ovn-nbctl --wait=hv set logical_router lr1 options:snat-ct-zone=10 && ovn-nbctl --wait=hv sync] +) + +OVN_CONTROLLER_EXPECT_HIT_COND( + [hv1 hv2 hv3 hv4 hv5], [physical_run], [>0 >0 >0 =0 =0], + [ovn-nbctl --wait=hv set logical_router lr1 options:snat-ct-zone=11 && ovn-nbctl --wait=hv sync] +) + +OVN_CONTROLLER_EXPECT_NO_HIT( + [hv1 hv2 hv3 hv4 hv5], [physical_run], + [ovn-nbctl --wait=hv remove logical_router lr1 options snat-ct-zone && ovn-nbctl --wait=hv sync] +) + +OVN_CONTROLLER_EXPECT_NO_HIT( + [hv1 hv2 hv3 hv4 hv5], [physical_run], + [ovn-sbctl set datapath_binding lr1 external_ids:foo=bar && ovn-nbctl --wait=hv sync] +) + +OVN_CONTROLLER_EXPECT_NO_HIT( + [hv1 hv2 hv3 hv4 hv5], [physical_run], + [ovn-sbctl set datapath_binding ls1 external_ids:foo=bar && ovn-nbctl --wait=hv sync] +) + # After this, BFD should be enabled from hv1 and hv2 to hv3. # So there should be lflow_run hits in hv1, hv2, hv3 and hv4 OVN_CONTROLLER_EXPECT_HIT_COND(