@@ -62,6 +62,8 @@ lflow_get_input_data(struct engine_node *node,
EN_OVSDB_GET(engine_get_input("SB_igmp_group", node));
lflow_input->sbrec_logical_dp_group_table =
EN_OVSDB_GET(engine_get_input("SB_logical_dp_group", node));
+ lflow_input->sbrec_ecmp_nh_table =
+ EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
lflow_input->sbrec_mcast_group_by_name_dp =
engine_ovsdb_node_get_index(
@@ -259,6 +259,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
engine_add_input(&en_lflow, &en_port_group, lflow_port_group_handler);
engine_add_input(&en_lflow, &en_lr_stateful, lflow_lr_stateful_handler);
engine_add_input(&en_lflow, &en_ls_stateful, lflow_ls_stateful_handler);
+ engine_add_input(&en_lflow, &en_sb_ecmp_nexthop, NULL);
engine_add_input(&en_sync_to_sb_addr_set, &en_northd, NULL);
engine_add_input(&en_sync_to_sb_addr_set, &en_lr_stateful, NULL);
@@ -10558,14 +10558,16 @@ find_static_route_outport(struct ovn_datapath *od, const struct hmap *lr_ports,
}
static void
-add_ecmp_symmetric_reply_flows(struct lflow_table *lflows,
- struct ovn_datapath *od,
- bool ct_masked_mark,
- const char *port_ip,
- struct ovn_port *out_port,
- const struct parsed_route *route,
- struct ds *route_match,
- struct lflow_ref *lflow_ref)
+add_ecmp_symmetric_reply_flows(
+ struct lflow_table *lflows,
+ struct ovn_datapath *od,
+ bool ct_masked_mark,
+ const char *port_ip,
+ struct ovn_port *out_port,
+ const struct parsed_route *route,
+ struct ds *route_match,
+ struct lflow_ref *lflow_ref,
+ const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nh_table)
{
const struct nbrec_logical_router_static_route *st_route = route->route;
struct ds match = DS_EMPTY_INITIALIZER;
@@ -10601,15 +10603,28 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows,
* ds_put_cstr() call. The previous contents are needed.
*/
ds_put_cstr(&match, " && !ct.rpl && (ct.new || ct.est)");
+ struct ds nexthop_label = DS_EMPTY_INITIALIZER;
+
+ const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
+ SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop, sbrec_ecmp_nh_table) {
+ if (!strcmp(st_route->nexthop, sb_ecmp_nexthop->nexthop)) {
+ int id = sb_ecmp_nexthop->id;
+ ds_put_format(&nexthop_label, "ct_label.label = %d;", id);
+ break;
+ }
+ }
+
ds_put_format(&actions,
"ct_commit { ct_label.ecmp_reply_eth = eth.src; "
- " %s = %" PRId64 ";}; "
+ " %s = %" PRId64 "; %s }; "
"next;",
- ct_ecmp_reply_port_match, out_port->sb->tunnel_key);
+ ct_ecmp_reply_port_match, out_port->sb->tunnel_key,
+ ds_cstr(&nexthop_label));
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100,
ds_cstr(&match), ds_cstr(&actions),
&st_route->header_,
lflow_ref);
+ ds_destroy(&nexthop_label);
/* Bypass ECMP selection if we already have ct_label information
* for where to route the packet.
@@ -10660,10 +10675,12 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows,
}
static void
-build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od,
- bool ct_masked_mark, const struct hmap *lr_ports,
- struct ecmp_groups_node *eg,
- struct lflow_ref *lflow_ref)
+build_ecmp_route_flow(
+ struct lflow_table *lflows, struct ovn_datapath *od,
+ bool ct_masked_mark, const struct hmap *lr_ports,
+ struct ecmp_groups_node *eg,
+ struct lflow_ref *lflow_ref,
+ const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nh_table)
{
bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(&eg->prefix);
@@ -10721,7 +10738,7 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od,
add_ecmp_symmetric_reply_flows(lflows, od, ct_masked_mark,
lrp_addr_s, out_port,
route_, &route_match,
- lflow_ref);
+ lflow_ref, sbrec_ecmp_nh_table);
}
ds_clear(&match);
ds_put_format(&match, REG_ECMP_GROUP_ID" == %"PRIu16" && "
@@ -12581,7 +12598,8 @@ build_static_route_flows_for_lrouter(
struct ovn_datapath *od, const struct chassis_features *features,
struct lflow_table *lflows, const struct hmap *lr_ports,
const struct hmap *bfd_connections,
- struct lflow_ref *lflow_ref)
+ struct lflow_ref *lflow_ref,
+ const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nh_table)
{
ovs_assert(od->nbr);
ovn_lflow_add_default_drop(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP,
@@ -12630,7 +12648,7 @@ build_static_route_flows_for_lrouter(
/* add a flow in IP_ROUTING, and one flow for each member in
* IP_ROUTING_ECMP. */
build_ecmp_route_flow(lflows, od, features->ct_no_masked_label,
- lr_ports, group, lflow_ref);
+ lr_ports, group, lflow_ref, sbrec_ecmp_nh_table);
}
const struct unique_routes_node *ur;
HMAP_FOR_EACH (ur, hmap_node, &unique_routes) {
@@ -15541,6 +15559,7 @@ struct lswitch_flow_build_info {
struct ds actions;
size_t thread_lflow_counter;
const char *svc_monitor_mac;
+ const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nh_table;
};
/* Helper function to combine all lflow generation which is iterated by
@@ -15588,7 +15607,7 @@ build_lswitch_and_lrouter_iterate_by_lr(struct ovn_datapath *od,
build_static_route_flows_for_lrouter(od, lsi->features,
lsi->lflows, lsi->lr_ports,
lsi->bfd_connections,
- NULL);
+ NULL, lsi->sbrec_ecmp_nh_table);
build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match,
&lsi->actions, NULL);
build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports,
@@ -15905,7 +15924,8 @@ build_lswitch_and_lrouter_flows(
const struct hmap *svc_monitor_map,
const struct hmap *bfd_connections,
const struct chassis_features *features,
- const char *svc_monitor_mac)
+ const char *svc_monitor_mac,
+ const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nh_table)
{
char *svc_check_match = xasprintf("eth.dst == %s", svc_monitor_mac);
@@ -15939,6 +15959,7 @@ build_lswitch_and_lrouter_flows(
lsiv[index].svc_check_match = svc_check_match;
lsiv[index].thread_lflow_counter = 0;
lsiv[index].svc_monitor_mac = svc_monitor_mac;
+ lsiv[index].sbrec_ecmp_nh_table = sbrec_ecmp_nh_table;
ds_init(&lsiv[index].match);
ds_init(&lsiv[index].actions);
@@ -15981,6 +16002,7 @@ build_lswitch_and_lrouter_flows(
.svc_monitor_mac = svc_monitor_mac,
.match = DS_EMPTY_INITIALIZER,
.actions = DS_EMPTY_INITIALIZER,
+ .sbrec_ecmp_nh_table = sbrec_ecmp_nh_table,
};
/* Combined build - all lflow generation from lswitch and lrouter
@@ -16140,7 +16162,8 @@ void build_lflows(struct ovsdb_idl_txn *ovnsb_txn,
input_data->svc_monitor_map,
input_data->bfd_connections,
input_data->features,
- input_data->svc_monitor_mac);
+ input_data->svc_monitor_mac,
+ input_data->sbrec_ecmp_nh_table);
if (parallelization_state == STATE_INIT_HASH_SIZES) {
parallelization_state = STATE_USE_PARALLELIZATION;
@@ -175,6 +175,7 @@ struct lflow_input {
const struct sbrec_multicast_group_table *sbrec_multicast_group_table;
const struct sbrec_igmp_group_table *sbrec_igmp_group_table;
const struct sbrec_logical_dp_group_table *sbrec_logical_dp_group_table;
+ const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nh_table;
/* Indexes */
struct ovsdb_idl_index *sbrec_mcast_group_by_name_dp;
@@ -29181,7 +29181,7 @@ AT_CHECK([
for hv in 1 2; do
grep table=17 hv${hv}flows | \
grep "priority=100" | \
- grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]]))"
+ grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[96..127\\]]))"
grep table=25 hv${hv}flows | \
grep "priority=200" | \
@@ -29306,7 +29306,7 @@ AT_CHECK([
for hv in 1 2; do
grep table=17 hv${hv}flows | \
grep "priority=100" | \
- grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]]))"
+ grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[96..127\\]]))"
grep table=25 hv${hv}flows | \
grep "priority=200" | \
@@ -6121,19 +6121,21 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.2 | FORMAT_PING], \
# and just ensure that the known ethernet address is present.
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.0.1) | \
sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
-sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl
-icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x401020400000000
-tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x401020400000000,protoinfo=(state=<cleared>)
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' |
+sed -e 's/labels=0x[[0-9]]/labels=0x?/'], [0], [dnl
+icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000
+tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000,protoinfo=(state=<cleared>)
])
# Ensure datapaths show conntrack states as expected
# Like with conntrack entries, we shouldn't try to predict
# port binding tunnel keys. So omit them from expected labels.
-ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)'
-AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)' -c], [0], [dnl
+AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e 's/label=0x[[0-9]]/label=0x?/' | \
+grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000000401020400000000/.*)' -c], [0], [dnl
2
])
-AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x401020400000000)' -c], [0], [dnl
+AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e 's/ct_label(0x[0-9]/ct_label(0x?/' | \
+grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000000401020400000000)' -c]], [0], [dnl
2
])
@@ -6152,18 +6154,21 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.2 | FORMAT_PING], \
[0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl
+AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e 's/label=0x[[0-9]]/label=0x?/' | \
+grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000001001020400000000/.*)' -c], [0], [dnl
2
])
-AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl
+AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e 's/ct_label(0x[0-9]/ct_label(0x?/' | \
+grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000001001020400000000)' -c]], [0], [dnl
2
])
-AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | FORMAT_CT(172.16.0.1) | \
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 1001020400000000 | FORMAT_CT(172.16.0.1) | \
sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
-sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
-icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000
-tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000,protoinfo=(state=<cleared>)
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' |
+sed -e 's/labels=0x[[0-9]]/labels=0x?/' | sort], [0], [dnl
+icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000
+tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000,protoinfo=(state=<cleared>)
])
# Check entries in table 76 and 77 expires w/o traffic
OVS_WAIT_UNTIL([
@@ -6322,11 +6327,12 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 fd01::2 | FORMAT_PING], \
# Ensure datapaths show conntrack states as expected
# Like with conntrack entries, we shouldn't try to predict
# port binding tunnel keys. So omit them from expected labels.
-AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)' -c], [0], [dnl
+AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e 's/label=0x[[0-9]]/label=0x?/' | \
+grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000000401020400000000/.*)' -c], [0], [dnl
2
])
-
-AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x401020400000000)' -c], [0], [dnl
+AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e 's/ct_label(0x[0-9]/ct_label(0x?/' | \
+grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000000401020400000000)' -c]], [0], [dnl
2
])
@@ -6335,9 +6341,10 @@ AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_lab
# and just ensure that the known ethernet address is present.
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd01::2) | \
sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
-sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
-icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x401020400000000
-tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x401020400000000,protoinfo=(state=<cleared>)
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' |
+sed -e 's/labels=0x[[0-9]]/labels=0x?/' | sort], [0], [dnl
+icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000
+tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000,protoinfo=(state=<cleared>)
])
# Flush conntrack entries for easier output parsing of next test.
@@ -6354,18 +6361,21 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 fd01::2 | FORMAT_PING], \
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl
+AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e 's/label=0x[[0-9]]/label=0x?/' | \
+grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000001001020400000000/.*)' -c], [0], [dnl
2
])
-AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl
+AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e 's/ct_label(0x[0-9]/ct_label(0x?/' | \
+grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000001001020400000000)' -c]], [0], [dnl
2
])
-AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | FORMAT_CT(fd01::2) | \
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 1001020400000000 | FORMAT_CT(fd01::2) | \
sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
-sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl
-icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000
-tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000,protoinfo=(state=<cleared>)
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/' |
+sed -e 's/labels=0x[[0-9]]/labels=0x?/'], [0], [dnl
+icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000
+tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000,protoinfo=(state=<cleared>)
])
# Check entries in table 76 and 77 expires w/o traffic
Introduce the nexthop identifier in the ct_label.label field for ecmp-symmetric-reply connections. This field will be used by ovn-controller to track ct entries and to flush them when requested by the CMS (e.g. when the related static routes are removed). Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> --- northd/en-lflow.c | 2 ++ northd/inc-proc-northd.c | 1 + northd/northd.c | 63 +++++++++++++++++++++++++++------------- northd/northd.h | 1 + tests/ovn.at | 4 +-- tests/system-ovn.at | 58 +++++++++++++++++++++--------------- 6 files changed, 83 insertions(+), 46 deletions(-)