@@ -105,6 +105,25 @@ lookup_port_cb(const void *aux_, const char *port_name, unsigned int *portp)
return false;
}
+/* Given the OVN port name, get its openflow port */
+static bool
+tunnel_ofport_cb(const void *aux_, const char *port_name, ofp_port_t *ofport)
+{
+ const struct lookup_port_aux *aux = aux_;
+
+ const struct sbrec_port_binding *pb
+ = lport_lookup_by_name(aux->sbrec_port_binding_by_name, port_name);
+ if (!pb || (pb->datapath != aux->dp) || !pb->chassis) {
+ return false;
+ }
+
+ if (!get_tunnel_ofport(pb->chassis->name, NULL, ofport)) {
+ return false;
+ }
+
+ return true;
+}
+
static bool
is_chassis_resident_cb(const void *c_aux_, const char *port_name)
{
@@ -773,6 +792,7 @@ consider_logical_flow(
struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
struct ovnact_encode_params ep = {
.lookup_port = lookup_port_cb,
+ .tunnel_ofport = tunnel_ofport_cb,
.aux = &aux,
.is_switch = is_switch(ldp),
.group_table = group_table,
@@ -1794,3 +1794,16 @@ physical_run(struct ovsdb_idl_index *sbrec_port_binding_by_name,
simap_destroy(&new_tunnel_to_ofport);
}
+
+bool
+get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t *ofport)
+{
+ struct chassis_tunnel *tun = NULL;
+ tun = chassis_tunnel_find(chassis_name, encap_ip);
+ if (!tun) {
+ return false;
+ }
+
+ *ofport = tun->ofport;
+ return true;
+}
@@ -72,4 +72,8 @@ void physical_handle_mc_group_changes(
const struct simap *ct_zones,
const struct hmap *local_datapaths,
struct ovn_desired_flow_table *);
+bool get_tunnel_ofport(
+ const char *chassis_name,
+ char *encap_ip,
+ ofp_port_t *ofport);
#endif /* controller/physical.h */
@@ -90,7 +90,8 @@ struct ovn_extend_table;
OVNACT(CHECK_PKT_LARGER, ovnact_check_pkt_larger) \
OVNACT(TRIGGER_EVENT, ovnact_controller_event) \
OVNACT(BIND_VPORT, ovnact_bind_vport) \
- OVNACT(HANDLE_SVC_CHECK, ovnact_handle_svc_check)
+ OVNACT(HANDLE_SVC_CHECK, ovnact_handle_svc_check) \
+ OVNACT(FWD_GROUP, ovnact_fwd_group)
/* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
enum OVS_PACKED_ENUM ovnact_type {
@@ -374,6 +375,15 @@ struct ovnact_handle_svc_check {
struct expr_field port; /* Logical port name. */
};
/* OVNACT_FWD_GROUP. */
struct ovnact_fwd_group {
    struct ovnact ovnact;
    bool liveness;          /* Watch tunnel liveness (BFD) per bucket. */
    char **child_ports;     /* Logical ports */
    size_t n_child_ports;   /* Number of entries in 'child_ports'. */
    uint8_t ltable;         /* Logical table ID of next table. */
};
+
/* Internal use by the helpers below. */
void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
@@ -635,6 +645,13 @@ struct ovnact_encode_params {
* '*portp' and returns true; otherwise, returns false. */
bool (*lookup_port)(const void *aux, const char *port_name,
unsigned int *portp);
+
+ /* Looks up tunnel port to a chassis by its port name. If found, stores
+ * its openflow port number in '*ofport' and returns true;
+ * otherwise, returns false. */
+ bool (*tunnel_ofport)(const void *aux, const char *port_name,
+ ofp_port_t *ofport);
+
const void *aux;
/* 'true' if the flow is for a switch. */
@@ -2988,6 +2988,146 @@ ovnact_handle_svc_check_free(struct ovnact_handle_svc_check *sc OVS_UNUSED)
{
}
+static void
+parse_fwd_group_action(struct action_context *ctx)
+{
+ char *child_port, **child_port_list = NULL;
+ size_t allocated_ports = 0;
+ size_t n_child_ports = 0;
+ bool liveness = false;
+
+ if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
+ if (lexer_match_id(ctx->lexer, "liveness")) {
+ if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
+ return;
+ }
+ if (ctx->lexer->token.type != LEX_T_STRING) {
+ lexer_syntax_error(ctx->lexer,
+ "expecting true/false");
+ return;
+ }
+ if (!strcmp(ctx->lexer->token.s, "true")) {
+ liveness = true;
+ lexer_get(ctx->lexer);
+ }
+ lexer_force_match(ctx->lexer, LEX_T_COMMA);
+ }
+ if (lexer_match_id(ctx->lexer, "childports")) {
+ if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
+ return;
+ }
+ while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
+ if (ctx->lexer->token.type != LEX_T_STRING) {
+ lexer_syntax_error(ctx->lexer,
+ "expecting logical switch port");
+ if (child_port_list) {
+ free(child_port_list);
+ }
+ return;
+ }
+ /* Parse child's logical ports */
+ child_port = xstrdup(ctx->lexer->token.s);
+ lexer_get(ctx->lexer);
+ lexer_match(ctx->lexer, LEX_T_COMMA);
+
+ if (n_child_ports >= allocated_ports) {
+ child_port_list = x2nrealloc(child_port_list,
+ &allocated_ports,
+ sizeof *child_port_list);
+ }
+ child_port_list[n_child_ports++] = child_port;
+ }
+ }
+ }
+
+ struct ovnact_fwd_group *fwd_group = ovnact_put_FWD_GROUP(ctx->ovnacts);
+ fwd_group->ltable = ctx->pp->cur_ltable + 1;
+ fwd_group->liveness = liveness;
+ fwd_group->child_ports = child_port_list;
+ fwd_group->n_child_ports = n_child_ports;
+}
+
+static void
+format_FWD_GROUP(const struct ovnact_fwd_group *fwd_group, struct ds *s)
+{
+ ds_put_cstr(s, "fwd_group(");
+ if (fwd_group->liveness) {
+ ds_put_cstr(s, "liveness=true,");
+ }
+ if (fwd_group->n_child_ports) {
+ for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
+ if (i) {
+ ds_put_cstr(s, ", ");
+ }
+
+ ds_put_format(s, "childports=%s", fwd_group->child_ports[i]);
+ }
+ }
+ ds_put_cstr(s, ");");
+}
+
+static void
+encode_FWD_GROUP(const struct ovnact_fwd_group *fwd_group,
+ const struct ovnact_encode_params *ep,
+ struct ofpbuf *ofpacts)
+{
+ if (!fwd_group->n_child_ports) {
+ /* Nothing to do without child ports */
+ return;
+ }
+
+ uint32_t reg_index = MFF_LOG_OUTPORT - MFF_REG0;
+ struct ds ds = DS_EMPTY_INITIALIZER;
+
+ ds_put_format(&ds, "type=select,selection_method=dp_hash");
+
+ for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
+ uint32_t port_tunnel_key;
+ ofp_port_t ofport;
+
+ const char *port_name = fwd_group->child_ports[i];
+
+ /* Find the tunnel key of the logical port */
+ if (!ep->lookup_port(ep->aux, port_name, &port_tunnel_key)) {
+ return;
+ }
+ ds_put_format(&ds, ",bucket=");
+
+ if (fwd_group->liveness) {
+ /* Find the openflow port number of the tunnel port */
+ if (!ep->tunnel_ofport(ep->aux, port_name, &ofport)) {
+ return;
+ }
+
+ /* Watch port for failure, used with BFD */
+ ds_put_format(&ds, "watch_port:%d,", ofport);
+ }
+
+ ds_put_format(&ds, "load=0x%d->NXM_NX_REG%d[0..15]",
+ port_tunnel_key, reg_index);
+ ds_put_format(&ds, ",resubmit(,%d)", ep->output_ptable);
+ }
+
+ uint32_t table_id = 0;
+ struct ofpact_group *og;
+ table_id = ovn_extend_table_assign_id(ep->group_table, ds_cstr(&ds),
+ ep->lflow_uuid);
+ ds_destroy(&ds);
+ if (table_id == EXT_TABLE_ID_INVALID) {
+ return;
+ }
+
+ /* Create an action to set the group */
+ og = ofpact_put_GROUP(ofpacts);
+ og->group_id = table_id;
+}
+
+static void
+ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
+{
+ free(fwd_group->child_ports);
+}
+
/* Parses an assignment or exchange or put_dhcp_opts action. */
static void
parse_set_action(struct action_context *ctx)
@@ -3110,6 +3250,8 @@ parse_action(struct action_context *ctx)
parse_bind_vport(ctx);
} else if (lexer_match_id(ctx->lexer, "handle_svc_check")) {
parse_handle_svc_check(ctx);
+ } else if (lexer_match_id(ctx->lexer, "fwd_group")) {
+ parse_fwd_group_action(ctx);
} else {
lexer_syntax_error(ctx->lexer, "expecting action");
}
@@ -5428,6 +5428,61 @@ build_stateful(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs)
}
static void
+build_fwd_group_lflows(struct ovn_datapath *od, struct hmap *lflows)
+{
+ struct ds match = DS_EMPTY_INITIALIZER;
+ struct ds actions = DS_EMPTY_INITIALIZER;
+
+ for (int i = 0; i < od->nbs->n_forwarding_groups; ++i) {
+ const struct nbrec_forwarding_group *fwd_group = NULL;
+ fwd_group = od->nbs->forwarding_groups[i];
+ if (!fwd_group || (fwd_group->n_child_port == 0)) {
+ continue;
+ }
+
+ /* ARP responder for the forwarding group's virtual IP */
+ ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
+ fwd_group->vip);
+ ds_put_format(&actions,
+ "eth.dst = eth.src; "
+ "eth.src = %s; "
+ "arp.op = 2; /* ARP reply */ "
+ "arp.tha = arp.sha; "
+ "arp.sha = %s; "
+ "arp.tpa = arp.spa; "
+ "arp.spa = %s; "
+ "outport = inport; "
+ "flags.loopback = 1; "
+ "output;",
+ fwd_group->vmac, fwd_group->vmac, fwd_group->vip);
+
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 50,
+ ds_cstr(&match), ds_cstr(&actions));
+
+ /* L2 lookup for the forwarding group's virtual MAC */
+ ds_clear(&match);
+ ds_put_format(&match, "eth.dst == %s", fwd_group->vmac);
+
+ /* Create a comma separated string of child ports */
+ struct ds group_ports = DS_EMPTY_INITIALIZER;
+ if (fwd_group->liveness) {
+ ds_put_cstr(&group_ports, "liveness=\"true\",");
+ }
+ ds_put_cstr(&group_ports, "childports=");
+ for (i = 0; i < (fwd_group->n_child_port - 1); ++i) {
+ ds_put_format(&group_ports, "\"%s\",", fwd_group->child_port[i]);
+ }
+ ds_put_format(&group_ports, "\"%s\"",
+ fwd_group->child_port[fwd_group->n_child_port - 1]);
+
+ ds_clear(&actions);
+ ds_put_format(&actions, "fwd_group(%s);", ds_cstr(&group_ports));
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 50,
+ ds_cstr(&match), ds_cstr(&actions));
+ }
+}
+
+static void
build_lrouter_groups__(struct hmap *ports, struct ovn_datapath *od)
{
ovs_assert((od && od->nbr && od->lr_group));
@@ -5727,6 +5782,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
build_stateful(od, lflows, lbs);
}
+ /* Build logical flows for the forwarding groups */
+ HMAP_FOR_EACH (od, key_node, datapaths) {
+ if (!od->nbs || !od->nbs->n_forwarding_groups) {
+ continue;
+ }
+
+ build_fwd_group_lflows(od, lflows);
+ }
+
/* Logical switch ingress table 0: Admission control framework (priority
* 100). */
HMAP_FOR_EACH (od, key_node, datapaths) {
@@ -2291,6 +2291,9 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
case OVNACT_HANDLE_SVC_CHECK:
break;
+
+ case OVNACT_FWD_GROUP:
+ break;
}
}
ds_destroy(&s);
A forwarding group is an aggregation of logical switch ports of a logical switch to load balance traffic across the ports. It also detects the liveness if the logical switch ports are realized as OVN tunnel ports on the physical topology. In the below logical topology diagram, the logical switch has two ports connected to chassis / external routers R1 and R2. The logical router needs to send traffic to an external network that is connected through R1 and R2. +----+ +----------+ R1 | ***** / +----+ ** ** +----------+ +--------------+ / lsp1 * * | Logical | | Logical |/ * External * | Router +--------+ switch X * Network * | | | |\ * * +----------+ +--------------+ \ lsp2 * * ^ \ +----+ ** ** | +----------+ R2 | ***** | +----+ fwd_group -> (lsp1, lsp2) In the absence of forwarding group, the logical router will have unicast route to point to either R1 or R2. In case of R1 or R2 going down, it will require control plane's intervention to update the route to point to proper nexthop. With forwarding group, a virtual IP (VIP) and virtual MAC (VMAC) address are configured on the forwarding group. The logical router points to the forwarding group's VIP as the nexthop for hosts behind R1 and R2. [root@fwd-group]# ovn-nbctl fwd-group-add fwd ls1 VIP_1 VMAC_1 lsp1 lsp2 [root@fwd-group]# ovn-nbctl fwd-group-list FWD_GROUP LS VIP VMAC CHILD_PORTS fwd ls1 VIP_1 VMAC_1 lsp1 lsp2 [root@fwd-group]# ovn-nbctl lr-route-list lr1 IPv4 Routes external_host_prefix/prefix_len VIP_1 dst-ip The logical switch will install an ARP responder rule to reply with VMAC as the MAC address for ARP requests for VIP. It will also install a MAC lookup rule for VMAC with action to load balance across the logical switch ports of the forwarding group. 
Datapath: "ls1" Pipeline: ingress table=10(ls_in_arp_rsp ), priority=50 , match=(arp.tpa == VIP_1 && arp.op == 1), action=(eth.dst = eth.src; eth.src = VMAC_1; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = VMAC_1; arp.tpa = arp.spa; arp.spa = VIP; outport = inport; flags.loopback = 1; output;) table=13(ls_in_l2_lkup ), priority=50 , match=(eth.dst == VMAC_1), action=(fwd_group(childports="lsp1","lsp2");) In the physical topology, OVN managed hypervisors are connected to R1 and R2 through overlay tunnels. The logical flow's "fwd_group" action mentioned above, gets translated to openflow group type "select" with one bucket for each logical switch port. cookie=0x0, duration=16.869s, table=29, n_packets=4, n_bytes=392, idle_age=0, priority=111,metadata=0x9,dl_dst=VMAC_1 actions=group:1 group_id=1,type=select,selection_method=dp_hash, bucket=actions=load:0x2->NXM_NX_REG15[0..15], resubmit(,32), bucket=actions=load:0x3->NXM_NX_REG15[0..15],resubmit(,32) where 0x2 and 0x3 are port tunnel keys of lsp1 and lsp2. The openflow group type "select" with selection method "dp_hash" load balances traffic based on source and destination Ethernet address, VLAN ID, Ethernet type, IPv4/v6 source and destination address and protocol, and for TCP and SCTP only, the source and destination ports. To detect path failure between OVN managed hypervisors and (R1, R2), BFD is enabled on the tunnel interfaces. The openflow group is modified to include watch_port for liveness detection of a port. The forwarding group must be configured with --liveness to enable it. 
With liveness enabled, the logical flow changes to: table=13(ls_in_l2_lkup ), priority=50 , match=(eth.dst == VMAC_1), action=(fwd_group(liveness="true",childports="lsp1","lsp2");) While the openflow group is: group_id=1,type=select,selection_method=dp_hash, bucket=watch_port:31,actions=load:0x2->NXM_NX_REG15[0..15],resubmit(,32), bucket=watch_port:32,actions=load:0x3->NXM_NX_REG15[0..15],resubmit(,32) Where 31 and 32 are ovs port numbers for the tunnel interfaces connecting to R1 and R2. If the BFD forwarding status is down for any of the tunnels, the corresponding bucket will not be selected for packet forwarding. Signed-off-by: Manoj Sharma <manoj.sharma@nutanix.com> --- controller/lflow.c | 20 +++++++ controller/physical.c | 13 +++++ controller/physical.h | 4 ++ include/ovn/actions.h | 19 ++++++- lib/actions.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++ northd/ovn-northd.c | 64 +++++++++++++++++++++++ utilities/ovn-trace.c | 3 ++ 7 files changed, 264 insertions(+), 1 deletion(-)