@@ -79,6 +79,8 @@ struct uuid;
#define OFTABLE_CHK_IN_PORT_SEC 73
#define OFTABLE_CHK_IN_PORT_SEC_ND 74
#define OFTABLE_CHK_OUT_PORT_SEC 75
+#define OFTABLE_ECMP_NH_MAC 76 /* Learned flows: 5-tuple and eth.src. */
+#define OFTABLE_ECMP_NH 77 /* Learned flows: reversed 5-tuple. */
enum ref_type {
REF_TYPE_ADDRSET,
@@ -118,6 +118,7 @@ struct ovn_extend_table;
OVNACT(LOOKUP_FDB, ovnact_lookup_fdb) \
OVNACT(CHECK_IN_PORT_SEC, ovnact_result) \
OVNACT(CHECK_OUT_PORT_SEC, ovnact_result) \
+ OVNACT(COMMIT_ECMP_NH, ovnact_commit_ecmp_nh) \
/* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
enum OVS_PACKED_ENUM ovnact_type {
@@ -453,6 +454,13 @@ struct ovnact_lookup_fdb {
struct expr_field dst; /* 1-bit destination field. */
};
+/* OVNACT_COMMIT_ECMP_NH.
+ *
+ * Arguments of a "commit_ecmp_nh(ipv6 = ..., proto = ...);" action. */
+struct ovnact_commit_ecmp_nh {
+ struct ovnact ovnact;
+ bool ipv6; /* Learn IPv6 (true) or IPv4 (false) flows. */
+ uint8_t proto; /* IPPROTO_TCP, IPPROTO_UDP or IPPROTO_SCTP. */
+};
+
/* Internal use by the helpers below. */
void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
@@ -70,6 +70,7 @@ enum mff_log_flags_bits {
MLF_LOCALPORT_BIT = 10,
MLF_USE_SNAT_ZONE = 11,
MLF_CHECK_PORT_SEC_BIT = 12,
+ MLF_LOOKUP_COMMIT_ECMP_NH_BIT = 13,
};
/* MFF_LOG_FLAGS_REG flag assignments */
@@ -113,6 +114,8 @@ enum mff_log_flags {
/* Indicate the packet has been received from a localport */
MLF_LOCALPORT = (1 << MLF_LOCALPORT_BIT),
+
+ MLF_LOOKUP_COMMIT_ECMP_NH = (1 << MLF_LOOKUP_COMMIT_ECMP_NH_BIT),
};
/* OVN logical fields
@@ -41,6 +41,7 @@
#include "uuid.h"
#include "socket-util.h"
#include "lib/ovn-util.h"
+#include "controller/lflow.h"
VLOG_DEFINE_THIS_MODULE(actions);
@@ -4278,6 +4279,281 @@ encode_CHECK_OUT_PORT_SEC(const struct ovnact_result *dl,
MLF_CHECK_PORT_SEC_BIT, ofpacts);
}
+/* Parses the arguments of "commit_ecmp_nh(ipv6 = BOOL, proto = PROTO);" from
+ * 'ctx->lexer' into 'ecmp_nh'.  The action name itself has already been
+ * consumed by the caller.
+ *
+ * BOOL is "true" or "false", accepted either as a bare identifier or as a
+ * quoted string.  PROTO is one of tcp, udp or sctp.
+ *
+ * On a syntax error the error is reported through the lexer, parsing stops at
+ * the offending token and 'ecmp_nh' is left unmodified. */
+static void
+parse_commit_ecmp_nh(struct action_context *ctx,
+                     struct ovnact_commit_ecmp_nh *ecmp_nh)
+{
+    uint8_t proto;
+    bool ipv6;
+
+    /* lexer_force_match() records its own "expecting ..." syntax error when
+     * the token is missing, so a failed match only needs an early return
+     * rather than a second error report or further parsing. */
+    if (!lexer_force_match(ctx->lexer, LEX_T_LPAREN)) {
+        return;
+    }
+
+    /* "ipv6 = true|false". */
+    if (!lexer_match_id(ctx->lexer, "ipv6")) {
+        lexer_syntax_error(ctx->lexer, "invalid parameter");
+        return;
+    }
+    if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
+        return;
+    }
+    if (lexer_match_string(ctx->lexer, "true")
+        || lexer_match_id(ctx->lexer, "true")) {
+        ipv6 = true;
+    } else if (lexer_match_string(ctx->lexer, "false")
+               || lexer_match_id(ctx->lexer, "false")) {
+        ipv6 = false;
+    } else {
+        lexer_syntax_error(ctx->lexer,
+                           "expecting true or false");
+        return;
+    }
+
+    if (!lexer_force_match(ctx->lexer, LEX_T_COMMA)) {
+        return;
+    }
+
+    /* "proto = tcp|udp|sctp". */
+    if (!lexer_match_id(ctx->lexer, "proto")) {
+        lexer_syntax_error(ctx->lexer, "invalid parameter");
+        return;
+    }
+    if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
+        return;
+    }
+    if (lexer_match_id(ctx->lexer, "tcp")) {
+        proto = IPPROTO_TCP;
+    } else if (lexer_match_id(ctx->lexer, "udp")) {
+        proto = IPPROTO_UDP;
+    } else if (lexer_match_id(ctx->lexer, "sctp")) {
+        proto = IPPROTO_SCTP;
+    } else {
+        lexer_syntax_error(ctx->lexer, "invalid protocol");
+        return;
+    }
+
+    if (!lexer_force_match(ctx->lexer, LEX_T_RPAREN)) {
+        return;
+    }
+
+    /* Only a fully parsed action is committed to 'ecmp_nh'. */
+    ecmp_nh->proto = proto;
+    ecmp_nh->ipv6 = ipv6;
+}
+
+/* Appends the textual form of 'ecmp_nh' to 's', e.g.
+ * "commit_ecmp_nh(ipv6 = false, proto = tcp);".  Any protocol value other
+ * than IPPROTO_UDP or IPPROTO_SCTP is printed as "tcp". */
+static void
+format_COMMIT_ECMP_NH(const struct ovnact_commit_ecmp_nh *ecmp_nh,
+                      struct ds *s)
+{
+    const char *l4_name = "tcp";
+
+    if (ecmp_nh->proto == IPPROTO_UDP) {
+        l4_name = "udp";
+    } else if (ecmp_nh->proto == IPPROTO_SCTP) {
+        l4_name = "sctp";
+    }
+
+    ds_put_format(s, "commit_ecmp_nh(ipv6 = %s, proto = %s);",
+                  ecmp_nh->ipv6 ? "true" : "false", l4_name);
+}
+
+/* Releases resources held by 'ecmp_nh'.  This function frees nothing; the
+ * parameter is unused. */
+static void
+ovnact_commit_ecmp_nh_free(struct ovnact_commit_ecmp_nh *ecmp_nh OVS_UNUSED)
+{
+    /* Nothing to free. */
+}
+/* Appends to 'ofpacts' one OpenFlow "learn" action.  The learned flow matches
+ * the connection of the packet being processed and carries a load of 1 into
+ * bit MLF_LOOKUP_COMMIT_ECMP_NH_BIT of MFF_LOG_FLAGS.
+ *
+ * If 'nw_conn' is true the flow is learned in OFTABLE_ECMP_NH_MAC and matches
+ * metadata, eth.src, eth_type, IP src/dst, IP protocol and L4 src/dst ports
+ * as they appear in the packet.  If 'nw_conn' is false the flow is learned
+ * in OFTABLE_ECMP_NH, eth.src is not matched, and the IP addresses and L4
+ * ports are swapped (learned src = packet dst and vice versa).
+ *
+ * 'ipv6' selects ETH_TYPE_IPV6 and the IPv6 address fields instead of
+ * ETH_TYPE_IP and the IPv4 ones.  'proto' must be IPPROTO_TCP, IPPROTO_UDP or
+ * IPPROTO_SCTP; any other value reaches OVS_NOT_REACHED(). */
+static void
+commit_ecmp_learn_action(struct ofpbuf *ofpacts, bool nw_conn,
+ bool ipv6, uint8_t proto)
+{
+ struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts);
+ struct match match = MATCH_CATCHALL_INITIALIZER; /* Scratch only. */
+ struct ofpact_learn_spec *ol_spec;
+ unsigned int imm_bytes;
+ uint8_t *src_imm;
+
+ ol->flags = NX_LEARN_F_DELETE_LEARNED;
+ ol->idle_timeout = 20; /* seconds. */
+ ol->hard_timeout = 30; /* seconds. */
+ ol->priority = OFP_DEFAULT_PRIORITY;
+ ol->table_id = nw_conn ? OFTABLE_ECMP_NH_MAC : OFTABLE_ECMP_NH;
+
+ /* Match on the metadata of the packet that triggers the learn action.
+  * For NX_LEARN_SRC_FIELD specs only src.field is assigned here; src.ofs
+  * and src.n_bits keep the zero fill from ofpbuf_put_zeros(). */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field = mf_from_id(MFF_METADATA);
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_FIELD;
+ ol_spec->src.field = mf_from_id(MFF_METADATA);
+
+ if (nw_conn) { /* Also match on eth.src, copied from the packet. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field = mf_from_id(MFF_ETH_SRC);
+ ol_spec->src.field = mf_from_id(MFF_ETH_SRC);
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_FIELD;
+ }
+
+ /* Match on the Ethernet type selected by 'ipv6' (immediate value, not
+  * copied from the packet). */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field = mf_from_id(MFF_ETH_TYPE);
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+ union mf_value imm_eth_type = {
+ .be16 = ipv6 ? htons(ETH_TYPE_IPV6) : htons(ETH_TYPE_IP)
+ };
+ mf_write_subfield_value(&ol_spec->dst, &imm_eth_type, &match);
+ /* Push value last, as this may reallocate 'ol_spec'. */
+ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8);
+ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
+ memcpy(src_imm, &imm_eth_type, imm_bytes);
+
+ /* IP src: copied from the packet's IP src when 'nw_conn', otherwise from
+  * the packet's IP dst (swapped direction). */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field =
+ ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC);
+ if (nw_conn) {
+ ol_spec->src.field =
+ ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC);
+ } else {
+ ol_spec->src.field =
+ ipv6 ? mf_from_id(MFF_IPV6_DST) : mf_from_id(MFF_IPV4_DST);
+ }
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_FIELD;
+
+ /* IP dst: copied from the packet's IP dst when 'nw_conn', otherwise from
+  * the packet's IP src (swapped direction). */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field =
+ ipv6 ? mf_from_id(MFF_IPV6_DST) : mf_from_id(MFF_IPV4_DST);
+ if (nw_conn) {
+ ol_spec->src.field =
+ ipv6 ? mf_from_id(MFF_IPV6_DST) : mf_from_id(MFF_IPV4_DST);
+ } else {
+ ol_spec->src.field =
+ ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC);
+ }
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_FIELD;
+
+ /* IP proto: matched against the immediate 'proto'.  src.field is also
+  * assigned below even though the source type is an immediate. */
+ union mf_value imm_proto = {
+ .u8 = proto,
+ };
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field = mf_from_id(MFF_IP_PROTO);
+ ol_spec->src.field = mf_from_id(MFF_IP_PROTO);
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+ mf_write_subfield_value(&ol_spec->dst, &imm_proto, &match);
+ /* Push value last, as this may reallocate 'ol_spec' */
+ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8);
+ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
+ memcpy(src_imm, &imm_proto, imm_bytes);
+
+ /* L4 src port for 'proto': copied from the packet's src port when
+  * 'nw_conn', otherwise from its dst port. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ switch (proto) {
+ case IPPROTO_TCP:
+ ol_spec->dst.field = mf_from_id(MFF_TCP_SRC);
+ ol_spec->src.field =
+ nw_conn ? mf_from_id(MFF_TCP_SRC) : mf_from_id(MFF_TCP_DST);
+ break;
+ case IPPROTO_UDP:
+ ol_spec->dst.field = mf_from_id(MFF_UDP_SRC);
+ ol_spec->src.field =
+ nw_conn ? mf_from_id(MFF_UDP_SRC) : mf_from_id(MFF_UDP_DST);
+ break;
+ case IPPROTO_SCTP:
+ ol_spec->dst.field = mf_from_id(MFF_SCTP_SRC);
+ ol_spec->src.field =
+ nw_conn ? mf_from_id(MFF_SCTP_SRC) : mf_from_id(MFF_SCTP_DST);
+ break;
+ default:
+ OVS_NOT_REACHED();
+ break;
+ }
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_FIELD;
+
+ /* L4 dst port for 'proto': copied from the packet's dst port when
+  * 'nw_conn', otherwise from its src port. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ switch (proto) {
+ case IPPROTO_TCP:
+ ol_spec->dst.field = mf_from_id(MFF_TCP_DST);
+ ol_spec->src.field =
+ nw_conn ? mf_from_id(MFF_TCP_DST) : mf_from_id(MFF_TCP_SRC);
+ break;
+ case IPPROTO_UDP:
+ ol_spec->dst.field = mf_from_id(MFF_UDP_DST);
+ ol_spec->src.field =
+ nw_conn ? mf_from_id(MFF_UDP_DST) : mf_from_id(MFF_UDP_SRC);
+ break;
+ case IPPROTO_SCTP:
+ ol_spec->dst.field = mf_from_id(MFF_SCTP_DST);
+ ol_spec->src.field =
+ nw_conn ? mf_from_id(MFF_SCTP_DST) : mf_from_id(MFF_SCTP_SRC);
+ break;
+ default:
+ OVS_NOT_REACHED();
+ break;
+ }
+ ol_spec->dst.ofs = 0;
+ ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_MATCH;
+ ol_spec->src_type = NX_LEARN_SRC_FIELD;
+
+ /* Set MLF_LOOKUP_COMMIT_ECMP_NH_BIT for ecmp replies: the learned flow
+  * loads the immediate 1 into that single bit of MFF_LOG_FLAGS. */
+ ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+ ol_spec->dst.field = mf_from_id(MFF_LOG_FLAGS);
+ ol_spec->dst.ofs = MLF_LOOKUP_COMMIT_ECMP_NH_BIT;
+ ol_spec->dst.n_bits = 1;
+ ol_spec->n_bits = ol_spec->dst.n_bits;
+ ol_spec->dst_type = NX_LEARN_DST_LOAD;
+ ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+ union mf_value imm_reg_value = {
+ .u8 = 1
+ };
+ mf_write_subfield_value(&ol_spec->dst, &imm_reg_value, &match);
+
+ /* Push value last, as this may reallocate 'ol_spec' */
+ imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8);
+ src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
+ ol = ofpacts->header; /* Refresh 'ol'; the puts above may have moved it. */
+ memcpy(src_imm, &imm_reg_value, imm_bytes);
+
+ ofpact_finish_LEARN(ofpacts, &ol);
+}
+
+/* Encodes 'ecmp_nh' into 'ofpacts' as two consecutive "learn" actions, both
+ * built by commit_ecmp_learn_action(): first with 'nw_conn' true, then with
+ * 'nw_conn' false.  'ep' is unused. */
+static void
+encode_COMMIT_ECMP_NH(const struct ovnact_commit_ecmp_nh *ecmp_nh,
+                      const struct ovnact_encode_params *ep OVS_UNUSED,
+                      struct ofpbuf *ofpacts)
+{
+    const bool is_ipv6 = ecmp_nh->ipv6;
+    const uint8_t l4_proto = ecmp_nh->proto;
+
+    /* The order matters: it fixes the order of the emitted actions. */
+    commit_ecmp_learn_action(ofpacts, true, is_ipv6, l4_proto);
+    commit_ecmp_learn_action(ofpacts, false, is_ipv6, l4_proto);
+}
+
/* Parses an assignment or exchange or put_dhcp_opts action. */
static void
parse_set_action(struct action_context *ctx)
@@ -4458,6 +4734,8 @@ parse_action(struct action_context *ctx)
ovnact_put_CT_SNAT_TO_VIP(ctx->ovnacts);
} else if (lexer_match_id(ctx->lexer, "put_fdb")) {
parse_put_fdb(ctx, ovnact_put_PUT_FDB(ctx->ovnacts));
+ } else if (lexer_match_id(ctx->lexer, "commit_ecmp_nh")) {
+ parse_commit_ecmp_nh(ctx, ovnact_put_COMMIT_ECMP_NH(ctx->ovnacts));
} else {
lexer_syntax_error(ctx->lexer, "expecting action");
}
@@ -2573,6 +2573,39 @@ tcp.flags = RST;
<b>Example:</b> <code>reg8[0..7] = check_out_port_sec();</code>
</p>
</dd>
+
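+ <dt><code>commit_ecmp_nh(ipv6 = <var>bool</var>, proto = <var>proto</var>);</code></dt>
+ <dd>
+ <p>
+ <b>Parameters</b>: <var>bool</var> is <code>true</code> for IPv6
+ traffic or <code>false</code> for IPv4 traffic; <var>proto</var> is
+ the L4 protocol, one of <code>tcp</code>, <code>udp</code> or
+ <code>sctp</code>.
+ </p>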
+
+ <p>
+ This action translates to two OpenFlow "learn" actions that insert
+ one new flow each in tables 76 and 77.
+ </p>
+
+ <ul>
+ <li>
+ Match on the 5-tuple and the expected next-hop MAC address
+ in table 76: <code>nw_src=ip0</code>, <code>nw_dst=ip1</code>,
+ <code>ip_proto</code>, <code>tp_src=l4_port0</code>,
+ <code>tp_dst=l4_port1</code>, <code>dl_src=ethaddr</code> and
+ set <code>REGBIT_KNOWN_ECMP_NH</code> to 1.
+ </li>
+ <li>
+ Match on the 5-tuple in table 77: <code>nw_src=ip1</code>,
+ <code>nw_dst=ip0</code>, <code>ip_proto</code>,
+ <code>tp_src=l4_port1</code>, <code>tp_dst=l4_port0</code>
+ and set <code>REGBIT_KNOWN_ECMP_NH</code> to 1.
+ </li>
+ </ul>
+
+ <p>
+ This action is applied if the packet arrives via an ECMP route or
+ if it is routed via an ECMP route.
+ </p>
+ </dd>
</dl>
</column>
@@ -2069,6 +2069,18 @@ reg0[0] = check_out_port_sec(foo);
check_out_port_sec;
Syntax error at `check_out_port_sec' expecting action.
+# commit_ecmp_nh
+commit_ecmp_nh(ipv6 = "false", proto = tcp);
+ formats as commit_ecmp_nh(ipv6 = false, proto = tcp);
+ encodes as learn(table=76,idle_timeout=20,hard_timeout=30,delete_learned,OXM_OF_METADATA[],NXM_OF_ETH_SRC[],eth_type=0x800,NXM_OF_IP_SRC[],NXM_OF_IP_DST[],nw_proto=6,NXM_OF_TCP_SRC[],NXM_OF_TCP_DST[],load:0x1->NXM_NX_REG10[13]),learn(table=77,idle_timeout=20,hard_timeout=30,delete_learned,OXM_OF_METADATA[],eth_type=0x800,NXM_OF_IP_SRC[]=NXM_OF_IP_DST[0..-1],NXM_OF_IP_DST[]=NXM_OF_IP_SRC[0..-1],nw_proto=6,NXM_OF_TCP_SRC[]=NXM_OF_TCP_DST[0..-1],NXM_OF_TCP_DST[]=NXM_OF_TCP_SRC[0..-1],load:0x1->NXM_NX_REG10[13])
+
+commit_ecmp_nh(ipv6 = "true", proto = udp);
+ formats as commit_ecmp_nh(ipv6 = true, proto = udp);
+ encodes as learn(table=76,idle_timeout=20,hard_timeout=30,delete_learned,OXM_OF_METADATA[],NXM_OF_ETH_SRC[],eth_type=0x86dd,NXM_NX_IPV6_SRC[],NXM_NX_IPV6_DST[],nw_proto=17,NXM_OF_UDP_SRC[],NXM_OF_UDP_DST[],load:0x1->NXM_NX_REG10[13]),learn(table=77,idle_timeout=20,hard_timeout=30,delete_learned,OXM_OF_METADATA[],eth_type=0x86dd,NXM_NX_IPV6_SRC[]=NXM_NX_IPV6_DST[0..-1],NXM_NX_IPV6_DST[]=NXM_NX_IPV6_SRC[0..-1],nw_proto=17,NXM_OF_UDP_SRC[]=NXM_OF_UDP_DST[0..-1],NXM_OF_UDP_DST[]=NXM_OF_UDP_SRC[0..-1],load:0x1->NXM_NX_REG10[13])
+
+commit_ecmp_nh(proto = sctp);
+ Syntax error at `proto' invalid parameter.
+
# push/pop
push(xxreg0);push(xxreg1[10..20]);push(eth.src);pop(xxreg0[0..47]);pop(xxreg0[48..57]);pop(xxreg1);
formats as push(xxreg0); push(xxreg1[10..20]); push(eth.src); pop(xxreg0[0..47]); pop(xxreg0[48..57]); pop(xxreg1);
@@ -3224,6 +3224,8 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
execute_check_out_port_sec(ovnact_get_CHECK_OUT_PORT_SEC(a),
dp, uflow);
break;
+ case OVNACT_COMMIT_ECMP_NH:
+ break;
}
}
ofpbuf_uninit(&stack);
commit_ecmp_nh action translates to an openflow "learn" action that inserts two new flows in the OFTABLE_ECMP_NH_MAC and OFTABLE_ECMP_NH tables. These new flows are used to match on the 5-tuple and the expected next-hop mac address and set REGBIT_KNOWN_ECMP_NH. commit_ecmp_nh action will be used to improve ECMP symmetric routing reliability. Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> --- controller/lflow.h | 2 + include/ovn/actions.h | 8 + include/ovn/logical-fields.h | 3 + lib/actions.c | 278 +++++++++++++++++++++++++++++++++++ ovn-sb.xml | 33 +++++ tests/ovn.at | 12 ++ utilities/ovn-trace.c | 2 + 7 files changed, 338 insertions(+)