@@ -79,6 +79,7 @@ struct uuid;
#define OFTABLE_CHK_OUT_PORT_SEC 75
#define OFTABLE_ECMP_NH_MAC 76
#define OFTABLE_ECMP_NH 77
+#define OFTABLE_CHK_LB_AFFINITY 78
enum ref_type {
REF_TYPE_ADDRSET,
@@ -121,6 +121,7 @@ struct ovn_extend_table;
OVNACT(COMMIT_ECMP_NH, ovnact_commit_ecmp_nh) \
OVNACT(CHK_ECMP_NH_MAC, ovnact_result) \
OVNACT(CHK_ECMP_NH, ovnact_result) \
+ OVNACT(COMMIT_LB_AFF, ovnact_commit_lb_aff) \
/* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
enum OVS_PACKED_ENUM ovnact_type {
@@ -463,6 +464,20 @@ struct ovnact_commit_ecmp_nh {
uint8_t proto;
};
+/* OVNACT_COMMIT_LB_AFF.
+ *
+ * Parsed arguments of the commit_lb_aff() action, i.e.
+ * commit_lb_aff(vip = "...", backend = "...", proto = ..., timeout = ...). */
+struct ovnact_commit_lb_aff {
+ struct ovnact ovnact;
+
+ /* Load-balancer virtual IP.  An IPv4 address is stored as an IPv4-mapped
+ * IPv6 address. */
+ struct in6_addr vip;
+ /* Port parsed from the "vip" string; 0 is formatted as "no port". */
+ uint16_t vip_port;
+ /* IPPROTO_TCP, IPPROTO_UDP or IPPROTO_SCTP. */
+ uint8_t proto;
+
+ /* Load-balancer backend IP, stored like 'vip'. */
+ struct in6_addr backend;
+ /* Backend port in host byte order (converted with htons() on encode). */
+ uint16_t backend_port;
+
+ /* Idle timeout, in seconds, of the learned affinity flow. */
+ uint16_t timeout;
+};
+
/* Internal use by the helpers below. */
void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
@@ -71,6 +71,7 @@ enum mff_log_flags_bits {
MLF_USE_SNAT_ZONE = 11,
MLF_CHECK_PORT_SEC_BIT = 12,
MLF_LOOKUP_COMMIT_ECMP_NH_BIT = 13,
+ MLF_COMMIT_LB_AFF_BIT = 14,
};
/* MFF_LOG_FLAGS_REG flag assignments */
@@ -116,6 +117,8 @@ enum mff_log_flags {
MLF_LOCALPORT = (1 << MLF_LOCALPORT_BIT),
MLF_LOOKUP_COMMIT_ECMP_NH = (1 << MLF_LOOKUP_COMMIT_ECMP_NH_BIT),
+
+ MLF_COMMIT_LB_AFF = (1 << MLF_COMMIT_LB_AFF_BIT),
};
/* OVN logical fields
@@ -4600,6 +4600,366 @@ encode_CHK_ECMP_NH(const struct ovnact_result *res,
MLF_LOOKUP_COMMIT_ECMP_NH_BIT, ofpacts);
}
+/* Parses one '<name> = "<ip>[:<port>]"' argument of commit_lb_aff().
+ *
+ * On success stores the address in '*addr' (an IPv4 address is stored as an
+ * IPv4-mapped IPv6 address), the port in '*port' and the address family in
+ * '*family', then returns true.  The string token is NOT consumed, so that
+ * the caller can still report errors against it.
+ *
+ * On failure reports a syntax error on the lexer and returns false. */
+static bool
+parse_lb_aff_endpoint(struct action_context *ctx, const char *name,
+                      struct in6_addr *addr, uint16_t *port, int *family)
+{
+    if (!lexer_match_id(ctx->lexer, name)
+        || !lexer_force_match(ctx->lexer, LEX_T_EQUALS)
+        || ctx->lexer->token.type != LEX_T_STRING) {
+        lexer_syntax_error(ctx->lexer, "invalid parameter");
+        return false;
+    }
+
+    uint16_t parsed_port = 0;
+    char *ip_str = NULL;
+    if (!ip_address_and_port_from_lb_key(ctx->lexer->token.s, &ip_str,
+                                         &parsed_port, family)) {
+        lexer_syntax_error(ctx->lexer, "invalid parameter");
+        return false;
+    }
+
+    bool ok;
+    if (*family == AF_INET) {
+        ovs_be32 ip4;
+
+        ok = ip_parse(ip_str, &ip4);
+        if (ok) {
+            in6_addr_set_mapped_ipv4(addr, ip4);
+        }
+    } else {
+        ok = ipv6_parse(ip_str, addr);
+    }
+    free(ip_str);
+
+    if (!ok) {
+        lexer_syntax_error(ctx->lexer, "invalid parameter");
+        return false;
+    }
+
+    *port = parsed_port;
+    return true;
+}
+
+/* Parses the arguments of:
+ *
+ *     commit_lb_aff(vip = "<ip>[:<port>]", backend = "<ip>[:<port>]",
+ *                   proto = tcp|udp|sctp, timeout = <uint16>);
+ *
+ * into 'lb_aff'.  The "commit_lb_aff" keyword itself has already been
+ * consumed by the caller.  The arguments must appear in exactly this order.
+ * 'vip' and 'backend' must belong to the same address family, because the
+ * encoder selects IPv4 vs. IPv6 from the VIP only.
+ *
+ * Any problem is reported as a syntax error on 'ctx->lexer'. */
+static void
+parse_commit_lb_aff(struct action_context *ctx,
+                    struct ovnact_commit_lb_aff *lb_aff)
+{
+    int vip_family, backend_family;
+    uint16_t timeout;
+
+    lexer_force_match(ctx->lexer, LEX_T_LPAREN); /* Skip '('. */
+
+    if (!parse_lb_aff_endpoint(ctx, "vip", &lb_aff->vip, &lb_aff->vip_port,
+                               &vip_family)) {
+        return;
+    }
+    lexer_get(ctx->lexer); /* Consume the VIP string. */
+    lexer_force_match(ctx->lexer, LEX_T_COMMA);
+
+    if (!parse_lb_aff_endpoint(ctx, "backend", &lb_aff->backend,
+                               &lb_aff->backend_port, &backend_family)) {
+        return;
+    }
+    if (backend_family != vip_family) {
+        /* A mixed-family pair cannot be encoded: the learned flow would
+         * carry a backend address of the wrong kind. */
+        lexer_syntax_error(ctx->lexer, "invalid protocol family");
+        return;
+    }
+    lexer_get(ctx->lexer); /* Consume the backend string. */
+    lexer_force_match(ctx->lexer, LEX_T_COMMA);
+
+    if (!lexer_match_id(ctx->lexer, "proto")) {
+        lexer_syntax_error(ctx->lexer, "invalid parameter");
+        return;
+    }
+
+    if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
+        lexer_syntax_error(ctx->lexer, "invalid parameter");
+        return;
+    }
+
+    if (lexer_match_id(ctx->lexer, "tcp")) {
+        lb_aff->proto = IPPROTO_TCP;
+    } else if (lexer_match_id(ctx->lexer, "udp")) {
+        lb_aff->proto = IPPROTO_UDP;
+    } else if (lexer_match_id(ctx->lexer, "sctp")) {
+        lb_aff->proto = IPPROTO_SCTP;
+    } else {
+        lexer_syntax_error(ctx->lexer, "invalid protocol");
+        return;
+    }
+    lexer_force_match(ctx->lexer, LEX_T_COMMA);
+
+    if (!lexer_match_id(ctx->lexer, "timeout")) {
+        lexer_syntax_error(ctx->lexer, "invalid parameter");
+        return;
+    }
+    if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
+        lexer_syntax_error(ctx->lexer, "invalid parameter");
+        return;
+    }
+    if (!action_parse_uint16(ctx, &timeout, "affinity timeout")) {
+        return;
+    }
+    lb_aff->timeout = timeout;
+
+    lexer_force_match(ctx->lexer, LEX_T_RPAREN); /* Skip ')'. */
+}
+
+/* Appends to 's' the textual form of 'lb_aff':
+ *
+ *     commit_lb_aff(vip = "<vip>", backend = "<backend>", proto = <proto>,
+ *                   timeout = <timeout>);
+ *
+ * IPv4-mapped addresses are printed in dotted-quad form, any other address
+ * as a bracketed IPv6 address.  A ":<port>" suffix is added only for a
+ * nonzero port.  Any protocol other than UDP or SCTP is printed as "tcp". */
+static void
+format_COMMIT_LB_AFF(const struct ovnact_commit_lb_aff *lb_aff, struct ds *s)
+{
+    /* Virtual IP. */
+    if (IN6_IS_ADDR_V4MAPPED(&lb_aff->vip)) {
+        ovs_be32 vip4 = in6_addr_get_mapped_ipv4(&lb_aff->vip);
+        char *vip_text = xasprintf(IP_FMT, IP_ARGS(vip4));
+
+        ds_put_format(s, "commit_lb_aff(vip = \"%s", vip_text);
+        free(vip_text);
+    } else {
+        char vip_text[INET6_ADDRSTRLEN] = {};
+
+        inet_ntop(AF_INET6, &lb_aff->vip, vip_text, sizeof vip_text);
+        ds_put_format(s, "commit_lb_aff(vip = \"[%s]", vip_text);
+    }
+    if (lb_aff->vip_port != 0) {
+        ds_put_format(s, ":%d", lb_aff->vip_port);
+    }
+    ds_put_cstr(s, "\"");
+
+    /* Backend. */
+    if (IN6_IS_ADDR_V4MAPPED(&lb_aff->backend)) {
+        ovs_be32 backend4 = in6_addr_get_mapped_ipv4(&lb_aff->backend);
+        char *backend_text = xasprintf(IP_FMT, IP_ARGS(backend4));
+
+        ds_put_format(s, ", backend = \"%s", backend_text);
+        free(backend_text);
+    } else {
+        char backend_text[INET6_ADDRSTRLEN] = {};
+
+        inet_ntop(AF_INET6, &lb_aff->backend, backend_text,
+                  sizeof backend_text);
+        ds_put_format(s, ", backend = \"[%s]", backend_text);
+    }
+    if (lb_aff->backend_port != 0) {
+        ds_put_format(s, ":%d", lb_aff->backend_port);
+    }
+    ds_put_cstr(s, "\"");
+
+    /* Protocol name; TCP doubles as the fallback. */
+    const char *proto_name;
+    if (lb_aff->proto == IPPROTO_UDP) {
+        proto_name = "udp";
+    } else if (lb_aff->proto == IPPROTO_SCTP) {
+        proto_name = "sctp";
+    } else {
+        proto_name = "tcp";
+    }
+
+    ds_put_format(s, ", proto = %s, timeout = %d);",
+                  proto_name, lb_aff->timeout);
+}
+
+/* Appends to 'ofpacts' the immediate operand of the learn spec that was just
+ * added: the first DIV_ROUND_UP('n_bits', 8) bytes of 'value', zero-padded
+ * to OFPACT_ALIGN.
+ *
+ * This must be the last step for a spec: growing 'ofpacts' may reallocate
+ * it, which invalidates any pointer to a previously appended spec.  That is
+ * why 'n_bits' is passed by value. */
+static void
+lb_aff_put_learn_imm(struct ofpbuf *ofpacts, unsigned int n_bits,
+                     const union mf_value *value)
+{
+    unsigned int imm_bytes = DIV_ROUND_UP(n_bits, 8);
+    uint8_t *src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes));
+
+    memcpy(src_imm, value, imm_bytes);
+}
+
+/* Encodes commit_lb_aff() as an OpenFlow "learn" action appended to
+ * 'ofpacts'.
+ *
+ * The learned flow is installed in OFTABLE_CHK_LB_AFFINITY with idle timeout
+ * 'lb_aff->timeout' and:
+ *
+ *   - matches on the Ethernet type of the VIP address family, the IP source
+ *     of the packet executing the action, the VIP as IP destination, the
+ *     configured IP protocol and the L4 destination port of the packet
+ *     executing the action;
+ *
+ *   - sets MLF_COMMIT_LB_AFF_BIT in the logical flags register and loads the
+ *     backend IP into REG4 (IPv4) or XXREG1 (IPv6) and the backend port into
+ *     REG8[0..15].
+ *
+ * IPv4 vs. IPv6 is decided from 'lb_aff->vip' only. */
+static void
+encode_COMMIT_LB_AFF(const struct ovnact_commit_lb_aff *lb_aff,
+                     const struct ovnact_encode_params *ep OVS_UNUSED,
+                     struct ofpbuf *ofpacts)
+{
+    bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&lb_aff->vip);
+    size_t ol_offset = ofpacts->size;
+    struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts);
+    struct match match = MATCH_CATCHALL_INITIALIZER;
+    struct ofpact_learn_spec *ol_spec;
+
+    ol->flags = NX_LEARN_F_DELETE_LEARNED;
+    ol->idle_timeout = lb_aff->timeout; /* seconds. */
+    ol->priority = OFP_DEFAULT_PRIORITY;
+    ol->table_id = OFTABLE_CHK_LB_AFFINITY;
+
+    /* Match on the Ethernet type of the VIP address family. */
+    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+    ol_spec->dst.field = mf_from_id(MFF_ETH_TYPE);
+    ol_spec->dst.ofs = 0;
+    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+    ol_spec->n_bits = ol_spec->dst.n_bits;
+    ol_spec->dst_type = NX_LEARN_DST_MATCH;
+    ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+    union mf_value imm_eth_type = {
+        .be16 = ipv6 ? htons(ETH_TYPE_IPV6) : htons(ETH_TYPE_IP)
+    };
+    mf_write_subfield_value(&ol_spec->dst, &imm_eth_type, &match);
+    lb_aff_put_learn_imm(ofpacts, ol_spec->dst.n_bits, &imm_eth_type);
+
+    /* IP src: copied from the packet that executes the action. */
+    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+    ol_spec->dst.field =
+        ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC);
+    ol_spec->src.field =
+        ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC);
+    ol_spec->dst.ofs = 0;
+    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+    ol_spec->n_bits = ol_spec->dst.n_bits;
+    ol_spec->dst_type = NX_LEARN_DST_MATCH;
+    ol_spec->src_type = NX_LEARN_SRC_FIELD;
+
+    /* IP dst: the load-balancer VIP. */
+    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+    ol_spec->dst.field =
+        ipv6 ? mf_from_id(MFF_IPV6_DST) : mf_from_id(MFF_IPV4_DST);
+    union mf_value imm_ip;
+    if (ipv6) {
+        imm_ip = (union mf_value) {
+            .ipv6 = lb_aff->vip,
+        };
+    } else {
+        ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&lb_aff->vip);
+        imm_ip = (union mf_value) {
+            .be32 = ip4,
+        };
+    }
+    ol_spec->dst.ofs = 0;
+    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+    ol_spec->n_bits = ol_spec->dst.n_bits;
+    ol_spec->dst_type = NX_LEARN_DST_MATCH;
+    ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+    mf_write_subfield_value(&ol_spec->dst, &imm_ip, &match);
+    lb_aff_put_learn_imm(ofpacts, ol_spec->dst.n_bits, &imm_ip);
+
+    /* IP proto. */
+    union mf_value imm_proto = {
+        .u8 = lb_aff->proto,
+    };
+    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+    ol_spec->dst.field = mf_from_id(MFF_IP_PROTO);
+    ol_spec->src.field = mf_from_id(MFF_IP_PROTO);
+    ol_spec->dst.ofs = 0;
+    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+    ol_spec->n_bits = ol_spec->dst.n_bits;
+    ol_spec->dst_type = NX_LEARN_DST_MATCH;
+    ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+    mf_write_subfield_value(&ol_spec->dst, &imm_proto, &match);
+    lb_aff_put_learn_imm(ofpacts, ol_spec->dst.n_bits, &imm_proto);
+
+    /* L4 dst port: copied from the packet that executes the action. */
+    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+    switch (lb_aff->proto) {
+    case IPPROTO_TCP:
+        ol_spec->dst.field = mf_from_id(MFF_TCP_DST);
+        ol_spec->src.field = mf_from_id(MFF_TCP_DST);
+        break;
+    case IPPROTO_UDP:
+        ol_spec->dst.field = mf_from_id(MFF_UDP_DST);
+        ol_spec->src.field = mf_from_id(MFF_UDP_DST);
+        break;
+    case IPPROTO_SCTP:
+        ol_spec->dst.field = mf_from_id(MFF_SCTP_DST);
+        ol_spec->src.field = mf_from_id(MFF_SCTP_DST);
+        break;
+    default:
+        OVS_NOT_REACHED();
+        break;
+    }
+    ol_spec->dst.ofs = 0;
+    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+    ol_spec->n_bits = ol_spec->dst.n_bits;
+    ol_spec->dst_type = NX_LEARN_DST_MATCH;
+    ol_spec->src_type = NX_LEARN_SRC_FIELD;
+
+    /* Set MLF_COMMIT_LB_AFF_BIT in the logical flags register. */
+    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+    ol_spec->dst.field = mf_from_id(MFF_LOG_FLAGS);
+    ol_spec->dst.ofs = MLF_COMMIT_LB_AFF_BIT;
+    ol_spec->dst.n_bits = 1;
+    ol_spec->n_bits = ol_spec->dst.n_bits;
+    ol_spec->dst_type = NX_LEARN_DST_LOAD;
+    ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+    union mf_value imm_reg_value = {
+        .u8 = 1
+    };
+    mf_write_subfield_value(&ol_spec->dst, &imm_reg_value, &match);
+    lb_aff_put_learn_imm(ofpacts, ol_spec->dst.n_bits, &imm_reg_value);
+
+    /* Load backend IP in REG4/XXREG1. */
+    union mf_value imm_backend_ip;
+    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+
+    if (ipv6) {
+        imm_backend_ip = (union mf_value) {
+            .ipv6 = lb_aff->backend,
+        };
+        ol_spec->dst.field = mf_from_id(MFF_XXREG1);
+    } else {
+        ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&lb_aff->backend);
+        imm_backend_ip = (union mf_value) {
+            .be32 = ip4,
+        };
+        ol_spec->dst.field = mf_from_id(MFF_REG4);
+    }
+
+    ol_spec->dst_type = NX_LEARN_DST_LOAD;
+    ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+    ol_spec->dst.ofs = 0;
+    ol_spec->dst.n_bits = ol_spec->dst.field->n_bits;
+    ol_spec->n_bits = ol_spec->dst.n_bits;
+    mf_write_subfield_value(&ol_spec->dst, &imm_backend_ip, &match);
+    lb_aff_put_learn_imm(ofpacts, ol_spec->dst.n_bits, &imm_backend_ip);
+
+    /* Load backend port in REG8[0..15]. */
+    union mf_value imm_backend_port;
+    ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec);
+    imm_backend_port = (union mf_value) {
+        .be16 = htons(lb_aff->backend_port),
+    };
+
+    ol_spec->dst.field = mf_from_id(MFF_REG8);
+    ol_spec->dst_type = NX_LEARN_DST_LOAD;
+    ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE;
+    ol_spec->dst.ofs = 0;
+    ol_spec->dst.n_bits = 8 * sizeof(lb_aff->backend_port);
+    ol_spec->n_bits = ol_spec->dst.n_bits;
+    mf_write_subfield_value(&ol_spec->dst, &imm_backend_port, &match);
+    lb_aff_put_learn_imm(ofpacts, ol_spec->dst.n_bits, &imm_backend_port);
+
+    /* The buffer may have been reallocated while specs were appended, so
+     * look the learn action up again by offset before finishing it. */
+    ol = ofpbuf_at_assert(ofpacts, ol_offset, sizeof *ol);
+    ofpact_finish_LEARN(ofpacts, &ol);
+}
+
+/* Releases resources owned by 'lb_aff'.  struct ovnact_commit_lb_aff holds
+ * only by-value members, so there is nothing to free. */
+static void
+ovnact_commit_lb_aff_free(struct ovnact_commit_lb_aff *lb_aff OVS_UNUSED)
+{
+}
+
/* Parses an assignment or exchange or put_dhcp_opts action. */
static void
parse_set_action(struct action_context *ctx)
@@ -4790,6 +5150,8 @@ parse_action(struct action_context *ctx)
parse_put_fdb(ctx, ovnact_put_PUT_FDB(ctx->ovnacts));
} else if (lexer_match_id(ctx->lexer, "commit_ecmp_nh")) {
parse_commit_ecmp_nh(ctx, ovnact_put_COMMIT_ECMP_NH(ctx->ovnacts));
+ } else if (lexer_match_id(ctx->lexer, "commit_lb_aff")) {
+ parse_commit_lb_aff(ctx, ovnact_put_COMMIT_LB_AFF(ctx->ovnacts));
} else {
lexer_syntax_error(ctx->lexer, "expecting action");
}
@@ -2624,6 +2624,41 @@ tcp.flags = RST;
register <var>R</var> is set to 1.
</p>
</dd>
+
+ <dt>
+ <code>
+ commit_lb_aff(<var>vip</var>, <var>backend</var>,
+ <var>proto</var>, <var>timeout</var>);
+ </code>
+ </dt>
+ <dd>
+ <p>
+ <b>Parameters</b>: load-balancer virtual ip:port <var>vip</var>,
+ load-balancer backend ip:port <var>backend</var>, load-balancer
+ protocol <var>proto</var>, affinity timeout <var>timeout</var>.
+ </p>
+
+ <p>
+ This action translates to an OpenFlow "learn" action that inserts
+ a new flow in table 78.
+ </p>
+
+ <ul>
+ <li>
+ Match on the 4-tuple in table 78: <code>nw_src=ip client</code>,
+ <code>nw_dst=vip ip</code>, <code>ip_proto</code>,
+ <code>tp_dst=vip port</code>, and set <code>reg10[14]</code> to 1
+ and <code>reg4</code> and <code>reg8[0..15]</code> to the backend
+ ip and port respectively. For IPv6, register <code>xxreg1</code>
+ is used to store the backend ip.
+ </li>
+ </ul>
+
+ <p>
+ This action is applied to new connections received by a specific
+ load balancer.
+ </p>
+ </dd>
</dl>
</column>
@@ -2125,6 +2125,13 @@ reg9[5] = chk_ecmp_nh_mac();
reg9[5] = chk_ecmp_nh();
encodes as set_field:0/0x2000->reg10,resubmit(,77),move:NXM_NX_REG10[13]->OXM_OF_PKT_REG4[5]
+# commit_lb_aff
+commit_lb_aff(vip = "172.16.0.123:8080", backend = "10.0.0.3:8080", proto = tcp, timeout = 30);
+ encodes as learn(table=78,idle_timeout=30,delete_learned,eth_type=0x800,NXM_OF_IP_SRC[],ip_dst=172.16.0.123,nw_proto=6,NXM_OF_TCP_DST[],load:0x1->NXM_NX_REG10[14],load:0xa000003->NXM_NX_REG4[],load:0x1f90->NXM_NX_REG8[0..15])
+
+commit_lb_aff(vip = "[::1]:8080", backend = "[::2]:8080", proto = tcp, timeout = 30);
+ encodes as learn(table=78,idle_timeout=30,delete_learned,eth_type=0x86dd,NXM_NX_IPV6_SRC[],ipv6_dst=::1,nw_proto=6,NXM_OF_TCP_DST[],load:0x1->NXM_NX_REG10[14],load:0x2->NXM_NX_XXREG1[],load:0x1f90->NXM_NX_REG8[0..15])
+
# push/pop
push(xxreg0);push(xxreg1[10..20]);push(eth.src);pop(xxreg0[0..47]);pop(xxreg0[48..57]);pop(xxreg1);
formats as push(xxreg0); push(xxreg1[10..20]); push(eth.src); pop(xxreg0[0..47]); pop(xxreg0[48..57]); pop(xxreg1);
@@ -3298,6 +3298,8 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
break;
case OVNACT_CHK_ECMP_NH:
break;
+ case OVNACT_COMMIT_LB_AFF:
+ break;
}
}
ofpbuf_uninit(&stack);
commit_lb_aff action translates to an openflow "learn" action that inserts a new flow in the OFTABLE_CHK_LB_AFFINITY table. The new flow is used to match on the 5-tuple and set REGBIT_KNOWN_LB_SESSION bit. Moreover the new flow stores backend IP and port in register REG4 and REG8[0..15] respectively. Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> --- controller/lflow.h | 1 + include/ovn/actions.h | 15 ++ include/ovn/logical-fields.h | 3 + lib/actions.c | 362 +++++++++++++++++++++++++++++++++++ ovn-sb.xml | 35 ++++ tests/ovn.at | 7 + utilities/ovn-trace.c | 2 + 7 files changed, 425 insertions(+)