Message ID | 5e673957904019f9dd2444849f5c3bbd7188a1cc.1664888003.git.lorenzo.bianconi@redhat.com |
---|---|
State | Superseded |
Headers | show |
Series | Introduce lb affinity timeout support | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/github-robot-_ovn-kubernetes | fail | github build: failed |
On 10/4/22 09:02, Lorenzo Bianconi wrote: > commit_lb_aff action translates to an openflow "learn" action that > inserts a new flow in the OFTABLE_CHK_LB_AFFINITY table. The new flow is > used to match on the 5-tuple and set REGBIT_KNOWN_LB_SESSION bit. > Moreover the new flow stores backend IP and port in register REG4 and > REG8[0..15] respectively. > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> > --- > controller/lflow.h | 1 + > include/ovn/actions.h | 15 ++ > include/ovn/logical-fields.h | 3 + > lib/actions.c | 362 +++++++++++++++++++++++++++++++++++ > ovn-sb.xml | 35 ++++ > tests/ovn.at | 7 + > utilities/ovn-trace.c | 2 + > 7 files changed, 425 insertions(+) > > diff --git a/controller/lflow.h b/controller/lflow.h > index 8cbe312ca..4be079555 100644 > --- a/controller/lflow.h > +++ b/controller/lflow.h > @@ -79,6 +79,7 @@ struct uuid; > #define OFTABLE_CHK_OUT_PORT_SEC 75 > #define OFTABLE_ECMP_NH_MAC 76 > #define OFTABLE_ECMP_NH 77 > +#define OFTABLE_CHK_LB_AFFINITY 78 > > enum ref_type { > REF_TYPE_ADDRSET, > diff --git a/include/ovn/actions.h b/include/ovn/actions.h > index d7ee84dac..597cbb8e3 100644 > --- a/include/ovn/actions.h > +++ b/include/ovn/actions.h > @@ -121,6 +121,7 @@ struct ovn_extend_table; > OVNACT(COMMIT_ECMP_NH, ovnact_commit_ecmp_nh) \ > OVNACT(CHK_ECMP_NH_MAC, ovnact_result) \ > OVNACT(CHK_ECMP_NH, ovnact_result) \ > + OVNACT(COMMIT_LB_AFF, ovnact_commit_lb_aff) \ > > /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */ > enum OVS_PACKED_ENUM ovnact_type { > @@ -463,6 +464,20 @@ struct ovnact_commit_ecmp_nh { > uint8_t proto; > }; > > +/* OVNACT_COMMIT_LB_AFF. */ > +struct ovnact_commit_lb_aff { > + struct ovnact ovnact; > + > + struct in6_addr vip; > + uint16_t vip_port; > + uint8_t proto; > + > + struct in6_addr backend; > + uint16_t backend_port; > + > + uint16_t timeout; > +}; > + > /* Internal use by the helpers below. 
*/ > void ovnact_init(struct ovnact *, enum ovnact_type, size_t len); > void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len); > diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h > index 3db7265e4..52f40de38 100644 > --- a/include/ovn/logical-fields.h > +++ b/include/ovn/logical-fields.h > @@ -71,6 +71,7 @@ enum mff_log_flags_bits { > MLF_USE_SNAT_ZONE = 11, > MLF_CHECK_PORT_SEC_BIT = 12, > MLF_LOOKUP_COMMIT_ECMP_NH_BIT = 13, > + MLF_COMMIT_LB_AFF_BIT = 14, > }; > > /* MFF_LOG_FLAGS_REG flag assignments */ > @@ -116,6 +117,8 @@ enum mff_log_flags { > MLF_LOCALPORT = (1 << MLF_LOCALPORT_BIT), > > MLF_LOOKUP_COMMIT_ECMP_NH = (1 << MLF_LOOKUP_COMMIT_ECMP_NH_BIT), > + > + MLF_COMMIT_LB_AFF = (1 << MLF_COMMIT_LB_AFF_BIT), > }; > > /* OVN logical fields > diff --git a/lib/actions.c b/lib/actions.c > index adbb42db4..5c68e5c3a 100644 > --- a/lib/actions.c > +++ b/lib/actions.c > @@ -4600,6 +4600,366 @@ encode_CHK_ECMP_NH(const struct ovnact_result *res, > MLF_LOOKUP_COMMIT_ECMP_NH_BIT, ofpacts); > } > > +static void > +parse_commit_lb_aff(struct action_context *ctx, > + struct ovnact_commit_lb_aff *lb_aff) > +{ > + uint16_t timeout, port = 0; > + char *ip_str; > + int family; > + > + lexer_force_match(ctx->lexer, LEX_T_LPAREN); /* Skip '('. 
*/ > + if (!lexer_match_id(ctx->lexer, "vip")) { > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > + return; > + } > + > + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > + return; > + } > + > + if (ctx->lexer->token.type != LEX_T_STRING) { > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > + return; > + } > + > + if (!ip_address_and_port_from_lb_key(ctx->lexer->token.s, &ip_str, > + &port, &family)) { > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > + return; > + } > + > + if (family == AF_INET) { > + ovs_be32 ip4; > + ip_parse(ip_str, &ip4); > + in6_addr_set_mapped_ipv4(&lb_aff->vip, ip4); > + } else { > + ipv6_parse(ip_str, &lb_aff->vip); > + } > + > + lb_aff->vip_port = port; > + free(ip_str); > + > + lexer_get(ctx->lexer); > + lexer_force_match(ctx->lexer, LEX_T_COMMA); > + > + if (!lexer_match_id(ctx->lexer, "backend")) { > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > + return; > + } > + > + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > + return; > + } > + > + if (ctx->lexer->token.type != LEX_T_STRING) { > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > + return; > + } > + > + if (!ip_address_and_port_from_lb_key(ctx->lexer->token.s, &ip_str, > + &port, &family)) { > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > + return; > + } > + > + if (family == AF_INET) { > + ovs_be32 ip4; > + ip_parse(ip_str, &ip4); > + in6_addr_set_mapped_ipv4(&lb_aff->backend, ip4); > + } else { > + ipv6_parse(ip_str, &lb_aff->backend); > + } > + > + lb_aff->backend_port = port; > + free(ip_str); > + > + lexer_get(ctx->lexer); > + lexer_force_match(ctx->lexer, LEX_T_COMMA); > + > + if (!lexer_match_id(ctx->lexer, "proto")) { > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > + return; > + } > + > + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { > + lexer_syntax_error(ctx->lexer, "invalid 
parameter"); > + return; > + } > + > + if (lexer_match_id(ctx->lexer, "tcp")) { > + lb_aff->proto = IPPROTO_TCP; > + } else if (lexer_match_id(ctx->lexer, "udp")) { > + lb_aff->proto = IPPROTO_UDP; > + } else if (lexer_match_id(ctx->lexer, "sctp")) { > + lb_aff->proto = IPPROTO_SCTP; > + } else { > + lexer_syntax_error(ctx->lexer, "invalid protocol"); > + return; > + } > + lexer_force_match(ctx->lexer, LEX_T_COMMA); > + > + if (!lexer_match_id(ctx->lexer, "timeout")) { > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > + return; > + } > + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > + return; > + } > + if (!action_parse_uint16(ctx, &timeout, "affinity timeout")) { > + return; > + } > + lb_aff->timeout = timeout; > + > + lexer_force_match(ctx->lexer, LEX_T_RPAREN); /* Skip ')'. */ > + > +} > + > +static void > +format_COMMIT_LB_AFF(const struct ovnact_commit_lb_aff *lb_aff, struct ds *s) > +{ > + if (!IN6_IS_ADDR_V4MAPPED(&lb_aff->vip)) { > + char ip_str[INET6_ADDRSTRLEN] = {}; > + inet_ntop(AF_INET6, &lb_aff->vip, ip_str, INET6_ADDRSTRLEN); > + ds_put_format(s, "commit_lb_aff(vip = \"[%s]", ip_str); > + } else { > + ovs_be32 ip = in6_addr_get_mapped_ipv4(&lb_aff->vip); > + char *ip_str = xasprintf(IP_FMT, IP_ARGS(ip)); > + ds_put_format(s, "commit_lb_aff(vip = \"%s", ip_str); > + free(ip_str); > + } > + if (lb_aff->vip_port) { > + ds_put_format(s, ":%d", lb_aff->vip_port); > + } > + ds_put_cstr(s, "\""); > + > + if (!IN6_IS_ADDR_V4MAPPED(&lb_aff->backend)) { > + char ip_str[INET6_ADDRSTRLEN] = {}; > + inet_ntop(AF_INET6, &lb_aff->backend, ip_str, INET6_ADDRSTRLEN); > + ds_put_format(s, ", backend = \"[%s]", ip_str); > + } else { > + ovs_be32 ip = in6_addr_get_mapped_ipv4(&lb_aff->backend); > + char *ip_str = xasprintf(IP_FMT, IP_ARGS(ip)); > + ds_put_format(s, ", backend = \"%s", ip_str); > + free(ip_str); > + } > + if (lb_aff->backend_port) { > + ds_put_format(s, ":%d", 
lb_aff->backend_port); > + } > + ds_put_cstr(s, "\""); > + > + const char *proto; > + switch (lb_aff->proto) { > + case IPPROTO_UDP: > + proto = "udp"; > + break; > + case IPPROTO_SCTP: > + proto = "sctp"; > + break; > + case IPPROTO_TCP: > + default: > + proto = "tcp"; > + break; > + } > + ds_put_format(s, ", proto = %s, timeout = %d);", > + proto, lb_aff->timeout); > +} > + > +static void > +encode_COMMIT_LB_AFF(const struct ovnact_commit_lb_aff *lb_aff, > + const struct ovnact_encode_params *ep OVS_UNUSED, > + struct ofpbuf *ofpacts) > +{ > + bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&lb_aff->vip); > + size_t ol_offset = ofpacts->size; > + struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts); > + struct match match = MATCH_CATCHALL_INITIALIZER; > + struct ofpact_learn_spec *ol_spec; > + unsigned int imm_bytes; > + uint8_t *src_imm; > + > + ol->flags = NX_LEARN_F_DELETE_LEARNED; > + ol->idle_timeout = lb_aff->timeout; /* seconds. */ > + ol->priority = OFP_DEFAULT_PRIORITY; > + ol->table_id = OFTABLE_CHK_LB_AFFINITY; > + > + /* Match on the same ETH type as the packet that created the new table. */ > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > + ol_spec->dst.field = mf_from_id(MFF_ETH_TYPE); > + ol_spec->dst.ofs = 0; > + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; > + ol_spec->n_bits = ol_spec->dst.n_bits; > + ol_spec->dst_type = NX_LEARN_DST_MATCH; > + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; > + union mf_value imm_eth_type = { > + .be16 = ipv6 ? htons(ETH_TYPE_IPV6) : htons(ETH_TYPE_IP) > + }; > + mf_write_subfield_value(&ol_spec->dst, &imm_eth_type, &match); > + /* Push value last, as this may reallocate 'ol_spec'. */ > + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); > + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); > + memcpy(src_imm, &imm_eth_type, imm_bytes); > + > + /* IP src. */ > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > + ol_spec->dst.field = > + ipv6 ? 
mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC); > + ol_spec->src.field = > + ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC); > + ol_spec->dst.ofs = 0; > + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; > + ol_spec->n_bits = ol_spec->dst.n_bits; > + ol_spec->dst_type = NX_LEARN_DST_MATCH; > + ol_spec->src_type = NX_LEARN_SRC_FIELD; > + > + /* IP dst. */ > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > + ol_spec->dst.field = > + ipv6 ? mf_from_id(MFF_IPV6_DST) : mf_from_id(MFF_IPV4_DST); > + union mf_value imm_ip; > + if (ipv6) { > + imm_ip = (union mf_value) { > + .ipv6 = lb_aff->vip, > + }; > + } else { > + ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&lb_aff->vip); > + imm_ip = (union mf_value) { > + .be32 = ip4, > + }; > + } > + ol_spec->dst.ofs = 0; > + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; > + ol_spec->n_bits = ol_spec->dst.n_bits; > + ol_spec->dst_type = NX_LEARN_DST_MATCH; > + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; > + mf_write_subfield_value(&ol_spec->dst, &imm_ip, &match); > + > + /* Push value last, as this may reallocate 'ol_spec' */ > + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); > + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); > + memcpy(src_imm, &imm_ip, imm_bytes); > + > + /* IP proto. 
*/ > + union mf_value imm_proto = { > + .u8 = lb_aff->proto, > + }; > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > + ol_spec->dst.field = mf_from_id(MFF_IP_PROTO); > + ol_spec->src.field = mf_from_id(MFF_IP_PROTO); > + ol_spec->dst.ofs = 0; > + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; > + ol_spec->n_bits = ol_spec->dst.n_bits; > + ol_spec->dst_type = NX_LEARN_DST_MATCH; > + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; > + mf_write_subfield_value(&ol_spec->dst, &imm_proto, &match); > + /* Push value last, as this may reallocate 'ol_spec' */ > + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); > + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); > + memcpy(src_imm, &imm_proto, imm_bytes); > + > + /* dst port */ > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > + switch (lb_aff->proto) { > + case IPPROTO_TCP: > + ol_spec->dst.field = mf_from_id(MFF_TCP_DST); > + ol_spec->src.field = mf_from_id(MFF_TCP_DST); > + break; > + case IPPROTO_UDP: > + ol_spec->dst.field = mf_from_id(MFF_UDP_DST); > + ol_spec->src.field = mf_from_id(MFF_UDP_DST); > + break; > + case IPPROTO_SCTP: > + ol_spec->dst.field = mf_from_id(MFF_SCTP_DST); > + ol_spec->src.field = mf_from_id(MFF_SCTP_DST); > + break; > + default: > + OVS_NOT_REACHED(); > + break; > + } > + ol_spec->dst.ofs = 0; > + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; > + ol_spec->n_bits = ol_spec->dst.n_bits; > + ol_spec->dst_type = NX_LEARN_DST_MATCH; > + ol_spec->src_type = NX_LEARN_SRC_FIELD; > + > + /* Set MLF_LOOKUP_COMMIT_ECMP_NH_BIT for ecmp replies. 
*/ > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > + ol_spec->dst.field = mf_from_id(MFF_LOG_FLAGS); > + ol_spec->dst.ofs = MLF_COMMIT_LB_AFF_BIT; > + ol_spec->dst.n_bits = 1; > + ol_spec->n_bits = ol_spec->dst.n_bits; > + ol_spec->dst_type = NX_LEARN_DST_LOAD; > + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; > + union mf_value imm_reg_value = { > + .u8 = 1 > + }; > + mf_write_subfield_value(&ol_spec->dst, &imm_reg_value, &match); > + > + /* Push value last, as this may reallocate 'ol_spec' */ > + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); > + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); > + ol = ofpacts->header; > + memcpy(src_imm, &imm_reg_value, imm_bytes); > + > + /* Load backend IP in REG4/XXREG1. */ > + union mf_value imm_backend_ip; > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > + > + if (ipv6) { > + imm_backend_ip = (union mf_value) { > + .ipv6 = lb_aff->backend, > + }; > + ol_spec->dst.field = mf_from_id(MFF_XXREG1); > + } else { > + ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&lb_aff->backend); > + imm_backend_ip = (union mf_value) { > + .be32 = ip4, > + }; > + ol_spec->dst.field = mf_from_id(MFF_REG4); > + } > + > + ol_spec->dst_type = NX_LEARN_DST_LOAD; > + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; > + ol_spec->dst.ofs = 0; > + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; > + ol_spec->n_bits = ol_spec->dst.n_bits; > + mf_write_subfield_value(&ol_spec->dst, &imm_backend_ip, &match); > + /* Push value last, as this may reallocate 'ol_spec' */ > + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); > + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); > + memcpy(src_imm, &imm_backend_ip, imm_bytes); > + > + /* Load backend port in REG8. 
*/ > + union mf_value imm_backend_port; > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > + imm_backend_port = (union mf_value) { > + .be16 = htons(lb_aff->backend_port), > + }; > + > + ol_spec->dst.field = mf_from_id(MFF_REG8); > + ol_spec->dst_type = NX_LEARN_DST_LOAD; > + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; > + ol_spec->dst.ofs = 0; > + ol_spec->dst.n_bits = 8 * sizeof(lb_aff->backend_port); > + ol_spec->n_bits = ol_spec->dst.n_bits; > + mf_write_subfield_value(&ol_spec->dst, &imm_backend_port, &match); > + /* Push value last, as this may reallocate 'ol_spec' */ > + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); > + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); > + memcpy(src_imm, &imm_backend_port, imm_bytes); > + > + ol = ofpbuf_at_assert(ofpacts, ol_offset, sizeof *ol); > + ofpact_finish_LEARN(ofpacts, &ol); > +} > + > +static void > +ovnact_commit_lb_aff_free(struct ovnact_commit_lb_aff *ecmp_nh OVS_UNUSED) > +{ > +} > + > /* Parses an assignment or exchange or put_dhcp_opts action. */ > static void > parse_set_action(struct action_context *ctx) > @@ -4790,6 +5150,8 @@ parse_action(struct action_context *ctx) > parse_put_fdb(ctx, ovnact_put_PUT_FDB(ctx->ovnacts)); > } else if (lexer_match_id(ctx->lexer, "commit_ecmp_nh")) { > parse_commit_ecmp_nh(ctx, ovnact_put_COMMIT_ECMP_NH(ctx->ovnacts)); > + } else if (lexer_match_id(ctx->lexer, "commit_lb_aff")) { > + parse_commit_lb_aff(ctx, ovnact_put_COMMIT_LB_AFF(ctx->ovnacts)); > } else { > lexer_syntax_error(ctx->lexer, "expecting action"); > } > diff --git a/ovn-sb.xml b/ovn-sb.xml > index 315d60853..fae62c09d 100644 > --- a/ovn-sb.xml > +++ b/ovn-sb.xml > @@ -2624,6 +2624,41 @@ tcp.flags = RST; > register <var>R</var> is set to 1. 
> </p> > </dd> > + > + <dt> > + <code> > + commit_lb_aff(<var>vip</var>, <var>backend</var>, > + <var>proto</var>, <var>timeout</var>); > + </code> > + </dt> > + <dd> > + <p> > + <b>Parameters</b>: load-balancer virtual ip:port <var>vip</var>, > + load-balancer backend ip:port <var>backend</var>, load-balancer > + protocol <var>proto</var>, affinity timeout <var>timeout</var>. > + </p> > + > + <p> > + This action translates to an openflow "learn" action that inserts > + a new flow in tables 78. s/tables/table/ > + </p> > + > + <ul> > + <li> > + Match on the 4-tuple in table 78: <code>nw_src=ip client</code>, > + <code>nw_dst=vip ip</code>, <code>ip_proto</code>, > + <code>tp_dst=vip port</code> and set <code>reg9[6]</code> to 1, > + <code>reg4</code> and <code>reg8</code> to backend ip and port > + respectively. For IPv6 register <code>xxreg1</code> is used to > + store the backend ip. > + </li> > + </ul> > + > + <p> > + This action is applied for new connection received by a specific > + load-balancer. > + </p> > + </dd> > </dl> > </column> > > diff --git a/tests/ovn.at b/tests/ovn.at > index f8b8db4df..fbf281494 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -2125,6 +2125,13 @@ reg9[5] = chk_ecmp_nh_mac(); > reg9[5] = chk_ecmp_nh(); > encodes as set_field:0/0x2000->reg10,resubmit(,77),move:NXM_NX_REG10[13]->OXM_OF_PKT_REG4[5] > > +# commit_lb_aff > +commit_lb_aff(vip = "172.16.0.123:8080", backend = "10.0.0.3:8080", proto = tcp, timeout = 30); > + encodes as learn(table=78,idle_timeout=30,delete_learned,eth_type=0x800,NXM_OF_IP_SRC[],ip_dst=172.16.0.123,nw_proto=6,NXM_OF_TCP_DST[],load:0x1->NXM_NX_REG10[14],load:0xa000003->NXM_NX_REG4[],load:0x1f90->NXM_NX_REG8[0..15]) Just for my own education, what do the NXM_OF_IP_SRC[] and NXM_OF_TCP_DST[] fields by themselves mean? I see from the code that it's a match, rather than an action, but the syntax here is odd. Does it just mean that these registers need to be non-zero? 
> + > +commit_lb_aff(vip = "[::1]:8080", backend = "[::2]:8080", proto = tcp, timeout = 30); > + encodes as learn(table=78,idle_timeout=30,delete_learned,eth_type=0x86dd,NXM_NX_IPV6_SRC[],ipv6_dst=::1,nw_proto=6,NXM_OF_TCP_DST[],load:0x1->NXM_NX_REG10[14],load:0x2->NXM_NX_XXREG1[],load:0x1f90->NXM_NX_REG8[0..15]) > + > # push/pop > push(xxreg0);push(xxreg1[10..20]);push(eth.src);pop(xxreg0[0..47]);pop(xxreg0[48..57]);pop(xxreg1); > formats as push(xxreg0); push(xxreg1[10..20]); push(eth.src); pop(xxreg0[0..47]); pop(xxreg0[48..57]); pop(xxreg1); > diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c > index d9e7129d9..5adfd2521 100644 > --- a/utilities/ovn-trace.c > +++ b/utilities/ovn-trace.c > @@ -3298,6 +3298,8 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, > break; > case OVNACT_CHK_ECMP_NH: > break; > + case OVNACT_COMMIT_LB_AFF: > + break; > } > } > ofpbuf_uninit(&stack);
> On 10/4/22 09:02, Lorenzo Bianconi wrote: > > commit_lb_aff action translates to an openflow "learn" action that > > inserts a new flow in the OFTABLE_CHK_LB_AFFINITY table. The new flow is > > used to match on the 5-tuple and set REGBIT_KNOWN_LB_SESSION bit. > > Moreover the new flow stores backend IP and port in register REG4 and > > REG8[0..15] respectively. > > > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> > > --- > > controller/lflow.h | 1 + > > include/ovn/actions.h | 15 ++ > > include/ovn/logical-fields.h | 3 + > > lib/actions.c | 362 +++++++++++++++++++++++++++++++++++ > > ovn-sb.xml | 35 ++++ > > tests/ovn.at | 7 + > > utilities/ovn-trace.c | 2 + > > 7 files changed, 425 insertions(+) > > > > diff --git a/controller/lflow.h b/controller/lflow.h > > index 8cbe312ca..4be079555 100644 > > --- a/controller/lflow.h > > +++ b/controller/lflow.h > > @@ -79,6 +79,7 @@ struct uuid; > > #define OFTABLE_CHK_OUT_PORT_SEC 75 > > #define OFTABLE_ECMP_NH_MAC 76 > > #define OFTABLE_ECMP_NH 77 > > +#define OFTABLE_CHK_LB_AFFINITY 78 > > enum ref_type { > > REF_TYPE_ADDRSET, > > diff --git a/include/ovn/actions.h b/include/ovn/actions.h > > index d7ee84dac..597cbb8e3 100644 > > --- a/include/ovn/actions.h > > +++ b/include/ovn/actions.h > > @@ -121,6 +121,7 @@ struct ovn_extend_table; > > OVNACT(COMMIT_ECMP_NH, ovnact_commit_ecmp_nh) \ > > OVNACT(CHK_ECMP_NH_MAC, ovnact_result) \ > > OVNACT(CHK_ECMP_NH, ovnact_result) \ > > + OVNACT(COMMIT_LB_AFF, ovnact_commit_lb_aff) \ > > /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */ > > enum OVS_PACKED_ENUM ovnact_type { > > @@ -463,6 +464,20 @@ struct ovnact_commit_ecmp_nh { > > uint8_t proto; > > }; > > +/* OVNACT_COMMIT_LB_AFF. 
*/ > > +struct ovnact_commit_lb_aff { > > + struct ovnact ovnact; > > + > > + struct in6_addr vip; > > + uint16_t vip_port; > > + uint8_t proto; > > + > > + struct in6_addr backend; > > + uint16_t backend_port; > > + > > + uint16_t timeout; > > +}; > > + > > /* Internal use by the helpers below. */ > > void ovnact_init(struct ovnact *, enum ovnact_type, size_t len); > > void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len); > > diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h > > index 3db7265e4..52f40de38 100644 > > --- a/include/ovn/logical-fields.h > > +++ b/include/ovn/logical-fields.h > > @@ -71,6 +71,7 @@ enum mff_log_flags_bits { > > MLF_USE_SNAT_ZONE = 11, > > MLF_CHECK_PORT_SEC_BIT = 12, > > MLF_LOOKUP_COMMIT_ECMP_NH_BIT = 13, > > + MLF_COMMIT_LB_AFF_BIT = 14, > > }; > > /* MFF_LOG_FLAGS_REG flag assignments */ > > @@ -116,6 +117,8 @@ enum mff_log_flags { > > MLF_LOCALPORT = (1 << MLF_LOCALPORT_BIT), > > MLF_LOOKUP_COMMIT_ECMP_NH = (1 << MLF_LOOKUP_COMMIT_ECMP_NH_BIT), > > + > > + MLF_COMMIT_LB_AFF = (1 << MLF_COMMIT_LB_AFF_BIT), > > }; > > /* OVN logical fields > > diff --git a/lib/actions.c b/lib/actions.c > > index adbb42db4..5c68e5c3a 100644 > > --- a/lib/actions.c > > +++ b/lib/actions.c > > @@ -4600,6 +4600,366 @@ encode_CHK_ECMP_NH(const struct ovnact_result *res, > > MLF_LOOKUP_COMMIT_ECMP_NH_BIT, ofpacts); > > } > > +static void > > +parse_commit_lb_aff(struct action_context *ctx, > > + struct ovnact_commit_lb_aff *lb_aff) > > +{ > > + uint16_t timeout, port = 0; > > + char *ip_str; > > + int family; > > + > > + lexer_force_match(ctx->lexer, LEX_T_LPAREN); /* Skip '('. 
*/ > > + if (!lexer_match_id(ctx->lexer, "vip")) { > > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > > + return; > > + } > > + > > + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { > > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > > + return; > > + } > > + > > + if (ctx->lexer->token.type != LEX_T_STRING) { > > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > > + return; > > + } > > + > > + if (!ip_address_and_port_from_lb_key(ctx->lexer->token.s, &ip_str, > > + &port, &family)) { > > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > > + return; > > + } > > + > > + if (family == AF_INET) { > > + ovs_be32 ip4; > > + ip_parse(ip_str, &ip4); > > + in6_addr_set_mapped_ipv4(&lb_aff->vip, ip4); > > + } else { > > + ipv6_parse(ip_str, &lb_aff->vip); > > + } > > + > > + lb_aff->vip_port = port; > > + free(ip_str); > > + > > + lexer_get(ctx->lexer); > > + lexer_force_match(ctx->lexer, LEX_T_COMMA); > > + > > + if (!lexer_match_id(ctx->lexer, "backend")) { > > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > > + return; > > + } > > + > > + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { > > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > > + return; > > + } > > + > > + if (ctx->lexer->token.type != LEX_T_STRING) { > > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > > + return; > > + } > > + > > + if (!ip_address_and_port_from_lb_key(ctx->lexer->token.s, &ip_str, > > + &port, &family)) { > > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > > + return; > > + } > > + > > + if (family == AF_INET) { > > + ovs_be32 ip4; > > + ip_parse(ip_str, &ip4); > > + in6_addr_set_mapped_ipv4(&lb_aff->backend, ip4); > > + } else { > > + ipv6_parse(ip_str, &lb_aff->backend); > > + } > > + > > + lb_aff->backend_port = port; > > + free(ip_str); > > + > > + lexer_get(ctx->lexer); > > + lexer_force_match(ctx->lexer, LEX_T_COMMA); > > + > > + if (!lexer_match_id(ctx->lexer, "proto")) { > > + lexer_syntax_error(ctx->lexer, 
"invalid parameter"); > > + return; > > + } > > + > > + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { > > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > > + return; > > + } > > + > > + if (lexer_match_id(ctx->lexer, "tcp")) { > > + lb_aff->proto = IPPROTO_TCP; > > + } else if (lexer_match_id(ctx->lexer, "udp")) { > > + lb_aff->proto = IPPROTO_UDP; > > + } else if (lexer_match_id(ctx->lexer, "sctp")) { > > + lb_aff->proto = IPPROTO_SCTP; > > + } else { > > + lexer_syntax_error(ctx->lexer, "invalid protocol"); > > + return; > > + } > > + lexer_force_match(ctx->lexer, LEX_T_COMMA); > > + > > + if (!lexer_match_id(ctx->lexer, "timeout")) { > > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > > + return; > > + } > > + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { > > + lexer_syntax_error(ctx->lexer, "invalid parameter"); > > + return; > > + } > > + if (!action_parse_uint16(ctx, &timeout, "affinity timeout")) { > > + return; > > + } > > + lb_aff->timeout = timeout; > > + > > + lexer_force_match(ctx->lexer, LEX_T_RPAREN); /* Skip ')'. 
*/ > > + > > +} > > + > > +static void > > +format_COMMIT_LB_AFF(const struct ovnact_commit_lb_aff *lb_aff, struct ds *s) > > +{ > > + if (!IN6_IS_ADDR_V4MAPPED(&lb_aff->vip)) { > > + char ip_str[INET6_ADDRSTRLEN] = {}; > > + inet_ntop(AF_INET6, &lb_aff->vip, ip_str, INET6_ADDRSTRLEN); > > + ds_put_format(s, "commit_lb_aff(vip = \"[%s]", ip_str); > > + } else { > > + ovs_be32 ip = in6_addr_get_mapped_ipv4(&lb_aff->vip); > > + char *ip_str = xasprintf(IP_FMT, IP_ARGS(ip)); > > + ds_put_format(s, "commit_lb_aff(vip = \"%s", ip_str); > > + free(ip_str); > > + } > > + if (lb_aff->vip_port) { > > + ds_put_format(s, ":%d", lb_aff->vip_port); > > + } > > + ds_put_cstr(s, "\""); > > + > > + if (!IN6_IS_ADDR_V4MAPPED(&lb_aff->backend)) { > > + char ip_str[INET6_ADDRSTRLEN] = {}; > > + inet_ntop(AF_INET6, &lb_aff->backend, ip_str, INET6_ADDRSTRLEN); > > + ds_put_format(s, ", backend = \"[%s]", ip_str); > > + } else { > > + ovs_be32 ip = in6_addr_get_mapped_ipv4(&lb_aff->backend); > > + char *ip_str = xasprintf(IP_FMT, IP_ARGS(ip)); > > + ds_put_format(s, ", backend = \"%s", ip_str); > > + free(ip_str); > > + } > > + if (lb_aff->backend_port) { > > + ds_put_format(s, ":%d", lb_aff->backend_port); > > + } > > + ds_put_cstr(s, "\""); > > + > > + const char *proto; > > + switch (lb_aff->proto) { > > + case IPPROTO_UDP: > > + proto = "udp"; > > + break; > > + case IPPROTO_SCTP: > > + proto = "sctp"; > > + break; > > + case IPPROTO_TCP: > > + default: > > + proto = "tcp"; > > + break; > > + } > > + ds_put_format(s, ", proto = %s, timeout = %d);", > > + proto, lb_aff->timeout); > > +} > > + > > +static void > > +encode_COMMIT_LB_AFF(const struct ovnact_commit_lb_aff *lb_aff, > > + const struct ovnact_encode_params *ep OVS_UNUSED, > > + struct ofpbuf *ofpacts) > > +{ > > + bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&lb_aff->vip); > > + size_t ol_offset = ofpacts->size; > > + struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts); > > + struct match match = MATCH_CATCHALL_INITIALIZER; > > + 
struct ofpact_learn_spec *ol_spec; > > + unsigned int imm_bytes; > > + uint8_t *src_imm; > > + > > + ol->flags = NX_LEARN_F_DELETE_LEARNED; > > + ol->idle_timeout = lb_aff->timeout; /* seconds. */ > > + ol->priority = OFP_DEFAULT_PRIORITY; > > + ol->table_id = OFTABLE_CHK_LB_AFFINITY; > > + > > + /* Match on the same ETH type as the packet that created the new table. */ > > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > > + ol_spec->dst.field = mf_from_id(MFF_ETH_TYPE); > > + ol_spec->dst.ofs = 0; > > + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; > > + ol_spec->n_bits = ol_spec->dst.n_bits; > > + ol_spec->dst_type = NX_LEARN_DST_MATCH; > > + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; > > + union mf_value imm_eth_type = { > > + .be16 = ipv6 ? htons(ETH_TYPE_IPV6) : htons(ETH_TYPE_IP) > > + }; > > + mf_write_subfield_value(&ol_spec->dst, &imm_eth_type, &match); > > + /* Push value last, as this may reallocate 'ol_spec'. */ > > + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); > > + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); > > + memcpy(src_imm, &imm_eth_type, imm_bytes); > > + > > + /* IP src. */ > > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > > + ol_spec->dst.field = > > + ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC); > > + ol_spec->src.field = > > + ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC); > > + ol_spec->dst.ofs = 0; > > + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; > > + ol_spec->n_bits = ol_spec->dst.n_bits; > > + ol_spec->dst_type = NX_LEARN_DST_MATCH; > > + ol_spec->src_type = NX_LEARN_SRC_FIELD; > > + > > + /* IP dst. */ > > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > > + ol_spec->dst.field = > > + ipv6 ? 
mf_from_id(MFF_IPV6_DST) : mf_from_id(MFF_IPV4_DST); > > + union mf_value imm_ip; > > + if (ipv6) { > > + imm_ip = (union mf_value) { > > + .ipv6 = lb_aff->vip, > > + }; > > + } else { > > + ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&lb_aff->vip); > > + imm_ip = (union mf_value) { > > + .be32 = ip4, > > + }; > > + } > > + ol_spec->dst.ofs = 0; > > + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; > > + ol_spec->n_bits = ol_spec->dst.n_bits; > > + ol_spec->dst_type = NX_LEARN_DST_MATCH; > > + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; > > + mf_write_subfield_value(&ol_spec->dst, &imm_ip, &match); > > + > > + /* Push value last, as this may reallocate 'ol_spec' */ > > + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); > > + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); > > + memcpy(src_imm, &imm_ip, imm_bytes); > > + > > + /* IP proto. */ > > + union mf_value imm_proto = { > > + .u8 = lb_aff->proto, > > + }; > > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > > + ol_spec->dst.field = mf_from_id(MFF_IP_PROTO); > > + ol_spec->src.field = mf_from_id(MFF_IP_PROTO); > > + ol_spec->dst.ofs = 0; > > + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; > > + ol_spec->n_bits = ol_spec->dst.n_bits; > > + ol_spec->dst_type = NX_LEARN_DST_MATCH; > > + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; > > + mf_write_subfield_value(&ol_spec->dst, &imm_proto, &match); > > + /* Push value last, as this may reallocate 'ol_spec' */ > > + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); > > + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); > > + memcpy(src_imm, &imm_proto, imm_bytes); > > + > > + /* dst port */ > > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > > + switch (lb_aff->proto) { > > + case IPPROTO_TCP: > > + ol_spec->dst.field = mf_from_id(MFF_TCP_DST); > > + ol_spec->src.field = mf_from_id(MFF_TCP_DST); > > + break; > > + case IPPROTO_UDP: > > + ol_spec->dst.field = mf_from_id(MFF_UDP_DST); > > + ol_spec->src.field = 
mf_from_id(MFF_UDP_DST); > > + break; > > + case IPPROTO_SCTP: > > + ol_spec->dst.field = mf_from_id(MFF_SCTP_DST); > > + ol_spec->src.field = mf_from_id(MFF_SCTP_DST); > > + break; > > + default: > > + OVS_NOT_REACHED(); > > + break; > > + } > > + ol_spec->dst.ofs = 0; > > + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; > > + ol_spec->n_bits = ol_spec->dst.n_bits; > > + ol_spec->dst_type = NX_LEARN_DST_MATCH; > > + ol_spec->src_type = NX_LEARN_SRC_FIELD; > > + > > + /* Set MLF_LOOKUP_COMMIT_ECMP_NH_BIT for ecmp replies. */ > > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > > + ol_spec->dst.field = mf_from_id(MFF_LOG_FLAGS); > > + ol_spec->dst.ofs = MLF_COMMIT_LB_AFF_BIT; > > + ol_spec->dst.n_bits = 1; > > + ol_spec->n_bits = ol_spec->dst.n_bits; > > + ol_spec->dst_type = NX_LEARN_DST_LOAD; > > + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; > > + union mf_value imm_reg_value = { > > + .u8 = 1 > > + }; > > + mf_write_subfield_value(&ol_spec->dst, &imm_reg_value, &match); > > + > > + /* Push value last, as this may reallocate 'ol_spec' */ > > + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); > > + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); > > + ol = ofpacts->header; > > + memcpy(src_imm, &imm_reg_value, imm_bytes); > > + > > + /* Load backend IP in REG4/XXREG1. 
*/ > > + union mf_value imm_backend_ip; > > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > > + > > + if (ipv6) { > > + imm_backend_ip = (union mf_value) { > > + .ipv6 = lb_aff->backend, > > + }; > > + ol_spec->dst.field = mf_from_id(MFF_XXREG1); > > + } else { > > + ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&lb_aff->backend); > > + imm_backend_ip = (union mf_value) { > > + .be32 = ip4, > > + }; > > + ol_spec->dst.field = mf_from_id(MFF_REG4); > > + } > > + > > + ol_spec->dst_type = NX_LEARN_DST_LOAD; > > + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; > > + ol_spec->dst.ofs = 0; > > + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; > > + ol_spec->n_bits = ol_spec->dst.n_bits; > > + mf_write_subfield_value(&ol_spec->dst, &imm_backend_ip, &match); > > + /* Push value last, as this may reallocate 'ol_spec' */ > > + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); > > + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); > > + memcpy(src_imm, &imm_backend_ip, imm_bytes); > > + > > + /* Load backend port in REG8. 
*/ > > + union mf_value imm_backend_port; > > + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); > > + imm_backend_port = (union mf_value) { > > + .be16 = htons(lb_aff->backend_port), > > + }; > > + > > + ol_spec->dst.field = mf_from_id(MFF_REG8); > > + ol_spec->dst_type = NX_LEARN_DST_LOAD; > > + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; > > + ol_spec->dst.ofs = 0; > > + ol_spec->dst.n_bits = 8 * sizeof(lb_aff->backend_port); > > + ol_spec->n_bits = ol_spec->dst.n_bits; > > + mf_write_subfield_value(&ol_spec->dst, &imm_backend_port, &match); > > + /* Push value last, as this may reallocate 'ol_spec' */ > > + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); > > + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); > > + memcpy(src_imm, &imm_backend_port, imm_bytes); > > + > > + ol = ofpbuf_at_assert(ofpacts, ol_offset, sizeof *ol); > > + ofpact_finish_LEARN(ofpacts, &ol); > > +} > > + > > +static void > > +ovnact_commit_lb_aff_free(struct ovnact_commit_lb_aff *ecmp_nh OVS_UNUSED) > > +{ > > +} > > + > > /* Parses an assignment or exchange or put_dhcp_opts action. */ > > static void > > parse_set_action(struct action_context *ctx) > > @@ -4790,6 +5150,8 @@ parse_action(struct action_context *ctx) > > parse_put_fdb(ctx, ovnact_put_PUT_FDB(ctx->ovnacts)); > > } else if (lexer_match_id(ctx->lexer, "commit_ecmp_nh")) { > > parse_commit_ecmp_nh(ctx, ovnact_put_COMMIT_ECMP_NH(ctx->ovnacts)); > > + } else if (lexer_match_id(ctx->lexer, "commit_lb_aff")) { > > + parse_commit_lb_aff(ctx, ovnact_put_COMMIT_LB_AFF(ctx->ovnacts)); > > } else { > > lexer_syntax_error(ctx->lexer, "expecting action"); > > } > > diff --git a/ovn-sb.xml b/ovn-sb.xml > > index 315d60853..fae62c09d 100644 > > --- a/ovn-sb.xml > > +++ b/ovn-sb.xml > > @@ -2624,6 +2624,41 @@ tcp.flags = RST; > > register <var>R</var> is set to 1. 
> > </p> > > </dd> > > + > > + <dt> > > + <code> > > + commit_lb_aff(<var>vip</var>, <var>backend</var>, > > + <var>proto</var>, <var>timeout</var>); > > + </code> > > + </dt> > > + <dd> > > + <p> > > + <b>Parameters</b>: load-balancer virtual ip:port <var>vip</var>, > > + load-balancer backend ip:port <var>backend</var>, load-balancer > > + protocol <var>proto</var>, affinity timeout <var>timeout</var>. > > + </p> > > + > > + <p> > > + This action translates to an openflow "learn" action that inserts > > + a new flow in tables 78. > > s/tables/table/ > > > + </p> > > + > > + <ul> > > + <li> > > + Match on the 4-tuple in table 78: <code>nw_src=ip client</code>, > > + <code>nw_dst=vip ip</code>, <code>ip_proto</code>, > > + <code>tp_dst=vip port</code> and set <code>reg9[6]</code> to 1, > > + <code>reg4</code> and <code>reg8</code> to backend ip and port > > + respectively. For IPv6 register <code>xxreg1</code> is used to > > + store the backend ip. > > + </li> > > + </ul> > > + > > + <p> > > + This action is applied for new connection received by a specific > > + load-balancer. > > + </p> > > + </dd> > > </dl> > > </column> > > diff --git a/tests/ovn.at b/tests/ovn.at > > index f8b8db4df..fbf281494 100644 > > --- a/tests/ovn.at > > +++ b/tests/ovn.at > > @@ -2125,6 +2125,13 @@ reg9[5] = chk_ecmp_nh_mac(); > > reg9[5] = chk_ecmp_nh(); > > encodes as set_field:0/0x2000->reg10,resubmit(,77),move:NXM_NX_REG10[13]->OXM_OF_PKT_REG4[5] > > +# commit_lb_aff > > +commit_lb_aff(vip = "172.16.0.123:8080", backend = "10.0.0.3:8080", proto = tcp, timeout = 30); > > + encodes as learn(table=78,idle_timeout=30,delete_learned,eth_type=0x800,NXM_OF_IP_SRC[],ip_dst=172.16.0.123,nw_proto=6,NXM_OF_TCP_DST[],load:0x1->NXM_NX_REG10[14],load:0xa000003->NXM_NX_REG4[],load:0x1f90->NXM_NX_REG8[0..15]) > > Just for my own education, what do the NXM_OF_IP_SRC[] and NXM_OF_TCP_DST[] > fields by themselves mean? 
I see from the code that it's a match, rather > than an action, but the syntax here is odd. Does it just mean that these > registers need to be non-zero? I think it stands for "what is in the packet that is hitting the rule". E.g. this is what we have in tables 16 and 78: cookie= ...., table=16, ...., actions=learn(table=78,idle_timeout=60,delete_learned,eth_type=0x800,NXM_OF_IP_SRC[],ip_dst=172.16.1.100,nw_proto=6,NXM_OF_TCP_DST[],load:0x1->NXM_NX_REG10[14],load:0xc0a80102->NXM_NX_REG4[],load:0x50->NXM_NX_REG8[0..15]),resubmit(,17) cookie=0x0, duration=0.390s, table=78, n_packets=1, n_bytes=74, idle_timeout=60, idle_age=0, tcp,nw_src=172.16.1.2,nw_dst=172.16.1.100,tp_dst=80 actions=load:0x1->NXM_NX_REG10[14],load:0xc0a80102->NXM_NX_REG4[],load:0x50->NXM_NX_REG8[0..15] Regards, Lorenzo > > > + > > +commit_lb_aff(vip = "[::1]:8080", backend = "[::2]:8080", proto = tcp, timeout = 30); > > + encodes as learn(table=78,idle_timeout=30,delete_learned,eth_type=0x86dd,NXM_NX_IPV6_SRC[],ipv6_dst=::1,nw_proto=6,NXM_OF_TCP_DST[],load:0x1->NXM_NX_REG10[14],load:0x2->NXM_NX_XXREG1[],load:0x1f90->NXM_NX_REG8[0..15]) > > + > > # push/pop > > push(xxreg0);push(xxreg1[10..20]);push(eth.src);pop(xxreg0[0..47]);pop(xxreg0[48..57]);pop(xxreg1); > > formats as push(xxreg0); push(xxreg1[10..20]); push(eth.src); pop(xxreg0[0..47]); pop(xxreg0[48..57]); pop(xxreg1); > > diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c > > index d9e7129d9..5adfd2521 100644 > > --- a/utilities/ovn-trace.c > > +++ b/utilities/ovn-trace.c > > @@ -3298,6 +3298,8 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, > > break; > > case OVNACT_CHK_ECMP_NH: > > break; > > + case OVNACT_COMMIT_LB_AFF: > > + break; > > } > > } > > ofpbuf_uninit(&stack); >
diff --git a/controller/lflow.h b/controller/lflow.h index 8cbe312ca..4be079555 100644 --- a/controller/lflow.h +++ b/controller/lflow.h @@ -79,6 +79,7 @@ struct uuid; #define OFTABLE_CHK_OUT_PORT_SEC 75 #define OFTABLE_ECMP_NH_MAC 76 #define OFTABLE_ECMP_NH 77 +#define OFTABLE_CHK_LB_AFFINITY 78 enum ref_type { REF_TYPE_ADDRSET, diff --git a/include/ovn/actions.h b/include/ovn/actions.h index d7ee84dac..597cbb8e3 100644 --- a/include/ovn/actions.h +++ b/include/ovn/actions.h @@ -121,6 +121,7 @@ struct ovn_extend_table; OVNACT(COMMIT_ECMP_NH, ovnact_commit_ecmp_nh) \ OVNACT(CHK_ECMP_NH_MAC, ovnact_result) \ OVNACT(CHK_ECMP_NH, ovnact_result) \ + OVNACT(COMMIT_LB_AFF, ovnact_commit_lb_aff) \ /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */ enum OVS_PACKED_ENUM ovnact_type { @@ -463,6 +464,20 @@ struct ovnact_commit_ecmp_nh { uint8_t proto; }; +/* OVNACT_COMMIT_LB_AFF. */ +struct ovnact_commit_lb_aff { + struct ovnact ovnact; + + struct in6_addr vip; + uint16_t vip_port; + uint8_t proto; + + struct in6_addr backend; + uint16_t backend_port; + + uint16_t timeout; +}; + /* Internal use by the helpers below. 
*/ void ovnact_init(struct ovnact *, enum ovnact_type, size_t len); void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len); diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h index 3db7265e4..52f40de38 100644 --- a/include/ovn/logical-fields.h +++ b/include/ovn/logical-fields.h @@ -71,6 +71,7 @@ enum mff_log_flags_bits { MLF_USE_SNAT_ZONE = 11, MLF_CHECK_PORT_SEC_BIT = 12, MLF_LOOKUP_COMMIT_ECMP_NH_BIT = 13, + MLF_COMMIT_LB_AFF_BIT = 14, }; /* MFF_LOG_FLAGS_REG flag assignments */ @@ -116,6 +117,8 @@ enum mff_log_flags { MLF_LOCALPORT = (1 << MLF_LOCALPORT_BIT), MLF_LOOKUP_COMMIT_ECMP_NH = (1 << MLF_LOOKUP_COMMIT_ECMP_NH_BIT), + + MLF_COMMIT_LB_AFF = (1 << MLF_COMMIT_LB_AFF_BIT), }; /* OVN logical fields diff --git a/lib/actions.c b/lib/actions.c index adbb42db4..5c68e5c3a 100644 --- a/lib/actions.c +++ b/lib/actions.c @@ -4600,6 +4600,366 @@ encode_CHK_ECMP_NH(const struct ovnact_result *res, MLF_LOOKUP_COMMIT_ECMP_NH_BIT, ofpacts); } +static void +parse_commit_lb_aff(struct action_context *ctx, + struct ovnact_commit_lb_aff *lb_aff) +{ + uint16_t timeout, port = 0; + char *ip_str; + int family; + + lexer_force_match(ctx->lexer, LEX_T_LPAREN); /* Skip '('. 
*/ + if (!lexer_match_id(ctx->lexer, "vip")) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (ctx->lexer->token.type != LEX_T_STRING) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (!ip_address_and_port_from_lb_key(ctx->lexer->token.s, &ip_str, + &port, &family)) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (family == AF_INET) { + ovs_be32 ip4; + ip_parse(ip_str, &ip4); + in6_addr_set_mapped_ipv4(&lb_aff->vip, ip4); + } else { + ipv6_parse(ip_str, &lb_aff->vip); + } + + lb_aff->vip_port = port; + free(ip_str); + + lexer_get(ctx->lexer); + lexer_force_match(ctx->lexer, LEX_T_COMMA); + + if (!lexer_match_id(ctx->lexer, "backend")) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (ctx->lexer->token.type != LEX_T_STRING) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (!ip_address_and_port_from_lb_key(ctx->lexer->token.s, &ip_str, + &port, &family)) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (family == AF_INET) { + ovs_be32 ip4; + ip_parse(ip_str, &ip4); + in6_addr_set_mapped_ipv4(&lb_aff->backend, ip4); + } else { + ipv6_parse(ip_str, &lb_aff->backend); + } + + lb_aff->backend_port = port; + free(ip_str); + + lexer_get(ctx->lexer); + lexer_force_match(ctx->lexer, LEX_T_COMMA); + + if (!lexer_match_id(ctx->lexer, "proto")) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (lexer_match_id(ctx->lexer, "tcp")) { + lb_aff->proto = IPPROTO_TCP; + } else if (lexer_match_id(ctx->lexer, "udp")) { 
+ lb_aff->proto = IPPROTO_UDP; + } else if (lexer_match_id(ctx->lexer, "sctp")) { + lb_aff->proto = IPPROTO_SCTP; + } else { + lexer_syntax_error(ctx->lexer, "invalid protocol"); + return; + } + lexer_force_match(ctx->lexer, LEX_T_COMMA); + + if (!lexer_match_id(ctx->lexer, "timeout")) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + if (!action_parse_uint16(ctx, &timeout, "affinity timeout")) { + return; + } + lb_aff->timeout = timeout; + + lexer_force_match(ctx->lexer, LEX_T_RPAREN); /* Skip ')'. */ + +} + +static void +format_COMMIT_LB_AFF(const struct ovnact_commit_lb_aff *lb_aff, struct ds *s) +{ + if (!IN6_IS_ADDR_V4MAPPED(&lb_aff->vip)) { + char ip_str[INET6_ADDRSTRLEN] = {}; + inet_ntop(AF_INET6, &lb_aff->vip, ip_str, INET6_ADDRSTRLEN); + ds_put_format(s, "commit_lb_aff(vip = \"[%s]", ip_str); + } else { + ovs_be32 ip = in6_addr_get_mapped_ipv4(&lb_aff->vip); + char *ip_str = xasprintf(IP_FMT, IP_ARGS(ip)); + ds_put_format(s, "commit_lb_aff(vip = \"%s", ip_str); + free(ip_str); + } + if (lb_aff->vip_port) { + ds_put_format(s, ":%d", lb_aff->vip_port); + } + ds_put_cstr(s, "\""); + + if (!IN6_IS_ADDR_V4MAPPED(&lb_aff->backend)) { + char ip_str[INET6_ADDRSTRLEN] = {}; + inet_ntop(AF_INET6, &lb_aff->backend, ip_str, INET6_ADDRSTRLEN); + ds_put_format(s, ", backend = \"[%s]", ip_str); + } else { + ovs_be32 ip = in6_addr_get_mapped_ipv4(&lb_aff->backend); + char *ip_str = xasprintf(IP_FMT, IP_ARGS(ip)); + ds_put_format(s, ", backend = \"%s", ip_str); + free(ip_str); + } + if (lb_aff->backend_port) { + ds_put_format(s, ":%d", lb_aff->backend_port); + } + ds_put_cstr(s, "\""); + + const char *proto; + switch (lb_aff->proto) { + case IPPROTO_UDP: + proto = "udp"; + break; + case IPPROTO_SCTP: + proto = "sctp"; + break; + case IPPROTO_TCP: + default: + proto = "tcp"; + break; + } + ds_put_format(s, ", proto = %s, timeout 
= %d);", + proto, lb_aff->timeout); +} + +static void +encode_COMMIT_LB_AFF(const struct ovnact_commit_lb_aff *lb_aff, + const struct ovnact_encode_params *ep OVS_UNUSED, + struct ofpbuf *ofpacts) +{ + bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&lb_aff->vip); + size_t ol_offset = ofpacts->size; + struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts); + struct match match = MATCH_CATCHALL_INITIALIZER; + struct ofpact_learn_spec *ol_spec; + unsigned int imm_bytes; + uint8_t *src_imm; + + ol->flags = NX_LEARN_F_DELETE_LEARNED; + ol->idle_timeout = lb_aff->timeout; /* seconds. */ + ol->priority = OFP_DEFAULT_PRIORITY; + ol->table_id = OFTABLE_CHK_LB_AFFINITY; + + /* Match on the same ETH type as the packet that created the new table. */ + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + ol_spec->dst.field = mf_from_id(MFF_ETH_TYPE); + ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; + ol_spec->n_bits = ol_spec->dst.n_bits; + ol_spec->dst_type = NX_LEARN_DST_MATCH; + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; + union mf_value imm_eth_type = { + .be16 = ipv6 ? htons(ETH_TYPE_IPV6) : htons(ETH_TYPE_IP) + }; + mf_write_subfield_value(&ol_spec->dst, &imm_eth_type, &match); + /* Push value last, as this may reallocate 'ol_spec'. */ + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); + memcpy(src_imm, &imm_eth_type, imm_bytes); + + /* IP src. */ + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + ol_spec->dst.field = + ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC); + ol_spec->src.field = + ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC); + ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; + ol_spec->n_bits = ol_spec->dst.n_bits; + ol_spec->dst_type = NX_LEARN_DST_MATCH; + ol_spec->src_type = NX_LEARN_SRC_FIELD; + + /* IP dst. */ + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + ol_spec->dst.field = + ipv6 ? 
mf_from_id(MFF_IPV6_DST) : mf_from_id(MFF_IPV4_DST); + union mf_value imm_ip; + if (ipv6) { + imm_ip = (union mf_value) { + .ipv6 = lb_aff->vip, + }; + } else { + ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&lb_aff->vip); + imm_ip = (union mf_value) { + .be32 = ip4, + }; + } + ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; + ol_spec->n_bits = ol_spec->dst.n_bits; + ol_spec->dst_type = NX_LEARN_DST_MATCH; + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; + mf_write_subfield_value(&ol_spec->dst, &imm_ip, &match); + + /* Push value last, as this may reallocate 'ol_spec' */ + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); + memcpy(src_imm, &imm_ip, imm_bytes); + + /* IP proto. */ + union mf_value imm_proto = { + .u8 = lb_aff->proto, + }; + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + ol_spec->dst.field = mf_from_id(MFF_IP_PROTO); + ol_spec->src.field = mf_from_id(MFF_IP_PROTO); + ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; + ol_spec->n_bits = ol_spec->dst.n_bits; + ol_spec->dst_type = NX_LEARN_DST_MATCH; + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; + mf_write_subfield_value(&ol_spec->dst, &imm_proto, &match); + /* Push value last, as this may reallocate 'ol_spec' */ + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); + memcpy(src_imm, &imm_proto, imm_bytes); + + /* dst port */ + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + switch (lb_aff->proto) { + case IPPROTO_TCP: + ol_spec->dst.field = mf_from_id(MFF_TCP_DST); + ol_spec->src.field = mf_from_id(MFF_TCP_DST); + break; + case IPPROTO_UDP: + ol_spec->dst.field = mf_from_id(MFF_UDP_DST); + ol_spec->src.field = mf_from_id(MFF_UDP_DST); + break; + case IPPROTO_SCTP: + ol_spec->dst.field = mf_from_id(MFF_SCTP_DST); + ol_spec->src.field = mf_from_id(MFF_SCTP_DST); + break; + default: + OVS_NOT_REACHED(); + break; + } + 
ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; + ol_spec->n_bits = ol_spec->dst.n_bits; + ol_spec->dst_type = NX_LEARN_DST_MATCH; + ol_spec->src_type = NX_LEARN_SRC_FIELD; + + /* Set MLF_LOOKUP_COMMIT_ECMP_NH_BIT for ecmp replies. */ + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + ol_spec->dst.field = mf_from_id(MFF_LOG_FLAGS); + ol_spec->dst.ofs = MLF_COMMIT_LB_AFF_BIT; + ol_spec->dst.n_bits = 1; + ol_spec->n_bits = ol_spec->dst.n_bits; + ol_spec->dst_type = NX_LEARN_DST_LOAD; + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; + union mf_value imm_reg_value = { + .u8 = 1 + }; + mf_write_subfield_value(&ol_spec->dst, &imm_reg_value, &match); + + /* Push value last, as this may reallocate 'ol_spec' */ + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); + ol = ofpacts->header; + memcpy(src_imm, &imm_reg_value, imm_bytes); + + /* Load backend IP in REG4/XXREG1. */ + union mf_value imm_backend_ip; + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + + if (ipv6) { + imm_backend_ip = (union mf_value) { + .ipv6 = lb_aff->backend, + }; + ol_spec->dst.field = mf_from_id(MFF_XXREG1); + } else { + ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&lb_aff->backend); + imm_backend_ip = (union mf_value) { + .be32 = ip4, + }; + ol_spec->dst.field = mf_from_id(MFF_REG4); + } + + ol_spec->dst_type = NX_LEARN_DST_LOAD; + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; + ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; + ol_spec->n_bits = ol_spec->dst.n_bits; + mf_write_subfield_value(&ol_spec->dst, &imm_backend_ip, &match); + /* Push value last, as this may reallocate 'ol_spec' */ + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); + memcpy(src_imm, &imm_backend_ip, imm_bytes); + + /* Load backend port in REG8. 
*/ + union mf_value imm_backend_port; + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + imm_backend_port = (union mf_value) { + .be16 = htons(lb_aff->backend_port), + }; + + ol_spec->dst.field = mf_from_id(MFF_REG8); + ol_spec->dst_type = NX_LEARN_DST_LOAD; + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; + ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = 8 * sizeof(lb_aff->backend_port); + ol_spec->n_bits = ol_spec->dst.n_bits; + mf_write_subfield_value(&ol_spec->dst, &imm_backend_port, &match); + /* Push value last, as this may reallocate 'ol_spec' */ + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); + memcpy(src_imm, &imm_backend_port, imm_bytes); + + ol = ofpbuf_at_assert(ofpacts, ol_offset, sizeof *ol); + ofpact_finish_LEARN(ofpacts, &ol); +} + +static void +ovnact_commit_lb_aff_free(struct ovnact_commit_lb_aff *ecmp_nh OVS_UNUSED) +{ +} + /* Parses an assignment or exchange or put_dhcp_opts action. */ static void parse_set_action(struct action_context *ctx) @@ -4790,6 +5150,8 @@ parse_action(struct action_context *ctx) parse_put_fdb(ctx, ovnact_put_PUT_FDB(ctx->ovnacts)); } else if (lexer_match_id(ctx->lexer, "commit_ecmp_nh")) { parse_commit_ecmp_nh(ctx, ovnact_put_COMMIT_ECMP_NH(ctx->ovnacts)); + } else if (lexer_match_id(ctx->lexer, "commit_lb_aff")) { + parse_commit_lb_aff(ctx, ovnact_put_COMMIT_LB_AFF(ctx->ovnacts)); } else { lexer_syntax_error(ctx->lexer, "expecting action"); } diff --git a/ovn-sb.xml b/ovn-sb.xml index 315d60853..fae62c09d 100644 --- a/ovn-sb.xml +++ b/ovn-sb.xml @@ -2624,6 +2624,41 @@ tcp.flags = RST; register <var>R</var> is set to 1. 
</p> </dd> + + <dt> + <code> + commit_lb_aff(<var>vip</var>, <var>backend</var>, + <var>proto</var>, <var>timeout</var>); + </code> + </dt> + <dd> + <p> + <b>Parameters</b>: load-balancer virtual ip:port <var>vip</var>, + load-balancer backend ip:port <var>backend</var>, load-balancer + protocol <var>proto</var>, affinity timeout <var>timeout</var>. + </p> + + <p> + This action translates to an openflow "learn" action that inserts + a new flow in table 78. + </p> + + <ul> + <li> + Match on the 4-tuple in table 78: <code>nw_src=ip client</code>, + <code>nw_dst=vip ip</code>, <code>ip_proto</code>, + <code>tp_dst=vip port</code> and set <code>reg9[6]</code> to 1, + <code>reg4</code> and <code>reg8</code> to backend ip and port + respectively. For IPv6 register <code>xxreg1</code> is used to + store the backend ip. + </li> + </ul> + + <p> + This action is applied for new connections received by a specific + load-balancer. + </p> + </dd> </dl> </column> diff --git a/tests/ovn.at b/tests/ovn.at index f8b8db4df..fbf281494 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -2125,6 +2125,13 @@ reg9[5] = chk_ecmp_nh_mac(); reg9[5] = chk_ecmp_nh(); encodes as set_field:0/0x2000->reg10,resubmit(,77),move:NXM_NX_REG10[13]->OXM_OF_PKT_REG4[5] +# commit_lb_aff +commit_lb_aff(vip = "172.16.0.123:8080", backend = "10.0.0.3:8080", proto = tcp, timeout = 30); + encodes as learn(table=78,idle_timeout=30,delete_learned,eth_type=0x800,NXM_OF_IP_SRC[],ip_dst=172.16.0.123,nw_proto=6,NXM_OF_TCP_DST[],load:0x1->NXM_NX_REG10[14],load:0xa000003->NXM_NX_REG4[],load:0x1f90->NXM_NX_REG8[0..15]) + +commit_lb_aff(vip = "[::1]:8080", backend = "[::2]:8080", proto = tcp, timeout = 30); + encodes as learn(table=78,idle_timeout=30,delete_learned,eth_type=0x86dd,NXM_NX_IPV6_SRC[],ipv6_dst=::1,nw_proto=6,NXM_OF_TCP_DST[],load:0x1->NXM_NX_REG10[14],load:0x2->NXM_NX_XXREG1[],load:0x1f90->NXM_NX_REG8[0..15]) + # push/pop
push(xxreg0);push(xxreg1[10..20]);push(eth.src);pop(xxreg0[0..47]);pop(xxreg0[48..57]);pop(xxreg1); formats as push(xxreg0); push(xxreg1[10..20]); push(eth.src); pop(xxreg0[0..47]); pop(xxreg0[48..57]); pop(xxreg1); diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c index d9e7129d9..5adfd2521 100644 --- a/utilities/ovn-trace.c +++ b/utilities/ovn-trace.c @@ -3298,6 +3298,8 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, break; case OVNACT_CHK_ECMP_NH: break; + case OVNACT_COMMIT_LB_AFF: + break; } } ofpbuf_uninit(&stack);
commit_lb_aff action translates to an openflow "learn" action that inserts a new flow in the OFTABLE_CHK_LB_AFFINITY table. The new flow is used to match on the 5-tuple and set REGBIT_KNOWN_LB_SESSION bit. Moreover the new flow stores backend IP and port in register REG4 and REG8[0..15] respectively. Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> --- controller/lflow.h | 1 + include/ovn/actions.h | 15 ++ include/ovn/logical-fields.h | 3 + lib/actions.c | 362 +++++++++++++++++++++++++++++++++++ ovn-sb.xml | 35 ++++ tests/ovn.at | 7 + utilities/ovn-trace.c | 2 + 7 files changed, 425 insertions(+)