From patchwork Wed Nov 29 21:59:47 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mark Michelson X-Patchwork-Id: 842779 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=openvswitch.org (client-ip=140.211.169.12; helo=mail.linuxfoundation.org; envelope-from=ovs-dev-bounces@openvswitch.org; receiver=) Received: from mail.linuxfoundation.org (mail.linuxfoundation.org [140.211.169.12]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3ynDwh0Lh3z9sNc for ; Thu, 30 Nov 2017 09:00:04 +1100 (AEDT) Received: from mail.linux-foundation.org (localhost [127.0.0.1]) by mail.linuxfoundation.org (Postfix) with ESMTP id 1FE97D56; Wed, 29 Nov 2017 22:00:02 +0000 (UTC) X-Original-To: dev@openvswitch.org Delivered-To: ovs-dev@mail.linuxfoundation.org Received: from smtp1.linuxfoundation.org (smtp1.linux-foundation.org [172.17.192.35]) by mail.linuxfoundation.org (Postfix) with ESMTPS id 8A082C05 for ; Wed, 29 Nov 2017 22:00:00 +0000 (UTC) X-Greylist: domain auto-whitelisted by SQLgrey-1.7.6 Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by smtp1.linuxfoundation.org (Postfix) with ESMTPS id 1F12B1AE for ; Wed, 29 Nov 2017 22:00:00 +0000 (UTC) Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 905C480477 for ; Wed, 29 Nov 2017 21:59:59 +0000 (UTC) Received: from monae.redhat.com (ovpn-122-239.rdu2.redhat.com [10.10.122.239]) by smtp.corp.redhat.com (Postfix) with ESMTP id 52EAF5D6A3 for ; Wed, 29 Nov 2017 21:59:59 +0000 (UTC) From: Mark Michelson To: dev@openvswitch.org Date: Wed, 29 Nov 2017 15:59:47 
-0600 Message-Id: <20171129215948.4879-2-mmichels@redhat.com> In-Reply-To: <20171129215948.4879-1-mmichels@redhat.com> References: <20171129215948.4879-1-mmichels@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.28]); Wed, 29 Nov 2017 21:59:59 +0000 (UTC) X-Spam-Status: No, score=-6.9 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI, T_RP_MATCHES_RCVD autolearn=ham version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on smtp1.linux-foundation.org Subject: [ovs-dev] [PATCH v6 1/2] OVN: Add multicast local-only flag. X-BeenThere: ovs-dev@openvswitch.org X-Mailman-Version: 2.1.12 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Sender: ovs-dev-bounces@openvswitch.org Errors-To: ovs-dev-bounces@openvswitch.org When this flag is set, then a multicast packet that would normally be delivered to ports on multiple hypervisors is only delivered to ports on the local hypervisor. The primary known use case for this is when multicast packets originate from ovn-controller. Multiple ovn-controllers will be attempting to send out those multicast packets, and so each should only be responsible for delivering those packets to their local ports. Signed-off-by: Mark Michelson --- ovn/controller/physical.c | 15 +++++++++++++++ ovn/lib/logical-fields.h | 6 ++++++ ovn/ovn-architecture.7.xml | 10 ++++++++++ 3 files changed, 31 insertions(+) diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c index faeed2208..5a80e2cda 100644 --- a/ovn/controller/physical.c +++ b/ovn/controller/physical.c @@ -995,6 +995,21 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve, struct ofpbuf remote_ofpacts; ofpbuf_init(&remote_ofpacts, 0); SBREC_MULTICAST_GROUP_FOR_EACH (mc, ctx->ovnsb_idl) { + /* Table 32, priority 150. 
+ * ======================= + * + * Multicast packets that should not be sent to other hypervisors. + */ + struct match match = MATCH_CATCHALL_INITIALIZER; + match_set_metadata(&match, htonll(mc->datapath->tunnel_key)); + match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, mc->tunnel_key); + match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, + MLF_LOCAL_ONLY, MLF_LOCAL_ONLY); + ofpbuf_clear(&ofpacts); + put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts); + ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 150, 0, &match, + &ofpacts); + consider_mc_group(mff_ovn_geneve, ct_zones, local_datapaths, chassis, mc, &ofpacts, &remote_ofpacts, flow_table); } diff --git a/ovn/lib/logical-fields.h b/ovn/lib/logical-fields.h index 696c529be..b1dbb035a 100644 --- a/ovn/lib/logical-fields.h +++ b/ovn/lib/logical-fields.h @@ -49,6 +49,7 @@ enum mff_log_flags_bits { MLF_RCV_FROM_VXLAN_BIT = 1, MLF_FORCE_SNAT_FOR_DNAT_BIT = 2, MLF_FORCE_SNAT_FOR_LB_BIT = 3, + MLF_LOCAL_ONLY_BIT = 4, }; /* MFF_LOG_FLAGS_REG flag assignments */ @@ -69,6 +70,11 @@ enum mff_log_flags { /* Indicate that a packet needs a force SNAT in the gateway router when * load-balancing has taken place. */ MLF_FORCE_SNAT_FOR_LB = (1 << MLF_FORCE_SNAT_FOR_LB_BIT), + + /* Indicate that a packet that should be distributed across multiple + * hypervisors should instead only be output to local targets + */ + MLF_LOCAL_ONLY = (1 << MLF_LOCAL_ONLY_BIT), }; #endif /* ovn/lib/logical-fields.h */ diff --git a/ovn/ovn-architecture.7.xml b/ovn/ovn-architecture.7.xml index b13b41177..808b9329e 100644 --- a/ovn/ovn-architecture.7.xml +++ b/ovn/ovn-architecture.7.xml @@ -1027,6 +1027,16 @@ their traffic should never go out through a tunnel.
  • + A higher-priority rule to match packets that have the MLF_LOCAL_ONLY + logical flow flag set, and whose destination is a multicast address. + This flag indicates that the packet should not be delivered to remote + hypervisors, even if the multicast destination includes ports on + remote hypervisors. This flag is used when + ovn-controller is the originator of the multicast packet. + Since each ovn-controller instance is originating these + packets, the packets only need to be delivered to local ports. +
  • +
  • A fallback flow that resubmits to table 33 if there is no other match.
  • From patchwork Wed Nov 29 21:59:48 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mark Michelson X-Patchwork-Id: 842781 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=openvswitch.org (client-ip=140.211.169.12; helo=mail.linuxfoundation.org; envelope-from=ovs-dev-bounces@openvswitch.org; receiver=) Received: from mail.linuxfoundation.org (mail.linuxfoundation.org [140.211.169.12]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3ynDy16dkjz9s7g for ; Thu, 30 Nov 2017 09:01:13 +1100 (AEDT) Received: from mail.linux-foundation.org (localhost [127.0.0.1]) by mail.linuxfoundation.org (Postfix) with ESMTP id 10F58D63; Wed, 29 Nov 2017 22:00:05 +0000 (UTC) X-Original-To: dev@openvswitch.org Delivered-To: ovs-dev@mail.linuxfoundation.org Received: from smtp1.linuxfoundation.org (smtp1.linux-foundation.org [172.17.192.35]) by mail.linuxfoundation.org (Postfix) with ESMTPS id 33EF9D5B for ; Wed, 29 Nov 2017 22:00:02 +0000 (UTC) X-Greylist: domain auto-whitelisted by SQLgrey-1.7.6 Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by smtp1.linuxfoundation.org (Postfix) with ESMTPS id 753741B4 for ; Wed, 29 Nov 2017 22:00:00 +0000 (UTC) Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 0A98AC0587D2 for ; Wed, 29 Nov 2017 22:00:00 +0000 (UTC) Received: from monae.redhat.com (ovpn-122-239.rdu2.redhat.com [10.10.122.239]) by smtp.corp.redhat.com (Postfix) with ESMTP id B03B65D6A3 for ; Wed, 29 Nov 2017 21:59:59 +0000 (UTC) From: Mark Michelson To: dev@openvswitch.org Date: Wed, 29 Nov 2017 
15:59:48 -0600 Message-Id: <20171129215948.4879-3-mmichels@redhat.com> In-Reply-To: <20171129215948.4879-1-mmichels@redhat.com> References: <20171129215948.4879-1-mmichels@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.32]); Wed, 29 Nov 2017 22:00:00 +0000 (UTC) X-Spam-Status: No, score=-6.9 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI, T_RP_MATCHES_RCVD autolearn=ham version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on smtp1.linux-foundation.org Subject: [ovs-dev] [PATCH v6 2/2] OVN: Add support for periodic router advertisements. X-BeenThere: ovs-dev@openvswitch.org X-Mailman-Version: 2.1.12 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Sender: ovs-dev-bounces@openvswitch.org Errors-To: ovs-dev-bounces@openvswitch.org This change adds three new options to the Northbound Logical_Router_Port's ipv6_ra_configs option: * send_periodic: If set to "true", then OVN will send periodic router advertisements out of this router port. * max_interval: The maximum amount of time to wait between sending periodic router advertisements. * min_interval: The minimum amount of time to wait between sending periodic router advertisements. When send_periodic is true, then IPv6 RA configs, as well as some layer 2 and layer 3 information about the router port, are copied to the southbound database. From there, ovn-controller can use this information to know when to send periodic RAs and what to send in them. Because periodic RAs originate from each ovn-controller, the new local-only flag is set on the packet so that ports don't receive an overabundance of RAs. 
Signed-off-by: Mark Michelson --- lib/packets.c | 4 +- lib/packets.h | 7 ++ ovn/controller/pinctrl.c | 300 +++++++++++++++++++++++++++++++++++++++++++++++ ovn/northd/ovn-northd.c | 75 +++++++++++- ovn/ovn-nb.xml | 20 ++++ tests/ovn-northd.at | 110 +++++++++++++++++ tests/ovn.at | 150 ++++++++++++++++++++++++ 7 files changed, 663 insertions(+), 3 deletions(-) diff --git a/lib/packets.c b/lib/packets.c index f58937085..fcf0ba03a 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -1644,7 +1644,7 @@ compose_nd_ra(struct dp_packet *b, const struct in6_addr *ipv6_src, const struct in6_addr *ipv6_dst, uint8_t cur_hop_limit, uint8_t mo_flags, ovs_be16 router_lt, ovs_be32 reachable_time, - ovs_be32 retrans_timer, ovs_be32 mtu) + ovs_be32 retrans_timer, uint32_t mtu) { /* Don't compose Router Advertisement packet with MTU Option if mtu * value is 0. */ @@ -1676,7 +1676,7 @@ compose_nd_ra(struct dp_packet *b, mtu_opt->type = ND_OPT_MTU; mtu_opt->len = 1; mtu_opt->reserved = 0; - put_16aligned_be32(&mtu_opt->mtu, mtu); + put_16aligned_be32(&mtu_opt->mtu, htonl(mtu)); } ra->icmph.icmp6_cksum = 0; diff --git a/lib/packets.h b/lib/packets.h index 13ea46da2..3daf33b9b 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -976,6 +976,7 @@ BUILD_ASSERT_DECL(ND_PREFIX_OPT_LEN == sizeof(struct ovs_nd_prefix_opt)); /* Neighbor Discovery option: MTU. */ #define ND_MTU_OPT_LEN 8 +#define ND_MTU_DEFAULT 0 struct ovs_nd_mtu_opt { uint8_t type; /* ND_OPT_MTU */ uint8_t len; /* Always 1. */ @@ -1015,6 +1016,12 @@ BUILD_ASSERT_DECL(RA_MSG_LEN == sizeof(struct ovs_ra_msg)); #define ND_RA_MANAGED_ADDRESS 0x80 #define ND_RA_OTHER_CONFIG 0x40 +/* Defaults based on MaxRtrInterval and MinRtrInterval from RFC 4861 section + * 6.2.1 + */ +#define ND_RA_MAX_INTERVAL_DEFAULT 600 +#define ND_RA_MIN_INTERVAL_DEFAULT(max) ((max) >= 9 ? 
(max) / 3 : (max) * 3 / 4) + /* * Use the same struct for MLD and MLD2, naming members as the defined fields in * in the corresponding version of the protocol, though they are reserved in the diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c index 17b173147..cf414b8f2 100644 --- a/ovn/controller/pinctrl.c +++ b/ovn/controller/pinctrl.c @@ -48,6 +48,7 @@ #include "socket-util.h" #include "timeval.h" #include "vswitch-idl.h" +#include "lflow.h" VLOG_DEFINE_THIS_MODULE(pinctrl); @@ -88,6 +89,11 @@ static void pinctrl_handle_put_nd_ra_opts( static void pinctrl_handle_nd_ns(const struct flow *ip_flow, const struct match *md, struct ofpbuf *userdata); +static void init_ipv6_ras(void); +static void destroy_ipv6_ras(void); +static void ipv6_ra_wait(void); +static void send_ipv6_ras(const struct controller_ctx *, + struct hmap *local_datapaths); COVERAGE_DEFINE(pinctrl_drop_put_mac_binding); @@ -98,6 +104,7 @@ pinctrl_init(void) conn_seq_no = 0; init_put_mac_bindings(); init_send_garps(); + init_ipv6_ras(); } static ovs_be32 @@ -1083,6 +1090,297 @@ pinctrl_run(struct controller_ctx *ctx, run_put_mac_bindings(ctx); send_garp_run(ctx, br_int, chassis, chassis_index, local_datapaths, active_tunnels); + send_ipv6_ras(ctx, local_datapaths); +} + +/* Table of ipv6_ra_state structures, keyed on logical port name */ +static struct shash ipv6_ras; + +/* Time at which the next IPv6 RA is due, in milliseconds. 
*/ +static long long int send_ipv6_ra_time; + +struct ipv6_ra_config { + time_t min_interval; + time_t max_interval; + struct eth_addr eth_src; + struct eth_addr eth_dst; + struct in6_addr ipv6_src; + struct in6_addr ipv6_dst; + int32_t mtu; + uint8_t mo_flags; /* Managed/Other flags for RAs */ + uint8_t la_flags; /* On-link/autonomous flags for address prefixes */ + struct lport_addresses prefixes; +}; + +struct ipv6_ra_state { + long long int next_announce; + struct ipv6_ra_config *config; + int64_t port_key; + int64_t metadata; + bool delete_me; +}; + +static void +init_ipv6_ras(void) +{ + shash_init(&ipv6_ras); + send_ipv6_ra_time = LLONG_MAX; +} + +static void +ipv6_ra_config_delete(struct ipv6_ra_config *config) +{ + if (config) { + destroy_lport_addresses(&config->prefixes); + free(config); + } +} + +static void +ipv6_ra_delete(struct ipv6_ra_state *ra) +{ + if (ra) { + ipv6_ra_config_delete(ra->config); + free(ra); + } +} + +static void +destroy_ipv6_ras(void) +{ + struct shash_node *iter, *next; + SHASH_FOR_EACH_SAFE (iter, next, &ipv6_ras) { + struct ipv6_ra_state *ra = iter->data; + ipv6_ra_delete(ra); + shash_delete(&ipv6_ras, iter); + } + shash_destroy(&ipv6_ras); +} + +static struct ipv6_ra_config * +ipv6_ra_update_config(const struct sbrec_port_binding *pb) +{ + struct ipv6_ra_config *config; + + config = xzalloc(sizeof *config); + + config->max_interval = smap_get_int(&pb->options, "ipv6_ra_max_interval", + ND_RA_MAX_INTERVAL_DEFAULT); + config->min_interval = smap_get_int(&pb->options, "ipv6_ra_min_interval", + ND_RA_MIN_INTERVAL_DEFAULT(config->max_interval)); + config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT); + config->la_flags = ND_PREFIX_ON_LINK; + + const char *address_mode = smap_get(&pb->options, "ipv6_ra_address_mode"); + if (!address_mode) { + VLOG_WARN("No address mode specified"); + goto fail; + } + if (!strcmp(address_mode, "dhcpv6_stateless")) { + config->mo_flags = IPV6_ND_RA_FLAG_OTHER_ADDR_CONFIG; + } else if 
(!strcmp(address_mode, "dhcpv6_stateful")) { + config->mo_flags = IPV6_ND_RA_FLAG_MANAGED_ADDR_CONFIG; + } else if (!strcmp(address_mode, "slaac")) { + config->la_flags |= ND_PREFIX_AUTONOMOUS_ADDRESS; + } else { + VLOG_WARN("Invalid address mode %s", address_mode); + goto fail; + } + + const char *prefixes = smap_get(&pb->options, "ipv6_ra_prefixes"); + if (prefixes && !extract_ip_addresses(prefixes, &config->prefixes)) { + VLOG_WARN("Invalid IPv6 prefixes: %s", prefixes); + goto fail; + } + + /* All nodes multicast addresses */ + config->eth_dst = ETH_ADDR_C(33,33,00,00,00,01); + ipv6_parse("ff02::1", &config->ipv6_dst); + + const char *eth_addr = smap_get(&pb->options, "ipv6_ra_src_eth"); + if (!eth_addr || !eth_addr_from_string(eth_addr, &config->eth_src)) { + VLOG_WARN("Invalid ethernet source %s", eth_addr); + goto fail; + } + const char *ip_addr = smap_get(&pb->options, "ipv6_ra_src_addr"); + if (!ip_addr || !ipv6_parse(ip_addr, &config->ipv6_src)) { + VLOG_WARN("Invalid IP source %s", ip_addr); + goto fail; + } + + return config; + +fail: + ipv6_ra_config_delete(config); + return NULL; +} + +static long long int +ipv6_ra_calc_next_announce(time_t min_interval, time_t max_interval) +{ + long long int min_interval_ms = min_interval * 1000LL; + long long int max_interval_ms = max_interval * 1000LL; + + return time_msec() + min_interval_ms + + random_range(max_interval_ms - min_interval_ms); +} + +static void +put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits, + struct ofpbuf *ofpacts) +{ + struct ofpact_set_field *sf = ofpact_put_set_field(ofpacts, + mf_from_id(dst), NULL, + NULL); + ovs_be64 n_value = htonll(value); + bitwise_copy(&n_value, 8, 0, sf->value, sf->field->n_bytes, ofs, n_bits); + bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs, n_bits); +} + +static long long int +ipv6_ra_send(struct ipv6_ra_state *ra) +{ + if (time_msec() < ra->next_announce) { + return ra->next_announce; + } + + uint64_t packet_stub[128 / 8]; + 
struct dp_packet packet; + dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); + compose_nd_ra(&packet, ra->config->eth_src, ra->config->eth_dst, + &ra->config->ipv6_src, &ra->config->ipv6_dst, + 255, ra->config->mo_flags, 0, 0, 0, ra->config->mtu); + + for (int i = 0; i < ra->config->prefixes.n_ipv6_addrs; i++) { + ovs_be128 addr; + memcpy(&addr, &ra->config->prefixes.ipv6_addrs[i].addr, sizeof addr); + packet_put_ra_prefix_opt(&packet, + ra->config->prefixes.ipv6_addrs[i].plen, + ra->config->la_flags, htonl(IPV6_ND_RA_OPT_PREFIX_VALID_LIFETIME), + htonl(IPV6_ND_RA_OPT_PREFIX_PREFERRED_LIFETIME), addr); + } + + uint64_t ofpacts_stub[4096 / 8]; + struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); + + /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */ + uint32_t dp_key = ra->metadata; + uint32_t port_key = ra->port_key; + put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); + put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts); + put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts); + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); + resubmit->in_port = OFPP_CONTROLLER; + resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE; + + struct ofputil_packet_out po = { + .packet = dp_packet_data(&packet), + .packet_len = dp_packet_size(&packet), + .buffer_id = UINT32_MAX, + .ofpacts = ofpacts.data, + .ofpacts_len = ofpacts.size, + }; + + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); + enum ofp_version version = rconn_get_version(swconn); + enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version); + queue_msg(ofputil_encode_packet_out(&po, proto)); + dp_packet_uninit(&packet); + ofpbuf_uninit(&ofpacts); + + ra->next_announce = ipv6_ra_calc_next_announce(ra->config->min_interval, + ra->config->max_interval); + + return ra->next_announce; +} + +static void +ipv6_ra_wait(void) +{ + poll_timer_wait_until(send_ipv6_ra_time); +} + +static void +send_ipv6_ras(const struct controller_ctx *ctx, struct hmap 
*local_datapaths) +{ + struct shash_node *iter, *iter_next; + + send_ipv6_ra_time = LLONG_MAX; + + SHASH_FOR_EACH (iter, &ipv6_ras) { + struct ipv6_ra_state *ra = iter->data; + ra->delete_me = true; + } + + const struct local_datapath *ld; + HMAP_FOR_EACH (ld, hmap_node, local_datapaths) { + struct sbrec_port_binding *lpval; + const struct sbrec_port_binding *pb; + struct ovsdb_idl_index_cursor cursor; + + lpval = sbrec_port_binding_index_init_row(ctx->ovnsb_idl, + &sbrec_table_port_binding); + sbrec_port_binding_index_set_datapath(lpval, ld->datapath); + ovsdb_idl_initialize_cursor(ctx->ovnsb_idl, &sbrec_table_port_binding, + "lport-by-datapath", &cursor); + SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) { + if (!smap_get_bool(&pb->options, "ipv6_ra_send_periodic", false)) { + continue; + } + + const char *peer_s = smap_get(&pb->options, "peer"); + if (!peer_s) { + continue; + } + + const struct sbrec_port_binding *peer + = lport_lookup_by_name(ctx->ovnsb_idl, peer_s); + if (!peer) { + continue; + } + + struct ipv6_ra_config *config = ipv6_ra_update_config(pb); + if (!config) { + continue; + } + + struct ipv6_ra_state *ra + = shash_find_data(&ipv6_ras, pb->logical_port); + if (!ra) { + ra = xzalloc(sizeof *ra); + ra->config = config; + ra->next_announce = ipv6_ra_calc_next_announce( + ra->config->min_interval, + ra->config->max_interval); + shash_add(&ipv6_ras, pb->logical_port, ra); + } else { + ipv6_ra_config_delete(ra->config); + ra->config = config; + } + + /* Peer is the logical switch port that the logical + * router port is connected to. The RA is injected + * into that logical switch port. 
+ */ + ra->port_key = peer->tunnel_key; + ra->metadata = peer->datapath->tunnel_key; + ra->delete_me = false; + + long long int next_ra = ipv6_ra_send(ra); + if (send_ipv6_ra_time > next_ra) { + send_ipv6_ra_time = next_ra; + } + } + } + + /* Remove those that are no longer in the SB database */ + SHASH_FOR_EACH_SAFE (iter, iter_next, &ipv6_ras) { + struct ipv6_ra_state *ra = iter->data; + if (ra->delete_me) { + shash_delete(&ipv6_ras, iter); + ipv6_ra_delete(ra); + } + } } void @@ -1092,6 +1390,7 @@ pinctrl_wait(struct controller_ctx *ctx) rconn_run_wait(swconn); rconn_recv_wait(swconn); send_garp_wait(); + ipv6_ra_wait(); } void @@ -1100,6 +1399,7 @@ pinctrl_destroy(void) rconn_destroy(swconn); destroy_put_mac_bindings(); destroy_send_garps(); + destroy_ipv6_ras(); } /* Implementation of the "put_arp" and "put_nd" OVN actions. These diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c index 7e6b1d9a1..fc14dc8c3 100644 --- a/ovn/northd/ovn-northd.c +++ b/ovn/northd/ovn-northd.c @@ -1941,7 +1941,14 @@ ovn_port_update_sbrec(struct northd_context *ctx, sbrec_port_binding_set_parent_port(op->sb, NULL); sbrec_port_binding_set_tag(op->sb, NULL, 0); - sbrec_port_binding_set_mac(op->sb, NULL, 0); + + struct ds s = DS_EMPTY_INITIALIZER; + ds_put_cstr(&s, op->nbrp->mac); + for (int i = 0; i < op->nbrp->n_networks; ++i) { + ds_put_format(&s, " %s", op->nbrp->networks[i]); + } + const char *addresses = ds_cstr(&s); + sbrec_port_binding_set_mac(op->sb, &addresses, 1); struct smap ids = SMAP_INITIALIZER(&ids); sbrec_port_binding_set_external_ids(op->sb, &ids); @@ -4453,6 +4460,67 @@ add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, ds_destroy(&undnat_match); } +#define ND_RA_MAX_INTERVAL_MAX 1800 +#define ND_RA_MAX_INTERVAL_MIN 4 + +#define ND_RA_MIN_INTERVAL_MAX(max) ((max) * 3 / 4) +#define ND_RA_MIN_INTERVAL_MIN 3 + +static void +copy_ra_to_sb(struct ovn_port *op, const char *address_mode) +{ + struct smap options; + smap_clone(&options, 
&op->sb->options); + + smap_add(&options, "ipv6_ra_send_periodic", "true"); + smap_add(&options, "ipv6_ra_address_mode", address_mode); + + int max_interval = smap_get_int(&op->nbrp->ipv6_ra_configs, + "max_interval", ND_RA_MAX_INTERVAL_DEFAULT); + if (max_interval > ND_RA_MAX_INTERVAL_MAX) { + max_interval = ND_RA_MAX_INTERVAL_MAX; + } + if (max_interval < ND_RA_MAX_INTERVAL_MIN) { + max_interval = ND_RA_MAX_INTERVAL_MIN; + } + smap_add_format(&options, "ipv6_ra_max_interval", "%d", max_interval); + + int min_interval = smap_get_int(&op->nbrp->ipv6_ra_configs, + "min_interval", ND_RA_MIN_INTERVAL_DEFAULT(max_interval)); + if (min_interval > ND_RA_MIN_INTERVAL_MAX(max_interval)) { + min_interval = ND_RA_MIN_INTERVAL_MAX(max_interval); + } + if (min_interval < ND_RA_MIN_INTERVAL_MIN) { + min_interval = ND_RA_MIN_INTERVAL_MIN; + } + smap_add_format(&options, "ipv6_ra_min_interval", "%d", min_interval); + + int mtu = smap_get_int(&op->nbrp->ipv6_ra_configs, "mtu", ND_MTU_DEFAULT); + /* RFC 2460 requires the MTU for IPv6 to be at least 1280 */ + if (mtu && mtu >= 1280) { + smap_add_format(&options, "ipv6_ra_mtu", "%d", mtu); + } + + struct ds s = DS_EMPTY_INITIALIZER; + for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; ++i) { + struct ipv6_netaddr *addrs = &op->lrp_networks.ipv6_addrs[i]; + if (in6_is_lla(&addrs->network)) { + smap_add(&options, "ipv6_ra_src_addr", addrs->addr_s); + continue; + } + ds_put_format(&s, "%s/%u ", addrs->network_s, addrs->plen); + } + /* Remove trailing space */ + ds_chomp(&s, ' '); + smap_add(&options, "ipv6_ra_prefixes", ds_cstr(&s)); + ds_destroy(&s); + + smap_add(&options, "ipv6_ra_src_eth", op->lrp_networks.ea_s); + + sbrec_port_binding_set_options(op->sb, &options); + smap_destroy(&options); +} + static void build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, struct hmap *lflows) @@ -5464,6 +5532,11 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, continue; } + if 
(smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic", + false)) { + copy_ra_to_sb(op, address_mode); + } + ds_clear(&match); ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && nd_rs", op->json_key); diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml index 1091c05ce..20166becb 100644 --- a/ovn/ovn-nb.xml +++ b/ovn/ovn-nb.xml @@ -1383,6 +1383,26 @@ Per RFC 2460, the mtu value is recommended no less than 1280, so any mtu value less than 1280 will be considered as no MTU Option. + + + If set to true, then this router interface will send router + advertisements periodically. The default is false. + + + + The maximum number of seconds to wait between sending periodic router + advertisements. This option has no effect if send_periodic is false. The default + is 600. + + + + The minimum number of seconds to wait between sending periodic router + advertisements. This option has no effect if send_periodic is false. The default + is one-third of max_interval, + i.e. 200 seconds if that key is unset. + diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 954e25942..b2bb7e307 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -152,3 +152,113 @@ ovn-nbctl lsp-set-options S1-R1 router-port=R1-S1 AT_CHECK([test x`ovn-nbctl lsp-get-up S1-R1` = xup]) AT_CLEANUP + +AT_SETUP([ovn -- check IPv6 RA config propagation to SBDB]) +ovn_start + +ovn-nbctl lr-add ro +ovn-nbctl lrp-add ro ro-sw 00:00:00:00:00:01 aef0::1/64 +ovn-nbctl ls-add sw +ovn-nbctl lsp-add sw sw-ro +ovn-nbctl lsp-set-type sw-ro router +ovn-nbctl lsp-set-options sw-ro router-port=ro-sw +ovn-nbctl lsp-set-addresses sw-ro 00:00:00:00:00:01 +ovn-nbctl set Logical_Router_Port ro-sw ipv6_ra_configs:send_periodic=true +ovn-nbctl set Logical_Router_Port ro-sw ipv6_ra_configs:address_mode=slaac +ovn-nbctl --wait=sb set Logical_Router_Port ro-sw ipv6_ra_configs:mtu=1280 + +uuid=$(ovn-sbctl --columns=_uuid --bare find Port_Binding logical_port=ro-sw) + +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_send_periodic], +[0], ["true" 
+]) +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_address_mode], +[0], [slaac +]) +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_max_interval], +[0], ["600" +]) +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_min_interval], +[0], ["200" +]) +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_mtu], +[0], ["1280" +]) +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_src_eth], +[0], ["00:00:00:00:00:01" +]) +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_src_addr], +[0], ["fe80::200:ff:fe00:1" +]) +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_prefixes], +[0], ["aef0::/64" +]) + +ovn-nbctl set Logical_Router_Port ro-sw ipv6_ra_configs:max_interval=300 +ovn-nbctl --wait=sb set Logical_Router_Port ro-sw ipv6_ra_configs:min_interval=600 + +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_max_interval], +[0], ["300" +]) +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_min_interval], +[0], ["225" +]) + +ovn-nbctl set Logical_Router_Port ro-sw ipv6_ra_configs:max_interval=300 +ovn-nbctl --wait=sb set Logical_Router_Port ro-sw ipv6_ra_configs:min_interval=250 + +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_max_interval], +[0], ["300" +]) +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_min_interval], +[0], ["225" +]) + +ovn-nbctl set Logical_Router_Port ro-sw ipv6_ra_configs:max_interval=0 +ovn-nbctl --wait=sb set Logical_Router_Port ro-sw ipv6_ra_configs:min_interval=0 + +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_max_interval], +[0], ["4" +]) +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_min_interval], +[0], ["3" +]) + +ovn-nbctl set Logical_Router_Port ro-sw ipv6_ra_configs:max_interval=3600 +ovn-nbctl --wait=sb set Logical_Router_Port ro-sw ipv6_ra_configs:min_interval=2400 + +AT_CHECK([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_max_interval], +[0], ["1800" +]) +AT_CHECK([ovn-sbctl 
get Port_Binding ${uuid} options:ipv6_ra_min_interval], +[0], ["1350" +]) + +ovn-nbctl --wait=sb set Logical_Router_port ro-sw ipv6_ra_configs:send_periodic=false + +AT_CHECK_UNQUOTED([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_send_periodic], +[1], [], [ovn-sbctl: no key "ipv6_ra_send_periodic" in Port_Binding record "${uuid}" column options +]) +AT_CHECK_UNQUOTED([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_max_interval], +[1], [], [ovn-sbctl: no key "ipv6_ra_max_interval" in Port_Binding record "${uuid}" column options +]) +AT_CHECK_UNQUOTED([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_min_interval], +[1], [], [ovn-sbctl: no key "ipv6_ra_min_interval" in Port_Binding record "${uuid}" column options +]) +AT_CHECK_UNQUOTED([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_mtu], +[1], [], [ovn-sbctl: no key "ipv6_ra_mtu" in Port_Binding record "${uuid}" column options +]) +AT_CHECK_UNQUOTED([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_address_mode], +[1], [], [ovn-sbctl: no key "ipv6_ra_address_mode" in Port_Binding record "${uuid}" column options +]) +AT_CHECK_UNQUOTED([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_src_eth], +[1], [], [ovn-sbctl: no key "ipv6_ra_src_eth" in Port_Binding record "${uuid}" column options +]) +AT_CHECK_UNQUOTED([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_src_addr], +[1], [], [ovn-sbctl: no key "ipv6_ra_src_addr" in Port_Binding record "${uuid}" column options +]) +AT_CHECK_UNQUOTED([ovn-sbctl get Port_Binding ${uuid} options:ipv6_ra_prefixes], +[1], [], [ovn-sbctl: no key "ipv6_ra_prefixes" in Port_Binding record "${uuid}" column options +]) + +AT_CLEANUP diff --git a/tests/ovn.at b/tests/ovn.at index b6a83cc22..2a8b5a5d9 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -9060,3 +9060,153 @@ AT_CHECK([test x$(ovn-sbctl --bare --columns chassis find port_binding logical_p OVN_CLEANUP([hv1]) AT_CLEANUP + +AT_SETUP([ovn -- IPv6 periodic RA]) +ovn_start + +# This test sets up two hypervisors. 
+# hv1 and hv2 run ovn-controllers, and +# each has a VIF connected to the same +# logical switch in OVN. The logical +# switch is connected to a logical +# router port that is configured to send +# periodic router advertisements. +# +# The reason for having two ovn-controller +# hypervisors is to ensure that the +# periodic RAs being sent by each ovn-controller +# are kept to their local hypervisors. If the +# packets are not kept local, then each port +# will receive too many RAs. + +net_add n1 +sim_add hv1 +sim_add hv2 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.2 +as hv2 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.3 + +ovn-nbctl lr-add ro +ovn-nbctl lrp-add ro ro-sw 00:00:00:00:00:01 aef0::1/64 + +ovn-nbctl ls-add sw +ovn-nbctl lsp-add sw sw-ro +ovn-nbctl lsp-set-type sw-ro router +ovn-nbctl lsp-set-options sw-ro router-port=ro-sw +ovn-nbctl lsp-set-addresses sw-ro 00:00:00:00:00:01 +ovn-nbctl lsp-add sw sw-p1 +ovn-nbctl lsp-set-addresses sw-p1 "00:00:00:00:00:02 aef0::200:ff:fe00:2" +ovn-nbctl lsp-add sw sw-p2 +ovn-nbctl lsp-set-addresses sw-p2 "00:00:00:00:00:03 aef0::200:ff:fe00:3" + +ovn-nbctl set Logical_Router_Port ro-sw ipv6_ra_configs:send_periodic=true +ovn-nbctl set Logical_Router_Port ro-sw ipv6_ra_configs:address_mode=slaac +ovn-nbctl set Logical_Router_Port ro-sw ipv6_ra_configs:max_interval=4 +ovn-nbctl set Logical_Router_Port ro-sw ipv6_ra_configs:min_interval=3 + +for i in hv1 hv2 ; do + as $i + ovs-vsctl -- add-port br-int $i-vif1 -- \ + set interface $i-vif1 external-ids:iface-id=sw-p1 \ + options:tx_pcap=$i/vif1-tx.pcap \ + options:rxq_pcap=$i/vif1-rx.pcap \ + ofport-request=1 +done + +# Allow time for ovn-northd and ovn-controller to catch up +sleep 1 + +reset_pcap_file() { + local iface=$1 + local pcap_file=$2 + ovs-vsctl -- set Interface $iface options:tx_pcap=dummy-tx.pcap \ +options:rxq_pcap=dummy-rx.pcap + rm -f ${pcap_file}*.pcap + ovs-vsctl -- set Interface $iface 
options:tx_pcap=${pcap_file}-tx.pcap \ +options:rxq_pcap=${pcap_file}-rx.pcap + +} + +construct_expected_ra() { + local src_mac=000000000001 + local dst_mac=333300000001 + local src_addr=fe80000000000000020000fffe000001 + local dst_addr=ff020000000000000000000000000001 + + local mtu=$1 + local ra_mo=$2 + local ra_prefix_la=$3 + + local slla=0101${src_mac} + local mtu_opt="" + if test $mtu != 0; then + mtu_opt=05010000${mtu} + fi + shift 3 + + local prefix="" + while [[ $# -gt 0 ]] ; do + local size=$1 + local net=$2 + prefix=${prefix}0304${size}${ra_prefix_la}ffffffffffffffff00000000${net} + shift 2 + done + + local ra=ff${ra_mo}00000000000000000000${slla}${mtu_opt}${prefix} + local icmp=8600XXXX${ra} + + local ip_len=$(expr ${#icmp} / 2) + ip_len=$(printf "%0.4x" ${ip_len}) + + local ip=60000000${ip_len}3aff${src_addr}${dst_addr}${icmp} + local eth=${dst_mac}${src_mac}86dd${ip} + local packet=${eth} + echo $packet >> expected +} + +ra_test() { + construct_expected_ra $@ + + for i in hv1 hv2 ; do + OVS_WAIT_WHILE([test 24 = $(wc -c $i/vif1-tx.pcap | cut -d " " -f1)]) + + $PYTHON "$top_srcdir/utilities/ovs-pcap.in" $i/vif1-tx.pcap > packets + + cat expected | cut -c -112 > expout + AT_CHECK([cat packets | cut -c -112], [0], [expout]) + + # Skip ICMPv6 checksum. 
+ cat expected | cut -c 117- > expout + AT_CHECK([cat packets | cut -c 117-], [0], [expout]) + + rm -f packets + as $i reset_pcap_file $i-vif1 $i/vif1 + done + + rm -f expected +} + +# Baseline test with no MTU +ra_test 0 00 c0 40 aef00000000000000000000000000000 + +# Now make sure an MTU option makes it +ovn-nbctl --wait=hv set Logical_Router_Port ro-sw ipv6_ra_configs:mtu=1500 +ra_test 000005dc 00 c0 40 aef00000000000000000000000000000 + +# Now test for multiple network prefixes +ovn-nbctl --wait=hv set Logical_Router_port ro-sw networks='aef0\:\:1/64 fd0f\:\:1/48' +ra_test 000005dc 00 c0 40 aef00000000000000000000000000000 30 fd0f0000000000000000000000000000 + +# Test a different address mode now +ovn-nbctl --wait=hv set Logical_Router_Port ro-sw ipv6_ra_configs:address_mode=dhcpv6_stateful +ra_test 000005dc 80 80 40 aef00000000000000000000000000000 30 fd0f0000000000000000000000000000 + +# And the other address mode +ovn-nbctl --wait=hv set Logical_Router_Port ro-sw ipv6_ra_configs:address_mode=dhcpv6_stateless +ra_test 000005dc 40 80 40 aef00000000000000000000000000000 30 fd0f0000000000000000000000000000 + +OVN_CLEANUP([hv1],[hv2]) +AT_CLEANUP