@@ -67,6 +67,7 @@ struct ovn_extend_table;
OVNACT(ICMP4, ovnact_nest) \
OVNACT(ICMP4_ERROR, ovnact_nest) \
OVNACT(ICMP6, ovnact_nest) \
+ OVNACT(IGMP, ovnact_null) \
OVNACT(TCP_RESET, ovnact_nest) \
OVNACT(ND_NA, ovnact_nest) \
OVNACT(ND_NA_ROUTER, ovnact_nest) \
@@ -498,6 +499,12 @@ enum action_opcode {
/* "trigger_event (event_type)" */
ACTION_OPCODE_EVENT,
+
+ /* "igmp()".
+ *
+ * Snoop IGMP packets to learn the multicast group participants.
+ */
+ ACTION_OPCODE_IGMP,
};
/* Header. */
@@ -10,6 +10,8 @@ ovn_controller_ovn_controller_SOURCES = \
ovn/controller/encaps.h \
ovn/controller/ha-chassis.c \
ovn/controller/ha-chassis.h \
+ ovn/controller/ip-mcast.c \
+ ovn/controller/ip-mcast.h \
ovn/controller/lflow.c \
ovn/controller/lflow.h \
ovn/controller/lport.c \
new file mode 100644
@@ -0,0 +1,164 @@
+/* Copyright (c) 2019, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "ip-mcast.h"
+#include "lport.h"
+#include "ovn/lib/ovn-sb-idl.h"
+
+/*
+ * Used for (faster) updating of IGMP_Group ports.
+ */
+struct igmp_group_port {
+ struct hmap_node hmap_node;
+ const struct sbrec_port_binding *port;
+};
+
+struct ovsdb_idl_index *
+igmp_group_index_create(struct ovsdb_idl *idl)
+{
+ const struct ovsdb_idl_index_column cols[] = {
+ { .column = &sbrec_igmp_group_col_address },
+ { .column = &sbrec_igmp_group_col_datapath },
+ { .column = &sbrec_igmp_group_col_chassis },
+ };
+
+ return ovsdb_idl_index_create(idl, cols, ARRAY_SIZE(cols));
+}
+
+/* Looks up an IGMP group based on an IPv4 (mapped in IPv6) or IPv6 'address'
+ * and 'datapath'.
+ */
+const struct sbrec_igmp_group *
+igmp_group_lookup(struct ovsdb_idl_index *igmp_groups,
+ const struct in6_addr *address,
+ const struct sbrec_datapath_binding *datapath,
+ const struct sbrec_chassis *chassis)
+{
+ char addr_str[INET6_ADDRSTRLEN];
+
+ if (!ipv6_string_mapped(addr_str, address)) {
+ return NULL;
+ }
+
+ struct sbrec_igmp_group *target =
+ sbrec_igmp_group_index_init_row(igmp_groups);
+
+ sbrec_igmp_group_index_set_address(target, addr_str);
+ sbrec_igmp_group_index_set_datapath(target, datapath);
+ sbrec_igmp_group_index_set_chassis(target, chassis);
+
+ const struct sbrec_igmp_group *g =
+ sbrec_igmp_group_index_find(igmp_groups, target);
+ sbrec_igmp_group_index_destroy_row(target);
+ return g;
+}
+
+/* Creates and returns a new IGMP group based on an IPv4 (mapped in IPv6) or
+ * IPv6 'address', 'datapath' and 'chassis'.
+ */
+struct sbrec_igmp_group *
+igmp_group_create(struct ovsdb_idl_txn *idl_txn,
+ const struct in6_addr *address,
+ const struct sbrec_datapath_binding *datapath,
+ const struct sbrec_chassis *chassis)
+{
+ char addr_str[INET6_ADDRSTRLEN];
+
+ if (!ipv6_string_mapped(addr_str, address)) {
+ return NULL;
+ }
+
+ struct sbrec_igmp_group *g = sbrec_igmp_group_insert(idl_txn);
+
+ sbrec_igmp_group_set_address(g, addr_str);
+ sbrec_igmp_group_set_datapath(g, datapath);
+ sbrec_igmp_group_set_chassis(g, chassis);
+
+ return g;
+}
+
+void
+igmp_group_update_ports(const struct sbrec_igmp_group *g,
+ struct ovsdb_idl_index *datapaths,
+ struct ovsdb_idl_index *port_bindings,
+ const struct mcast_snooping *ms OVS_UNUSED,
+ const struct mcast_group *mc_group)
+ OVS_REQ_RDLOCK(ms->rwlock)
+{
+ struct igmp_group_port *old_ports_storage =
+ (g->n_ports ? xmalloc(g->n_ports * sizeof *old_ports_storage) : NULL);
+
+ struct hmap old_ports = HMAP_INITIALIZER(&old_ports);
+
+ for (size_t i = 0; i < g->n_ports; i++) {
+ struct igmp_group_port *old_port = &old_ports_storage[i];
+
+ old_port->port = g->ports[i];
+ hmap_insert(&old_ports, &old_port->hmap_node,
+ old_port->port->tunnel_key);
+ }
+
+ struct mcast_group_bundle *bundle;
+ uint64_t dp_key = g->datapath->tunnel_key;
+
+ LIST_FOR_EACH (bundle, bundle_node, &mc_group->bundle_lru) {
+ uint32_t port_key = (uintptr_t)bundle->port;
+ const struct sbrec_port_binding *sbrec_port =
+ lport_lookup_by_key(datapaths, port_bindings, dp_key, port_key);
+ if (!sbrec_port) {
+ continue;
+ }
+
+ struct hmap_node *node = hmap_first_with_hash(&old_ports, port_key);
+ if (!node) {
+ sbrec_igmp_group_update_ports_addvalue(g, sbrec_port);
+ } else {
+ hmap_remove(&old_ports, node);
+ }
+ }
+
+ struct igmp_group_port *igmp_port;
+ HMAP_FOR_EACH_POP (igmp_port, hmap_node, &old_ports) {
+ sbrec_igmp_group_update_ports_delvalue(g, igmp_port->port);
+ }
+
+ free(old_ports_storage);
+ hmap_destroy(&old_ports);
+}
+
+void
+igmp_group_delete(const struct sbrec_igmp_group *g)
+{
+ sbrec_igmp_group_delete(g);
+}
+
+bool
+igmp_group_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
+ struct ovsdb_idl_index *igmp_groups)
+{
+ const struct sbrec_igmp_group *g;
+
+ if (!ovnsb_idl_txn) {
+ return true;
+ }
+
+ SBREC_IGMP_GROUP_FOR_EACH_BYINDEX (g, igmp_groups) {
+ igmp_group_delete(g);
+ }
+
+ return true;
+}
new file mode 100644
@@ -0,0 +1,52 @@
+/* Copyright (c) 2019, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_IP_MCAST_H
+#define OVN_IP_MCAST_H 1
+
+#include "mcast-snooping.h"
+
+struct ovsdb_idl;
+struct ovsdb_idl_txn;
+
+struct sbrec_chassis;
+struct sbrec_datapath_binding;
+
+struct ovsdb_idl_index *igmp_group_index_create(struct ovsdb_idl *);
+const struct sbrec_igmp_group *igmp_group_lookup(
+ struct ovsdb_idl_index *igmp_groups,
+ const struct in6_addr *address,
+ const struct sbrec_datapath_binding *datapath,
+ const struct sbrec_chassis *chassis);
+
+struct sbrec_igmp_group *igmp_group_create(
+ struct ovsdb_idl_txn *idl_txn,
+ const struct in6_addr *address,
+ const struct sbrec_datapath_binding *datapath,
+ const struct sbrec_chassis *chassis);
+
+void igmp_group_update_ports(const struct sbrec_igmp_group *g,
+ struct ovsdb_idl_index *datapaths,
+ struct ovsdb_idl_index *port_bindings,
+ const struct mcast_snooping *ms,
+ const struct mcast_group *mc_group)
+ OVS_REQ_RDLOCK(ms->rwlock);
+
+void igmp_group_delete(const struct sbrec_igmp_group *g);
+
+bool igmp_group_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
+ struct ovsdb_idl_index *igmp_groups);
+
+#endif /* ovn/controller/ip-mcast.h */
@@ -33,6 +33,7 @@
#include "openvswitch/dynamic-string.h"
#include "encaps.h"
#include "fatal-signal.h"
+#include "ip-mcast.h"
#include "openvswitch/hmap.h"
#include "lflow.h"
#include "lib/vswitch-idl.h"
@@ -43,6 +44,7 @@
#include "ovn/actions.h"
#include "ovn/lib/chassis-index.h"
#include "ovn/lib/extend-table.h"
+#include "ovn/lib/ip-mcast-index.h"
#include "ovn/lib/ovn-sb-idl.h"
#include "ovn/lib/ovn-util.h"
#include "patch.h"
@@ -135,6 +137,10 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
*
* Monitor Controller_Event rows for local chassis.
*
+ * Monitor IP_Multicast for local datapaths.
+ *
+ * Monitor IGMP_Groups for local chassis.
+ *
* We always monitor patch ports because they allow us to see the linkages
* between related logical datapaths. That way, when we know that we have
* a VIF on a particular logical switch, we immediately know to monitor all
@@ -145,6 +151,8 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
struct ovsdb_idl_condition mg = OVSDB_IDL_CONDITION_INIT(&mg);
struct ovsdb_idl_condition dns = OVSDB_IDL_CONDITION_INIT(&dns);
struct ovsdb_idl_condition ce = OVSDB_IDL_CONDITION_INIT(&ce);
+ struct ovsdb_idl_condition ip_mcast = OVSDB_IDL_CONDITION_INIT(&ip_mcast);
+ struct ovsdb_idl_condition igmp = OVSDB_IDL_CONDITION_INIT(&igmp);
sbrec_port_binding_add_clause_type(&pb, OVSDB_F_EQ, "patch");
/* XXX: We can optimize this, if we find a way to only monitor
* ports that have a Gateway_Chassis that point's to our own
@@ -171,6 +179,8 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
sbrec_controller_event_add_clause_chassis(&ce, OVSDB_F_EQ,
&chassis->header_.uuid);
+ sbrec_igmp_group_add_clause_chassis(&igmp, OVSDB_F_EQ,
+ &chassis->header_.uuid);
}
if (local_ifaces) {
const char *name;
@@ -190,6 +200,8 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
sbrec_mac_binding_add_clause_datapath(&mb, OVSDB_F_EQ, uuid);
sbrec_multicast_group_add_clause_datapath(&mg, OVSDB_F_EQ, uuid);
sbrec_dns_add_clause_datapaths(&dns, OVSDB_F_INCLUDES, &uuid, 1);
+ sbrec_ip_multicast_add_clause_datapath(&ip_mcast, OVSDB_F_EQ,
+ uuid);
}
}
sbrec_port_binding_set_condition(ovnsb_idl, &pb);
@@ -198,12 +210,16 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
sbrec_multicast_group_set_condition(ovnsb_idl, &mg);
sbrec_dns_set_condition(ovnsb_idl, &dns);
sbrec_controller_event_set_condition(ovnsb_idl, &ce);
+ sbrec_ip_multicast_set_condition(ovnsb_idl, &ip_mcast);
+ sbrec_igmp_group_set_condition(ovnsb_idl, &igmp);
ovsdb_idl_condition_destroy(&pb);
ovsdb_idl_condition_destroy(&lf);
ovsdb_idl_condition_destroy(&mb);
ovsdb_idl_condition_destroy(&mg);
ovsdb_idl_condition_destroy(&dns);
ovsdb_idl_condition_destroy(&ce);
+ ovsdb_idl_condition_destroy(&ip_mcast);
+ ovsdb_idl_condition_destroy(&igmp);
}
static const char *
@@ -1747,6 +1763,10 @@ main(int argc, char *argv[])
= ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
&sbrec_mac_binding_col_logical_port,
&sbrec_mac_binding_col_ip);
+ struct ovsdb_idl_index *sbrec_ip_multicast
+ = ip_mcast_index_create(ovnsb_idl_loop.idl);
+ struct ovsdb_idl_index *sbrec_igmp_group
+ = igmp_group_index_create(ovnsb_idl_loop.idl);
ovsdb_idl_track_add_all(ovnsb_idl_loop.idl);
ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
@@ -1988,6 +2008,8 @@ main(int argc, char *argv[])
sbrec_port_binding_by_key,
sbrec_port_binding_by_name,
sbrec_mac_binding_by_lport_ip,
+ sbrec_igmp_group,
+ sbrec_ip_multicast,
sbrec_dns_table_get(ovnsb_idl_loop.idl),
sbrec_controller_event_table_get(
ovnsb_idl_loop.idl),
@@ -2121,6 +2143,7 @@ main(int argc, char *argv[])
done = binding_cleanup(ovnsb_idl_txn, port_binding_table, chassis);
done = chassis_cleanup(ovnsb_idl_txn, chassis) && done;
done = encaps_cleanup(ovs_idl_txn, br_int) && done;
+ done = igmp_group_cleanup(ovnsb_idl_txn, sbrec_igmp_group) && done;
if (done) {
poll_immediate_wake();
}
@@ -44,6 +44,7 @@
#include "ovn/actions.h"
#include "ovn/lex.h"
#include "ovn/lib/acl-log.h"
+#include "ovn/lib/ip-mcast-index.h"
#include "ovn/lib/ovn-l7.h"
#include "ovn/lib/ovn-util.h"
#include "ovn/logical-fields.h"
@@ -54,6 +55,7 @@
#include "timeval.h"
#include "vswitch-idl.h"
#include "lflow.h"
+#include "ip-mcast.h"
VLOG_DEFINE_THIS_MODULE(pinctrl);
@@ -105,6 +107,17 @@ VLOG_DEFINE_THIS_MODULE(pinctrl);
* the hmap - 'buffered_mac_bindings' and reinjects the
* buffered packets.
*
+ * - igmp - This action punts an IGMP packet to the controller
+ * which maintains multicast group information. The
+ * multicast groups (mcast_snoop_map) are synced to
+ * the 'IGMP_Group' table by ip_mcast_sync().
+ * ip_mcast_sync() also reads the 'IP_Multicast'
+ * (snooping and querier) configuration and builds a
+ * local configuration mcast_cfg_map.
+ * ip_mcast_snoop_run() which runs in the
+ * pinctrl_handler() thread configures the per datapath
+ * mcast_snoop_map entries according to mcast_cfg_map.
+ *
* pinctrl module also periodically sends IPv6 Router Solicitation requests
* and gARPs (for the router gateway IPs and configured NAT addresses).
*
@@ -122,6 +135,13 @@ VLOG_DEFINE_THIS_MODULE(pinctrl);
* pinctrl_handler() thread sends these gARPs using the
* shash 'send_garp_data'.
*
+ * IGMP Queries - pinctrl_run() prepares the IGMP queries (at most one
+ * per local datapath) based on the mcast_snoop_map
+ * contents and stores them in mcast_query_list.
+ *
+ * The pinctrl_handler() thread sends the periodic IGMP queries
+ * by walking the mcast_query_list.
+ *
* Notification between pinctrl_handler() and pinctrl_run()
* -------------------------------------------------------
* 'struct seq' is used for notification between pinctrl_handler() thread
@@ -131,8 +151,8 @@ VLOG_DEFINE_THIS_MODULE(pinctrl);
* in 'send_garp_data', 'ipv6_ras' and 'buffered_mac_bindings' structures.
*
* 'pinctrl_main_seq' is used by pinctrl_handler() thread to wake up
- * the main thread from poll_block() when mac bindings needs to be updated
- * in the Southboubd DB.
+ * the main thread from poll_block() when mac bindings/igmp groups need to
+ * be updated in the Southbound DB.
* */
static struct ovs_mutex pinctrl_mutex = OVS_MUTEX_INITIALIZER;
@@ -226,6 +246,30 @@ static void prepare_ipv6_ras(
static void send_ipv6_ras(struct rconn *swconn,
long long int *send_ipv6_ra_time)
OVS_REQUIRES(pinctrl_mutex);
+
+static void ip_mcast_snoop_init(void);
+static void ip_mcast_snoop_destroy(void);
+static void ip_mcast_snoop_run(void)
+ OVS_REQUIRES(pinctrl_mutex);
+static void ip_mcast_querier_run(struct rconn *swconn,
+ long long int *query_time);
+static void ip_mcast_querier_wait(long long int query_time);
+static void ip_mcast_sync(
+ struct ovsdb_idl_txn *ovnsb_idl_txn,
+ const struct sbrec_chassis *chassis,
+ const struct hmap *local_datapaths,
+ struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
+ struct ovsdb_idl_index *sbrec_port_binding_by_key,
+ struct ovsdb_idl_index *sbrec_igmp_groups,
+ struct ovsdb_idl_index *sbrec_ip_multicast)
+ OVS_REQUIRES(pinctrl_mutex);
+static void pinctrl_ip_mcast_handle_igmp(
+ struct rconn *swconn,
+ const struct flow *ip_flow,
+ struct dp_packet *pkt_in,
+ const struct match *md,
+ struct ofpbuf *userdata);
+
static bool may_inject_pkts(void);
COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
@@ -386,6 +430,7 @@ pinctrl_init(void)
init_ipv6_ras();
init_buffered_packets_map();
init_event_table();
+ ip_mcast_snoop_init();
pinctrl.br_int_name = NULL;
pinctrl_handler_seq = seq_create();
pinctrl_main_seq = seq_create();
@@ -1825,6 +1870,10 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
pinctrl_handle_arp(swconn, &headers, &packet, &pin.flow_metadata,
&userdata);
break;
+ case ACTION_OPCODE_IGMP:
+ pinctrl_ip_mcast_handle_igmp(swconn, &headers, &packet,
+ &pin.flow_metadata, &userdata);
+ break;
case ACTION_OPCODE_PUT_ARP:
ovs_mutex_lock(&pinctrl_mutex);
@@ -1943,7 +1992,6 @@ pinctrl_recv(struct rconn *swconn, const struct ofp_header *oh,
}
/* Called with in the main ovn-controller thread context. */
-
static void
notify_pinctrl_handler(void)
{
@@ -1975,6 +2023,8 @@ pinctrl_handler(void *arg_)
static long long int send_ipv6_ra_time = LLONG_MAX;
/* Next GARP announcement in ms. */
static long long int send_garp_time = LLONG_MAX;
+ /* Next multicast query (IGMP) in ms. */
+ static long long int send_mcast_query_time = LLONG_MAX;
swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION);
@@ -1999,6 +2049,10 @@ pinctrl_handler(void *arg_)
rconn_disconnect(swconn);
}
+ ovs_mutex_lock(&pinctrl_mutex);
+ ip_mcast_snoop_run();
+ ovs_mutex_unlock(&pinctrl_mutex);
+
rconn_run(swconn);
if (rconn_is_connected(swconn)) {
if (conn_seq_no != rconn_get_connection_seqno(swconn)) {
@@ -2026,6 +2080,8 @@ pinctrl_handler(void *arg_)
send_ipv6_ras(swconn, &send_ipv6_ra_time);
send_mac_binding_buffered_pkts(swconn);
ovs_mutex_unlock(&pinctrl_mutex);
+
+ ip_mcast_querier_run(swconn, &send_mcast_query_time);
}
}
@@ -2033,6 +2089,7 @@ pinctrl_handler(void *arg_)
rconn_recv_wait(swconn);
send_garp_wait(send_garp_time);
ipv6_ra_wait(send_ipv6_ra_time);
+ ip_mcast_querier_wait(send_mcast_query_time);
new_seq = seq_read(pinctrl_handler_seq);
seq_wait(pinctrl_handler_seq, new_seq);
@@ -2054,6 +2111,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
struct ovsdb_idl_index *sbrec_port_binding_by_key,
struct ovsdb_idl_index *sbrec_port_binding_by_name,
struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+ struct ovsdb_idl_index *sbrec_igmp_groups,
+ struct ovsdb_idl_index *sbrec_ip_multicast_opts,
const struct sbrec_dns_table *dns_table,
const struct sbrec_controller_event_table *ce_table,
const struct ovsrec_bridge *br_int,
@@ -2082,6 +2141,11 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
sbrec_port_binding_by_name, local_datapaths);
sync_dns_cache(dns_table);
controller_event_run(ovnsb_idl_txn, ce_table, chassis);
+ ip_mcast_sync(ovnsb_idl_txn, chassis, local_datapaths,
+ sbrec_datapath_binding_by_key,
+ sbrec_port_binding_by_key,
+ sbrec_igmp_groups,
+ sbrec_ip_multicast_opts);
run_buffered_binding(sbrec_port_binding_by_datapath,
sbrec_mac_binding_by_lport_ip,
local_datapaths);
@@ -2434,6 +2498,7 @@ pinctrl_destroy(void)
event_table_destroy();
destroy_put_mac_bindings();
destroy_dns_cache();
+ ip_mcast_snoop_destroy();
seq_destroy(pinctrl_main_seq);
seq_destroy(pinctrl_handler_seq);
}
@@ -2873,6 +2938,718 @@ send_garp(struct rconn *swconn, struct garp_data *garp,
return garp->announce_time;
}
+/*
+ * Multicast snooping configuration.
+ */
+struct ip_mcast_snoop_cfg {
+ bool enabled;
+ bool querier_enabled;
+
+ uint32_t table_size; /* Max number of allowed multicast groups. */
+ uint32_t idle_time_s; /* Idle timeout for multicast groups. */
+ uint32_t query_interval_s; /* Multicast query interval. */
+ uint32_t query_max_resp_s; /* Multicast query max-response field. */
+ uint32_t seq_no; /* Used for flushing learnt groups. */
+
+ struct eth_addr query_eth_src; /* Src ETH address used for queries. */
+ struct eth_addr query_eth_dst; /* Dst ETH address used for queries. */
+ ovs_be32 query_ipv4_src; /* Src IPv4 address used for queries. */
+ ovs_be32 query_ipv4_dst; /* Dst IPv4 address used for queries. */
+};
+
+/*
+ * Holds per-datapath information about multicast snooping. Maintained by
+ * pinctrl_handler().
+ */
+struct ip_mcast_snoop {
+ struct hmap_node hmap_node; /* Linkage in the hash map. */
+ struct ovs_list query_node; /* Linkage in the query list. */
+ struct ip_mcast_snoop_cfg cfg; /* Multicast configuration. */
+ struct mcast_snooping *ms; /* Multicast group state. */
+ int64_t dp_key; /* Datapath running the snooping. */
+
+ long long int query_time_ms; /* Next query time in ms. */
+};
+
+/*
+ * Holds the per-datapath multicast configuration state. Maintained by
+ * pinctrl_run().
+ */
+struct ip_mcast_snoop_state {
+ struct hmap_node hmap_node;
+ int64_t dp_key;
+ struct ip_mcast_snoop_cfg cfg;
+};
+
+/* Only default vlan supported for now. */
+#define IP_MCAST_VLAN 1
+
+/* Multicast snooping information stored independently by datapath key.
+ * Protected by pinctrl_mutex. pinctrl_handler has RW access and pinctrl_main
+ * has RO access.
+ */
+static struct hmap mcast_snoop_map OVS_GUARDED_BY(pinctrl_mutex);
+
+/* Contains multicast queries to be sent. Only used by pinctrl_handler so no
+ * locking needed.
+ */
+static struct ovs_list mcast_query_list;
+
+/* Multicast config information stored independently by datapath key.
+ * Protected by pinctrl_mutex. pinctrl_handler has RO access and pinctrl_main
+ * has RW access. Read accesses from pinctrl_ip_mcast_handle_igmp() can be
+ * performed without taking the lock as they are executed in the pinctrl_main
+ * thread.
+ */
+static struct hmap mcast_cfg_map OVS_GUARDED_BY(pinctrl_mutex);
+
+static void
+ip_mcast_snoop_cfg_load(struct ip_mcast_snoop_cfg *cfg,
+ const struct sbrec_ip_multicast *ip_mcast)
+{
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+ memset(cfg, 0, sizeof *cfg);
+ cfg->enabled =
+ (ip_mcast->enabled && ip_mcast->enabled[0]);
+ cfg->querier_enabled =
+ (cfg->enabled && ip_mcast->querier && ip_mcast->querier[0]);
+
+ if (ip_mcast->table_size) {
+ cfg->table_size = ip_mcast->table_size[0];
+ } else {
+ cfg->table_size = OVN_MCAST_DEFAULT_MAX_ENTRIES;
+ }
+
+ if (ip_mcast->idle_timeout) {
+ cfg->idle_time_s = ip_mcast->idle_timeout[0];
+ } else {
+ cfg->idle_time_s = OVN_MCAST_DEFAULT_IDLE_TIMEOUT_S;
+ }
+
+ if (ip_mcast->query_interval) {
+ cfg->query_interval_s = ip_mcast->query_interval[0];
+ } else {
+ cfg->query_interval_s = cfg->idle_time_s / 2;
+ if (cfg->query_interval_s < OVN_MCAST_MIN_QUERY_INTERVAL_S) {
+ cfg->query_interval_s = OVN_MCAST_MIN_QUERY_INTERVAL_S;
+ }
+ }
+
+ if (ip_mcast->query_max_resp) {
+ cfg->query_max_resp_s = ip_mcast->query_max_resp[0];
+ } else {
+ cfg->query_max_resp_s = OVN_MCAST_DEFAULT_QUERY_MAX_RESPONSE_S;
+ }
+
+ cfg->seq_no = ip_mcast->seq_no;
+
+ if (cfg->querier_enabled) {
+ /* Try to parse the source ETH address. */
+ if (!ip_mcast->eth_src ||
+ !eth_addr_from_string(ip_mcast->eth_src,
+ &cfg->query_eth_src)) {
+ VLOG_WARN_RL(&rl,
+ "IGMP Querier enabled with invalid ETH src address");
+ /* Failed to parse the ETH source address. Disable the querier. */
+ cfg->querier_enabled = false;
+ }
+
+ /* Try to parse the source IP address. */
+ if (!ip_mcast->ip4_src ||
+ !ip_parse(ip_mcast->ip4_src, &cfg->query_ipv4_src)) {
+ VLOG_WARN_RL(&rl,
+ "IGMP Querier enabled with invalid IPv4 src address");
+ /* Failed to parse the IPv4 source address. Disable the querier. */
+ cfg->querier_enabled = false;
+ }
+
+ /* IGMP queries must be sent to 224.0.0.1. */
+ cfg->query_eth_dst =
+ (struct eth_addr)ETH_ADDR_C(01, 00, 5E, 00, 00, 01);
+ cfg->query_ipv4_dst = htonl(0xe0000001);
+ }
+}
+
+static uint32_t
+ip_mcast_snoop_hash(int64_t dp_key)
+{
+ return hash_uint64(dp_key);
+}
+
+static struct ip_mcast_snoop_state *
+ip_mcast_snoop_state_add(int64_t dp_key)
+ OVS_REQUIRES(pinctrl_mutex)
+{
+ struct ip_mcast_snoop_state *ms_state = xmalloc(sizeof *ms_state);
+
+ ms_state->dp_key = dp_key;
+ hmap_insert(&mcast_cfg_map, &ms_state->hmap_node,
+ ip_mcast_snoop_hash(dp_key));
+ return ms_state;
+}
+
+static struct ip_mcast_snoop_state *
+ip_mcast_snoop_state_find(int64_t dp_key)
+ OVS_REQUIRES(pinctrl_mutex)
+{
+ struct ip_mcast_snoop_state *ms_state;
+ uint32_t hash = ip_mcast_snoop_hash(dp_key);
+
+ HMAP_FOR_EACH_WITH_HASH (ms_state, hmap_node, hash, &mcast_cfg_map) {
+ if (ms_state->dp_key == dp_key) {
+ return ms_state;
+ }
+ }
+ return NULL;
+}
+
+static bool
+ip_mcast_snoop_state_update(int64_t dp_key,
+ const struct ip_mcast_snoop_cfg *cfg)
+ OVS_REQUIRES(pinctrl_mutex)
+{
+ bool notify = false;
+ struct ip_mcast_snoop_state *ms_state = ip_mcast_snoop_state_find(dp_key);
+
+ if (!ms_state) {
+ ms_state = ip_mcast_snoop_state_add(dp_key);
+ notify = true;
+ } else if (memcmp(cfg, &ms_state->cfg, sizeof *cfg)) {
+ notify = true;
+ }
+
+ ms_state->cfg = *cfg;
+ return notify;
+}
+
+static void
+ip_mcast_snoop_state_remove(struct ip_mcast_snoop_state *ms_state)
+ OVS_REQUIRES(pinctrl_mutex)
+{
+ hmap_remove(&mcast_cfg_map, &ms_state->hmap_node);
+ free(ms_state);
+}
+
+static bool
+ip_mcast_snoop_enable(struct ip_mcast_snoop *ip_ms)
+{
+ if (ip_ms->cfg.enabled) {
+ return true;
+ }
+
+ ip_ms->ms = mcast_snooping_create();
+ return ip_ms->ms != NULL;
+}
+
+static void
+ip_mcast_snoop_flush(struct ip_mcast_snoop *ip_ms)
+{
+ if (!ip_ms->cfg.enabled) {
+ return;
+ }
+
+ mcast_snooping_flush(ip_ms->ms);
+}
+
+static void
+ip_mcast_snoop_disable(struct ip_mcast_snoop *ip_ms)
+{
+ if (!ip_ms->cfg.enabled) {
+ return;
+ }
+
+ mcast_snooping_unref(ip_ms->ms);
+ ip_ms->ms = NULL;
+}
+
+static bool
+ip_mcast_snoop_configure(struct ip_mcast_snoop *ip_ms,
+ const struct ip_mcast_snoop_cfg *cfg)
+{
+ if (cfg->enabled) {
+ if (!ip_mcast_snoop_enable(ip_ms)) {
+ return false;
+ }
+ if (ip_ms->cfg.seq_no != cfg->seq_no) {
+ ip_mcast_snoop_flush(ip_ms);
+ }
+
+ if (ip_ms->cfg.querier_enabled && !cfg->querier_enabled) {
+ ovs_list_remove(&ip_ms->query_node);
+ } else if (!ip_ms->cfg.querier_enabled && cfg->querier_enabled) {
+ ovs_list_push_back(&mcast_query_list, &ip_ms->query_node);
+ }
+ } else {
+ ip_mcast_snoop_disable(ip_ms);
+ goto set_fields;
+ }
+
+ ovs_rwlock_wrlock(&ip_ms->ms->rwlock);
+ if (cfg->table_size != ip_ms->cfg.table_size) {
+ mcast_snooping_set_max_entries(ip_ms->ms, cfg->table_size);
+ }
+
+ if (cfg->idle_time_s != ip_ms->cfg.idle_time_s) {
+ mcast_snooping_set_idle_time(ip_ms->ms, cfg->idle_time_s);
+ }
+ ovs_rwlock_unlock(&ip_ms->ms->rwlock);
+
+ if (cfg->query_interval_s != ip_ms->cfg.query_interval_s) {
+ long long int now = time_msec();
+
+ if (ip_ms->query_time_ms > now + cfg->query_interval_s * 1000) {
+ ip_ms->query_time_ms = now;
+ }
+ }
+
+set_fields:
+ memcpy(&ip_ms->cfg, cfg, sizeof ip_ms->cfg);
+ return true;
+}
+
+static struct ip_mcast_snoop *
+ip_mcast_snoop_add(int64_t dp_key, const struct ip_mcast_snoop_cfg *cfg)
+ OVS_REQUIRES(pinctrl_mutex)
+{
+ struct ip_mcast_snoop *ip_ms = xzalloc(sizeof *ip_ms);
+
+ ip_ms->dp_key = dp_key;
+ if (!ip_mcast_snoop_configure(ip_ms, cfg)) {
+ free(ip_ms);
+ return NULL;
+ }
+
+ hmap_insert(&mcast_snoop_map, &ip_ms->hmap_node,
+ ip_mcast_snoop_hash(dp_key));
+ return ip_ms;
+}
+
+static struct ip_mcast_snoop *
+ip_mcast_snoop_find(int64_t dp_key)
+ OVS_REQUIRES(pinctrl_mutex)
+{
+ struct ip_mcast_snoop *ip_ms;
+
+ HMAP_FOR_EACH_WITH_HASH (ip_ms, hmap_node, ip_mcast_snoop_hash(dp_key),
+ &mcast_snoop_map) {
+ if (ip_ms->dp_key == dp_key) {
+ return ip_ms;
+ }
+ }
+ return NULL;
+}
+
+static void
+ip_mcast_snoop_remove(struct ip_mcast_snoop *ip_ms)
+ OVS_REQUIRES(pinctrl_mutex)
+{
+ hmap_remove(&mcast_snoop_map, &ip_ms->hmap_node);
+
+ if (ip_ms->cfg.querier_enabled) {
+ ovs_list_remove(&ip_ms->query_node);
+ }
+
+ ip_mcast_snoop_disable(ip_ms);
+ free(ip_ms);
+}
+
+static void
+ip_mcast_snoop_init(void)
+ OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+ hmap_init(&mcast_snoop_map);
+ ovs_list_init(&mcast_query_list);
+ hmap_init(&mcast_cfg_map);
+}
+
+static void
+ip_mcast_snoop_destroy(void)
+ OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+ struct ip_mcast_snoop *ip_ms, *ip_ms_next;
+
+ HMAP_FOR_EACH_SAFE (ip_ms, ip_ms_next, hmap_node, &mcast_snoop_map) {
+ ip_mcast_snoop_remove(ip_ms);
+ }
+ hmap_destroy(&mcast_snoop_map);
+
+ struct ip_mcast_snoop_state *ip_ms_state;
+
+ HMAP_FOR_EACH_POP (ip_ms_state, hmap_node, &mcast_cfg_map) {
+ free(ip_ms_state);
+ }
+}
+
+static void
+ip_mcast_snoop_run(void)
+ OVS_REQUIRES(pinctrl_mutex)
+{
+ struct ip_mcast_snoop *ip_ms, *ip_ms_next;
+
+ /* First read the config updated by pinctrl_main. If there's any new or
+ * updated config then apply it.
+ */
+ struct ip_mcast_snoop_state *ip_ms_state;
+
+ HMAP_FOR_EACH (ip_ms_state, hmap_node, &mcast_cfg_map) {
+ ip_ms = ip_mcast_snoop_find(ip_ms_state->dp_key);
+
+ if (!ip_ms) {
+ ip_mcast_snoop_add(ip_ms_state->dp_key, &ip_ms_state->cfg);
+ } else if (memcmp(&ip_ms_state->cfg, &ip_ms->cfg,
+ sizeof ip_ms_state->cfg)) {
+ ip_mcast_snoop_configure(ip_ms, &ip_ms_state->cfg);
+ }
+ }
+
+ bool notify = false;
+
+ /* Then walk the multicast snoop instances. */
+ HMAP_FOR_EACH_SAFE (ip_ms, ip_ms_next, hmap_node, &mcast_snoop_map) {
+
+ /* Delete the stale ones. */
+ if (!ip_mcast_snoop_state_find(ip_ms->dp_key)) {
+ ip_mcast_snoop_remove(ip_ms);
+ continue;
+ }
+
+ /* If enabled run the snooping instance to timeout old groups. */
+ if (ip_ms->cfg.enabled) {
+ if (mcast_snooping_run(ip_ms->ms)) {
+ notify = true;
+ }
+
+ mcast_snooping_wait(ip_ms->ms);
+ }
+ }
+
+ if (notify) {
+ notify_pinctrl_main();
+ }
+}
+
+/*
+ * This runs in the pinctrl main thread, so it has access to the southbound
+ * database. It reads the IP_Multicast table and updates the local multicast
+ * configuration. Then writes to the southbound database the updated
+ * IGMP_Groups.
+ */
+static void
+ip_mcast_sync(struct ovsdb_idl_txn *ovnsb_idl_txn,
+ const struct sbrec_chassis *chassis,
+ const struct hmap *local_datapaths,
+ struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
+ struct ovsdb_idl_index *sbrec_port_binding_by_key,
+ struct ovsdb_idl_index *sbrec_igmp_groups,
+ struct ovsdb_idl_index *sbrec_ip_multicast)
+ OVS_REQUIRES(pinctrl_mutex)
+{
+ bool notify = false;
+
+ if (!ovnsb_idl_txn || !chassis) {
+ return;
+ }
+
+ struct sbrec_ip_multicast *ip_mcast;
+ struct ip_mcast_snoop_state *ip_ms_state, *ip_ms_state_next;
+
+ /* First read and update our own local multicast configuration for the
+ * local datapaths.
+ */
+ SBREC_IP_MULTICAST_FOR_EACH_BYINDEX (ip_mcast, sbrec_ip_multicast) {
+
+ int64_t dp_key = ip_mcast->datapath->tunnel_key;
+ struct ip_mcast_snoop_cfg cfg;
+
+ ip_mcast_snoop_cfg_load(&cfg, ip_mcast);
+ if (ip_mcast_snoop_state_update(dp_key, &cfg)) {
+ notify = true;
+ }
+ }
+
+ /* Then delete the old entries. */
+ HMAP_FOR_EACH_SAFE (ip_ms_state, ip_ms_state_next, hmap_node,
+ &mcast_cfg_map) {
+ if (!get_local_datapath(local_datapaths, ip_ms_state->dp_key)) {
+ ip_mcast_snoop_state_remove(ip_ms_state);
+ notify = true;
+ }
+ }
+
+ const struct sbrec_igmp_group *sbrec_igmp;
+
+ /* Then flush any IGMP_Group entries that are not needed anymore:
+ * - either multicast snooping was disabled on the datapath
+ * - or the group has expired.
+ */
+ SBREC_IGMP_GROUP_FOR_EACH_BYINDEX (sbrec_igmp, sbrec_igmp_groups) {
+ ovs_be32 group_addr;
+
+ if (!sbrec_igmp->datapath) {
+ continue;
+ }
+
+ int64_t dp_key = sbrec_igmp->datapath->tunnel_key;
+ struct ip_mcast_snoop *ip_ms = ip_mcast_snoop_find(dp_key);
+
+ /* If the datapath doesn't exist anymore or IGMP snooping was disabled
+ * on it then delete the IGMP_Group entry.
+ */
+ if (!ip_ms || !ip_ms->cfg.enabled) {
+ igmp_group_delete(sbrec_igmp);
+ continue;
+ }
+
+ if (!ip_parse(sbrec_igmp->address, &group_addr)) {
+ continue;
+ }
+
+ ovs_rwlock_rdlock(&ip_ms->ms->rwlock);
+ struct mcast_group *mc_group =
+ mcast_snooping_lookup4(ip_ms->ms, group_addr,
+ IP_MCAST_VLAN);
+
+ if (!mc_group || ovs_list_is_empty(&mc_group->bundle_lru)) {
+ igmp_group_delete(sbrec_igmp);
+ }
+ ovs_rwlock_unlock(&ip_ms->ms->rwlock);
+ }
+
+ struct ip_mcast_snoop *ip_ms, *ip_ms_next;
+
+ /* Last: write new IGMP_Groups to the southbound DB and update existing
+ * ones (if needed). We also flush any old per-datapath multicast snoop
+ * structures.
+ */
+ HMAP_FOR_EACH_SAFE (ip_ms, ip_ms_next, hmap_node, &mcast_snoop_map) {
+ /* Flush any non-local snooping datapaths (e.g., stale). */
+ struct local_datapath *local_dp =
+ get_local_datapath(local_datapaths, ip_ms->dp_key);
+
+ if (!local_dp) {
+ continue;
+ }
+
+ /* Skip datapaths on which snooping is disabled. */
+ if (!ip_ms->cfg.enabled) {
+ continue;
+ }
+
+ struct mcast_group *mc_group;
+
+ ovs_rwlock_rdlock(&ip_ms->ms->rwlock);
+ LIST_FOR_EACH (mc_group, group_node, &ip_ms->ms->group_lru) {
+ if (ovs_list_is_empty(&mc_group->bundle_lru)) {
+ continue;
+ }
+ sbrec_igmp = igmp_group_lookup(sbrec_igmp_groups, &mc_group->addr,
+ local_dp->datapath, chassis);
+ if (!sbrec_igmp) {
+ sbrec_igmp = igmp_group_create(ovnsb_idl_txn, &mc_group->addr,
+ local_dp->datapath, chassis);
+ }
+
+ igmp_group_update_ports(sbrec_igmp, sbrec_datapath_binding_by_key,
+ sbrec_port_binding_by_key, ip_ms->ms,
+ mc_group);
+ }
+ ovs_rwlock_unlock(&ip_ms->ms->rwlock);
+ }
+
+ if (notify) {
+ notify_pinctrl_handler();
+ }
+}
+
+/* Handles an IGMP packet punted to ovn-controller by the "igmp" logical
+ * flow action (ACTION_OPCODE_IGMP): validates the frame and updates the
+ * multicast snooping state of the packet's logical datapath.
+ */
+static void
+pinctrl_ip_mcast_handle_igmp(struct rconn *swconn OVS_UNUSED,
+                             const struct flow *ip_flow,
+                             struct dp_packet *pkt_in,
+                             const struct match *md,
+                             struct ofpbuf *userdata OVS_UNUSED)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+    /* This action only works for IP packets, and the switch should only send
+     * us IP packets this way, but check here just to be sure.
+     */
+    if (ip_flow->dl_type != htons(ETH_TYPE_IP)) {
+        VLOG_WARN_RL(&rl,
+                     "IGMP action on non-IP packet (eth_type 0x%"PRIx16")",
+                     ntohs(ip_flow->dl_type));
+        return;
+    }
+
+    /* Logical datapath key (OpenFlow metadata) and logical input port key
+     * of the packet, as set by the logical pipeline. */
+    int64_t dp_key = ntohll(md->flow.metadata);
+    uint32_t port_key = md->flow.regs[MFF_LOG_INPORT - MFF_REG0];
+
+    const struct igmp_header *igmp;
+    size_t offset;
+
+    /* NOTE(review): assumes dp_packet_l4() is non-null here -- presumably
+     * guaranteed by the "igmp" action's ip4/ip.proto prerequisite; confirm.
+     */
+    offset = (char *) dp_packet_l4(pkt_in) - (char *) dp_packet_data(pkt_in);
+    igmp = dp_packet_at(pkt_in, offset, IGMP_HEADER_LEN);
+    /* The IGMP checksum covers the complete L4 payload. */
+    if (!igmp || csum(igmp, dp_packet_l4_size(pkt_in)) != 0) {
+        VLOG_WARN_RL(&rl, "multicast snooping received bad IGMP checksum");
+        return;
+    }
+
+    ovs_be32 ip4 = ip_flow->igmp_group_ip4;
+
+    struct ip_mcast_snoop *ip_ms = ip_mcast_snoop_find(dp_key);
+    if (!ip_ms || !ip_ms->cfg.enabled) {
+        /* IGMP snooping is not configured or is disabled. */
+        return;
+    }
+
+    /* mcast-snooping identifies ports by an opaque pointer; encode the
+     * logical port key directly in the pointer value (no allocation). */
+    void *port_key_data = (void *)(uintptr_t)port_key;
+
+    bool group_change = false;
+
+    /* Write lock: every case below may modify the snooping tables.  For
+     * IGMP the message type ends up in tp_src (see the type constants used
+     * as case labels). */
+    ovs_rwlock_wrlock(&ip_ms->ms->rwlock);
+    switch (ntohs(ip_flow->tp_src)) {
+    /* Only default VLAN is supported for now. */
+    case IGMP_HOST_MEMBERSHIP_REPORT:
+    case IGMPV2_HOST_MEMBERSHIP_REPORT:
+        group_change =
+            mcast_snooping_add_group4(ip_ms->ms, ip4, IP_MCAST_VLAN,
+                                      port_key_data);
+        break;
+    case IGMP_HOST_LEAVE_MESSAGE:
+        group_change =
+            mcast_snooping_leave_group4(ip_ms->ms, ip4, IP_MCAST_VLAN,
+                                        port_key_data);
+        break;
+    case IGMP_HOST_MEMBERSHIP_QUERY:
+        /* Shouldn't be receiving any of these since we are the multicast
+         * router. Store them for now.
+         */
+        group_change =
+            mcast_snooping_add_mrouter(ip_ms->ms, IP_MCAST_VLAN,
+                                       port_key_data);
+        break;
+    case IGMPV3_HOST_MEMBERSHIP_REPORT:
+        group_change =
+            mcast_snooping_add_report(ip_ms->ms, pkt_in, IP_MCAST_VLAN,
+                                      port_key_data);
+        break;
+    }
+    ovs_rwlock_unlock(&ip_ms->ms->rwlock);
+
+    /* Membership changed: signal the main pinctrl loop so the learned
+     * groups get synced to the southbound IGMP_Group table. */
+    if (group_change) {
+        notify_pinctrl_main();
+    }
+}
+
+/* Sends an IGMPv3 general query on datapath 'ip_ms' if its query timer has
+ * expired, then re-arms the timer.  Returns the time (msec) at which the
+ * next query for this datapath is due. */
+static long long int
+ip_mcast_querier_send(struct rconn *swconn, struct ip_mcast_snoop *ip_ms,
+                      long long int current_time)
+{
+    if (current_time < ip_ms->query_time_ms) {
+        return ip_ms->query_time_ms;
+    }
+
+    /* Compose a multicast query. */
+    uint64_t packet_stub[128 / 8];
+    struct dp_packet packet;
+
+    dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
+
+    /* DSCP CS6 (network control) and TTL 1 (IGMP is link-local).  The ToS
+     * must be passed through packet_set_ipv4() below: that function rewrites
+     * the ToS byte, so setting nh->ip_tos directly beforehand would be
+     * silently overwritten with whatever value is passed here. */
+    uint8_t ip_tos = IP_DSCP_CS6;
+    uint8_t igmp_ttl = 1;
+
+    dp_packet_clear(&packet);
+    packet.packet_type = htonl(PT_ETH);
+
+    /* Ethernet header from the per-datapath querier configuration. */
+    struct eth_header *eh = dp_packet_put_zeros(&packet, sizeof *eh);
+    eh->eth_dst = ip_ms->cfg.query_eth_dst;
+    eh->eth_src = ip_ms->cfg.query_eth_src;
+
+    struct ip_header *nh = dp_packet_put_zeros(&packet, sizeof *nh);
+
+    eh->eth_type = htons(ETH_TYPE_IP);
+    dp_packet_set_l3(&packet, nh);
+    nh->ip_ihl_ver = IP_IHL_VER(5, 4);
+    nh->ip_tot_len = htons(sizeof(struct ip_header) +
+                           sizeof(struct igmpv3_query_header));
+    nh->ip_proto = IPPROTO_IGMP;
+    nh->ip_frag_off = htons(IP_DF);
+    packet_set_ipv4(&packet, ip_ms->cfg.query_ipv4_src,
+                    ip_ms->cfg.query_ipv4_dst, ip_tos, igmp_ttl);
+
+    /* Recompute the IP checksum from scratch over the final header. */
+    nh->ip_csum = 0;
+    nh->ip_csum = csum(nh, sizeof *nh);
+
+    struct igmpv3_query_header *igh =
+        dp_packet_put_zeros(&packet, sizeof *igh);
+    dp_packet_set_l4(&packet, igh);
+
+    /* IGMP query max-response in tenths of seconds.  NOTE(review): assumes
+     * cfg.query_max_resp_s <= 25 so the product fits in a uint8_t -- confirm
+     * that the configuration value is range-checked. */
+    uint8_t max_response = ip_ms->cfg.query_max_resp_s * 10;
+    uint8_t qqic = max_response;
+    packet_set_igmp3_query(&packet, max_response, 0, false, 0, qqic);
+
+    /* Inject the query into the logical pipeline, addressed to the
+     * datapath's multicast flood tunnel key. */
+    uint64_t ofpacts_stub[4096 / 8];
+    struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
+    enum ofp_version version = rconn_get_version(swconn);
+    put_load(ip_ms->dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
+    put_load(OVN_MCAST_FLOOD_TUNNEL_KEY, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
+    put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY, 1, &ofpacts);
+    struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
+    resubmit->in_port = OFPP_CONTROLLER;
+    resubmit->table_id = OFTABLE_LOCAL_OUTPUT;
+
+    struct ofputil_packet_out po = {
+        .packet = dp_packet_data(&packet),
+        .packet_len = dp_packet_size(&packet),
+        .buffer_id = UINT32_MAX,
+        .ofpacts = ofpacts.data,
+        .ofpacts_len = ofpacts.size,
+    };
+    match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
+    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
+    queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
+    dp_packet_uninit(&packet);
+    ofpbuf_uninit(&ofpacts);
+
+    /* Set the next query time. */
+    ip_ms->query_time_ms = current_time + ip_ms->cfg.query_interval_s * 1000;
+    return ip_ms->query_time_ms;
+}
+
+/* Sends all IGMP queries that are due and stores, through '*query_time',
+ * the earliest time at which another query must go out.  Leaves
+ * '*query_time' untouched when no querier is active. */
+static void
+ip_mcast_querier_run(struct rconn *swconn, long long int *query_time)
+{
+    if (ovs_list_is_empty(&mcast_query_list)) {
+        return;
+    }
+
+    long long int now = time_msec();
+    long long int earliest = LLONG_MAX;
+    struct ip_mcast_snoop *ip_ms;
+
+    LIST_FOR_EACH (ip_ms, query_node, &mcast_query_list) {
+        long long int next = ip_mcast_querier_send(swconn, ip_ms, now);
+
+        if (next < earliest) {
+            earliest = next;
+        }
+    }
+
+    *query_time = earliest;
+}
+
+/* Arranges for the poll loop to wake up at 'query_time', but only when at
+ * least one IGMP querier is active. */
+static void
+ip_mcast_querier_wait(long long int query_time)
+{
+    if (ovs_list_is_empty(&mcast_query_list)) {
+        return;
+    }
+    poll_timer_wait_until(query_time);
+}
+
/* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
static void
get_localnet_vifs_l3gwports(
@@ -3221,6 +3998,7 @@ may_inject_pkts(void)
{
return (!shash_is_empty(&ipv6_ras) ||
!shash_is_empty(&send_garp_data) ||
+ !ovs_list_is_empty(&mcast_query_list) ||
!ovs_list_is_empty(&buffered_mac_bindings));
}
@@ -38,6 +38,8 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
struct ovsdb_idl_index *sbrec_port_binding_by_key,
struct ovsdb_idl_index *sbrec_port_binding_by_name,
struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
+ struct ovsdb_idl_index *sbrec_igmp_groups,
+ struct ovsdb_idl_index *sbrec_ip_multicast_opts,
const struct sbrec_dns_table *,
const struct sbrec_controller_event_table *,
const struct ovsrec_bridge *, const struct sbrec_chassis *,
@@ -1232,6 +1232,12 @@ format_ICMP6(const struct ovnact_nest *nest, struct ds *s)
}
static void
+format_IGMP(const struct ovnact_null *a OVS_UNUSED, struct ds *s)
+{
+    /* "igmp" takes no arguments; print the bare action. */
+    ds_put_cstr(s, "igmp;");
+}
+
+static void
format_TCP_RESET(const struct ovnact_nest *nest, struct ds *s)
{
format_nested_action(nest, "tcp_reset", s);
@@ -1334,6 +1340,14 @@ encode_ICMP6(const struct ovnact_nest *on,
}
static void
+encode_IGMP(const struct ovnact_null *a OVS_UNUSED,
+            const struct ovnact_encode_params *ep OVS_UNUSED,
+            struct ofpbuf *ofpacts)
+{
+    /* Emit a controller() action tagged ACTION_OPCODE_IGMP so that
+     * ovn-controller snoops the packet.  'ofpacts' IS used here, so it
+     * must not carry the OVS_UNUSED annotation. */
+    encode_controller_op(ACTION_OPCODE_IGMP, ofpacts);
+}
+
+static void
encode_TCP_RESET(const struct ovnact_nest *on,
const struct ovnact_encode_params *ep,
struct ofpbuf *ofpacts)
@@ -2666,6 +2680,8 @@ parse_action(struct action_context *ctx)
parse_ICMP4_ERROR(ctx);
} else if (lexer_match_id(ctx->lexer, "icmp6")) {
parse_ICMP6(ctx);
+ } else if (lexer_match_id(ctx->lexer, "igmp")) {
+ ovnact_put_IGMP(ctx->ovnacts);
} else if (lexer_match_id(ctx->lexer, "tcp_reset")) {
parse_TCP_RESET(ctx);
} else if (lexer_match_id(ctx->lexer, "nd_na")) {
@@ -12,6 +12,8 @@ ovn_lib_libovn_la_SOURCES = \
ovn/lib/expr.c \
ovn/lib/extend-table.h \
ovn/lib/extend-table.c \
+ ovn/lib/ip-mcast-index.c \
+ ovn/lib/ip-mcast-index.h \
ovn/lib/lex.c \
ovn/lib/ovn-l7.h \
ovn/lib/ovn-util.c \
new file mode 100644
@@ -0,0 +1,40 @@
+/* Copyright (c) 2019, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "ovn/lib/ip-mcast-index.h"
+#include "ovn/lib/ovn-sb-idl.h"
+
+struct ovsdb_idl_index *
+ip_mcast_index_create(struct ovsdb_idl *idl)
+{
+    /* Single-column IDL index over IP_Multicast keyed on "datapath", for
+     * fast per-datapath configuration lookups (see ip_mcast_lookup()). */
+    return ovsdb_idl_index_create1(idl, &sbrec_ip_multicast_col_datapath);
+}
+
+const struct sbrec_ip_multicast *
+ip_mcast_lookup(struct ovsdb_idl_index *ip_mcast_index,
+                const struct sbrec_datapath_binding *datapath)
+{
+    /* Returns the IP_Multicast row for 'datapath', or NULL if none exists.
+     * Probes the index with a transient "target" row carrying only the
+     * indexed column. */
+    struct sbrec_ip_multicast *target =
+        sbrec_ip_multicast_index_init_row(ip_mcast_index);
+    sbrec_ip_multicast_index_set_datapath(target, datapath);
+
+    struct sbrec_ip_multicast *ip_mcast =
+        sbrec_ip_multicast_index_find(ip_mcast_index, target);
+    /* The transient search row must be destroyed to avoid leaking it. */
+    sbrec_ip_multicast_index_destroy_row(target);
+
+    return ip_mcast;
+}
new file mode 100644
@@ -0,0 +1,39 @@
+/* Copyright (c) 2019, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_IP_MCAST_INDEX_H
+#define OVN_IP_MCAST_INDEX_H 1
+
+struct ovsdb_idl;
+
+struct sbrec_datapath_binding;
+
+#define OVN_MCAST_MIN_IDLE_TIMEOUT_S 15
+#define OVN_MCAST_MAX_IDLE_TIMEOUT_S 3600
+#define OVN_MCAST_DEFAULT_IDLE_TIMEOUT_S 300
+#define OVN_MCAST_MIN_QUERY_INTERVAL_S 1
+#define OVN_MCAST_MAX_QUERY_INTERVAL_S OVN_MCAST_MAX_IDLE_TIMEOUT_S
+#define OVN_MCAST_DEFAULT_QUERY_MAX_RESPONSE_S 1
+#define OVN_MCAST_DEFAULT_MAX_ENTRIES 2048
+
+#define OVN_MCAST_FLOOD_TUNNEL_KEY 65535
+#define OVN_MCAST_UNKNOWN_TUNNEL_KEY (OVN_MCAST_FLOOD_TUNNEL_KEY - 1)
+
+struct ovsdb_idl_index *ip_mcast_index_create(struct ovsdb_idl *);
+const struct sbrec_ip_multicast *ip_mcast_lookup(
+ struct ovsdb_idl_index *ip_mcast_index,
+ const struct sbrec_datapath_binding *datapath);
+
+#endif /* ovn/lib/ip-mcast-index.h */
@@ -164,6 +164,8 @@ ovn_init_symtab(struct shash *symtab)
expr_symtab_add_field(symtab, "icmp4.code", MFF_ICMPV4_CODE, "icmp4",
false);
+ expr_symtab_add_predicate(symtab, "igmp", "ip4 && ip.proto == 2");
+
expr_symtab_add_field(symtab, "ip6.src", MFF_IPV6_SRC, "ip6", false);
expr_symtab_add_field(symtab, "ip6.dst", MFF_IPV6_DST, "ip6", false);
expr_symtab_add_field(symtab, "ip6.label", MFF_IPV6_LABEL, "ip6", false);
@@ -1,7 +1,7 @@
{
"name": "OVN_Southbound",
"version": "2.4.0",
- "cksum": "1795697952 18106",
+ "cksum": "3059284885 20260",
"tables": {
"SB_Global": {
"columns": {
@@ -362,5 +362,43 @@
"min": 0, "max": 1}},
"seq_num": {"type": {"key": "integer"}}
},
- "isRoot": true
- }}}
+ "isRoot": true},
+ "IP_Multicast": {
+ "columns": {
+ "datapath": {"type": {"key": {"type": "uuid",
+ "refTable": "Datapath_Binding",
+ "refType": "weak"}}},
+ "enabled": {"type": {"key": "boolean", "min": 0, "max": 1}},
+ "querier": {"type": {"key": "boolean", "min": 0, "max": 1}},
+ "eth_src": {"type": "string"},
+ "ip4_src": {"type": "string"},
+ "table_size": {"type": {"key": "integer",
+ "min": 0, "max": 1}},
+ "idle_timeout": {"type": {"key": "integer",
+ "min": 0, "max": 1}},
+ "query_interval": {"type": {"key": "integer",
+ "min": 0, "max": 1}},
+ "query_max_resp": {"type": {"key": "integer",
+ "min": 0, "max": 1}},
+ "seq_no": {"type": "integer"}},
+ "indexes": [["datapath"]],
+ "isRoot": true},
+ "IGMP_Group": {
+ "columns": {
+ "address": {"type": "string"},
+ "datapath": {"type": {"key": {"type": "uuid",
+ "refTable": "Datapath_Binding",
+ "refType": "weak"},
+ "min": 0,
+ "max": 1}},
+ "chassis": {"type": {"key": {"type": "uuid",
+ "refTable": "Chassis",
+ "refType": "weak"},
+ "min": 0,
+ "max": 1}},
+ "ports": {"type": {"key": {"type": "uuid",
+ "refTable": "Port_Binding",
+ "refType": "weak"},
+ "min": 0, "max": "unlimited"}}},
+ "indexes": [["address", "datapath", "chassis"]],
+ "isRoot": true}}}
@@ -2009,6 +2009,14 @@ tcp.flags = RST;
</li>
</ul>
</dd>
+ <dt><code>igmp;</code></dt>
+ <dd>
+ <p>
+ This action sends the packet to <code>ovn-controller</code> for
+ multicast snooping.
+ </p>
+ <p><b>Prerequisite:</b> <code>igmp</code></p>
+ </dd>
</dl>
</column>
@@ -3555,4 +3563,76 @@ tcp.flags = RST;
event.
</column>
</table>
+ <table name="IP_Multicast">
+ <p>
+ IP Multicast configuration options. For now only applicable to IGMP.
+ </p>
+
+ <column name="datapath">
+ <ref table="Datapath_Binding"/> entry for which these configuration
+ options are defined.
+ </column>
+ <column name="enabled">
+ Enables/disables multicast snooping. Default: disabled.
+ </column>
+ <column name="querier">
+      Enables/disables multicast querying. If multicast snooping is enabled
+      (see <ref table="IP_Multicast" column="enabled"/>), then querying is
+      enabled by default.
+ </column>
+ <column name="table_size">
+ Limits the number of multicast groups that can be learned. Default:
+ 2048 groups per datapath.
+ </column>
+ <column name="idle_timeout">
+ Configures the idle timeout (in seconds) for IP multicast groups if
+ multicast snooping is enabled. Default: 300 seconds.
+ </column>
+ <column name="query_interval">
+ Configures the interval (in seconds) for sending multicast queries if
+ snooping and querier are enabled.
+ Default: <ref table="IP_Multicast" column="idle_timeout"/>/2 seconds.
+ </column>
+ <column name="seq_no">
+ <code>ovn-controller</code> reads this value and flushes all learned
+ multicast groups when it detects that <code>seq_no</code> was changed.
+ </column>
+
+ <group title="Querier configuration options">
+ The <code>ovn-controller</code> process that runs on OVN hypervisor
+ nodes uses the following columns to determine field values in IGMP
+ queries that it originates:
+ <column name="eth_src">
+ Source Ethernet address.
+ </column>
+ <column name="ip4_src">
+ Source IPv4 address.
+ </column>
+ <column name="query_max_resp">
+ Value (in seconds) to be used as "max-response" field in multicast
+ queries. Default: 1 second.
+ </column>
+ </group>
+ </table>
+ <table name="IGMP_Group">
+ <p>
+ Contains learned IGMP groups indexed by address/datapath/chassis.
+ </p>
+
+ <column name="address">
+ Destination IPv4 address for the IGMP group.
+ </column>
+
+ <column name="datapath">
+ Datapath to which this IGMP group belongs.
+ </column>
+
+ <column name="chassis">
+ Chassis to which this IGMP group belongs.
+ </column>
+
+ <column name="ports">
+ The destination port bindings for this IGMP group.
+ </column>
+ </table>
</database>
@@ -524,6 +524,9 @@ pre_get_info(struct ctl_context *ctx)
ovsdb_idl_add_column(ctx->idl, &sbrec_logical_flow_col_external_ids);
ovsdb_idl_add_column(ctx->idl, &sbrec_datapath_binding_col_external_ids);
+
+ ovsdb_idl_add_column(ctx->idl, &sbrec_ip_multicast_col_datapath);
+ ovsdb_idl_add_column(ctx->idl, &sbrec_ip_multicast_col_seq_no);
}
static struct cmd_show_table cmd_show_tables[] = {
@@ -955,6 +958,52 @@ cmd_lflow_list(struct ctl_context *ctx)
}
static void
+sbctl_ip_mcast_flush_switch(struct ctl_context *ctx,
+                            const struct sbrec_datapath_binding *dp)
+{
+    const struct sbrec_ip_multicast *ip_mcast;
+
+    /* Bump seq_no on each IP_Multicast row belonging to 'dp';
+     * ovn-controller flushes its learned multicast groups when it sees
+     * seq_no change. */
+    SBREC_IP_MULTICAST_FOR_EACH (ip_mcast, ctx->idl) {
+        if (ip_mcast->datapath == dp) {
+            sbrec_ip_multicast_set_seq_no(ip_mcast, ip_mcast->seq_no + 1);
+        }
+    }
+}
+
+/* Implements "ovn-sbctl ip-multicast-flush [SWITCH]": bumps seq_no on the
+ * matching IP_Multicast row(s) so controllers flush learned IGMP groups. */
+static void
+sbctl_ip_mcast_flush(struct ctl_context *ctx)
+{
+    const struct sbrec_datapath_binding *dp;
+
+    /* Defensive: command syntax allows at most one argument, so argc can
+     * only be 1 or 2 here. */
+    if (ctx->argc > 2) {
+        return;
+    }
+
+    if (ctx->argc == 2) {
+        /* Flush a single datapath, identified by argv[1]. */
+        const struct ovsdb_idl_row *row;
+        char *error = ctl_get_row(ctx, &sbrec_table_datapath_binding,
+                                  ctx->argv[1], false, &row);
+        if (error) {
+            ctl_fatal("%s", error);
+        }
+
+        dp = (const struct sbrec_datapath_binding *)row;
+        if (!dp) {
+            ctl_fatal("%s is not a valid datapath", ctx->argv[1]);
+        }
+
+        sbctl_ip_mcast_flush_switch(ctx, dp);
+    } else {
+        /* No argument: flush every datapath. */
+        SBREC_DATAPATH_BINDING_FOR_EACH (dp, ctx->idl) {
+            sbctl_ip_mcast_flush_switch(ctx, dp);
+        }
+    }
+}
+
+static void
verify_connections(struct ctl_context *ctx)
{
const struct sbrec_sb_global *sb_global = sbrec_sb_global_first(ctx->idl);
@@ -1462,6 +1511,10 @@ static const struct ctl_command_syntax sbctl_commands[] = {
pre_get_info, cmd_lflow_list, NULL,
"--uuid,--ovs?,--stats", RO}, /* Friendly alias for lflow-list */
+ /* IP multicast commands. */
+ {"ip-multicast-flush", 0, 1, "SWITCH",
+ pre_get_info, sbctl_ip_mcast_flush, NULL, "", RW },
+
/* Connection commands. */
{"get-connection", 0, 0, "", pre_connection, cmd_get_connection, NULL, "", RO},
{"del-connection", 0, 0, "", pre_connection, cmd_del_connection, NULL, "", RW},
@@ -2126,6 +2126,10 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
super);
break;
+ case OVNACT_IGMP:
+ /* Nothing to do for tracing. */
+ break;
+
case OVNACT_TCP_RESET:
execute_tcp_reset(ovnact_get_TCP_RESET(a), dp, uflow, table_id,
pipeline, super);
@@ -1343,6 +1343,10 @@ trigger_event(event = "empty_lb_backends", vip = "10.0.0.1:80", protocol = "sctp
trigger_event(event = "empty_lb_backends", vip = "10.0.0.1:80", protocol = "tcp", load_balancer = "bacon");
Load balancer 'bacon' is not a UUID
+# IGMP
+igmp;
+ encodes as controller(userdata=00.00.00.10.00.00.00.00)
+
# Contradictionary prerequisites (allowed but not useful):
ip4.src = ip6.src[0..31];
encodes as move:NXM_NX_IPV6_SRC[0..31]->NXM_OF_IP_SRC[]