[ovs-dev,v3] ovn: Fix IPv6 DAD failure for container ports

Message ID 20181006083107.18095-1-nusiddiq@redhat.com
State Accepted
Delegated to: Guru Shetty
Headers show
Series
  • [ovs-dev,v3] ovn: Fix IPv6 DAD failure for container ports
Related show

Commit Message

Numan Siddique Oct. 6, 2018, 8:31 a.m.
From: Numan Siddique <nusiddiq@redhat.com>

When a container port is created inside a VM, the below kernel message
is seen and IPv6 doesn't work on that interface.

[  138.000753] IPv6: vlan4: IPv6 duplicate address <IPv6 LLA> detected!

When a container port sends a ethernet broadcast packet, OVN delivers the same
packet back to the child port (and hence the DAD check fails).

This is because
 - 'MLF_ALLOW_LOOPBACK_BIT' is set in REG10 in table 0 for the packets received
   from any child port.
 - for ethernet broadcast packets, Table 33 (OFTABLE_LOCAL_OUTPUT) clones the
   packet for every local port 'P' which belongs to the same datapath i.e
   'P'->REG15, resubmit(,34)
 - If REG14 and REG15 are same, Table 34 (OFTABLE_CHECK_LOOPBACK) drops the packet
   if 'MLF_ALLOW_LOOPBACK_BIT' is not set.
 - But in the case of container ports, this bit will be set and hence doesn't gets
   dropped and eventually gets delivered to the source container port.
 - The VM's kernel thinks its a DAD packet. The latest kernels (4.19) implements
   the RFC -7527 (enhanced DAD), but it is still a problem for older kernels.

This patch fixes the issue by using a new register bit (MLF_NESTED_CONTAINER_BIT)
instead of 'MLF_ALLOW_LOOPBACK_BIT' and sets it in REG10 for the packets received
from child ports so that Table 34 drops the packet for the source port.

Acked-by: Gurucharan Shetty <guru@ovn.org>
Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
---

v2 -> v3
-----
  * Fixed a grammmatic error typo in the code comment as per Guru's
  * suggestion
v1 -> v2 
-------
  * Addressed the review comments from Guru - Updated the commit message
    and added few comments in the code.

 ovn/controller/ofctrl.c   | 29 +++++++++++------
 ovn/controller/ofctrl.h   |  5 +++
 ovn/controller/physical.c | 65 ++++++++++++++++++++++++++++++++++-----
 ovn/lib/logical-fields.h  |  4 +++
 tests/ovn.at              | 11 +++++++
 5 files changed, 97 insertions(+), 17 deletions(-)

Patch

diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
index f5b53195d..218612787 100644
--- a/ovn/controller/ofctrl.c
+++ b/ovn/controller/ofctrl.c
@@ -630,9 +630,11 @@  ofctrl_recv(const struct ofp_header *oh, enum ofptype type)
  *
  * The caller should initialize its own hmap to hold the flows. */
 void
-ofctrl_add_flow(struct hmap *desired_flows,
-                uint8_t table_id, uint16_t priority, uint64_t cookie,
-                const struct match *match, const struct ofpbuf *actions)
+ofctrl_check_and_add_flow(struct hmap *desired_flows,
+                          uint8_t table_id, uint16_t priority, uint64_t cookie,
+                          const struct match *match,
+                          const struct ofpbuf *actions,
+                          bool log_duplicate_flow)
 {
     struct ovn_flow *f = xmalloc(sizeof *f);
     f->table_id = table_id;
@@ -644,13 +646,14 @@  ofctrl_add_flow(struct hmap *desired_flows,
     f->cookie = cookie;
 
     if (ovn_flow_lookup(desired_flows, f)) {
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
-        if (!VLOG_DROP_DBG(&rl)) {
-            char *s = ovn_flow_to_string(f);
-            VLOG_DBG("dropping duplicate flow: %s", s);
-            free(s);
+        if (log_duplicate_flow) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+            if (!VLOG_DROP_DBG(&rl)) {
+                char *s = ovn_flow_to_string(f);
+                VLOG_DBG("dropping duplicate flow: %s", s);
+                free(s);
+            }
         }
-
         ovn_flow_destroy(f);
         return;
     }
@@ -658,6 +661,14 @@  ofctrl_add_flow(struct hmap *desired_flows,
     hmap_insert(desired_flows, &f->hmap_node, f->hmap_node.hash);
 }
 
+void
+ofctrl_add_flow(struct hmap *desired_flows,
+                uint8_t table_id, uint16_t priority, uint64_t cookie,
+                const struct match *match, const struct ofpbuf *actions)
+{
+    ofctrl_check_and_add_flow(desired_flows, table_id, priority, cookie,
+                              match, actions, true);
+}
 
 /* ovn_flow. */
 
diff --git a/ovn/controller/ofctrl.h b/ovn/controller/ofctrl.h
index e3fc95b05..ae0cfa513 100644
--- a/ovn/controller/ofctrl.h
+++ b/ovn/controller/ofctrl.h
@@ -55,4 +55,9 @@  void ofctrl_add_flow(struct hmap *desired_flows, uint8_t table_id,
                      uint16_t priority, uint64_t cookie,
                      const struct match *, const struct ofpbuf *ofpacts);
 
+void ofctrl_check_and_add_flow(struct hmap *desired_flows, uint8_t table_id,
+                               uint16_t priority, uint64_t cookie,
+                               const struct match *,
+                               const struct ofpbuf *ofpacts,
+                               bool log_duplicate_flow);
 #endif /* ovn/ofctrl.h */
diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index c38d7b09f..ab3b02ab1 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -189,7 +189,8 @@  get_zone_ids(const struct sbrec_port_binding *binding,
 
 static void
 put_local_common_flows(uint32_t dp_key, uint32_t port_key,
-                       bool nested_container, const struct zone_ids *zone_ids,
+                       uint32_t parent_port_key,
+                       const struct zone_ids *zone_ids,
                        struct ofpbuf *ofpacts_p, struct hmap *flow_table)
 {
     struct match match;
@@ -244,11 +245,19 @@  put_local_common_flows(uint32_t dp_key, uint32_t port_key,
     /* Table 64, Priority 100.
      * =======================
      *
-     * If the packet is supposed to hair-pin because the "loopback"
-     * flag is set (or if the destination is a nested container),
+     * If the packet is supposed to hair-pin because the
+     *   - "loopback" flag is set
+     *   - or if the destination is a nested container
+     *   - or if "nested_container" flag is set and the destination is the
+     *     parent port,
      * temporarily set the in_port to zero, resubmit to
      * table 65 for logical-to-physical translation, then restore
-     * the port number. */
+     * the port number.
+     *
+     * If 'parent_port_key' is set, then the 'port_key' represents a nested
+     * container. */
+
+    bool nested_container = parent_port_key ? true: false;
     match_init_catchall(&match);
     ofpbuf_clear(ofpacts_p);
     match_set_metadata(&match, htonll(dp_key));
@@ -264,6 +273,38 @@  put_local_common_flows(uint32_t dp_key, uint32_t port_key,
     put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(ofpacts_p));
     ofctrl_add_flow(flow_table, OFTABLE_SAVE_INPORT, 100, 0,
                     &match, ofpacts_p);
+
+    if (nested_container) {
+        /* It's a nested container and when the packet from the nested
+         * container is to be sent to the parent port, "nested_container"
+         * flag will be set. We need to temporarily set the in_port to zero
+         * as mentioned in the comment above.
+         *
+         * If a parent port has multiple child ports, then this if condition
+         * will be hit multiple times, but we want to add only one flow.
+         * ofctrl_add_flow() logs a warning message for duplicate flows.
+         * So use the function 'ofctrl_check_and_add_flow' which doesn't
+         * log a warning.
+         *
+         * Other option is to add this flow for all the ports which are not
+         * nested containers. In which case we will add this flow for all the
+         * ports even if they don't have any child ports which is
+         * unnecessary.
+         */
+        match_init_catchall(&match);
+        ofpbuf_clear(ofpacts_p);
+        match_set_metadata(&match, htonll(dp_key));
+        match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, parent_port_key);
+        match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
+                             MLF_NESTED_CONTAINER, MLF_NESTED_CONTAINER);
+
+        put_stack(MFF_IN_PORT, ofpact_put_STACK_PUSH(ofpacts_p));
+        put_load(0, MFF_IN_PORT, 0, 16, ofpacts_p);
+        put_resubmit(OFTABLE_LOG_TO_PHY, ofpacts_p);
+        put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(ofpacts_p));
+        ofctrl_check_and_add_flow(flow_table, OFTABLE_SAVE_INPORT, 100, 0,
+                                  &match, ofpacts_p, false);
+    }
 }
 
 static void
@@ -328,7 +369,7 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
         }
 
         struct zone_ids binding_zones = get_zone_ids(binding, ct_zones);
-        put_local_common_flows(dp_key, port_key, false, &binding_zones,
+        put_local_common_flows(dp_key, port_key, 0, &binding_zones,
                                ofpacts_p, flow_table);
 
         match_init_catchall(&match);
@@ -452,6 +493,7 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
 
     int tag = 0;
     bool nested_container = false;
+    const struct sbrec_port_binding *parent_port = NULL;
     ofp_port_t ofport;
     bool is_remote = false;
     if (binding->parent_port && *binding->parent_port) {
@@ -463,6 +505,8 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
         if (ofport) {
             tag = *binding->tag;
             nested_container = true;
+            parent_port = lport_lookup_by_name(
+                sbrec_port_binding_by_name, binding->parent_port);
         }
     } else {
         ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
@@ -523,7 +567,10 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
          */
 
         struct zone_ids zone_ids = get_zone_ids(binding, ct_zones);
-        put_local_common_flows(dp_key, port_key, nested_container, &zone_ids,
+        uint32_t parent_port_key = parent_port ? parent_port->tunnel_key : 0;
+        /* Pass the parent port tunnel key if the port is a nested
+         * container. */
+        put_local_common_flows(dp_key, port_key, parent_port_key, &zone_ids,
                                ofpacts_p, flow_table);
 
         /* Table 0, Priority 150 and 100.
@@ -553,8 +600,10 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
             if (nested_container) {
                 /* When a packet comes from a container sitting behind a
                  * parent_port, we should let it loopback to other containers
-                 * or the parent_port itself. */
-                put_load(MLF_ALLOW_LOOPBACK, MFF_LOG_FLAGS, 0, 1, ofpacts_p);
+                 * or the parent_port itself. Indicate this by setting the
+                 * MLF_NESTED_CONTAINER_BIT in MFF_LOG_FLAGS.*/
+                put_load(1, MFF_LOG_FLAGS, MLF_NESTED_CONTAINER_BIT, 1,
+                         ofpacts_p);
             }
             ofpact_put_STRIP_VLAN(ofpacts_p);
         }
diff --git a/ovn/lib/logical-fields.h b/ovn/lib/logical-fields.h
index b1dbb035a..95759a8bb 100644
--- a/ovn/lib/logical-fields.h
+++ b/ovn/lib/logical-fields.h
@@ -50,6 +50,7 @@  enum mff_log_flags_bits {
     MLF_FORCE_SNAT_FOR_DNAT_BIT = 2,
     MLF_FORCE_SNAT_FOR_LB_BIT = 3,
     MLF_LOCAL_ONLY_BIT = 4,
+    MLF_NESTED_CONTAINER_BIT = 5,
 };
 
 /* MFF_LOG_FLAGS_REG flag assignments */
@@ -75,6 +76,9 @@  enum mff_log_flags {
      * hypervisors should instead only be output to local targets
      */
     MLF_LOCAL_ONLY = (1 << MLF_LOCAL_ONLY_BIT),
+
+    /* Indicate that a packet was received from a nested container. */
+    MLF_NESTED_CONTAINER = (1 << MLF_NESTED_CONTAINER_BIT),
 };
 
 #endif /* ovn/lib/logical-fields.h */
diff --git a/tests/ovn.at b/tests/ovn.at
index 769e09f81..a7aba5ccd 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -7038,6 +7038,17 @@  packet=${dst_mac}${src_mac}8100000208004500001c000000003f110100${src_ip}${dst_ip
 echo  $packet >> expected1
 OVN_CHECK_PACKETS([hv1/vm1-tx.pcap], [expected1])
 
+# Send broadcast packet from foo1. foo1 should not receive the same packet.
+src_mac="f00000010205"
+dst_mac="ffffffffffff"
+src_ip=`ip_to_hex 192 168 1 2`
+dst_ip=`ip_to_hex 255 255 255 255`
+packet=${dst_mac}${src_mac}8100000108004500001c0000000040110000${src_ip}${dst_ip}0035111100080000
+as hv1 ovs-appctl netdev-dummy/receive vm1 $packet
+
+# expected packet at VM1
+OVN_CHECK_PACKETS([hv1/vm1-tx.pcap], [expected1])
+
 OVN_CLEANUP([hv1],[hv2])
 
 AT_CLEANUP