diff mbox series

[ovs-dev,RFC] ofproto-dpif-xlate: Flood the updates for learning tables through peers.

Message ID 20190618142930.27624-1-i.maximets@samsung.com
State RFC
Headers show
Series [ovs-dev,RFC] ofproto-dpif-xlate: Flood the updates for learning tables through peers. | expand

Commit Message

Ilya Maximets June 18, 2019, 2:29 p.m. UTC
Consider following setup:

  br0:
    phy-port
    random-port
    patch-to-br-int

  br-int:
    port-1
    port-2
    patch-to-br0

And the expected traffic pattern:

    phy-port ---> port-1 ---> port-2 ---> phy-port

For example, this could be done if 'port-1' and 'port-2' are ports of
two VMs and we have some service chain PHY --> VM1 --> VM2 --> PHY.
In case we have a unidirectional packet stream (UDP), VM1 will never
send packets outside the integration bridge (br-int). This will lead
to situation where the physical bridge (br0) will expire the mac of
'port-1' from the mac learning table after some time and will never
know it again, causing constant flooding inside the physical bridge
(br0):

  # ovs-appctl ofproto/trace \
               br0 "in_port=phy-port,ip,eth_src=<SRC>,eth_dst=<DST>"
  Flow: ip,in_port=1,dl_src=<SRC>,dl_dst=<DST>

  bridge("br0")
  -----------------
   0. priority 0
      NORMAL
       -> no learned MAC for destination, flooding

  bridge("br-int")
  ----------------
   0. in_port=patch-to-br0, priority 0
      NORMAL
       -> forwarding to learned port

  Final flow: unchanged
  Megaflow: recirc_id=0,eth,ip,in_port=phy-port,dl_src=<SRC>,dl_dst=<DST>
  Datapath actions: br0,random-port,port-1

This is unwanted because 'br-int' knows that the desired mac is behind
the 'port-1' and sends packets directly, while 'br0' keeps flooding the
packets to all the ports significantly reducing performance.

Since bridges connected via patch ports are almost the same L2 bridge,
they should share L2 learning information in order to avoid such
conditions.

This could be implemented by flooding of the mac learning information
over the patch ports while updating the learning tables.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
---

Not fully tested.

 ofproto/ofproto-dpif-xlate.c | 66 ++++++++++++++++++++++++++++++++----
 1 file changed, 59 insertions(+), 7 deletions(-)

Comments

Ben Pfaff June 18, 2019, 3:58 p.m. UTC | #1
On Tue, Jun 18, 2019 at 05:29:30PM +0300, Ilya Maximets wrote:
> Since bridges connected via patch ports are almost the same L2 bridge,
> they should share L2 learning information in order to avoid such
> conditions.
> 
> This could be implemented by flooding of the mac learning information
> over the patch ports while updating the learning tables.
> 
> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>

This idea is interesting to me because it breaks my idea of how bridges
connected by patch ports work.  My conception of such bridges has been
that they are almost completely independent; their only connection is
the patch port, which acts just like connecting two switches with a
physical cable.  I am not aware of violations of this model currently,
but I believe that this change would violate it.  I am not going to
insist on retaining the model--I do not know whether it is truly
helpful, and this patch seems beneficial--but I want to point out that
we're making a conceptual change
diff mbox series

Patch

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 73966a4e8..11d04e049 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -34,6 +34,7 @@ 
 #include "csum.h"
 #include "dp-packet.h"
 #include "dpif.h"
+#include "hmapx.h"
 #include "in-band.h"
 #include "lacp.h"
 #include "learn.h"
@@ -544,7 +545,7 @@  static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port,
                                bool is_last_action, xlate_actions_handler *);
 
 static bool input_vid_is_valid(const struct xlate_ctx *,
-                               uint16_t vid, struct xbundle *);
+                               uint16_t vid, const struct xbundle *);
 static void xvlan_copy(struct xvlan *dst, const struct xvlan *src);
 static void xvlan_pop(struct xvlan *src);
 static void xvlan_push_uninit(struct xvlan *src);
@@ -2181,7 +2182,7 @@  mirror_ingress_packet(struct xlate_ctx *ctx)
  * 0...4095. */
 static bool
 input_vid_is_valid(const struct xlate_ctx *ctx,
-                   uint16_t vid, struct xbundle *in_xbundle)
+                   uint16_t vid, const struct xbundle *in_xbundle)
 {
     /* Allow any VID on the OFPP_NONE port. */
     if (in_xbundle == &ofpp_none_bundle) {
@@ -2554,7 +2555,8 @@  is_admissible(struct xlate_ctx *ctx, struct xport *in_port,
 
 static bool
 update_learning_table__(const struct xbridge *xbridge,
-                        struct xbundle *in_xbundle, struct eth_addr dl_src,
+                        const struct xbundle *in_xbundle,
+                        struct eth_addr dl_src,
                         int vlan, bool is_grat_arp)
 {
     return (in_xbundle == &ofpp_none_bundle
@@ -2565,10 +2567,48 @@  update_learning_table__(const struct xbridge *xbridge,
 }
 
 static void
-update_learning_table(const struct xlate_ctx *ctx,
-                      struct xbundle *in_xbundle, struct eth_addr dl_src,
-                      int vlan, bool is_grat_arp)
+update_learning_table_with_flood(struct xlate_ctx *ctx,
+                                 const struct xbundle *in_xbundle,
+                                 struct eth_addr dl_src,
+                                 const struct xvlan *xvlan, bool is_grat_arp,
+                                 struct hmapx *updated_bridges)
 {
+    struct xport *xport;
+    int vlan = xvlan->v[0].vid;
+
+    /* Prevent updating the same bridge twice. */
+    hmapx_add(updated_bridges, CONST_CAST(void *, ctx->xbridge));
+
+    /* Flooding the mac updates through the peers. */
+    HMAP_FOR_EACH (xport, ofp_node, &ctx->xbridge->xports) {
+        if (xport->peer
+            && !hmapx_contains(updated_bridges, xport->peer->xbridge)
+            && xport->xbundle
+            && xport->xbundle != in_xbundle
+            && xport->xbundle->ofbundle != in_xbundle->ofbundle
+            && xbundle_includes_vlan(xport->xbundle, xvlan)
+            && xport->xbundle->floodable
+            && !xbundle_mirror_out(ctx->xbridge, xport->xbundle)) {
+
+            const struct xbridge *xbridge_orig = ctx->xbridge;
+            const struct xbundle *peer_xbundle = xport->peer->xbundle;
+            struct xvlan out_xvlan, peer_xvlan;
+
+            xvlan_output_translate(xport->xbundle, xvlan, &out_xvlan);
+            if (!input_vid_is_valid(ctx, out_xvlan.v[0].vid, peer_xbundle)) {
+                continue;
+            }
+            xvlan_input_translate(peer_xbundle, &out_xvlan, &peer_xvlan);
+
+            ctx->xbridge = xport->peer->xbridge;
+            update_learning_table_with_flood(ctx, peer_xbundle, dl_src,
+                                             &peer_xvlan, is_grat_arp,
+                                             updated_bridges);
+            ctx->xbridge = xbridge_orig;
+        }
+    }
+
+    /* Update the current bridge. */
     if (!update_learning_table__(ctx->xbridge, in_xbundle, dl_src, vlan,
                                  is_grat_arp)) {
         xlate_report_debug(ctx, OFT_DETAIL, "learned that "ETH_ADDR_FMT" is "
@@ -2577,6 +2617,18 @@  update_learning_table(const struct xlate_ctx *ctx,
     }
 }
 
+static void
+update_learning_table(struct xlate_ctx *ctx,
+                      const struct xbundle *in_xbundle, struct eth_addr dl_src,
+                      const struct xvlan *xvlan, bool is_grat_arp)
+{
+    struct hmapx updated_bridges = HMAPX_INITIALIZER(&updated_bridges);
+
+    update_learning_table_with_flood(ctx, in_xbundle, dl_src, xvlan,
+                                     is_grat_arp, &updated_bridges);
+    hmapx_destroy(&updated_bridges);
+}
+
 /* Updates multicast snooping table 'ms' given that a packet matching 'flow'
  * was received on 'in_xbundle' in 'vlan' and is either Report or Query. */
 static void
@@ -2984,7 +3036,7 @@  xlate_normal(struct xlate_ctx *ctx)
         && flow->packet_type == htonl(PT_ETH)
         && in_port->pt_mode != NETDEV_PT_LEGACY_L3
     ) {
-        update_learning_table(ctx, in_xbundle, flow->dl_src, vlan,
+        update_learning_table(ctx, in_xbundle, flow->dl_src, &xvlan,
                               is_grat_arp);
     }
     if (ctx->xin->xcache && in_xbundle != &ofpp_none_bundle) {