[ovs-dev,RFC,ovn,10/10] ovn-ic: Interconnection port controller.
diff mbox series

Message ID 1569623665-77390-11-git-send-email-hzhou8@ebay.com
State New
Headers show
Series
  • OVN Interconnection
Related show

Commit Message

Han Zhou Sept. 27, 2019, 10:34 p.m. UTC
From: Han Zhou <hzhou8@ebay.com>

Sync interconnection logical ports and bindings between NB, SB
and ISB.  With this patch, the OVN interconnection works end to
end.

Signed-off-by: Han Zhou <hzhou8@ebay.com>
---
 controller/binding.c   |   6 +-
 ic/ovn-ic.c            | 342 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/ovn-util.c         |   7 +
 lib/ovn-util.h         |   2 +
 northd/ovn-northd.c    |   8 +-
 ovn-architecture.7.xml |   2 +-
 ovn-nb.xml             |  11 +-
 7 files changed, 367 insertions(+), 11 deletions(-)

Patch
diff mbox series

diff --git a/controller/binding.c b/controller/binding.c
index 242163d..e1f41b7 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -744,11 +744,13 @@  binding_evaluate_port_binding_changes(
          * - If a regular VIF is unbound from this chassis, the local ovsdb
          *   interface table will be updated, which will trigger recompute.
          *
-         * - If the port is not a regular VIF, always trigger recompute. */
+         * - If the port is not a regular VIF, and not a "remote" port,
+         *   always trigger recompute. */
         if (binding_rec->chassis == chassis_rec
             || is_our_chassis(chassis_rec, binding_rec,
                               active_tunnels, &lport_to_iface, local_lports)
-            || strcmp(binding_rec->type, "")) {
+            || (strcmp(binding_rec->type, "") && strcmp(binding_rec->type,
+                                                        "remote"))) {
             changed = true;
             break;
         }
diff --git a/ic/ovn-ic.c b/ic/ovn-ic.c
index ddc9d0a..2690238 100644
--- a/ic/ovn-ic.c
+++ b/ic/ovn-ic.c
@@ -326,6 +326,347 @@  gateway_run(struct ic_context *ctx, const struct isbrec_availability_zone *az)
     shash_destroy(&remote_gws);
 }
 
+/* Returns the NB logical switch whose other_config:interconn-ts value equals
+ * 'ts_name', or NULL if no logical switch in the NB IDL carries that value.
+ *
+ * 'ts_name' is only read, so it is const-qualified. */
+static const struct nbrec_logical_switch *
+find_ts_in_nb(struct ic_context *ctx, const char *ts_name)
+{
+    /* XXX: optimize with index */
+    const struct nbrec_logical_switch *ls;
+    NBREC_LOGICAL_SWITCH_FOR_EACH (ls, ctx->ovnnb_idl) {
+        const char *ls_ts_name = smap_get(&ls->other_config, "interconn-ts");
+        if (ls_ts_name && !strcmp(ts_name, ls_ts_name)) {
+            /* Return from inside the loop so the result never depends on
+             * the iterator's value after the loop ends. */
+            return ls;
+        }
+    }
+    return NULL;
+}
+
+/* Returns the SB port binding whose logical_port equals the "peer" option
+ * of 'sb_pb'.  Returns NULL if 'sb_pb' has no "peer" option or if no port
+ * binding in the SB IDL has that name. */
+static const struct sbrec_port_binding *
+find_peer_port(struct ic_context *ctx,
+               const struct sbrec_port_binding *sb_pb)
+{
+    const struct sbrec_port_binding *candidate;
+    const struct sbrec_port_binding *match = NULL;
+    const char *wanted = smap_get(&sb_pb->options, "peer");
+
+    if (wanted) {
+        /* XXX: use index */
+        SBREC_PORT_BINDING_FOR_EACH (candidate, ctx->ovnsb_idl) {
+            if (strcmp(candidate->logical_port, wanted) == 0) {
+                match = candidate;
+                break;
+            }
+        }
+    }
+    return match;
+}
+
+/* Returns the SB port binding of the chassis-redirect port derived from
+ * 'lrp_pb', that is, the binding whose logical_port equals
+ * ovn_chassis_redirect_name(lrp_pb->logical_port).  Returns NULL if there
+ * is no such binding. */
+static const struct sbrec_port_binding *
+find_crp_from_lrp(struct ic_context *ctx,
+                  const struct sbrec_port_binding *lrp_pb)
+{
+    const struct sbrec_port_binding *found = NULL;
+    const struct sbrec_port_binding *candidate;
+    char *wanted = ovn_chassis_redirect_name(lrp_pb->logical_port);
+
+    /* XXX: use index */
+    SBREC_PORT_BINDING_FOR_EACH (candidate, ctx->ovnsb_idl) {
+        if (strcmp(candidate->logical_port, wanted) == 0) {
+            found = candidate;
+            break;
+        }
+    }
+
+    /* The name is a fresh allocation that this function owns. */
+    free(wanted);
+    return found;
+}
+
+/* Returns the chassis-redirect port binding that belongs to the peer of
+ * 'sb_pb', or NULL if 'sb_pb' has no peer or the peer has no
+ * chassis-redirect port binding. */
+static const struct sbrec_port_binding *
+find_crp_for_sb_pb(struct ic_context *ctx,
+                   const struct sbrec_port_binding *sb_pb)
+{
+    const struct sbrec_port_binding *lrp_pb = find_peer_port(ctx, sb_pb);
+
+    return lrp_pb ? find_crp_from_lrp(ctx, lrp_pb) : NULL;
+}
+
+/* Returns the first entry of the 'mac' column of the peer port binding of
+ * 'sb_pb', or NULL if 'sb_pb' has no peer or the peer's 'mac' column is
+ * empty.  The returned pointer refers to the string stored in the peer
+ * row. */
+static const char *
+get_lrp_address_for_sb_pb(struct ic_context *ctx,
+                          const struct sbrec_port_binding *sb_pb)
+{
+    const struct sbrec_port_binding *lrp_pb = find_peer_port(ctx, sb_pb);
+
+    if (lrp_pb && lrp_pb->n_mac) {
+        return lrp_pb->mac[0];
+    }
+    return NULL;
+}
+
+/* Returns the SB chassis whose name equals 'name', or NULL if the SB IDL
+ * contains no such chassis. */
+static const struct sbrec_chassis *
+find_sb_chassis(struct ic_context *ctx, const char *name)
+{
+    const struct sbrec_chassis *found = NULL;
+    const struct sbrec_chassis *candidate;
+
+    /* XXX: use index */
+    SBREC_CHASSIS_FOR_EACH (candidate, ctx->ovnsb_idl) {
+        if (strcmp(candidate->name, name) == 0) {
+            found = candidate;
+            break;
+        }
+    }
+    return found;
+}
+
+/* Brings the ISB record 'isb_pb' and the SB record 'sb_pb' of one local
+ * port in line with each other:
+ *   - address: copied to ISB from the SB peer port binding of 'sb_pb'
+ *     (empty string when no address can be obtained).
+ *   - gateway: copied to ISB from the chassis of the peer's
+ *     chassis-redirect port (empty string when there is none).
+ *   - tunnel key: copied from ISB to SB.
+ * Each column is written only when its value actually differs.
+ */
+static void
+sync_local_port(struct ic_context *ctx,
+                const struct isbrec_port_binding *isb_pb,
+                const struct sbrec_port_binding *sb_pb)
+{
+    /* Address: an unavailable address is stored in ISB as "". */
+    const char *wanted_addr = get_lrp_address_for_sb_pb(ctx, sb_pb);
+    if (!wanted_addr) {
+        VLOG_DBG("Can't get logical router port address for logical"
+                 " switch port %s", sb_pb->logical_port);
+        wanted_addr = "";
+    }
+    if (strcmp(wanted_addr, isb_pb->address)) {
+        isbrec_port_binding_set_address(isb_pb, wanted_addr);
+    }
+
+    /* Gateway: a missing or unbound chassis-redirect port is stored in ISB
+     * as "". */
+    /* XXX: sync encap so that multiple encaps can be used for the same
+     * gateway. */
+    const struct sbrec_port_binding *crp = find_crp_for_sb_pb(ctx, sb_pb);
+    const char *wanted_gw = crp && crp->chassis ? crp->chassis->name : "";
+    if (strcmp(wanted_gw, isb_pb->gateway)) {
+        isbrec_port_binding_set_gateway(isb_pb, wanted_gw);
+    }
+
+    /* Tunnel key: ISB -> SB. */
+    if (isb_pb->tunnel_key != sb_pb->tunnel_key) {
+        sbrec_port_binding_set_tunnel_key(sb_pb, isb_pb->tunnel_key);
+    }
+}
+
+/* Brings the NB port 'lsp' and the SB record 'sb_pb' of one remote port in
+ * line with the ISB record 'isb_pb':
+ *   - address: copied from ISB to NB; an empty ISB address clears the NB
+ *     address list.
+ *   - gateway: the SB chassis named by the ISB gateway is set on 'sb_pb';
+ *     an empty ISB gateway clears the chassis.
+ *   - tunnel key: copied from ISB to SB.
+ * Each column is written only when its value actually differs.
+ *
+ * NOTE(review): when the ISB gateway names a chassis that is not present in
+ * SB, the function returns before the tunnel key is synced -- confirm that
+ * skipping the tunnel key in that case is intended.
+ */
+static void
+sync_remote_port(struct ic_context *ctx,
+                 const struct isbrec_port_binding *isb_pb,
+                 const struct nbrec_logical_switch_port *lsp,
+                 const struct sbrec_port_binding *sb_pb)
+{
+    /* Address: ISB -> NB. */
+    bool isb_has_address = isb_pb->address[0] != '\0';
+    if (!isb_has_address) {
+        if (lsp->n_addresses != 0) {
+            nbrec_logical_switch_port_set_addresses(lsp, NULL, 0);
+        }
+    } else if (lsp->n_addresses != 1
+               || strcmp(isb_pb->address, lsp->addresses[0])) {
+        nbrec_logical_switch_port_set_addresses(
+            lsp, (const char **)&isb_pb->address, 1);
+    }
+
+    /* Gateway: ISB -> SB. */
+    /* XXX: sync encap so that multiple encaps can be used for the same
+     * gateway. */
+    if (!isb_pb->gateway[0]) {
+        if (sb_pb->chassis) {
+            sbrec_port_binding_set_chassis(sb_pb, NULL);
+        }
+    } else if (!sb_pb->chassis
+               || strcmp(sb_pb->chassis->name, isb_pb->gateway)) {
+        const struct sbrec_chassis *gw_chassis =
+            find_sb_chassis(ctx, isb_pb->gateway);
+        if (!gw_chassis) {
+            VLOG_DBG("Chassis %s is not found in SB, syncing from ISB "
+                     "to SB skipped for logical port %s.",
+                     isb_pb->gateway, lsp->name);
+            return;
+        }
+        sbrec_port_binding_set_chassis(sb_pb, gw_chassis);
+    }
+
+    /* Tunnel key: ISB -> SB. */
+    if (isb_pb->tunnel_key != sb_pb->tunnel_key) {
+        sbrec_port_binding_set_tunnel_key(sb_pb, isb_pb->tunnel_key);
+    }
+}
+
+/* Adds to 'ls' a new NB logical switch port of type "remote" that mirrors
+ * the ISB port binding 'isb_pb': the port takes the ISB logical_port as its
+ * name, is marked up, and receives the ISB address as its only address when
+ * that address is non-empty. */
+static void
+create_nb_lsp(struct ic_context *ctx,
+              const struct isbrec_port_binding *isb_pb,
+              const struct nbrec_logical_switch *ls)
+{
+    bool is_up = true;
+    const struct nbrec_logical_switch_port *new_lsp;
+
+    new_lsp = nbrec_logical_switch_port_insert(ctx->ovnnb_txn);
+    nbrec_logical_switch_port_set_name(new_lsp, isb_pb->logical_port);
+    nbrec_logical_switch_port_set_type(new_lsp, "remote");
+    nbrec_logical_switch_port_set_up(new_lsp, &is_up, 1);
+
+    if (isb_pb->address[0] != '\0') {
+        nbrec_logical_switch_port_set_addresses(
+            new_lsp, (const char **)&isb_pb->address, 1);
+    }
+
+    /* Attach the new port to the transit switch. */
+    nbrec_logical_switch_update_ports_addvalue(ls, new_lsp);
+}
+
+/* Inserts into ISB a port binding for the local port 'sb_pb' on transit
+ * switch 'ts_name', belonging to availability zone 'az' and using tunnel
+ * key 'pb_tnl_key'.  The address and gateway columns are set only when
+ * get_lrp_address_for_sb_pb() and find_crp_for_sb_pb() can supply them. */
+static void
+create_isb_pb(struct ic_context *ctx,
+              const struct sbrec_port_binding *sb_pb,
+              const struct isbrec_availability_zone *az,
+              const char *ts_name,
+              uint32_t pb_tnl_key)
+{
+    const struct isbrec_port_binding *new_pb;
+
+    new_pb = isbrec_port_binding_insert(ctx->ovnisb_txn);
+    isbrec_port_binding_set_availability_zone(new_pb, az);
+    isbrec_port_binding_set_transit_switch(new_pb, ts_name);
+    isbrec_port_binding_set_logical_port(new_pb, sb_pb->logical_port);
+    isbrec_port_binding_set_tunnel_key(new_pb, pb_tnl_key);
+
+    const char *lrp_address = get_lrp_address_for_sb_pb(ctx, sb_pb);
+    if (lrp_address) {
+        isbrec_port_binding_set_address(new_pb, lrp_address);
+    }
+
+    /* XXX: sync encap so that multiple encaps can be used for the same
+     * gateway. */
+    const struct sbrec_port_binding *crp = find_crp_for_sb_pb(ctx, sb_pb);
+    const struct sbrec_chassis *gw_chassis = crp ? crp->chassis : NULL;
+    if (gw_chassis) {
+        isbrec_port_binding_set_gateway(new_pb, gw_chassis->name);
+    }
+}
+
+/* Returns the SB port binding whose logical_port equals the name of the NB
+ * port 'lsp', or NULL if the SB IDL contains no such binding. */
+static const struct sbrec_port_binding *
+find_lsp_in_sb(struct ic_context *ctx,
+               const struct nbrec_logical_switch_port *lsp)
+{
+    const struct sbrec_port_binding *found = NULL;
+    const struct sbrec_port_binding *candidate;
+
+    /* XXX: use index */
+    SBREC_PORT_BINDING_FOR_EACH (candidate, ctx->ovnsb_idl) {
+        if (strcmp(candidate->logical_port, lsp->name) == 0) {
+            found = candidate;
+            break;
+        }
+    }
+    return found;
+}
+
+/* Allocates a tunnel key for a transit switch port from the set
+ * 'pb_tnlids', searching the range [1, 2^15 - 1].  The search hint lives in
+ * a function-local static, so it persists across calls.  Returns whatever
+ * ovn_allocate_tnlid() returns. */
+static uint32_t
+allocate_port_key(struct hmap *pb_tnlids)
+{
+    enum { MIN_PORT_KEY = 1, MAX_PORT_KEY = (1u << 15) - 1 };
+    static uint32_t hint;
+
+    return ovn_allocate_tnlid(pb_tnlids, "transit port",
+                              MIN_PORT_KEY, MAX_PORT_KEY, &hint);
+}
+
+/* Reconciles the interconnection ports of every transit switch between the
+ * local NB/SB databases and the ISB database.
+ *
+ * For each transit switch in INB that has a matching NB logical switch:
+ *   - NB ports of type "router" are local.  If the port has an SB binding,
+ *     an ISB port binding is created for it when missing, otherwise the
+ *     existing one is synced with sync_local_port().
+ *   - NB ports of type "remote" mirror ports of other availability zones.
+ *     They are removed from the switch when ISB no longer has them,
+ *     otherwise synced with sync_remote_port() once an SB binding exists.
+ *   - ISB bindings of availability zone 'az' left unmatched by the steps
+ *     above are deleted.
+ *   - ISB bindings of other availability zones left unmatched get a new NB
+ *     "remote" port.
+ *
+ * Does nothing unless ISB, NB and SB transactions are all open. */
+static void
+port_binding_run(struct ic_context *ctx,
+                 const struct isbrec_availability_zone *az)
+{
+    if (!ctx->ovnisb_txn || !ctx->ovnnb_txn || !ctx->ovnsb_txn) {
+        return;
+    }
+
+    const struct inbrec_transit_switch *ts;
+    INBREC_TRANSIT_SWITCH_FOR_EACH (ts, ctx->ovninb_idl) {
+        const struct nbrec_logical_switch *ls = find_ts_in_nb(ctx, ts->name);
+        if (!ls) {
+            VLOG_DBG("Transit switch %s not found in NB.", ts->name);
+            continue;
+        }
+
+        /* Index this transit switch's ISB port bindings by logical port
+         * name, split by whether they belong to 'az', and record every
+         * tunnel key that is already taken. */
+        struct shash local_pbs = SHASH_INITIALIZER(&local_pbs);
+        struct shash remote_pbs = SHASH_INITIALIZER(&remote_pbs);
+        struct hmap pb_tnlids = HMAP_INITIALIZER(&pb_tnlids);
+        const struct isbrec_port_binding *isb_pb;
+        ISBREC_PORT_BINDING_FOR_EACH (isb_pb, ctx->ovnisb_idl) {
+            /* XXX: use index */
+            if (!strcmp(isb_pb->transit_switch, ts->name)) {
+                if (isb_pb->availability_zone == az) {
+                    shash_add(&local_pbs, isb_pb->logical_port, isb_pb);
+                } else {
+                    shash_add(&remote_pbs, isb_pb->logical_port, isb_pb);
+                }
+                ovn_add_tnlid(&pb_tnlids, isb_pb->tunnel_key);
+            }
+        }
+
+        /* Walk the NB ports; every ISB binding that is matched here is
+         * removed from the shash it was indexed in. */
+        for (size_t i = 0; i < ls->n_ports; i++) {
+            const struct nbrec_logical_switch_port *lsp = ls->ports[i];
+            const struct sbrec_port_binding *sb_pb = find_lsp_in_sb(ctx, lsp);
+
+            if (!strcmp(lsp->type, "router")) {
+                /* The port is local. */
+                if (!sb_pb) {
+                    continue;
+                }
+                isb_pb = shash_find_and_delete(&local_pbs, lsp->name);
+                if (isb_pb) {
+                    sync_local_port(ctx, isb_pb, sb_pb);
+                    continue;
+                }
+
+                uint32_t pb_tnl_key = allocate_port_key(&pb_tnlids);
+                if (!pb_tnl_key) {
+                    /* ovn_allocate_tnlid() returns 0 (after logging a
+                     * warning) when all tunnel ids are exhausted.  Do not
+                     * create an ISB binding with that key; the port is
+                     * retried on the next run. */
+                    continue;
+                }
+                create_isb_pb(ctx, sb_pb, az, ts->name, pb_tnl_key);
+            } else if (!strcmp(lsp->type, "remote")) {
+                /* The port is remote. */
+                isb_pb = shash_find_and_delete(&remote_pbs, lsp->name);
+                if (!isb_pb) {
+                    nbrec_logical_switch_update_ports_delvalue(ls, lsp);
+                } else {
+                    if (!sb_pb) {
+                        continue;
+                    }
+                    sync_remote_port(ctx, isb_pb, lsp, sb_pb);
+                }
+            } else {
+                VLOG_DBG("Ignore lsp %s on ts %s with type %s.",
+                         lsp->name, ts->name, lsp->type);
+            }
+        }
+
+        /* Delete extra port-binding from ISB */
+        struct shash_node *node;
+        SHASH_FOR_EACH (node, &local_pbs) {
+            isbrec_port_binding_delete(node->data);
+        }
+
+        /* Create lsp in NB for remote ports */
+        SHASH_FOR_EACH (node, &remote_pbs) {
+            create_nb_lsp(ctx, node->data, ls);
+        }
+
+        shash_destroy(&local_pbs);
+        shash_destroy(&remote_pbs);
+        ovn_destroy_tnlids(&pb_tnlids);
+    }
+}
+
 static void
 ovn_db_run(struct ic_context *ctx)
 {
@@ -338,6 +679,7 @@  ovn_db_run(struct ic_context *ctx)
 
     ts_run(ctx);
     gateway_run(ctx, az);
+    port_binding_run(ctx, az);
 }
 
 static void
diff --git a/lib/ovn-util.c b/lib/ovn-util.c
index 950e86f..c889367 100644
--- a/lib/ovn-util.c
+++ b/lib/ovn-util.c
@@ -404,6 +404,7 @@  static const char *OVN_NB_LSP_TYPES[] = {
     "vtep",
     "external",
     "virtual",
+    "remote",
 };
 
 bool
@@ -508,3 +509,9 @@  ovn_allocate_tnlid(struct hmap *set, const char *name, uint32_t min, uint32_t ma
     VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
     return 0;
 }
+
+/* Returns the name of the chassis-redirect port derived from 'port_name',
+ * that is, 'port_name' prefixed with "cr-".  The string is newly allocated
+ * by xasprintf(); the caller is responsible for freeing it. */
+char *
+ovn_chassis_redirect_name(const char *port_name)
+{
+    char *redirect_name = xasprintf("cr-%s", port_name);
+
+    return redirect_name;
+}
diff --git a/lib/ovn-util.h b/lib/ovn-util.h
index b9bda8d..233362b 100644
--- a/lib/ovn-util.h
+++ b/lib/ovn-util.h
@@ -100,4 +100,6 @@  void ovn_destroy_tnlids(struct hmap *tnlids);
 void ovn_add_tnlid(struct hmap *set, uint32_t tnlid);
 uint32_t ovn_allocate_tnlid(struct hmap *set, const char *name, uint32_t min,
                             uint32_t max, uint32_t *hint);
+
+char *ovn_chassis_redirect_name(const char *port_name);
 #endif
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index d9c8a0f..707cb1a 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -1081,12 +1081,6 @@  ovn_port_allocate_key(struct ovn_datapath *od)
                               1, (1u << 15) - 1, &od->port_key_hint);
 }
 
-static char *
-chassis_redirect_name(const char *port_name)
-{
-    return xasprintf("cr-%s", port_name);
-}
-
 static bool
 ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
 {
@@ -1964,7 +1958,7 @@  join_logical_ports(struct northd_context *ctx,
                         continue;
                     }
 
-                    char *redirect_name = chassis_redirect_name(nbrp->name);
+                    char *redirect_name = ovn_chassis_redirect_name(nbrp->name);
                     struct ovn_port *crp = ovn_port_find(ports, redirect_name);
                     if (crp) {
                         crp->derived = true;
diff --git a/ovn-architecture.7.xml b/ovn-architecture.7.xml
index 56b2167..417acbf 100644
--- a/ovn-architecture.7.xml
+++ b/ovn-architecture.7.xml
@@ -1808,7 +1808,7 @@ 
     </li>
   </ol>
 
-  <h2>OVN Deployments Interconnection (TODO)</h2>
+  <h2>OVN Deployments Interconnection</h2>
 
   <p>
     It is not uncommon for an operator to deploy multiple OVN clusters, for
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 07f60d3..250fd67 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -504,7 +504,16 @@ 
                 parent owning the <code>virtual ip</code>.
               </li>
             </ul>
-           </dd>
+          </dd>
+
+          <dt><code>remote</code></dt>
+          <dd>
+            A remote port models a port that resides in another OVN
+            deployment, on the other side of a transit logical switch used for
+            OVN interconnection.  Ports of this type are created by
+            <code>ovn-ic</code> rather than by the CMS.  Any change made to
+            such a port is automatically overwritten by <code>ovn-ic</code>.
+          </dd>
         </dl>
       </column>
     </group>