@@ -994,6 +994,7 @@ enum ovs_action_attr {
#ifndef __KERNEL__
OVS_ACTION_ATTR_TUNNEL_PUSH, /* struct ovs_action_push_tnl*/
OVS_ACTION_ATTR_TUNNEL_POP, /* u32 port number. */
+ OVS_ACTION_ATTR_LB_OUTPUT, /* u32 bond-id */
#endif
__OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
* from userspace. */
@@ -79,6 +79,7 @@
#include "unixctl.h"
#include "util.h"
#include "uuid.h"
+#include "ofproto/bond.h"
VLOG_DEFINE_THIS_MODULE(dpif_netdev);
@@ -366,6 +367,11 @@ struct dp_netdev {
struct conntrack *conntrack;
struct pmd_auto_lb pmd_alb;
+ /* Bonds.
+ *
+ * Any lookup into 'bonds' requires taking 'bond_mutex'. */
+ struct ovs_mutex bond_mutex;
+ struct hmap bonds;
};
static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
@@ -596,6 +602,20 @@ struct tx_port {
struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
};
+/* Contained by struct tx_bond 'slave_buckets' */
+struct slave_entry {
+ odp_port_t slave_id;
+ atomic_ullong n_packets;
+ atomic_ullong n_bytes;
+};
+
+/* Contained by struct dp_netdev_pmd_thread's 'bond_cache' or 'tx_bonds'. */
+struct tx_bond {
+ struct hmap_node node;
+ uint32_t bond_id;
+ struct slave_entry slave_buckets[BOND_BUCKETS];
+};
+
/* A set of properties for the current processing loop that is not directly
* associated with the pmd thread itself, but with the packets being
* processed or the short-term system configuration (for example, time).
@@ -707,7 +727,6 @@ struct dp_netdev_pmd_thread {
atomic_bool wait_for_reload; /* Can we busy wait for the next reload? */
atomic_bool reload_tx_qid; /* Do we need to reload static_tx_qid? */
atomic_bool exit; /* For terminating the pmd thread. */
-
pthread_t thread;
unsigned core_id; /* CPU core id of this pmd thread. */
int numa_id; /* numa node id of this pmd thread. */
@@ -728,6 +747,11 @@ struct dp_netdev_pmd_thread {
* read by the pmd thread. */
struct hmap tx_ports OVS_GUARDED;
+ struct ovs_mutex bond_mutex; /* Mutex for 'tx_bonds'. */
+ /* Map of 'tx_bond's used for transmission. Written by the main thread,
+ * read/written by the pmd thread. */
+ struct hmap tx_bonds OVS_GUARDED;
+
/* These are thread-local copies of 'tx_ports'. One contains only tunnel
* ports (that support push_tunnel/pop_tunnel), the other contains ports
* with at least one txq (that support send). A port can be in both.
@@ -740,6 +764,8 @@ struct dp_netdev_pmd_thread {
* other instance will only be accessed by its own pmd thread. */
struct hmap tnl_port_cache;
struct hmap send_port_cache;
+ /* These are thread-local copies of 'tx_bonds' */
+ struct hmap bond_cache;
/* Keep track of detailed PMD performance statistics. */
struct pmd_perf_stats perf_stats;
@@ -819,6 +845,12 @@ static void dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd,
static int
dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
bool force);
+static void dp_netdev_add_bond_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
+ struct tx_bond *bond)
+ OVS_REQUIRES(pmd->bond_mutex);
+static void dp_netdev_del_bond_tx_from_pmd(struct dp_netdev_pmd_thread *pmd,
+ struct tx_bond *tx)
+ OVS_REQUIRES(pmd->bond_mutex);
static void reconfigure_datapath(struct dp_netdev *dp)
OVS_REQUIRES(dp->port_mutex);
@@ -827,6 +859,10 @@ static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd);
static void dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread *pmd);
static void pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
OVS_REQUIRES(pmd->port_mutex);
+static void pmd_load_cached_bonds(struct dp_netdev_pmd_thread *pmd)
+ OVS_REQUIRES(pmd->bond_mutex);
+static void pmd_load_bond_cache(struct dp_netdev_pmd_thread *pmd);
+
static inline void
dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
struct polled_queue *poll_list, int poll_cnt);
@@ -1385,6 +1421,45 @@ pmd_perf_show_cmd(struct unixctl_conn *conn, int argc,
par.command_type = PMD_INFO_PERF_SHOW;
dpif_netdev_pmd_info(conn, argc, argv, &par);
}
+
+static void
+dpif_netdev_dp_bond_show(struct unixctl_conn *conn, int argc,
+ const char *argv[], void *aux OVS_UNUSED)
+{
+ struct ds reply = DS_EMPTY_INITIALIZER;
+
+ ovs_mutex_lock(&dp_netdev_mutex);
+
+ struct dp_netdev *dp = NULL;
+ if (argc == 2) {
+ dp = shash_find_data(&dp_netdevs, argv[1]);
+ } else if (shash_count(&dp_netdevs) == 1) {
+ /* There's only one datapath */
+ dp = shash_first(&dp_netdevs)->data;
+ }
+ if (!dp) {
+ ovs_mutex_unlock(&dp_netdev_mutex);
+ unixctl_command_reply_error(conn,
+ "please specify an existing datapath");
+ return;
+ }
+ ds_put_cstr(&reply, "\nBonds:\n");
+
+ struct tx_bond *dp_bond_entry;
+ HMAP_FOR_EACH (dp_bond_entry, node, &dp->bonds) {
+ ds_put_format(&reply, "\tbond-id %u :\n",
+ dp_bond_entry->bond_id);
+ for (int bucket = 0; bucket < BOND_BUCKETS; bucket++) {
+ ds_put_format(&reply, "\t\tbucket %u - slave %u \n",
+ bucket,
+ dp_bond_entry->slave_buckets[bucket].slave_id);
+ }
+ }
+ ovs_mutex_unlock(&dp_netdev_mutex);
+ unixctl_command_reply(conn, ds_cstr(&reply));
+ ds_destroy(&reply);
+}
+
static int
dpif_netdev_init(void)
@@ -1416,6 +1491,9 @@ dpif_netdev_init(void)
"[-us usec] [-q qlen]",
0, 10, pmd_perf_log_set_cmd,
NULL);
+ unixctl_command_register("dpif-netdev/dp-bond-show", "[dp]",
+ 0, 1, dpif_netdev_dp_bond_show,
+ NULL);
return 0;
}
@@ -1540,6 +1618,9 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
ovs_mutex_init(&dp->port_mutex);
hmap_init(&dp->ports);
dp->port_seq = seq_create();
+ ovs_mutex_init(&dp->bond_mutex);
+ hmap_init(&dp->bonds);
+
fat_rwlock_init(&dp->upcall_rwlock);
dp->reconfigure_seq = seq_create();
@@ -1663,6 +1744,14 @@ dp_netdev_free(struct dp_netdev *dp)
}
ovs_mutex_unlock(&dp->port_mutex);
+ ovs_mutex_lock(&dp->bond_mutex);
+ struct tx_bond *bond, *next_bond;
+ HMAP_FOR_EACH_SAFE (bond, next_bond, node, &dp->bonds) {
+ hmap_remove(&dp->bonds, &bond->node);
+ free(bond);
+ }
+ ovs_mutex_unlock(&dp->bond_mutex);
+
dp_netdev_destroy_all_pmds(dp, true);
cmap_destroy(&dp->poll_threads);
@@ -1681,6 +1770,9 @@ dp_netdev_free(struct dp_netdev *dp)
hmap_destroy(&dp->ports);
ovs_mutex_destroy(&dp->port_mutex);
+ hmap_destroy(&dp->bonds);
+ ovs_mutex_destroy(&dp->bond_mutex);
+
/* Upcalls must be disabled at this point */
dp_netdev_destroy_upcall_lock(dp);
@@ -1784,6 +1876,7 @@ dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd)
ovs_mutex_lock(&pmd->port_mutex);
pmd_load_cached_ports(pmd);
ovs_mutex_unlock(&pmd->port_mutex);
+ pmd_load_bond_cache(pmd);
ovs_mutex_unlock(&pmd->dp->non_pmd_mutex);
return;
}
@@ -1798,6 +1891,12 @@ hash_port_no(odp_port_t port_no)
return hash_int(odp_to_u32(port_no), 0);
}
+static uint32_t
+hash_bond_id(uint32_t bond_id)
+{
+ return hash_int(bond_id, 0);
+}
+
static int
port_create(const char *devname, const char *type,
odp_port_t port_no, struct dp_netdev_port **portp)
@@ -4327,6 +4426,19 @@ tx_port_lookup(const struct hmap *hmap, odp_port_t port_no)
return NULL;
}
+static struct tx_bond *
+tx_bond_lookup(const struct hmap *hmap, uint32_t bond_id)
+{
+ struct tx_bond *tx;
+
+ HMAP_FOR_EACH_IN_BUCKET (tx, node, hash_bond_id(bond_id), hmap) {
+ if (tx->bond_id == bond_id) {
+ return tx;
+ }
+ }
+ return NULL;
+}
+
static int
port_reconfigure(struct dp_netdev_port *port)
{
@@ -4815,6 +4927,26 @@ pmd_remove_stale_ports(struct dp_netdev *dp,
ovs_mutex_unlock(&pmd->port_mutex);
}
+static void
+pmd_remove_stale_bonds(struct dp_netdev *dp,
+ struct dp_netdev_pmd_thread *pmd)
+ OVS_EXCLUDED(pmd->bond_mutex)
+ OVS_EXCLUDED(dp->bond_mutex)
+{
+ ovs_mutex_lock(&dp->bond_mutex);
+ ovs_mutex_lock(&pmd->bond_mutex);
+
+ struct tx_bond *tx, *tx_next;
+ HMAP_FOR_EACH_SAFE (tx, tx_next, node, &pmd->tx_bonds) {
+ if (!tx_bond_lookup(&dp->bonds, tx->bond_id)) {
+ dp_netdev_del_bond_tx_from_pmd(pmd, tx);
+ }
+ }
+
+ ovs_mutex_unlock(&pmd->bond_mutex);
+ ovs_mutex_unlock(&dp->bond_mutex);
+}
+
/* Must be called each time a port is added/removed or the cmask changes.
* This creates and destroys pmd threads, reconfigures ports, opens their
* rxqs and assigns all rxqs/txqs to pmd threads. */
@@ -4853,10 +4985,11 @@ reconfigure_datapath(struct dp_netdev *dp)
}
}
- /* Remove from the pmd threads all the ports that have been deleted or
- * need reconfiguration. */
+ /* Remove from the pmd threads all the ports/bonds that have been deleted
+ * or need reconfiguration. */
CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
pmd_remove_stale_ports(dp, pmd);
+ pmd_remove_stale_bonds(dp, pmd);
}
/* Reload affected pmd threads. We must wait for the pmd threads before
@@ -4978,6 +5111,21 @@ reconfigure_datapath(struct dp_netdev *dp)
ovs_mutex_unlock(&pmd->port_mutex);
}
+ /* Add every bond to the tx cache of every pmd thread, if it's not
+ * there already and if this pmd has at least one rxq to poll. */
+ ovs_mutex_lock(&dp->bond_mutex);
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ ovs_mutex_lock(&pmd->bond_mutex);
+ if (hmap_count(&pmd->poll_list) || pmd->core_id == NON_PMD_CORE_ID) {
+ struct tx_bond *bond;
+ HMAP_FOR_EACH (bond, node, &dp->bonds) {
+ dp_netdev_add_bond_tx_to_pmd(pmd, bond);
+ }
+ }
+ ovs_mutex_unlock(&pmd->bond_mutex);
+ }
+ ovs_mutex_unlock(&dp->bond_mutex);
+
/* Reload affected pmd threads. */
reload_affected_pmds(dp);
@@ -5236,7 +5384,6 @@ pmd_rebalance_dry_run(struct dp_netdev *dp)
return ret;
}
-
/* Return true if needs to revalidate datapath flows. */
static bool
dpif_netdev_run(struct dpif *dpif)
@@ -5406,6 +5553,56 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
}
static void
+pmd_free_cached_bonds(struct dp_netdev_pmd_thread *pmd)
+{
+ struct tx_bond *bond, *next;
+
+ /* Remove bonds from pmd which no longer exist. */
+ HMAP_FOR_EACH_SAFE (bond, next, node, &pmd->bond_cache) {
+ struct tx_bond *tx = NULL;
+
+ tx = tx_bond_lookup(&pmd->tx_bonds, bond->bond_id);
+ if (!tx) {
+ /* Bond no longer exists. Delete it from pmd. */
+ hmap_remove(&pmd->bond_cache, &bond->node);
+ free(bond);
+ }
+ }
+}
+
+/* Copies bonds from 'pmd->tx_bonds' (shared with the main thread) to
+ * 'pmd->bond_cache' (thread local) */
+static void
+pmd_load_cached_bonds(struct dp_netdev_pmd_thread *pmd)
+ OVS_REQUIRES(pmd->bond_mutex)
+{
+ pmd_free_cached_bonds(pmd);
+ hmap_shrink(&pmd->bond_cache);
+
+ struct tx_bond *tx_bond, *tx_bond_cached;
+ HMAP_FOR_EACH (tx_bond, node, &pmd->tx_bonds) {
+ /* Check if bond already exist on pmd. */
+ tx_bond_cached = tx_bond_lookup(&pmd->bond_cache, tx_bond->bond_id);
+
+ if (!tx_bond_cached) {
+ /* Create new bond entry in cache. */
+ tx_bond_cached = xmemdup(tx_bond, sizeof *tx_bond_cached);
+ hmap_insert(&pmd->bond_cache, &tx_bond_cached->node,
+ hash_bond_id(tx_bond_cached->bond_id));
+ } else {
+ /* Update the slave-map. */
+ for (int bucket = 0; bucket <= BOND_MASK; bucket++) {
+ tx_bond_cached->slave_buckets[bucket].slave_id =
+ tx_bond->slave_buckets[bucket].slave_id;
+ }
+ }
+ VLOG_DBG("Caching bond-id %d pmd %d",
+ tx_bond_cached->bond_id, pmd->core_id);
+ }
+}
+
+
+static void
pmd_alloc_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
{
ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
@@ -5427,6 +5624,14 @@ pmd_free_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex);
}
+static void
+pmd_load_bond_cache(struct dp_netdev_pmd_thread *pmd)
+{
+ ovs_mutex_lock(&pmd->bond_mutex);
+ pmd_load_cached_bonds(pmd);
+ ovs_mutex_unlock(&pmd->bond_mutex);
+}
+
static int
pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
struct polled_queue **ppoll_list)
@@ -5454,6 +5659,8 @@ pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
ovs_mutex_unlock(&pmd->port_mutex);
+ pmd_load_bond_cache(pmd);
+
*ppoll_list = poll_list;
return i;
}
@@ -6016,6 +6223,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
atomic_init(&pmd->reload, false);
ovs_mutex_init(&pmd->flow_mutex);
ovs_mutex_init(&pmd->port_mutex);
+ ovs_mutex_init(&pmd->bond_mutex);
cmap_init(&pmd->flow_table);
cmap_init(&pmd->classifiers);
pmd->ctx.last_rxq = NULL;
@@ -6026,6 +6234,8 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
hmap_init(&pmd->tx_ports);
hmap_init(&pmd->tnl_port_cache);
hmap_init(&pmd->send_port_cache);
+ hmap_init(&pmd->tx_bonds);
+ hmap_init(&pmd->bond_cache);
/* init the 'flow_cache' since there is no
* actual thread created for NON_PMD_CORE_ID. */
if (core_id == NON_PMD_CORE_ID) {
@@ -6046,6 +6256,8 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
hmap_destroy(&pmd->send_port_cache);
hmap_destroy(&pmd->tnl_port_cache);
hmap_destroy(&pmd->tx_ports);
+ hmap_destroy(&pmd->bond_cache);
+ hmap_destroy(&pmd->tx_bonds);
hmap_destroy(&pmd->poll_list);
/* All flows (including their dpcls_rules) have been deleted already */
CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
@@ -6057,6 +6269,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
ovs_mutex_destroy(&pmd->flow_mutex);
seq_destroy(pmd->reload_seq);
ovs_mutex_destroy(&pmd->port_mutex);
+ ovs_mutex_destroy(&pmd->bond_mutex);
free(pmd);
}
@@ -6210,6 +6423,45 @@ dp_netdev_del_port_tx_from_pmd(struct dp_netdev_pmd_thread *pmd,
free(tx);
pmd->need_reload = true;
}
+
+static void
+dp_netdev_add_bond_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
+ struct tx_bond *bond)
+ OVS_REQUIRES(pmd->bond_mutex)
+{
+ bool reload = false;
+ struct tx_bond *tx = tx_bond_lookup(&pmd->tx_bonds, bond->bond_id);
+ if (tx) {
+ /* Check if mapping is changed. */
+ for (int i = 0; i <= BOND_MASK; i++) {
+ if (bond->slave_buckets[i].slave_id !=
+ tx->slave_buckets[i].slave_id) {
+ /* Mapping is modified. Reload pmd bond cache again. */
+ reload = true;
+ }
+ /* Copy the map always. */
+ tx->slave_buckets[i].slave_id = bond->slave_buckets[i].slave_id;
+ }
+ } else {
+ tx = xmemdup(bond, sizeof *tx);
+ hmap_insert(&pmd->tx_bonds, &tx->node, hash_bond_id(bond->bond_id));
+ reload = true;
+ }
+ if (reload) {
+ pmd->need_reload = true;
+ }
+}
+
+/* Del 'tx' from the tx bond cache of 'pmd' */
+static void
+dp_netdev_del_bond_tx_from_pmd(struct dp_netdev_pmd_thread *pmd,
+ struct tx_bond *tx)
+ OVS_REQUIRES(pmd->bond_mutex)
+{
+ hmap_remove(&pmd->tx_bonds, &tx->node);
+ free(tx);
+ pmd->need_reload = true;
+}
static char *
dpif_netdev_get_datapath_version(void)
@@ -6982,6 +7234,13 @@ pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
return tx_port_lookup(&pmd->send_port_cache, port_no);
}
+static struct tx_bond *
+pmd_tx_bond_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
+ uint32_t bond_id)
+{
+ return tx_bond_lookup(&pmd->bond_cache, bond_id);
+}
+
static int
push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
const struct nlattr *attr,
@@ -7031,6 +7290,91 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
}
}
+static bool
+dp_execute_output_action(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets_,
+ bool should_steal, odp_port_t port_no)
+{
+ struct tx_port *p = pmd_send_port_cache_lookup(pmd, port_no);
+ if (!OVS_LIKELY(p)) {
+ return false;
+ }
+
+ struct dp_packet_batch out;
+ if (!should_steal) {
+ dp_packet_batch_clone(&out, packets_);
+ dp_packet_batch_reset_cutlen(packets_);
+ packets_ = &out;
+ }
+ dp_packet_batch_apply_cutlen(packets_);
+#ifdef DPDK_NETDEV
+ if (OVS_UNLIKELY(!dp_packet_batch_is_empty(&p->output_pkts)
+ && packets_->packets[0]->source
+ != p->output_pkts.packets[0]->source)) {
+ /* netdev-dpdk assumes that all packets in a single
+ * output batch has the same source. Flush here to
+ * avoid memory access issues. */
+ dp_netdev_pmd_flush_output_on_port(pmd, p);
+ }
+#endif
+ if (dp_packet_batch_size(&p->output_pkts)
+ + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST) {
+ /* Flush here to avoid overflow. */
+ dp_netdev_pmd_flush_output_on_port(pmd, p);
+ }
+ if (dp_packet_batch_is_empty(&p->output_pkts)) {
+ pmd->n_output_batches++;
+ }
+
+ struct dp_packet *packet;
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
+ p->output_pkts_rxqs[dp_packet_batch_size(&p->output_pkts)] =
+ pmd->ctx.last_rxq;
+ dp_packet_batch_add(&p->output_pkts, packet);
+ }
+ return true;
+}
+
+static bool
+dp_execute_lb_output_action(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets_,
+ bool should_steal, uint32_t bond)
+{
+ struct tx_bond *p_bond = pmd_tx_bond_cache_lookup(pmd, bond);
+ if (!p_bond) {
+ return false;
+ }
+
+ struct dp_packet_batch del_pkts;
+ dp_packet_batch_init(&del_pkts);
+
+ struct dp_packet *packet;
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
+ /*
+ * Lookup the bond-hash table using hash to get the slave.
+ */
+ uint32_t hash = dp_packet_get_rss_hash(packet);
+ struct slave_entry *s_entry = &p_bond->slave_buckets[hash & BOND_MASK];
+ odp_port_t bond_member = s_entry->slave_id;
+ uint32_t size = dp_packet_size(packet);
+
+ struct dp_packet_batch output_pkt;
+ dp_packet_batch_init_packet(&output_pkt, packet);
+ if (OVS_LIKELY(dp_execute_output_action(pmd, &output_pkt, should_steal,
+ bond_member))) {
+ /* Update slave stats. */
+ non_atomic_ullong_add(&s_entry->n_packets, 1);
+ non_atomic_ullong_add(&s_entry->n_bytes, size);
+ } else {
+ dp_packet_batch_add(&del_pkts, packet);
+ }
+ }
+
+ /* Delete packets that failed OUTPUT action */
+ dp_packet_delete_batch(&del_pkts, should_steal);
+ return true;
+}
+
static void
dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
const struct nlattr *a, bool should_steal)
@@ -7045,43 +7389,15 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
switch ((enum ovs_action_attr)type) {
case OVS_ACTION_ATTR_OUTPUT:
- p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
- if (OVS_LIKELY(p)) {
- struct dp_packet *packet;
- struct dp_packet_batch out;
-
- if (!should_steal) {
- dp_packet_batch_clone(&out, packets_);
- dp_packet_batch_reset_cutlen(packets_);
- packets_ = &out;
- }
- dp_packet_batch_apply_cutlen(packets_);
-
-#ifdef DPDK_NETDEV
- if (OVS_UNLIKELY(!dp_packet_batch_is_empty(&p->output_pkts)
- && packets_->packets[0]->source
- != p->output_pkts.packets[0]->source)) {
- /* XXX: netdev-dpdk assumes that all packets in a single
- * output batch has the same source. Flush here to
- * avoid memory access issues. */
- dp_netdev_pmd_flush_output_on_port(pmd, p);
- }
-#endif
- if (dp_packet_batch_size(&p->output_pkts)
- + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST) {
- /* Flush here to avoid overflow. */
- dp_netdev_pmd_flush_output_on_port(pmd, p);
- }
-
- if (dp_packet_batch_is_empty(&p->output_pkts)) {
- pmd->n_output_batches++;
- }
+ if (dp_execute_output_action(pmd, packets_, should_steal,
+ nl_attr_get_odp_port(a))) {
+ return;
+ }
+ break;
- DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
- p->output_pkts_rxqs[dp_packet_batch_size(&p->output_pkts)] =
- pmd->ctx.last_rxq;
- dp_packet_batch_add(&p->output_pkts, packet);
- }
+ case OVS_ACTION_ATTR_LB_OUTPUT:
+ if (dp_execute_lb_output_action(pmd, packets_, should_steal,
+ nl_attr_get_u32(a))) {
return;
}
break;
@@ -7615,6 +7931,109 @@ dpif_netdev_ipf_dump_done(struct dpif *dpif OVS_UNUSED, void *ipf_dump_ctx)
}
+static int
+dpif_netdev_bond_add(struct dpif *dpif, uint32_t bond_id,
+ odp_port_t slave_map[])
+{
+ struct dp_netdev *dp = get_dp_netdev(dpif);
+
+ ovs_mutex_lock(&dp->bond_mutex);
+ /*
+ * Lookup for the bond. If already exists, just update the slave-map.
+ * Else create new.
+ */
+ struct tx_bond *dp_bond_entry = tx_bond_lookup(&dp->bonds, bond_id);
+ if (dp_bond_entry) {
+ for (int bucket = 0; bucket <= BOND_MASK; bucket++) {
+ dp_bond_entry->slave_buckets[bucket].slave_id = slave_map[bucket];
+ }
+
+ struct dp_netdev_pmd_thread *pmd;
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ ovs_mutex_lock(&pmd->bond_mutex);
+ dp_netdev_add_bond_tx_to_pmd(pmd, dp_bond_entry);
+ ovs_mutex_unlock(&pmd->bond_mutex);
+ }
+ } else {
+ struct tx_bond *dp_bond = xzalloc(sizeof *dp_bond);
+ dp_bond->bond_id = bond_id;
+ for (int bucket = 0; bucket < BOND_BUCKETS; bucket++) {
+ dp_bond->slave_buckets[bucket].slave_id = slave_map[bucket];
+ }
+ hmap_insert(&dp->bonds, &dp_bond->node,
+ hash_bond_id(dp_bond->bond_id));
+
+ /* Insert the bond map in all pmds. */
+ struct dp_netdev_pmd_thread *pmd;
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ ovs_mutex_lock(&pmd->bond_mutex);
+ dp_netdev_add_bond_tx_to_pmd(pmd, dp_bond);
+ ovs_mutex_unlock(&pmd->bond_mutex);
+ }
+ }
+ ovs_mutex_unlock(&dp->bond_mutex);
+ return 0;
+}
+
+static int
+dpif_netdev_bond_del(struct dpif *dpif, uint32_t bond_id)
+{
+ struct dp_netdev *dp = get_dp_netdev(dpif);
+
+ ovs_mutex_lock(&dp->bond_mutex);
+
+ /* Find the bond and delete it if present */
+ struct tx_bond *dp_bond_entry = tx_bond_lookup(&dp->bonds, bond_id);
+ if (dp_bond_entry) {
+ /* Remove the bond map in all pmds. */
+ struct dp_netdev_pmd_thread *pmd;
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ ovs_mutex_lock(&pmd->bond_mutex);
+ dp_netdev_del_bond_tx_from_pmd(pmd, dp_bond_entry);
+ ovs_mutex_unlock(&pmd->bond_mutex);
+ }
+ hmap_remove(&dp->bonds, &dp_bond_entry->node);
+ free(dp_bond_entry);
+ }
+
+ ovs_mutex_unlock(&dp->bond_mutex);
+ return 0;
+}
+
+static int
+dpif_netdev_bond_stats_get(struct dpif *dpif, uint32_t bond_id,
+ uint64_t *n_bytes)
+{
+ struct dp_netdev *dp = get_dp_netdev(dpif);
+
+ ovs_mutex_lock(&dp->bond_mutex);
+
+ /* Find the bond and retrieve stats if present */
+ struct tx_bond *dp_bond_entry = tx_bond_lookup(&dp->bonds, bond_id);
+ if (dp_bond_entry) {
+ /* Search the bond in all PMDs */
+ struct dp_netdev_pmd_thread *pmd;
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ ovs_mutex_lock(&pmd->bond_mutex);
+ struct tx_bond *pmd_bond_entry
+ = tx_bond_lookup(&pmd->bond_cache, bond_id);
+ if (pmd_bond_entry) {
+ /* Read bond stats. */
+ for (int i = 0; i <= BOND_MASK; i++) {
+ uint64_t pmd_n_bytes;
+ atomic_read_relaxed(
+ &pmd_bond_entry->slave_buckets[i].n_bytes,
+ &pmd_n_bytes);
+ n_bytes[i] += pmd_n_bytes;
+ }
+ }
+ ovs_mutex_unlock(&pmd->bond_mutex);
+ }
+ }
+ ovs_mutex_unlock(&dp->bond_mutex);
+ return 0;
+}
+
const struct dpif_class dpif_netdev_class = {
"netdev",
true, /* cleanup_required */
@@ -7687,6 +8106,9 @@ const struct dpif_class dpif_netdev_class = {
dpif_netdev_meter_set,
dpif_netdev_meter_get,
dpif_netdev_meter_del,
+ dpif_netdev_bond_add,
+ dpif_netdev_bond_del,
+ dpif_netdev_bond_stats_get,
};
static void
@@ -3945,6 +3945,9 @@ const struct dpif_class dpif_netlink_class = {
dpif_netlink_meter_set,
dpif_netlink_meter_get,
dpif_netlink_meter_del,
+ NULL, /* bond_add */
+ NULL, /* bond_del */
+ NULL, /* bond_stats_get */
};
static int
@@ -614,6 +614,15 @@ struct dpif_class {
* zero. */
int (*meter_del)(struct dpif *, ofproto_meter_id meter_id,
struct ofputil_meter_stats *, uint16_t n_bands);
+
+ /* Adds a bond with 'bond_id' and the slave-map to 'dpif'. */
+ int (*bond_add)(struct dpif *dpif, uint32_t bond_id,
+ odp_port_t slave_map[]);
+ /* Removes bond identified by 'bond_id' from 'dpif'. */
+ int (*bond_del)(struct dpif *dpif, uint32_t bond_id);
+ /* Reads bond stats from 'dpif'. */
+ int (*bond_stats_get)(struct dpif *dpif, uint32_t bond_id,
+ uint64_t *n_bytes);
};
extern const struct dpif_class dpif_netlink_class;
@@ -1177,6 +1177,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_,
case OVS_ACTION_ATTR_CT:
case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
@@ -1227,6 +1228,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_,
struct dp_packet *clone = NULL;
uint32_t cutlen = dp_packet_get_cutlen(packet);
if (cutlen && (type == OVS_ACTION_ATTR_OUTPUT
+ || type == OVS_ACTION_ATTR_LB_OUTPUT
|| type == OVS_ACTION_ATTR_TUNNEL_PUSH
|| type == OVS_ACTION_ATTR_TUNNEL_POP
|| type == OVS_ACTION_ATTR_USERSPACE)) {
@@ -1879,6 +1881,16 @@ dpif_supports_tnl_push_pop(const struct dpif *dpif)
return dpif_is_netdev(dpif);
}
+bool
+dpif_supports_lb_output_action(const struct dpif *dpif)
+{
+ /*
+ * Balance-tcp optimization is currently supported in netdev
+ * datapath only.
+ */
+ return dpif_is_netdev(dpif);
+}
+
/* Meters */
void
dpif_meter_get_features(const struct dpif *dpif,
@@ -1976,3 +1988,40 @@ dpif_meter_del(struct dpif *dpif, ofproto_meter_id meter_id,
}
return error;
}
+
+int
+dpif_bond_add(struct dpif *dpif, uint32_t bond_id, odp_port_t slave_map[])
+{
+ int error = -1;
+
+ if (dpif && dpif->dpif_class && dpif->dpif_class->bond_add) {
+ error = dpif->dpif_class->bond_add(dpif, bond_id, slave_map);
+ }
+
+ return error;
+}
+
+int
+dpif_bond_del(struct dpif *dpif, uint32_t bond_id)
+{
+ int error = -1;
+
+ if (dpif && dpif->dpif_class && dpif->dpif_class->bond_del) {
+ error = dpif->dpif_class->bond_del(dpif, bond_id);
+ }
+
+ return error;
+}
+
+int
+dpif_bond_stats_get(struct dpif *dpif, uint32_t bond_id,
+ uint64_t *n_bytes)
+{
+ int error = -1;
+
+ if (dpif && dpif->dpif_class && dpif->dpif_class->bond_stats_get) {
+ error = dpif->dpif_class->bond_stats_get(dpif, bond_id, n_bytes);
+ }
+
+ return error;
+}
@@ -905,6 +905,13 @@ int dpif_get_pmds_for_port(const struct dpif * dpif, odp_port_t port_no,
char *dpif_get_dp_version(const struct dpif *);
bool dpif_supports_tnl_push_pop(const struct dpif *);
+bool dpif_supports_lb_output_action(const struct dpif *);
+
+int dpif_bond_add(struct dpif *dpif, uint32_t bond_id, odp_port_t slave_map[]);
+int dpif_bond_del(struct dpif *dpif, uint32_t bond_id);
+int dpif_bond_stats_get(struct dpif *dpif, uint32_t bond_id,
+ uint64_t *n_bytes);
+
/* Log functions. */
struct vlog_module;
@@ -725,6 +725,7 @@ requires_datapath_assistance(const struct nlattr *a)
switch (type) {
/* These only make sense in the context of a datapath. */
case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
@@ -990,6 +991,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
break;
case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
@@ -119,6 +119,7 @@ odp_action_len(uint16_t type)
switch ((enum ovs_action_attr) type) {
case OVS_ACTION_ATTR_OUTPUT: return sizeof(uint32_t);
+ case OVS_ACTION_ATTR_LB_OUTPUT: return sizeof(uint32_t);
case OVS_ACTION_ATTR_TRUNC: return sizeof(struct ovs_action_trunc);
case OVS_ACTION_ATTR_TUNNEL_PUSH: return ATTR_LEN_VARIABLE;
case OVS_ACTION_ATTR_TUNNEL_POP: return sizeof(uint32_t);
@@ -1121,6 +1122,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a,
case OVS_ACTION_ATTR_OUTPUT:
odp_portno_name_format(portno_names, nl_attr_get_odp_port(a), ds);
break;
+ case OVS_ACTION_ATTR_LB_OUTPUT:
+ ds_put_format(ds, "lb_output(bond,%"PRIu32")", nl_attr_get_u32(a));
+ break;
case OVS_ACTION_ATTR_TRUNC: {
const struct ovs_action_trunc *trunc =
nl_attr_get_unspec(a, sizeof *trunc);
@@ -54,10 +54,6 @@ static struct ovs_rwlock rwlock = OVS_RWLOCK_INITIALIZER;
static struct hmap all_bonds__ = HMAP_INITIALIZER(&all_bonds__);
static struct hmap *const all_bonds OVS_GUARDED_BY(rwlock) = &all_bonds__;
-/* Bit-mask for hashing a flow down to a bucket. */
-#define BOND_MASK 0xff
-#define BOND_BUCKETS (BOND_MASK + 1)
-
/* Priority for internal rules created to handle recirculation */
#define RECIRC_RULE_PRIORITY 20
@@ -126,6 +122,8 @@ struct bond {
enum lacp_status lacp_status; /* Status of LACP negotiations. */
bool bond_revalidate; /* True if flows need revalidation. */
uint32_t basis; /* Basis for flow hash function. */
+ bool use_bond_cache; /* Use bond cache to avoid recirculation.
+ Applicable only for Balance TCP mode. */
/* SLB specific bonding info. */
struct bond_entry *hash; /* An array of BOND_BUCKETS elements. */
@@ -185,7 +183,7 @@ static struct bond_slave *choose_output_slave(const struct bond *,
struct flow_wildcards *,
uint16_t vlan)
OVS_REQ_RDLOCK(rwlock);
-static void update_recirc_rules__(struct bond *bond);
+static void update_recirc_rules__(struct bond *bond, uint32_t bond_recirc_id);
static bool bond_is_falling_back_to_ab(const struct bond *);
/* Attempts to parse 's' as the name of a bond balancing mode. If successful,
@@ -262,6 +260,7 @@ void
bond_unref(struct bond *bond)
{
struct bond_slave *slave;
+ uint32_t bond_recirc_id = 0;
if (!bond || ovs_refcount_unref_relaxed(&bond->ref_cnt) != 1) {
return;
@@ -282,12 +281,13 @@ bond_unref(struct bond *bond)
/* Free bond resources. Remove existing post recirc rules. */
if (bond->recirc_id) {
+ bond_recirc_id = bond->recirc_id;
recirc_free_id(bond->recirc_id);
bond->recirc_id = 0;
}
free(bond->hash);
bond->hash = NULL;
- update_recirc_rules__(bond);
+ update_recirc_rules__(bond, bond_recirc_id);
hmap_destroy(&bond->pr_rule_ops);
free(bond->name);
@@ -328,13 +328,14 @@ add_pr_rule(struct bond *bond, const struct match *match,
* lock annotation. Currently, only 'bond_unref()' calls
* this function directly. */
static void
-update_recirc_rules__(struct bond *bond)
+update_recirc_rules__(struct bond *bond, uint32_t bond_recirc_id)
{
struct match match;
struct bond_pr_rule_op *pr_op, *next_op;
uint64_t ofpacts_stub[128 / 8];
struct ofpbuf ofpacts;
int i;
+ ofp_port_t slave_map[BOND_BUCKETS];
ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
@@ -353,8 +354,18 @@ update_recirc_rules__(struct bond *bond)
add_pr_rule(bond, &match, slave->ofp_port,
&bond->hash[i].pr_rule);
+ slave_map[i] = slave->ofp_port;
+ } else {
+ slave_map[i] = OFP_PORT_C(-1);
}
}
+ if (bond->ofproto->backer->rt_support.lb_output_action) {
+ ofproto_dpif_bundle_add(bond->ofproto, bond->recirc_id, slave_map);
+ }
+ } else {
+ if (bond->ofproto->backer->rt_support.lb_output_action) {
+ ofproto_dpif_bundle_del(bond->ofproto, bond_recirc_id);
+ }
}
HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) {
@@ -404,7 +415,7 @@ static void
update_recirc_rules(struct bond *bond)
OVS_REQ_RDLOCK(rwlock)
{
- update_recirc_rules__(bond);
+ update_recirc_rules__(bond, bond->recirc_id);
}
/* Updates 'bond''s overall configuration to 's'.
@@ -467,6 +478,10 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s)
recirc_free_id(bond->recirc_id);
bond->recirc_id = 0;
}
+ if (bond->use_bond_cache != s->use_bond_cache) {
+ bond->use_bond_cache = s->use_bond_cache;
+ revalidate = true;
+ }
if (bond->balance == BM_AB || !bond->hash || revalidate) {
bond_entry_reset(bond);
@@ -944,6 +959,15 @@ bond_recirculation_account(struct bond *bond)
OVS_REQ_WRLOCK(rwlock)
{
int i;
+ uint64_t n_bytes[BOND_BUCKETS] = {0};
+
+ if (bond->hash && bond->recirc_id) {
+ /* Retrieve bond stats from datapath. */
+ if (bond->ofproto->backer->rt_support.lb_output_action) {
+ dpif_bond_stats_get(bond->ofproto->backer->dpif,
+ bond->recirc_id, n_bytes);
+ }
+ }
for (i=0; i<=BOND_MASK; i++) {
struct bond_entry *entry = &bond->hash[i];
@@ -953,8 +977,12 @@ bond_recirculation_account(struct bond *bond)
struct pkt_stats stats;
long long int used OVS_UNUSED;
- rule->ofproto->ofproto_class->rule_get_stats(
- rule, &stats, &used);
+ if (!bond->ofproto->backer->rt_support.lb_output_action) {
+ rule->ofproto->ofproto_class->rule_get_stats(
+ rule, &stats, &used);
+ } else {
+ stats.n_bytes = n_bytes[i];
+ }
bond_entry_account(entry, stats.n_bytes);
}
}
@@ -1365,6 +1393,8 @@ bond_print_details(struct ds *ds, const struct bond *bond)
may_recirc ? "yes" : "no", may_recirc ? recirc_id: -1);
ds_put_format(ds, "bond-hash-basis: %"PRIu32"\n", bond->basis);
+ ds_put_format(ds, "lb-output-action: %s\n",
+ bond->use_bond_cache ? "enabled" : "disabled");
ds_put_format(ds, "updelay: %d ms\n", bond->updelay);
ds_put_format(ds, "downdelay: %d ms\n", bond->downdelay);
@@ -1942,3 +1972,9 @@ bond_get_changed_active_slave(const char *name, struct eth_addr *mac,
return false;
}
+
+bool
+bond_get_cache_mode(const struct bond *bond)
+{
+ return bond->use_bond_cache;
+}
@@ -22,6 +22,10 @@
#include "ofproto-provider.h"
#include "packets.h"
+/* Bit-mask for hashing a flow down to a bucket. */
+#define BOND_MASK 0xff
+#define BOND_BUCKETS (BOND_MASK + 1)
+
struct flow;
struct netdev;
struct ofpbuf;
@@ -58,6 +62,8 @@ struct bond_settings {
/* The MAC address of the interface
that was active during the last
ovs run. */
+ bool use_bond_cache; /* Use bond cache. Only applicable for
+ bond mode BALANCE TCP. */
};
/* Program startup. */
@@ -122,4 +128,7 @@ void bond_rebalance(struct bond *);
*/
void bond_update_post_recirc_rules(struct bond *, uint32_t *recirc_id,
uint32_t *hash_basis);
+
+bool bond_get_cache_mode(const struct bond *);
+
#endif /* bond.h */
@@ -3016,6 +3016,7 @@ dpif_ipfix_read_actions(const struct flow *flow,
case OVS_ACTION_ATTR_POP_NSH:
case OVS_ACTION_ATTR_CHECK_PKT_LEN:
case OVS_ACTION_ATTR_UNSPEC:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
case __OVS_ACTION_ATTR_MAX:
default:
break;
@@ -1177,6 +1177,7 @@ dpif_sflow_read_actions(const struct flow *flow,
case OVS_ACTION_ATTR_CT:
case OVS_ACTION_ATTR_CT_CLEAR:
case OVS_ACTION_ATTR_METER:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
break;
case OVS_ACTION_ATTR_SET_MASKED:
@@ -409,6 +409,8 @@ struct xlate_ctx {
struct ofpbuf action_set; /* Action set. */
enum xlate_error error; /* Translation failed. */
+
+ bool tnl_push_no_recirc; /* Tunnel push recirculation status */
};
/* Structure to track VLAN manipulation */
@@ -3721,12 +3723,16 @@ native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport,
ctx->xin->allow_side_effects = backup_side_effects;
ctx->xin->packet = backup_packet;
ctx->wc = backup_wc;
+
+ ctx->tnl_push_no_recirc = true;
} else {
/* In order to maintain accurate stats, use recirc for
* natvie tunneling. */
nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, 0);
nl_msg_end_nested(ctx->odp_actions, clone_ofs);
- }
+
+ ctx->tnl_push_no_recirc = false;
+ }
/* Restore the flows after the translation. */
memcpy(&ctx->xin->flow, &old_flow, sizeof ctx->xin->flow);
@@ -4177,7 +4183,6 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
xlate_commit_actions(ctx);
if (xr) {
- /* Recirculate the packet. */
struct ovs_action_hash *act_hash;
/* Hash action. */
@@ -4186,15 +4191,30 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
/* Algorithm supported by all datapaths. */
hash_alg = OVS_HASH_ALG_L4;
}
- act_hash = nl_msg_put_unspec_uninit(ctx->odp_actions,
+
+ if ((xr->hash_alg == OVS_HASH_ALG_L4) &&
+ (ctx->xbridge->support.lb_output_action) &&
+ !ctx->tnl_push_no_recirc &&
+ bond_get_cache_mode(xport->xbundle->bond)) {
+ /*
+ * If the bond mode is balance-tcp and lb-output-action is
+ * enabled, then use the hash directly for slave selection
+ * and avoid recirculation.
+ *
+ * Currently supported for the netdev datapath only.
+ */
+ nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_LB_OUTPUT,
+ xr->recirc_id);
+ } else {
+ act_hash = nl_msg_put_unspec_uninit(ctx->odp_actions,
OVS_ACTION_ATTR_HASH,
sizeof *act_hash);
- act_hash->hash_alg = hash_alg;
- act_hash->hash_basis = xr->hash_basis;
-
- /* Recirc action. */
- nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC,
- xr->recirc_id);
+ act_hash->hash_alg = hash_alg;
+ act_hash->hash_basis = xr->hash_basis;
+ /* Recirculate the packet. */
+ nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC,
+ xr->recirc_id);
+ }
} else if (is_native_tunnel) {
/* Output to native tunnel port. */
native_tunnel_output(ctx, xport, flow, odp_port, truncate);
@@ -7249,7 +7269,8 @@ count_output_actions(const struct ofpbuf *odp_actions)
int n = 0;
NL_ATTR_FOR_EACH_UNSAFE (a, left, odp_actions->data, odp_actions->size) {
- if (a->nla_type == OVS_ACTION_ATTR_OUTPUT) {
+ if ((a->nla_type == OVS_ACTION_ATTR_OUTPUT) ||
+ (a->nla_type == OVS_ACTION_ATTR_LB_OUTPUT)) {
n++;
}
}
@@ -7446,6 +7467,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
.action_set_has_group = false,
.action_set = OFPBUF_STUB_INITIALIZER(action_set_stub),
+ .tnl_push_no_recirc = false,
};
/* 'base_flow' reflects the packet as it came in, but we need it to reflect
@@ -1584,6 +1584,8 @@ check_support(struct dpif_backer *backer)
backer->rt_support.max_hash_alg = check_max_dp_hash_alg(backer);
backer->rt_support.check_pkt_len = check_check_pkt_len(backer);
backer->rt_support.ct_timeout = check_ct_timeout_policy(backer);
+ backer->rt_support.lb_output_action=
+ dpif_supports_lb_output_action(backer->dpif);
/* Flow fields. */
backer->rt_support.odp.ct_state = check_ct_state(backer);
@@ -3437,6 +3439,27 @@ bundle_remove(struct ofport *port_)
}
}
+/* Registers bond 'bond_id' with the datapath, translating the
+ * per-bucket 'slave_map' from OpenFlow port numbers to datapath
+ * (odp) port numbers first. A bucket with no slave assigned,
+ * OFP_PORT_C(-1), is translated to ODP_PORT_C(-1).
+ *
+ * Returns the result of dpif_bond_add(). */
+int
+ofproto_dpif_bundle_add(struct ofproto_dpif *ofproto,
+ uint32_t bond_id, const ofp_port_t slave_map[])
+{
+ /* Convert ofp_port to odp_port */
+ odp_port_t odp_map[BOND_BUCKETS];
+ for (int bucket = 0; bucket < BOND_BUCKETS; bucket++) {
+ odp_map[bucket] = (slave_map[bucket] == OFP_PORT_C(-1)
+ ? ODP_PORT_C(-1)
+ : ofp_port_to_odp_port(ofproto, slave_map[bucket]));
+ }
+
+ return dpif_bond_add(ofproto->backer->dpif, bond_id, odp_map);
+}
+
+/* Removes bond 'bond_id' from the datapath.
+ *
+ * Returns the result of dpif_bond_del(). */
+int
+ofproto_dpif_bundle_del(struct ofproto_dpif *ofproto, uint32_t bond_id)
+{
+ return dpif_bond_del(ofproto->backer->dpif, bond_id);
+}
+
static void
send_pdu_cb(void *port_, const void *pdu, size_t pdu_size)
{
@@ -5566,6 +5589,7 @@ get_datapath_cap(const char *datapath_type, struct smap *cap)
smap_add_format(cap, "max_hash_alg", "%"PRIuSIZE, s.max_hash_alg);
smap_add(cap, "check_pkt_len", s.check_pkt_len ? "true" : "false");
smap_add(cap, "ct_timeout", s.ct_timeout ? "true" : "false");
+ smap_add(cap, "lb_output_action", s.lb_output_action ? "true" : "false");
}
/* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and
@@ -199,7 +199,10 @@ struct group_dpif *group_dpif_lookup(struct ofproto_dpif *,
\
/* True if the datapath supports OVS_CT_ATTR_TIMEOUT in \
* OVS_ACTION_ATTR_CT action. */ \
- DPIF_SUPPORT_FIELD(bool, ct_timeout, "Conntrack timeout policy")
+ DPIF_SUPPORT_FIELD(bool, ct_timeout, "Conntrack timeout policy") \
+ \
+ /* True if the datapath supports balance_tcp optimization */ \
+ DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode")
/* Stores the various features which the corresponding backer supports. */
struct dpif_backer_support {
@@ -375,6 +378,9 @@ int ofproto_dpif_add_internal_flow(struct ofproto_dpif *,
struct rule **rulep);
int ofproto_dpif_delete_internal_flow(struct ofproto_dpif *, struct match *,
int priority);
+int ofproto_dpif_bundle_add(struct ofproto_dpif *, uint32_t bond_id,
+ const ofp_port_t slave_map[]);
+int ofproto_dpif_bundle_del(struct ofproto_dpif *, uint32_t bond_id);
bool ovs_native_tunneling_is_on(struct ofproto_dpif *);
@@ -121,6 +121,7 @@ AT_CHECK([ovs-appctl bond/show], [0], [dnl
bond_mode: active-backup
bond may use recirculation: no, Recirc-ID : -1
bond-hash-basis: 0
+lb-output-action: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -286,6 +287,7 @@ slave: p3: current attached
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+lb-output-action: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -301,6 +303,7 @@ slave p1: enabled
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+lb-output-action: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -423,6 +426,7 @@ slave: p3: current attached
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+lb-output-action: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -440,6 +444,7 @@ slave p1: enabled
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+lb-output-action: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -555,6 +560,7 @@ slave: p3: current attached
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+lb-output-action: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -572,6 +578,7 @@ slave p1: enabled
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+lb-output-action: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -692,6 +699,7 @@ slave: p3: current attached
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+lb-output-action: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -709,6 +717,7 @@ slave p1: enabled
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+lb-output-action: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -4559,6 +4559,12 @@ port_configure_bond(struct port *port, struct bond_settings *s)
/* OVSDB did not store the last active interface */
s->active_slave_mac = eth_addr_zero;
}
+ /* Use of the bond cache (lb-output-action) is disabled by default. */
+ s->use_bond_cache = false;
+ if (s->balance == BM_TCP) {
+ s->use_bond_cache = smap_get_bool(&port->cfg->other_config,
+ "lb-output-action", false);
+ }
}
/* Returns true if 'port' is synthetic, that is, if we constructed it locally
@@ -1962,6 +1962,16 @@
<code>active-backup</code>.
</column>
+ <column name="other_config" key="lb-output-action"
+ type='{"type": "boolean"}'>
+ Enables/disables usage of the optimized lb-output-action for
+ balancing flows among the output slaves of load-balanced bonds in
+ <code>balance-tcp</code> mode. When enabled, an optimized path is
+ used for balance-tcp mode, selecting the slave from the RSS hash
+ and avoiding recirculation. It affects only new flows, i.e.,
+ existing flows remain unchanged. This knob does not affect other
+ balancing modes.
+ </column>
+
<group title="Link Failure Detection">
<p>
An important part of link bonding is detecting that links are down so
@@ -5722,6 +5732,19 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
higher performance for MPLS and active-active load balancing
bonding modes.
</column>
+ <column name="capabilities" key="lb_output_action"
+ type='{"type": "boolean"}'>
+ If this is true, then the datapath supports the optimized
+ balance-tcp bond mode. This capability replaces the existing
+ <code>hash</code> and <code>recirc</code> actions with the new
+ <code>lb_output</code> action and avoids recirculation of the
+ packet in the datapath. It is supported only for the balance-tcp
+ bond mode in the netdev datapath. The new action gives higher
+ performance but also requires PMD reloads whenever the traffic
+ load needs to be rebalanced among the bonded links. By default
+ this new action is disabled; however, it can be enabled by setting
+ <ref column="other_config" key="lb-output-action"/> in the
+ <ref table="Port"/> table.
+ </column>
<group title="Connection-Tracking Capabilities">
<p>
These capabilities are granular because Open vSwitch and its