@@ -734,6 +734,7 @@ enum ovs_hash_alg {
OVS_HASH_ALG_L4,
#ifndef __KERNEL__
OVS_HASH_ALG_SYM_L4,
+ OVS_HASH_ALG_L4_RSS,
#endif
__OVS_HASH_MAX
};
@@ -989,6 +990,7 @@ enum ovs_action_attr {
#ifndef __KERNEL__
OVS_ACTION_ATTR_TUNNEL_PUSH, /* struct ovs_action_push_tnl*/
OVS_ACTION_ATTR_TUNNEL_POP, /* u32 port number. */
+ OVS_ACTION_ATTR_LB_OUTPUT, /* bond-id */
#endif
__OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
* from userspace. */
@@ -79,6 +79,7 @@
#include "unixctl.h"
#include "util.h"
#include "uuid.h"
+#include "ofproto/bond.h"
VLOG_DEFINE_THIS_MODULE(dpif_netdev);
@@ -384,6 +385,11 @@ struct dp_netdev {
struct conntrack *conntrack;
struct pmd_auto_lb pmd_alb;
+ /* Bonds.
+ *
+ * Any lookup into 'bonds' requires taking 'bond_mutex'. */
+ struct ovs_mutex bond_mutex;
+ struct hmap bonds;
};
static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
@@ -614,6 +620,13 @@ struct tx_port {
struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
};
+/* Contained by struct dp_netdev_pmd_thread's 'bond_cache' or 'tx_bonds'. */
+struct tx_bond {
+ struct hmap_node node;
+ uint32_t bond_id;
+ uint32_t slave_map[BOND_BUCKETS];
+};
+
/* A set of properties for the current processing loop that is not directly
* associated with the pmd thread itself, but with the packets being
* processed or the short-term system configuration (for example, time).
@@ -708,6 +721,11 @@ struct dp_netdev_pmd_thread {
* read by the pmd thread. */
struct hmap tx_ports OVS_GUARDED;
+ struct ovs_mutex bond_mutex; /* Mutex for 'tx_bonds'. */
+ /* Map of 'tx_bond's used for transmission. Written by the main thread,
+ * read/written by the pmd thread. */
+ struct hmap tx_bonds OVS_GUARDED;
+
/* These are thread-local copies of 'tx_ports'. One contains only tunnel
* ports (that support push_tunnel/pop_tunnel), the other contains ports
* with at least one txq (that support send). A port can be in both.
@@ -720,6 +738,8 @@ struct dp_netdev_pmd_thread {
* other instance will only be accessed by its own pmd thread. */
struct hmap tnl_port_cache;
struct hmap send_port_cache;
+ /* These are thread-local copies of 'tx_bonds' */
+ struct hmap bond_cache;
/* Keep track of detailed PMD performance statistics. */
struct pmd_perf_stats perf_stats;
@@ -799,6 +819,12 @@ static void dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd,
static int
dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
bool force);
+static void dp_netdev_add_bond_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
+ struct tx_bond *bond)
+ OVS_REQUIRES(pmd->bond_mutex);
+static void dp_netdev_del_bond_tx_from_pmd(struct dp_netdev_pmd_thread *pmd,
+ struct tx_bond *tx)
+ OVS_REQUIRES(pmd->bond_mutex);
static void reconfigure_datapath(struct dp_netdev *dp)
OVS_REQUIRES(dp->port_mutex);
@@ -807,6 +833,8 @@ static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd);
static void dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread *pmd);
static void pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
OVS_REQUIRES(pmd->port_mutex);
+static void pmd_load_cached_bonds(struct dp_netdev_pmd_thread *pmd)
+ OVS_REQUIRES(pmd->bond_mutex);
static inline void
dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
struct polled_queue *poll_list, int poll_cnt);
@@ -1365,6 +1393,67 @@ pmd_perf_show_cmd(struct unixctl_conn *conn, int argc,
par.command_type = PMD_INFO_PERF_SHOW;
dpif_netdev_pmd_info(conn, argc, argv, &par);
}
+
+static void
+dpif_netdev_pmd_bond_show(struct unixctl_conn *conn, int argc,
+                          const char *argv[], void *aux OVS_UNUSED)
+{
+    struct ds reply = DS_EMPTY_INITIALIZER;
+    struct dp_netdev_pmd_thread *pmd;
+    struct dp_netdev *dp = NULL;
+    uint32_t bucket;
+    struct tx_bond *pmd_bond_cache_entry, *pmd_bond_entry;
+
+    ovs_mutex_lock(&dp_netdev_mutex);
+
+    if (argc == 2) {
+        dp = shash_find_data(&dp_netdevs, argv[1]);
+    } else if (shash_count(&dp_netdevs) == 1) {
+        /* There's only one datapath. */
+        dp = shash_first(&dp_netdevs)->data;
+    }
+    if (!dp) {
+        ovs_mutex_unlock(&dp_netdev_mutex);
+        unixctl_command_reply_error(conn,
+                                    "please specify an existing datapath");
+        return;
+    }
+    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+        ds_put_cstr(&reply, (pmd->core_id == NON_PMD_CORE_ID)
+                            ? "main thread" : "pmd thread");
+        if (pmd->numa_id != OVS_NUMA_UNSPEC) {
+            ds_put_format(&reply, " numa_id %d", pmd->numa_id);
+        }
+        if (pmd->core_id != OVS_CORE_UNSPEC &&
+            pmd->core_id != NON_PMD_CORE_ID) {
+            ds_put_format(&reply, " core_id %u", pmd->core_id);
+        }
+        ds_put_cstr(&reply, ":\nBond cache:\n");
+        ovs_mutex_lock(&pmd->bond_mutex); /* Guards 'tx_bonds'/'bond_cache'. */
+        HMAP_FOR_EACH (pmd_bond_cache_entry, node, &pmd->bond_cache) {
+            ds_put_format(&reply, "\tbond-id %u :\n",
+                          pmd_bond_cache_entry->bond_id);
+            for (bucket = 0; bucket < BOND_BUCKETS; bucket++) {
+                ds_put_format(&reply, "\t\tbucket %u - slave %u \n",
+                              bucket, pmd_bond_cache_entry->slave_map[bucket]);
+            }
+        }
+        ds_put_cstr(&reply, "\nBonds:\n");
+        HMAP_FOR_EACH (pmd_bond_entry, node, &pmd->tx_bonds) {
+            ds_put_format(&reply, "\tbond-id %u :\n",
+                          pmd_bond_entry->bond_id);
+            for (bucket = 0; bucket < BOND_BUCKETS; bucket++) {
+                ds_put_format(&reply, "\t\tbucket %u - slave %u \n",
+                              bucket, pmd_bond_entry->slave_map[bucket]);
+            }
+        }
+        ovs_mutex_unlock(&pmd->bond_mutex);
+    }
+    ovs_mutex_unlock(&dp_netdev_mutex);
+    unixctl_command_reply(conn, ds_cstr(&reply));
+    ds_destroy(&reply);
+}
+
static int
dpif_netdev_init(void)
@@ -1396,6 +1485,9 @@ dpif_netdev_init(void)
"[-us usec] [-q qlen]",
0, 10, pmd_perf_log_set_cmd,
NULL);
+ unixctl_command_register("dpif-netdev/pmd-bond-show", "[dp]",
+ 0, 1, dpif_netdev_pmd_bond_show,
+ NULL);
return 0;
}
@@ -1511,6 +1603,9 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
ovs_mutex_init(&dp->port_mutex);
hmap_init(&dp->ports);
dp->port_seq = seq_create();
+ ovs_mutex_init(&dp->bond_mutex);
+ hmap_init(&dp->bonds);
+
fat_rwlock_init(&dp->upcall_rwlock);
dp->reconfigure_seq = seq_create();
@@ -1625,6 +1720,7 @@ dp_netdev_free(struct dp_netdev *dp)
OVS_REQUIRES(dp_netdev_mutex)
{
struct dp_netdev_port *port, *next;
+ struct tx_bond *bond, *next_bond;
shash_find_and_delete(&dp_netdevs, dp->name);
@@ -1634,6 +1730,13 @@ dp_netdev_free(struct dp_netdev *dp)
}
ovs_mutex_unlock(&dp->port_mutex);
+ ovs_mutex_lock(&dp->bond_mutex);
+ HMAP_FOR_EACH_SAFE (bond, next_bond, node, &dp->bonds) {
+ hmap_remove(&dp->bonds, &bond->node);
+ free(bond);
+ }
+ ovs_mutex_unlock(&dp->bond_mutex);
+
dp_netdev_destroy_all_pmds(dp, true);
cmap_destroy(&dp->poll_threads);
@@ -1652,6 +1755,9 @@ dp_netdev_free(struct dp_netdev *dp)
hmap_destroy(&dp->ports);
ovs_mutex_destroy(&dp->port_mutex);
+ hmap_destroy(&dp->bonds);
+ ovs_mutex_destroy(&dp->bond_mutex);
+
/* Upcalls must be disabled at this point */
dp_netdev_destroy_upcall_lock(dp);
@@ -1755,6 +1861,9 @@ dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd)
ovs_mutex_lock(&pmd->port_mutex);
pmd_load_cached_ports(pmd);
ovs_mutex_unlock(&pmd->port_mutex);
+ ovs_mutex_lock(&pmd->bond_mutex);
+ pmd_load_cached_bonds(pmd);
+ ovs_mutex_unlock(&pmd->bond_mutex);
ovs_mutex_unlock(&pmd->dp->non_pmd_mutex);
return;
}
@@ -1772,6 +1881,12 @@ hash_port_no(odp_port_t port_no)
return hash_int(odp_to_u32(port_no), 0);
}
+static uint32_t
+hash_bond_id(uint32_t bond_id)
+{
+ return hash_int(bond_id, 0);
+}
+
static int
port_create(const char *devname, const char *type,
odp_port_t port_no, struct dp_netdev_port **portp)
@@ -4315,6 +4430,19 @@ tx_port_lookup(const struct hmap *hmap, odp_port_t port_no)
return NULL;
}
+static struct tx_bond *
+tx_bond_lookup(const struct hmap *hmap, uint32_t bond_id)
+{
+ struct tx_bond *tx;
+
+ HMAP_FOR_EACH_IN_BUCKET (tx, node, hash_bond_id(bond_id), hmap) {
+ if (tx->bond_id == bond_id) {
+ return tx;
+ }
+ }
+ return NULL;
+}
+
static int
port_reconfigure(struct dp_netdev_port *port)
{
@@ -4778,6 +4906,27 @@ pmd_remove_stale_ports(struct dp_netdev *dp,
ovs_mutex_unlock(&pmd->port_mutex);
}
+static void
+pmd_remove_stale_bonds(struct dp_netdev *dp,
+ struct dp_netdev_pmd_thread *pmd)
+ OVS_EXCLUDED(pmd->bond_mutex)
+ OVS_EXCLUDED(dp->bond_mutex)
+{
+ struct tx_bond *tx, *tx_next;
+
+ ovs_mutex_lock(&dp->bond_mutex);
+ ovs_mutex_lock(&pmd->bond_mutex);
+
+ HMAP_FOR_EACH_SAFE (tx, tx_next, node, &pmd->tx_bonds) {
+ if (!tx_bond_lookup(&dp->bonds, tx->bond_id)) {
+ dp_netdev_del_bond_tx_from_pmd(pmd, tx);
+ }
+ }
+
+ ovs_mutex_unlock(&pmd->bond_mutex);
+ ovs_mutex_unlock(&dp->bond_mutex);
+}
+
/* Must be called each time a port is added/removed or the cmask changes.
* This creates and destroys pmd threads, reconfigures ports, opens their
* rxqs and assigns all rxqs/txqs to pmd threads. */
@@ -4787,6 +4936,7 @@ reconfigure_datapath(struct dp_netdev *dp)
{
struct dp_netdev_pmd_thread *pmd;
struct dp_netdev_port *port;
+ struct tx_bond *bond;
int wanted_txqs;
dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
@@ -4815,10 +4965,11 @@ reconfigure_datapath(struct dp_netdev *dp)
}
}
- /* Remove from the pmd threads all the ports that have been deleted or
- * need reconfiguration. */
+ /* Remove from the pmd threads all the ports/bonds that have been deleted
+ * or need reconfiguration. */
CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
pmd_remove_stale_ports(dp, pmd);
+ pmd_remove_stale_bonds(dp, pmd);
}
/* Reload affected pmd threads. We must wait for the pmd threads before
@@ -4918,6 +5069,20 @@ reconfigure_datapath(struct dp_netdev *dp)
ovs_mutex_unlock(&pmd->port_mutex);
}
+ /* Add every bond to the tx cache of every pmd thread, if it's not
+ * there already and if this pmd has at least one rxq to poll. */
+ ovs_mutex_lock(&dp->bond_mutex);
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ ovs_mutex_lock(&pmd->bond_mutex);
+ if (hmap_count(&pmd->poll_list) || pmd->core_id == NON_PMD_CORE_ID) {
+ HMAP_FOR_EACH (bond, node, &dp->bonds) {
+ dp_netdev_add_bond_tx_to_pmd(pmd, bond);
+ }
+ }
+ ovs_mutex_unlock(&pmd->bond_mutex);
+ }
+ ovs_mutex_unlock(&dp->bond_mutex);
+
/* Reload affected pmd threads. */
reload_affected_pmds(dp);
@@ -5346,6 +5511,38 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
}
static void
+pmd_free_cached_bonds(struct dp_netdev_pmd_thread *pmd)
+{
+ struct tx_bond *tx_bond_cached;
+
+ HMAP_FOR_EACH_POP (tx_bond_cached, node, &pmd->bond_cache) {
+ free(tx_bond_cached);
+ }
+}
+
+/* Copies bonds from 'pmd->tx_bonds' (shared with the main thread) to
+ * 'pmd->bond_cache' (thread local). */
+static void
+pmd_load_cached_bonds(struct dp_netdev_pmd_thread *pmd)
+    OVS_REQUIRES(pmd->bond_mutex)
+{
+    struct tx_bond *tx_bond, *tx_bond_cached;
+
+    pmd_free_cached_bonds(pmd);
+    hmap_shrink(&pmd->bond_cache);
+
+    HMAP_FOR_EACH (tx_bond, node, &pmd->tx_bonds) {
+        tx_bond_cached = xmemdup(tx_bond, sizeof *tx_bond_cached);
+        hmap_insert(&pmd->bond_cache, &tx_bond_cached->node,
+                    hash_bond_id(tx_bond_cached->bond_id));
+
+        VLOG_DBG("Caching bond-id %"PRIu32" pmd %u",
+                 tx_bond_cached->bond_id, pmd->core_id);
+    }
+}
+
+
+static void
pmd_alloc_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
{
ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
@@ -5394,6 +5591,10 @@ pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
ovs_mutex_unlock(&pmd->port_mutex);
+ ovs_mutex_lock(&pmd->bond_mutex);
+ pmd_load_cached_bonds(pmd);
+ ovs_mutex_unlock(&pmd->bond_mutex);
+
*ppoll_list = poll_list;
return i;
}
@@ -5936,6 +6137,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
ovs_mutex_init(&pmd->cond_mutex);
ovs_mutex_init(&pmd->flow_mutex);
ovs_mutex_init(&pmd->port_mutex);
+ ovs_mutex_init(&pmd->bond_mutex);
cmap_init(&pmd->flow_table);
cmap_init(&pmd->classifiers);
pmd->ctx.last_rxq = NULL;
@@ -5946,6 +6148,8 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
hmap_init(&pmd->tx_ports);
hmap_init(&pmd->tnl_port_cache);
hmap_init(&pmd->send_port_cache);
+ hmap_init(&pmd->tx_bonds);
+ hmap_init(&pmd->bond_cache);
/* init the 'flow_cache' since there is no
* actual thread created for NON_PMD_CORE_ID. */
if (core_id == NON_PMD_CORE_ID) {
@@ -5966,6 +6170,8 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
hmap_destroy(&pmd->send_port_cache);
hmap_destroy(&pmd->tnl_port_cache);
hmap_destroy(&pmd->tx_ports);
+ hmap_destroy(&pmd->bond_cache);
+ hmap_destroy(&pmd->tx_bonds);
hmap_destroy(&pmd->poll_list);
/* All flows (including their dpcls_rules) have been deleted already */
CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
@@ -5980,6 +6186,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
xpthread_cond_destroy(&pmd->cond);
ovs_mutex_destroy(&pmd->cond_mutex);
ovs_mutex_destroy(&pmd->port_mutex);
+ ovs_mutex_destroy(&pmd->bond_mutex);
free(pmd);
}
@@ -6133,6 +6340,39 @@ dp_netdev_del_port_tx_from_pmd(struct dp_netdev_pmd_thread *pmd,
free(tx);
pmd->need_reload = true;
}
+
+static void
+dp_netdev_add_bond_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
+                             struct tx_bond *bond)
+    OVS_REQUIRES(pmd->bond_mutex)
+{
+    struct tx_bond *tx;
+    tx = tx_bond_lookup(&pmd->tx_bonds, bond->bond_id);
+    if (tx) {
+        /*
+         * 'bond' already exists in 'tx_bonds'; check if it's in the
+         * thread-local 'bond_cache' too.  A PMD reload is needed if not.
+         */
+        if (!tx_bond_lookup(&pmd->bond_cache, bond->bond_id)) {
+            pmd->need_reload = true;
+        }
+        return;
+    }
+    tx = xmemdup(bond, sizeof *tx);
+    hmap_insert(&pmd->tx_bonds, &tx->node, hash_bond_id(tx->bond_id));
+    pmd->need_reload = true;
+}
+
+/* Removes 'tx' from 'pmd->tx_bonds', frees it, and marks 'pmd' for reload. */
+static void
+dp_netdev_del_bond_tx_from_pmd(struct dp_netdev_pmd_thread *pmd,
+                               struct tx_bond *tx)
+    OVS_REQUIRES(pmd->bond_mutex)
+{
+    hmap_remove(&pmd->tx_bonds, &tx->node);
+    free(tx);
+    pmd->need_reload = true;
+}
static char *
dpif_netdev_get_datapath_version(void)
@@ -6904,6 +7144,13 @@ pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
return tx_port_lookup(&pmd->send_port_cache, port_no);
}
+static struct tx_bond *
+pmd_tx_bond_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
+ uint32_t bond_id)
+{
+ return tx_bond_lookup(&pmd->bond_cache, bond_id);
+}
+
static int
push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
const struct nlattr *attr,
@@ -6953,6 +7200,51 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
}
}
+static int
+dp_execute_output_action(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets_,
+ bool should_steal,
+ odp_port_t port_no)
+{
+ struct tx_port *p;
+ p = pmd_send_port_cache_lookup(pmd, port_no);
+ if (OVS_LIKELY(p)) {
+ struct dp_packet *packet;
+ struct dp_packet_batch out;
+ if (!should_steal) {
+ dp_packet_batch_clone(&out, packets_);
+ dp_packet_batch_reset_cutlen(packets_);
+ packets_ = &out;
+ }
+ dp_packet_batch_apply_cutlen(packets_);
+#ifdef DPDK_NETDEV
+ if (OVS_UNLIKELY(!dp_packet_batch_is_empty(&p->output_pkts)
+ && packets_->packets[0]->source
+ != p->output_pkts.packets[0]->source)) {
+ /* netdev-dpdk assumes that all packets in a single
+ * output batch has the same source. Flush here to
+ * avoid memory access issues. */
+ dp_netdev_pmd_flush_output_on_port(pmd, p);
+ }
+#endif
+ if (dp_packet_batch_size(&p->output_pkts)
+ + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST) {
+ /* Flush here to avoid overflow. */
+ dp_netdev_pmd_flush_output_on_port(pmd, p);
+ }
+ if (dp_packet_batch_is_empty(&p->output_pkts)) {
+ pmd->n_output_batches++;
+ }
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
+ p->output_pkts_rxqs[dp_packet_batch_size(&p->output_pkts)] =
+ pmd->ctx.last_rxq;
+ dp_packet_batch_add(&p->output_pkts, packet);
+ }
+ return 0;
+ }
+ return -1;
+}
+
static void
dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
const struct nlattr *a, bool should_steal)
@@ -6964,49 +7256,55 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
struct dp_netdev *dp = pmd->dp;
int type = nl_attr_type(a);
struct tx_port *p;
+ int ret;
switch ((enum ovs_action_attr)type) {
case OVS_ACTION_ATTR_OUTPUT:
- p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
- if (OVS_LIKELY(p)) {
- struct dp_packet *packet;
- struct dp_packet_batch out;
-
- if (!should_steal) {
- dp_packet_batch_clone(&out, packets_);
- dp_packet_batch_reset_cutlen(packets_);
- packets_ = &out;
- }
- dp_packet_batch_apply_cutlen(packets_);
-
-#ifdef DPDK_NETDEV
- if (OVS_UNLIKELY(!dp_packet_batch_is_empty(&p->output_pkts)
- && packets_->packets[0]->source
- != p->output_pkts.packets[0]->source)) {
- /* XXX: netdev-dpdk assumes that all packets in a single
- * output batch has the same source. Flush here to
- * avoid memory access issues. */
- dp_netdev_pmd_flush_output_on_port(pmd, p);
- }
-#endif
- if (dp_packet_batch_size(&p->output_pkts)
- + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST) {
- /* Flush here to avoid overflow. */
- dp_netdev_pmd_flush_output_on_port(pmd, p);
- }
-
- if (dp_packet_batch_is_empty(&p->output_pkts)) {
- pmd->n_output_batches++;
- }
+ ret = dp_execute_output_action(pmd, packets_, should_steal,
+ nl_attr_get_odp_port(a));
+ if (ret == 0) {
+ /* Output action executed successfully. */
+ return;
+ }
+ break;
+ case OVS_ACTION_ATTR_LB_OUTPUT: {
+ uint32_t bond = nl_attr_get_u32(a);
+ uint32_t bond_member;
+ uint32_t bucket;
+ struct dp_packet_batch del_pkts;
+ struct dp_packet_batch output_pkt;
+ struct dp_packet *packet;
+ struct tx_bond *p_bond;
+ bool pkts_for_del = false;
+ p_bond = pmd_tx_bond_cache_lookup(pmd, bond);
+ if (p_bond) {
DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
- p->output_pkts_rxqs[dp_packet_batch_size(&p->output_pkts)] =
- pmd->ctx.last_rxq;
- dp_packet_batch_add(&p->output_pkts, packet);
+ /*
+ * Lookup the bond-hash table using hash to get the slave.
+ */
+ bucket = (packet->md.dp_hash & BOND_MASK);
+ bond_member = p_bond->slave_map[bucket];
+
+ dp_packet_batch_init_packet(&output_pkt, packet);
+ ret = dp_execute_output_action(pmd, &output_pkt, should_steal,
+ u32_to_odp(bond_member));
+ if (OVS_UNLIKELY(ret != 0)) {
+ if (OVS_UNLIKELY(!pkts_for_del)) {
+ pkts_for_del = true;
+ dp_packet_batch_init(&del_pkts);
+ }
+ dp_packet_batch_add(&del_pkts, packet);
+ }
+ }
+ /* Delete packets that failed OUTPUT action */
+ if (pkts_for_del) {
+ dp_packet_delete_batch(&del_pkts, should_steal);
}
return;
}
break;
+ }
case OVS_ACTION_ATTR_TUNNEL_PUSH:
if (should_steal) {
@@ -7435,6 +7733,125 @@ dpif_netdev_ipf_dump_done(struct dpif *dpif OVS_UNUSED, void *ipf_dump_ctx)
}
+/* Adds a bond with 'bond_id' to 'dpif', or updates its slave-map. */
+static int
+dpif_netdev_bond_add(struct dpif *dpif, uint32_t bond_id, uint32_t slave_map[])
+{
+    struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_netdev_pmd_thread *pmd;
+    uint32_t bucket;
+    struct tx_bond *dp_bond_entry = NULL;
+    struct tx_bond *pmd_bond_entry = NULL;
+    struct tx_bond *pmd_bond_cache_entry = NULL;
+    bool reload = false;
+
+    ovs_mutex_lock(&dp->bond_mutex);
+    /* Lookup the bond.  If it already exists, just update the slave-map,
+     * else create a new entry. */
+    dp_bond_entry = tx_bond_lookup(&dp->bonds, bond_id);
+    if (dp_bond_entry) {
+        /* Keep holding 'dp->bond_mutex' while 'dp_bond_entry' is written,
+         * since it is reachable through 'dp->bonds'. */
+        for (bucket = 0; bucket < BOND_BUCKETS; bucket++) {
+            dp_bond_entry->slave_map[bucket] = slave_map[bucket];
+        }
+        /* Update the slave-map for all PMDs and non-PMDs. */
+        CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+            VLOG_DBG("Modifying hash-map for bond - %"PRIu32" pmd %u",
+                     bond_id, pmd->core_id);
+            ovs_mutex_lock(&pmd->bond_mutex);
+            pmd_bond_entry = tx_bond_lookup(&pmd->tx_bonds, bond_id);
+            pmd_bond_cache_entry = tx_bond_lookup(&pmd->bond_cache, bond_id);
+
+            for (bucket = 0; bucket < BOND_BUCKETS; bucket++) {
+                if (pmd_bond_entry) {
+                    pmd_bond_entry->slave_map[bucket] = slave_map[bucket];
+                }
+                if (pmd_bond_cache_entry) {
+                    pmd_bond_cache_entry->slave_map[bucket] =
+                        slave_map[bucket];
+                } else {
+                    /* Reload is needed as bond-cache is still not loaded. */
+                    reload = true;
+                }
+            }
+            ovs_mutex_unlock(&pmd->bond_mutex);
+        }
+        ovs_mutex_unlock(&dp->bond_mutex);
+        if (reload) {
+            dp_netdev_request_reconfigure(dp);
+        }
+    } else {
+        struct tx_bond *dp_bond = xzalloc(sizeof *dp_bond);
+        /* Initialize the datapath-wide entry once, even if there are
+         * currently no PMD threads to copy it to. */
+        dp_bond->bond_id = bond_id;
+        for (bucket = 0; bucket < BOND_BUCKETS; bucket++) {
+            dp_bond->slave_map[bucket] = slave_map[bucket];
+        }
+        CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+            VLOG_DBG("Adding hash-map for bond - %"PRIu32" pmd %u",
+                     bond_id, pmd->core_id);
+            ovs_mutex_lock(&pmd->bond_mutex);
+            pmd_bond_entry = tx_bond_lookup(&pmd->tx_bonds, bond_id);
+            if (pmd_bond_entry) {
+                VLOG_DBG("Bond %"PRIu32" already exists in PMD %u",
+                         bond_id, pmd->core_id);
+                memcpy(pmd_bond_entry->slave_map, dp_bond->slave_map,
+                       sizeof pmd_bond_entry->slave_map);
+            } else {
+                struct tx_bond *pmd_bond = xmemdup(dp_bond, sizeof *pmd_bond);
+                hmap_insert(&pmd->tx_bonds, &pmd_bond->node,
+                            hash_bond_id(pmd_bond->bond_id));
+            }
+            ovs_mutex_unlock(&pmd->bond_mutex);
+        }
+        hmap_insert(&dp->bonds, &dp_bond->node,
+                    hash_bond_id(dp_bond->bond_id));
+        ovs_mutex_unlock(&dp->bond_mutex);
+        /* A new bond was added; PMDs must be reconfigured to cache the
+         * bond slave-map. */
+        dp_netdev_request_reconfigure(dp);
+    }
+    return 0;
+}
+
+/* Removes bond 'bond_id' from 'dpif' and from every PMD's 'tx_bonds'. */
+static int
+dpif_netdev_bond_del(struct dpif *dpif, uint32_t bond_id)
+{
+    struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_netdev_pmd_thread *pmd;
+    struct tx_bond *dp_bond_entry = NULL;
+    struct tx_bond *pmd_bond_entry = NULL;
+
+    ovs_mutex_lock(&dp->bond_mutex);
+
+    /* Find the bond and delete it if present. */
+    dp_bond_entry = tx_bond_lookup(&dp->bonds, bond_id);
+    if (dp_bond_entry) {
+        /* Delete it from all PMDs and non-PMDs. */
+        CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+            ovs_mutex_lock(&pmd->bond_mutex);
+            VLOG_DBG("Deleting bond - %"PRIu32" pmd %u",
+                     bond_id, pmd->core_id);
+            pmd_bond_entry = tx_bond_lookup(&pmd->tx_bonds, bond_id);
+            if (pmd_bond_entry) {
+                hmap_remove(&pmd->tx_bonds, &pmd_bond_entry->node);
+                free(pmd_bond_entry);
+            }
+            ovs_mutex_unlock(&pmd->bond_mutex);
+        }
+        hmap_remove(&dp->bonds, &dp_bond_entry->node);
+        free(dp_bond_entry);
+        /* PMDs need to be reconfigured to drop the deleted bond's
+         * slave-map from their thread-local caches. */
+        dp_netdev_request_reconfigure(dp);
+    }
+    ovs_mutex_unlock(&dp->bond_mutex);
+    return 0;
+}
+
const struct dpif_class dpif_netdev_class = {
"netdev",
true, /* cleanup_required */
@@ -7498,6 +7915,8 @@ const struct dpif_class dpif_netdev_class = {
dpif_netdev_meter_set,
dpif_netdev_meter_get,
dpif_netdev_meter_del,
+ dpif_netdev_bond_add,
+ dpif_netdev_bond_del,
};
static void
@@ -552,6 +552,11 @@ struct dpif_class {
* zero. */
int (*meter_del)(struct dpif *, ofproto_meter_id meter_id,
struct ofputil_meter_stats *, uint16_t n_bands);
+
+ /* Adds a bond with 'bond_id' and the slave-map to 'dpif' */
+ int (*bond_add)(struct dpif *dpif, uint32_t bond_id, uint32_t slave_map[]);
+ /* Removes bond identified by 'bond_id' from 'dpif' */
+ int (*bond_del)(struct dpif *dpif, uint32_t bond_id);
};
extern const struct dpif_class dpif_netlink_class;
@@ -1177,6 +1177,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_,
case OVS_ACTION_ATTR_CT:
case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
@@ -1227,6 +1228,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_,
struct dp_packet *clone = NULL;
uint32_t cutlen = dp_packet_get_cutlen(packet);
if (cutlen && (type == OVS_ACTION_ATTR_OUTPUT
+ || type == OVS_ACTION_ATTR_LB_OUTPUT
|| type == OVS_ACTION_ATTR_TUNNEL_PUSH
|| type == OVS_ACTION_ATTR_TUNNEL_POP
|| type == OVS_ACTION_ATTR_USERSPACE)) {
@@ -1879,6 +1881,16 @@ dpif_supports_tnl_push_pop(const struct dpif *dpif)
return dpif_is_netdev(dpif);
}
+bool
+dpif_supports_balance_tcp_opt(const struct dpif *dpif)
+{
+ /*
+ * Balance-tcp optimization is currently supported in netdev
+ * datapath only.
+ */
+ return dpif_is_netdev(dpif);
+}
+
/* Meters */
void
dpif_meter_get_features(const struct dpif *dpif,
@@ -1976,3 +1988,27 @@ dpif_meter_del(struct dpif *dpif, ofproto_meter_id meter_id,
}
return error;
}
+
+int
+dpif_bond_add(struct dpif *dpif, uint32_t bond_id, uint32_t slave_map[])
+{
+ int error = 0;
+
+ if (dpif && dpif->dpif_class && dpif->dpif_class->bond_add) {
+ error = dpif->dpif_class->bond_add(dpif, bond_id, slave_map);
+ }
+
+ return error;
+}
+
+int
+dpif_bond_del(struct dpif *dpif, uint32_t bond_id)
+{
+ int error = 0;
+
+ if (dpif && dpif->dpif_class && dpif->dpif_class->bond_del) {
+ error = dpif->dpif_class->bond_del(dpif, bond_id);
+ }
+
+ return error;
+}
@@ -891,6 +891,11 @@ int dpif_get_pmds_for_port(const struct dpif * dpif, odp_port_t port_no,
char *dpif_get_dp_version(const struct dpif *);
bool dpif_supports_tnl_push_pop(const struct dpif *);
+bool dpif_supports_balance_tcp_opt(const struct dpif *);
+
+int dpif_bond_add(struct dpif *dpif, uint32_t bond_id, uint32_t slave_map[]);
+int dpif_bond_del(struct dpif *dpif, uint32_t bond_id);
+
/* Log functions. */
struct vlog_module;
@@ -725,6 +725,7 @@ requires_datapath_assistance(const struct nlattr *a)
switch (type) {
/* These only make sense in the context of a datapath. */
case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
@@ -990,6 +991,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
break;
case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
@@ -118,6 +118,7 @@ odp_action_len(uint16_t type)
switch ((enum ovs_action_attr) type) {
case OVS_ACTION_ATTR_OUTPUT: return sizeof(uint32_t);
+ case OVS_ACTION_ATTR_LB_OUTPUT: return sizeof(uint32_t);
case OVS_ACTION_ATTR_TRUNC: return sizeof(struct ovs_action_trunc);
case OVS_ACTION_ATTR_TUNNEL_PUSH: return ATTR_LEN_VARIABLE;
case OVS_ACTION_ATTR_TUNNEL_POP: return sizeof(uint32_t);
@@ -1113,6 +1114,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a,
case OVS_ACTION_ATTR_OUTPUT:
odp_portno_name_format(portno_names, nl_attr_get_odp_port(a), ds);
break;
+ case OVS_ACTION_ATTR_LB_OUTPUT:
+ ds_put_format(ds, "lb_output(bond,%"PRIu32")", nl_attr_get_u32(a));
+ break;
case OVS_ACTION_ATTR_TRUNC: {
const struct ovs_action_trunc *trunc =
nl_attr_get_unspec(a, sizeof *trunc);
@@ -54,10 +54,6 @@ static struct ovs_rwlock rwlock = OVS_RWLOCK_INITIALIZER;
static struct hmap all_bonds__ = HMAP_INITIALIZER(&all_bonds__);
static struct hmap *const all_bonds OVS_GUARDED_BY(rwlock) = &all_bonds__;
-/* Bit-mask for hashing a flow down to a bucket. */
-#define BOND_MASK 0xff
-#define BOND_BUCKETS (BOND_MASK + 1)
-
/* Priority for internal rules created to handle recirculation */
#define RECIRC_RULE_PRIORITY 20
@@ -126,6 +122,7 @@ struct bond {
enum lacp_status lacp_status; /* Status of LACP negotiations. */
bool bond_revalidate; /* True if flows need revalidation. */
uint32_t basis; /* Basis for flow hash function. */
+ bool use_rss_hash; /* Use RSS hash for load balancing */
/* SLB specific bonding info. */
struct bond_entry *hash; /* An array of BOND_BUCKETS elements. */
@@ -185,7 +182,7 @@ static struct bond_slave *choose_output_slave(const struct bond *,
struct flow_wildcards *,
uint16_t vlan)
OVS_REQ_RDLOCK(rwlock);
-static void update_recirc_rules__(struct bond *bond);
+static void update_recirc_rules__(struct bond *bond, uint32_t bond_recirc_id);
static bool bond_is_falling_back_to_ab(const struct bond *);
/* Attempts to parse 's' as the name of a bond balancing mode. If successful,
@@ -262,6 +259,7 @@ void
bond_unref(struct bond *bond)
{
struct bond_slave *slave;
+ uint32_t bond_recirc_id = 0;
if (!bond || ovs_refcount_unref_relaxed(&bond->ref_cnt) != 1) {
return;
@@ -282,12 +280,13 @@ bond_unref(struct bond *bond)
/* Free bond resources. Remove existing post recirc rules. */
if (bond->recirc_id) {
+ bond_recirc_id = bond->recirc_id;
recirc_free_id(bond->recirc_id);
bond->recirc_id = 0;
}
free(bond->hash);
bond->hash = NULL;
- update_recirc_rules__(bond);
+ update_recirc_rules__(bond, bond_recirc_id);
hmap_destroy(&bond->pr_rule_ops);
free(bond->name);
@@ -328,13 +327,14 @@ add_pr_rule(struct bond *bond, const struct match *match,
* lock annotation. Currently, only 'bond_unref()' calls
* this function directly. */
static void
-update_recirc_rules__(struct bond *bond)
+update_recirc_rules__(struct bond *bond, uint32_t bond_recirc_id)
{
struct match match;
struct bond_pr_rule_op *pr_op, *next_op;
uint64_t ofpacts_stub[128 / 8];
struct ofpbuf ofpacts;
int i;
+    uint32_t slave_map[BOND_BUCKETS];
ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
@@ -353,8 +353,14 @@ update_recirc_rules__(struct bond *bond)
add_pr_rule(bond, &match, slave->ofp_port,
&bond->hash[i].pr_rule);
+ slave_map[i] = slave->ofp_port;
+ } else {
+ slave_map[i] = -1;
}
}
+ ofproto_dpif_bundle_add(bond->ofproto, bond->recirc_id, slave_map);
+ } else {
+ ofproto_dpif_bundle_del(bond->ofproto, bond_recirc_id);
}
HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) {
@@ -404,7 +410,7 @@ static void
update_recirc_rules(struct bond *bond)
OVS_REQ_RDLOCK(rwlock)
{
- update_recirc_rules__(bond);
+ update_recirc_rules__(bond, bond->recirc_id);
}
/* Updates 'bond''s overall configuration to 's'.
@@ -467,6 +473,10 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s)
recirc_free_id(bond->recirc_id);
bond->recirc_id = 0;
}
+ if (bond->use_rss_hash != s->use_rss_hash) {
+ bond->use_rss_hash = s->use_rss_hash;
+ revalidate = true;
+ }
if (bond->balance == BM_AB || !bond->hash || revalidate) {
bond_entry_reset(bond);
@@ -1362,6 +1372,8 @@ bond_print_details(struct ds *ds, const struct bond *bond)
may_recirc ? "yes" : "no", may_recirc ? recirc_id: -1);
ds_put_format(ds, "bond-hash-basis: %"PRIu32"\n", bond->basis);
+ ds_put_format(ds, "bond-hash-rss: %s\n",
+ bond->use_rss_hash ? "enabled" : "disabled");
ds_put_format(ds, "updelay: %d ms\n", bond->updelay);
ds_put_format(ds, "downdelay: %d ms\n", bond->downdelay);
@@ -1939,3 +1951,9 @@ bond_get_changed_active_slave(const char *name, struct eth_addr *mac,
return false;
}
+
+bool
+bond_get_rss_mode(const struct bond *bond)
+{
+ return bond->use_rss_hash;
+}
@@ -22,6 +22,10 @@
#include "ofproto-provider.h"
#include "packets.h"
+/* Bit-mask for hashing a flow down to a bucket. */
+#define BOND_MASK 0xff
+#define BOND_BUCKETS (BOND_MASK + 1)
+
struct flow;
struct netdev;
struct ofpbuf;
@@ -58,6 +62,7 @@ struct bond_settings {
/* The MAC address of the interface
that was active during the last
ovs run. */
+ bool use_rss_hash; /* Use rss hash for load balancing */
};
/* Program startup. */
@@ -122,4 +127,7 @@ void bond_rebalance(struct bond *);
*/
void bond_update_post_recirc_rules(struct bond *, uint32_t *recirc_id,
uint32_t *hash_basis);
+
+bool bond_get_rss_mode(const struct bond *);
+
#endif /* bond.h */
@@ -1177,6 +1177,7 @@ dpif_sflow_read_actions(const struct flow *flow,
case OVS_ACTION_ATTR_CT:
case OVS_ACTION_ATTR_CT_CLEAR:
case OVS_ACTION_ATTR_METER:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
break;
case OVS_ACTION_ATTR_SET_MASKED:
@@ -409,6 +409,8 @@ struct xlate_ctx {
struct ofpbuf action_set; /* Action set. */
enum xlate_error error; /* Translation failed. */
+
+ bool tnl_push_no_recirc; /* Tunnel push recirculation status */
};
/* Structure to track VLAN manipulation */
@@ -2406,6 +2408,8 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
} else {
struct flow_wildcards *wc = ctx->wc;
struct ofport_dpif *ofport;
+ struct xport *in_port;
+ odp_port_t odp_port;
if (ctx->xbridge->support.odp.recirc) {
/* In case recirculation is not actually in use, 'xr.recirc_id'
@@ -2418,6 +2422,34 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
/* Use recirculation instead of output. */
use_recirc = true;
xr.hash_alg = OVS_HASH_ALG_L4;
+
+ if (bond_get_rss_mode(out_xbundle->bond)) {
+ /*
+ * Select the hash-alg based on datapath's capability.
+ * If not supported, default to OVS_HASH_ALG_L4, for
+ * which HASH + RECIRC actions are set in xlate. Otherwise,
+ * use the RSS hash for better throughput. With
+ * OVS_HASH_ALG_L4_RSS, the RECIRC action is also avoided.
+ *
+ * NOTE:
+ * Do not use load-balanced-output action when tunnel push
+ * recirculation is avoided (via CLONE action), as L4 hash
+ * for bond balancing needs to be computed post tunnel
+ * encapsulation.
+ */
+ if (ctx->xbridge->support.balance_tcp_opt &&
+ !ctx->tnl_push_no_recirc) {
+ xr.hash_alg = OVS_HASH_ALG_L4_RSS;
+ }
+
+ VLOG_DBG("xin-in_port: %u/%u base-flow-in_port: %u/%u "
+ "hash-algo = %d\n",
+ ctx->xin->flow.in_port.ofp_port,
+ ctx->xin->flow.in_port.odp_port,
+ ctx->base_flow.in_port.ofp_port,
+ ctx->base_flow.in_port.odp_port, xr.hash_alg);
+ }
+
/* Recirculation does not require unmasking hash fields. */
wc = NULL;
}
@@ -3694,12 +3726,16 @@ native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport,
ctx->xin->allow_side_effects = backup_side_effects;
ctx->xin->packet = backup_packet;
ctx->wc = backup_wc;
+
+ ctx->tnl_push_no_recirc = true;
} else {
/* In order to maintain accurate stats, use recirc for
* natvie tunneling. */
nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, 0);
nl_msg_end_nested(ctx->odp_actions, clone_ofs);
- }
+
+ ctx->tnl_push_no_recirc = false;
+ }
/* Restore the flows after the translation. */
memcpy(&ctx->xin->flow, &old_flow, sizeof ctx->xin->flow);
@@ -4125,24 +4161,36 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
xlate_commit_actions(ctx);
if (xr) {
- /* Recirculate the packet. */
struct ovs_action_hash *act_hash;
/* Hash action. */
enum ovs_hash_alg hash_alg = xr->hash_alg;
- if (hash_alg > ctx->xbridge->support.max_hash_alg) {
+ if (hash_alg > ctx->xbridge->support.max_hash_alg ||
+ hash_alg == OVS_HASH_ALG_L4_RSS) {
/* Algorithm supported by all datapaths. */
hash_alg = OVS_HASH_ALG_L4;
}
act_hash = nl_msg_put_unspec_uninit(ctx->odp_actions,
- OVS_ACTION_ATTR_HASH,
- sizeof *act_hash);
+ OVS_ACTION_ATTR_HASH,
+ sizeof *act_hash);
act_hash->hash_alg = hash_alg;
act_hash->hash_basis = xr->hash_basis;
- /* Recirc action. */
- nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC,
- xr->recirc_id);
+ if (xr->hash_alg == OVS_HASH_ALG_L4_RSS) {
+ /*
+ * If hash algorithm is RSS, use the hash directly
+ * for slave selection and avoid recirculation.
+ *
+ * Currently supported by the netdev datapath only.
+ */
+ nl_msg_put_odp_port(ctx->odp_actions,
+ OVS_ACTION_ATTR_LB_OUTPUT,
+ xr->recirc_id);
+ } else {
+ /* Recirculate the packet. */
+ nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC,
+ xr->recirc_id);
+ }
} else if (is_native_tunnel) {
/* Output to native tunnel port. */
native_tunnel_output(ctx, xport, flow, odp_port, truncate);
@@ -7167,7 +7215,8 @@ count_output_actions(const struct ofpbuf *odp_actions)
int n = 0;
NL_ATTR_FOR_EACH_UNSAFE (a, left, odp_actions->data, odp_actions->size) {
- if (a->nla_type == OVS_ACTION_ATTR_OUTPUT) {
+ if ((a->nla_type == OVS_ACTION_ATTR_OUTPUT) ||
+ (a->nla_type == OVS_ACTION_ATTR_LB_OUTPUT)) {
n++;
}
}
@@ -1441,6 +1441,8 @@ check_support(struct dpif_backer *backer)
backer->rt_support.ct_clear = check_ct_clear(backer);
backer->rt_support.max_hash_alg = check_max_dp_hash_alg(backer);
backer->rt_support.check_pkt_len = check_check_pkt_len(backer);
+ backer->rt_support.balance_tcp_opt =
+ dpif_supports_balance_tcp_opt(backer->dpif);
/* Flow fields. */
backer->rt_support.odp.ct_state = check_ct_state(backer);
@@ -3294,6 +3296,35 @@ bundle_remove(struct ofport *port_)
}
}
+int
+ofproto_dpif_bundle_add(struct ofproto_dpif *dpif,
+ uint32_t bond_id,
+ uint32_t slave_map[])
+{
+ int error;
+ uint32_t bucket;
+
+ /* Convert each ofp_port in the map to the corresponding odp_port. */
+ for (bucket = 0; bucket < BOND_BUCKETS; bucket++) {
+ if (slave_map[bucket] != -1) {
+ slave_map[bucket] = ofp_port_to_odp_port(dpif, slave_map[bucket]);
+ }
+ }
+
+ error = dpif_bond_add(dpif->backer->dpif, bond_id, slave_map);
+ return error;
+}
+
+int
+ofproto_dpif_bundle_del(struct ofproto_dpif *dpif,
+ uint32_t bond_id)
+{
+ int error;
+
+ error = dpif_bond_del(dpif->backer->dpif, bond_id);
+ return error;
+}
+
static void
send_pdu_cb(void *port_, const void *pdu, size_t pdu_size)
{
@@ -194,8 +194,11 @@ struct group_dpif *group_dpif_lookup(struct ofproto_dpif *,
/* Highest supported dp_hash algorithm. */ \
DPIF_SUPPORT_FIELD(size_t, max_hash_alg, "Max dp_hash algorithm") \
\
- /* True if the datapath supports OVS_ACTION_ATTR_CHECK_PKT_LEN. */ \
- DPIF_SUPPORT_FIELD(bool, check_pkt_len, "Check pkt length action")
+ /* True if the datapath supports OVS_ACTION_ATTR_CHECK_PKT_LEN. */ \
+ DPIF_SUPPORT_FIELD(bool, check_pkt_len, "Check pkt length action") \
+ \
+ /* True if the datapath supports the balance-tcp optimization. */ \
+ DPIF_SUPPORT_FIELD(bool, balance_tcp_opt, "Balance-tcp opt")
/* Stores the various features which the corresponding backer supports. */
struct dpif_backer_support {
@@ -361,6 +364,11 @@ int ofproto_dpif_add_internal_flow(struct ofproto_dpif *,
struct rule **rulep);
int ofproto_dpif_delete_internal_flow(struct ofproto_dpif *, struct match *,
int priority);
+int ofproto_dpif_bundle_add(struct ofproto_dpif *,
+ uint32_t bond_id,
+ uint32_t slave_map[]);
+int ofproto_dpif_bundle_del(struct ofproto_dpif *,
+ uint32_t bond_id);
bool ovs_native_tunneling_is_on(struct ofproto_dpif *);
@@ -121,6 +121,7 @@ AT_CHECK([ovs-appctl bond/show], [0], [dnl
bond_mode: active-backup
bond may use recirculation: no, Recirc-ID : -1
bond-hash-basis: 0
+bond-hash-rss: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -286,6 +287,7 @@ slave: p3: current attached
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+bond-hash-rss: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -301,6 +303,7 @@ slave p1: enabled
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+bond-hash-rss: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -423,6 +426,7 @@ slave: p3: current attached
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+bond-hash-rss: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -440,6 +444,7 @@ slave p1: enabled
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+bond-hash-rss: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -555,6 +560,7 @@ slave: p3: current attached
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+bond-hash-rss: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -572,6 +578,7 @@ slave p1: enabled
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+bond-hash-rss: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -692,6 +699,7 @@ slave: p3: current attached
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+bond-hash-rss: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -709,6 +717,7 @@ slave p1: enabled
bond_mode: balance-tcp
bond may use recirculation: yes, <del>
bond-hash-basis: 0
+bond-hash-rss: disabled
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
@@ -4300,6 +4300,10 @@ port_configure_bond(struct port *port, struct bond_settings *s)
/* OVSDB did not store the last active interface */
s->active_slave_mac = eth_addr_zero;
}
+ if (s->balance == BM_TCP) {
+ s->use_rss_hash = smap_get_bool(&port->cfg->other_config,
+ "bond-hash-rss", false);
+ }
}
/* Returns true if 'port' is synthetic, that is, if we constructed it locally
@@ -1963,6 +1963,16 @@
<code>active-backup</code>.
</column>
+ <column name="other_config" key="bond-hash-rss"
+ type='{"type": "boolean"}'>
+ Enables or disables use of the RSS hash from the ingress port for
+ load balancing flows among the output slaves of bonds in
+ <code>balance-tcp</code> mode. When enabled, an optimized path
+ using the RSS hash is taken and recirculation is avoided.
+ It affects only new flows, i.e., existing flows remain unchanged.
+ This knob does not affect other balancing modes.
+ </column>
+
<group title="Link Failure Detection">
<p>
An important part of link bonding is detecting that links are down so