@@ -92,6 +92,7 @@ v2.9.0 - 19 Feb 2018
pmd assignments.
* Add rxq utilization of pmd to appctl 'dpif-netdev/pmd-rxq-show'.
* Add support for vHost dequeue zero copy (experimental)
+ * Add support for multi-segment mbufs
- Userspace datapath:
* Output packet batching support.
- vswitchd:
@@ -459,6 +459,13 @@ dpdk_init__(const struct smap *ovs_other_config)
/* Finally, register the dpdk classes */
netdev_dpdk_register();
+
+ bool multi_seg_mbufs_enable = smap_get_bool(ovs_other_config,
+ "dpdk-multi-seg-mbufs", false);
+ if (multi_seg_mbufs_enable) {
+        VLOG_INFO("DPDK multi-segment mbufs enabled");
+ netdev_dpdk_multi_segment_mbufs_enable();
+ }
}
void
@@ -66,6 +66,7 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
VLOG_DEFINE_THIS_MODULE(netdev_dpdk);
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+static bool dpdk_multi_segment_mbufs = false;
#define DPDK_PORT_WATCHDOG_INTERVAL 5
@@ -593,6 +594,7 @@ dpdk_mp_create(struct netdev_dpdk *dev, uint16_t mbuf_pkt_data_len)
+ dev->requested_n_txq * dev->requested_txq_size
+ MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST
+ MIN_NB_MBUF;
+ /* XXX: should n_mbufs be increased if multi-seg mbufs are used? */
ovs_mutex_lock(&dpdk_mp_mutex);
do {
@@ -693,7 +695,13 @@ dpdk_mp_release(struct rte_mempool *mp)
/* Tries to allocate a new mempool - or re-use an existing one where
* appropriate - on requested_socket_id with a size determined by
- * requested_mtu and requested Rx/Tx queues.
+ * requested_mtu and requested Rx/Tx queues. Some properties of the mempool's
+ * elements are dependent on the value of 'dpdk_multi_segment_mbufs':
+ * - if 'true', then the mempool contains standard-sized mbufs that are chained
+ * together to accommodate packets of size 'requested_mtu'.
+ * - if 'false', then the members of the allocated mempool are
+ * non-standard-sized mbufs. Each mbuf in the mempool is large enough to
+ *   fully accommodate packets of size 'requested_mtu'.
* On success - or when re-using an existing mempool - the new configuration
* will be applied.
* On error, device will be left unchanged. */
@@ -701,10 +709,18 @@ static int
netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
OVS_REQUIRES(dev->mutex)
{
- uint16_t buf_size = dpdk_buf_size(dev->requested_mtu);
+ uint16_t buf_size = 0;
struct rte_mempool *mp;
int ret = 0;
+ /* Contiguous mbufs in use - permit oversized mbufs */
+ if (!dpdk_multi_segment_mbufs) {
+ buf_size = dpdk_buf_size(dev->requested_mtu);
+ } else {
+ /* multi-segment mbufs - use standard mbuf size */
+ buf_size = dpdk_buf_size(ETHER_MTU);
+ }
+
dpdk_mp_sweep();
mp = dpdk_mp_create(dev, buf_size);
@@ -786,11 +802,25 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
int diag = 0;
int i;
struct rte_eth_conf conf = port_conf;
+ struct rte_eth_txconf txconf;
+
+ /* Multi-segment-mbuf-specific setup. */
+ if (dpdk_multi_segment_mbufs) {
+ struct rte_eth_dev_info dev_info;
+
+ /* DPDK PMDs typically attempt to use simple or vectorized
+ * transmit functions, neither of which are compatible with
+ * multi-segment mbufs. Ensure that these are disabled when
+ * multi-segment mbufs are enabled.
+ */
+ rte_eth_dev_info_get(dev->port_id, &dev_info);
+ txconf = dev_info.default_txconf;
+ txconf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS;
- /* For some NICs (e.g. Niantic), scatter_rx mode needs to be explicitly
- * enabled. */
- if (dev->mtu > ETHER_MTU) {
- conf.rxmode.enable_scatter = 1;
+ /* For some NICs (e.g. Niantic), scattered_rx mode (required for
+ * ingress jumbo frames when multi-segments are enabled) needs to
+ * be explicitly enabled. */
+ conf.rxmode.enable_scatter = 1;
}
conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
@@ -821,7 +851,9 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
for (i = 0; i < n_txq; i++) {
diag = rte_eth_tx_queue_setup(dev->port_id, i, dev->txq_size,
- dev->socket_id, NULL);
+ dev->socket_id,
+ dpdk_multi_segment_mbufs ? &txconf
+ : NULL);
if (diag) {
VLOG_INFO("Interface %s unable to setup txq(%d): %s",
dev->up.name, i, rte_strerror(-diag));
@@ -3868,6 +3900,12 @@ unlock:
return err;
}
+void
+netdev_dpdk_multi_segment_mbufs_enable(void)
+{
+ dpdk_multi_segment_mbufs = true;
+}
+
#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, \
SET_CONFIG, SET_TX_MULTIQ, SEND, \
GET_CARRIER, GET_STATS, \
@@ -25,6 +25,7 @@ struct dp_packet;
#ifdef DPDK_NETDEV
+void netdev_dpdk_multi_segment_mbufs_enable(void);
void netdev_dpdk_register(void);
void free_dpdk_buf(struct dp_packet *);
@@ -331,6 +331,26 @@
</p>
</column>
+ <column name="other_config" key="dpdk-multi-seg-mbufs"
+ type='{"type": "boolean"}'>
+ <p>
+          Specifies whether DPDK uses multi-segment mbufs for handling jumbo frames.
+ </p>
+ <p>
+ If true, DPDK allocates a single mempool per port, irrespective
+ of the ports' requested MTU sizes. The elements of this mempool are
+          'standard'-sized mbufs (typically 2KB), which may be chained
+ together to accommodate jumbo frames. In this approach, each mbuf
+ typically stores a fragment of the overall jumbo frame.
+ </p>
+ <p>
+          If not specified, defaults to <code>false</code>, in which case
+ the size of each mbuf within a DPDK port's mempool will be grown to
+ accommodate jumbo frames within a single mbuf.
+ </p>
+ </column>
+
+
<column name="other_config" key="vhost-sock-dir"
type='{"type": "string"}'>
<p>