
[ovs-dev,v3,1/2] netdev-dpdk: Use instant sending instead of queueing of packets.

Message ID 1467034097-31485-2-git-send-email-i.maximets@samsung.com
State Accepted
Headers show

Commit Message

Ilya Maximets June 27, 2016, 1:28 p.m. UTC
The current implementation of TX packet queueing is broken in several ways:

	* TX queue flushing, performed on receive, assumes that all
	  core_id-s are sequential and start from zero. This can leave
	  packets stuck in a queue forever and also hurts latency.

	* The flushing logic has long depended on the uninitialized
	  'txq_needs_locking', because it is usually computed after
	  'netdev_dpdk_alloc_txq' but is used inside that function to
	  initialize 'flush_tx'.

Testing shows no performance difference with and without queueing.
Let's remove queueing entirely, since it does not work properly and
does not improve performance either.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
---
 lib/netdev-dpdk.c | 101 ++++++++----------------------------------------------
 1 file changed, 14 insertions(+), 87 deletions(-)
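
In place of the removed queueing path, the patch sends every burst straight
to the NIC. Below is a minimal sketch of that path, condensed from
netdev_dpdk_eth_tx_burst() in the diff further down; the helper name
eth_tx_burst_all and the omission of the stats_lock accounting are
simplifications made here for illustration, not part of the patch.

    #include <rte_ethdev.h>
    #include <rte_mbuf.h>

    /* Condensed from netdev_dpdk_eth_tx_burst(): push the whole burst to
     * the NIC right away, retrying partial sends, and free whatever the
     * NIC will not accept.  The real function also counts the freed
     * packets as tx_dropped under dev->stats_lock, omitted here. */
    static inline int
    eth_tx_burst_all(int port_id, int qid, struct rte_mbuf **pkts, int cnt)
    {
        int nb_tx = 0;

        while (nb_tx != cnt) {
            uint16_t ret = rte_eth_tx_burst(port_id, qid,
                                            pkts + nb_tx, cnt - nb_tx);
            if (!ret) {
                break;          /* TX descriptor ring is full; give up. */
            }
            nb_tx += ret;
        }

        /* Free untransmitted mbufs one at a time: each packet could come
         * from a different mempool. */
        for (int i = nb_tx; i < cnt; i++) {
            rte_pktmbuf_free(pkts[i]);
        }

        return cnt - nb_tx;     /* Number of packets dropped. */
    }

Compared with the removed dpdk_queue_pkts()/dpdk_queue_flush() pair, there
is no per-queue burst buffer, no DRAIN_TSC timeout and no flush-on-receive:
by the time the call returns, every packet handed in has either been
transmitted or freed and counted as dropped.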

Comments

Daniele Di Proietto July 6, 2016, 10:48 p.m. UTC | #1
I applied this to master, thanks



On 27/06/2016 06:28, "Ilya Maximets" <i.maximets@samsung.com> wrote:

>Current implementarion of TX packet's queueing is broken in several ways:

>

>	* TX queue flushing implemented on receive assumes that all

>	  core_id-s are sequential and starts from zero. This may lead

>	  to situation when packets will stuck in queue forever and,

>	  also, this influences on latency.

>

>	* For a long time flushing logic depends on uninitialized

>	  'txq_needs_locking', because it usually calculated after

>	  'netdev_dpdk_alloc_txq' but used inside of this function

>	  for initialization of 'flush_tx'.

>

>Testing shows no performance difference with and without queueing.

>Lets remove queueing at all because it doesn't work properly now and

>also does not increase performance.

>

>Signed-off-by: Ilya Maximets <i.maximets@samsung.com>

>Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

>---

> lib/netdev-dpdk.c | 101 ++++++++----------------------------------------------

> 1 file changed, 14 insertions(+), 87 deletions(-)

>

>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c

>index 02e2c58..8bb33d6 100644

>--- a/lib/netdev-dpdk.c

>+++ b/lib/netdev-dpdk.c

>@@ -165,7 +165,6 @@ static const struct rte_eth_conf port_conf = {

>     },

> };

> 

>-enum { MAX_TX_QUEUE_LEN = 384 };

> enum { DPDK_RING_SIZE = 256 };

> BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));

> enum { DRAIN_TSC = 200000ULL };

>@@ -282,8 +281,7 @@ static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mutex)

>     = OVS_LIST_INITIALIZER(&dpdk_mp_list);

> 

> /* This mutex must be used by non pmd threads when allocating or freeing

>- * mbufs through mempools. Since dpdk_queue_pkts() and dpdk_queue_flush() may

>- * use mempools, a non pmd thread should hold this mutex while calling them */

>+ * mbufs through mempools. */

> static struct ovs_mutex nonpmd_mempool_mutex = OVS_MUTEX_INITIALIZER;

> 

> struct dpdk_mp {

>@@ -297,17 +295,12 @@ struct dpdk_mp {

> /* There should be one 'struct dpdk_tx_queue' created for

>  * each cpu core. */

> struct dpdk_tx_queue {

>-    bool flush_tx;                 /* Set to true to flush queue everytime */

>-                                   /* pkts are queued. */

>-    int count;

>     rte_spinlock_t tx_lock;        /* Protects the members and the NIC queue

>                                     * from concurrent access.  It is used only

>                                     * if the queue is shared among different

>                                     * pmd threads (see 'txq_needs_locking'). */

>     int map;                       /* Mapping of configured vhost-user queues

>                                     * to enabled by guest. */

>-    uint64_t tsc;

>-    struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN];

> };

> 

> /* dpdk has no way to remove dpdk ring ethernet devices

>@@ -720,19 +713,6 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *dev, unsigned int n_txqs)

> 

>     dev->tx_q = dpdk_rte_mzalloc(n_txqs * sizeof *dev->tx_q);

>     for (i = 0; i < n_txqs; i++) {

>-        int numa_id = ovs_numa_get_numa_id(i);

>-

>-        if (!dev->txq_needs_locking) {

>-            /* Each index is considered as a cpu core id, since there should

>-             * be one tx queue for each cpu core.  If the corresponding core

>-             * is not on the same numa node as 'dev', flags the

>-             * 'flush_tx'. */

>-            dev->tx_q[i].flush_tx = dev->socket_id == numa_id;

>-        } else {

>-            /* Queues are shared among CPUs. Always flush */

>-            dev->tx_q[i].flush_tx = true;

>-        }

>-

>         /* Initialize map for vhost devices. */

>         dev->tx_q[i].map = OVS_VHOST_QUEUE_MAP_UNKNOWN;

>         rte_spinlock_init(&dev->tx_q[i].tx_lock);

>@@ -1088,16 +1068,15 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)

> }

> 

> static inline void

>-dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)

>+netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,

>+                             struct rte_mbuf **pkts, int cnt)

> {

>-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];

>     uint32_t nb_tx = 0;

> 

>-    while (nb_tx != txq->count) {

>+    while (nb_tx != cnt) {

>         uint32_t ret;

> 

>-        ret = rte_eth_tx_burst(dev->port_id, qid, txq->burst_pkts + nb_tx,

>-                               txq->count - nb_tx);

>+        ret = rte_eth_tx_burst(dev->port_id, qid, pkts + nb_tx, cnt - nb_tx);

>         if (!ret) {

>             break;

>         }

>@@ -1105,32 +1084,18 @@ dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)

>         nb_tx += ret;

>     }

> 

>-    if (OVS_UNLIKELY(nb_tx != txq->count)) {

>+    if (OVS_UNLIKELY(nb_tx != cnt)) {

>         /* free buffers, which we couldn't transmit, one at a time (each

>          * packet could come from a different mempool) */

>         int i;

> 

>-        for (i = nb_tx; i < txq->count; i++) {

>-            rte_pktmbuf_free(txq->burst_pkts[i]);

>+        for (i = nb_tx; i < cnt; i++) {

>+            rte_pktmbuf_free(pkts[i]);

>         }

>         rte_spinlock_lock(&dev->stats_lock);

>-        dev->stats.tx_dropped += txq->count-nb_tx;

>+        dev->stats.tx_dropped += cnt - nb_tx;

>         rte_spinlock_unlock(&dev->stats_lock);

>     }

>-

>-    txq->count = 0;

>-    txq->tsc = rte_get_timer_cycles();

>-}

>-

>-static inline void

>-dpdk_queue_flush(struct netdev_dpdk *dev, int qid)

>-{

>-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];

>-

>-    if (txq->count == 0) {

>-        return;

>-    }

>-    dpdk_queue_flush__(dev, qid);

> }

> 

> static inline bool

>@@ -1303,15 +1268,6 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,

>     int nb_rx;

>     int dropped = 0;

> 

>-    /* There is only one tx queue for this core.  Do not flush other

>-     * queues.

>-     * Do not flush tx queue which is shared among CPUs

>-     * since it is always flushed */

>-    if (rxq->queue_id == rte_lcore_id() &&

>-        OVS_LIKELY(!dev->txq_needs_locking)) {

>-        dpdk_queue_flush(dev, rxq->queue_id);

>-    }

>-

>     nb_rx = rte_eth_rx_burst(rx->port_id, rxq->queue_id,

>                              (struct rte_mbuf **) packets,

>                              NETDEV_MAX_BURST);

>@@ -1433,35 +1389,6 @@ out:

>     }

> }

> 

>-inline static void

>-dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,

>-               struct rte_mbuf **pkts, int cnt)

>-{

>-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];

>-    uint64_t diff_tsc;

>-

>-    int i = 0;

>-

>-    while (i < cnt) {

>-        int freeslots = MAX_TX_QUEUE_LEN - txq->count;

>-        int tocopy = MIN(freeslots, cnt-i);

>-

>-        memcpy(&txq->burst_pkts[txq->count], &pkts[i],

>-               tocopy * sizeof (struct rte_mbuf *));

>-

>-        txq->count += tocopy;

>-        i += tocopy;

>-

>-        if (txq->count == MAX_TX_QUEUE_LEN || txq->flush_tx) {

>-            dpdk_queue_flush__(dev, qid);

>-        }

>-        diff_tsc = rte_get_timer_cycles() - txq->tsc;

>-        if (diff_tsc >= DRAIN_TSC) {

>-            dpdk_queue_flush__(dev, qid);

>-        }

>-    }

>-}

>-

> /* Tx function. Transmit packets indefinitely */

> static void

> dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,

>@@ -1527,8 +1454,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,

>         newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);

> 

>         dropped += qos_pkts - newcnt;

>-        dpdk_queue_pkts(dev, qid, mbufs, newcnt);

>-        dpdk_queue_flush(dev, qid);

>+        netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);

>     }

> 

>     if (OVS_UNLIKELY(dropped)) {

>@@ -1611,7 +1537,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,

>                     temp_cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts,

>                                                 temp_cnt);

>                     dropped += qos_pkts - temp_cnt;

>-                    dpdk_queue_pkts(dev, qid,

>+                    netdev_dpdk_eth_tx_burst(dev, qid,

>                                     (struct rte_mbuf **)&pkts[next_tx_idx],

>                                     temp_cnt);

> 

>@@ -1631,8 +1557,9 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,

> 

>             cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts, cnt);

>             dropped += qos_pkts - cnt;

>-            dpdk_queue_pkts(dev, qid, (struct rte_mbuf **)&pkts[next_tx_idx],

>-                            cnt);

>+            netdev_dpdk_eth_tx_burst(dev, qid,

>+                                     (struct rte_mbuf **)&pkts[next_tx_idx],

>+                                     cnt);

>         }

> 

>         if (OVS_UNLIKELY(dropped)) {

>-- 

>2.7.4

>
Daniele Di Proietto July 15, 2016, 1:58 a.m. UTC | #2
This commit fixes a problem in branch-2.5.

Commit c293b7c7f43a ("dpif-netdev: Unique and sequential tx_qids.") stopped
using the core_id as the txq_id for pmd threads, but the flushing logic
in netdev_dpdk_rxq_recv() still assumes that the txq_id is equal to the
core_id.
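
For context, the check in question is the one this patch removes from
netdev_dpdk_rxq_recv() (reproduced from the diff below, with a comment
added here to spell out the broken assumption):

    /* Flush-on-receive, removed by this patch.  It only fires when the rx
     * queue id of the polling thread equals its lcore id.  After commit
     * c293b7c7f43a txq ids are assigned sequentially and no longer match
     * core ids, so for some queues this condition is never true and the
     * packets queued on them are never flushed. */
    if (rxq->queue_id == rte_lcore_id() &&
        OVS_LIKELY(!dev->txq_needs_locking)) {
        dpdk_queue_flush(dev, rxq->queue_id);
    }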

I see two ways to fix this:

1) Revert c293b7c7f43a on branch-2.5.
2) Backport this patch to branch-2.5.

Given that the flushing logic was so contorted, I'd prefer to backport
this patch to branch-2.5, if I don't hear any objections.

Thanks,

Daniele


On 06/07/2016 15:48, "Daniele Di Proietto" <diproiettod@vmware.com> wrote:

>I applied this to master, thanks

>

>

>

>On 27/06/2016 06:28, "Ilya Maximets" <i.maximets@samsung.com> wrote:

>

>>Current implementarion of TX packet's queueing is broken in several ways:

>>

>>	* TX queue flushing implemented on receive assumes that all

>>	  core_id-s are sequential and starts from zero. This may lead

>>	  to situation when packets will stuck in queue forever and,

>>	  also, this influences on latency.

>>

>>	* For a long time flushing logic depends on uninitialized

>>	  'txq_needs_locking', because it usually calculated after

>>	  'netdev_dpdk_alloc_txq' but used inside of this function

>>	  for initialization of 'flush_tx'.

>>

>>Testing shows no performance difference with and without queueing.

>>Lets remove queueing at all because it doesn't work properly now and

>>also does not increase performance.

>>

>>Signed-off-by: Ilya Maximets <i.maximets@samsung.com>

>>Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

>>---

>> lib/netdev-dpdk.c | 101 ++++++++----------------------------------------------

>> 1 file changed, 14 insertions(+), 87 deletions(-)

>>

>>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c

>>index 02e2c58..8bb33d6 100644

>>--- a/lib/netdev-dpdk.c

>>+++ b/lib/netdev-dpdk.c

>>@@ -165,7 +165,6 @@ static const struct rte_eth_conf port_conf = {

>>     },

>> };

>> 

>>-enum { MAX_TX_QUEUE_LEN = 384 };

>> enum { DPDK_RING_SIZE = 256 };

>> BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));

>> enum { DRAIN_TSC = 200000ULL };

>>@@ -282,8 +281,7 @@ static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mutex)

>>     = OVS_LIST_INITIALIZER(&dpdk_mp_list);

>> 

>> /* This mutex must be used by non pmd threads when allocating or freeing

>>- * mbufs through mempools. Since dpdk_queue_pkts() and dpdk_queue_flush() may

>>- * use mempools, a non pmd thread should hold this mutex while calling them */

>>+ * mbufs through mempools. */

>> static struct ovs_mutex nonpmd_mempool_mutex = OVS_MUTEX_INITIALIZER;

>> 

>> struct dpdk_mp {

>>@@ -297,17 +295,12 @@ struct dpdk_mp {

>> /* There should be one 'struct dpdk_tx_queue' created for

>>  * each cpu core. */

>> struct dpdk_tx_queue {

>>-    bool flush_tx;                 /* Set to true to flush queue everytime */

>>-                                   /* pkts are queued. */

>>-    int count;

>>     rte_spinlock_t tx_lock;        /* Protects the members and the NIC queue

>>                                     * from concurrent access.  It is used only

>>                                     * if the queue is shared among different

>>                                     * pmd threads (see 'txq_needs_locking'). */

>>     int map;                       /* Mapping of configured vhost-user queues

>>                                     * to enabled by guest. */

>>-    uint64_t tsc;

>>-    struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN];

>> };

>> 

>> /* dpdk has no way to remove dpdk ring ethernet devices

>>@@ -720,19 +713,6 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *dev, unsigned int n_txqs)

>> 

>>     dev->tx_q = dpdk_rte_mzalloc(n_txqs * sizeof *dev->tx_q);

>>     for (i = 0; i < n_txqs; i++) {

>>-        int numa_id = ovs_numa_get_numa_id(i);

>>-

>>-        if (!dev->txq_needs_locking) {

>>-            /* Each index is considered as a cpu core id, since there should

>>-             * be one tx queue for each cpu core.  If the corresponding core

>>-             * is not on the same numa node as 'dev', flags the

>>-             * 'flush_tx'. */

>>-            dev->tx_q[i].flush_tx = dev->socket_id == numa_id;

>>-        } else {

>>-            /* Queues are shared among CPUs. Always flush */

>>-            dev->tx_q[i].flush_tx = true;

>>-        }

>>-

>>         /* Initialize map for vhost devices. */

>>         dev->tx_q[i].map = OVS_VHOST_QUEUE_MAP_UNKNOWN;

>>         rte_spinlock_init(&dev->tx_q[i].tx_lock);

>>@@ -1088,16 +1068,15 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)

>> }

>> 

>> static inline void

>>-dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)

>>+netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,

>>+                             struct rte_mbuf **pkts, int cnt)

>> {

>>-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];

>>     uint32_t nb_tx = 0;

>> 

>>-    while (nb_tx != txq->count) {

>>+    while (nb_tx != cnt) {

>>         uint32_t ret;

>> 

>>-        ret = rte_eth_tx_burst(dev->port_id, qid, txq->burst_pkts + nb_tx,

>>-                               txq->count - nb_tx);

>>+        ret = rte_eth_tx_burst(dev->port_id, qid, pkts + nb_tx, cnt - nb_tx);

>>         if (!ret) {

>>             break;

>>         }

>>@@ -1105,32 +1084,18 @@ dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)

>>         nb_tx += ret;

>>     }

>> 

>>-    if (OVS_UNLIKELY(nb_tx != txq->count)) {

>>+    if (OVS_UNLIKELY(nb_tx != cnt)) {

>>         /* free buffers, which we couldn't transmit, one at a time (each

>>          * packet could come from a different mempool) */

>>         int i;

>> 

>>-        for (i = nb_tx; i < txq->count; i++) {

>>-            rte_pktmbuf_free(txq->burst_pkts[i]);

>>+        for (i = nb_tx; i < cnt; i++) {

>>+            rte_pktmbuf_free(pkts[i]);

>>         }

>>         rte_spinlock_lock(&dev->stats_lock);

>>-        dev->stats.tx_dropped += txq->count-nb_tx;

>>+        dev->stats.tx_dropped += cnt - nb_tx;

>>         rte_spinlock_unlock(&dev->stats_lock);

>>     }

>>-

>>-    txq->count = 0;

>>-    txq->tsc = rte_get_timer_cycles();

>>-}

>>-

>>-static inline void

>>-dpdk_queue_flush(struct netdev_dpdk *dev, int qid)

>>-{

>>-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];

>>-

>>-    if (txq->count == 0) {

>>-        return;

>>-    }

>>-    dpdk_queue_flush__(dev, qid);

>> }

>> 

>> static inline bool

>>@@ -1303,15 +1268,6 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,

>>     int nb_rx;

>>     int dropped = 0;

>> 

>>-    /* There is only one tx queue for this core.  Do not flush other

>>-     * queues.

>>-     * Do not flush tx queue which is shared among CPUs

>>-     * since it is always flushed */

>>-    if (rxq->queue_id == rte_lcore_id() &&

>>-        OVS_LIKELY(!dev->txq_needs_locking)) {

>>-        dpdk_queue_flush(dev, rxq->queue_id);

>>-    }

>>-

>>     nb_rx = rte_eth_rx_burst(rx->port_id, rxq->queue_id,

>>                              (struct rte_mbuf **) packets,

>>                              NETDEV_MAX_BURST);

>>@@ -1433,35 +1389,6 @@ out:

>>     }

>> }

>> 

>>-inline static void

>>-dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,

>>-               struct rte_mbuf **pkts, int cnt)

>>-{

>>-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];

>>-    uint64_t diff_tsc;

>>-

>>-    int i = 0;

>>-

>>-    while (i < cnt) {

>>-        int freeslots = MAX_TX_QUEUE_LEN - txq->count;

>>-        int tocopy = MIN(freeslots, cnt-i);

>>-

>>-        memcpy(&txq->burst_pkts[txq->count], &pkts[i],

>>-               tocopy * sizeof (struct rte_mbuf *));

>>-

>>-        txq->count += tocopy;

>>-        i += tocopy;

>>-

>>-        if (txq->count == MAX_TX_QUEUE_LEN || txq->flush_tx) {

>>-            dpdk_queue_flush__(dev, qid);

>>-        }

>>-        diff_tsc = rte_get_timer_cycles() - txq->tsc;

>>-        if (diff_tsc >= DRAIN_TSC) {

>>-            dpdk_queue_flush__(dev, qid);

>>-        }

>>-    }

>>-}

>>-

>> /* Tx function. Transmit packets indefinitely */

>> static void

>> dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,

>>@@ -1527,8 +1454,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,

>>         newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);

>> 

>>         dropped += qos_pkts - newcnt;

>>-        dpdk_queue_pkts(dev, qid, mbufs, newcnt);

>>-        dpdk_queue_flush(dev, qid);

>>+        netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);

>>     }

>> 

>>     if (OVS_UNLIKELY(dropped)) {

>>@@ -1611,7 +1537,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,

>>                     temp_cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts,

>>                                                 temp_cnt);

>>                     dropped += qos_pkts - temp_cnt;

>>-                    dpdk_queue_pkts(dev, qid,

>>+                    netdev_dpdk_eth_tx_burst(dev, qid,

>>                                     (struct rte_mbuf **)&pkts[next_tx_idx],

>>                                     temp_cnt);

>> 

>>@@ -1631,8 +1557,9 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,

>> 

>>             cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts, cnt);

>>             dropped += qos_pkts - cnt;

>>-            dpdk_queue_pkts(dev, qid, (struct rte_mbuf **)&pkts[next_tx_idx],

>>-                            cnt);

>>+            netdev_dpdk_eth_tx_burst(dev, qid,

>>+                                     (struct rte_mbuf **)&pkts[next_tx_idx],

>>+                                     cnt);

>>         }

>> 

>>         if (OVS_UNLIKELY(dropped)) {

>>-- 

>>2.7.4

>>
diff mbox

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 02e2c58..8bb33d6 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -165,7 +165,6 @@  static const struct rte_eth_conf port_conf = {
     },
 };
 
-enum { MAX_TX_QUEUE_LEN = 384 };
 enum { DPDK_RING_SIZE = 256 };
 BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
 enum { DRAIN_TSC = 200000ULL };
@@ -282,8 +281,7 @@  static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mutex)
     = OVS_LIST_INITIALIZER(&dpdk_mp_list);
 
 /* This mutex must be used by non pmd threads when allocating or freeing
- * mbufs through mempools. Since dpdk_queue_pkts() and dpdk_queue_flush() may
- * use mempools, a non pmd thread should hold this mutex while calling them */
+ * mbufs through mempools. */
 static struct ovs_mutex nonpmd_mempool_mutex = OVS_MUTEX_INITIALIZER;
 
 struct dpdk_mp {
@@ -297,17 +295,12 @@  struct dpdk_mp {
 /* There should be one 'struct dpdk_tx_queue' created for
  * each cpu core. */
 struct dpdk_tx_queue {
-    bool flush_tx;                 /* Set to true to flush queue everytime */
-                                   /* pkts are queued. */
-    int count;
     rte_spinlock_t tx_lock;        /* Protects the members and the NIC queue
                                     * from concurrent access.  It is used only
                                     * if the queue is shared among different
                                     * pmd threads (see 'txq_needs_locking'). */
     int map;                       /* Mapping of configured vhost-user queues
                                     * to enabled by guest. */
-    uint64_t tsc;
-    struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN];
 };
 
 /* dpdk has no way to remove dpdk ring ethernet devices
@@ -720,19 +713,6 @@  netdev_dpdk_alloc_txq(struct netdev_dpdk *dev, unsigned int n_txqs)
 
     dev->tx_q = dpdk_rte_mzalloc(n_txqs * sizeof *dev->tx_q);
     for (i = 0; i < n_txqs; i++) {
-        int numa_id = ovs_numa_get_numa_id(i);
-
-        if (!dev->txq_needs_locking) {
-            /* Each index is considered as a cpu core id, since there should
-             * be one tx queue for each cpu core.  If the corresponding core
-             * is not on the same numa node as 'dev', flags the
-             * 'flush_tx'. */
-            dev->tx_q[i].flush_tx = dev->socket_id == numa_id;
-        } else {
-            /* Queues are shared among CPUs. Always flush */
-            dev->tx_q[i].flush_tx = true;
-        }
-
         /* Initialize map for vhost devices. */
         dev->tx_q[i].map = OVS_VHOST_QUEUE_MAP_UNKNOWN;
         rte_spinlock_init(&dev->tx_q[i].tx_lock);
@@ -1088,16 +1068,15 @@  netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
 }
 
 static inline void
-dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
+netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
+                             struct rte_mbuf **pkts, int cnt)
 {
-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
     uint32_t nb_tx = 0;
 
-    while (nb_tx != txq->count) {
+    while (nb_tx != cnt) {
         uint32_t ret;
 
-        ret = rte_eth_tx_burst(dev->port_id, qid, txq->burst_pkts + nb_tx,
-                               txq->count - nb_tx);
+        ret = rte_eth_tx_burst(dev->port_id, qid, pkts + nb_tx, cnt - nb_tx);
         if (!ret) {
             break;
         }
@@ -1105,32 +1084,18 @@  dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
         nb_tx += ret;
     }
 
-    if (OVS_UNLIKELY(nb_tx != txq->count)) {
+    if (OVS_UNLIKELY(nb_tx != cnt)) {
         /* free buffers, which we couldn't transmit, one at a time (each
          * packet could come from a different mempool) */
         int i;
 
-        for (i = nb_tx; i < txq->count; i++) {
-            rte_pktmbuf_free(txq->burst_pkts[i]);
+        for (i = nb_tx; i < cnt; i++) {
+            rte_pktmbuf_free(pkts[i]);
         }
         rte_spinlock_lock(&dev->stats_lock);
-        dev->stats.tx_dropped += txq->count-nb_tx;
+        dev->stats.tx_dropped += cnt - nb_tx;
         rte_spinlock_unlock(&dev->stats_lock);
     }
-
-    txq->count = 0;
-    txq->tsc = rte_get_timer_cycles();
-}
-
-static inline void
-dpdk_queue_flush(struct netdev_dpdk *dev, int qid)
-{
-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
-
-    if (txq->count == 0) {
-        return;
-    }
-    dpdk_queue_flush__(dev, qid);
 }
 
 static inline bool
@@ -1303,15 +1268,6 @@  netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
     int nb_rx;
     int dropped = 0;
 
-    /* There is only one tx queue for this core.  Do not flush other
-     * queues.
-     * Do not flush tx queue which is shared among CPUs
-     * since it is always flushed */
-    if (rxq->queue_id == rte_lcore_id() &&
-        OVS_LIKELY(!dev->txq_needs_locking)) {
-        dpdk_queue_flush(dev, rxq->queue_id);
-    }
-
     nb_rx = rte_eth_rx_burst(rx->port_id, rxq->queue_id,
                              (struct rte_mbuf **) packets,
                              NETDEV_MAX_BURST);
@@ -1433,35 +1389,6 @@  out:
     }
 }
 
-inline static void
-dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
-               struct rte_mbuf **pkts, int cnt)
-{
-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
-    uint64_t diff_tsc;
-
-    int i = 0;
-
-    while (i < cnt) {
-        int freeslots = MAX_TX_QUEUE_LEN - txq->count;
-        int tocopy = MIN(freeslots, cnt-i);
-
-        memcpy(&txq->burst_pkts[txq->count], &pkts[i],
-               tocopy * sizeof (struct rte_mbuf *));
-
-        txq->count += tocopy;
-        i += tocopy;
-
-        if (txq->count == MAX_TX_QUEUE_LEN || txq->flush_tx) {
-            dpdk_queue_flush__(dev, qid);
-        }
-        diff_tsc = rte_get_timer_cycles() - txq->tsc;
-        if (diff_tsc >= DRAIN_TSC) {
-            dpdk_queue_flush__(dev, qid);
-        }
-    }
-}
-
 /* Tx function. Transmit packets indefinitely */
 static void
 dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
@@ -1527,8 +1454,7 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
         newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
 
         dropped += qos_pkts - newcnt;
-        dpdk_queue_pkts(dev, qid, mbufs, newcnt);
-        dpdk_queue_flush(dev, qid);
+        netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);
     }
 
     if (OVS_UNLIKELY(dropped)) {
@@ -1611,7 +1537,7 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
                     temp_cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts,
                                                 temp_cnt);
                     dropped += qos_pkts - temp_cnt;
-                    dpdk_queue_pkts(dev, qid,
+                    netdev_dpdk_eth_tx_burst(dev, qid,
                                     (struct rte_mbuf **)&pkts[next_tx_idx],
                                     temp_cnt);
 
@@ -1631,8 +1557,9 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
 
             cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts, cnt);
             dropped += qos_pkts - cnt;
-            dpdk_queue_pkts(dev, qid, (struct rte_mbuf **)&pkts[next_tx_idx],
-                            cnt);
+            netdev_dpdk_eth_tx_burst(dev, qid,
+                                     (struct rte_mbuf **)&pkts[next_tx_idx],
+                                     cnt);
         }
 
         if (OVS_UNLIKELY(dropped)) {