[ovs-dev,v10,10/14] netdev-dpdk: copy large packet to multi-seg. mbufs

Message ID 1538151315-117132-11-git-send-email-tiago.lam@intel.com
State Superseded
Headers show
Series
  • Support multi-segment mbufs
Related show

Commit Message

Lam, Tiago Sept. 28, 2018, 4:15 p.m.
From: Mark Kavanagh <mark.b.kavanagh@intel.com>

Currently, packets are only copied to a single segment in the function
dpdk_do_tx_copy(). This could be an issue in the case of jumbo frames,
particularly when multi-segment mbufs are involved.

This patch calculates the number of segments needed by a packet and
copies the data to each segment.

A new function, dpdk_buf_alloc(), has also been introduced as a wrapper
around the nonpmd_mp_mutex to serialise allocations from a non-pmd
context.

Co-authored-by: Michael Qiu <qiudayu@chinac.com>
Co-authored-by: Tiago Lam <tiago.lam@intel.com>

Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
Signed-off-by: Michael Qiu <qiudayu@chinac.com>
Signed-off-by: Tiago Lam <tiago.lam@intel.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
---
 lib/netdev-dpdk.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 82 insertions(+), 9 deletions(-)

Comments

Flavio Leitner Oct. 3, 2018, 6:27 p.m. | #1
On Fri, Sep 28, 2018 at 05:15:11PM +0100, Tiago Lam wrote:
> From: Mark Kavanagh <mark.b.kavanagh@intel.com>
> 
> Currently, packets are only copied to a single segment in the function
> dpdk_do_tx_copy(). This could be an issue in the case of jumbo frames,
> particularly when multi-segment mbufs are involved.
> 
> This patch calculates the number of segments needed by a packet and
> copies the data to each segment.
> 
> A new function, dpdk_buf_alloc(), has also been introduced as a wrapper
> around the nonpmd_mp_mutex to serialise allocations from a non-pmd
> context.
> 
> Co-authored-by: Michael Qiu <qiudayu@chinac.com>
> Co-authored-by: Tiago Lam <tiago.lam@intel.com>
> 
> Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
> Signed-off-by: Michael Qiu <qiudayu@chinac.com>
> Signed-off-by: Tiago Lam <tiago.lam@intel.com>
> Acked-by: Eelco Chaudron <echaudro@redhat.com>
> ---
>  lib/netdev-dpdk.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 82 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 8484239..e58e7ac 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -552,6 +552,27 @@ dpdk_rte_mzalloc(size_t sz)
>      return rte_zmalloc(OVS_VPORT_DPDK, sz, OVS_CACHE_LINE_SIZE);
>  }
>  
> +static struct rte_mbuf *
> +dpdk_buf_alloc(struct rte_mempool *mp)
> +{
> +    struct rte_mbuf *mbuf = NULL;
> +
> +    /* If non-pmd we need to lock on nonpmd_mp_mutex mutex */
> +    if (!dpdk_thread_is_pmd()) {
> +        ovs_mutex_lock(&nonpmd_mp_mutex);
> +
> +        mbuf = rte_pktmbuf_alloc(mp);
> +
> +        ovs_mutex_unlock(&nonpmd_mp_mutex);
> +
> +        return mbuf;
> +    }
> +
> +    mbuf = rte_pktmbuf_alloc(mp);
> +
> +    return mbuf;
> +}

What about:

    if (dpdk_thread_is_pmd()) {
        mbuf = rte_pktmbuf_alloc(mp);
    } else {
        ovs_mutex_lock(&nonpmd_mp_mutex);

        mbuf = rte_pktmbuf_alloc(mp);

        ovs_mutex_unlock(&nonpmd_mp_mutex);
    }

    return mbuf;
    




> +
>  void
>  free_dpdk_buf(struct dp_packet *packet)
>  {
> @@ -2316,6 +2337,56 @@ out:
>      }
>  }
>  
> +static int
> +dpdk_copy_dp_packet_to_mbuf(struct dp_packet *packet, struct rte_mbuf **head,
> +                            struct rte_mempool *mp)
> +{
> +    struct rte_mbuf *mbuf, *fmbuf;
> +    uint16_t max_data_len;
> +    uint32_t nb_segs = 0;
> +    uint32_t size = 0;
> +
> +    /* We will need the whole data for copying below */
> +    if (!dp_packet_is_linear(packet)) {
> +        dp_packet_linearize(packet);
> +    }
> +
> +    /* Allocate first mbuf to know the size of data available */
> +    fmbuf = mbuf = *head = dpdk_buf_alloc(mp);
> +    if (OVS_UNLIKELY(!mbuf)) {
> +        return ENOMEM;
> +    }
> +
> +    size = dp_packet_size(packet);
> +
> +    /* All new allocated mbuf's max data len is the same */
> +    max_data_len = mbuf->buf_len - mbuf->data_off;
> +
> +    /* Calculate # of output mbufs. */
> +    nb_segs = size / max_data_len;
> +    if (size % max_data_len) {
> +        nb_segs = nb_segs + 1;
> +    }
> +
> +    /* Allocate additional mbufs, less the one already allocated above */
> +    for (int i = 1; i < nb_segs; i++) {
> +        mbuf->next = dpdk_buf_alloc(mp);
> +        if (!mbuf->next) {
> +            free_dpdk_buf(CONTAINER_OF(fmbuf, struct dp_packet, mbuf));
> +            fmbuf = NULL;
> +            return ENOMEM;
> +        }
> +        mbuf = mbuf->next;
> +    }
> +
> +    fmbuf->nb_segs = nb_segs;
> +    fmbuf->pkt_len = size;
> +
> +    dp_packet_mbuf_write(fmbuf, 0, size, dp_packet_data(packet));
> +
> +    return 0;
> +}
> +
>  /* Tx function. Transmit packets indefinitely */
>  static void
>  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
> @@ -2332,6 +2403,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
>      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
>      uint32_t cnt = batch_cnt;
>      uint32_t dropped = 0;
> +    uint32_t i;
>  
>      if (dev->type != DPDK_DEV_VHOST) {
>          /* Check if QoS has been configured for this netdev. */
> @@ -2342,28 +2414,29 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
>  
>      uint32_t txcnt = 0;
>  
> -    for (uint32_t i = 0; i < cnt; i++) {
> +    for (i = 0; i < cnt; i++) {
>          struct dp_packet *packet = batch->packets[i];
>          uint32_t size = dp_packet_size(packet);
> +        int err = 0;
>  
>          if (OVS_UNLIKELY(size > dev->max_packet_len)) {
>              VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
>                           size, dev->max_packet_len);
> -
>              dropped++;
>              continue;
>          }
>  
> -        pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
> -        if (OVS_UNLIKELY(!pkts[txcnt])) {
> +        err = dpdk_copy_dp_packet_to_mbuf(packet, &pkts[txcnt],
> +                                          dev->dpdk_mp->mp);
> +        if (err != 0) {
> +            if (err == ENOMEM) {
> +                VLOG_ERR_RL(&rl, "Failed to alloc mbufs! %u packets dropped",
> +                            cnt - i);
> +            }
> +
>              dropped += cnt - i;
>              break;
>          }
> -
> -        /* We have to do a copy for now */
> -        memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
> -               dp_packet_data(packet), size);
> -        dp_packet_set_size((struct dp_packet *)pkts[txcnt], size);
>          dp_packet_copy_mbuf_flags((struct dp_packet *)pkts[txcnt], packet);
>  
>          txcnt++;
> -- 
> 2.7.4
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Lam, Tiago Oct. 5, 2018, 2:50 p.m. | #2
On 03/10/2018 19:27, Flavio Leitner wrote:
> On Fri, Sep 28, 2018 at 05:15:11PM +0100, Tiago Lam wrote:
>> From: Mark Kavanagh <mark.b.kavanagh@intel.com>
>>
>> Currently, packets are only copied to a single segment in the function
>> dpdk_do_tx_copy(). This could be an issue in the case of jumbo frames,
>> particularly when multi-segment mbufs are involved.
>>
>> This patch calculates the number of segments needed by a packet and
>> copies the data to each segment.
>>
>> A new function, dpdk_buf_alloc(), has also been introduced as a wrapper
>> around the nonpmd_mp_mutex to serialise allocations from a non-pmd
>> context.
>>
>> Co-authored-by: Michael Qiu <qiudayu@chinac.com>
>> Co-authored-by: Tiago Lam <tiago.lam@intel.com>
>>
>> Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com>
>> Signed-off-by: Michael Qiu <qiudayu@chinac.com>
>> Signed-off-by: Tiago Lam <tiago.lam@intel.com>
>> Acked-by: Eelco Chaudron <echaudro@redhat.com>
>> ---
>>  lib/netdev-dpdk.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++------
>>  1 file changed, 82 insertions(+), 9 deletions(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index 8484239..e58e7ac 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -552,6 +552,27 @@ dpdk_rte_mzalloc(size_t sz)
>>      return rte_zmalloc(OVS_VPORT_DPDK, sz, OVS_CACHE_LINE_SIZE);
>>  }
>>  
>> +static struct rte_mbuf *
>> +dpdk_buf_alloc(struct rte_mempool *mp)
>> +{
>> +    struct rte_mbuf *mbuf = NULL;
>> +
>> +    /* If non-pmd we need to lock on nonpmd_mp_mutex mutex */
>> +    if (!dpdk_thread_is_pmd()) {
>> +        ovs_mutex_lock(&nonpmd_mp_mutex);
>> +
>> +        mbuf = rte_pktmbuf_alloc(mp);
>> +
>> +        ovs_mutex_unlock(&nonpmd_mp_mutex);
>> +
>> +        return mbuf;
>> +    }
>> +
>> +    mbuf = rte_pktmbuf_alloc(mp);
>> +
>> +    return mbuf;
>> +}
> 
> What about:
> 
>     if (dpdk_thread_is_pmd()) {
>         mbuf = rte_pktmbuf_alloc(mp);
>     } else {
>         ovs_mutex_lock(&nonpmd_mp_mutex);
> 
>         mbuf = rte_pktmbuf_alloc(mp);
> 
>         ovs_mutex_unlock(&nonpmd_mp_mutex);
>     }
> 
>     return mbuf;

It does read better. I'll use that, thanks.

Tiago.

>     
> 
> 
> 
> 
>> +
>>  void
>>  free_dpdk_buf(struct dp_packet *packet)
>>  {
>> @@ -2316,6 +2337,56 @@ out:
>>      }
>>  }
>>  
>> +static int
>> +dpdk_copy_dp_packet_to_mbuf(struct dp_packet *packet, struct rte_mbuf **head,
>> +                            struct rte_mempool *mp)
>> +{
>> +    struct rte_mbuf *mbuf, *fmbuf;
>> +    uint16_t max_data_len;
>> +    uint32_t nb_segs = 0;
>> +    uint32_t size = 0;
>> +
>> +    /* We will need the whole data for copying below */
>> +    if (!dp_packet_is_linear(packet)) {
>> +        dp_packet_linearize(packet);
>> +    }
>> +
>> +    /* Allocate first mbuf to know the size of data available */
>> +    fmbuf = mbuf = *head = dpdk_buf_alloc(mp);
>> +    if (OVS_UNLIKELY(!mbuf)) {
>> +        return ENOMEM;
>> +    }
>> +
>> +    size = dp_packet_size(packet);
>> +
>> +    /* All new allocated mbuf's max data len is the same */
>> +    max_data_len = mbuf->buf_len - mbuf->data_off;
>> +
>> +    /* Calculate # of output mbufs. */
>> +    nb_segs = size / max_data_len;
>> +    if (size % max_data_len) {
>> +        nb_segs = nb_segs + 1;
>> +    }
>> +
>> +    /* Allocate additional mbufs, less the one already allocated above */
>> +    for (int i = 1; i < nb_segs; i++) {
>> +        mbuf->next = dpdk_buf_alloc(mp);
>> +        if (!mbuf->next) {
>> +            free_dpdk_buf(CONTAINER_OF(fmbuf, struct dp_packet, mbuf));
>> +            fmbuf = NULL;
>> +            return ENOMEM;
>> +        }
>> +        mbuf = mbuf->next;
>> +    }
>> +
>> +    fmbuf->nb_segs = nb_segs;
>> +    fmbuf->pkt_len = size;
>> +
>> +    dp_packet_mbuf_write(fmbuf, 0, size, dp_packet_data(packet));
>> +
>> +    return 0;
>> +}
>> +
>>  /* Tx function. Transmit packets indefinitely */
>>  static void
>>  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
>> @@ -2332,6 +2403,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
>>      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
>>      uint32_t cnt = batch_cnt;
>>      uint32_t dropped = 0;
>> +    uint32_t i;
>>  
>>      if (dev->type != DPDK_DEV_VHOST) {
>>          /* Check if QoS has been configured for this netdev. */
>> @@ -2342,28 +2414,29 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
>>  
>>      uint32_t txcnt = 0;
>>  
>> -    for (uint32_t i = 0; i < cnt; i++) {
>> +    for (i = 0; i < cnt; i++) {
>>          struct dp_packet *packet = batch->packets[i];
>>          uint32_t size = dp_packet_size(packet);
>> +        int err = 0;
>>  
>>          if (OVS_UNLIKELY(size > dev->max_packet_len)) {
>>              VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
>>                           size, dev->max_packet_len);
>> -
>>              dropped++;
>>              continue;
>>          }
>>  
>> -        pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
>> -        if (OVS_UNLIKELY(!pkts[txcnt])) {
>> +        err = dpdk_copy_dp_packet_to_mbuf(packet, &pkts[txcnt],
>> +                                          dev->dpdk_mp->mp);
>> +        if (err != 0) {
>> +            if (err == ENOMEM) {
>> +                VLOG_ERR_RL(&rl, "Failed to alloc mbufs! %u packets dropped",
>> +                            cnt - i);
>> +            }
>> +
>>              dropped += cnt - i;
>>              break;
>>          }
>> -
>> -        /* We have to do a copy for now */
>> -        memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
>> -               dp_packet_data(packet), size);
>> -        dp_packet_set_size((struct dp_packet *)pkts[txcnt], size);
>>          dp_packet_copy_mbuf_flags((struct dp_packet *)pkts[txcnt], packet);
>>  
>>          txcnt++;
>> -- 
>> 2.7.4
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 8484239..e58e7ac 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -552,6 +552,27 @@  dpdk_rte_mzalloc(size_t sz)
     return rte_zmalloc(OVS_VPORT_DPDK, sz, OVS_CACHE_LINE_SIZE);
 }
 
+static struct rte_mbuf *
+dpdk_buf_alloc(struct rte_mempool *mp)
+{
+    struct rte_mbuf *mbuf = NULL;
+
+    /* If non-pmd we need to lock on nonpmd_mp_mutex mutex */
+    if (!dpdk_thread_is_pmd()) {
+        ovs_mutex_lock(&nonpmd_mp_mutex);
+
+        mbuf = rte_pktmbuf_alloc(mp);
+
+        ovs_mutex_unlock(&nonpmd_mp_mutex);
+
+        return mbuf;
+    }
+
+    mbuf = rte_pktmbuf_alloc(mp);
+
+    return mbuf;
+}
+
 void
 free_dpdk_buf(struct dp_packet *packet)
 {
@@ -2316,6 +2337,56 @@  out:
     }
 }
 
+static int
+dpdk_copy_dp_packet_to_mbuf(struct dp_packet *packet, struct rte_mbuf **head,
+                            struct rte_mempool *mp)
+{
+    struct rte_mbuf *mbuf, *fmbuf;
+    uint16_t max_data_len;
+    uint32_t nb_segs = 0;
+    uint32_t size = 0;
+
+    /* We will need the whole data for copying below */
+    if (!dp_packet_is_linear(packet)) {
+        dp_packet_linearize(packet);
+    }
+
+    /* Allocate first mbuf to know the size of data available */
+    fmbuf = mbuf = *head = dpdk_buf_alloc(mp);
+    if (OVS_UNLIKELY(!mbuf)) {
+        return ENOMEM;
+    }
+
+    size = dp_packet_size(packet);
+
+    /* All new allocated mbuf's max data len is the same */
+    max_data_len = mbuf->buf_len - mbuf->data_off;
+
+    /* Calculate # of output mbufs. */
+    nb_segs = size / max_data_len;
+    if (size % max_data_len) {
+        nb_segs = nb_segs + 1;
+    }
+
+    /* Allocate additional mbufs, less the one already allocated above */
+    for (int i = 1; i < nb_segs; i++) {
+        mbuf->next = dpdk_buf_alloc(mp);
+        if (!mbuf->next) {
+            free_dpdk_buf(CONTAINER_OF(fmbuf, struct dp_packet, mbuf));
+            fmbuf = NULL;
+            return ENOMEM;
+        }
+        mbuf = mbuf->next;
+    }
+
+    fmbuf->nb_segs = nb_segs;
+    fmbuf->pkt_len = size;
+
+    dp_packet_mbuf_write(fmbuf, 0, size, dp_packet_data(packet));
+
+    return 0;
+}
+
 /* Tx function. Transmit packets indefinitely */
 static void
 dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
@@ -2332,6 +2403,7 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
     struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
     uint32_t cnt = batch_cnt;
     uint32_t dropped = 0;
+    uint32_t i;
 
     if (dev->type != DPDK_DEV_VHOST) {
         /* Check if QoS has been configured for this netdev. */
@@ -2342,28 +2414,29 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
 
     uint32_t txcnt = 0;
 
-    for (uint32_t i = 0; i < cnt; i++) {
+    for (i = 0; i < cnt; i++) {
         struct dp_packet *packet = batch->packets[i];
         uint32_t size = dp_packet_size(packet);
+        int err = 0;
 
         if (OVS_UNLIKELY(size > dev->max_packet_len)) {
             VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
                          size, dev->max_packet_len);
-
             dropped++;
             continue;
         }
 
-        pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
-        if (OVS_UNLIKELY(!pkts[txcnt])) {
+        err = dpdk_copy_dp_packet_to_mbuf(packet, &pkts[txcnt],
+                                          dev->dpdk_mp->mp);
+        if (err != 0) {
+            if (err == ENOMEM) {
+                VLOG_ERR_RL(&rl, "Failed to alloc mbufs! %u packets dropped",
+                            cnt - i);
+            }
+
             dropped += cnt - i;
             break;
         }
-
-        /* We have to do a copy for now */
-        memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
-               dp_packet_data(packet), size);
-        dp_packet_set_size((struct dp_packet *)pkts[txcnt], size);
         dp_packet_copy_mbuf_flags((struct dp_packet *)pkts[txcnt], packet);
 
         txcnt++;