diff mbox

[ovs-dev,v2] netdev-dpdk: Create separate memory pool for each port

Message ID 1487935632-4890-1-git-send-email-robertx.wojciechowicz@intel.com
State Superseded
Headers show

Commit Message

Robert Wojciechowicz Feb. 24, 2017, 11:27 a.m. UTC
Since it's possible to delete memory pool in DPDK
we can try to estimate better required memory size
when port is reconfigured, e.g. with different number
of rx queues.

Signed-off-by: Robert Wojciechowicz <robertx.wojciechowicz@intel.com>
Acked-by: Ian Stokes <ian.stokes@intel.com>
---
v2:
- removing mempool reference counter
- making sure mempool name isn't longer than RTE_MEMPOOL_NAMESIZE
---
 lib/netdev-dpdk.c | 104 ++++++++++++++++++++++++++----------------------------
 1 file changed, 50 insertions(+), 54 deletions(-)

Comments

Robert Wojciechowicz April 4, 2017, 8:06 a.m. UTC | #1
On Fri, Feb 24, 2017 at 11:27:12AM +0000, Robert Wojciechowicz wrote:
> Since it's possible to delete memory pool in DPDK
> we can try to estimate better required memory size
> when port is reconfigured, e.g. with different number
> of rx queues.
> 
> Signed-off-by: Robert Wojciechowicz <robertx.wojciechowicz@intel.com>
> Acked-by: Ian Stokes <ian.stokes@intel.com>
> ---
> v2:
> - removing mempool reference counter
> - making sure mempool name isn't longer than RTE_MEMPOOL_NAMESIZE

Did anyone have time to look into this patch?
Does it make sense or static allocation is better for some reason?

Br,
Robert
Kevin Traynor April 5, 2017, 4:12 p.m. UTC | #2
On 02/24/2017 11:27 AM, Robert Wojciechowicz wrote:
> Since it's possible to delete memory pool in DPDK
> we can try to estimate better required memory size
> when port is reconfigured, e.g. with different number
> of rx queues.
> 

Hi Robert,

The patch does not apply on master. I think this is probably a good idea
because now that the mtu can be set you can end up with multiple large
mempools per socket.

It's very much down to the wire, I wonder if there should be some amount
of additional buffers (e.g. +MIN_NB_MBUF) for any corner cases which are
hard to find under basic testing. Also, there may be some impact to the
local mbuf cache when it is provisioned so tightly.

Kevin.

> Signed-off-by: Robert Wojciechowicz <robertx.wojciechowicz@intel.com>
> Acked-by: Ian Stokes <ian.stokes@intel.com>
> ---
> v2:
> - removing mempool reference counter
> - making sure mempool name isn't longer than RTE_MEMPOOL_NAMESIZE
> ---
>  lib/netdev-dpdk.c | 104 ++++++++++++++++++++++++++----------------------------
>  1 file changed, 50 insertions(+), 54 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index ee53c4c..1e10066 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -275,14 +275,12 @@ static struct ovs_list dpdk_list OVS_GUARDED_BY(dpdk_mutex)
>  static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex)
>      = OVS_MUTEX_INITIALIZER;
>  
> -static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex)
> -    = OVS_LIST_INITIALIZER(&dpdk_mp_list);
> -
>  struct dpdk_mp {
>      struct rte_mempool *mp;
>      int mtu;
>      int socket_id;
> -    int refcount;
> +    char if_name[IFNAMSIZ];
> +    unsigned mp_size;
>      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
>  };
>  
> @@ -463,76 +461,80 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp,
>      dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len);
>  }
>  
> +/*
> + * XXX Full DPDK memory pool name must be unique
> + * and cannot be longer than RTE_MEMPOOL_NAMESIZE
> + */
> +static char*
> +dpdk_mp_name(struct dpdk_mp *dmp)
> +{
> +    uint32_t h = hash_string(dmp->if_name, 0);
> +    char* mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof(char));
> +    int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u",
> +                       h, dmp->mtu, dmp->mp_size);
> +    if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
> +        return NULL;
> +    }
> +    return mp_name;
> +}
> +
>  static struct dpdk_mp *
> -dpdk_mp_create(int socket_id, int mtu)
> +dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
>  {
>      struct rte_pktmbuf_pool_private mbp_priv;
>      struct dpdk_mp *dmp;
> -    unsigned mp_size;
>      char *mp_name;
>  
>      dmp = dpdk_rte_mzalloc(sizeof *dmp);
>      if (!dmp) {
>          return NULL;
>      }
> -    dmp->socket_id = socket_id;
> +    dmp->socket_id = dev->requested_socket_id;
>      dmp->mtu = mtu;
> -    dmp->refcount = 1;
> +    strncpy(dmp->if_name, dev->up.name, IFNAMSIZ);
>      mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct dp_packet);
>      mbp_priv.mbuf_priv_size = sizeof(struct dp_packet)
> -                              - sizeof(struct rte_mbuf);
> -    /* XXX: this is a really rough method of provisioning memory.
> -     * It's impossible to determine what the exact memory requirements are
> -     * when the number of ports and rxqs that utilize a particular mempool can
> -     * change dynamically at runtime. For now, use this rough heurisitic.
> +        - sizeof(struct rte_mbuf);
> +    /*
> +     * XXX: rough estimation of memory required for port:
> +     * <packets required to fill the device rxqs>
> +     * + <packets that could be stuck on other ports txqs>
> +     * + <packets in the pmd threads>
>       */
> -    if (mtu >= ETHER_MTU) {
> -        mp_size = MAX_NB_MBUF;
> -    } else {
> -        mp_size = MIN_NB_MBUF;
> -    }
>  
> -    do {
> -        mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id,
> -                            mp_size);
> +    dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size
> +        + dev->requested_n_txq * dev->requested_txq_size
> +        + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST;
>  
> -        dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
> +    do {
> +        mp_name = dpdk_mp_name(dmp);
> +        dmp->mp = rte_mempool_create(mp_name, dmp->mp_size, MBUF_SIZE(mtu),
>                                       MP_CACHE_SZ,
>                                       sizeof(struct rte_pktmbuf_pool_private),
>                                       rte_pktmbuf_pool_init, &mbp_priv,
>                                       ovs_rte_pktmbuf_init, NULL,
> -                                     socket_id, 0);
> +                                     dmp->socket_id, 0);
>          if (dmp->mp) {
>              VLOG_DBG("Allocated \"%s\" mempool with %u mbufs",
> -                     mp_name, mp_size);
> +                     mp_name, dmp->mp_size);
>          }
>          free(mp_name);
>          if (dmp->mp) {
>              return dmp;
>          }
> -    } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
> +    } while (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF);
>  
>      rte_free(dmp);
>      return NULL;
>  }
>  
>  static struct dpdk_mp *
> -dpdk_mp_get(int socket_id, int mtu)
> +dpdk_mp_get(struct netdev_dpdk *dev, int mtu)
>  {
>      struct dpdk_mp *dmp;
>  
>      ovs_mutex_lock(&dpdk_mp_mutex);
> -    LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
> -        if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
> -            dmp->refcount++;
> -            goto out;
> -        }
> -    }
> -
> -    dmp = dpdk_mp_create(socket_id, mtu);
> -    ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
> -
> -out:
> +    dmp = dpdk_mp_create(dev, mtu);
>      ovs_mutex_unlock(&dpdk_mp_mutex);
>  
>      return dmp;
> @@ -541,18 +543,18 @@ out:
>  static void
>  dpdk_mp_put(struct dpdk_mp *dmp)
>  {
> +    char *mp_name;
> +
>      if (!dmp) {
>          return;
>      }
>  
>      ovs_mutex_lock(&dpdk_mp_mutex);
> -    ovs_assert(dmp->refcount);
> -
> -    if (!--dmp->refcount) {
> -        ovs_list_remove(&dmp->list_node);
> -        rte_mempool_free(dmp->mp);
> -        rte_free(dmp);
> -    }
> +    mp_name = dpdk_mp_name(dmp);
> +    VLOG_DBG("Releasing \"%s\" mempool", mp_name);
> +    free(mp_name);
> +    rte_mempool_free(dmp->mp);
> +    rte_free(dmp);
>      ovs_mutex_unlock(&dpdk_mp_mutex);
>  }
>  
> @@ -567,7 +569,7 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
>      uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
>      struct dpdk_mp *mp;
>  
> -    mp = dpdk_mp_get(dev->requested_socket_id, FRAME_LEN_TO_MTU(buf_size));
> +    mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size));
>      if (!mp) {
>          VLOG_ERR("Insufficient memory to create memory pool for netdev "
>                   "%s, with MTU %d on socket %d\n",
> @@ -3129,10 +3131,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
>  
>      rte_eth_dev_stop(dev->port_id);
>  
> -    if (dev->mtu != dev->requested_mtu
> -        || dev->socket_id != dev->requested_socket_id) {
> -        netdev_dpdk_mempool_configure(dev);
> -    }
> +    netdev_dpdk_mempool_configure(dev);
>  
>      netdev->n_txq = dev->requested_n_txq;
>      netdev->n_rxq = dev->requested_n_rxq;
> @@ -3168,11 +3167,8 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
>  
>      netdev_dpdk_remap_txqs(dev);
>  
> -    if (dev->requested_socket_id != dev->socket_id
> -        || dev->requested_mtu != dev->mtu) {
> -        if (!netdev_dpdk_mempool_configure(dev)) {
> -            netdev_change_seq_changed(&dev->up);
> -        }
> +    if (!netdev_dpdk_mempool_configure(dev)) {
> +        netdev_change_seq_changed(&dev->up);
>      }
>  
>      if (!dev->dpdk_mp) {
>
Robert Wojciechowicz April 6, 2017, 8:25 a.m. UTC | #3
On Wed, Apr 05, 2017 at 05:12:44PM +0100, Kevin Traynor wrote:
> On 02/24/2017 11:27 AM, Robert Wojciechowicz wrote:
> > Since it's possible to delete memory pool in DPDK
> > we can try to estimate better required memory size
> > when port is reconfigured, e.g. with different number
> > of rx queues.
> > 
> 
> Hi Robert,
> 
> The patch does not apply on master. I think this is probably a good idea
> because now that the mtu can be set you can end up with multiple large
> mempools per socket.

Thanks for looking into this patch.
Good to see that it makes sense, so I will rebase it and send once again.
> 
> It's very much down to the wire, I wonder if there should be some amount
> of additional buffers (e.g. +MIN_NB_MBUF) for any corner cases which are
> hard to find under basic testing. Also, there may be some impact to the
> local mbuf cache when it is provisioned so tightly.

Good point, I will think about it.

> 
> Kevin.
> 

Br,
Robert
diff mbox

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index ee53c4c..1e10066 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -275,14 +275,12 @@  static struct ovs_list dpdk_list OVS_GUARDED_BY(dpdk_mutex)
 static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex)
     = OVS_MUTEX_INITIALIZER;
 
-static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex)
-    = OVS_LIST_INITIALIZER(&dpdk_mp_list);
-
 struct dpdk_mp {
     struct rte_mempool *mp;
     int mtu;
     int socket_id;
-    int refcount;
+    char if_name[IFNAMSIZ];
+    unsigned mp_size;
     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
 };
 
@@ -463,76 +461,80 @@  ovs_rte_pktmbuf_init(struct rte_mempool *mp,
     dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len);
 }
 
+/*
+ * XXX Full DPDK memory pool name must be unique
+ * and cannot be longer than RTE_MEMPOOL_NAMESIZE
+ */
+static char*
+dpdk_mp_name(struct dpdk_mp *dmp)
+{
+    uint32_t h = hash_string(dmp->if_name, 0);
+    char* mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof(char));
+    int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u",
+                       h, dmp->mtu, dmp->mp_size);
+    if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
+        return NULL;
+    }
+    return mp_name;
+}
+
 static struct dpdk_mp *
-dpdk_mp_create(int socket_id, int mtu)
+dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
 {
     struct rte_pktmbuf_pool_private mbp_priv;
     struct dpdk_mp *dmp;
-    unsigned mp_size;
     char *mp_name;
 
     dmp = dpdk_rte_mzalloc(sizeof *dmp);
     if (!dmp) {
         return NULL;
     }
-    dmp->socket_id = socket_id;
+    dmp->socket_id = dev->requested_socket_id;
     dmp->mtu = mtu;
-    dmp->refcount = 1;
+    strncpy(dmp->if_name, dev->up.name, IFNAMSIZ);
     mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct dp_packet);
     mbp_priv.mbuf_priv_size = sizeof(struct dp_packet)
-                              - sizeof(struct rte_mbuf);
-    /* XXX: this is a really rough method of provisioning memory.
-     * It's impossible to determine what the exact memory requirements are
-     * when the number of ports and rxqs that utilize a particular mempool can
-     * change dynamically at runtime. For now, use this rough heurisitic.
+        - sizeof(struct rte_mbuf);
+    /*
+     * XXX: rough estimation of memory required for port:
+     * <packets required to fill the device rxqs>
+     * + <packets that could be stuck on other ports txqs>
+     * + <packets in the pmd threads>
      */
-    if (mtu >= ETHER_MTU) {
-        mp_size = MAX_NB_MBUF;
-    } else {
-        mp_size = MIN_NB_MBUF;
-    }
 
-    do {
-        mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id,
-                            mp_size);
+    dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size
+        + dev->requested_n_txq * dev->requested_txq_size
+        + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST;
 
-        dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
+    do {
+        mp_name = dpdk_mp_name(dmp);
+        dmp->mp = rte_mempool_create(mp_name, dmp->mp_size, MBUF_SIZE(mtu),
                                      MP_CACHE_SZ,
                                      sizeof(struct rte_pktmbuf_pool_private),
                                      rte_pktmbuf_pool_init, &mbp_priv,
                                      ovs_rte_pktmbuf_init, NULL,
-                                     socket_id, 0);
+                                     dmp->socket_id, 0);
         if (dmp->mp) {
             VLOG_DBG("Allocated \"%s\" mempool with %u mbufs",
-                     mp_name, mp_size);
+                     mp_name, dmp->mp_size);
         }
         free(mp_name);
         if (dmp->mp) {
             return dmp;
         }
-    } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
+    } while (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF);
 
     rte_free(dmp);
     return NULL;
 }
 
 static struct dpdk_mp *
-dpdk_mp_get(int socket_id, int mtu)
+dpdk_mp_get(struct netdev_dpdk *dev, int mtu)
 {
     struct dpdk_mp *dmp;
 
     ovs_mutex_lock(&dpdk_mp_mutex);
-    LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
-        if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
-            dmp->refcount++;
-            goto out;
-        }
-    }
-
-    dmp = dpdk_mp_create(socket_id, mtu);
-    ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
-
-out:
+    dmp = dpdk_mp_create(dev, mtu);
     ovs_mutex_unlock(&dpdk_mp_mutex);
 
     return dmp;
@@ -541,18 +543,18 @@  out:
 static void
 dpdk_mp_put(struct dpdk_mp *dmp)
 {
+    char *mp_name;
+
     if (!dmp) {
         return;
     }
 
     ovs_mutex_lock(&dpdk_mp_mutex);
-    ovs_assert(dmp->refcount);
-
-    if (!--dmp->refcount) {
-        ovs_list_remove(&dmp->list_node);
-        rte_mempool_free(dmp->mp);
-        rte_free(dmp);
-    }
+    mp_name = dpdk_mp_name(dmp);
+    VLOG_DBG("Releasing \"%s\" mempool", mp_name);
+    free(mp_name);
+    rte_mempool_free(dmp->mp);
+    rte_free(dmp);
     ovs_mutex_unlock(&dpdk_mp_mutex);
 }
 
@@ -567,7 +569,7 @@  netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
     uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
     struct dpdk_mp *mp;
 
-    mp = dpdk_mp_get(dev->requested_socket_id, FRAME_LEN_TO_MTU(buf_size));
+    mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size));
     if (!mp) {
         VLOG_ERR("Insufficient memory to create memory pool for netdev "
                  "%s, with MTU %d on socket %d\n",
@@ -3129,10 +3131,7 @@  netdev_dpdk_reconfigure(struct netdev *netdev)
 
     rte_eth_dev_stop(dev->port_id);
 
-    if (dev->mtu != dev->requested_mtu
-        || dev->socket_id != dev->requested_socket_id) {
-        netdev_dpdk_mempool_configure(dev);
-    }
+    netdev_dpdk_mempool_configure(dev);
 
     netdev->n_txq = dev->requested_n_txq;
     netdev->n_rxq = dev->requested_n_rxq;
@@ -3168,11 +3167,8 @@  dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
 
     netdev_dpdk_remap_txqs(dev);
 
-    if (dev->requested_socket_id != dev->socket_id
-        || dev->requested_mtu != dev->mtu) {
-        if (!netdev_dpdk_mempool_configure(dev)) {
-            netdev_change_seq_changed(&dev->up);
-        }
+    if (!netdev_dpdk_mempool_configure(dev)) {
+        netdev_change_seq_changed(&dev->up);
     }
 
     if (!dev->dpdk_mp) {