Message ID | 1487935632-4890-1-git-send-email-robertx.wojciechowicz@intel.com |
---|---|
State | Superseded |
Headers | show |
On Fri, Feb 24, 2017 at 11:27:12AM +0000, Robert Wojciechowicz wrote: > Since it's possible to delete memory pool in DPDK > we can try to estimate better required memory size > when port is reconfigured, e.g. with different number > of rx queues. > > Signed-off-by: Robert Wojciechowicz <robertx.wojciechowicz@intel.com> > Acked-by: Ian Stokes <ian.stokes@intel.com> > --- > v2: > - removing mempool reference counter > - making sure mempool name isn't longer than RTE_MEMPOOL_NAMESIZE Did anyone have time to look into this patch? Does it make sense, or is static allocation better for some reason? Br, Robert
On 02/24/2017 11:27 AM, Robert Wojciechowicz wrote: > Since it's possible to delete memory pool in DPDK > we can try to estimate better required memory size > when port is reconfigured, e.g. with different number > of rx queues. > Hi Robert, The patch does not apply on master. I think this is probably a good idea because now that the mtu can be set you can end up with multiple large mempools per socket. It's very much down to the wire, I wonder if there should be some amount of additional buffers (e.g. +MIN_NB_MBUF) for any corner cases which are hard to find under basic testing. Also, there may be some impact to the local mbuf cache when it is provisioned so tightly. Kevin. > Signed-off-by: Robert Wojciechowicz <robertx.wojciechowicz@intel.com> > Acked-by: Ian Stokes <ian.stokes@intel.com> > --- > v2: > - removing mempool reference counter > - making sure mempool name isn't longer than RTE_MEMPOOL_NAMESIZE > --- > lib/netdev-dpdk.c | 104 ++++++++++++++++++++++++++---------------------------- > 1 file changed, 50 insertions(+), 54 deletions(-) > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index ee53c4c..1e10066 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -275,14 +275,12 @@ static struct ovs_list dpdk_list OVS_GUARDED_BY(dpdk_mutex) > static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex) > = OVS_MUTEX_INITIALIZER; > > -static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex) > - = OVS_LIST_INITIALIZER(&dpdk_mp_list); > - > struct dpdk_mp { > struct rte_mempool *mp; > int mtu; > int socket_id; > - int refcount; > + char if_name[IFNAMSIZ]; > + unsigned mp_size; > struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex); > }; > > @@ -463,76 +461,80 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp, > dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len); > } > > +/* > + * XXX Full DPDK memory pool name must be unique > + * and cannot be longer than RTE_MEMPOOL_NAMESIZE > + */ > +static char* > +dpdk_mp_name(struct 
dpdk_mp *dmp) > +{ > + uint32_t h = hash_string(dmp->if_name, 0); > + char* mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof(char)); > + int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u", > + h, dmp->mtu, dmp->mp_size); > + if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) { > + return NULL; > + } > + return mp_name; > +} > + > static struct dpdk_mp * > -dpdk_mp_create(int socket_id, int mtu) > +dpdk_mp_create(struct netdev_dpdk *dev, int mtu) > { > struct rte_pktmbuf_pool_private mbp_priv; > struct dpdk_mp *dmp; > - unsigned mp_size; > char *mp_name; > > dmp = dpdk_rte_mzalloc(sizeof *dmp); > if (!dmp) { > return NULL; > } > - dmp->socket_id = socket_id; > + dmp->socket_id = dev->requested_socket_id; > dmp->mtu = mtu; > - dmp->refcount = 1; > + strncpy(dmp->if_name, dev->up.name, IFNAMSIZ); > mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct dp_packet); > mbp_priv.mbuf_priv_size = sizeof(struct dp_packet) > - - sizeof(struct rte_mbuf); > - /* XXX: this is a really rough method of provisioning memory. > - * It's impossible to determine what the exact memory requirements are > - * when the number of ports and rxqs that utilize a particular mempool can > - * change dynamically at runtime. For now, use this rough heurisitic. 
> + - sizeof(struct rte_mbuf); > + /* > + * XXX: rough estimation of memory required for port: > + * <packets required to fill the device rxqs> > + * + <packets that could be stuck on other ports txqs> > + * + <packets in the pmd threads> > */ > - if (mtu >= ETHER_MTU) { > - mp_size = MAX_NB_MBUF; > - } else { > - mp_size = MIN_NB_MBUF; > - } > > - do { > - mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id, > - mp_size); > + dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size > + + dev->requested_n_txq * dev->requested_txq_size > + + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST; > > - dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu), > + do { > + mp_name = dpdk_mp_name(dmp); > + dmp->mp = rte_mempool_create(mp_name, dmp->mp_size, MBUF_SIZE(mtu), > MP_CACHE_SZ, > sizeof(struct rte_pktmbuf_pool_private), > rte_pktmbuf_pool_init, &mbp_priv, > ovs_rte_pktmbuf_init, NULL, > - socket_id, 0); > + dmp->socket_id, 0); > if (dmp->mp) { > VLOG_DBG("Allocated \"%s\" mempool with %u mbufs", > - mp_name, mp_size); > + mp_name, dmp->mp_size); > } > free(mp_name); > if (dmp->mp) { > return dmp; > } > - } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF); > + } while (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF); > > rte_free(dmp); > return NULL; > } > > static struct dpdk_mp * > -dpdk_mp_get(int socket_id, int mtu) > +dpdk_mp_get(struct netdev_dpdk *dev, int mtu) > { > struct dpdk_mp *dmp; > > ovs_mutex_lock(&dpdk_mp_mutex); > - LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) { > - if (dmp->socket_id == socket_id && dmp->mtu == mtu) { > - dmp->refcount++; > - goto out; > - } > - } > - > - dmp = dpdk_mp_create(socket_id, mtu); > - ovs_list_push_back(&dpdk_mp_list, &dmp->list_node); > - > -out: > + dmp = dpdk_mp_create(dev, mtu); > ovs_mutex_unlock(&dpdk_mp_mutex); > > return dmp; > @@ -541,18 +543,18 @@ out: > static void > dpdk_mp_put(struct dpdk_mp *dmp) > { > + char *mp_name; > + > if (!dmp) { > return; > 
} > > ovs_mutex_lock(&dpdk_mp_mutex); > - ovs_assert(dmp->refcount); > - > - if (!--dmp->refcount) { > - ovs_list_remove(&dmp->list_node); > - rte_mempool_free(dmp->mp); > - rte_free(dmp); > - } > + mp_name = dpdk_mp_name(dmp); > + VLOG_DBG("Releasing \"%s\" mempool", mp_name); > + free(mp_name); > + rte_mempool_free(dmp->mp); > + rte_free(dmp); > ovs_mutex_unlock(&dpdk_mp_mutex); > } > > @@ -567,7 +569,7 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev) > uint32_t buf_size = dpdk_buf_size(dev->requested_mtu); > struct dpdk_mp *mp; > > - mp = dpdk_mp_get(dev->requested_socket_id, FRAME_LEN_TO_MTU(buf_size)); > + mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size)); > if (!mp) { > VLOG_ERR("Insufficient memory to create memory pool for netdev " > "%s, with MTU %d on socket %d\n", > @@ -3129,10 +3131,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev) > > rte_eth_dev_stop(dev->port_id); > > - if (dev->mtu != dev->requested_mtu > - || dev->socket_id != dev->requested_socket_id) { > - netdev_dpdk_mempool_configure(dev); > - } > + netdev_dpdk_mempool_configure(dev); > > netdev->n_txq = dev->requested_n_txq; > netdev->n_rxq = dev->requested_n_rxq; > @@ -3168,11 +3167,8 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev) > > netdev_dpdk_remap_txqs(dev); > > - if (dev->requested_socket_id != dev->socket_id > - || dev->requested_mtu != dev->mtu) { > - if (!netdev_dpdk_mempool_configure(dev)) { > - netdev_change_seq_changed(&dev->up); > - } > + if (!netdev_dpdk_mempool_configure(dev)) { > + netdev_change_seq_changed(&dev->up); > } > > if (!dev->dpdk_mp) { >
On Wed, Apr 05, 2017 at 05:12:44PM +0100, Kevin Traynor wrote: > On 02/24/2017 11:27 AM, Robert Wojciechowicz wrote: > > Since it's possible to delete memory pool in DPDK > > we can try to estimate better required memory size > > when port is reconfigured, e.g. with different number > > of rx queues. > > > > Hi Robert, > > The patch does not apply on master. I think this is probably a good idea > because now that the mtu can be set you can end up with multiple large > mempools per socket. Thanks for looking into this patch. Good to see that it makes sense, so I will rebase it and send once again. > > It's very much down to the wire, I wonder if there should be some amount > of additional buffers (e.g. +MIN_NB_MBUF) for any corner cases which are > hard to find under basic testing. Also, there may be some impact to the > local mbuf cache when it is provisioned so tightly. Good point, I will think about it. > > Kevin. > Br, Robert
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index ee53c4c..1e10066 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -275,14 +275,12 @@ static struct ovs_list dpdk_list OVS_GUARDED_BY(dpdk_mutex) static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex) = OVS_MUTEX_INITIALIZER; -static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex) - = OVS_LIST_INITIALIZER(&dpdk_mp_list); - struct dpdk_mp { struct rte_mempool *mp; int mtu; int socket_id; - int refcount; + char if_name[IFNAMSIZ]; + unsigned mp_size; struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex); }; @@ -463,76 +461,80 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp, dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len); } +/* + * XXX Full DPDK memory pool name must be unique + * and cannot be longer than RTE_MEMPOOL_NAMESIZE + */ +static char* +dpdk_mp_name(struct dpdk_mp *dmp) +{ + uint32_t h = hash_string(dmp->if_name, 0); + char* mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof(char)); + int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u", + h, dmp->mtu, dmp->mp_size); + if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) { + return NULL; + } + return mp_name; +} + static struct dpdk_mp * -dpdk_mp_create(int socket_id, int mtu) +dpdk_mp_create(struct netdev_dpdk *dev, int mtu) { struct rte_pktmbuf_pool_private mbp_priv; struct dpdk_mp *dmp; - unsigned mp_size; char *mp_name; dmp = dpdk_rte_mzalloc(sizeof *dmp); if (!dmp) { return NULL; } - dmp->socket_id = socket_id; + dmp->socket_id = dev->requested_socket_id; dmp->mtu = mtu; - dmp->refcount = 1; + strncpy(dmp->if_name, dev->up.name, IFNAMSIZ); mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct dp_packet); mbp_priv.mbuf_priv_size = sizeof(struct dp_packet) - - sizeof(struct rte_mbuf); - /* XXX: this is a really rough method of provisioning memory. 
- * It's impossible to determine what the exact memory requirements are - * when the number of ports and rxqs that utilize a particular mempool can - * change dynamically at runtime. For now, use this rough heurisitic. + - sizeof(struct rte_mbuf); + /* + * XXX: rough estimation of memory required for port: + * <packets required to fill the device rxqs> + * + <packets that could be stuck on other ports txqs> + * + <packets in the pmd threads> */ - if (mtu >= ETHER_MTU) { - mp_size = MAX_NB_MBUF; - } else { - mp_size = MIN_NB_MBUF; - } - do { - mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id, - mp_size); + dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size + + dev->requested_n_txq * dev->requested_txq_size + + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST; - dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu), + do { + mp_name = dpdk_mp_name(dmp); + dmp->mp = rte_mempool_create(mp_name, dmp->mp_size, MBUF_SIZE(mtu), MP_CACHE_SZ, sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, &mbp_priv, ovs_rte_pktmbuf_init, NULL, - socket_id, 0); + dmp->socket_id, 0); if (dmp->mp) { VLOG_DBG("Allocated \"%s\" mempool with %u mbufs", - mp_name, mp_size); + mp_name, dmp->mp_size); } free(mp_name); if (dmp->mp) { return dmp; } - } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF); + } while (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF); rte_free(dmp); return NULL; } static struct dpdk_mp * -dpdk_mp_get(int socket_id, int mtu) +dpdk_mp_get(struct netdev_dpdk *dev, int mtu) { struct dpdk_mp *dmp; ovs_mutex_lock(&dpdk_mp_mutex); - LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) { - if (dmp->socket_id == socket_id && dmp->mtu == mtu) { - dmp->refcount++; - goto out; - } - } - - dmp = dpdk_mp_create(socket_id, mtu); - ovs_list_push_back(&dpdk_mp_list, &dmp->list_node); - -out: + dmp = dpdk_mp_create(dev, mtu); ovs_mutex_unlock(&dpdk_mp_mutex); return dmp; @@ -541,18 +543,18 @@ out: static void 
dpdk_mp_put(struct dpdk_mp *dmp) { + char *mp_name; + if (!dmp) { return; } ovs_mutex_lock(&dpdk_mp_mutex); - ovs_assert(dmp->refcount); - - if (!--dmp->refcount) { - ovs_list_remove(&dmp->list_node); - rte_mempool_free(dmp->mp); - rte_free(dmp); - } + mp_name = dpdk_mp_name(dmp); + VLOG_DBG("Releasing \"%s\" mempool", mp_name); + free(mp_name); + rte_mempool_free(dmp->mp); + rte_free(dmp); ovs_mutex_unlock(&dpdk_mp_mutex); } @@ -567,7 +569,7 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev) uint32_t buf_size = dpdk_buf_size(dev->requested_mtu); struct dpdk_mp *mp; - mp = dpdk_mp_get(dev->requested_socket_id, FRAME_LEN_TO_MTU(buf_size)); + mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size)); if (!mp) { VLOG_ERR("Insufficient memory to create memory pool for netdev " "%s, with MTU %d on socket %d\n", @@ -3129,10 +3131,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev) rte_eth_dev_stop(dev->port_id); - if (dev->mtu != dev->requested_mtu - || dev->socket_id != dev->requested_socket_id) { - netdev_dpdk_mempool_configure(dev); - } + netdev_dpdk_mempool_configure(dev); netdev->n_txq = dev->requested_n_txq; netdev->n_rxq = dev->requested_n_rxq; @@ -3168,11 +3167,8 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev) netdev_dpdk_remap_txqs(dev); - if (dev->requested_socket_id != dev->socket_id - || dev->requested_mtu != dev->mtu) { - if (!netdev_dpdk_mempool_configure(dev)) { - netdev_change_seq_changed(&dev->up); - } + if (!netdev_dpdk_mempool_configure(dev)) { + netdev_change_seq_changed(&dev->up); } if (!dev->dpdk_mp) {