[ovs-dev,v4] netdev-dpdk: Create separate memory pool for each port.
diff mbox

Message ID 1503655752-18595-1-git-send-email-antonio.fischetti@intel.com
State Accepted
Delegated to: Darrell Ball
Headers show

Commit Message

Fischetti, Antonio Aug. 25, 2017, 10:09 a.m. UTC
Since it's possible to delete memory pool in DPDK
we can try to estimate better required memory size
when port is reconfigured, e.g. with different number
of rx queues.

CC: Kevin Traynor <ktraynor@redhat.com>
CC: Aaron Conole <aconole@redhat.com>
Signed-off-by: Robert Wojciechowicz <robertx.wojciechowicz@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fischetti@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fischetti@intel.com>
Acked-by: Ian Stokes <ian.stokes@intel.com>
---
v4:
 - code rebased
 - manage EEXIST err code after rte_pktmbuf_pool_create
 - replaced strncpy with ovs_strzcpy
 - added some VLOG_DBG msgs

v3:
 - adding memory for corner cases

v2:
 - removing mempool reference counter
 - making sure mempool name isn't longer than RTE_MEMPOOL_NAMESIZE
---
 lib/netdev-dpdk.c | 135 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 76 insertions(+), 59 deletions(-)

Comments

Darrell Ball Aug. 29, 2017, 8:39 a.m. UTC | #1
Hi Antonio

I plan to look at this more closely tomorrow and hope to get this in this week. 

Thanks Darrell


On 8/25/17, 3:09 AM, "ovs-dev-bounces@openvswitch.org on behalf of antonio.fischetti@intel.com" <ovs-dev-bounces@openvswitch.org on behalf of antonio.fischetti@intel.com> wrote:

    Since it's possible to delete memory pool in DPDK
    we can try to estimate better required memory size
    when port is reconfigured, e.g. with different number
    of rx queues.
    
    CC: Kevin Traynor <ktraynor@redhat.com>
    CC: Aaron Conole <aconole@redhat.com>
    Signed-off-by: Robert Wojciechowicz <robertx.wojciechowicz@intel.com>
    Co-authored-by: Antonio Fischetti <antonio.fischetti@intel.com>
    Signed-off-by: Antonio Fischetti <antonio.fischetti@intel.com>
    Acked-by: Ian Stokes <ian.stokes@intel.com>
    ---
    v4:
     - code rebased
     - manage EEXIST err code after rte_pktmbuf_pool_create
     - replaced strncpy with ovs_strzcpy
     - added some VLOG_DBG msgs
    
    v3:
     - adding memory for corner cases
    
    v2:
     - removing mempool reference counter
     - making sure mempool name isn't longer than RTE_MEMPOOL_NAMESIZE
    ---
     lib/netdev-dpdk.c | 135 ++++++++++++++++++++++++++++++------------------------
     1 file changed, 76 insertions(+), 59 deletions(-)
    
    diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
    index 1aaf6f7..35da76a 100644
    --- a/lib/netdev-dpdk.c
    +++ b/lib/netdev-dpdk.c
    @@ -303,14 +303,12 @@ static struct ovs_list dpdk_list OVS_GUARDED_BY(dpdk_mutex)
     static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex)
         = OVS_MUTEX_INITIALIZER;
     
    -static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex)
    -    = OVS_LIST_INITIALIZER(&dpdk_mp_list);
    -
     struct dpdk_mp {
         struct rte_mempool *mp;
         int mtu;
         int socket_id;
    -    int refcount;
    +    char if_name[IFNAMSIZ];
    +    unsigned mp_size;
         struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
     };
     
    @@ -492,45 +490,81 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp OVS_UNUSED,
         dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len);
     }
     
    +/*
    + * Full DPDK memory pool name must be unique
    + * and cannot be longer than RTE_MEMPOOL_NAMESIZE
    + */
    +static char *
    +dpdk_mp_name(struct dpdk_mp *dmp)
    +{
    +    uint32_t h = hash_string(dmp->if_name, 0);
    +    char *mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof(char));
    +    int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u",
    +                       h, dmp->mtu, dmp->mp_size);
    +    if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
    +        return NULL;
    +    }
    +    return mp_name;
    +}
    +
     static struct dpdk_mp *
    -dpdk_mp_create(int socket_id, int mtu)
    +dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
     {
         struct dpdk_mp *dmp;
    -    unsigned mp_size;
         char *mp_name;
    +    bool mp_exists = false;
     
         dmp = dpdk_rte_mzalloc(sizeof *dmp);
         if (!dmp) {
             return NULL;
         }
    -    dmp->socket_id = socket_id;
    +    dmp->socket_id = dev->requested_socket_id;
         dmp->mtu = mtu;
    -    dmp->refcount = 1;
    -    /* XXX: this is a really rough method of provisioning memory.
    -     * It's impossible to determine what the exact memory requirements are
    -     * when the number of ports and rxqs that utilize a particular mempool can
    -     * change dynamically at runtime. For now, use this rough heurisitic.
    +    ovs_strzcpy(dmp->if_name, dev->up.name, IFNAMSIZ);
    +
    +    /*
    +     * XXX: rough estimation of memory required for port:
    +     * <packets required to fill the device rxqs>
    +     * + <packets that could be stuck on other ports txqs>
    +     * + <packets in the pmd threads>
    +     * + <additional memory for corner cases>
          */
    -    if (mtu >= ETHER_MTU) {
    -        mp_size = MAX_NB_MBUF;
    -    } else {
    -        mp_size = MIN_NB_MBUF;
    -    }
    +    dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size
    +            + dev->requested_n_txq * dev->requested_txq_size
    +            + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST
    +            + MIN_NB_MBUF;
     
         do {
    -        mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id,
    -                            mp_size);
    +        mp_name = dpdk_mp_name(dmp);
    +
    +        VLOG_DBG("Requesting a mempool of %u mbufs for netdev %s "
    +                 "with %d Rx and %d Tx queues.",
    +                 dmp->mp_size, dev->up.name,
    +                 dev->requested_n_rxq, dev->requested_n_txq);
     
    -        dmp->mp = rte_pktmbuf_pool_create(mp_name, mp_size,
    +        dmp->mp = rte_pktmbuf_pool_create(mp_name, dmp->mp_size,
                                               MP_CACHE_SZ,
                                               sizeof (struct dp_packet)
                                                      - sizeof (struct rte_mbuf),
                                               MBUF_SIZE(mtu)
                                                      - sizeof(struct dp_packet),
    -                                          socket_id);
    +                                          dmp->socket_id);
             if (dmp->mp) {
                 VLOG_DBG("Allocated \"%s\" mempool with %u mbufs",
    -                     mp_name, mp_size);
    +                     mp_name, dmp->mp_size);
    +        } else if (rte_errno == EEXIST) {
    +            /* A mempool with the same name already exists.  We just
    +             * retrieve its pointer to be returned to the caller. */
    +            dmp->mp = rte_mempool_lookup(mp_name);
    +            VLOG_DBG("A mempool with name %s already exists at %p.",
    +                    mp_name, dmp->mp);
    +            /* As the mempool create returned EEXIST we can expect the
    +             * lookup has returned a valid pointer.  If for some reason
    +             * that's not the case we keep track of it. */
    +            mp_exists = true;
    +        } else {
    +            VLOG_DBG("Mempool create failed with error: %s",
    +                    rte_strerror(rte_errno));
             }
             free(mp_name);
             if (dmp->mp) {
    @@ -541,31 +575,20 @@ dpdk_mp_create(int socket_id, int mtu)
                 rte_mempool_obj_iter(dmp->mp, ovs_rte_pktmbuf_init, NULL);
                 return dmp;
             }
    -    } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
    +    } while (!mp_exists &&
    +            (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF));
     
         rte_free(dmp);
         return NULL;
     }
     
     static struct dpdk_mp *
    -dpdk_mp_get(int socket_id, int mtu)
    +dpdk_mp_get(struct netdev_dpdk *dev, int mtu)
     {
         struct dpdk_mp *dmp;
     
         ovs_mutex_lock(&dpdk_mp_mutex);
    -    LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
    -        if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
    -            dmp->refcount++;
    -            goto out;
    -        }
    -    }
    -
    -    dmp = dpdk_mp_create(socket_id, mtu);
    -    if (dmp) {
    -        ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
    -    }
    -
    -out:
    +    dmp = dpdk_mp_create(dev, mtu);
         ovs_mutex_unlock(&dpdk_mp_mutex);
     
         return dmp;
    @@ -574,18 +597,18 @@ out:
     static void
     dpdk_mp_put(struct dpdk_mp *dmp)
     {
    +    char *mp_name;
    +
         if (!dmp) {
             return;
         }
     
         ovs_mutex_lock(&dpdk_mp_mutex);
    -    ovs_assert(dmp->refcount);
    -
    -    if (!--dmp->refcount) {
    -        ovs_list_remove(&dmp->list_node);
    -        rte_mempool_free(dmp->mp);
    -        rte_free(dmp);
    -    }
    +    mp_name = dpdk_mp_name(dmp);
    +    VLOG_DBG("Releasing \"%s\" mempool", mp_name);
    +    free(mp_name);
    +    rte_mempool_free(dmp->mp);
    +    rte_free(dmp);
         ovs_mutex_unlock(&dpdk_mp_mutex);
     }
     
    @@ -600,7 +623,7 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
         uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
         struct dpdk_mp *mp;
     
    -    mp = dpdk_mp_get(dev->requested_socket_id, FRAME_LEN_TO_MTU(buf_size));
    +    mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size));
         if (!mp) {
             VLOG_ERR("Failed to create memory pool for netdev "
                      "%s, with MTU %d on socket %d: %s\n",
    @@ -3173,12 +3196,9 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
     
         rte_eth_dev_stop(dev->port_id);
     
    -    if (dev->mtu != dev->requested_mtu
    -        || dev->socket_id != dev->requested_socket_id) {
    -        err = netdev_dpdk_mempool_configure(dev);
    -        if (err) {
    -            goto out;
    -        }
    +    err = netdev_dpdk_mempool_configure(dev);
    +    if (err) {
    +        goto out;
         }
     
         netdev->n_txq = dev->requested_n_txq;
    @@ -3216,14 +3236,11 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
     
         netdev_dpdk_remap_txqs(dev);
     
    -    if (dev->requested_socket_id != dev->socket_id
    -        || dev->requested_mtu != dev->mtu) {
    -        err = netdev_dpdk_mempool_configure(dev);
    -        if (err) {
    -            return err;
    -        } else {
    -            netdev_change_seq_changed(&dev->up);
    -        }
    +    err = netdev_dpdk_mempool_configure(dev);
    +    if (err) {
    +        return err;
    +    } else {
    +        netdev_change_seq_changed(&dev->up);
         }
     
         if (netdev_dpdk_get_vid(dev) >= 0) {
    -- 
    2.4.11
    
    _______________________________________________
    dev mailing list
    dev@openvswitch.org
    https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=AqIO2iioYT6_E40URpgYB2Vha0AzDZLdqpoMpAli5Nk&s=XvZgZ_MCsuZDBoUCGYGN5BodBNB7R5gHC8p6-CpwvNU&e=
Darrell Ball Sept. 1, 2017, 9:27 p.m. UTC | #2
Nice fix for V4 Antonio,  handling the existing memory pool case.

I applied the patch to dpdk_merge here

https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_darball_ovs_commits_dpdk-5Fmerge&d=DwIGaQ&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=A2_FCacqbp2moAo3HGFlTuxsjONUGhlN42OBcAuQQ6w&s=b6btPKhgvOFr2GOUYvktND6kaC6jc3fXI-mXfvNgXOU&e=

with the following incremental

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index e62e7ec..a850947 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -498,7 +498,7 @@ static char *
 dpdk_mp_name(struct dpdk_mp *dmp)
 {
     uint32_t h = hash_string(dmp->if_name, 0);
-    char *mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof(char));
+    char *mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof *mp_name);
     int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u",
                        h, dmp->mtu, dmp->mp_size);
     if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
@@ -510,11 +510,7 @@ dpdk_mp_name(struct dpdk_mp *dmp)
 static struct dpdk_mp *
 dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
 {
-    struct dpdk_mp *dmp;
-    char *mp_name;
-    bool mp_exists = false;
-
-    dmp = dpdk_rte_mzalloc(sizeof *dmp);
+    struct dpdk_mp *dmp = dpdk_rte_mzalloc(sizeof *dmp);
     if (!dmp) {
         return NULL;
     }
@@ -534,8 +530,10 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
             + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST
             + MIN_NB_MBUF;
 
+    bool mp_exists = false;
+
     do {
-        mp_name = dpdk_mp_name(dmp);
+        char *mp_name = dpdk_mp_name(dmp);
 
         VLOG_DBG("Requesting a mempool of %u mbufs for netdev %s "
                  "with %d Rx and %d Tx queues.",
@@ -550,21 +548,21 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
                                                  - sizeof(struct dp_packet),
                                           dmp->socket_id);
         if (dmp->mp) {
-            VLOG_DBG("Allocated \"%s\" mempool with %u mbufs",
-                     mp_name, dmp->mp_size);
+            VLOG_DBG("Allocated \"%s\" mempool with %u mbufs", mp_name,
+                     dmp->mp_size);
         } else if (rte_errno == EEXIST) {
             /* A mempool with the same name already exists.  We just
              * retrieve its pointer to be returned to the caller. */
             dmp->mp = rte_mempool_lookup(mp_name);
             VLOG_DBG("A mempool with name %s already exists at %p.",
-                    mp_name, dmp->mp);
+                     mp_name, dmp->mp);
             /* As the mempool create returned EEXIST we can expect the
              * lookup has returned a valid pointer.  If for some reason
              * that's not the case we keep track of it. */
             mp_exists = true;
         } else {
-            VLOG_DBG("Mempool create failed with error: %s",
-                    rte_strerror(rte_errno));
+            VLOG_ERR("Failed mempool \"%s\" create request of %u mbufs",
+                     mp_name, dmp->mp_size);
         }
         free(mp_name);
         if (dmp->mp) {





On 9/1/17, 9:35 AM, "Darrell Ball" <dball@vmware.com> wrote:

    
    
    On 8/29/17, 1:39 AM, "Darrell Ball" <dball@vmware.com> wrote:
    
        Hi Antonio
        
        I plan to look at this more closely tomorrow and hope to get this in this week. 
        
        Thanks Darrell
        
        
        On 8/25/17, 3:09 AM, "ovs-dev-bounces@openvswitch.org on behalf of antonio.fischetti@intel.com" <ovs-dev-bounces@openvswitch.org on behalf of antonio.fischetti@intel.com> wrote:
        
            Since it's possible to delete memory pool in DPDK
            we can try to estimate better required memory size
            when port is reconfigured, e.g. with different number
            of rx queues.
            
            CC: Kevin Traynor <ktraynor@redhat.com>
            CC: Aaron Conole <aconole@redhat.com>
            Signed-off-by: Robert Wojciechowicz <robertx.wojciechowicz@intel.com>
            Co-authored-by: Antonio Fischetti <antonio.fischetti@intel.com>
            Signed-off-by: Antonio Fischetti <antonio.fischetti@intel.com>
            Acked-by: Ian Stokes <ian.stokes@intel.com>
            ---
            v4:
             - code rebased
             - manage EEXIST err code after rte_pktmbuf_pool_create
             - replaced strncpy with ovs_strzcpy
             - added some VLOG_DBG msgs
            
            v3:
             - adding memory for corner cases
            
            v2:
             - removing mempool reference counter
             - making sure mempool name isn't longer than RTE_MEMPOOL_NAMESIZE
            ---
             lib/netdev-dpdk.c | 135 ++++++++++++++++++++++++++++++------------------------
             1 file changed, 76 insertions(+), 59 deletions(-)
            
            diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
            index 1aaf6f7..35da76a 100644
            --- a/lib/netdev-dpdk.c
            +++ b/lib/netdev-dpdk.c
            @@ -303,14 +303,12 @@ static struct ovs_list dpdk_list OVS_GUARDED_BY(dpdk_mutex)
             static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex)
                 = OVS_MUTEX_INITIALIZER;
             
            -static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex)
            -    = OVS_LIST_INITIALIZER(&dpdk_mp_list);
            -
             struct dpdk_mp {
                 struct rte_mempool *mp;
                 int mtu;
                 int socket_id;
            -    int refcount;
            +    char if_name[IFNAMSIZ];
            +    unsigned mp_size;
                 struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
             };
             
            @@ -492,45 +490,81 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp OVS_UNUSED,
                 dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len);
             }
             
            +/*
            + * Full DPDK memory pool name must be unique
            + * and cannot be longer than RTE_MEMPOOL_NAMESIZE
            + */
            +static char *
            +dpdk_mp_name(struct dpdk_mp *dmp)
            +{
            +    uint32_t h = hash_string(dmp->if_name, 0);
            +    char *mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof(char));
            +    int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u",
            +                       h, dmp->mtu, dmp->mp_size);
            +    if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
            +        return NULL;
            +    }
            +    return mp_name;
            +}
            +
             static struct dpdk_mp *
            -dpdk_mp_create(int socket_id, int mtu)
            +dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
             {
                 struct dpdk_mp *dmp;
            -    unsigned mp_size;
                 char *mp_name;
            +    bool mp_exists = false;
             
                 dmp = dpdk_rte_mzalloc(sizeof *dmp);
                 if (!dmp) {
                     return NULL;
                 }
            -    dmp->socket_id = socket_id;
            +    dmp->socket_id = dev->requested_socket_id;
                 dmp->mtu = mtu;
            -    dmp->refcount = 1;
            -    /* XXX: this is a really rough method of provisioning memory.
            -     * It's impossible to determine what the exact memory requirements are
            -     * when the number of ports and rxqs that utilize a particular mempool can
            -     * change dynamically at runtime. For now, use this rough heurisitic.
            +    ovs_strzcpy(dmp->if_name, dev->up.name, IFNAMSIZ);
            +
            +    /*
            +     * XXX: rough estimation of memory required for port:
            +     * <packets required to fill the device rxqs>
            +     * + <packets that could be stuck on other ports txqs>
            +     * + <packets in the pmd threads>
            +     * + <additional memory for corner cases>
                  */
            -    if (mtu >= ETHER_MTU) {
            -        mp_size = MAX_NB_MBUF;
            -    } else {
            -        mp_size = MIN_NB_MBUF;
            -    }
            +    dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size
            +            + dev->requested_n_txq * dev->requested_txq_size
            +            + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST
            +            + MIN_NB_MBUF;
             
                 do {
            -        mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id,
            -                            mp_size);
            +        mp_name = dpdk_mp_name(dmp);
            +
            +        VLOG_DBG("Requesting a mempool of %u mbufs for netdev %s "
            +                 "with %d Rx and %d Tx queues.",
            +                 dmp->mp_size, dev->up.name,
            +                 dev->requested_n_rxq, dev->requested_n_txq);
             
            -        dmp->mp = rte_pktmbuf_pool_create(mp_name, mp_size,
            +        dmp->mp = rte_pktmbuf_pool_create(mp_name, dmp->mp_size,
                                                       MP_CACHE_SZ,
                                                       sizeof (struct dp_packet)
                                                              - sizeof (struct rte_mbuf),
                                                       MBUF_SIZE(mtu)
                                                              - sizeof(struct dp_packet),
            -                                          socket_id);
            +                                          dmp->socket_id);
                     if (dmp->mp) {
                         VLOG_DBG("Allocated \"%s\" mempool with %u mbufs",
            -                     mp_name, mp_size);
            +                     mp_name, dmp->mp_size);
            +        } else if (rte_errno == EEXIST) {
            +            /* A mempool with the same name already exists.  We just
            +             * retrieve its pointer to be returned to the caller. */
            +            dmp->mp = rte_mempool_lookup(mp_name);
            +            VLOG_DBG("A mempool with name %s already exists at %p.",
            +                    mp_name, dmp->mp);
            +            /* As the mempool create returned EEXIST we can expect the
            +             * lookup has returned a valid pointer.  If for some reason
            +             * that's not the case we keep track of it. */
            +            mp_exists = true;
            +        } else {
            +            VLOG_DBG("Mempool create failed with error: %s",
            +                    rte_strerror(rte_errno));
                     }
                     free(mp_name);
                     if (dmp->mp) {
            @@ -541,31 +575,20 @@ dpdk_mp_create(int socket_id, int mtu)
                         rte_mempool_obj_iter(dmp->mp, ovs_rte_pktmbuf_init, NULL);
                         return dmp;
                     }
            -    } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
            +    } while (!mp_exists &&
            +            (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF));
             
                 rte_free(dmp);
                 return NULL;
             }
             
             static struct dpdk_mp *
            -dpdk_mp_get(int socket_id, int mtu)
            +dpdk_mp_get(struct netdev_dpdk *dev, int mtu)
             {
                 struct dpdk_mp *dmp;
             
                 ovs_mutex_lock(&dpdk_mp_mutex);
            -    LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
            -        if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
            -            dmp->refcount++;
            -            goto out;
            -        }
            -    }
            -
            -    dmp = dpdk_mp_create(socket_id, mtu);
            -    if (dmp) {
            -        ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
            -    }
            -
            -out:
            +    dmp = dpdk_mp_create(dev, mtu);
                 ovs_mutex_unlock(&dpdk_mp_mutex);
             
                 return dmp;
            @@ -574,18 +597,18 @@ out:
             static void
             dpdk_mp_put(struct dpdk_mp *dmp)
             {
            +    char *mp_name;
            +
                 if (!dmp) {
                     return;
                 }
             
                 ovs_mutex_lock(&dpdk_mp_mutex);
            -    ovs_assert(dmp->refcount);
            -
            -    if (!--dmp->refcount) {
            -        ovs_list_remove(&dmp->list_node);
            -        rte_mempool_free(dmp->mp);
            -        rte_free(dmp);
            -    }
            +    mp_name = dpdk_mp_name(dmp);
            +    VLOG_DBG("Releasing \"%s\" mempool", mp_name);
            +    free(mp_name);
            +    rte_mempool_free(dmp->mp);
            +    rte_free(dmp);
                 ovs_mutex_unlock(&dpdk_mp_mutex);
             }
             
            @@ -600,7 +623,7 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
                 uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
                 struct dpdk_mp *mp;
             
            -    mp = dpdk_mp_get(dev->requested_socket_id, FRAME_LEN_TO_MTU(buf_size));
            +    mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size));
                 if (!mp) {
                     VLOG_ERR("Failed to create memory pool for netdev "
                              "%s, with MTU %d on socket %d: %s\n",
            @@ -3173,12 +3196,9 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
             
                 rte_eth_dev_stop(dev->port_id);
             
            -    if (dev->mtu != dev->requested_mtu
            -        || dev->socket_id != dev->requested_socket_id) {
            -        err = netdev_dpdk_mempool_configure(dev);
            -        if (err) {
            -            goto out;
            -        }
            +    err = netdev_dpdk_mempool_configure(dev);
            +    if (err) {
            +        goto out;
                 }
             
                 netdev->n_txq = dev->requested_n_txq;
            @@ -3216,14 +3236,11 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
             
                 netdev_dpdk_remap_txqs(dev);
             
            -    if (dev->requested_socket_id != dev->socket_id
            -        || dev->requested_mtu != dev->mtu) {
            -        err = netdev_dpdk_mempool_configure(dev);
            -        if (err) {
            -            return err;
            -        } else {
            -            netdev_change_seq_changed(&dev->up);
            -        }
            +    err = netdev_dpdk_mempool_configure(dev);
            +    if (err) {
            +        return err;
            +    } else {
            +        netdev_change_seq_changed(&dev->up);
                 }
             
                 if (netdev_dpdk_get_vid(dev) >= 0) {
            -- 
            2.4.11
            
            _______________________________________________
            dev mailing list
            dev@openvswitch.org
            https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=AqIO2iioYT6_E40URpgYB2Vha0AzDZLdqpoMpAli5Nk&s=XvZgZ_MCsuZDBoUCGYGN5BodBNB7R5gHC8p6-CpwvNU&e=

Patch
diff mbox

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 1aaf6f7..35da76a 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -303,14 +303,12 @@  static struct ovs_list dpdk_list OVS_GUARDED_BY(dpdk_mutex)
 static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex)
     = OVS_MUTEX_INITIALIZER;
 
-static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex)
-    = OVS_LIST_INITIALIZER(&dpdk_mp_list);
-
 struct dpdk_mp {
     struct rte_mempool *mp;
     int mtu;
     int socket_id;
-    int refcount;
+    char if_name[IFNAMSIZ];
+    unsigned mp_size;
     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
 };
 
@@ -492,45 +490,81 @@  ovs_rte_pktmbuf_init(struct rte_mempool *mp OVS_UNUSED,
     dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len);
 }
 
+/*
+ * Full DPDK memory pool name must be unique
+ * and cannot be longer than RTE_MEMPOOL_NAMESIZE
+ */
+static char *
+dpdk_mp_name(struct dpdk_mp *dmp)
+{
+    uint32_t h = hash_string(dmp->if_name, 0);
+    char *mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof(char));
+    int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u",
+                       h, dmp->mtu, dmp->mp_size);
+    if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
+        return NULL;
+    }
+    return mp_name;
+}
+
 static struct dpdk_mp *
-dpdk_mp_create(int socket_id, int mtu)
+dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
 {
     struct dpdk_mp *dmp;
-    unsigned mp_size;
     char *mp_name;
+    bool mp_exists = false;
 
     dmp = dpdk_rte_mzalloc(sizeof *dmp);
     if (!dmp) {
         return NULL;
     }
-    dmp->socket_id = socket_id;
+    dmp->socket_id = dev->requested_socket_id;
     dmp->mtu = mtu;
-    dmp->refcount = 1;
-    /* XXX: this is a really rough method of provisioning memory.
-     * It's impossible to determine what the exact memory requirements are
-     * when the number of ports and rxqs that utilize a particular mempool can
-     * change dynamically at runtime. For now, use this rough heurisitic.
+    ovs_strzcpy(dmp->if_name, dev->up.name, IFNAMSIZ);
+
+    /*
+     * XXX: rough estimation of memory required for port:
+     * <packets required to fill the device rxqs>
+     * + <packets that could be stuck on other ports txqs>
+     * + <packets in the pmd threads>
+     * + <additional memory for corner cases>
      */
-    if (mtu >= ETHER_MTU) {
-        mp_size = MAX_NB_MBUF;
-    } else {
-        mp_size = MIN_NB_MBUF;
-    }
+    dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size
+            + dev->requested_n_txq * dev->requested_txq_size
+            + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST
+            + MIN_NB_MBUF;
 
     do {
-        mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id,
-                            mp_size);
+        mp_name = dpdk_mp_name(dmp);
+
+        VLOG_DBG("Requesting a mempool of %u mbufs for netdev %s "
+                 "with %d Rx and %d Tx queues.",
+                 dmp->mp_size, dev->up.name,
+                 dev->requested_n_rxq, dev->requested_n_txq);
 
-        dmp->mp = rte_pktmbuf_pool_create(mp_name, mp_size,
+        dmp->mp = rte_pktmbuf_pool_create(mp_name, dmp->mp_size,
                                           MP_CACHE_SZ,
                                           sizeof (struct dp_packet)
                                                  - sizeof (struct rte_mbuf),
                                           MBUF_SIZE(mtu)
                                                  - sizeof(struct dp_packet),
-                                          socket_id);
+                                          dmp->socket_id);
         if (dmp->mp) {
             VLOG_DBG("Allocated \"%s\" mempool with %u mbufs",
-                     mp_name, mp_size);
+                     mp_name, dmp->mp_size);
+        } else if (rte_errno == EEXIST) {
+            /* A mempool with the same name already exists.  We just
+             * retrieve its pointer to be returned to the caller. */
+            dmp->mp = rte_mempool_lookup(mp_name);
+            VLOG_DBG("A mempool with name %s already exists at %p.",
+                    mp_name, dmp->mp);
+            /* As the mempool create returned EEXIST we can expect the
+             * lookup has returned a valid pointer.  If for some reason
+             * that's not the case we keep track of it. */
+            mp_exists = true;
+        } else {
+            VLOG_DBG("Mempool create failed with error: %s",
+                    rte_strerror(rte_errno));
         }
         free(mp_name);
         if (dmp->mp) {
@@ -541,31 +575,20 @@  dpdk_mp_create(int socket_id, int mtu)
             rte_mempool_obj_iter(dmp->mp, ovs_rte_pktmbuf_init, NULL);
             return dmp;
         }
-    } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
+    } while (!mp_exists &&
+            (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF));
 
     rte_free(dmp);
     return NULL;
 }
 
 static struct dpdk_mp *
-dpdk_mp_get(int socket_id, int mtu)
+dpdk_mp_get(struct netdev_dpdk *dev, int mtu)
 {
     struct dpdk_mp *dmp;
 
     ovs_mutex_lock(&dpdk_mp_mutex);
-    LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
-        if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
-            dmp->refcount++;
-            goto out;
-        }
-    }
-
-    dmp = dpdk_mp_create(socket_id, mtu);
-    if (dmp) {
-        ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
-    }
-
-out:
+    dmp = dpdk_mp_create(dev, mtu);
     ovs_mutex_unlock(&dpdk_mp_mutex);
 
     return dmp;
@@ -574,18 +597,18 @@  out:
 static void
 dpdk_mp_put(struct dpdk_mp *dmp)
 {
+    char *mp_name;
+
     if (!dmp) {
         return;
     }
 
     ovs_mutex_lock(&dpdk_mp_mutex);
-    ovs_assert(dmp->refcount);
-
-    if (!--dmp->refcount) {
-        ovs_list_remove(&dmp->list_node);
-        rte_mempool_free(dmp->mp);
-        rte_free(dmp);
-    }
+    mp_name = dpdk_mp_name(dmp);
+    VLOG_DBG("Releasing \"%s\" mempool", mp_name);
+    free(mp_name);
+    rte_mempool_free(dmp->mp);
+    rte_free(dmp);
     ovs_mutex_unlock(&dpdk_mp_mutex);
 }
 
@@ -600,7 +623,7 @@  netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
     uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
     struct dpdk_mp *mp;
 
-    mp = dpdk_mp_get(dev->requested_socket_id, FRAME_LEN_TO_MTU(buf_size));
+    mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size));
     if (!mp) {
         VLOG_ERR("Failed to create memory pool for netdev "
                  "%s, with MTU %d on socket %d: %s\n",
@@ -3173,12 +3196,9 @@  netdev_dpdk_reconfigure(struct netdev *netdev)
 
     rte_eth_dev_stop(dev->port_id);
 
-    if (dev->mtu != dev->requested_mtu
-        || dev->socket_id != dev->requested_socket_id) {
-        err = netdev_dpdk_mempool_configure(dev);
-        if (err) {
-            goto out;
-        }
+    err = netdev_dpdk_mempool_configure(dev);
+    if (err) {
+        goto out;
     }
 
     netdev->n_txq = dev->requested_n_txq;
@@ -3216,14 +3236,11 @@  dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
 
     netdev_dpdk_remap_txqs(dev);
 
-    if (dev->requested_socket_id != dev->socket_id
-        || dev->requested_mtu != dev->mtu) {
-        err = netdev_dpdk_mempool_configure(dev);
-        if (err) {
-            return err;
-        } else {
-            netdev_change_seq_changed(&dev->up);
-        }
+    err = netdev_dpdk_mempool_configure(dev);
+    if (err) {
+        return err;
+    } else {
+        netdev_change_seq_changed(&dev->up);
     }
 
     if (netdev_dpdk_get_vid(dev) >= 0) {