Message ID | 1527094048-105084-12-git-send-email-tiago.lam@intel.com |
---|---|
State | Superseded |
Headers | show |
Series | Support multi-segment mbufs | expand |
> > From: Mark Kavanagh <mark.b.kavanagh@intel.com> > > Currently, packets are only copied to a single segment in > the function dpdk_do_tx_copy(). This could be an issue in > the case of jumbo frames, particularly when multi-segment > mbufs are involved. > > This patch calculates the number of segments needed by a > packet and copies the data to each segment. > > Co-authored-by: Michael Qiu <qiudayu@chinac.com> > > Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com> > Signed-off-by: Michael Qiu <qiudayu@chinac.com> > --- > lib/netdev-dpdk.c | 78 > ++++++++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 68 insertions(+), 10 deletions(-) > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index d3abdde..c6dfe6d 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -2178,6 +2178,71 @@ out: > } > } > > +static int > +dpdk_prep_tx_buf(struct dp_packet *packet, struct rte_mbuf **head, > + struct rte_mempool *mp) > +{ > + struct rte_mbuf *temp; > + uint32_t size = dp_packet_size(packet); > + uint16_t max_data_len, data_len; > + uint32_t nb_segs = 0; > + int i; > + > + temp = *head = rte_pktmbuf_alloc(mp); > + if (OVS_UNLIKELY(!temp)) { > + return 1; > + } > + > + /* All new allocated mbuf's max data len is the same */ > + max_data_len = temp->buf_len - temp->data_off; > + > + /* Calculate # of output mbufs. */ > + nb_segs = size / max_data_len; > + if (size % max_data_len) { > + nb_segs = nb_segs + 1; > + } > + > + /* Allocate additional mbufs when multiple output mbufs required. */ > + for (i = 1; i < nb_segs; i++) { > + temp->next = rte_pktmbuf_alloc(mp); > + if (!temp->next) { > + rte_pktmbuf_free(*head); > + *head = NULL; > + break; > + } > + temp = temp->next; > + } > + /* We have to do a copy for now */ > + rte_pktmbuf_pkt_len(*head) = size; > + temp = *head; > + > + data_len = size < max_data_len ? 
size: max_data_len; > + if (packet->source == DPBUF_DPDK) { > + *head = &(packet->mbuf); > + while (temp && head && size > 0) { > + rte_memcpy(rte_pktmbuf_mtod(temp, void *), > + dp_packet_data((struct dp_packet *)head), data_len); > + rte_pktmbuf_data_len(temp) = data_len; > + *head = (*head)->next; > + size = size - data_len; > + data_len = size < max_data_len ? size: max_data_len; > + temp = temp->next; > + } > + } else { > + int offset = 0; > + while (temp && size > 0) { > + memcpy(rte_pktmbuf_mtod(temp, void *), > + dp_packet_at(packet, offset, data_len), data_len); > + rte_pktmbuf_data_len(temp) = data_len; > + temp = temp->next; > + size = size - data_len; > + offset += data_len; > + data_len = size < max_data_len ? size: max_data_len; > + } > + } > + return 0; > +} > + > /* Tx function. Transmit packets indefinitely */ > static void > dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch > *batch) > @@ -2194,6 +2259,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, > struct dp_packet_batch *batch) > struct rte_mbuf *pkts[PKT_ARRAY_SIZE]; > uint32_t cnt = batch_cnt; > uint32_t dropped = 0; > + uint32_t i; > > if (dev->type != DPDK_DEV_VHOST) { > /* Check if QoS has been configured for this netdev. */ > @@ -2204,27 +2270,19 @@ dpdk_do_tx_copy(struct netdev *netdev, int > qid, struct dp_packet_batch *batch) > > uint32_t txcnt = 0; > > - for (uint32_t i = 0; i < cnt; i++) { > + for (i = 0; i < cnt; i++) { > struct dp_packet *packet = batch->packets[i]; > uint32_t size = dp_packet_size(packet); > - > if (OVS_UNLIKELY(size > dev->max_packet_len)) { > VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d", > size, dev->max_packet_len); > - > dropped++; > continue; > } > - > - pkts[txcnt] = rte_pktmbuf_alloc(dev->mp); > - if (OVS_UNLIKELY(!pkts[txcnt])) { > + if (!dpdk_prep_tx_buf(packet, &pkts[txcnt], dev->mp)) { This check is inverted: `dpdk_prep_tx_buf()` returns non-zero on failure, so the error path should be entered when the return value is non-zero — i.e. the condition should be `if (dpdk_prep_tx_buf(packet, &pkts[txcnt], dev->mp))`, without the `!`. 
> dropped += cnt - i; > break; > } > - > - /* We have to do a copy for now */ > - memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *), > - dp_packet_data(packet), size); > dp_packet_set_size((struct dp_packet *)pkts[txcnt], size); > dp_packet_copy_mbuf_flags((struct dp_packet *)pkts[txcnt], packet); > > -- > 2.7.4 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
On 23.05.2018 19:47, Tiago Lam wrote: > From: Mark Kavanagh <mark.b.kavanagh@intel.com> > > Currently, packets are only copied to a single segment in > the function dpdk_do_tx_copy(). This could be an issue in > the case of jumbo frames, particularly when multi-segment > mbufs are involved. > > This patch calculates the number of segments needed by a > packet and copies the data to each segment. > > Co-authored-by: Michael Qiu <qiudayu@chinac.com> > > Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com> > Signed-off-by: Michael Qiu <qiudayu@chinac.com> > --- > lib/netdev-dpdk.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 68 insertions(+), 10 deletions(-) > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index d3abdde..c6dfe6d 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -2178,6 +2178,71 @@ out: > } > } > > +static int > +dpdk_prep_tx_buf(struct dp_packet *packet, struct rte_mbuf **head, > + struct rte_mempool *mp) > +{ > + struct rte_mbuf *temp; > + uint32_t size = dp_packet_size(packet); > + uint16_t max_data_len, data_len; > + uint32_t nb_segs = 0; > + int i; > + > + temp = *head = rte_pktmbuf_alloc(mp); All the functions around OVS code *must* use 'dpdk_buf_alloc()' instead of 'rte_pktmbuf_alloc()'. Otherwise, the introduced 'nonpmd_mp_mutex' is useless. Also, all the calls to 'rte_pktmbuf_free()' must be replaced with 'free_dpdk_buf()' for the same reason. And it's better to do this right in the patch 08/13 and maintain in the subsequent patches. > + if (OVS_UNLIKELY(!temp)) { > + return 1; > + } > + > + /* All new allocated mbuf's max data len is the same */ > + max_data_len = temp->buf_len - temp->data_off; > + > + /* Calculate # of output mbufs. */ > + nb_segs = size / max_data_len; > + if (size % max_data_len) { > + nb_segs = nb_segs + 1; > + } > + > + /* Allocate additional mbufs when multiple output mbufs required. 
*/ > + for (i = 1; i < nb_segs; i++) { > + temp->next = rte_pktmbuf_alloc(mp); > + if (!temp->next) { > + rte_pktmbuf_free(*head); > + *head = NULL; > + break; > + } > + temp = temp->next; > + } > + /* We have to do a copy for now */ > + rte_pktmbuf_pkt_len(*head) = size; > + temp = *head; > + > + data_len = size < max_data_len ? size: max_data_len; > + if (packet->source == DPBUF_DPDK) { > + *head = &(packet->mbuf); > + while (temp && head && size > 0) { > + rte_memcpy(rte_pktmbuf_mtod(temp, void *), > + dp_packet_data((struct dp_packet *)head), data_len); > + rte_pktmbuf_data_len(temp) = data_len; > + *head = (*head)->next; > + size = size - data_len; > + data_len = size < max_data_len ? size: max_data_len; > + temp = temp->next; > + } > + } else { > + int offset = 0; > + while (temp && size > 0) { > + memcpy(rte_pktmbuf_mtod(temp, void *), > + dp_packet_at(packet, offset, data_len), data_len); > + rte_pktmbuf_data_len(temp) = data_len; > + temp = temp->next; > + size = size - data_len; > + offset += data_len; > + data_len = size < max_data_len ? size: max_data_len; > + } > + } > + return 0; > +} > + > /* Tx function. Transmit packets indefinitely */ > static void > dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) > @@ -2194,6 +2259,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) > struct rte_mbuf *pkts[PKT_ARRAY_SIZE]; > uint32_t cnt = batch_cnt; > uint32_t dropped = 0; > + uint32_t i; > > if (dev->type != DPDK_DEV_VHOST) { > /* Check if QoS has been configured for this netdev. 
*/ > @@ -2204,27 +2270,19 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) > > uint32_t txcnt = 0; > > - for (uint32_t i = 0; i < cnt; i++) { > + for (i = 0; i < cnt; i++) { > struct dp_packet *packet = batch->packets[i]; > uint32_t size = dp_packet_size(packet); > - > if (OVS_UNLIKELY(size > dev->max_packet_len)) { > VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d", > size, dev->max_packet_len); > - > dropped++; > continue; > } > - > - pkts[txcnt] = rte_pktmbuf_alloc(dev->mp); > - if (OVS_UNLIKELY(!pkts[txcnt])) { > + if (!dpdk_prep_tx_buf(packet, &pkts[txcnt], dev->mp)) { > dropped += cnt - i; > break; > } > - > - /* We have to do a copy for now */ > - memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *), > - dp_packet_data(packet), size); > dp_packet_set_size((struct dp_packet *)pkts[txcnt], size); > dp_packet_copy_mbuf_flags((struct dp_packet *)pkts[txcnt], packet); > >
On 05/06/2018 13:03, Ilya Maximets wrote: > On 23.05.2018 19:47, Tiago Lam wrote: >> From: Mark Kavanagh <mark.b.kavanagh@intel.com> >> >> Currently, packets are only copied to a single segment in >> the function dpdk_do_tx_copy(). This could be an issue in >> the case of jumbo frames, particularly when multi-segment >> mbufs are involved. >> >> This patch calculates the number of segments needed by a >> packet and copies the data to each segment. >> >> Co-authored-by: Michael Qiu <qiudayu@chinac.com> >> >> Signed-off-by: Mark Kavanagh <mark.b.kavanagh@intel.com> >> Signed-off-by: Michael Qiu <qiudayu@chinac.com> >> --- >> lib/netdev-dpdk.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++------- >> 1 file changed, 68 insertions(+), 10 deletions(-) >> >> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c >> index d3abdde..c6dfe6d 100644 >> --- a/lib/netdev-dpdk.c >> +++ b/lib/netdev-dpdk.c >> @@ -2178,6 +2178,71 @@ out: >> } >> } >> >> +static int >> +dpdk_prep_tx_buf(struct dp_packet *packet, struct rte_mbuf **head, >> + struct rte_mempool *mp) >> +{ >> + struct rte_mbuf *temp; >> + uint32_t size = dp_packet_size(packet); >> + uint16_t max_data_len, data_len; >> + uint32_t nb_segs = 0; >> + int i; >> + >> + temp = *head = rte_pktmbuf_alloc(mp); > > All the functions around OVS code *must* use 'dpdk_buf_alloc()' instead > of 'rte_pktmbuf_alloc()'. Otherwise, the introduced 'nonpmd_mp_mutex' > is useless. > > Also, all the calls to 'rte_pktmbuf_free()' must be replaced with > 'free_dpdk_buf()' for the same reason. > And it's better to do this right in the patch 08/13 and maintain in > the subsequent patches. > Hi Ilya, Thanks for pointing that out. I did miss the call to `free_dpdk_buf()` in `dp_packet_set_size()` in patch 04/13. However, from what I can see allocating mbufs directly from 'rte_pktmbuf_free()' was already happening before this patch, in `dpdk_do_tx_copy()`. This patch just happens to allocate more mbufs if multi-segment mbufs are being used. 
Now, this could have been introduced in the past and been overlooked, but it seems `dpdk_do_tx_copy()` is only called from `netdev_dpdk_vhost_send()` and `netdev_dpdk_send__()`, and that seems to be called with the `non_pmd_mutex` mutex held already (in dpif_netdev.c). Am I missing any other call path here? Otherwise it seems to be safe. I'm just confirming that this is the case though, as your suggestions do make sense to keep the code uniform, since we now have the wrappers around the `nonpmd_mp_mutex` mutex (and because it shouldn't increase contention, since the `dp->non_pmd_mutex` should already be held for non-pmds for this call path). I'll include it for the next iteration. Regards, Tiago.
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index d3abdde..c6dfe6d 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -2178,6 +2178,71 @@ out: } } +static int +dpdk_prep_tx_buf(struct dp_packet *packet, struct rte_mbuf **head, + struct rte_mempool *mp) +{ + struct rte_mbuf *temp; + uint32_t size = dp_packet_size(packet); + uint16_t max_data_len, data_len; + uint32_t nb_segs = 0; + int i; + + temp = *head = rte_pktmbuf_alloc(mp); + if (OVS_UNLIKELY(!temp)) { + return 1; + } + + /* All new allocated mbuf's max data len is the same */ + max_data_len = temp->buf_len - temp->data_off; + + /* Calculate # of output mbufs. */ + nb_segs = size / max_data_len; + if (size % max_data_len) { + nb_segs = nb_segs + 1; + } + + /* Allocate additional mbufs when multiple output mbufs required. */ + for (i = 1; i < nb_segs; i++) { + temp->next = rte_pktmbuf_alloc(mp); + if (!temp->next) { + rte_pktmbuf_free(*head); + *head = NULL; + break; + } + temp = temp->next; + } + /* We have to do a copy for now */ + rte_pktmbuf_pkt_len(*head) = size; + temp = *head; + + data_len = size < max_data_len ? size: max_data_len; + if (packet->source == DPBUF_DPDK) { + *head = &(packet->mbuf); + while (temp && head && size > 0) { + rte_memcpy(rte_pktmbuf_mtod(temp, void *), + dp_packet_data((struct dp_packet *)head), data_len); + rte_pktmbuf_data_len(temp) = data_len; + *head = (*head)->next; + size = size - data_len; + data_len = size < max_data_len ? size: max_data_len; + temp = temp->next; + } + } else { + int offset = 0; + while (temp && size > 0) { + memcpy(rte_pktmbuf_mtod(temp, void *), + dp_packet_at(packet, offset, data_len), data_len); + rte_pktmbuf_data_len(temp) = data_len; + temp = temp->next; + size = size - data_len; + offset += data_len; + data_len = size < max_data_len ? size: max_data_len; + } + } + return 0; +} + /* Tx function. 
Transmit packets indefinitely */ static void dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) @@ -2194,6 +2259,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) struct rte_mbuf *pkts[PKT_ARRAY_SIZE]; uint32_t cnt = batch_cnt; uint32_t dropped = 0; + uint32_t i; if (dev->type != DPDK_DEV_VHOST) { /* Check if QoS has been configured for this netdev. */ @@ -2204,27 +2270,19 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) uint32_t txcnt = 0; - for (uint32_t i = 0; i < cnt; i++) { + for (i = 0; i < cnt; i++) { struct dp_packet *packet = batch->packets[i]; uint32_t size = dp_packet_size(packet); - if (OVS_UNLIKELY(size > dev->max_packet_len)) { VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d", size, dev->max_packet_len); - dropped++; continue; } - - pkts[txcnt] = rte_pktmbuf_alloc(dev->mp); - if (OVS_UNLIKELY(!pkts[txcnt])) { + if (!dpdk_prep_tx_buf(packet, &pkts[txcnt], dev->mp)) { dropped += cnt - i; break; } - - /* We have to do a copy for now */ - memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *), - dp_packet_data(packet), size); dp_packet_set_size((struct dp_packet *)pkts[txcnt], size); dp_packet_copy_mbuf_flags((struct dp_packet *)pkts[txcnt], packet);