Message ID | 20220113082317.2329953-1-huanghuibai@didiglobal.com |
---|---|
State | Changes Requested |
Headers | show |
Series | [ovs-dev] netdev-dpdk: prepare for tso offload for tx copy packet | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | warning | apply and check: warning |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
Bleep bloop. Greetings Harold Huang, I am a robot and I have tried out your patch. Thanks for your contribution. I encountered some error that I wasn't expecting. See the details below. checkpatch: WARNING: Line is 80 characters long (recommended limit is 79) #26 FILE: lib/netdev-dpdk.c:2718: dpdk_copy_dp_packet_to_mbuf(struct netdev_dpdk *dev, struct dp_packet *pkt_orig) Lines checked: 61, Warnings: 1, Errors: 0 Please check this out. If you feel there has been an error, please email aconole@redhat.com Thanks, 0-day Robot
Hi, Harold, I am curious how this bug is found as I have an unsolved bug that I believe it's related to some memory issues and may be related to NIC's problem. Harold Huang <baymaxhuang@gmail.com> 于2022年1月13日周四 16:24写道: > From: Harold Huang <baymaxhuang@gmail.com> > > When one flow is output to multiple egress ports, OVS copy the packets > and send the copy packets to the intermediate ports. The original packets > is sent to the last port. If the intermediate port is a dpdk port, the copy > packets should also be prepared for tso offload. > > Fixes: 29cf9c1b3b ("userspace: Add TCP Segmentation Offload support") > Signed-off-by: Harold Huang <baymaxhuang@gmail.com> > --- > lib/netdev-dpdk.c | 13 ++++++------- > 1 file changed, 6 insertions(+), 7 deletions(-) > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 6782d3e8f..83029405e 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -2737,10 +2737,11 @@ dpdk_pktmbuf_alloc(struct rte_mempool *mp, > uint32_t data_len) > } > > static struct dp_packet * > -dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet > *pkt_orig) > +dpdk_copy_dp_packet_to_mbuf(struct netdev_dpdk *dev, struct dp_packet > *pkt_orig) > { > struct rte_mbuf *mbuf_dest; > struct dp_packet *pkt_dest; > + struct rte_mempool *mp = dev->dpdk_mp->mp; > uint32_t pkt_len; > > pkt_len = dp_packet_size(pkt_orig); > @@ -2761,11 +2762,9 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, > struct dp_packet *pkt_orig) > memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, > sizeof(struct dp_packet) - offsetof(struct dp_packet, > l2_pad_size)); > > - if (mbuf_dest->ol_flags & RTE_MBUF_F_TX_L4_MASK) { > - mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest) > - - (char *)dp_packet_eth(pkt_dest); > - mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest) > - - (char *) dp_packet_l3(pkt_dest); > + if (!netdev_dpdk_prep_hwol_packet(dev, mbuf_dest)) { > perhaps check userspace_tso_enabled()? > + rte_pktmbuf_free(mbuf_dest); > + return NULL; > } > > return pkt_dest; > @@ -2813,7 +2812,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, > struct dp_packet_batch *batch) > continue; > } > > - pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev->dpdk_mp->mp, > packet); > + pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev, packet); > if (OVS_UNLIKELY(!pkts[txcnt])) { > dropped = cnt - i; > break; > -- > 2.27.0 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
On Thu, Jan 13, 2022 at 04:23:17PM +0800, Harold Huang wrote: > From: Harold Huang <baymaxhuang@gmail.com> > > When one flow is output to multiple egress ports, OVS copy the packets > and send the copy packets to the intermediate ports. The original packets > is sent to the last port. If the intermediate port is a dpdk port, the copy > packets should also be prepared for tso offload. > > Fixes: 29cf9c1b3b ("userspace: Add TCP Segmentation Offload support") > Signed-off-by: Harold Huang <baymaxhuang@gmail.com> > --- > lib/netdev-dpdk.c | 13 ++++++------- > 1 file changed, 6 insertions(+), 7 deletions(-) > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 6782d3e8f..83029405e 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -2737,10 +2737,11 @@ dpdk_pktmbuf_alloc(struct rte_mempool *mp, uint32_t data_len) > } > > static struct dp_packet * > -dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig) > +dpdk_copy_dp_packet_to_mbuf(struct netdev_dpdk *dev, struct dp_packet *pkt_orig) > { > struct rte_mbuf *mbuf_dest; > struct dp_packet *pkt_dest; > + struct rte_mempool *mp = dev->dpdk_mp->mp; > uint32_t pkt_len; > > pkt_len = dp_packet_size(pkt_orig); > @@ -2761,11 +2762,9 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig) > memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, > sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size)); > > - if (mbuf_dest->ol_flags & RTE_MBUF_F_TX_L4_MASK) { > - mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest) > - - (char *)dp_packet_eth(pkt_dest); > - mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest) > - - (char *) dp_packet_l3(pkt_dest); > + if (!netdev_dpdk_prep_hwol_packet(dev, mbuf_dest)) { > + rte_pktmbuf_free(mbuf_dest); > + return NULL; What happens if a packet comes from a non-DPDK port and goes to a vhost-user port? I think we will get into this: netdev_dpdk_vhost_send() \-- (not a DPDK packet) \-- dpdk_do_tx_copy() \-- dpdk_copy_dp_packet_to_mbuf() | \-- netdev_dpdk_prep_hwol_packet() <-- | \-- __netdev_dpdk_vhost_send() \-- netdev_dpdk_prep_hwol_batch() \-- netdev_dpdk_prep_hwol_packet() <-- I think we will prepare the same packet twice. BTW, this bug should be fixed by the patch here: https://patchwork.ozlabs.org/project/openvswitch/patch/20210110030505.325722-1-fbl@sysclose.org/#2610119 fbl > } > > return pkt_dest; > @@ -2813,7 +2812,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) > continue; > } > > - pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev->dpdk_mp->mp, packet); > + pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev, packet); > if (OVS_UNLIKELY(!pkts[txcnt])) { > dropped = cnt - i; > break; > -- > 2.27.0 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Hi, He Peng, 贺鹏 <xnhp0320@gmail.com> 于2022年1月18日周二 12:43写道: > > Hi, Harold, > > I am curious how this bug is found as I have an unsolved bug that I believe it's related to > some memory issues and may be related to NIC's problem. > I find this problem when I use ovs-tcpdump tool to capture the jumbo frames in the physical dpdk port which supports TSO for the jumbo frames. The jumbo frame is from a virtio-user port and the backend if virtio-user is the vhost-net in the host. Ovs-tcpdump will create a dummy port and the dpcls flow will be modified with multiple output ports. I think a jumbo frame from vhost-user port would have the same problem. > > Harold Huang <baymaxhuang@gmail.com> 于2022年1月13日周四 16:24写道: >> >> From: Harold Huang <baymaxhuang@gmail.com> >> >> When one flow is output to multiple egress ports, OVS copy the packets >> and send the copy packets to the intermediate ports. The original packets >> is sent to the last port. If the intermediate port is a dpdk port, the copy >> packets should also be prepared for tso offload. >> >> Fixes: 29cf9c1b3b ("userspace: Add TCP Segmentation Offload support") >> Signed-off-by: Harold Huang <baymaxhuang@gmail.com> >> --- >> lib/netdev-dpdk.c | 13 ++++++------- >> 1 file changed, 6 insertions(+), 7 deletions(-) >> >> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c >> index 6782d3e8f..83029405e 100644 >> --- a/lib/netdev-dpdk.c >> +++ b/lib/netdev-dpdk.c >> @@ -2737,10 +2737,11 @@ dpdk_pktmbuf_alloc(struct rte_mempool *mp, uint32_t data_len) >> } >> >> static struct dp_packet * >> -dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig) >> +dpdk_copy_dp_packet_to_mbuf(struct netdev_dpdk *dev, struct dp_packet *pkt_orig) >> { >> struct rte_mbuf *mbuf_dest; >> struct dp_packet *pkt_dest; >> + struct rte_mempool *mp = dev->dpdk_mp->mp; >> uint32_t pkt_len; >> >> pkt_len = dp_packet_size(pkt_orig); >> @@ -2761,11 +2762,9 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig) >> memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, >> sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size)); >> >> - if (mbuf_dest->ol_flags & RTE_MBUF_F_TX_L4_MASK) { >> - mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest) >> - - (char *)dp_packet_eth(pkt_dest); >> - mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest) >> - - (char *) dp_packet_l3(pkt_dest); >> + if (!netdev_dpdk_prep_hwol_packet(dev, mbuf_dest)) { > > > perhaps check userspace_tso_enabled()? Yes, but the old code may only want to support the checksum offload. I think checking userspace_tso_enabled() in netdev_dpdk_prep_hwol_packet() is a better solution. Flavio’s patch could also fix this bug. > >> >> + rte_pktmbuf_free(mbuf_dest); >> + return NULL; >> } >> >> return pkt_dest; >> @@ -2813,7 +2812,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) >> continue; >> } >> >> - pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev->dpdk_mp->mp, packet); >> + pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev, packet); >> if (OVS_UNLIKELY(!pkts[txcnt])) { >> dropped = cnt - i; >> break; >> -- >> 2.27.0 >> >> _______________________________________________ >> dev mailing list >> dev@openvswitch.org >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > > > -- > hepeng
Hi, Harold Thanks for the details. I have found my ovs has been patched. So it does not have this issue. But thanks anyway. Btw, i have tried Flavio ’s patch, it is very clean and I have merged it in our own ovs. It has a minor issues that it counts the hwol drop into tx failure error which is different to the original logic. Harold Huang <baymaxhuang@gmail.com>于2022年1月21日 周五下午5:32写道: > Hi, He Peng, > > 贺鹏 <xnhp0320@gmail.com> 于2022年1月18日周二 12:43写道: > > > > > Hi, Harold, > > > > I am curious how this bug is found as I have an unsolved bug that I > believe it's related to > > some memory issues and may be related to NIC's problem. > > > > I find this problem when I use ovs-tcpdump tool to capture the jumbo > frames in the physical dpdk port which supports TSO for the jumbo > frames. The jumbo frame is from a virtio-user port and the backend if > virtio-user is the vhost-net in the host. Ovs-tcpdump will create a > dummy port and the dpcls flow will be modified with multiple output > ports. I think a jumbo frame from vhost-user port would have the same > problem. > > > > > Harold Huang <baymaxhuang@gmail.com> 于2022年1月13日周四 16:24写道: > >> > >> From: Harold Huang <baymaxhuang@gmail.com> > >> > >> When one flow is output to multiple egress ports, OVS copy the packets > >> and send the copy packets to the intermediate ports. The original > packets > >> is sent to the last port. If the intermediate port is a dpdk port, the > copy > >> packets should also be prepared for tso offload. > >> > >> Fixes: 29cf9c1b3b ("userspace: Add TCP Segmentation Offload support") > >> Signed-off-by: Harold Huang <baymaxhuang@gmail.com> > >> --- > >> lib/netdev-dpdk.c | 13 ++++++------- > >> 1 file changed, 6 insertions(+), 7 deletions(-) > >> > >> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > >> index 6782d3e8f..83029405e 100644 > >> --- a/lib/netdev-dpdk.c > >> +++ b/lib/netdev-dpdk.c > >> @@ -2737,10 +2737,11 @@ dpdk_pktmbuf_alloc(struct rte_mempool *mp, > uint32_t data_len) > >> } > >> > >> static struct dp_packet * > >> -dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet > *pkt_orig) > >> +dpdk_copy_dp_packet_to_mbuf(struct netdev_dpdk *dev, struct dp_packet > *pkt_orig) > >> { > >> struct rte_mbuf *mbuf_dest; > >> struct dp_packet *pkt_dest; > >> + struct rte_mempool *mp = dev->dpdk_mp->mp; > >> uint32_t pkt_len; > >> > >> pkt_len = dp_packet_size(pkt_orig); > >> @@ -2761,11 +2762,9 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool > *mp, struct dp_packet *pkt_orig) > >> memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, > >> sizeof(struct dp_packet) - offsetof(struct dp_packet, > l2_pad_size)); > >> > >> - if (mbuf_dest->ol_flags & RTE_MBUF_F_TX_L4_MASK) { > >> - mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest) > >> - - (char *)dp_packet_eth(pkt_dest); > >> - mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest) > >> - - (char *) dp_packet_l3(pkt_dest); > >> + if (!netdev_dpdk_prep_hwol_packet(dev, mbuf_dest)) { > > > > > > perhaps check userspace_tso_enabled()? > > Yes, but the old code may only want to support the checksum offload. I > think checking userspace_tso_enabled() in > netdev_dpdk_prep_hwol_packet() is a better solution. Flavio’s patch > could also fix this bug. > > > > >> > >> + rte_pktmbuf_free(mbuf_dest); > >> + return NULL; > >> } > >> > >> return pkt_dest; > >> @@ -2813,7 +2812,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, > struct dp_packet_batch *batch) > >> continue; > >> } > >> > >> - pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev->dpdk_mp->mp, > packet); > >> + pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev, packet); > >> if (OVS_UNLIKELY(!pkts[txcnt])) { > >> dropped = cnt - i; > >> break; > >> -- > >> 2.27.0 > >> > >> _______________________________________________ > >> dev mailing list > >> dev@openvswitch.org > >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > > > > > > > -- > > hepeng >
Sorry it counts the copy to mbuf error into tx failure error which is different to the original logic Peng He <xnhp0320@gmail.com>于2022年1月21日 周五下午10:53写道: > Hi, Harold > > Thanks for the details. > I have found my ovs has been patched. > So it does not have this issue. > > But thanks anyway. > > Btw, i have tried Flavio ’s patch, it is very clean and I have merged it > in our own ovs. > > It has a minor issues that it counts the hwol drop into tx failure error > which is different to the original logic. > > > > > Harold Huang <baymaxhuang@gmail.com>于2022年1月21日 周五下午5:32写道: > >> Hi, He Peng, >> >> 贺鹏 <xnhp0320@gmail.com> 于2022年1月18日周二 12:43写道: >> >> > >> > Hi, Harold, >> > >> > I am curious how this bug is found as I have an unsolved bug that I >> believe it's related to >> > some memory issues and may be related to NIC's problem. >> > >> >> I find this problem when I use ovs-tcpdump tool to capture the jumbo >> frames in the physical dpdk port which supports TSO for the jumbo >> frames. The jumbo frame is from a virtio-user port and the backend if >> virtio-user is the vhost-net in the host. Ovs-tcpdump will create a >> dummy port and the dpcls flow will be modified with multiple output >> ports. I think a jumbo frame from vhost-user port would have the same >> problem. >> >> > >> > Harold Huang <baymaxhuang@gmail.com> 于2022年1月13日周四 16:24写道: >> >> >> >> From: Harold Huang <baymaxhuang@gmail.com> >> >> >> >> When one flow is output to multiple egress ports, OVS copy the packets >> >> and send the copy packets to the intermediate ports. The original >> packets >> >> is sent to the last port. If the intermediate port is a dpdk port, the >> copy >> >> packets should also be prepared for tso offload. >> >> >> >> Fixes: 29cf9c1b3b ("userspace: Add TCP Segmentation Offload support") >> >> Signed-off-by: Harold Huang <baymaxhuang@gmail.com> >> >> --- >> >> lib/netdev-dpdk.c | 13 ++++++------- >> >> 1 file changed, 6 insertions(+), 7 deletions(-) >> >> >> >> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c >> >> index 6782d3e8f..83029405e 100644 >> >> --- a/lib/netdev-dpdk.c >> >> +++ b/lib/netdev-dpdk.c >> >> @@ -2737,10 +2737,11 @@ dpdk_pktmbuf_alloc(struct rte_mempool *mp, >> uint32_t data_len) >> >> } >> >> >> >> static struct dp_packet * >> >> -dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet >> *pkt_orig) >> >> +dpdk_copy_dp_packet_to_mbuf(struct netdev_dpdk *dev, struct dp_packet >> *pkt_orig) >> >> { >> >> struct rte_mbuf *mbuf_dest; >> >> struct dp_packet *pkt_dest; >> >> + struct rte_mempool *mp = dev->dpdk_mp->mp; >> >> uint32_t pkt_len; >> >> >> >> pkt_len = dp_packet_size(pkt_orig); >> >> @@ -2761,11 +2762,9 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool >> *mp, struct dp_packet *pkt_orig) >> >> memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, >> >> sizeof(struct dp_packet) - offsetof(struct dp_packet, >> l2_pad_size)); >> >> >> >> - if (mbuf_dest->ol_flags & RTE_MBUF_F_TX_L4_MASK) { >> >> - mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest) >> >> - - (char *)dp_packet_eth(pkt_dest); >> >> - mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest) >> >> - - (char *) dp_packet_l3(pkt_dest); >> >> + if (!netdev_dpdk_prep_hwol_packet(dev, mbuf_dest)) { >> > >> > >> > perhaps check userspace_tso_enabled()? >> >> Yes, but the old code may only want to support the checksum offload. I >> think checking userspace_tso_enabled() in >> netdev_dpdk_prep_hwol_packet() is a better solution. Flavio’s patch >> could also fix this bug. >> >> > >> >> >> >> + rte_pktmbuf_free(mbuf_dest); >> >> + return NULL; >> >> } >> >> >> >> return pkt_dest; >> >> @@ -2813,7 +2812,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, >> struct dp_packet_batch *batch) >> >> continue; >> >> } >> >> >> >> - pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev->dpdk_mp->mp, >> packet); >> >> + pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev, packet); >> >> if (OVS_UNLIKELY(!pkts[txcnt])) { >> >> dropped = cnt - i; >> >> break; >> >> -- >> >> 2.27.0 >> >> >> >> _______________________________________________ >> >> dev mailing list >> >> dev@openvswitch.org >> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev >> > >> > >> > >> > -- >> > hepeng >> > -- > hepeng >
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 6782d3e8f..83029405e 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -2737,10 +2737,11 @@ dpdk_pktmbuf_alloc(struct rte_mempool *mp, uint32_t data_len) } static struct dp_packet * -dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig) +dpdk_copy_dp_packet_to_mbuf(struct netdev_dpdk *dev, struct dp_packet *pkt_orig) { struct rte_mbuf *mbuf_dest; struct dp_packet *pkt_dest; + struct rte_mempool *mp = dev->dpdk_mp->mp; uint32_t pkt_len; pkt_len = dp_packet_size(pkt_orig); @@ -2761,11 +2762,9 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig) memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size)); - if (mbuf_dest->ol_flags & RTE_MBUF_F_TX_L4_MASK) { - mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest) - - (char *)dp_packet_eth(pkt_dest); - mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest) - - (char *) dp_packet_l3(pkt_dest); + if (!netdev_dpdk_prep_hwol_packet(dev, mbuf_dest)) { + rte_pktmbuf_free(mbuf_dest); + return NULL; } return pkt_dest; @@ -2813,7 +2812,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) continue; } - pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev->dpdk_mp->mp, packet); + pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev, packet); if (OVS_UNLIKELY(!pkts[txcnt])) { dropped = cnt - i; break;