diff mbox series

[ovs-dev,v2] netdev-dpdk: Execute QoS Checking before copying to mbuf

Message ID 20170829113944.4073-1-sysugaozhenyu@gmail.com
State Accepted
Delegated to: Ian Stokes
Headers show
Series [ovs-dev,v2] netdev-dpdk: Execute QoS Checking before copying to mbuf | expand

Commit Message

Gao Zhenyu Aug. 29, 2017, 11:39 a.m. UTC
In dpdk_do_tx_copy function, all packets were copied to mbuf first,
but QoS checking may drop some of them.
Move the QoS checking in front of copying data to mbuf, it helps to
reduce useless copy.

Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>
---
 lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 19 deletions(-)

Comments

Stokes, Ian Aug. 30, 2017, 3:18 p.m. UTC | #1
> In dpdk_do_tx_copy function, all packets were copied to mbuf first, but
> QoS checking may drop some of them.
> Move the QoS checking in front of copying data to mbuf, it helps to reduce
> useless copy.

Hi Zhenyu, thanks for the patch, I agree with the objective of the patch but have some comments below I'd like to see addressed/tested before signing off.

> 
> Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>
> ---
>  lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++++++++--------------
> -----
>  1 file changed, 36 insertions(+), 19 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1aaf6f7..50874b8
> 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -279,7 +279,7 @@ struct dpdk_qos_ops {
>       * For all QoS implementations it should always be non-null.
>       */
>      int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
> -                   int pkt_cnt);
> +                   int pkt_cnt, bool may_steal);
>  };
> 
>  /* dpdk_qos_ops for each type of user space QoS implementation */ @@ -
> 1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm
> *meter,
> 
>  static int
>  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> -                        struct rte_mbuf **pkts, int pkt_cnt)
> +                        struct rte_mbuf **pkts, int pkt_cnt,
> +                        bool may_steal)
>  {
>      int i = 0;
>      int cnt = 0;
> @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> *meter,
>              }
>              cnt++;
>          } else {
> -            rte_pktmbuf_free(pkt);
> +            if (may_steal) {
> +                rte_pktmbuf_free(pkt);
> +            }
>          }
>      }
> 
> @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> *meter,
> 
>  static int
>  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf
> **pkts,
> -                    int pkt_cnt)
> +                    int pkt_cnt, bool may_steal)
>  {
>      int cnt = 0;
> 
>      rte_spinlock_lock(&policer->policer_lock);
> -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
> +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> +                                  pkt_cnt, may_steal);
>      rte_spinlock_unlock(&policer->policer_lock);
> 
>      return cnt;
> @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>          dropped = nb_rx;
>          nb_rx = ingress_policer_run(policer,
>                                      (struct rte_mbuf **) batch->packets,
> -                                    nb_rx);
> +                                    nb_rx, true);
>          dropped -= nb_rx;
>      }
> 
> @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct
> dp_packet_batch *batch)
>          dropped = nb_rx;
>          nb_rx = ingress_policer_run(policer,
>                                      (struct rte_mbuf **) batch->packets,
> -                                    nb_rx);
> +                                    nb_rx, true);
>          dropped -= nb_rx;
>      }
> 
> @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,
> struct dp_packet_batch *batch)
> 
>  static inline int
>  netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
> -                    int cnt)
> +                    int cnt, bool may_steal)
>  {
>      struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev-
> >qos_conf);
> 
>      if (qos_conf) {
>          rte_spinlock_lock(&qos_conf->lock);
> -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
> +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
>          rte_spinlock_unlock(&qos_conf->lock);
>      }
> 
> @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
> 
>      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
>      /* Check has QoS has been configured for the netdev */
> -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
> +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
>      dropped = total_pkts - cnt;
> 
>      do {
> @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet_batch *batch)  #endif
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
> +    int txcnt_eth = batch->count;
>      int dropped = 0;
>      int newcnt = 0;
>      int i;
> 
> +    if (dev->type != DPDK_DEV_VHOST) {
> +        /* Check if QoS has been configured for this netdev. */
> +        txcnt_eth = netdev_dpdk_qos_run(dev,
> +                                        (struct rte_mbuf **)batch-
> >packets,
> +                                        txcnt_eth, false);

So dpdk_do_tx_copy will be called as part of netdev_dpdk_eth_send__ if its triggered by the following conditions

    if (OVS_UNLIKELY(!may_steal ||
                     batch->packets[0]->source != DPBUF_DPDK)

What will happen in above if the source of the packet is not DPBUF_DPDK?

For the most part it will be ok until 'netdev_dpdk_policer_pkt_handle()' is called later on.

This has specific DPDK calls that expect an mbuf source for the packet. 

Previously QoS took place after the packet had been copied so this was not an issue but now we have DPDK rte functions being called on a non mbuf source packet. I'm not sure of the behavior that could occur in this case. Could an incorrect length be returned for the call rte_pktmbuf_pkt_len(pkt) in 'netdev_dpdk_policer_pkt_handle()'?

It could be the case then that non DPDK specific call maybe required to attain the length of the packet in 'netdev_dpdk_policer_pkt_handle()' before calling the meter itself.

Have you tested this use case?

Ian

> +        if (txcnt_eth == 0) {
> +            dropped = batch->count;
> +            goto out;
> +        }
> +    }
> +
>      dp_packet_batch_apply_cutlen(batch);
> 
>      for (i = 0; i < batch->count; i++) { @@ -1848,21 +1864,20 @@
> dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
> *batch)
>          rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
> 
>          newcnt++;
> +        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
> +            dropped += batch->count - i - 1;
> +            break;
> +        }
>      }
> 
>      if (dev->type == DPDK_DEV_VHOST) {
>          __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
>                                   newcnt);
>      } else {
> -        unsigned int qos_pkts = newcnt;
> -
> -        /* Check if QoS has been configured for this netdev. */
> -        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
> -
> -        dropped += qos_pkts - newcnt;
>          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
>      }
> 
> +out:
>      if (OVS_UNLIKELY(dropped)) {
>          rte_spinlock_lock(&dev->stats_lock);
>          dev->stats.tx_dropped += dropped; @@ -1915,7 +1930,7 @@
> netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>          dp_packet_batch_apply_cutlen(batch);
> 
>          cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
> -        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
> +        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
>          dropped = batch->count - cnt;
> 
>          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt); @@ -
> 3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct qos_conf
> *conf,  }
> 
>  static int
> -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> pkt_cnt)
> +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> pkt_cnt,
> +                   bool may_steal)
>  {
>      int cnt = 0;
>      struct egress_policer *policer =
>          CONTAINER_OF(conf, struct egress_policer, qos_conf);
> 
> -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
> +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> +                                  pkt_cnt, may_steal);
> 
>      return cnt;
>  }
> --
> 1.8.3.1
Gao Zhenyu Aug. 31, 2017, 1:56 a.m. UTC | #2
Here is the the testing I just did:

1. Add log in function netdev_dpdk_policer_pkt_handle
static inline
bool
netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm
*meter,
                               struct rte_mbuf *pkt, uint64_t
time)
{
    uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct
ether_hdr);
    VLOG_WARN("GGGG, payload_len:%u,
rte_pkt_len:%u",
              pkt_len, rte_pktmbuf_pkt_len(pkt));
<------------------------print out pkt_len


    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len)
==

e_RTE_METER_GREEN;
}

2. rebuild a rpm, install in machine, enable DPDK.
3. Create a container on the bridge(userspace bridge). (The container use
veth device)
4. Config qos on eth dpdk eth by using : ovs-vsctl set port
dpdk-uplink-eth3 qos=@newqos -- --id=@newqos create qos
type=egress-policer other-config:cir=1250000 other-config:cbs=2048
5. execute ping on the container and monitor ovs-vswitchd.log. Then I can
see:

2017-08-31T01:39:28.036Z|00133|netdev_dpdk|WARN|GGGG, payload_len:88,
rte_pkt_len:102
2017-08-31T01:39:29.036Z|00134|netdev_dpdk|WARN|GGGG, payload_len:88,
rte_pkt_len:102
2017-08-31T01:39:30.036Z|00135|netdev_dpdk|WARN|GGGG, payload_len:88,
rte_pkt_len:102
2017-08-31T01:39:31.036Z|00136|netdev_dpdk|WARN|GGGG, payload_len:88,
rte_pkt_len:102

So the mbuf->pkt_len has a correct length.

On the code side, we have function dp_packet_set_size(), if ovs compiled
with dpdk, this function set the mbuf.data_len and mbuf.pkt_len. Many
functions consume dp_packet_set_size() like netdev_linux_rxq_recv_sock in
Netdev-linux.c

#ifdef DPDK_NETDEV
......

static inline void
dp_packet_set_size(struct dp_packet *b, uint32_t v)
{
.....
    b->mbuf.data_len = (uint16_t)v;  /* Current seg length. */
    b->mbuf.pkt_len = v;             /* Total length of all segments linked
to
                                      * this segment. */
}

Please let me know if you have any concern on it.

Thanks
Zhenyu Gao

2017-08-30 23:18 GMT+08:00 Stokes, Ian <ian.stokes@intel.com>:

> > In dpdk_do_tx_copy function, all packets were copied to mbuf first, but
> > QoS checking may drop some of them.
> > Move the QoS checking in front of copying data to mbuf, it helps to
> reduce
> > useless copy.
>
> Hi Zhenyu, thanks for the patch, I agree with the objective of the patch
> but have some comments below I'd like to see addressed/tested before
> signing off.
>
> >
> > Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>
> > ---
> >  lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++
> ++++++--------------
> > -----
> >  1 file changed, 36 insertions(+), 19 deletions(-)
> >
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1aaf6f7..50874b8
> > 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -279,7 +279,7 @@ struct dpdk_qos_ops {
> >       * For all QoS implementations it should always be non-null.
> >       */
> >      int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
> > -                   int pkt_cnt);
> > +                   int pkt_cnt, bool may_steal);
> >  };
> >
> >  /* dpdk_qos_ops for each type of user space QoS implementation */ @@ -
> > 1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm
> > *meter,
> >
> >  static int
> >  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> > -                        struct rte_mbuf **pkts, int pkt_cnt)
> > +                        struct rte_mbuf **pkts, int pkt_cnt,
> > +                        bool may_steal)
> >  {
> >      int i = 0;
> >      int cnt = 0;
> > @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> > *meter,
> >              }
> >              cnt++;
> >          } else {
> > -            rte_pktmbuf_free(pkt);
> > +            if (may_steal) {
> > +                rte_pktmbuf_free(pkt);
> > +            }
> >          }
> >      }
> >
> > @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> > *meter,
> >
> >  static int
> >  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf
> > **pkts,
> > -                    int pkt_cnt)
> > +                    int pkt_cnt, bool may_steal)
> >  {
> >      int cnt = 0;
> >
> >      rte_spinlock_lock(&policer->policer_lock);
> > -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
> > +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> > +                                  pkt_cnt, may_steal);
> >      rte_spinlock_unlock(&policer->policer_lock);
> >
> >      return cnt;
> > @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> >          dropped = nb_rx;
> >          nb_rx = ingress_policer_run(policer,
> >                                      (struct rte_mbuf **) batch->packets,
> > -                                    nb_rx);
> > +                                    nb_rx, true);
> >          dropped -= nb_rx;
> >      }
> >
> > @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct
> > dp_packet_batch *batch)
> >          dropped = nb_rx;
> >          nb_rx = ingress_policer_run(policer,
> >                                      (struct rte_mbuf **) batch->packets,
> > -                                    nb_rx);
> > +                                    nb_rx, true);
> >          dropped -= nb_rx;
> >      }
> >
> > @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,
> > struct dp_packet_batch *batch)
> >
> >  static inline int
> >  netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
> > -                    int cnt)
> > +                    int cnt, bool may_steal)
> >  {
> >      struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev-
> > >qos_conf);
> >
> >      if (qos_conf) {
> >          rte_spinlock_lock(&qos_conf->lock);
> > -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
> > +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
> >          rte_spinlock_unlock(&qos_conf->lock);
> >      }
> >
> > @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev,
> int
> > qid,
> >
> >      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
> >      /* Check has QoS has been configured for the netdev */
> > -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
> > +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
> >      dropped = total_pkts - cnt;
> >
> >      do {
> > @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> > struct dp_packet_batch *batch)  #endif
> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
> > +    int txcnt_eth = batch->count;
> >      int dropped = 0;
> >      int newcnt = 0;
> >      int i;
> >
> > +    if (dev->type != DPDK_DEV_VHOST) {
> > +        /* Check if QoS has been configured for this netdev. */
> > +        txcnt_eth = netdev_dpdk_qos_run(dev,
> > +                                        (struct rte_mbuf **)batch-
> > >packets,
> > +                                        txcnt_eth, false);
>
> So dpdk_do_tx_copy will be called as part of netdev_dpdk_eth_send__ if its
> triggered by the following conditions
>
>     if (OVS_UNLIKELY(!may_steal ||
>                      batch->packets[0]->source != DPBUF_DPDK)
>
> What will happen in above if the source of the packet is not DPBUF_DPDK?
>
> For the most part it will be ok until 'netdev_dpdk_policer_pkt_handle()'
> is called later on.
>
> This has specific DPDK calls that expect an mbuf source for the packet.
>
> Previously QoS took place after the packet had been copied so this was not
> an issue but now we have DPDK rte functions being called on a non mbuf
> source packet. I'm not sure of the behavior that could occur in this case.
> Could an incorrect length be returned for the call rte_pktmbuf_pkt_len(pkt)
> in 'netdev_dpdk_policer_pkt_handle()'?
>
> It could be the case then that non DPDK specific call maybe required to
> attain the length of the packet in 'netdev_dpdk_policer_pkt_handle()'
> before calling the meter itself.
>
> Have you tested this use case?
>
> Ian
>
> > +        if (txcnt_eth == 0) {
> > +            dropped = batch->count;
> > +            goto out;
> > +        }
> > +    }
> > +
> >      dp_packet_batch_apply_cutlen(batch);
> >
> >      for (i = 0; i < batch->count; i++) { @@ -1848,21 +1864,20 @@
> > dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
> > *batch)
> >          rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
> >
> >          newcnt++;
> > +        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
> > +            dropped += batch->count - i - 1;
> > +            break;
> > +        }
> >      }
> >
> >      if (dev->type == DPDK_DEV_VHOST) {
> >          __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
> pkts,
> >                                   newcnt);
> >      } else {
> > -        unsigned int qos_pkts = newcnt;
> > -
> > -        /* Check if QoS has been configured for this netdev. */
> > -        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
> > -
> > -        dropped += qos_pkts - newcnt;
> >          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
> >      }
> >
> > +out:
> >      if (OVS_UNLIKELY(dropped)) {
> >          rte_spinlock_lock(&dev->stats_lock);
> >          dev->stats.tx_dropped += dropped; @@ -1915,7 +1930,7 @@
> > netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> >          dp_packet_batch_apply_cutlen(batch);
> >
> >          cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
> > -        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
> > +        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
> >          dropped = batch->count - cnt;
> >
> >          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt); @@ -
> > 3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct qos_conf
> > *conf,  }
> >
> >  static int
> > -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> > pkt_cnt)
> > +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> > pkt_cnt,
> > +                   bool may_steal)
> >  {
> >      int cnt = 0;
> >      struct egress_policer *policer =
> >          CONTAINER_OF(conf, struct egress_policer, qos_conf);
> >
> > -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> pkt_cnt);
> > +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> > +                                  pkt_cnt, may_steal);
> >
> >      return cnt;
> >  }
> > --
> > 1.8.3.1
>
>
Stokes, Ian Sept. 3, 2017, 10:46 a.m. UTC | #3
OK,

In my own testing I’m seeing the same behavior with a tap interface pinging to a bridge with a DPDK device. As such I think this should be ok.

Acked-by: ian.stokes@intel.com<mailto:ian.stokes@intel.com>


Ian

From: Gao Zhenyu [mailto:sysugaozhenyu@gmail.com]

Sent: Thursday, August 31, 2017 2:56 AM
To: Stokes, Ian <ian.stokes@intel.com>
Cc: dev@openvswitch.org
Subject: Re: [PATCH v2] netdev-dpdk: Execute QoS Checking before copying to mbuf

Here is the the testing I just did:
1. Add log in function netdev_dpdk_policer_pkt_handle
static inline bool
netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
                               struct rte_mbuf *pkt, uint64_t time)
{
    uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);
    VLOG_WARN("GGGG, payload_len:%u, rte_pkt_len:%u",
              pkt_len, rte_pktmbuf_pkt_len(pkt)); <------------------------print out pkt_len

    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
                                                e_RTE_METER_GREEN;
}
2. rebuild a rpm, install in machine, enable DPDK.
3. Create a container on the bridge(userspace bridge). (The container use veth device)
4. Config qos on eth dpdk eth by using : ovs-vsctl set port dpdk-uplink-eth3 qos=@newqos -- --id=@newqos create qos  type=egress-policer other-config:cir=1250000 other-config:cbs=2048
5. execute ping on the container and monitor ovs-vswitchd.log. Then I can see:

2017-08-31T01:39:28.036Z|00133|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
2017-08-31T01:39:29.036Z|00134|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
2017-08-31T01:39:30.036Z|00135|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
2017-08-31T01:39:31.036Z|00136|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
So the mbuf->pkt_len has a correct length.
On the code side, we have function dp_packet_set_size(), if ovs compiled with dpdk, this function set the mbuf.data_len and mbuf.pkt_len. Many functions consume dp_packet_set_size() like netdev_linux_rxq_recv_sock in Netdev-linux.c

#ifdef DPDK_NETDEV
......

static inline void
dp_packet_set_size(struct dp_packet *b, uint32_t v)
{
.....
    b->mbuf.data_len = (uint16_t)v;  /* Current seg length. */
    b->mbuf.pkt_len = v;             /* Total length of all segments linked to
                                      * this segment. */
}
Please let me know if you have any concern on it.

Thanks
Zhenyu Gao

2017-08-30 23:18 GMT+08:00 Stokes, Ian <ian.stokes@intel.com<mailto:ian.stokes@intel.com>>:
> In dpdk_do_tx_copy function, all packets were copied to mbuf first, but

> QoS checking may drop some of them.

> Move the QoS checking in front of copying data to mbuf, it helps to reduce

> useless copy.


Hi Zhenyu, thanks for the patch, I agree with the objective of the patch but have some comments below I'd like to see addressed/tested before signing off.

>

> Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com<mailto:sysugaozhenyu@gmail.com>>

> ---

>  lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++++++++--------------

> -----

>  1 file changed, 36 insertions(+), 19 deletions(-)

>

> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1aaf6f7..50874b8

> 100644

> --- a/lib/netdev-dpdk.c

> +++ b/lib/netdev-dpdk.c

> @@ -279,7 +279,7 @@ struct dpdk_qos_ops {

>       * For all QoS implementations it should always be non-null.

>       */

>      int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,

> -                   int pkt_cnt);

> +                   int pkt_cnt, bool may_steal);

>  };

>

>  /* dpdk_qos_ops for each type of user space QoS implementation */ @@ -

> 1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm

> *meter,

>

>  static int

>  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,

> -                        struct rte_mbuf **pkts, int pkt_cnt)

> +                        struct rte_mbuf **pkts, int pkt_cnt,

> +                        bool may_steal)

>  {

>      int i = 0;

>      int cnt = 0;

> @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm

> *meter,

>              }

>              cnt++;

>          } else {

> -            rte_pktmbuf_free(pkt);

> +            if (may_steal) {

> +                rte_pktmbuf_free(pkt);

> +            }

>          }

>      }

>

> @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm

> *meter,

>

>  static int

>  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf

> **pkts,

> -                    int pkt_cnt)

> +                    int pkt_cnt, bool may_steal)

>  {

>      int cnt = 0;

>

>      rte_spinlock_lock(&policer->policer_lock);

> -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);

> +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,

> +                                  pkt_cnt, may_steal);

>      rte_spinlock_unlock(&policer->policer_lock);

>

>      return cnt;

> @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,

>          dropped = nb_rx;

>          nb_rx = ingress_policer_run(policer,

>                                      (struct rte_mbuf **) batch->packets,

> -                                    nb_rx);

> +                                    nb_rx, true);

>          dropped -= nb_rx;

>      }

>

> @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct

> dp_packet_batch *batch)

>          dropped = nb_rx;

>          nb_rx = ingress_policer_run(policer,

>                                      (struct rte_mbuf **) batch->packets,

> -                                    nb_rx);

> +                                    nb_rx, true);

>          dropped -= nb_rx;

>      }

>

> @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,

> struct dp_packet_batch *batch)

>

>  static inline int

>  netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,

> -                    int cnt)

> +                    int cnt, bool may_steal)

>  {

>      struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev-

> >qos_conf);

>

>      if (qos_conf) {

>          rte_spinlock_lock(&qos_conf->lock);

> -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);

> +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);

>          rte_spinlock_unlock(&qos_conf->lock);

>      }

>

> @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int

> qid,

>

>      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);

>      /* Check has QoS has been configured for the netdev */

> -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);

> +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);

>      dropped = total_pkts - cnt;

>

>      do {

> @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,

> struct dp_packet_batch *batch)  #endif

>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

>      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];

> +    int txcnt_eth = batch->count;

>      int dropped = 0;

>      int newcnt = 0;

>      int i;

>

> +    if (dev->type != DPDK_DEV_VHOST) {

> +        /* Check if QoS has been configured for this netdev. */

> +        txcnt_eth = netdev_dpdk_qos_run(dev,

> +                                        (struct rte_mbuf **)batch-

> >packets,

> +                                        txcnt_eth, false);

So dpdk_do_tx_copy will be called as part of netdev_dpdk_eth_send__ if its triggered by the following conditions

    if (OVS_UNLIKELY(!may_steal ||
                     batch->packets[0]->source != DPBUF_DPDK)

What will happen in above if the source of the packet is not DPBUF_DPDK?

For the most part it will be ok until 'netdev_dpdk_policer_pkt_handle()' is called later on.

This has specific DPDK calls that expect an mbuf source for the packet.

Previously QoS took place after the packet had been copied so this was not an issue but now we have DPDK rte functions being called on a non mbuf source packet. I'm not sure of the behavior that could occur in this case. Could an incorrect length be returned for the call rte_pktmbuf_pkt_len(pkt) in 'netdev_dpdk_policer_pkt_handle()'?

It could be the case then that non DPDK specific call maybe required to attain the length of the packet in 'netdev_dpdk_policer_pkt_handle()' before calling the meter itself.

Have you tested this use case?

Ian

> +        if (txcnt_eth == 0) {

> +            dropped = batch->count;

> +            goto out;

> +        }

> +    }

> +

>      dp_packet_batch_apply_cutlen(batch);

>

>      for (i = 0; i < batch->count; i++) { @@ -1848,21 +1864,20 @@

> dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch

> *batch)

>          rte_pktmbuf_pkt_len(pkts[newcnt]) = size;

>

>          newcnt++;

> +        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {

> +            dropped += batch->count - i - 1;

> +            break;

> +        }

>      }

>

>      if (dev->type == DPDK_DEV_VHOST) {

>          __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,

>                                   newcnt);

>      } else {

> -        unsigned int qos_pkts = newcnt;

> -

> -        /* Check if QoS has been configured for this netdev. */

> -        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);

> -

> -        dropped += qos_pkts - newcnt;

>          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);

>      }

>

> +out:

>      if (OVS_UNLIKELY(dropped)) {

>          rte_spinlock_lock(&dev->stats_lock);

>          dev->stats.tx_dropped += dropped; @@ -1915,7 +1930,7 @@

> netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,

>          dp_packet_batch_apply_cutlen(batch);

>

>          cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);

> -        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);

> +        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);

>          dropped = batch->count - cnt;

>

>          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt); @@ -

> 3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct qos_conf

> *conf,  }

>

>  static int

> -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int

> pkt_cnt)

> +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int

> pkt_cnt,

> +                   bool may_steal)

>  {

>      int cnt = 0;

>      struct egress_policer *policer =

>          CONTAINER_OF(conf, struct egress_policer, qos_conf);

>

> -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);

> +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,

> +                                  pkt_cnt, may_steal);

>

>      return cnt;

>  }

> --

> 1.8.3.1
Gao Zhenyu Sept. 4, 2017, 10:27 a.m. UTC | #4
Thanks for your testing!

Thanks
Zhenyu Gao

2017-09-03 18:46 GMT+08:00 Stokes, Ian <ian.stokes@intel.com>:

> OK,
>
>
>
> In my own testing I’m seeing the same behavior with a tap interface
> pinging to a bridge with a DPDK device. As such I think this should be ok.
>
>
>
> Acked-by: ian.stokes@intel.com
>
>
>
> Ian
>
>
>
> *From:* Gao Zhenyu [mailto:sysugaozhenyu@gmail.com]
> *Sent:* Thursday, August 31, 2017 2:56 AM
> *To:* Stokes, Ian <ian.stokes@intel.com>
> *Cc:* dev@openvswitch.org
> *Subject:* Re: [PATCH v2] netdev-dpdk: Execute QoS Checking before
> copying to mbuf
>
>
>
> Here is the the testing I just did:
>
> 1. Add log in function netdev_dpdk_policer_pkt_handle
> static inline bool
>
> netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm
> *meter,
>                                struct rte_mbuf *pkt, uint64_t
> time)
> {
>     uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct
> ether_hdr);
>     VLOG_WARN("GGGG, payload_len:%u, rte_pkt_len:%u",
>
>               pkt_len, rte_pktmbuf_pkt_len(pkt));
> <------------------------print out pkt_len
>
>
>     return rte_meter_srtcm_color_blind_check(meter, time, pkt_len)
> ==
>
> e_RTE_METER_GREEN;
> }
>
> 2. rebuild a rpm, install in machine, enable DPDK.
>
> 3. Create a container on the bridge(userspace bridge). (The container use
> veth device)
>
> 4. Config qos on eth dpdk eth by using : ovs-vsctl set port
> dpdk-uplink-eth3 qos=@newqos -- --id=@newqos create qos
> type=egress-policer other-config:cir=1250000 other-config:cbs=2048
>
> 5. execute ping on the container and monitor ovs-vswitchd.log. Then I can
> see:
>
> 2017-08-31T01:39:28.036Z|00133|netdev_dpdk|WARN|GGGG, payload_len:88,
> rte_pkt_len:102
> 2017-08-31T01:39:29.036Z|00134|netdev_dpdk|WARN|GGGG, payload_len:88,
> rte_pkt_len:102
> 2017-08-31T01:39:30.036Z|00135|netdev_dpdk|WARN|GGGG, payload_len:88,
> rte_pkt_len:102
> 2017-08-31T01:39:31.036Z|00136|netdev_dpdk|WARN|GGGG, payload_len:88,
> rte_pkt_len:102
>
> So the mbuf->pkt_len has a correct length.
>
> On the code side, we have function dp_packet_set_size(), if ovs compiled
> with dpdk, this function set the mbuf.data_len and mbuf.pkt_len. Many
> functions consume dp_packet_set_size() like netdev_linux_rxq_recv_sock in
> Netdev-linux.c
>
> #ifdef DPDK_NETDEV
> ......
>
> static inline void
> dp_packet_set_size(struct dp_packet *b, uint32_t v)
> {
> .....
>     b->mbuf.data_len = (uint16_t)v;  /* Current seg length. */
>     b->mbuf.pkt_len = v;             /* Total length of all segments
> linked to
>                                       * this segment. */
> }
>
> Please let me know if you have any concern on it.
>
>
>
> Thanks
>
> Zhenyu Gao
>
>
>
> 2017-08-30 23:18 GMT+08:00 Stokes, Ian <ian.stokes@intel.com>:
>
> > In dpdk_do_tx_copy function, all packets were copied to mbuf first, but
> > QoS checking may drop some of them.
> > Move the QoS checking in front of copying data to mbuf, it helps to
> reduce
> > useless copy.
>
> Hi Zhenyu, thanks for the patch, I agree with the objective of the patch
> but have some comments below I'd like to see addressed/tested before
> signing off.
>
>
> >
> > Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>
> > ---
> >  lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++
> ++++++--------------
> > -----
> >  1 file changed, 36 insertions(+), 19 deletions(-)
> >
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1aaf6f7..50874b8
> > 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -279,7 +279,7 @@ struct dpdk_qos_ops {
> >       * For all QoS implementations it should always be non-null.
> >       */
> >      int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
> > -                   int pkt_cnt);
> > +                   int pkt_cnt, bool may_steal);
> >  };
> >
> >  /* dpdk_qos_ops for each type of user space QoS implementation */ @@ -
> > 1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm
> > *meter,
> >
> >  static int
> >  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> > -                        struct rte_mbuf **pkts, int pkt_cnt)
> > +                        struct rte_mbuf **pkts, int pkt_cnt,
> > +                        bool may_steal)
> >  {
> >      int i = 0;
> >      int cnt = 0;
> > @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> > *meter,
> >              }
> >              cnt++;
> >          } else {
> > -            rte_pktmbuf_free(pkt);
> > +            if (may_steal) {
> > +                rte_pktmbuf_free(pkt);
> > +            }
> >          }
> >      }
> >
> > @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> > *meter,
> >
> >  static int
> >  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf
> > **pkts,
> > -                    int pkt_cnt)
> > +                    int pkt_cnt, bool may_steal)
> >  {
> >      int cnt = 0;
> >
> >      rte_spinlock_lock(&policer->policer_lock);
> > -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
> > +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> > +                                  pkt_cnt, may_steal);
> >      rte_spinlock_unlock(&policer->policer_lock);
> >
> >      return cnt;
> > @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> >          dropped = nb_rx;
> >          nb_rx = ingress_policer_run(policer,
> >                                      (struct rte_mbuf **) batch->packets,
> > -                                    nb_rx);
> > +                                    nb_rx, true);
> >          dropped -= nb_rx;
> >      }
> >
> > @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct
> > dp_packet_batch *batch)
> >          dropped = nb_rx;
> >          nb_rx = ingress_policer_run(policer,
> >                                      (struct rte_mbuf **) batch->packets,
> > -                                    nb_rx);
> > +                                    nb_rx, true);
> >          dropped -= nb_rx;
> >      }
> >
> > @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,
> > struct dp_packet_batch *batch)
> >
> >  static inline int
> >  netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
> > -                    int cnt)
> > +                    int cnt, bool may_steal)
> >  {
> >      struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev-
> > >qos_conf);
> >
> >      if (qos_conf) {
> >          rte_spinlock_lock(&qos_conf->lock);
> > -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
> > +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
> >          rte_spinlock_unlock(&qos_conf->lock);
> >      }
> >
> > @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev,
> int
> > qid,
> >
> >      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
> >      /* Check has QoS has been configured for the netdev */
> > -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
> > +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
> >      dropped = total_pkts - cnt;
> >
> >      do {
> > @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> > struct dp_packet_batch *batch)  #endif
> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
> > +    int txcnt_eth = batch->count;
> >      int dropped = 0;
> >      int newcnt = 0;
> >      int i;
> >
> > +    if (dev->type != DPDK_DEV_VHOST) {
> > +        /* Check if QoS has been configured for this netdev. */
> > +        txcnt_eth = netdev_dpdk_qos_run(dev,
> > +                                        (struct rte_mbuf **)batch-
> > >packets,
> > +                                        txcnt_eth, false);
>
> So dpdk_do_tx_copy will be called as part of netdev_dpdk_eth_send__ if its
> triggered by the following conditions
>
>     if (OVS_UNLIKELY(!may_steal ||
>                      batch->packets[0]->source != DPBUF_DPDK)
>
> What will happen in above if the source of the packet is not DPBUF_DPDK?
>
> For the most part it will be ok until 'netdev_dpdk_policer_pkt_handle()'
> is called later on.
>
> This has specific DPDK calls that expect an mbuf source for the packet.
>
> Previously QoS took place after the packet had been copied so this was not
> an issue but now we have DPDK rte functions being called on a non mbuf
> source packet. I'm not sure of the behavior that could occur in this case.
> Could an incorrect length be returned for the call rte_pktmbuf_pkt_len(pkt)
> in 'netdev_dpdk_policer_pkt_handle()'?
>
> It could be the case then that non DPDK specific call maybe required to
> attain the length of the packet in 'netdev_dpdk_policer_pkt_handle()'
> before calling the meter itself.
>
> Have you tested this use case?
>
> Ian
>
>
> > +        if (txcnt_eth == 0) {
> > +            dropped = batch->count;
> > +            goto out;
> > +        }
> > +    }
> > +
> >      dp_packet_batch_apply_cutlen(batch);
> >
> >      for (i = 0; i < batch->count; i++) { @@ -1848,21 +1864,20 @@
> > dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
> > *batch)
> >          rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
> >
> >          newcnt++;
> > +        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
> > +            dropped += batch->count - i - 1;
> > +            break;
> > +        }
> >      }
> >
> >      if (dev->type == DPDK_DEV_VHOST) {
> >          __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
> pkts,
> >                                   newcnt);
> >      } else {
> > -        unsigned int qos_pkts = newcnt;
> > -
> > -        /* Check if QoS has been configured for this netdev. */
> > -        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
> > -
> > -        dropped += qos_pkts - newcnt;
> >          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
> >      }
> >
> > +out:
> >      if (OVS_UNLIKELY(dropped)) {
> >          rte_spinlock_lock(&dev->stats_lock);
> >          dev->stats.tx_dropped += dropped; @@ -1915,7 +1930,7 @@
> > netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> >          dp_packet_batch_apply_cutlen(batch);
> >
> >          cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
> > -        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
> > +        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
> >          dropped = batch->count - cnt;
> >
> >          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt); @@ -
> > 3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct qos_conf
> > *conf,  }
> >
> >  static int
> > -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> > pkt_cnt)
> > +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> > pkt_cnt,
> > +                   bool may_steal)
> >  {
> >      int cnt = 0;
> >      struct egress_policer *policer =
> >          CONTAINER_OF(conf, struct egress_policer, qos_conf);
> >
> > -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> pkt_cnt);
> > +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> > +                                  pkt_cnt, may_steal);
> >
> >      return cnt;
> >  }
> > --
> > 1.8.3.1
>
>
>
Darrell Ball Sept. 5, 2017, 4:24 p.m. UTC | #5
Hi Gao

How about the following incremental ?

Thanks Darrell



diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 4808d38..648d719 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1843,64 +1843,58 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
 #endif
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
-    int txcnt_eth = batch->count;
-    int dropped = 0;
-    int newcnt = 0;
-    int i;
+    uint32_t cnt = batch->count;
+    uint32_t dropped = 0;
 
     if (dev->type != DPDK_DEV_VHOST) {
         /* Check if QoS has been configured for this netdev. */
-        txcnt_eth = netdev_dpdk_qos_run(dev,
-                                        (struct rte_mbuf **)batch->packets,
-                                        txcnt_eth, false);
-        if (txcnt_eth == 0) {
-            dropped = batch->count;
-            goto out;
-        }
+        cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
+                                  cnt, false);
+        dropped += batch->count - cnt;
     }
 
     dp_packet_batch_apply_cutlen(batch);
 
-    for (i = 0; i < batch->count; i++) {
-        int size = dp_packet_size(batch->packets[i]);
+    uint32_t txcnt = 0;
+
+    for (uint32_t i = 0; i < cnt; i++) {
+
+        uint32_t size = dp_packet_size(batch->packets[i]);
 
         if (OVS_UNLIKELY(size > dev->max_packet_len)) {
-            VLOG_WARN_RL(&rl, "Too big size %d max_packet_len %d",
-                         (int) size, dev->max_packet_len);
+            VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
+                         size, dev->max_packet_len);
 
             dropped++;
             continue;
         }
 
-        pkts[newcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
+        pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
 
-        if (!pkts[newcnt]) {
-            dropped += batch->count - i;
+        if (!pkts[txcnt]) {
+            dropped += cnt - i;
             break;
         }
 
         /* We have to do a copy for now */
-        memcpy(rte_pktmbuf_mtod(pkts[newcnt], void *),
+        memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
                dp_packet_data(batch->packets[i]), size);
 
-        rte_pktmbuf_data_len(pkts[newcnt]) = size;
-        rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
+        rte_pktmbuf_data_len(pkts[txcnt]) = size;
+        rte_pktmbuf_pkt_len(pkts[txcnt]) = size;
 
-        newcnt++;
-        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
-            dropped += batch->count - i - 1;
-            break;
-        }
+        txcnt++;
     }
 
-    if (dev->type == DPDK_DEV_VHOST) {
-        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
-                                 newcnt);
-    } else {
-        dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
+    if (OVS_LIKELY(txcnt)) {
+        if (dev->type == DPDK_DEV_VHOST) {
+            __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
+                                     txcnt);
+        } else {
+            dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt);
+        }
     }
 
-out:
     if (OVS_UNLIKELY(dropped)) {
         rte_spinlock_lock(&dev->stats_lock);
         dev->stats.tx_dropped += dropped;


//////////////////////////////////////////////////////////////


On 8/29/17, 4:39 AM, "ovs-dev-bounces@openvswitch.org on behalf of Zhenyu Gao" <ovs-dev-bounces@openvswitch.org on behalf of sysugaozhenyu@gmail.com> wrote:

    In dpdk_do_tx_copy function, all packets were copied to mbuf first,
    but QoS checking may drop some of them.
    Move the QoS checking in front of copying data to mbuf, it helps to
    reduce useless copy.
    
    Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>
    ---
     lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++++++++-------------------
     1 file changed, 36 insertions(+), 19 deletions(-)
    
    diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
    index 1aaf6f7..50874b8 100644
    --- a/lib/netdev-dpdk.c
    +++ b/lib/netdev-dpdk.c
    @@ -279,7 +279,7 @@ struct dpdk_qos_ops {
          * For all QoS implementations it should always be non-null.
          */
         int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
    -                   int pkt_cnt);
    +                   int pkt_cnt, bool may_steal);
     };
     
     /* dpdk_qos_ops for each type of user space QoS implementation */
    @@ -1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
     
     static int
     netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
    -                        struct rte_mbuf **pkts, int pkt_cnt)
    +                        struct rte_mbuf **pkts, int pkt_cnt,
    +                        bool may_steal)
     {
         int i = 0;
         int cnt = 0;
    @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
                 }
                 cnt++;
             } else {
    -            rte_pktmbuf_free(pkt);
    +            if (may_steal) {
    +                rte_pktmbuf_free(pkt);
    +            }
             }
         }
     
    @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
     
     static int
     ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,
    -                    int pkt_cnt)
    +                    int pkt_cnt, bool may_steal)
     {
         int cnt = 0;
     
         rte_spinlock_lock(&policer->policer_lock);
    -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
    +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
    +                                  pkt_cnt, may_steal);
         rte_spinlock_unlock(&policer->policer_lock);
     
         return cnt;
    @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
             dropped = nb_rx;
             nb_rx = ingress_policer_run(policer,
                                         (struct rte_mbuf **) batch->packets,
    -                                    nb_rx);
    +                                    nb_rx, true);
             dropped -= nb_rx;
         }
     
    @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
             dropped = nb_rx;
             nb_rx = ingress_policer_run(policer,
                                         (struct rte_mbuf **) batch->packets,
    -                                    nb_rx);
    +                                    nb_rx, true);
             dropped -= nb_rx;
         }
     
    @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
     
     static inline int
     netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
    -                    int cnt)
    +                    int cnt, bool may_steal)
     {
         struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev->qos_conf);
     
         if (qos_conf) {
             rte_spinlock_lock(&qos_conf->lock);
    -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
    +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
             rte_spinlock_unlock(&qos_conf->lock);
         }
     
    @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
     
         cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
         /* Check has QoS has been configured for the netdev */
    -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
    +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
         dropped = total_pkts - cnt;
     
         do {
    @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
     #endif
         struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
         struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
    +    int txcnt_eth = batch->count;
         int dropped = 0;
         int newcnt = 0;
         int i;
     
    +    if (dev->type != DPDK_DEV_VHOST) {
    +        /* Check if QoS has been configured for this netdev. */
    +        txcnt_eth = netdev_dpdk_qos_run(dev,
    +                                        (struct rte_mbuf **)batch->packets,
    +                                        txcnt_eth, false);
    +        if (txcnt_eth == 0) {
    +            dropped = batch->count;
    +            goto out;
    +        }
    +    }
    +
         dp_packet_batch_apply_cutlen(batch);
     
         for (i = 0; i < batch->count; i++) {
    @@ -1848,21 +1864,20 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
             rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
     
             newcnt++;
    +        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
    +            dropped += batch->count - i - 1;
    +            break;
    +        }
         }
     
         if (dev->type == DPDK_DEV_VHOST) {
             __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
                                      newcnt);
         } else {
    -        unsigned int qos_pkts = newcnt;
    -
    -        /* Check if QoS has been configured for this netdev. */
    -        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
    -
    -        dropped += qos_pkts - newcnt;
             dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
         }
     
    +out:
         if (OVS_UNLIKELY(dropped)) {
             rte_spinlock_lock(&dev->stats_lock);
             dev->stats.tx_dropped += dropped;
    @@ -1915,7 +1930,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
             dp_packet_batch_apply_cutlen(batch);
     
             cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
    -        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
    +        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
             dropped = batch->count - cnt;
     
             dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
    @@ -3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct qos_conf *conf,
     }
     
     static int
    -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt)
    +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt,
    +                   bool may_steal)
     {
         int cnt = 0;
         struct egress_policer *policer =
             CONTAINER_OF(conf, struct egress_policer, qos_conf);
     
    -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
    +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
    +                                  pkt_cnt, may_steal);
     
         return cnt;
     }
    -- 
    1.8.3.1
    
    _______________________________________________
    dev mailing list
    dev@openvswitch.org
    https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=EZzkW98aZhxGnYEEmB3u5e3vaFA90qSr3aIM1t-ggFY&s=fHRBBnkb29ujBAr27aGj1MCWkVzjfKqmJz3R9wEg99o&e=
Gao Zhenyu Sept. 6, 2017, 6:39 a.m. UTC | #6
Thanks Derrell, it makes sence to me and I think the code path is more
clear.

Thanks
Zhenyu Gao

2017-09-06 0:24 GMT+08:00 Darrell Ball <dball@vmware.com>:

> Hi Gao
>
> How about the following incremental ?
>
> Thanks Darrell
>
>
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 4808d38..648d719 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -1843,64 +1843,58 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet_batch *batch)
>  #endif
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
> -    int txcnt_eth = batch->count;
> -    int dropped = 0;
> -    int newcnt = 0;
> -    int i;
> +    uint32_t cnt = batch->count;
> +    uint32_t dropped = 0;
>
>      if (dev->type != DPDK_DEV_VHOST) {
>          /* Check if QoS has been configured for this netdev. */
> -        txcnt_eth = netdev_dpdk_qos_run(dev,
> -                                        (struct rte_mbuf
> **)batch->packets,
> -                                        txcnt_eth, false);
> -        if (txcnt_eth == 0) {
> -            dropped = batch->count;
> -            goto out;
> -        }
> +        cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **)
> batch->packets,
> +                                  cnt, false);
> +        dropped += batch->count - cnt;
>      }
>
>      dp_packet_batch_apply_cutlen(batch);
>
> -    for (i = 0; i < batch->count; i++) {
> -        int size = dp_packet_size(batch->packets[i]);
> +    uint32_t txcnt = 0;
> +
> +    for (uint32_t i = 0; i < cnt; i++) {
> +
> +        uint32_t size = dp_packet_size(batch->packets[i]);
>
>          if (OVS_UNLIKELY(size > dev->max_packet_len)) {
> -            VLOG_WARN_RL(&rl, "Too big size %d max_packet_len %d",
> -                         (int) size, dev->max_packet_len);
> +            VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
> +                         size, dev->max_packet_len);
>
>              dropped++;
>              continue;
>          }
>
> -        pkts[newcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
> +        pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
>
> -        if (!pkts[newcnt]) {
> -            dropped += batch->count - i;
> +        if (!pkts[txcnt]) {
> +            dropped += cnt - i;
>              break;
>          }
>
>          /* We have to do a copy for now */
> -        memcpy(rte_pktmbuf_mtod(pkts[newcnt], void *),
> +        memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
>                 dp_packet_data(batch->packets[i]), size);
>
> -        rte_pktmbuf_data_len(pkts[newcnt]) = size;
> -        rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
> +        rte_pktmbuf_data_len(pkts[txcnt]) = size;
> +        rte_pktmbuf_pkt_len(pkts[txcnt]) = size;
>
> -        newcnt++;
> -        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
> -            dropped += batch->count - i - 1;
> -            break;
> -        }
> +        txcnt++;
>      }
>
> -    if (dev->type == DPDK_DEV_VHOST) {
> -        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
> -                                 newcnt);
> -    } else {
> -        dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
> +    if (OVS_LIKELY(txcnt)) {
> +        if (dev->type == DPDK_DEV_VHOST) {
> +            __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
> pkts,
> +                                     txcnt);
> +        } else {
> +            dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt);
> +        }
>      }
>
> -out:
>      if (OVS_UNLIKELY(dropped)) {
>          rte_spinlock_lock(&dev->stats_lock);
>          dev->stats.tx_dropped += dropped;
>
>
> //////////////////////////////////////////////////////////////
>
>
> On 8/29/17, 4:39 AM, "ovs-dev-bounces@openvswitch.org on behalf of Zhenyu
> Gao" <ovs-dev-bounces@openvswitch.org on behalf of sysugaozhenyu@gmail.com>
> wrote:
>
>     In dpdk_do_tx_copy function, all packets were copied to mbuf first,
>     but QoS checking may drop some of them.
>     Move the QoS checking in front of copying data to mbuf, it helps to
>     reduce useless copy.
>
>     Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>
>     ---
>      lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++
> ++++++-------------------
>      1 file changed, 36 insertions(+), 19 deletions(-)
>
>     diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>     index 1aaf6f7..50874b8 100644
>     --- a/lib/netdev-dpdk.c
>     +++ b/lib/netdev-dpdk.c
>     @@ -279,7 +279,7 @@ struct dpdk_qos_ops {
>           * For all QoS implementations it should always be non-null.
>           */
>          int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
>     -                   int pkt_cnt);
>     +                   int pkt_cnt, bool may_steal);
>      };
>
>      /* dpdk_qos_ops for each type of user space QoS implementation */
>     @@ -1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct
> rte_meter_srtcm *meter,
>
>      static int
>      netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>     -                        struct rte_mbuf **pkts, int pkt_cnt)
>     +                        struct rte_mbuf **pkts, int pkt_cnt,
>     +                        bool may_steal)
>      {
>          int i = 0;
>          int cnt = 0;
>     @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> *meter,
>                  }
>                  cnt++;
>              } else {
>     -            rte_pktmbuf_free(pkt);
>     +            if (may_steal) {
>     +                rte_pktmbuf_free(pkt);
>     +            }
>              }
>          }
>
>     @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> *meter,
>
>      static int
>      ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf
> **pkts,
>     -                    int pkt_cnt)
>     +                    int pkt_cnt, bool may_steal)
>      {
>          int cnt = 0;
>
>          rte_spinlock_lock(&policer->policer_lock);
>     -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> pkt_cnt);
>     +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
>     +                                  pkt_cnt, may_steal);
>          rte_spinlock_unlock(&policer->policer_lock);
>
>          return cnt;
>     @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq
> *rxq,
>              dropped = nb_rx;
>              nb_rx = ingress_policer_run(policer,
>                                          (struct rte_mbuf **)
> batch->packets,
>     -                                    nb_rx);
>     +                                    nb_rx, true);
>              dropped -= nb_rx;
>          }
>
>     @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,
> struct dp_packet_batch *batch)
>              dropped = nb_rx;
>              nb_rx = ingress_policer_run(policer,
>                                          (struct rte_mbuf **)
> batch->packets,
>     -                                    nb_rx);
>     +                                    nb_rx, true);
>              dropped -= nb_rx;
>          }
>
>     @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,
> struct dp_packet_batch *batch)
>
>      static inline int
>      netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
>     -                    int cnt)
>     +                    int cnt, bool may_steal)
>      {
>          struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *,
> &dev->qos_conf);
>
>          if (qos_conf) {
>              rte_spinlock_lock(&qos_conf->lock);
>     -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
>     +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
>              rte_spinlock_unlock(&qos_conf->lock);
>          }
>
>     @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev,
> int qid,
>
>          cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
>          /* Check has QoS has been configured for the netdev */
>     -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
>     +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
>          dropped = total_pkts - cnt;
>
>          do {
>     @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int
> qid, struct dp_packet_batch *batch)
>      #endif
>          struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>          struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
>     +    int txcnt_eth = batch->count;
>          int dropped = 0;
>          int newcnt = 0;
>          int i;
>
>     +    if (dev->type != DPDK_DEV_VHOST) {
>     +        /* Check if QoS has been configured for this netdev. */
>     +        txcnt_eth = netdev_dpdk_qos_run(dev,
>     +                                        (struct rte_mbuf
> **)batch->packets,
>     +                                        txcnt_eth, false);
>     +        if (txcnt_eth == 0) {
>     +            dropped = batch->count;
>     +            goto out;
>     +        }
>     +    }
>     +
>          dp_packet_batch_apply_cutlen(batch);
>
>          for (i = 0; i < batch->count; i++) {
>     @@ -1848,21 +1864,20 @@ dpdk_do_tx_copy(struct netdev *netdev, int
> qid, struct dp_packet_batch *batch)
>              rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
>
>              newcnt++;
>     +        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
>     +            dropped += batch->count - i - 1;
>     +            break;
>     +        }
>          }
>
>          if (dev->type == DPDK_DEV_VHOST) {
>              __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
> pkts,
>                                       newcnt);
>          } else {
>     -        unsigned int qos_pkts = newcnt;
>     -
>     -        /* Check if QoS has been configured for this netdev. */
>     -        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
>     -
>     -        dropped += qos_pkts - newcnt;
>              dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
>          }
>
>     +out:
>          if (OVS_UNLIKELY(dropped)) {
>              rte_spinlock_lock(&dev->stats_lock);
>              dev->stats.tx_dropped += dropped;
>     @@ -1915,7 +1930,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int
> qid,
>              dp_packet_batch_apply_cutlen(batch);
>
>              cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
>     -        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
>     +        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
>              dropped = batch->count - cnt;
>
>              dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
>     @@ -3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct
> qos_conf *conf,
>      }
>
>      static int
>     -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> pkt_cnt)
>     +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> pkt_cnt,
>     +                   bool may_steal)
>      {
>          int cnt = 0;
>          struct egress_policer *policer =
>              CONTAINER_OF(conf, struct egress_policer, qos_conf);
>
>     -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> pkt_cnt);
>     +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
>     +                                  pkt_cnt, may_steal);
>
>          return cnt;
>      }
>     --
>     1.8.3.1
>
>     _______________________________________________
>     dev mailing list
>     dev@openvswitch.org
>     https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.
> openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=
> uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=
> EZzkW98aZhxGnYEEmB3u5e3vaFA90qSr3aIM1t-ggFY&s=
> fHRBBnkb29ujBAr27aGj1MCWkVzjfKqmJz3R9wEg99o&e=
>
>
>
>
>
Darrell Ball Sept. 7, 2017, 6:14 a.m. UTC | #7
Hi Gao

I had applied the patch to dpdk_merge here

https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_darball_ovs_commits_dpdk-5Fmerge&d=DwIGaQ&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=A2_FCacqbp2moAo3HGFlTuxsjONUGhlN42OBcAuQQ6w&s=b6btPKhgvOFr2GOUYvktND6kaC6jc3fXI-mXfvNgXOU&e=

Please let me know if I missed anything.

Thanks Darrell


From: Gao Zhenyu <sysugaozhenyu@gmail.com>
Date: Tuesday, September 5, 2017 at 11:39 PM
To: Darrel Ball <dball@vmware.com>
Cc: "ian.stokes@intel.com" <ian.stokes@intel.com>, "dev@openvswitch.org" <dev@openvswitch.org>
Subject: Re: [ovs-dev] [PATCH v2] netdev-dpdk: Execute QoS Checking before copying to mbuf

Thanks Derrell, it makes sence to me and I think the code path is more clear.
Thanks
Zhenyu Gao

2017-09-06 0:24 GMT+08:00 Darrell Ball <dball@vmware.com<mailto:dball@vmware.com>>:
Hi Gao

How about the following incremental ?

Thanks Darrell



diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 4808d38..648d719 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1843,64 +1843,58 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
 #endif
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
-    int txcnt_eth = batch->count;
-    int dropped = 0;
-    int newcnt = 0;
-    int i;
+    uint32_t cnt = batch->count;
+    uint32_t dropped = 0;

     if (dev->type != DPDK_DEV_VHOST) {
         /* Check if QoS has been configured for this netdev. */
-        txcnt_eth = netdev_dpdk_qos_run(dev,
-                                        (struct rte_mbuf **)batch->packets,
-                                        txcnt_eth, false);
-        if (txcnt_eth == 0) {
-            dropped = batch->count;
-            goto out;
-        }
+        cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
+                                  cnt, false);
+        dropped += batch->count - cnt;
     }

     dp_packet_batch_apply_cutlen(batch);

-    for (i = 0; i < batch->count; i++) {
-        int size = dp_packet_size(batch->packets[i]);
+    uint32_t txcnt = 0;
+
+    for (uint32_t i = 0; i < cnt; i++) {
+
+        uint32_t size = dp_packet_size(batch->packets[i]);

         if (OVS_UNLIKELY(size > dev->max_packet_len)) {
-            VLOG_WARN_RL(&rl, "Too big size %d max_packet_len %d",
-                         (int) size, dev->max_packet_len);
+            VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
+                         size, dev->max_packet_len);

             dropped++;
             continue;
         }

-        pkts[newcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
+        pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);

-        if (!pkts[newcnt]) {
-            dropped += batch->count - i;
+        if (!pkts[txcnt]) {
+            dropped += cnt - i;
             break;
         }

         /* We have to do a copy for now */
-        memcpy(rte_pktmbuf_mtod(pkts[newcnt], void *),
+        memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
                dp_packet_data(batch->packets[i]), size);

-        rte_pktmbuf_data_len(pkts[newcnt]) = size;
-        rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
+        rte_pktmbuf_data_len(pkts[txcnt]) = size;
+        rte_pktmbuf_pkt_len(pkts[txcnt]) = size;

-        newcnt++;
-        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
-            dropped += batch->count - i - 1;
-            break;
-        }
+        txcnt++;
     }

-    if (dev->type == DPDK_DEV_VHOST) {
-        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
-                                 newcnt);
-    } else {
-        dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
+    if (OVS_LIKELY(txcnt)) {
+        if (dev->type == DPDK_DEV_VHOST) {
+            __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
+                                     txcnt);
+        } else {
+            dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt);
+        }
     }

-out:
     if (OVS_UNLIKELY(dropped)) {
         rte_spinlock_lock(&dev->stats_lock);
         dev->stats.tx_dropped += dropped;


//////////////////////////////////////////////////////////////


On 8/29/17, 4:39 AM, "ovs-dev-bounces@openvswitch.org<mailto:ovs-dev-bounces@openvswitch.org> on behalf of Zhenyu Gao" <ovs-dev-bounces@openvswitch.org<mailto:ovs-dev-bounces@openvswitch.org> on behalf of sysugaozhenyu@gmail.com<mailto:sysugaozhenyu@gmail.com>> wrote:

    In dpdk_do_tx_copy function, all packets were copied to mbuf first,
    but QoS checking may drop some of them.
    Move the QoS checking in front of copying data to mbuf, it helps to
    reduce useless copy.

    Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com<mailto:sysugaozhenyu@gmail.com>>
    ---
     lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++++++++-------------------
     1 file changed, 36 insertions(+), 19 deletions(-)

    diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
    index 1aaf6f7..50874b8 100644
    --- a/lib/netdev-dpdk.c
    +++ b/lib/netdev-dpdk.c
    @@ -279,7 +279,7 @@ struct dpdk_qos_ops {
          * For all QoS implementations it should always be non-null.
          */
         int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
    -                   int pkt_cnt);
    +                   int pkt_cnt, bool may_steal);
     };

     /* dpdk_qos_ops for each type of user space QoS implementation */
    @@ -1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,

     static int
     netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
    -                        struct rte_mbuf **pkts, int pkt_cnt)
    +                        struct rte_mbuf **pkts, int pkt_cnt,
    +                        bool may_steal)
     {
         int i = 0;
         int cnt = 0;
    @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
                 }
                 cnt++;
             } else {
    -            rte_pktmbuf_free(pkt);
    +            if (may_steal) {
    +                rte_pktmbuf_free(pkt);
    +            }
             }
         }

    @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,

     static int
     ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,
    -                    int pkt_cnt)
    +                    int pkt_cnt, bool may_steal)
     {
         int cnt = 0;

         rte_spinlock_lock(&policer->policer_lock);
    -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
    +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
    +                                  pkt_cnt, may_steal);
         rte_spinlock_unlock(&policer->policer_lock);

         return cnt;
    @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
             dropped = nb_rx;
             nb_rx = ingress_policer_run(policer,
                                         (struct rte_mbuf **) batch->packets,
    -                                    nb_rx);
    +                                    nb_rx, true);
             dropped -= nb_rx;
         }

    @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
             dropped = nb_rx;
             nb_rx = ingress_policer_run(policer,
                                         (struct rte_mbuf **) batch->packets,
    -                                    nb_rx);
    +                                    nb_rx, true);
             dropped -= nb_rx;
         }

    @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)

     static inline int
     netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
    -                    int cnt)
    +                    int cnt, bool may_steal)
     {
         struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev->qos_conf);

         if (qos_conf) {
             rte_spinlock_lock(&qos_conf->lock);
    -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
    +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
             rte_spinlock_unlock(&qos_conf->lock);
         }

    @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,

         cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
         /* Check has QoS has been configured for the netdev */
    -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
    +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
         dropped = total_pkts - cnt;

         do {
    @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
     #endif
         struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
         struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
    +    int txcnt_eth = batch->count;
         int dropped = 0;
         int newcnt = 0;
         int i;

    +    if (dev->type != DPDK_DEV_VHOST) {
    +        /* Check if QoS has been configured for this netdev. */
    +        txcnt_eth = netdev_dpdk_qos_run(dev,
    +                                        (struct rte_mbuf **)batch->packets,
    +                                        txcnt_eth, false);
    +        if (txcnt_eth == 0) {
    +            dropped = batch->count;
    +            goto out;
    +        }
    +    }
    +
         dp_packet_batch_apply_cutlen(batch);

         for (i = 0; i < batch->count; i++) {
    @@ -1848,21 +1864,20 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
             rte_pktmbuf_pkt_len(pkts[newcnt]) = size;

             newcnt++;
    +        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
    +            dropped += batch->count - i - 1;
    +            break;
    +        }
         }

         if (dev->type == DPDK_DEV_VHOST) {
             __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
                                      newcnt);
         } else {
    -        unsigned int qos_pkts = newcnt;
    -
    -        /* Check if QoS has been configured for this netdev. */
    -        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
    -
    -        dropped += qos_pkts - newcnt;
             dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
         }

    +out:
         if (OVS_UNLIKELY(dropped)) {
             rte_spinlock_lock(&dev->stats_lock);
             dev->stats.tx_dropped += dropped;
    @@ -1915,7 +1930,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
             dp_packet_batch_apply_cutlen(batch);

             cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
    -        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
    +        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
             dropped = batch->count - cnt;

             dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
    @@ -3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct qos_conf *conf,
     }

     static int
    -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt)
    +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt,
    +                   bool may_steal)
     {
         int cnt = 0;
         struct egress_policer *policer =
             CONTAINER_OF(conf, struct egress_policer, qos_conf);

    -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
    +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
    +                                  pkt_cnt, may_steal);

         return cnt;
     }
    --
    1.8.3.1
    _______________________________________________
    dev mailing list
    dev@openvswitch.org<mailto:dev@openvswitch.org>
    https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=EZzkW98aZhxGnYEEmB3u5e3vaFA90qSr3aIM1t-ggFY&s=fHRBBnkb29ujBAr27aGj1MCWkVzjfKqmJz3R9wEg99o&e=
Ben Pfaff Oct. 30, 2017, 9:37 p.m. UTC | #8
Hi Ian and Gao, it looks like this patch never got applied.  Gao, is it
still relevant and, if so, Ian, will you add it to your next batch of
patches?

Thanks,

Ben.

On Sun, Sep 03, 2017 at 10:46:24AM +0000, Stokes, Ian wrote:
> OK,
> 
> In my own testing I’m seeing the same behavior with a tap interface pinging to a bridge with a DPDK device. As such I think this should be ok.
> 
> Acked-by: ian.stokes@intel.com<mailto:ian.stokes@intel.com>
> 
> Ian
> 
> From: Gao Zhenyu [mailto:sysugaozhenyu@gmail.com]
> Sent: Thursday, August 31, 2017 2:56 AM
> To: Stokes, Ian <ian.stokes@intel.com>
> Cc: dev@openvswitch.org
> Subject: Re: [PATCH v2] netdev-dpdk: Execute QoS Checking before copying to mbuf
> 
> Here is the the testing I just did:
> 1. Add log in function netdev_dpdk_policer_pkt_handle
> static inline bool
> netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
>                                struct rte_mbuf *pkt, uint64_t time)
> {
>     uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);
>     VLOG_WARN("GGGG, payload_len:%u, rte_pkt_len:%u",
>               pkt_len, rte_pktmbuf_pkt_len(pkt)); <------------------------print out pkt_len
> 
>     return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
>                                                 e_RTE_METER_GREEN;
> }
> 2. rebuild a rpm, install in machine, enable DPDK.
> 3. Create a container on the bridge(userspace bridge). (The container use veth device)
> 4. Config qos on eth dpdk eth by using : ovs-vsctl set port dpdk-uplink-eth3 qos=@newqos -- --id=@newqos create qos  type=egress-policer other-config:cir=1250000 other-config:cbs=2048
> 5. execute ping on the container and monitor ovs-vswitchd.log. Then I can see:
> 
> 2017-08-31T01:39:28.036Z|00133|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
> 2017-08-31T01:39:29.036Z|00134|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
> 2017-08-31T01:39:30.036Z|00135|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
> 2017-08-31T01:39:31.036Z|00136|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
> So the mbuf->pkt_len has a correct length.
> On the code side, we have function dp_packet_set_size(), if ovs compiled with dpdk, this function set the mbuf.data_len and mbuf.pkt_len. Many functions consume dp_packet_set_size() like netdev_linux_rxq_recv_sock in Netdev-linux.c
> 
> #ifdef DPDK_NETDEV
> ......
> 
> static inline void
> dp_packet_set_size(struct dp_packet *b, uint32_t v)
> {
> .....
>     b->mbuf.data_len = (uint16_t)v;  /* Current seg length. */
>     b->mbuf.pkt_len = v;             /* Total length of all segments linked to
>                                       * this segment. */
> }
> Please let me know if you have any concern on it.
> 
> Thanks
> Zhenyu Gao
> 
> 2017-08-30 23:18 GMT+08:00 Stokes, Ian <ian.stokes@intel.com<mailto:ian.stokes@intel.com>>:
> > In dpdk_do_tx_copy function, all packets were copied to mbuf first, but
> > QoS checking may drop some of them.
> > Move the QoS checking in front of copying data to mbuf, it helps to reduce
> > useless copy.
> 
> Hi Zhenyu, thanks for the patch, I agree with the objective of the patch but have some comments below I'd like to see addressed/tested before signing off.
> 
> >
> > Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com<mailto:sysugaozhenyu@gmail.com>>
> > ---
> >  lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++++++++--------------
> > -----
> >  1 file changed, 36 insertions(+), 19 deletions(-)
> >
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1aaf6f7..50874b8
> > 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -279,7 +279,7 @@ struct dpdk_qos_ops {
> >       * For all QoS implementations it should always be non-null.
> >       */
> >      int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
> > -                   int pkt_cnt);
> > +                   int pkt_cnt, bool may_steal);
> >  };
> >
> >  /* dpdk_qos_ops for each type of user space QoS implementation */ @@ -
> > 1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm
> > *meter,
> >
> >  static int
> >  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> > -                        struct rte_mbuf **pkts, int pkt_cnt)
> > +                        struct rte_mbuf **pkts, int pkt_cnt,
> > +                        bool may_steal)
> >  {
> >      int i = 0;
> >      int cnt = 0;
> > @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> > *meter,
> >              }
> >              cnt++;
> >          } else {
> > -            rte_pktmbuf_free(pkt);
> > +            if (may_steal) {
> > +                rte_pktmbuf_free(pkt);
> > +            }
> >          }
> >      }
> >
> > @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> > *meter,
> >
> >  static int
> >  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf
> > **pkts,
> > -                    int pkt_cnt)
> > +                    int pkt_cnt, bool may_steal)
> >  {
> >      int cnt = 0;
> >
> >      rte_spinlock_lock(&policer->policer_lock);
> > -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
> > +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> > +                                  pkt_cnt, may_steal);
> >      rte_spinlock_unlock(&policer->policer_lock);
> >
> >      return cnt;
> > @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> >          dropped = nb_rx;
> >          nb_rx = ingress_policer_run(policer,
> >                                      (struct rte_mbuf **) batch->packets,
> > -                                    nb_rx);
> > +                                    nb_rx, true);
> >          dropped -= nb_rx;
> >      }
> >
> > @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct
> > dp_packet_batch *batch)
> >          dropped = nb_rx;
> >          nb_rx = ingress_policer_run(policer,
> >                                      (struct rte_mbuf **) batch->packets,
> > -                                    nb_rx);
> > +                                    nb_rx, true);
> >          dropped -= nb_rx;
> >      }
> >
> > @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,
> > struct dp_packet_batch *batch)
> >
> >  static inline int
> >  netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
> > -                    int cnt)
> > +                    int cnt, bool may_steal)
> >  {
> >      struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev-
> > >qos_conf);
> >
> >      if (qos_conf) {
> >          rte_spinlock_lock(&qos_conf->lock);
> > -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
> > +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
> >          rte_spinlock_unlock(&qos_conf->lock);
> >      }
> >
> > @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> > qid,
> >
> >      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
> >      /* Check has QoS has been configured for the netdev */
> > -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
> > +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
> >      dropped = total_pkts - cnt;
> >
> >      do {
> > @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> > struct dp_packet_batch *batch)  #endif
> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
> > +    int txcnt_eth = batch->count;
> >      int dropped = 0;
> >      int newcnt = 0;
> >      int i;
> >
> > +    if (dev->type != DPDK_DEV_VHOST) {
> > +        /* Check if QoS has been configured for this netdev. */
> > +        txcnt_eth = netdev_dpdk_qos_run(dev,
> > +                                        (struct rte_mbuf **)batch-
> > >packets,
> > +                                        txcnt_eth, false);
> So dpdk_do_tx_copy will be called as part of netdev_dpdk_eth_send__ if its triggered by the following conditions
> 
>     if (OVS_UNLIKELY(!may_steal ||
>                      batch->packets[0]->source != DPBUF_DPDK)
> 
> What will happen in above if the source of the packet is not DPBUF_DPDK?
> 
> For the most part it will be ok until 'netdev_dpdk_policer_pkt_handle()' is called later on.
> 
> This has specific DPDK calls that expect an mbuf source for the packet.
> 
> Previously QoS took place after the packet had been copied so this was not an issue but now we have DPDK rte functions being called on a non mbuf source packet. I'm not sure of the behavior that could occur in this case. Could an incorrect length be returned for the call rte_pktmbuf_pkt_len(pkt) in 'netdev_dpdk_policer_pkt_handle()'?
> 
> It could be the case then that non DPDK specific call maybe required to attain the length of the packet in 'netdev_dpdk_policer_pkt_handle()' before calling the meter itself.
> 
> Have you tested this use case?
> 
> Ian
> 
> > +        if (txcnt_eth == 0) {
> > +            dropped = batch->count;
> > +            goto out;
> > +        }
> > +    }
> > +
> >      dp_packet_batch_apply_cutlen(batch);
> >
> >      for (i = 0; i < batch->count; i++) { @@ -1848,21 +1864,20 @@
> > dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
> > *batch)
> >          rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
> >
> >          newcnt++;
> > +        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
> > +            dropped += batch->count - i - 1;
> > +            break;
> > +        }
> >      }
> >
> >      if (dev->type == DPDK_DEV_VHOST) {
> >          __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
> >                                   newcnt);
> >      } else {
> > -        unsigned int qos_pkts = newcnt;
> > -
> > -        /* Check if QoS has been configured for this netdev. */
> > -        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
> > -
> > -        dropped += qos_pkts - newcnt;
> >          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
> >      }
> >
> > +out:
> >      if (OVS_UNLIKELY(dropped)) {
> >          rte_spinlock_lock(&dev->stats_lock);
> >          dev->stats.tx_dropped += dropped; @@ -1915,7 +1930,7 @@
> > netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> >          dp_packet_batch_apply_cutlen(batch);
> >
> >          cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
> > -        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
> > +        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
> >          dropped = batch->count - cnt;
> >
> >          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt); @@ -
> > 3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct qos_conf
> > *conf,  }
> >
> >  static int
> > -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> > pkt_cnt)
> > +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> > pkt_cnt,
> > +                   bool may_steal)
> >  {
> >      int cnt = 0;
> >      struct egress_policer *policer =
> >          CONTAINER_OF(conf, struct egress_policer, qos_conf);
> >
> > -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
> > +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> > +                                  pkt_cnt, may_steal);
> >
> >      return cnt;
> >  }
> > --
> > 1.8.3.1
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Darrell Ball Oct. 30, 2017, 9:53 p.m. UTC | #9
This patch was merged to dpdk_merge on Sept 6 and later merged to ovs.

https://mail.openvswitch.org/pipermail/ovs-dev/2017-September/338413.html


On 10/30/17, 2:38 PM, "ovs-dev-bounces@openvswitch.org on behalf of Ben Pfaff" <ovs-dev-bounces@openvswitch.org on behalf of blp@ovn.org> wrote:

    Hi Ian and Gao, it looks like this patch never got applied.  Gao, is it
    still relevant and, if so, Ian, will you add it to your next batch of
    patches?
    
    Thanks,
    
    Ben.
    
    On Sun, Sep 03, 2017 at 10:46:24AM +0000, Stokes, Ian wrote:
    > OK,

    > 

    > In my own testing I’m seeing the same behavior with a tap interface pinging to a bridge with a DPDK device. As such I think this should be ok.

    > 

    > Acked-by: ian.stokes@intel.com<mailto:ian.stokes@intel.com>

    > 

    > Ian

    > 

    > From: Gao Zhenyu [mailto:sysugaozhenyu@gmail.com]

    > Sent: Thursday, August 31, 2017 2:56 AM

    > To: Stokes, Ian <ian.stokes@intel.com>

    > Cc: dev@openvswitch.org

    > Subject: Re: [PATCH v2] netdev-dpdk: Execute QoS Checking before copying to mbuf

    > 

    > Here is the the testing I just did:

    > 1. Add log in function netdev_dpdk_policer_pkt_handle

    > static inline bool

    > netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,

    >                                struct rte_mbuf *pkt, uint64_t time)

    > {

    >     uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);

    >     VLOG_WARN("GGGG, payload_len:%u, rte_pkt_len:%u",

    >               pkt_len, rte_pktmbuf_pkt_len(pkt)); <------------------------print out pkt_len

    > 

    >     return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==

    >                                                 e_RTE_METER_GREEN;

    > }

    > 2. rebuild a rpm, install in machine, enable DPDK.

    > 3. Create a container on the bridge(userspace bridge). (The container use veth device)

    > 4. Config qos on eth dpdk eth by using : ovs-vsctl set port dpdk-uplink-eth3 qos=@newqos -- --id=@newqos create qos  type=egress-policer other-config:cir=1250000 other-config:cbs=2048

    > 5. execute ping on the container and monitor ovs-vswitchd.log. Then I can see:

    > 

    > 2017-08-31T01:39:28.036Z|00133|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102

    > 2017-08-31T01:39:29.036Z|00134|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102

    > 2017-08-31T01:39:30.036Z|00135|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102

    > 2017-08-31T01:39:31.036Z|00136|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102

    > So the mbuf->pkt_len has a correct length.

    > On the code side, we have function dp_packet_set_size(), if ovs compiled with dpdk, this function set the mbuf.data_len and mbuf.pkt_len. Many functions consume dp_packet_set_size() like netdev_linux_rxq_recv_sock in Netdev-linux.c

    > 

    > #ifdef DPDK_NETDEV

    > ......

    > 

    > static inline void

    > dp_packet_set_size(struct dp_packet *b, uint32_t v)

    > {

    > .....

    >     b->mbuf.data_len = (uint16_t)v;  /* Current seg length. */

    >     b->mbuf.pkt_len = v;             /* Total length of all segments linked to

    >                                       * this segment. */

    > }

    > Please let me know if you have any concern on it.

    > 

    > Thanks

    > Zhenyu Gao

    > 

    > 2017-08-30 23:18 GMT+08:00 Stokes, Ian <ian.stokes@intel.com<mailto:ian.stokes@intel.com>>:

    > > In dpdk_do_tx_copy function, all packets were copied to mbuf first, but

    > > QoS checking may drop some of them.

    > > Move the QoS checking in front of copying data to mbuf, it helps to reduce

    > > useless copy.

    > 

    > Hi Zhenyu, thanks for the patch, I agree with the objective of the patch but have some comments below I'd like to see addressed/tested before signing off.

    > 

    > >

    > > Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com<mailto:sysugaozhenyu@gmail.com>>

    > > ---

    > >  lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++++++++--------------

    > > -----

    > >  1 file changed, 36 insertions(+), 19 deletions(-)

    > >

    > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1aaf6f7..50874b8

    > > 100644

    > > --- a/lib/netdev-dpdk.c

    > > +++ b/lib/netdev-dpdk.c

    > > @@ -279,7 +279,7 @@ struct dpdk_qos_ops {

    > >       * For all QoS implementations it should always be non-null.

    > >       */

    > >      int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,

    > > -                   int pkt_cnt);

    > > +                   int pkt_cnt, bool may_steal);

    > >  };

    > >

    > >  /* dpdk_qos_ops for each type of user space QoS implementation */ @@ -

    > > 1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm

    > > *meter,

    > >

    > >  static int

    > >  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,

    > > -                        struct rte_mbuf **pkts, int pkt_cnt)

    > > +                        struct rte_mbuf **pkts, int pkt_cnt,

    > > +                        bool may_steal)

    > >  {

    > >      int i = 0;

    > >      int cnt = 0;

    > > @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm

    > > *meter,

    > >              }

    > >              cnt++;

    > >          } else {

    > > -            rte_pktmbuf_free(pkt);

    > > +            if (may_steal) {

    > > +                rte_pktmbuf_free(pkt);

    > > +            }

    > >          }

    > >      }

    > >

    > > @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm

    > > *meter,

    > >

    > >  static int

    > >  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf

    > > **pkts,

    > > -                    int pkt_cnt)

    > > +                    int pkt_cnt, bool may_steal)

    > >  {

    > >      int cnt = 0;

    > >

    > >      rte_spinlock_lock(&policer->policer_lock);

    > > -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);

    > > +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,

    > > +                                  pkt_cnt, may_steal);

    > >      rte_spinlock_unlock(&policer->policer_lock);

    > >

    > >      return cnt;

    > > @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,

    > >          dropped = nb_rx;

    > >          nb_rx = ingress_policer_run(policer,

    > >                                      (struct rte_mbuf **) batch->packets,

    > > -                                    nb_rx);

    > > +                                    nb_rx, true);

    > >          dropped -= nb_rx;

    > >      }

    > >

    > > @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct

    > > dp_packet_batch *batch)

    > >          dropped = nb_rx;

    > >          nb_rx = ingress_policer_run(policer,

    > >                                      (struct rte_mbuf **) batch->packets,

    > > -                                    nb_rx);

    > > +                                    nb_rx, true);

    > >          dropped -= nb_rx;

    > >      }

    > >

    > > @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,

    > > struct dp_packet_batch *batch)

    > >

    > >  static inline int

    > >  netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,

    > > -                    int cnt)

    > > +                    int cnt, bool may_steal)

    > >  {

    > >      struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev-

    > > >qos_conf);

    > >

    > >      if (qos_conf) {

    > >          rte_spinlock_lock(&qos_conf->lock);

    > > -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);

    > > +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);

    > >          rte_spinlock_unlock(&qos_conf->lock);

    > >      }

    > >

    > > @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int

    > > qid,

    > >

    > >      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);

    > >      /* Check has QoS has been configured for the netdev */

    > > -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);

    > > +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);

    > >      dropped = total_pkts - cnt;

    > >

    > >      do {

    > > @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,

    > > struct dp_packet_batch *batch)  #endif

    > >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

    > >      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];

    > > +    int txcnt_eth = batch->count;

    > >      int dropped = 0;

    > >      int newcnt = 0;

    > >      int i;

    > >

    > > +    if (dev->type != DPDK_DEV_VHOST) {

    > > +        /* Check if QoS has been configured for this netdev. */

    > > +        txcnt_eth = netdev_dpdk_qos_run(dev,

    > > +                                        (struct rte_mbuf **)batch-

    > > >packets,

    > > +                                        txcnt_eth, false);

    > So dpdk_do_tx_copy will be called as part of netdev_dpdk_eth_send__ if its triggered by the following conditions

    > 

    >     if (OVS_UNLIKELY(!may_steal ||

    >                      batch->packets[0]->source != DPBUF_DPDK)

    > 

    > What will happen in above if the source of the packet is not DPBUF_DPDK?

    > 

    > For the most part it will be ok until 'netdev_dpdk_policer_pkt_handle()' is called later on.

    > 

    > This has specific DPDK calls that expect an mbuf source for the packet.

    > 

    > Previously QoS took place after the packet had been copied so this was not an issue but now we have DPDK rte functions being called on a non mbuf source packet. I'm not sure of the behavior that could occur in this case. Could an incorrect length be returned for the call rte_pktmbuf_pkt_len(pkt) in 'netdev_dpdk_policer_pkt_handle()'?

    > 

    > It could be the case then that non DPDK specific call maybe required to attain the length of the packet in 'netdev_dpdk_policer_pkt_handle()' before calling the meter itself.

    > 

    > Have you tested this use case?

    > 

    > Ian

    > 

    > > +        if (txcnt_eth == 0) {

    > > +            dropped = batch->count;

    > > +            goto out;

    > > +        }

    > > +    }

    > > +

    > >      dp_packet_batch_apply_cutlen(batch);

    > >

    > >      for (i = 0; i < batch->count; i++) { @@ -1848,21 +1864,20 @@

    > > dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch

    > > *batch)

    > >          rte_pktmbuf_pkt_len(pkts[newcnt]) = size;

    > >

    > >          newcnt++;

    > > +        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {

    > > +            dropped += batch->count - i - 1;

    > > +            break;

    > > +        }

    > >      }

    > >

    > >      if (dev->type == DPDK_DEV_VHOST) {

    > >          __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,

    > >                                   newcnt);

    > >      } else {

    > > -        unsigned int qos_pkts = newcnt;

    > > -

    > > -        /* Check if QoS has been configured for this netdev. */

    > > -        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);

    > > -

    > > -        dropped += qos_pkts - newcnt;

    > >          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);

    > >      }

    > >

    > > +out:

    > >      if (OVS_UNLIKELY(dropped)) {

    > >          rte_spinlock_lock(&dev->stats_lock);

    > >          dev->stats.tx_dropped += dropped; @@ -1915,7 +1930,7 @@

    > > netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,

    > >          dp_packet_batch_apply_cutlen(batch);

    > >

    > >          cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);

    > > -        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);

    > > +        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);

    > >          dropped = batch->count - cnt;

    > >

    > >          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt); @@ -

    > > 3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct qos_conf

    > > *conf,  }

    > >

    > >  static int

    > > -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int

    > > pkt_cnt)

    > > +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int

    > > pkt_cnt,

    > > +                   bool may_steal)

    > >  {

    > >      int cnt = 0;

    > >      struct egress_policer *policer =

    > >          CONTAINER_OF(conf, struct egress_policer, qos_conf);

    > >

    > > -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);

    > > +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,

    > > +                                  pkt_cnt, may_steal);

    > >

    > >      return cnt;

    > >  }

    > > --

    > > 1.8.3.1

    > 

    > _______________________________________________

    > dev mailing list

    > dev@openvswitch.org

    > https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwIGaQ&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=dcQaJCTTWP1qu7IpAUJgDFmLk-ybQ66X_Jc0QNWWRSY&s=UjOv2s3fuqSLjg89oMkiwGeE3Nu-7FX0hG-aR3aJ-3s&e=

    _______________________________________________
    dev mailing list
    dev@openvswitch.org
    https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwIGaQ&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=dcQaJCTTWP1qu7IpAUJgDFmLk-ybQ66X_Jc0QNWWRSY&s=UjOv2s3fuqSLjg89oMkiwGeE3Nu-7FX0hG-aR3aJ-3s&e=
Ben Pfaff Oct. 30, 2017, 9:55 p.m. UTC | #10
OK, great, thanks.

On Mon, Oct 30, 2017 at 09:53:01PM +0000, Darrell Ball wrote:
> This patch was merged to dpdk_merge on Sept 6 and later merged to ovs.
> 
> https://mail.openvswitch.org/pipermail/ovs-dev/2017-September/338413.html
> 
> 
> On 10/30/17, 2:38 PM, "ovs-dev-bounces@openvswitch.org on behalf of Ben Pfaff" <ovs-dev-bounces@openvswitch.org on behalf of blp@ovn.org> wrote:
> 
>     Hi Ian and Gao, it looks like this patch never got applied.  Gao, is it
>     still relevant and, if so, Ian, will you add it to your next batch of
>     patches?
>     
>     Thanks,
>     
>     Ben.
>     
>     On Sun, Sep 03, 2017 at 10:46:24AM +0000, Stokes, Ian wrote:
>     > OK,
>     > 
>     > In my own testing I’m seeing the same behavior with a tap interface pinging to a bridge with a DPDK device. As such I think this should be ok.
>     > 
>     > Acked-by: ian.stokes@intel.com<mailto:ian.stokes@intel.com>
>     > 
>     > Ian
>     > 
>     > From: Gao Zhenyu [mailto:sysugaozhenyu@gmail.com]
>     > Sent: Thursday, August 31, 2017 2:56 AM
>     > To: Stokes, Ian <ian.stokes@intel.com>
>     > Cc: dev@openvswitch.org
>     > Subject: Re: [PATCH v2] netdev-dpdk: Execute QoS Checking before copying to mbuf
>     > 
>     > Here is the the testing I just did:
>     > 1. Add log in function netdev_dpdk_policer_pkt_handle
>     > static inline bool
>     > netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
>     >                                struct rte_mbuf *pkt, uint64_t time)
>     > {
>     >     uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);
>     >     VLOG_WARN("GGGG, payload_len:%u, rte_pkt_len:%u",
>     >               pkt_len, rte_pktmbuf_pkt_len(pkt)); <------------------------print out pkt_len
>     > 
>     >     return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
>     >                                                 e_RTE_METER_GREEN;
>     > }
>     > 2. rebuild a rpm, install in machine, enable DPDK.
>     > 3. Create a container on the bridge(userspace bridge). (The container use veth device)
>     > 4. Config qos on eth dpdk eth by using : ovs-vsctl set port dpdk-uplink-eth3 qos=@newqos -- --id=@newqos create qos  type=egress-policer other-config:cir=1250000 other-config:cbs=2048
>     > 5. execute ping on the container and monitor ovs-vswitchd.log. Then I can see:
>     > 
>     > 2017-08-31T01:39:28.036Z|00133|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
>     > 2017-08-31T01:39:29.036Z|00134|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
>     > 2017-08-31T01:39:30.036Z|00135|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
>     > 2017-08-31T01:39:31.036Z|00136|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
>     > So the mbuf->pkt_len has a correct length.
>     > On the code side, we have function dp_packet_set_size(), if ovs compiled with dpdk, this function set the mbuf.data_len and mbuf.pkt_len. Many functions consume dp_packet_set_size() like netdev_linux_rxq_recv_sock in Netdev-linux.c
>     > 
>     > #ifdef DPDK_NETDEV
>     > ......
>     > 
>     > static inline void
>     > dp_packet_set_size(struct dp_packet *b, uint32_t v)
>     > {
>     > .....
>     >     b->mbuf.data_len = (uint16_t)v;  /* Current seg length. */
>     >     b->mbuf.pkt_len = v;             /* Total length of all segments linked to
>     >                                       * this segment. */
>     > }
>     > Please let me know if you have any concern on it.
>     > 
>     > Thanks
>     > Zhenyu Gao
>     > 
>     > 2017-08-30 23:18 GMT+08:00 Stokes, Ian <ian.stokes@intel.com<mailto:ian.stokes@intel.com>>:
>     > > In dpdk_do_tx_copy function, all packets were copied to mbuf first, but
>     > > QoS checking may drop some of them.
>     > > Move the QoS checking in front of copying data to mbuf, it helps to reduce
>     > > useless copy.
>     > 
>     > Hi Zhenyu, thanks for the patch, I agree with the objective of the patch but have some comments below I'd like to see addressed/tested before signing off.
>     > 
>     > >
>     > > Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com<mailto:sysugaozhenyu@gmail.com>>
>     > > ---
>     > >  lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++++++++--------------
>     > > -----
>     > >  1 file changed, 36 insertions(+), 19 deletions(-)
>     > >
>     > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1aaf6f7..50874b8
>     > > 100644
>     > > --- a/lib/netdev-dpdk.c
>     > > +++ b/lib/netdev-dpdk.c
>     > > @@ -279,7 +279,7 @@ struct dpdk_qos_ops {
>     > >       * For all QoS implementations it should always be non-null.
>     > >       */
>     > >      int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
>     > > -                   int pkt_cnt);
>     > > +                   int pkt_cnt, bool may_steal);
>     > >  };
>     > >
>     > >  /* dpdk_qos_ops for each type of user space QoS implementation */ @@ -
>     > > 1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm
>     > > *meter,
>     > >
>     > >  static int
>     > >  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>     > > -                        struct rte_mbuf **pkts, int pkt_cnt)
>     > > +                        struct rte_mbuf **pkts, int pkt_cnt,
>     > > +                        bool may_steal)
>     > >  {
>     > >      int i = 0;
>     > >      int cnt = 0;
>     > > @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
>     > > *meter,
>     > >              }
>     > >              cnt++;
>     > >          } else {
>     > > -            rte_pktmbuf_free(pkt);
>     > > +            if (may_steal) {
>     > > +                rte_pktmbuf_free(pkt);
>     > > +            }
>     > >          }
>     > >      }
>     > >
>     > > @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
>     > > *meter,
>     > >
>     > >  static int
>     > >  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf
>     > > **pkts,
>     > > -                    int pkt_cnt)
>     > > +                    int pkt_cnt, bool may_steal)
>     > >  {
>     > >      int cnt = 0;
>     > >
>     > >      rte_spinlock_lock(&policer->policer_lock);
>     > > -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
>     > > +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
>     > > +                                  pkt_cnt, may_steal);
>     > >      rte_spinlock_unlock(&policer->policer_lock);
>     > >
>     > >      return cnt;
>     > > @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>     > >          dropped = nb_rx;
>     > >          nb_rx = ingress_policer_run(policer,
>     > >                                      (struct rte_mbuf **) batch->packets,
>     > > -                                    nb_rx);
>     > > +                                    nb_rx, true);
>     > >          dropped -= nb_rx;
>     > >      }
>     > >
>     > > @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct
>     > > dp_packet_batch *batch)
>     > >          dropped = nb_rx;
>     > >          nb_rx = ingress_policer_run(policer,
>     > >                                      (struct rte_mbuf **) batch->packets,
>     > > -                                    nb_rx);
>     > > +                                    nb_rx, true);
>     > >          dropped -= nb_rx;
>     > >      }
>     > >
>     > > @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,
>     > > struct dp_packet_batch *batch)
>     > >
>     > >  static inline int
>     > >  netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
>     > > -                    int cnt)
>     > > +                    int cnt, bool may_steal)
>     > >  {
>     > >      struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev-
>     > > >qos_conf);
>     > >
>     > >      if (qos_conf) {
>     > >          rte_spinlock_lock(&qos_conf->lock);
>     > > -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
>     > > +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
>     > >          rte_spinlock_unlock(&qos_conf->lock);
>     > >      }
>     > >
>     > > @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
>     > > qid,
>     > >
>     > >      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
>     > >      /* Check has QoS has been configured for the netdev */
>     > > -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
>     > > +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
>     > >      dropped = total_pkts - cnt;
>     > >
>     > >      do {
>     > > @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
>     > > struct dp_packet_batch *batch)  #endif
>     > >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>     > >      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
>     > > +    int txcnt_eth = batch->count;
>     > >      int dropped = 0;
>     > >      int newcnt = 0;
>     > >      int i;
>     > >
>     > > +    if (dev->type != DPDK_DEV_VHOST) {
>     > > +        /* Check if QoS has been configured for this netdev. */
>     > > +        txcnt_eth = netdev_dpdk_qos_run(dev,
>     > > +                                        (struct rte_mbuf **)batch-
>     > > >packets,
>     > > +                                        txcnt_eth, false);
>     > So dpdk_do_tx_copy will be called as part of netdev_dpdk_eth_send__ if its triggered by the following conditions
>     > 
>     >     if (OVS_UNLIKELY(!may_steal ||
>     >                      batch->packets[0]->source != DPBUF_DPDK)
>     > 
>     > What will happen in above if the source of the packet is not DPBUF_DPDK?
>     > 
>     > For the most part it will be ok until 'netdev_dpdk_policer_pkt_handle()' is called later on.
>     > 
>     > This has specific DPDK calls that expect an mbuf source for the packet.
>     > 
>     > Previously QoS took place after the packet had been copied so this was not an issue but now we have DPDK rte functions being called on a non mbuf source packet. I'm not sure of the behavior that could occur in this case. Could an incorrect length be returned for the call rte_pktmbuf_pkt_len(pkt) in 'netdev_dpdk_policer_pkt_handle()'?
>     > 
>     > It could be the case then that non DPDK specific call maybe required to attain the length of the packet in 'netdev_dpdk_policer_pkt_handle()' before calling the meter itself.
>     > 
>     > Have you tested this use case?
>     > 
>     > Ian
>     > 
>     > > +        if (txcnt_eth == 0) {
>     > > +            dropped = batch->count;
>     > > +            goto out;
>     > > +        }
>     > > +    }
>     > > +
>     > >      dp_packet_batch_apply_cutlen(batch);
>     > >
>     > >      for (i = 0; i < batch->count; i++) { @@ -1848,21 +1864,20 @@
>     > > dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
>     > > *batch)
>     > >          rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
>     > >
>     > >          newcnt++;
>     > > +        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
>     > > +            dropped += batch->count - i - 1;
>     > > +            break;
>     > > +        }
>     > >      }
>     > >
>     > >      if (dev->type == DPDK_DEV_VHOST) {
>     > >          __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
>     > >                                   newcnt);
>     > >      } else {
>     > > -        unsigned int qos_pkts = newcnt;
>     > > -
>     > > -        /* Check if QoS has been configured for this netdev. */
>     > > -        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
>     > > -
>     > > -        dropped += qos_pkts - newcnt;
>     > >          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
>     > >      }
>     > >
>     > > +out:
>     > >      if (OVS_UNLIKELY(dropped)) {
>     > >          rte_spinlock_lock(&dev->stats_lock);
>     > >          dev->stats.tx_dropped += dropped; @@ -1915,7 +1930,7 @@
>     > > netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>     > >          dp_packet_batch_apply_cutlen(batch);
>     > >
>     > >          cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
>     > > -        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
>     > > +        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
>     > >          dropped = batch->count - cnt;
>     > >
>     > >          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt); @@ -
>     > > 3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct qos_conf
>     > > *conf,  }
>     > >
>     > >  static int
>     > > -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
>     > > pkt_cnt)
>     > > +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
>     > > pkt_cnt,
>     > > +                   bool may_steal)
>     > >  {
>     > >      int cnt = 0;
>     > >      struct egress_policer *policer =
>     > >          CONTAINER_OF(conf, struct egress_policer, qos_conf);
>     > >
>     > > -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
>     > > +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
>     > > +                                  pkt_cnt, may_steal);
>     > >
>     > >      return cnt;
>     > >  }
>     > > --
>     > > 1.8.3.1
>     > 
>     > _______________________________________________
>     > dev mailing list
>     > dev@openvswitch.org
>     > https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwIGaQ&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=dcQaJCTTWP1qu7IpAUJgDFmLk-ybQ66X_Jc0QNWWRSY&s=UjOv2s3fuqSLjg89oMkiwGeE3Nu-7FX0hG-aR3aJ-3s&e=
>     _______________________________________________
>     dev mailing list
>     dev@openvswitch.org
>     https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwIGaQ&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=dcQaJCTTWP1qu7IpAUJgDFmLk-ybQ66X_Jc0QNWWRSY&s=UjOv2s3fuqSLjg89oMkiwGeE3Nu-7FX0hG-aR3aJ-3s&e=
>     
>
diff mbox series

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 1aaf6f7..50874b8 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -279,7 +279,7 @@  struct dpdk_qos_ops {
      * For all QoS implementations it should always be non-null.
      */
     int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
-                   int pkt_cnt);
+                   int pkt_cnt, bool may_steal);
 };
 
 /* dpdk_qos_ops for each type of user space QoS implementation */
@@ -1501,7 +1501,8 @@  netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
 
 static int
 netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
-                        struct rte_mbuf **pkts, int pkt_cnt)
+                        struct rte_mbuf **pkts, int pkt_cnt,
+                        bool may_steal)
 {
     int i = 0;
     int cnt = 0;
@@ -1517,7 +1518,9 @@  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
             }
             cnt++;
         } else {
-            rte_pktmbuf_free(pkt);
+            if (may_steal) {
+                rte_pktmbuf_free(pkt);
+            }
         }
     }
 
@@ -1526,12 +1529,13 @@  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
 
 static int
 ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,
-                    int pkt_cnt)
+                    int pkt_cnt, bool may_steal)
 {
     int cnt = 0;
 
     rte_spinlock_lock(&policer->policer_lock);
-    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
+    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
+                                  pkt_cnt, may_steal);
     rte_spinlock_unlock(&policer->policer_lock);
 
     return cnt;
@@ -1635,7 +1639,7 @@  netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
         dropped = nb_rx;
         nb_rx = ingress_policer_run(policer,
                                     (struct rte_mbuf **) batch->packets,
-                                    nb_rx);
+                                    nb_rx, true);
         dropped -= nb_rx;
     }
 
@@ -1672,7 +1676,7 @@  netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
         dropped = nb_rx;
         nb_rx = ingress_policer_run(policer,
                                     (struct rte_mbuf **) batch->packets,
-                                    nb_rx);
+                                    nb_rx, true);
         dropped -= nb_rx;
     }
 
@@ -1690,13 +1694,13 @@  netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
 
 static inline int
 netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
-                    int cnt)
+                    int cnt, bool may_steal)
 {
     struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev->qos_conf);
 
     if (qos_conf) {
         rte_spinlock_lock(&qos_conf->lock);
-        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
+        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
         rte_spinlock_unlock(&qos_conf->lock);
     }
 
@@ -1770,7 +1774,7 @@  __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 
     cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
     /* Check has QoS has been configured for the netdev */
-    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
+    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
     dropped = total_pkts - cnt;
 
     do {
@@ -1816,10 +1820,22 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
 #endif
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
+    int txcnt_eth = batch->count;
     int dropped = 0;
     int newcnt = 0;
     int i;
 
+    if (dev->type != DPDK_DEV_VHOST) {
+        /* Check if QoS has been configured for this netdev. */
+        txcnt_eth = netdev_dpdk_qos_run(dev,
+                                        (struct rte_mbuf **)batch->packets,
+                                        txcnt_eth, false);
+        if (txcnt_eth == 0) {
+            dropped = batch->count;
+            goto out;
+        }
+    }
+
     dp_packet_batch_apply_cutlen(batch);
 
     for (i = 0; i < batch->count; i++) {
@@ -1848,21 +1864,20 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
         rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
 
         newcnt++;
+        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
+            dropped += batch->count - i - 1;
+            break;
+        }
     }
 
     if (dev->type == DPDK_DEV_VHOST) {
         __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
                                  newcnt);
     } else {
-        unsigned int qos_pkts = newcnt;
-
-        /* Check if QoS has been configured for this netdev. */
-        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
-
-        dropped += qos_pkts - newcnt;
         dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
     }
 
+out:
     if (OVS_UNLIKELY(dropped)) {
         rte_spinlock_lock(&dev->stats_lock);
         dev->stats.tx_dropped += dropped;
@@ -1915,7 +1930,7 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
         dp_packet_batch_apply_cutlen(batch);
 
         cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
-        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
+        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
         dropped = batch->count - cnt;
 
         dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
@@ -3132,13 +3147,15 @@  egress_policer_qos_is_equal(const struct qos_conf *conf,
 }
 
 static int
-egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt)
+egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt,
+                   bool may_steal)
 {
     int cnt = 0;
     struct egress_policer *policer =
         CONTAINER_OF(conf, struct egress_policer, qos_conf);
 
-    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
+    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
+                                  pkt_cnt, may_steal);
 
     return cnt;
 }