Message ID | 1516969207-17326-2-git-send-email-jan.scheurich@ericsson.com |
---|---|
State | Changes Requested |
Delegated to: | Ian Stokes |
Headers | show |
Series | dpif-netdev: Detailed PMD performance metrics and supervision | expand |
LGTM but one thing I don't understand down below... > -----Original Message----- > From: Jan Scheurich [mailto:jan.scheurich@ericsson.com] > Sent: Friday, January 26, 2018 12:20 PM > To: dev@openvswitch.org > Cc: ktraynor@redhat.com; Stokes, Ian <ian.stokes@intel.com>; > i.maximets@samsung.com; O Mahony, Billy <billy.o.mahony@intel.com>; > Jan Scheurich <jan.scheurich@ericsson.com> > Subject: [PATCH v8 1/3] netdev: Add optional qfill output parameter to > rxq_recv() > > If the caller provides a non-NULL qfill pointer and the netdev > implemementation supports reading the rx queue fill level, the rxq_recv() > function returns the remaining number of packets in the rx queue after > reception of the packet burst to the caller. If the implementation does not > support this, it returns -ENOTSUP instead. Reading the remaining queue fill > level should not substantilly slow down the recv() operation. > > A first implementation is provided for ethernet and vhostuser DPDK ports in > netdev-dpdk.c. > > This output parameter will be used in the upcoming commit for PMD > performance metrics to supervise the rx queue fill level for DPDK vhostuser > ports. > > Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com> > --- > lib/dpif-netdev.c | 2 +- > lib/netdev-bsd.c | 8 +++++++- > lib/netdev-dpdk.c | 25 +++++++++++++++++++++++-- > lib/netdev-dummy.c | 8 +++++++- > lib/netdev-linux.c | 7 ++++++- > lib/netdev-provider.h | 7 ++++++- > lib/netdev.c | 5 +++-- > lib/netdev.h | 3 ++- > 8 files changed, 55 insertions(+), 10 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index ba62128..4a0fcbd > 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -3276,7 +3276,7 @@ dp_netdev_process_rxq_port(struct > dp_netdev_pmd_thread *pmd, > pmd->ctx.last_rxq = rxq; > dp_packet_batch_init(&batch); > > - error = netdev_rxq_recv(rxq->rx, &batch); > + error = netdev_rxq_recv(rxq->rx, &batch, NULL); > if (!error) { > /* At least one packet received. */ > *recirc_depth_get() = 0; > diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c index 05974c1..b70f327 > 100644 > --- a/lib/netdev-bsd.c > +++ b/lib/netdev-bsd.c > @@ -618,7 +618,8 @@ netdev_rxq_bsd_recv_tap(struct netdev_rxq_bsd > *rxq, struct dp_packet *buffer) } > > static int > -netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch > *batch) > +netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch > *batch, > + int *qfill) > { > struct netdev_rxq_bsd *rxq = netdev_rxq_bsd_cast(rxq_); > struct netdev *netdev = rxq->up.netdev; @@ -643,6 +644,11 @@ > netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch > *batch) > batch->packets[0] = packet; > batch->count = 1; > } > + > + if (qfill) { > + *qfill = -ENOTSUP; > + } > + > return retval; > } > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index ac2e38e..bb7dece > 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -1757,7 +1757,7 @@ netdev_dpdk_vhost_update_rx_counters(struct > netdev_stats *stats, > */ > static int > netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, > - struct dp_packet_batch *batch) > + struct dp_packet_batch *batch, int *qfill) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev); > struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev); > @@ -1795,11 +1795,24 @@ netdev_dpdk_vhost_rxq_recv(struct > netdev_rxq *rxq, > batch->count = nb_rx; > dp_packet_batch_init_packet_fields(batch); > > + if (qfill) { > + if (nb_rx == NETDEV_MAX_BURST) { > + /* The DPDK API returns a uint32_t which often has invalid bits in > + * the upper 16-bits. Need to restrict the value to uint16_t. */ > + *qfill += rte_vhost_rx_queue_count(netdev_dpdk_get_vid(dev), > + qid * VIRTIO_QNUM + VIRTIO_TXQ) > + & UINT16_MAX; [[BO'M]] Why the += operator? > + } else { > + *qfill = 0; > + } > + } > + > return 0; > } > > static int > -netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch > *batch) > +netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch > *batch, > + int *qfill) > { > struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq); > struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev); @@ -1836,6 > +1849,14 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct > dp_packet_batch *batch) > batch->count = nb_rx; > dp_packet_batch_init_packet_fields(batch); > > + if (qfill) { > + if (nb_rx == NETDEV_MAX_BURST) { > + *qfill += rte_eth_rx_queue_count(rx->port_id, rxq->queue_id); [[BO'M]] Why the += operator? > + } else { > + *qfill = 0; > + } > + } > + > return 0; > } > > diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index > 4246af3..229cf96 100644 > --- a/lib/netdev-dummy.c > +++ b/lib/netdev-dummy.c > @@ -992,7 +992,8 @@ netdev_dummy_rxq_dealloc(struct netdev_rxq > *rxq_) } > > static int > -netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct > dp_packet_batch *batch) > +netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct > dp_packet_batch *batch, > + int *qfill) > { > struct netdev_rxq_dummy *rx = netdev_rxq_dummy_cast(rxq_); > struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev); > @@ -1037,6 +1038,11 @@ netdev_dummy_rxq_recv(struct netdev_rxq > *rxq_, struct dp_packet_batch *batch) > > batch->packets[0] = packet; > batch->count = 1; > + > + if (qfill) { > + *qfill = -ENOTSUP; > + } > + > return 0; > } > > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 6dae796..5f895a0 > 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -1129,7 +1129,8 @@ netdev_linux_rxq_recv_tap(int fd, struct > dp_packet *buffer) } > > static int > -netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch > *batch) > +netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch > *batch, > + int *qfill) > { > struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_); > struct netdev *netdev = rx->up.netdev; @@ -1158,6 +1159,10 @@ > netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch > *batch) > dp_packet_batch_init_packet(batch, buffer); > } > > + if (qfill) { > + *qfill = -ENOTSUP; > + } > + > return retval; > } > > diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index > 25bd671..37add95 100644 > --- a/lib/netdev-provider.h > +++ b/lib/netdev-provider.h > @@ -786,12 +786,17 @@ struct netdev_class { > * pointers and metadata itself, if desired, e.g. with pkt_metadata_init() > * and miniflow_extract(). > * > + * If the caller provides a non-NULL qfill pointer, the implementation > + * returns the remaining rx queue fill level (zero or more) after the > + * reception of packets, if it supports that, or -ENOTSUP otherwise. > + * > * Implementations should allocate buffers with DP_NETDEV_HEADROOM > bytes of > * headroom. > * > * Returns EAGAIN immediately if no packet is ready to be received or > * another positive errno value if an error was encountered. */ > - int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet_batch *batch); > + int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet_batch *batch, > + int *qfill); > > /* Registers with the poll loop to wake up from the next call to > * poll_block() when a packet is ready to be received with diff --git > a/lib/netdev.c b/lib/netdev.c index be05dc6..71035b2 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -694,11 +694,12 @@ netdev_rxq_close(struct netdev_rxq *rx) > * Returns EAGAIN immediately if no packet is ready to be received or > another > * positive errno value if an error was encountered. */ int - > netdev_rxq_recv(struct netdev_rxq *rx, struct dp_packet_batch *batch) > +netdev_rxq_recv(struct netdev_rxq *rx, struct dp_packet_batch *batch, > + int *qfill) > { > int retval; > > - retval = rx->netdev->netdev_class->rxq_recv(rx, batch); > + retval = rx->netdev->netdev_class->rxq_recv(rx, batch, qfill); > if (!retval) { > COVERAGE_INC(netdev_received); > } else { > diff --git a/lib/netdev.h b/lib/netdev.h index ff1b604..3f51634 100644 > --- a/lib/netdev.h > +++ b/lib/netdev.h > @@ -175,7 +175,8 @@ void netdev_rxq_close(struct netdev_rxq *); const > char *netdev_rxq_get_name(const struct netdev_rxq *); int > netdev_rxq_get_queue_id(const struct netdev_rxq *); > > -int netdev_rxq_recv(struct netdev_rxq *rx, struct dp_packet_batch *); > +int netdev_rxq_recv(struct netdev_rxq *rx, struct dp_packet_batch *, > + int *qfill); > void netdev_rxq_wait(struct netdev_rxq *); int netdev_rxq_drain(struct > netdev_rxq *); > > -- > 1.9.1
> > @@ -1795,11 +1795,24 @@ netdev_dpdk_vhost_rxq_recv(struct > > netdev_rxq *rxq, > > batch->count = nb_rx; > > dp_packet_batch_init_packet_fields(batch); > > > > + if (qfill) { > > + if (nb_rx == NETDEV_MAX_BURST) { > > + /* The DPDK API returns a uint32_t which often has invalid bits in > > + * the upper 16-bits. Need to restrict the value to uint16_t. */ > > + *qfill += rte_vhost_rx_queue_count(netdev_dpdk_get_vid(dev), > > + qid * VIRTIO_QNUM + VIRTIO_TXQ) > > + & UINT16_MAX; > [[BO'M]] Why the += operator? Oops: This a leftover from an intermediate version where I returned the total qlen including the received packets. Forgot this when I changed the semantics of the output parameter. > > + } else { > > + *qfill = 0; > > + } > > + } > > + > > return 0; > > } > > > > static int > > -netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch > > *batch) > > +netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch > > *batch, > > + int *qfill) > > { > > struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq); > > struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev); @@ -1836,6 > > +1849,14 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct > > dp_packet_batch *batch) > > batch->count = nb_rx; > > dp_packet_batch_init_packet_fields(batch); > > > > + if (qfill) { > > + if (nb_rx == NETDEV_MAX_BURST) { > > + *qfill += rte_eth_rx_queue_count(rx->port_id, rxq->queue_id); > [[BO'M]] Why the += operator? Same as above. BR, Jan
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index ba62128..4a0fcbd 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -3276,7 +3276,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd, pmd->ctx.last_rxq = rxq; dp_packet_batch_init(&batch); - error = netdev_rxq_recv(rxq->rx, &batch); + error = netdev_rxq_recv(rxq->rx, &batch, NULL); if (!error) { /* At least one packet received. */ *recirc_depth_get() = 0; diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c index 05974c1..b70f327 100644 --- a/lib/netdev-bsd.c +++ b/lib/netdev-bsd.c @@ -618,7 +618,8 @@ netdev_rxq_bsd_recv_tap(struct netdev_rxq_bsd *rxq, struct dp_packet *buffer) } static int -netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch) +netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch, + int *qfill) { struct netdev_rxq_bsd *rxq = netdev_rxq_bsd_cast(rxq_); struct netdev *netdev = rxq->up.netdev; @@ -643,6 +644,11 @@ netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch) batch->packets[0] = packet; batch->count = 1; } + + if (qfill) { + *qfill = -ENOTSUP; + } + return retval; } diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index ac2e38e..bb7dece 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -1757,7 +1757,7 @@ netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats, */ static int netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, - struct dp_packet_batch *batch) + struct dp_packet_batch *batch, int *qfill) { struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev); struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev); @@ -1795,11 +1795,24 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, batch->count = nb_rx; dp_packet_batch_init_packet_fields(batch); + if (qfill) { + if (nb_rx == NETDEV_MAX_BURST) { + /* The DPDK API returns a uint32_t which often has invalid bits in + * the upper 16-bits. Need to restrict the value to uint16_t. */ + *qfill += rte_vhost_rx_queue_count(netdev_dpdk_get_vid(dev), + qid * VIRTIO_QNUM + VIRTIO_TXQ) + & UINT16_MAX; + } else { + *qfill = 0; + } + } + return 0; } static int -netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch) +netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch, + int *qfill) { struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq); struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev); @@ -1836,6 +1849,14 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch) batch->count = nb_rx; dp_packet_batch_init_packet_fields(batch); + if (qfill) { + if (nb_rx == NETDEV_MAX_BURST) { + *qfill += rte_eth_rx_queue_count(rx->port_id, rxq->queue_id); + } else { + *qfill = 0; + } + } + return 0; } diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index 4246af3..229cf96 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -992,7 +992,8 @@ netdev_dummy_rxq_dealloc(struct netdev_rxq *rxq_) } static int -netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch) +netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch, + int *qfill) { struct netdev_rxq_dummy *rx = netdev_rxq_dummy_cast(rxq_); struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev); @@ -1037,6 +1038,11 @@ netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch) batch->packets[0] = packet; batch->count = 1; + + if (qfill) { + *qfill = -ENOTSUP; + } + return 0; } diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 6dae796..5f895a0 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -1129,7 +1129,8 @@ netdev_linux_rxq_recv_tap(int fd, struct dp_packet *buffer) } static int -netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch) +netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch, + int *qfill) { struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_); struct netdev *netdev = rx->up.netdev; @@ -1158,6 +1159,10 @@ netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch) dp_packet_batch_init_packet(batch, buffer); } + if (qfill) { + *qfill = -ENOTSUP; + } + return retval; } diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 25bd671..37add95 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -786,12 +786,17 @@ struct netdev_class { * pointers and metadata itself, if desired, e.g. with pkt_metadata_init() * and miniflow_extract(). * + * If the caller provides a non-NULL qfill pointer, the implementation + * returns the remaining rx queue fill level (zero or more) after the + * reception of packets, if it supports that, or -ENOTSUP otherwise. + * * Implementations should allocate buffers with DP_NETDEV_HEADROOM bytes of * headroom. * * Returns EAGAIN immediately if no packet is ready to be received or * another positive errno value if an error was encountered. */ - int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet_batch *batch); + int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet_batch *batch, + int *qfill); /* Registers with the poll loop to wake up from the next call to * poll_block() when a packet is ready to be received with diff --git a/lib/netdev.c b/lib/netdev.c index be05dc6..71035b2 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -694,11 +694,12 @@ netdev_rxq_close(struct netdev_rxq *rx) * Returns EAGAIN immediately if no packet is ready to be received or another * positive errno value if an error was encountered. */ int -netdev_rxq_recv(struct netdev_rxq *rx, struct dp_packet_batch *batch) +netdev_rxq_recv(struct netdev_rxq *rx, struct dp_packet_batch *batch, + int *qfill) { int retval; - retval = rx->netdev->netdev_class->rxq_recv(rx, batch); + retval = rx->netdev->netdev_class->rxq_recv(rx, batch, qfill); if (!retval) { COVERAGE_INC(netdev_received); } else { diff --git a/lib/netdev.h b/lib/netdev.h index ff1b604..3f51634 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -175,7 +175,8 @@ void netdev_rxq_close(struct netdev_rxq *); const char *netdev_rxq_get_name(const struct netdev_rxq *); int netdev_rxq_get_queue_id(const struct netdev_rxq *); -int netdev_rxq_recv(struct netdev_rxq *rx, struct dp_packet_batch *); +int netdev_rxq_recv(struct netdev_rxq *rx, struct dp_packet_batch *, + int *qfill); void netdev_rxq_wait(struct netdev_rxq *); int netdev_rxq_drain(struct netdev_rxq *);
If the caller provides a non-NULL qfill pointer and the netdev implemementation supports reading the rx queue fill level, the rxq_recv() function returns the remaining number of packets in the rx queue after reception of the packet burst to the caller. If the implementation does not support this, it returns -ENOTSUP instead. Reading the remaining queue fill level should not substantilly slow down the recv() operation. A first implementation is provided for ethernet and vhostuser DPDK ports in netdev-dpdk.c. This output parameter will be used in the upcoming commit for PMD performance metrics to supervise the rx queue fill level for DPDK vhostuser ports. Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com> --- lib/dpif-netdev.c | 2 +- lib/netdev-bsd.c | 8 +++++++- lib/netdev-dpdk.c | 25 +++++++++++++++++++++++-- lib/netdev-dummy.c | 8 +++++++- lib/netdev-linux.c | 7 ++++++- lib/netdev-provider.h | 7 ++++++- lib/netdev.c | 5 +++-- lib/netdev.h | 3 ++- 8 files changed, 55 insertions(+), 10 deletions(-)