@@ -207,6 +207,25 @@ Using the DPDK with ovs-vswitchd:
./ovs-ofctl add-flow br0 in_port=2,action=output:1
```
+8. Ingress Policing Example
+
+ Assuming you have a vhost-user port receiving traffic consisting of
+ packets of size 64 bytes, the following command would limit the reception
+ rate of the port to ~1,000,000 packets per second:
+
+ `ovs-vsctl set interface vhost-user0 ingress_policing_rate=46000000
+ ingress_policing_burst=4096`
+
+ To examine the ingress policer configuration of the port:
+
+ `ovs-vsctl list interface vhost-user0`
+
+ To clear the ingress policer configuration from the port use the following:
+
+ `ovs-vsctl set interface vhost-user0 ingress_policing_rate=0`
+
+ For more details regarding ingress-policer see the vswitch.xml.
+
Performance Tuning:
-------------------
@@ -18,6 +18,7 @@ Post-v2.5.0
* New appctl command 'dpif-netdev/pmd-rxq-show' to check the port/rxq
assignment.
* Type of log messages from PMD threads changed from INFO to DBG.
+ * Add ingress policing functionality.
- ovs-benchmark: This utility has been removed due to lack of use and
bitrot.
- ovs-appctl:
@@ -53,6 +53,7 @@
#include "rte_config.h"
#include "rte_mbuf.h"
+#include "rte_meter.h"
#include "rte_virtio_net.h"
VLOG_DEFINE_THIS_MODULE(dpdk);
@@ -193,6 +194,11 @@ struct dpdk_ring {
struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); };
+struct ingress_policer {
+ struct rte_meter_srtcm_params app_srtcm_params;
+ struct rte_meter_srtcm in_policer;
+};
+
struct netdev_dpdk {
struct netdev up;
int port_id;
@@ -231,6 +237,13 @@ struct netdev_dpdk {
/* Identifier used to distinguish vhost devices from each other */
char vhost_id[PATH_MAX];
+ /* Ingress Policer */
+ rte_spinlock_t policer_lock;
+ struct ingress_policer *ingress_policer;
+
+ uint32_t policer_rate;
+ uint32_t policer_burst;
+
/* In dpdk_list. */
struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); }; @@ -617,6 +630,11 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no,
netdev_->requested_n_rxq = NR_QUEUE;
netdev->real_n_txq = NR_QUEUE;
+ rte_spinlock_init(&netdev->policer_lock);
+ netdev->ingress_policer = NULL;
+ netdev->policer_rate = 0;
+ netdev->policer_burst = 0;
+
if (type == DPDK_DEV_ETH) {
netdev_dpdk_alloc_txq(netdev, NR_QUEUE);
err = dpdk_eth_dev_init(netdev); @@ -1012,12 +1030,14 @@ is_vhost_running(struct virtio_net *dev)
static inline void
netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,
- struct dp_packet **packets, int count)
+ struct dp_packet **packets, int count,
+ int dropped)
{
int i;
struct dp_packet *packet;
stats->rx_packets += count;
+ stats->rx_dropped += dropped;
for (i = 0; i < count; i++) {
packet = packets[i];
@@ -1039,6 +1059,48 @@ netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,
}
}
+static inline int
+policer_pkt_handle__(struct rte_meter_srtcm *meter,
+ struct rte_mbuf *pkt, uint64_t time) {
+ uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct
+ether_hdr);
+
+ /* color input is not used for blind modes */
+ if (rte_meter_srtcm_color_blind_check(meter, time, pkt_len) != e_RTE_METER_GREEN) {
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+policer_run(struct netdev *netdev_, struct rte_mbuf **pkts,
+ int pkt_cnt)
+{
+ int i = 0;
+ int cnt = 0;
+ struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+ struct rte_mbuf *pkt = NULL;
+ uint64_t current_time = rte_rdtsc();
+
+ for (i = 0; i < pkt_cnt; i++) {
+ pkt = pkts[i];
+ /* Handle current packet */
+ if (policer_pkt_handle__(&netdev->ingress_policer->in_policer, pkt,
+ current_time) != 1) {
+ if (cnt != i) {
+ pkts[cnt] = pkt;
+ }
+ cnt++;
+ }
+ else {
+ rte_pktmbuf_free(pkt);
+ }
+ }
+
+ return cnt;
+}
+
/*
* The receive path for the vhost port is the TX path out from guest.
*/
@@ -1052,6 +1114,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq_,
struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(vhost_dev);
int qid = rxq_->queue_id;
uint16_t nb_rx = 0;
+ uint16_t dropped = 0;
if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) {
return EAGAIN;
@@ -1069,8 +1132,16 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq_,
return EAGAIN;
}
+ rte_spinlock_lock(&vhost_dev->policer_lock);
+ if (vhost_dev->ingress_policer != NULL) {
+ dropped = nb_rx;
+ nb_rx = policer_run(netdev, (struct rte_mbuf **)packets, nb_rx);
+ dropped -= nb_rx;
+ }
+ rte_spinlock_unlock(&vhost_dev->policer_lock);
+
rte_spinlock_lock(&vhost_dev->stats_lock);
- netdev_dpdk_vhost_update_rx_counters(&vhost_dev->stats, packets, nb_rx);
+ netdev_dpdk_vhost_update_rx_counters(&vhost_dev->stats, packets,
+ nb_rx, dropped);
rte_spinlock_unlock(&vhost_dev->stats_lock);
*c = (int) nb_rx;
@@ -1085,6 +1156,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
struct netdev *netdev = rx->up.netdev;
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
int nb_rx;
+ int dropped = 0;
/* There is only one tx queue for this core. Do not flush other
* queues.
@@ -1102,6 +1174,21 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
return EAGAIN;
}
+ rte_spinlock_lock(&dev->policer_lock);
+ if (dev->ingress_policer != NULL) {
+ dropped = nb_rx;
+ nb_rx = policer_run(netdev, (struct rte_mbuf **) packets, nb_rx);
+ dropped -= nb_rx;
+ }
+ rte_spinlock_unlock(&dev->policer_lock);
+
+ /* Update stats to reflect dropped packets */
+ if (OVS_UNLIKELY(dropped)) {
+ rte_spinlock_lock(&dev->stats_lock);
+ dev->stats.rx_dropped += dropped;
+ rte_spinlock_unlock(&dev->stats_lock);
+ }
+
*c = nb_rx;
return 0;
@@ -1502,12 +1589,12 @@ netdev_dpdk_vhost_get_stats(const struct netdev *netdev,
stats->tx_fifo_errors = UINT64_MAX;
stats->tx_heartbeat_errors = UINT64_MAX;
stats->tx_window_errors = UINT64_MAX;
- stats->rx_dropped += UINT64_MAX;
rte_spinlock_lock(&dev->stats_lock);
/* Supported Stats */
stats->rx_packets += dev->stats.rx_packets;
stats->tx_packets += dev->stats.tx_packets;
+ stats->rx_dropped += dev->stats.rx_dropped;
stats->tx_dropped += dev->stats.tx_dropped;
stats->multicast = dev->stats.multicast;
stats->rx_bytes = dev->stats.rx_bytes; @@ -1545,11 +1632,12 @@ netdev_dpdk_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
rte_spinlock_lock(&dev->stats_lock);
stats->tx_dropped = dev->stats.tx_dropped;
+ stats->rx_dropped = dev->stats.rx_dropped;
rte_spinlock_unlock(&dev->stats_lock);
/* These are the available DPDK counters for packets not received due to
* local resource constraints in DPDK and NIC respectively. */
- stats->rx_dropped = rte_stats.rx_nombuf + rte_stats.imissed;
+ stats->rx_dropped += rte_stats.rx_nombuf + rte_stats.imissed;
stats->collisions = UINT64_MAX;
stats->rx_length_errors = UINT64_MAX; @@ -1617,6 +1705,95 @@ netdev_dpdk_get_features(const struct netdev *netdev_, }
static int
+netdev_dpdk_policer_construct__(struct netdev *netdev_, uint32_t rate,
+ uint32_t burst) {
+ struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+ struct ingress_policer *policer;
+ int err = 0;
+
+ rte_spinlock_lock(&netdev->policer_lock);
+ policer = xmalloc(sizeof *policer);
+ netdev->ingress_policer = policer;
+
+ policer->app_srtcm_params.cir = rate;
+ policer->app_srtcm_params.cbs = burst;
+ policer->app_srtcm_params.ebs = 0;
+ err = rte_meter_srtcm_config(&policer->in_policer,
+ &policer->app_srtcm_params);
+ rte_spinlock_unlock(&netdev->policer_lock);
+
+ return err;
+}
+
+static int
+netdev_dpdk_policer_destruct__(struct netdev *netdev_) {
+ struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+ if (netdev->ingress_policer == NULL) {
+ return 0;
+ }
+ else {
+ rte_spinlock_lock(&netdev->policer_lock);
+ free(netdev->ingress_policer);
+ netdev->ingress_policer = NULL;
+ netdev->policer_rate = 0;
+ netdev->policer_burst = 0;
+ rte_spinlock_unlock(&netdev->policer_lock);
+ }
+ return 0;
+}
+
+static int
+netdev_dpdk_set_policing(struct netdev* netdev_, uint32_t policer_rate,
+ uint32_t policer_burst) {
+ struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+ int err = 0;
+
+ policer_burst = (!policer_rate ? 0 /* Force to 0 if no rate specified. */
+ : !policer_burst ? 4096 /* Default to 4096 bytes if 0. */
+ : policer_burst); /* Stick with user-specified value. */
+
+ ovs_mutex_lock(&netdev->mutex);
+
+ if (netdev->policer_rate == policer_rate &&
+ netdev->policer_burst == policer_burst) {
+ /* Assume that settings haven't changed since we last set them. */
+ ovs_mutex_unlock(&netdev->mutex);
+ return err;
+ }
+
+ /* Destroy any existing ingress policer for the device if one exists
+ * and the policer_rate and policer_burst are being set to 0
+ */
+ if ((netdev->ingress_policer != NULL) &&
+ (policer_rate == 0) && (policer_burst == 0)) {
+
+ /* User has set rate and burst to 0, destroy the policer */
+ err = netdev_dpdk_policer_destruct__(netdev_);
+ ovs_mutex_unlock(&netdev->mutex);
+ return err;
+ }
+
+ /* Destroy existing policer */
+ if (netdev->ingress_policer != NULL) {
+ err = netdev_dpdk_policer_destruct__(netdev_);
+ }
+
+ /* Add an ingress policer */
+ err = netdev_dpdk_policer_construct__(netdev_, policer_rate,
+ policer_burst);
+
+ /* Update policer_rate and policer_burst for the netdev */
+ netdev->policer_rate = policer_rate;
+ netdev->policer_burst = policer_burst;
+
+ ovs_mutex_unlock(&netdev->mutex);
+
+ return err;
+}
+
+static int
netdev_dpdk_get_ifindex(const struct netdev *netdev) {
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); @@ -2193,7 +2370,7 @@ unlock_dpdk:
GET_FEATURES, \
NULL, /* set_advertisements */ \
\
- NULL, /* set_policing */ \
+ netdev_dpdk_set_policing, /* set_policing */ \
NULL, /* get_qos_types */ \
NULL, /* get_qos_capabilities */ \
NULL, /* get_qos */ \
@@ -2384,8 +2384,8 @@
table="Queue"/> tables).
</p>
<p>
- Policing is currently implemented only on Linux. The Linux
- implementation uses a simple ``token bucket'' approach:
+ Policing is implemented on Linux and DPDK interfaces. Both
+ implementations use a simple ``token bucket'' approach:
</p>
<ul>
<li>
@@ -2422,17 +2422,23 @@
</p>
<column name="ingress_policing_rate">
<p>
- Maximum rate for data received on this interface, in kbps. Data
- received faster than this rate is dropped. Set to <code>0</code>
- (the default) to disable policing.
+ Maximum rate for data received on this interface. The metric used
+ for Linux and DPDK interfaces differ. Linux interfaces expect the
+ value to be specified in kbps. DPDK interfaces expect the value to
+ be specified in bytes per second. Data received faster than this
+ rate is dropped. Set to <code>0</code>(the default) to disable
+ policing for both interface types.
</p>
</column>
<column name="ingress_policing_burst">
- <p>Maximum burst size for data received on this interface, in kb. The
- default burst size if set to <code>0</code> is 1000 kb. This value
- has no effect if <ref column="ingress_policing_rate"/>
- is <code>0</code>.</p>
+ <p>Maximum burst size for data received on this interface. The metric
+ used for Linux and DPDK interfaces differ. Linux interfaces expect a
+ value specified in kb. DPDK interfaces expect a value specified in
+ bytes. The default burst size if set to <code>0</code> is 1000 kb for
+ Linux interfaces and 4096 bytes for DPDK. This value has no effect if
+ <ref column="ingress_policing_rate"/> is <code>0</code> for either
+ interface type.</p>
<p>
Specifying a larger burst size lets the algorithm be more forgiving,
which is important for protocols like TCP that react severely to