@@ -710,6 +710,12 @@ dp_packet_batch_is_empty(const struct dp_packet_batch *batch)
return !dp_packet_batch_size(batch);
}
+static inline bool
+dp_packet_batch_is_full(const struct dp_packet_batch *batch)
+{
+ return dp_packet_batch_size(batch) == NETDEV_MAX_BURST;
+}
+
#define DP_PACKET_BATCH_FOR_EACH(PACKET, BATCH) \
for (size_t i = 0; i < dp_packet_batch_size(BATCH); i++) \
if (PACKET = BATCH->packets[i], true)
@@ -84,6 +84,9 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
#define MAX_RECIRC_DEPTH 5
DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
+/* Use instant packet send by default. */
+#define DEFAULT_OUTPUT_MAX_LATENCY 0
+
/* Configuration parameters. */
enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
enum { MAX_METERS = 65536 }; /* Maximum number of meters. */
@@ -262,6 +265,9 @@ struct dp_netdev {
struct ovs_mutex meter_locks[N_METER_LOCKS];
struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
+ /* The time that a packet can wait in the output batch for sending. */
+ atomic_uint32_t output_max_latency;
+
/* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
@@ -494,6 +500,7 @@ struct tx_port {
int qid;
long long last_used;
struct hmap_node node;
+ long long output_time;
struct dp_packet_batch output_pkts;
};
@@ -660,7 +667,7 @@ static void dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd,
struct rxq_poll *poll)
OVS_REQUIRES(pmd->port_mutex);
static void dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *,
- long long now);
+ long long now, bool force);
static void reconfigure_datapath(struct dp_netdev *dp)
OVS_REQUIRES(dp->port_mutex);
static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
@@ -1182,6 +1189,7 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
conntrack_init(&dp->conntrack);
+ atomic_init(&dp->output_max_latency, DEFAULT_OUTPUT_MAX_LATENCY);
atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
cmap_init(&dp->poll_threads);
@@ -2848,7 +2856,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
dp_packet_batch_init_packet(&pp, execute->packet);
dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
execute->actions, execute->actions_len, now);
- dp_netdev_pmd_flush_output_packets(pmd, now);
+ dp_netdev_pmd_flush_output_packets(pmd, now, true);
if (pmd->core_id == NON_PMD_CORE_ID) {
ovs_mutex_unlock(&dp->non_pmd_mutex);
@@ -2897,6 +2905,16 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
smap_get_ullong(other_config, "emc-insert-inv-prob",
DEFAULT_EM_FLOW_INSERT_INV_PROB);
uint32_t insert_min, cur_min;
+ uint32_t output_max_latency, cur_max_latency;
+
+ output_max_latency = smap_get_int(other_config, "output-max-latency",
+ DEFAULT_OUTPUT_MAX_LATENCY);
+ atomic_read_relaxed(&dp->output_max_latency, &cur_max_latency);
+ if (output_max_latency != cur_max_latency) {
+ atomic_store_relaxed(&dp->output_max_latency, output_max_latency);
+ VLOG_INFO("Output maximum latency set to %"PRIu32" ms",
+ output_max_latency);
+ }
if (!nullable_string_is_equal(dp->pmd_cmask, cmask)) {
free(dp->pmd_cmask);
@@ -3085,26 +3103,34 @@ cycles_count_end(struct dp_netdev_pmd_thread *pmd,
}
static void
+dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
+ struct tx_port *p, long long now)
+{
+ int tx_qid;
+ bool dynamic_txqs;
+
+ dynamic_txqs = p->port->dynamic_txqs;
+ if (dynamic_txqs) {
+ tx_qid = dpif_netdev_xps_get_tx_qid(pmd, p, now);
+ } else {
+ tx_qid = pmd->static_tx_qid;
+ }
+
+ netdev_send(p->port->netdev, tx_qid, &p->output_pkts,
+ dynamic_txqs);
+ dp_packet_batch_init(&p->output_pkts);
+}
+
+static void
dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
- long long now)
+ long long now, bool force)
{
struct tx_port *p;
HMAP_FOR_EACH (p, node, &pmd->send_port_cache) {
- if (!dp_packet_batch_is_empty(&p->output_pkts)) {
- int tx_qid;
- bool dynamic_txqs;
-
- dynamic_txqs = p->port->dynamic_txqs;
- if (dynamic_txqs) {
- tx_qid = dpif_netdev_xps_get_tx_qid(pmd, p, now);
- } else {
- tx_qid = pmd->static_tx_qid;
- }
-
- netdev_send(p->port->netdev, tx_qid, &p->output_pkts,
- dynamic_txqs);
- dp_packet_batch_init(&p->output_pkts);
+ if (!dp_packet_batch_is_empty(&p->output_pkts)
+ && (force || p->output_time <= now)) {
+ dp_netdev_pmd_flush_output_on_port(pmd, p, now);
}
}
}
@@ -3128,7 +3154,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
cycles_count_start(pmd);
dp_netdev_input(pmd, &batch, port_no, now);
- dp_netdev_pmd_flush_output_packets(pmd, now);
+ dp_netdev_pmd_flush_output_packets(pmd, now, false);
cycles_count_end(pmd, PMD_CYCLES_PROCESSING);
} else if (error != EAGAIN && error != EOPNOTSUPP) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
@@ -3663,6 +3689,8 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
{
struct tx_port *tx_port_cached;
+ /* Flush all the queued packets. */
+ dp_netdev_pmd_flush_output_packets(pmd, 0, true);
/* Free all used tx queue ids. */
dpif_netdev_xps_revalidate_pmd(pmd, 0, true);
@@ -4388,6 +4416,7 @@ dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
tx->port = port;
tx->qid = -1;
+ tx->output_time = 0LL;
dp_packet_batch_init(&tx->output_pkts);
hmap_insert(&pmd->tx_ports, &tx->node, hash_port_no(tx->port->port_no));
@@ -5054,8 +5083,18 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
}
dp_packet_batch_apply_cutlen(packets_);
+ if (dp_packet_batch_is_empty(&p->output_pkts)) {
+ uint32_t cur_max_latency;
+
+ atomic_read_relaxed(&dp->output_max_latency, &cur_max_latency);
+ p->output_time = now + cur_max_latency;
+ }
+
DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
dp_packet_batch_add(&p->output_pkts, packet);
+ if (OVS_UNLIKELY(dp_packet_batch_is_full(&p->output_pkts))) {
+ dp_netdev_pmd_flush_output_on_port(pmd, p, now);
+ }
}
return;
}
@@ -344,6 +344,21 @@
</p>
</column>
+ <column name="other_config" key="output-max-latency"
+ type='{"type": "integer", "minInteger": 0, "maxInteger": 1000}'>
+ <p>
+ Specifies the time in milliseconds that a packet can wait in the output
+ batch for sending, i.e. the amount of time that a packet can spend in an
+ intermediate output queue before being sent to the netdev.
+ This option can be used to configure the balance between throughput
+ and latency. Lower values decrease latency while higher values
+ may be useful to achieve higher performance.
+ </p>
+ <p>
+ Defaults to 0 i.e. instant packet sending (latency optimized).
+ </p>
+ </column>
+
<column name="other_config" key="n-handler-threads"
type='{"type": "integer", "minInteger": 1}'>
<p>