@@ -3302,9 +3302,10 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets,
for (i = 0; i < cnt; i++) {
struct dp_netdev_flow *flow;
+ struct dp_packet *packet = packets[i];
- if (OVS_UNLIKELY(dp_packet_size(packets[i]) < ETH_HEADER_LEN)) {
- dp_packet_delete(packets[i]);
+ if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
+ dp_packet_delete(packet);
n_dropped++;
continue;
}
@@ -3314,18 +3315,18 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets,
OVS_PREFETCH(dp_packet_data(packets[i+1]));
}
- miniflow_extract(packets[i], &key.mf);
+ miniflow_extract(packet, &key.mf);
key.len = 0; /* Not computed yet. */
- key.hash = dpif_netdev_packet_get_rss_hash(packets[i], &key.mf);
+ key.hash = dpif_netdev_packet_get_rss_hash(packet, &key.mf);
flow = emc_lookup(flow_cache, &key);
if (OVS_LIKELY(flow)) {
- dp_netdev_queue_batches(packets[i], flow, &key.mf, batches,
+ dp_netdev_queue_batches(packet, flow, &key.mf, batches,
n_batches);
} else {
/* Exact match cache missed. Group missed packets together at
* the beginning of the 'packets' array. */
- packets[n_missed] = packets[i];
+ packets[n_missed] = packet;
keys[n_missed++] = key;
}
}
On the machines I have access to, reloading the same pointer from memory seems to inhibit compiler optimization somewhat. In emc_processing(), using a single packet pointer, instead of reloading it from memory with packets[i], improves performance by 0.3 Mpps (tested with a 10G NIC pushing 64-byte packets, against a baseline of 12.2 Mpps). Besides improving performance, this patch should also improve code readability. Signed-off-by: Andy Zhou <azhou@ovn.org> --- lib/dpif-netdev.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)