From patchwork Tue Jul 24 16:58:35 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Luigi Rizzo X-Patchwork-Id: 172949 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 53ACA2C0080 for ; Wed, 25 Jul 2012 02:39:02 +1000 (EST) Received: from localhost ([::1]:39435 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Sti8S-0003rk-Fa for incoming@patchwork.ozlabs.org; Tue, 24 Jul 2012 12:39:00 -0400 Received: from eggs.gnu.org ([208.118.235.92]:40045) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Sti8G-0003qc-Tf for qemu-devel@nongnu.org; Tue, 24 Jul 2012 12:38:52 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Sti89-0003GL-Te for qemu-devel@nongnu.org; Tue, 24 Jul 2012 12:38:48 -0400 Received: from onelab2.iet.unipi.it ([131.114.59.238]:49994) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Sti89-0003Fm-Jj for qemu-devel@nongnu.org; Tue, 24 Jul 2012 12:38:41 -0400 Received: by onelab2.iet.unipi.it (Postfix, from userid 275) id 5FE067300A; Tue, 24 Jul 2012 18:58:35 +0200 (CEST) Date: Tue, 24 Jul 2012 18:58:35 +0200 From: Luigi Rizzo To: qemu-devel@nongnu.org Message-ID: <20120724165835.GB21023@onelab2.iet.unipi.it> Mime-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.4.2.3i X-detected-operating-system: by eggs.gnu.org: FreeBSD 6.x (1) X-Received-From: 131.114.59.238 Subject: [Qemu-devel] interrupt mitigation for e1000 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: 
qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org I noticed that the various NIC modules in qemu/kvm do not implement interrupt mitigation, which is very beneficial because it dramatically reduces exits from the hypervisor. As a proof of concept, I tried to implement it for the e1000 driver (patch below), and it brings TX performance from 9 to 56 Kpps on qemu-softmmu, and from ~20 to 140 Kpps on qemu-kvm. I am going to measure RX interrupt mitigation in the next couple of days. Is there any interest in having this code merged? cheers luigi diff -ubwrp --exclude '*.[do]' /tmp/qemu-61dc008/hw/e1000.c ./hw/e1000.c --- /tmp/qemu-61dc008/hw/e1000.c 2012-07-20 01:25:52.000000000 +0200 +++ ./hw/e1000.c 2012-07-24 18:21:39.000000000 +0200 @@ -33,6 +33,8 @@ #include "sysemu.h" #include "dma.h" +#define MITIGATION + #include "e1000_hw.h" #define E1000_DEBUG @@ -127,6 +129,13 @@ typedef struct E1000State_st { } eecd_state; QEMUTimer *autoneg_timer; + +#ifdef MITIGATION + QEMUBH *int_bh; // interrupt mitigation handler + int tx_ics_count; // pending tx int requests + int rx_ics_count; // pending rx int requests + int int_cause; // int cause +#endif // MITIGATION } E1000State; #define defreg(x) x = (E1000_##x>>2) @@ -638,6 +648,26 @@ start_xmit(E1000State *s) return; } +#ifdef MITIGATION + /* we transmit the first few packets, or we do if we are + * approaching a full ring. in the latter case, also + * send an ics. + * + */ +{ + int len, pending; + len = s->mac_reg[TDLEN] / sizeof(desc) ; + pending = s->mac_reg[TDT] - s->mac_reg[TDH]; + if (pending < 0) + pending += len; + /* ignore requests after the first few ones, as long as + * we are not approaching a full ring. + * Otherwise, deliver packets to the backend. 
+ */ + if (s->tx_ics_count > 4 && s->tx_ics_count + pending < len - 5) + return; +#endif // MITIGATION + while (s->mac_reg[TDH] != s->mac_reg[TDT]) { base = tx_desc_base(s) + sizeof(struct e1000_tx_desc) * s->mac_reg[TDH]; @@ -663,7 +693,21 @@ start_xmit(E1000State *s) break; } } +#ifdef MITIGATION + s->int_cause |= cause; // remember the interrupt cause. + s->tx_ics_count += pending; + if (s->tx_ics_count >= len - 5) { + // if the ring is about to become full, generate an interrupt + set_ics(s, 0, s->int_cause); + s->tx_ics_count = 0; + s->int_cause = 0; + } else { // otherwise just schedule it for later. + qemu_bh_schedule_idle(s->int_bh); + } +} +#else /* !MITIGATION */ set_ics(s, 0, cause); +#endif } static int @@ -875,7 +919,27 @@ e1000_receive(VLANClientState *nc, const s->rxbuf_min_shift) n |= E1000_ICS_RXDMT0; +#ifdef MITIGATION +#define MIT_RXDMT0_SENT 100000 // large + s->int_cause |= n; + if (s->rx_ics_count == 0) { + /* deliver the first interrupt */ + set_ics(s, 0, s->int_cause); + s->int_cause = 0; + s->rx_ics_count++; + } else if ( (n & E1000_ICS_RXDMT0) && s->rx_ics_count < MIT_RXDMT0_SENT) { + /* also deliver if we are approaching ring full */ + set_ics(s, 0, s->int_cause); + s->int_cause = 0; + s->rx_ics_count = MIT_RXDMT0_SENT; + } else { + /* otherwise schedule for later */ + s->rx_ics_count++; + qemu_bh_schedule_idle(s->int_bh); + } +#else /* !MITIGATION */ set_ics(s, 0, n); +#endif /* !MITIGATION */ return size; } @@ -1214,6 +1281,20 @@ static NetClientInfo net_e1000_info = { .link_status_changed = e1000_set_link_status, }; +#ifdef MITIGATION +static void e1000_int_bh(void *opaque) +{ + E1000State *s = opaque; + if (s->tx_ics_count < 1 && s->rx_ics_count < 1) + return; + s->tx_ics_count = 0; + s->rx_ics_count = 0; + start_xmit(s); + set_ics(s, 0, s->int_cause); + s->int_cause = 0; +} +#endif /* MITIGATION */ + static int pci_e1000_init(PCIDevice *pci_dev) { E1000State *d = DO_UPCAST(E1000State, dev, pci_dev); @@ -1231,6 +1312,9 @@ static int 
pci_e1000_init(PCIDevice *pci e1000_mmio_setup(d); +#ifdef MITIGATION + d->int_bh = qemu_bh_new(e1000_int_bh, d); +#endif /* MITIGATION */ pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio); pci_register_bar(&d->dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);