Patchwork e1000: Handle IO Port.

login
register
mail settings
Submitter Anthony PERARD
Date June 27, 2011, 4:34 p.m.
Message ID <1309192483-32468-1-git-send-email-anthony.perard@citrix.com>
Download mbox | patch
Permalink /patch/102226/
State New
Headers show

Comments

Anthony PERARD - June 27, 2011, 4:34 p.m.
This patch introduces the two IOPorts on e1000, IOADDR and IODATA. The
IOADDR is used to specify which register we want to access when we read
or write on IODATA.

This patch fixes some weird behavior that I see when I use e1000 with
QEMU/Xen, the guest memory can be corrupted by this NIC because it will
write on memory that it doesn't own anymore after a reset. It's because
the kernel Linux use the IOPort to reset the network card instead of the
MMIO.

Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
 hw/e1000.c |   92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 84 insertions(+), 8 deletions(-)
Peter Maydell - June 27, 2011, 6:07 p.m.
On 27 June 2011 17:34, Anthony PERARD <anthony.perard@citrix.com> wrote:
> @@ -83,6 +86,8 @@ typedef struct E1000State_st {
>     NICState *nic;
>     NICConf conf;
>     int mmio_index;
> +    int ioport_base;
> +    uint32_t ioport_reg[2];

I think ioport_reg[] needs to go in the VMStateDescription as well.
I don't know enough about the PCI subsystem to know whether that's
also true of ioport_base or whether the the map function is called
again on a vmload.

> @@ -202,6 +201,11 @@ rxbufsize(uint32_t v)
>  static void
>  set_ctrl(E1000State *s, int index, uint32_t val)
>  {
> +    DBGOUT(IO, "set ctrl = %08x\n", val);
> +    if (val & E1000_CTRL_RST) {
> +        s->mac_reg[CTRL] = val;
> +        e1000_reset(s);
> +    }

There doesn't seem to be much point in setting mac_reg[CTRL]
when e1000_reset() is going to put it to its reset value anyway;
and you almost certainly don't want to fall through to this:

>     /* RST is self clearing */
>     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;

...which means there's no need for that bit to special case RST.

>  static void
> +e1000_ioport_writel(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    E1000State *s = opaque;
> +
> +    if (addr == s->ioport_base + REG_IOADDR) {
> +        DBGOUT(IO, "e1000_ioport_writel write base: 0x%04x\n", val);
> +        s->ioport_reg[REG_IOADDR] = val & 0xfffff;
> +    } else if (addr == (s->ioport_base + REG_IODATA)) {
> +        unsigned int index = (s->ioport_reg[REG_IOADDR] & 0x1ffff) >> 2;
> +
> +        DBGOUT(IO, "e1000_ioport_writel %x: 0x%04x\n", index, val);
> +
> +        if (index < NWRITEOPS && macreg_writeops[index]) {
> +            macreg_writeops[index](s, index, val);
> +        } else if (index < NREADOPS && macreg_readops[index]) {
> +            DBGOUT(IO, "e1000_ioport_writel RO %x: 0x%04x\n", index << 2, val);
> +        } else {
> +            DBGOUT(UNKNOWN, "IO unknown write index=0x%08x,val=0x%08x\n",
> +                   index, val);
> +        }

This part of this function seems to be duplicating the code in
e1000_mmio_writel: wouldn't it be cleaner just to call that function?
Ditto readl.

-- PMM
Paolo Bonzini - June 28, 2011, 5:49 a.m.
On 06/27/2011 08:07 PM, Peter Maydell wrote:
> >  +    int ioport_base;
> >  +    uint32_t ioport_reg[2];
>
> I think ioport_reg[] needs to go in the VMStateDescription as well.
> I don't know enough about the PCI subsystem to know whether that's
> also true of ioport_base or whether the the map function is called
> again on a vmload.

ioport_reg[1] is never written nor read, so this can be changed to a 
single uint32_t ioport_addr.  Since it will be almost always 0 (either 
because it was never written, or because E1000_CTRL == 0), a subsection 
is useful to have.

Paolo
Anthony PERARD - June 30, 2011, 6:37 p.m.
On Mon, Jun 27, 2011 at 19:07, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 27 June 2011 17:34, Anthony PERARD <anthony.perard@citrix.com> wrote:
>> @@ -83,6 +86,8 @@ typedef struct E1000State_st {
>>     NICState *nic;
>>     NICConf conf;
>>     int mmio_index;
>> +    int ioport_base;
>> +    uint32_t ioport_reg[2];
>
> I think ioport_reg[] needs to go in the VMStateDescription as well.
> I don't know enough about the PCI subsystem to know whether that's
> also true of ioport_base or whether the the map function is called
> again on a vmload.

Yes, I will save ioport_reg in the VMStateDescripption.
I'm not sure, but I think e1000_ioport_map will be called on resume,
so no need to save ioport_base as it will be regenereted.

>> @@ -202,6 +201,11 @@ rxbufsize(uint32_t v)
>>  static void
>>  set_ctrl(E1000State *s, int index, uint32_t val)
>>  {
>> +    DBGOUT(IO, "set ctrl = %08x\n", val);
>> +    if (val & E1000_CTRL_RST) {
>> +        s->mac_reg[CTRL] = val;
>> +        e1000_reset(s);
>> +    }
>
> There doesn't seem to be much point in setting mac_reg[CTRL]
> when e1000_reset() is going to put it to its reset value anyway;
> and you almost certainly don't want to fall through to this:

Well, it seams to be the beavior of the RST flag, after reading the
e1000 doc. It is reset when the device has finished to reset. But if
there is no other read possible during the reset, indeed that will not
make much sens to set it.

>>     /* RST is self clearing */
>>     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
>
> ...which means there's no need for that bit to special case RST.
>
>>  static void
>> +e1000_ioport_writel(void *opaque, uint32_t addr, uint32_t val)
>> +{
>> +    E1000State *s = opaque;
>> +
>> +    if (addr == s->ioport_base + REG_IOADDR) {
>> +        DBGOUT(IO, "e1000_ioport_writel write base: 0x%04x\n", val);
>> +        s->ioport_reg[REG_IOADDR] = val & 0xfffff;
>> +    } else if (addr == (s->ioport_base + REG_IODATA)) {
>> +        unsigned int index = (s->ioport_reg[REG_IOADDR] & 0x1ffff) >> 2;
>> +
>> +        DBGOUT(IO, "e1000_ioport_writel %x: 0x%04x\n", index, val);
>> +
>> +        if (index < NWRITEOPS && macreg_writeops[index]) {
>> +            macreg_writeops[index](s, index, val);
>> +        } else if (index < NREADOPS && macreg_readops[index]) {
>> +            DBGOUT(IO, "e1000_ioport_writel RO %x: 0x%04x\n", index << 2, val);
>> +        } else {
>> +            DBGOUT(UNKNOWN, "IO unknown write index=0x%08x,val=0x%08x\n",
>> +                   index, val);
>> +        }
>
> This part of this function seems to be duplicating the code in
> e1000_mmio_writel: wouldn't it be cleaner just to call that function?
> Ditto readl.

Yes, I  will do that.

Thanks for the review,
Anthony PERARD - June 30, 2011, 6:45 p.m.
On Tue, Jun 28, 2011 at 06:49, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 06/27/2011 08:07 PM, Peter Maydell wrote:
>>
>> >  +    int ioport_base;
>> >  +    uint32_t ioport_reg[2];
>>
>> I think ioport_reg[] needs to go in the VMStateDescription as well.
>> I don't know enough about the PCI subsystem to know whether that's
>> also true of ioport_base or whether the the map function is called
>> again on a vmload.
>
> ioport_reg[1] is never written nor read, so this can be changed to a single
> uint32_t ioport_addr.  Since it will be almost always 0 (either because it
> was never written, or because E1000_CTRL == 0), a subsection is useful to
> have.

Indeed, I will remove ioport_reg[1] and ioport_addr.

Thanks,

Patch

diff --git a/hw/e1000.c b/hw/e1000.c
index f160bfc..9961f1a 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -58,6 +58,9 @@  static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
 #define PNPMMIO_SIZE      0x20000
 #define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
 
+#define REG_IOADDR 0x0
+#define REG_IODATA 0x4
+
 /*
  * HW models:
  *  E1000_DEV_ID_82540EM works with Windows and Linux
@@ -83,6 +86,8 @@  typedef struct E1000State_st {
     NICState *nic;
     NICConf conf;
     int mmio_index;
+    int ioport_base;
+    uint32_t ioport_reg[2];
 
     uint32_t mac_reg[0x8000];
     uint16_t phy_reg[0x20];
@@ -150,13 +155,7 @@  static const char phy_regcap[0x20] = {
     [PHY_ID2] = PHY_R,		[M88E1000_PHY_SPEC_STATUS] = PHY_R
 };
 
-static void
-ioport_map(PCIDevice *pci_dev, int region_num, pcibus_t addr,
-           pcibus_t size, int type)
-{
-    DBGOUT(IO, "e1000_ioport_map addr=0x%04"FMT_PCIBUS
-           " size=0x%08"FMT_PCIBUS"\n", addr, size);
-}
+static void e1000_reset(void *opaque);
 
 static void
 set_interrupt_cause(E1000State *s, int index, uint32_t val)
@@ -202,6 +201,11 @@  rxbufsize(uint32_t v)
 static void
 set_ctrl(E1000State *s, int index, uint32_t val)
 {
+    DBGOUT(IO, "set ctrl = %08x\n", val);
+    if (val & E1000_CTRL_RST) {
+        s->mac_reg[CTRL] = val;
+        e1000_reset(s);
+    }
     /* RST is self clearing */
     s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
 }
@@ -905,6 +909,53 @@  static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
 
 static void
+e1000_ioport_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+    E1000State *s = opaque;
+
+    if (addr == s->ioport_base + REG_IOADDR) {
+        DBGOUT(IO, "e1000_ioport_writel write base: 0x%04x\n", val);
+        s->ioport_reg[REG_IOADDR] = val & 0xfffff;
+    } else if (addr == (s->ioport_base + REG_IODATA)) {
+        unsigned int index = (s->ioport_reg[REG_IOADDR] & 0x1ffff) >> 2;
+
+        DBGOUT(IO, "e1000_ioport_writel %x: 0x%04x\n", index, val);
+
+        if (index < NWRITEOPS && macreg_writeops[index]) {
+            macreg_writeops[index](s, index, val);
+        } else if (index < NREADOPS && macreg_readops[index]) {
+            DBGOUT(IO, "e1000_ioport_writel RO %x: 0x%04x\n", index << 2, val);
+        } else {
+            DBGOUT(UNKNOWN, "IO unknown write index=0x%08x,val=0x%08x\n",
+                   index, val);
+        }
+    } else {
+        DBGOUT(UNKNOWN, "IO unknown write addr=0x%08x,val=0x%08x\n",
+               addr, val);
+    }
+}
+
+static uint32_t
+e1000_ioport_readl(void *opaque, uint32_t addr)
+{
+    E1000State *s = opaque;
+
+    if (addr == s->ioport_base + REG_IOADDR) {
+        return s->ioport_reg[REG_IOADDR] & 0xfffff;
+    } else if (addr == (s->ioport_base + REG_IODATA)) {
+        unsigned int index = (s->ioport_reg[REG_IOADDR] & 0x1ffff) >> 2;
+
+        if (index < NREADOPS && macreg_readops[index]) {
+            return macreg_readops[index](s, index);
+        }
+        DBGOUT(UNKNOWN, "IO unknown read addr=0x%08x\n", index<<2);
+    } else {
+        DBGOUT(UNKNOWN, "IO unknown read addr=0x%08x\n", addr);
+    }
+    return 0;
+}
+
+static void
 e1000_mmio_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
 {
     E1000State *s = opaque;
@@ -1083,6 +1134,30 @@  static const uint32_t mac_reg_init[] = {
 
 /* PCI interface */
 
+static void
+e1000_ioport_map(PCIDevice *pci_dev, int region_num, pcibus_t addr,
+                 pcibus_t size, int type)
+{
+    E1000State *d = DO_UPCAST(E1000State, dev, pci_dev);
+
+    DBGOUT(IO, "e1000_ioport_map addr=0x%04" FMT_PCIBUS
+           " size=0x%08" FMT_PCIBUS "\n", addr, size);
+
+    d->ioport_base = addr;
+
+    /* Writes that are less than 32 bits are ignored on IOADDR.
+     * For the Flash access, a write can be less than 32 bits for
+     * IODATA register, but is not handled.
+     */
+
+    register_ioport_read(addr, size, 1, e1000_ioport_readl, d);
+
+    register_ioport_read(addr, size, 2, e1000_ioport_readl, d);
+
+    register_ioport_write(addr, size, 4, e1000_ioport_writel, d);
+    register_ioport_read(addr, size, 4, e1000_ioport_readl, d);
+}
+
 static CPUWriteMemoryFunc * const e1000_mmio_write[] = {
     e1000_mmio_writeb,	e1000_mmio_writew,	e1000_mmio_writel
 };
@@ -1137,6 +1212,7 @@  static void e1000_reset(void *opaque)
 {
     E1000State *d = opaque;
 
+    memset(d->ioport_reg, 0, sizeof d->ioport_reg);
     memset(d->phy_reg, 0, sizeof d->phy_reg);
     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
     memset(d->mac_reg, 0, sizeof d->mac_reg);
@@ -1183,7 +1259,7 @@  static int pci_e1000_init(PCIDevice *pci_dev)
                            PCI_BASE_ADDRESS_SPACE_MEMORY, e1000_mmio_map);
 
     pci_register_bar(&d->dev, 1, IOPORT_SIZE,
-                           PCI_BASE_ADDRESS_SPACE_IO, ioport_map);
+                           PCI_BASE_ADDRESS_SPACE_IO, e1000_ioport_map);
 
     memmove(d->eeprom_data, e1000_eeprom_template,
         sizeof e1000_eeprom_template);