diff mbox

[RFC,03/13] AMD IOMMU emulation

Message ID 1306892315-7306-4-git-send-email-eduard.munteanu@linux360.ro
State New
Headers show

Commit Message

Eduard - Gabriel Munteanu June 1, 2011, 1:38 a.m. UTC
This introduces emulation for the AMD IOMMU, described in "AMD I/O
Virtualization Technology (IOMMU) Specification".

Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
---
 Makefile.target |    2 +-
 hw/amd_iommu.c  |  712 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/pc.c         |    2 +
 3 files changed, 715 insertions(+), 1 deletions(-)
 create mode 100644 hw/amd_iommu.c
diff mbox

Patch

diff --git a/Makefile.target b/Makefile.target
index ee0c80d..5f9c868 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -224,7 +224,7 @@  obj-i386-y += cirrus_vga.o apic.o ioapic.o piix_pci.o
 obj-i386-y += vmport.o
 obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
-obj-i386-y += pc_piix.o kvmclock.o dma_rw.o
+obj-i386-y += pc_piix.o kvmclock.o dma_rw.o amd_iommu.o
 obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
 
 # shared objects
diff --git a/hw/amd_iommu.c b/hw/amd_iommu.c
new file mode 100644
index 0000000..650a8f4
--- /dev/null
+++ b/hw/amd_iommu.c
@@ -0,0 +1,712 @@ 
+/*
+ * AMD IOMMU emulation
+ *
+ * Copyright (c) 2011 Eduard - Gabriel Munteanu
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "pc.h"
+#include "hw.h"
+#include "pci.h"
+#include "qlist.h"
+#include "dma_rw.h"
+
+/* Capability registers (byte offsets from the start of the capability) */
+#define CAPAB_HEADER            0x00
+#define   CAPAB_REV_TYPE        0x02
+#define   CAPAB_FLAGS           0x03
+#define CAPAB_BAR_LOW           0x04
+#define CAPAB_BAR_HIGH          0x08
+#define CAPAB_RANGE             0x0C
+#define CAPAB_MISC              0x10
+
+#define CAPAB_SIZE              0x14
+#define CAPAB_REG_SIZE          0x04
+
+/* Capability header data: flag bits and the initial register values */
+#define CAPAB_FLAG_IOTLBSUP     (1 << 0)
+#define CAPAB_FLAG_HTTUNNEL     (1 << 1)
+#define CAPAB_FLAG_NPCACHE      (1 << 2)
+#define CAPAB_INIT_REV          (1 << 3)
+#define CAPAB_INIT_TYPE         3
+#define CAPAB_INIT_REV_TYPE     (CAPAB_INIT_REV | CAPAB_INIT_TYPE)
+#define CAPAB_INIT_FLAGS        (CAPAB_FLAG_NPCACHE | CAPAB_FLAG_HTTUNNEL)
+#define CAPAB_INIT_MISC         ((64 << 15) | (48 << 8))
+#define CAPAB_BAR_MASK          (~((1ULL << 14) - 1))  /* 64-bit on any host */
+
+/* MMIO registers (byte offsets into the register image, mmio_buf) */
+#define MMIO_DEVICE_TABLE       0x0000
+#define MMIO_COMMAND_BASE       0x0008
+#define MMIO_EVENT_BASE         0x0010
+#define MMIO_CONTROL            0x0018
+#define MMIO_EXCL_BASE          0x0020
+#define MMIO_EXCL_LIMIT         0x0028
+#define MMIO_COMMAND_HEAD       0x2000
+#define MMIO_COMMAND_TAIL       0x2008
+#define MMIO_EVENT_HEAD         0x2010
+#define MMIO_EVENT_TAIL         0x2018
+#define MMIO_STATUS             0x2020
+
+#define MMIO_SIZE               0x4000  /* bytes allocated and mapped */
+
+#define MMIO_DEVTAB_SIZE_MASK   ((1ULL << 12) - 1)  /* size field, low 12 bits */
+#define MMIO_DEVTAB_BASE_MASK   (((1ULL << 52) - 1) & ~MMIO_DEVTAB_SIZE_MASK)
+#define MMIO_DEVTAB_ENTRY_SIZE  32      /* bytes per device table entry */
+#define MMIO_DEVTAB_SIZE_UNIT   4096    /* table spans (size field + 1) units */
+
+#define MMIO_CMDBUF_SIZE_BYTE       (MMIO_COMMAND_BASE + 7) /* top byte of base */
+#define MMIO_CMDBUF_SIZE_MASK       0x0F    /* log2 of the entry count */
+#define MMIO_CMDBUF_BASE_MASK       MMIO_DEVTAB_BASE_MASK
+#define MMIO_CMDBUF_DEFAULT_SIZE    8
+#define MMIO_CMDBUF_HEAD_MASK       (((1ULL << 19) - 1) & ~0x0F)
+#define MMIO_CMDBUF_TAIL_MASK       MMIO_CMDBUF_HEAD_MASK   /* same layout */
+
+#define MMIO_EVTLOG_SIZE_BYTE       (MMIO_EVENT_BASE + 7)   /* top byte of base */
+#define MMIO_EVTLOG_SIZE_MASK       MMIO_CMDBUF_SIZE_MASK
+#define MMIO_EVTLOG_BASE_MASK       MMIO_CMDBUF_BASE_MASK
+#define MMIO_EVTLOG_DEFAULT_SIZE    MMIO_CMDBUF_DEFAULT_SIZE
+#define MMIO_EVTLOG_HEAD_MASK       (((1ULL << 19) - 1) & ~0x0F)
+#define MMIO_EVTLOG_TAIL_MASK       MMIO_EVTLOG_HEAD_MASK
+
+#define MMIO_EXCL_BASE_MASK         MMIO_DEVTAB_BASE_MASK
+#define MMIO_EXCL_ENABLED_MASK      (1ULL << 0)     /* -> excl_enabled */
+#define MMIO_EXCL_ALLOW_MASK        (1ULL << 1)     /* -> excl_allow */
+#define MMIO_EXCL_LIMIT_MASK        MMIO_DEVTAB_BASE_MASK
+#define MMIO_EXCL_LIMIT_LOW         0xFFF           /* OR-ed into the limit */
+
+#define MMIO_CONTROL_IOMMUEN        (1ULL << 0)     /* -> enabled */
+#define MMIO_CONTROL_HTTUNEN        (1ULL << 1)     /* -> ats_enabled */
+#define MMIO_CONTROL_EVENTLOGEN     (1ULL << 2)     /* -> evtlog_enabled */
+#define MMIO_CONTROL_EVENTINTEN     (1ULL << 3)     /* -> evtlog_intr */
+#define MMIO_CONTROL_COMWAITINTEN   (1ULL << 4)     /* -> completion_wait_intr */
+#define MMIO_CONTROL_CMDBUFEN       (1ULL << 12)    /* -> cmdbuf_enabled */
+
+#define MMIO_STATUS_EVTLOG_OF       (1ULL << 0)     /* event log overflowed */
+#define MMIO_STATUS_EVTLOG_INTR     (1ULL << 1)     /* set when an event is logged */
+#define MMIO_STATUS_COMWAIT_INTR    (1ULL << 2)     /* set by COMPLETION_WAIT */
+#define MMIO_STATUS_EVTLOG_RUN      (1ULL << 3)     /* mirrors evtlog_enabled */
+#define MMIO_STATUS_CMDBUF_RUN      (1ULL << 4)     /* mirrors cmdbuf_enabled */
+
+#define CMDBUF_ID_BYTE              0x07    /* command byte holding the opcode */
+#define CMDBUF_ID_RSHIFT            4       /* opcode is that byte's high nibble */
+#define CMDBUF_ENTRY_SIZE           0x10    /* bytes per command */
+
+#define CMD_COMPLETION_WAIT         0x01
+#define CMD_INVAL_DEVTAB_ENTRY      0x02
+#define CMD_INVAL_IOMMU_PAGES       0x03
+#define CMD_INVAL_IOTLB_PAGES       0x04
+#define CMD_INVAL_INTR_TABLE        0x05
+
+#define DEVTAB_ENTRY_SIZE           32      /* bytes per device table entry */
+
+/* Device table entry bits 0:63 */
+#define DEV_VALID                   (1ULL << 0)
+#define DEV_TRANSLATION_VALID       (1ULL << 1)
+#define DEV_MODE_MASK               0x7     /* applied after DEV_MODE_RSHIFT */
+#define DEV_MODE_RSHIFT             9
+#define DEV_PT_ROOT_MASK            0xFFFFFFFFFF000
+#define DEV_PT_ROOT_RSHIFT          12
+#define DEV_PERM_SHIFT              61
+#define DEV_PERM_READ               (1ULL << 61)
+#define DEV_PERM_WRITE              (1ULL << 62)
+
+/* Device table entry bits 64:127 */
+#define DEV_DOMAIN_ID_MASK          ((1ULL << 16) - 1)
+#define DEV_IOTLB_SUPPORT           (1ULL << 17)
+#define DEV_SUPPRESS_PF             (1ULL << 18)
+#define DEV_SUPPRESS_ALL_PF         (1ULL << 19)
+#define DEV_IOCTL_MASK              (~3)    /* NOTE(review): 0x3 intended? unused */
+#define DEV_IOCTL_RSHIFT            20
+#define   DEV_IOCTL_DENY            0
+#define   DEV_IOCTL_PASSTHROUGH     1
+#define   DEV_IOCTL_TRANSLATE       2
+#define DEV_CACHE                   (1ULL << 37)
+#define DEV_SNOOP_DISABLE           (1ULL << 38)
+#define DEV_EXCL                    (1ULL << 39)
+
+/* Event codes (bits 15:12) and flags, as stored in the 16-bit info field */
+#define EVENT_ILLEGAL_DEVTAB_ENTRY  (0x1U << 12)
+#define EVENT_IOPF                  (0x2U << 12)
+#define   EVENT_IOPF_I              (1U << 3)
+#define   EVENT_IOPF_PR             (1U << 4)
+#define   EVENT_IOPF_RW             (1U << 5)
+#define   EVENT_IOPF_PE             (1U << 6)
+#define   EVENT_IOPF_RZ             (1U << 7)
+#define   EVENT_IOPF_TR             (1U << 8)
+#define EVENT_DEV_TAB_HW_ERROR      (0x3U << 12)
+#define EVENT_PAGE_TAB_HW_ERROR     (0x4U << 12)
+#define EVENT_ILLEGAL_COMMAND_ERROR (0x5U << 12)
+#define EVENT_COMMAND_HW_ERROR      (0x6U << 12)
+#define EVENT_IOTLB_INV_TIMEOUT     (0x7U << 12)
+#define EVENT_INVALID_DEV_REQUEST   (0x8U << 12)
+
+#define EVENT_LEN                   16      /* bytes per event log entry */
+
+#define IOMMU_PERM_READ             (1 << 0)    /* bit 61 after DEV_PERM_SHIFT */
+#define IOMMU_PERM_WRITE            (1 << 1)    /* bit 62 after DEV_PERM_SHIFT */
+#define IOMMU_PERM_RW               (IOMMU_PERM_READ | IOMMU_PERM_WRITE)
+
+/* FIXME: Remove these once they go upstream. */
+#define PCI_CLASS_SYSTEM_IOMMU      0x0806
+#define PCI_DEVICE_ID_AMD_IOMMU     0x0000  /* FIXME */
+#define PCI_CAP_ID_SEC              0x0F    /* ID of the capability added at init */
+
+typedef struct AMDIOMMUState {
+    PCIDevice                   dev;            /* DO_UPCAST target */
+
+    int                         capab_offset;   /* from pci_add_capability() */
+    unsigned char               *capab;         /* dev.config + capab_offset */
+
+    int                         mmio_index;     /* cpu_register_io_memory() id */
+    target_phys_addr_t          mmio_addr;      /* base the registers map at */
+    unsigned char               *mmio_buf;      /* MMIO_SIZE-byte register image */
+    int                         mmio_enabled;   /* set once MMIO is mapped */
+
+    int                         enabled;        /* MMIO_CONTROL_IOMMUEN */
+    int                         ats_enabled;    /* MMIO_CONTROL_HTTUNEN */
+
+    target_phys_addr_t          devtab;         /* device table base */
+    size_t                      devtab_len;     /* device table entry count */
+
+    target_phys_addr_t          cmdbuf;         /* command buffer base */
+    int                         cmdbuf_enabled; /* enabled && CMDBUFEN */
+    size_t                      cmdbuf_len;     /* entry count (power of two) */
+    size_t                      cmdbuf_head;    /* byte offset of next command */
+    size_t                      cmdbuf_tail;    /* byte offset written by guest */
+    int                         completion_wait_intr; /* COMWAITINTEN */
+
+    target_phys_addr_t          evtlog;         /* event log base */
+    int                         evtlog_enabled; /* enabled && EVENTLOGEN */
+    int                         evtlog_intr;    /* MMIO_CONTROL_EVENTINTEN */
+    target_phys_addr_t          evtlog_len;     /* 1 << size field */
+    target_phys_addr_t          evtlog_head;    /* from MMIO_EVENT_HEAD */
+    target_phys_addr_t          evtlog_tail;    /* byte offset of next event */
+
+    target_phys_addr_t          excl_base;      /* exclusion range base */
+    target_phys_addr_t          excl_limit;     /* limit, low 12 bits set */
+    int                         excl_enabled;   /* MMIO_EXCL_ENABLED_MASK bit */
+    int                         excl_allow;     /* MMIO_EXCL_ALLOW_MASK bit */
+} AMDIOMMUState;
+
+typedef struct AMDIOMMUEvent {
+    uint16_t    devfn;      /* faulting device, stored little-endian */
+    uint16_t    reserved;   /* written as 0 */
+    uint16_t    domid;      /* domain ID taken from the device table entry */
+    uint16_t    info;       /* EVENT_* code and flags; only 16 bits wide */
+    uint64_t    addr;       /* faulting DMA address, little-endian */
+} __attribute__((packed)) AMDIOMMUEvent;
+
+/* COMPLETION_WAIT: optionally store 8 data bytes, optionally flag status. */
+static void amd_iommu_completion_wait(AMDIOMMUState *s, uint8_t *cmd)
+{
+    uint64_t addr;
+    if (cmd[0] & 1) {
+        /* cmd is a plain byte buffer: copy instead of casting the pointer. */
+        memcpy(&addr, cmd, sizeof(addr));
+        addr = le64_to_cpu(addr) & 0xFFFFFFFFFFFF8;
+        cpu_physical_memory_write(addr, cmd + 8, 8);
+    }
+    if (cmd[0] & 2) {
+        s->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_COMWAIT_INTR;
+    }
+}
+
+/* INVALIDATE_IOTLB_PAGES: drop every DMA mapping of the addressed device. */
+static void amd_iommu_invalidate_iotlb(AMDIOMMUState *s, uint8_t *cmd)
+{
+    PCIBus *bus = s->dev.bus;
+    /* The 16-bit device ID is little-endian in the command, on any host. */
+    int devfn = cmd[0] | (cmd[1] << 8);
+    PCIDevice *dev = pci_find_device(bus, pci_bus_num(bus), devfn);
+
+    if (dev) {
+        /* Range 0 .. -1 covers the device's whole DMA address space. */
+        dma_invalidate_memory_range(&dev->dma, 0, -1);
+    }
+}
+
+/*
+ * Fetch the command at the current head offset and dispatch on its opcode,
+ * the high nibble of byte CMDBUF_ID_BYTE.  COMPLETION_WAIT and
+ * INVALIDATE_IOTLB_PAGES are acted upon; all other opcodes, recognised or
+ * not, are accepted and ignored.
+ */
+static void amd_iommu_cmdbuf_exec(AMDIOMMUState *s)
+{
+    uint8_t entry[16];
+    int opcode;
+
+    cpu_physical_memory_read(s->cmdbuf + s->cmdbuf_head, entry, 16);
+    opcode = entry[CMDBUF_ID_BYTE] >> CMDBUF_ID_RSHIFT;
+
+    if (opcode == CMD_COMPLETION_WAIT) {
+        amd_iommu_completion_wait(s, entry);
+        return;
+    }
+    if (opcode == CMD_INVAL_IOTLB_PAGES) {
+        amd_iommu_invalidate_iotlb(s, entry);
+        return;
+    }
+    /* DEVTAB_ENTRY, IOMMU_PAGES, INTR_TABLE invalidations: nothing to do. */
+}
+
+/*
+ * Execute queued commands until head catches up with tail, then publish the
+ * new head in the MMIO register image.  Head wraps after each command so it
+ * can meet a tail of 0; a tail outside the ring is ignored to avoid spinning.
+ */
+static void amd_iommu_cmdbuf_run(AMDIOMMUState *s)
+{
+    size_t ring_size = s->cmdbuf_len * CMDBUF_ENTRY_SIZE;
+    uint64_t head;
+
+    if (!s->cmdbuf_enabled || s->cmdbuf_tail >= ring_size) {
+        return;
+    }
+    if (s->cmdbuf_head >= ring_size) {
+        s->cmdbuf_head = 0;
+    }
+    while (s->cmdbuf_head != s->cmdbuf_tail) {
+        amd_iommu_cmdbuf_exec(s);
+        s->cmdbuf_head = (s->cmdbuf_head + CMDBUF_ENTRY_SIZE) % ring_size;
+    }
+    head = cpu_to_le64(s->cmdbuf_head);     /* byte offset, not scaled by 8 */
+    memcpy(s->mmio_buf + MMIO_COMMAND_HEAD, &head, sizeof(head));
+}
+
+/*
+ * Assemble a little-endian value from 'size' consecutive bytes of the MMIO
+ * register image, starting at 'offset'.  A size of zero yields 0; for sizes
+ * above four only the low 32 bits survive in the result.
+ */
+static uint32_t amd_iommu_mmio_buf_read(AMDIOMMUState *s,
+                                        size_t offset,
+                                        size_t size)
+{
+    uint32_t value = 0;
+    size_t idx = size;
+
+    /* Fold bytes in from the highest address down to 'offset'. */
+    while (idx-- > 0) {
+        value = (value << 8) | s->mmio_buf[offset + idx];
+    }
+
+    return value;
+}
+
+/* Store the low 'size' bytes of 'val', little-endian, at mmio_buf[offset]. */
+static void amd_iommu_mmio_buf_write(AMDIOMMUState *s, size_t offset,
+                                     size_t size, uint32_t val)
+{
+    unsigned char *dst = s->mmio_buf + offset;
+    unsigned char *end = dst + size;
+
+    for (; dst != end; dst++) {
+        *dst = val & 0xFF;
+        val >>= 8;
+    }
+}
+
+static void amd_iommu_update_mmio(AMDIOMMUState *s,
+                                  target_phys_addr_t addr)
+{
+    size_t reg = addr & ~0x07;          /* 8-byte register containing addr */
+    uint64_t *base = (uint64_t *) &s->mmio_buf[reg];
+    uint64_t val = le64_to_cpu(*base);  /* register contents as stored */
+    /* Re-derive the cached state that depends on the register at 'reg'. */
+    switch (reg) {
+    case MMIO_CONTROL:
+        s->enabled              = !!(val & MMIO_CONTROL_IOMMUEN);
+        s->ats_enabled          = !!(val & MMIO_CONTROL_HTTUNEN);
+        s->evtlog_enabled       = s->enabled &&
+                                  !!(val & MMIO_CONTROL_EVENTLOGEN);
+        s->evtlog_intr          = !!(val & MMIO_CONTROL_EVENTINTEN);
+        s->completion_wait_intr = !!(val & MMIO_CONTROL_COMWAITINTEN);
+        s->cmdbuf_enabled       = s->enabled &&
+                                  !!(val & MMIO_CONTROL_CMDBUFEN);
+
+        /* Update status flags depending on the control register. */
+        if (s->cmdbuf_enabled) {
+            s->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_CMDBUF_RUN;
+        } else {
+            s->mmio_buf[MMIO_STATUS] &= ~MMIO_STATUS_CMDBUF_RUN;
+        }
+        if (s->evtlog_enabled) {
+            s->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_EVTLOG_RUN;
+        } else {
+            s->mmio_buf[MMIO_STATUS] &= ~MMIO_STATUS_EVTLOG_RUN;
+        }
+
+        amd_iommu_cmdbuf_run(s);    /* commands may already be queued */
+        break;
+    case MMIO_DEVICE_TABLE:
+        s->devtab = (target_phys_addr_t) (val & MMIO_DEVTAB_BASE_MASK);
+        s->devtab_len = ((val & MMIO_DEVTAB_SIZE_MASK) + 1) *
+                         (MMIO_DEVTAB_SIZE_UNIT / MMIO_DEVTAB_ENTRY_SIZE);
+        break;
+    case MMIO_COMMAND_BASE:
+        s->cmdbuf = (target_phys_addr_t) (val & MMIO_CMDBUF_BASE_MASK);
+        s->cmdbuf_len = 1UL << (s->mmio_buf[MMIO_CMDBUF_SIZE_BYTE] &
+                                MMIO_CMDBUF_SIZE_MASK);
+
+        /* We must reset the head and tail pointers. */
+        s->cmdbuf_head = s->cmdbuf_tail = 0;
+        memset(s->mmio_buf + MMIO_COMMAND_HEAD, 0, 8);
+        memset(s->mmio_buf + MMIO_COMMAND_TAIL, 0, 8);
+        break;
+    case MMIO_COMMAND_HEAD:
+        s->cmdbuf_head = val & MMIO_CMDBUF_HEAD_MASK;
+        amd_iommu_cmdbuf_run(s);
+        break;
+    case MMIO_COMMAND_TAIL:
+        s->cmdbuf_tail = val & MMIO_CMDBUF_TAIL_MASK;
+        amd_iommu_cmdbuf_run(s);    /* new tail: run the added commands */
+        break;
+    case MMIO_EVENT_BASE:
+        s->evtlog = (target_phys_addr_t) (val & MMIO_EVTLOG_BASE_MASK);
+        s->evtlog_len = 1UL << (s->mmio_buf[MMIO_EVTLOG_SIZE_BYTE] &
+                                MMIO_EVTLOG_SIZE_MASK);
+        break;
+    case MMIO_EVENT_HEAD:
+        s->evtlog_head = val & MMIO_EVTLOG_HEAD_MASK;
+        break;
+    case MMIO_EVENT_TAIL:
+        s->evtlog_tail = val & MMIO_EVTLOG_TAIL_MASK;
+        break;
+    case MMIO_EXCL_BASE:
+        s->excl_base = (target_phys_addr_t) (val & MMIO_EXCL_BASE_MASK);
+        s->excl_enabled = val & MMIO_EXCL_ENABLED_MASK;
+        s->excl_allow = val & MMIO_EXCL_ALLOW_MASK;  /* nonzero, not 0/1 */
+        break;
+    case MMIO_EXCL_LIMIT:
+        s->excl_limit = (target_phys_addr_t) ((val & MMIO_EXCL_LIMIT_MASK) |
+                                              MMIO_EXCL_LIMIT_LOW);
+        break;
+    default:    /* other registers have no cached state */
+        break;
+    }
+}
+
+/* 1-byte MMIO read handler; 'opaque' is the AMDIOMMUState. */
+static uint32_t amd_iommu_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+    const size_t width = sizeof(uint8_t);
+    return amd_iommu_mmio_buf_read((AMDIOMMUState *) opaque, addr, width);
+}
+
+/* 2-byte MMIO read handler; 'opaque' is the AMDIOMMUState. */
+static uint32_t amd_iommu_mmio_readw(void *opaque, target_phys_addr_t addr)
+{
+    const size_t width = sizeof(uint16_t);
+    return amd_iommu_mmio_buf_read((AMDIOMMUState *) opaque, addr, width);
+}
+
+/* 4-byte MMIO read handler; 'opaque' is the AMDIOMMUState. */
+static uint32_t amd_iommu_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    const size_t width = sizeof(uint32_t);
+    return amd_iommu_mmio_buf_read((AMDIOMMUState *) opaque, addr, width);
+}
+
+/* 1-byte MMIO write handler: store the byte, then refresh derived state. */
+static void amd_iommu_mmio_writeb(void *opaque,
+                                  target_phys_addr_t addr,
+                                  uint32_t val)
+{
+    AMDIOMMUState *state = opaque;
+    amd_iommu_mmio_buf_write(state, addr, sizeof(uint8_t), val);
+    amd_iommu_update_mmio(state, addr);
+}
+
+/* 2-byte MMIO write handler: store the bytes, then refresh derived state. */
+static void amd_iommu_mmio_writew(void *opaque,
+                                  target_phys_addr_t addr,
+                                  uint32_t val)
+{
+    AMDIOMMUState *state = opaque;
+    amd_iommu_mmio_buf_write(state, addr, sizeof(uint16_t), val);
+    amd_iommu_update_mmio(state, addr);
+}
+
+/* 4-byte MMIO write handler: store the bytes, then refresh derived state. */
+static void amd_iommu_mmio_writel(void *opaque,
+                                  target_phys_addr_t addr,
+                                  uint32_t val)
+{
+    AMDIOMMUState *state = opaque;
+    amd_iommu_mmio_buf_write(state, addr, sizeof(uint32_t), val);
+    amd_iommu_update_mmio(state, addr);
+}
+
+/* MMIO read handlers, in order: the 1-, 2- and 4-byte accessors. */
+static CPUReadMemoryFunc * const amd_iommu_mmio_read[] = {
+    [0] = amd_iommu_mmio_readb, [1] = amd_iommu_mmio_readw,
+    [2] = amd_iommu_mmio_readl,
+};
+
+/* MMIO write handlers, in order: the 1-, 2- and 4-byte accessors. */
+static CPUWriteMemoryFunc * const amd_iommu_mmio_write[] = {
+    [0] = amd_iommu_mmio_writeb, [1] = amd_iommu_mmio_writew,
+    [2] = amd_iommu_mmio_writel,
+};
+
+/* Map the MMIO registers at the base held in the capability; lock the BAR. */
+static void amd_iommu_enable_mmio(AMDIOMMUState *s)
+{
+    uint8_t *capab_wmask = s->dev.wmask + s->capab_offset;
+    uint64_t bar;
+
+    s->mmio_index = cpu_register_io_memory(amd_iommu_mmio_read,
+                                           amd_iommu_mmio_write,
+                                           s, DEVICE_LITTLE_ENDIAN);
+    if (s->mmio_index < 0) {
+        return;
+    }
+    /* The BAR is not guaranteed to be 8-byte aligned: copy, do not cast. */
+    memcpy(&bar, &s->capab[CAPAB_BAR_LOW], sizeof(bar));
+    s->mmio_addr = le64_to_cpu(bar) & CAPAB_BAR_MASK;
+    cpu_register_physical_memory(s->mmio_addr, MMIO_SIZE, s->mmio_index);
+    s->mmio_enabled = 1;
+
+    /* Further changes to the capability are prohibited. */
+    memset(capab_wmask + CAPAB_BAR_LOW, 0x00, CAPAB_REG_SIZE);
+    memset(capab_wmask + CAPAB_BAR_HIGH, 0x00, CAPAB_REG_SIZE);
+}
+
+/* Config-space write hook: map MMIO once bit 0 of the BAR has been set. */
+static void amd_iommu_write_capab(PCIDevice *dev,
+                                  uint32_t addr, uint32_t val, int len)
+{
+    AMDIOMMUState *iommu = DO_UPCAST(AMDIOMMUState, dev, dev);
+    pci_default_write_config(dev, addr, val, len);
+    if (iommu->mmio_enabled || !(iommu->capab[CAPAB_BAR_LOW] & 0x1)) {
+        return;
+    }
+    amd_iommu_enable_mmio(iommu);
+}
+
+/* Reset: IOMMU off, capability reloaded, BAR writable, MMIO image cleared. */
+static void amd_iommu_reset(DeviceState *dev)
+{
+    AMDIOMMUState *s = DO_UPCAST(AMDIOMMUState, dev.qdev, dev);
+    unsigned char *capab = s->capab;
+    uint8_t *capab_wmask = s->dev.wmask + s->capab_offset;
+
+    /* NOTE(review): an MMIO mapping made before reset is not removed here. */
+    s->enabled = s->ats_enabled = s->mmio_enabled = 0;
+
+    /* Load the initial values; the register offsets are only the indices. */
+    capab[CAPAB_REV_TYPE] = CAPAB_INIT_REV | CAPAB_INIT_TYPE;
+    capab[CAPAB_FLAGS] = CAPAB_INIT_FLAGS;
+    /* BAR low, BAR high and range are adjacent: clear all of their bytes. */
+    memset(capab + CAPAB_BAR_LOW, 0, CAPAB_MISC - CAPAB_BAR_LOW);
+    *((uint32_t *) &capab[CAPAB_MISC]) = cpu_to_le32(CAPAB_INIT_MISC);
+
+    /* Changes to the capability are allowed after system reset. */
+    memset(capab_wmask + CAPAB_BAR_LOW, 0xFF, CAPAB_REG_SIZE);
+    memset(capab_wmask + CAPAB_BAR_HIGH, 0xFF, CAPAB_REG_SIZE);
+
+    memset(s->mmio_buf, 0, MMIO_SIZE);
+    s->mmio_buf[MMIO_CMDBUF_SIZE_BYTE] = MMIO_CMDBUF_DEFAULT_SIZE;
+    s->mmio_buf[MMIO_EVTLOG_SIZE_BYTE] = MMIO_EVTLOG_DEFAULT_SIZE;
+}
+
+/* Append evt to the guest event log unless logging is off or overflowed. */
+static void amd_iommu_log_event(AMDIOMMUState *s, AMDIOMMUEvent *evt)
+{
+    if (!s->evtlog_enabled ||
+        (s->mmio_buf[MMIO_STATUS] & MMIO_STATUS_EVTLOG_OF)) {
+        return;
+    }
+    /* evtlog_len counts entries while evtlog_tail is a byte offset. */
+    if (s->evtlog_tail >= s->evtlog_len * EVENT_LEN) {
+        s->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_EVTLOG_OF;
+        return;
+    }
+    cpu_physical_memory_write(s->evtlog + s->evtlog_tail,
+                              (uint8_t *) evt, EVENT_LEN);
+    s->evtlog_tail += EVENT_LEN;
+    s->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_EVTLOG_INTR;
+}
+
+/*
+ * Record an I/O page fault for the given device/domain/address in the event
+ * log, then set the signalled-target-abort bit in the IOMMU's PCI status.
+ * 'present' and 'is_write' select the PR and RW flags of the event.
+ */
+static void amd_iommu_page_fault(AMDIOMMUState *s,
+                                 int devfn,
+                                 unsigned domid,
+                                 target_phys_addr_t addr,
+                                 int present,
+                                 int is_write)
+{
+    uint8_t *status_reg = s->dev.config + PCI_STATUS;
+    unsigned flags = EVENT_IOPF;
+    AMDIOMMUEvent event;
+
+    flags |= present ? EVENT_IOPF_PR : 0;
+    flags |= is_write ? EVENT_IOPF_RW : 0;
+
+    event.devfn = cpu_to_le16(devfn);
+    event.reserved = 0;
+    event.domid = cpu_to_le16(domid);
+    event.addr = cpu_to_le64(addr);
+    /* info is a 16-bit field: only the low 16 bits of flags are stored. */
+    event.info = cpu_to_le16(flags);
+
+    amd_iommu_log_event(s, &event);
+
+    /*
+     * Signal a target abort.
+     *
+     * FIXME: There should be a way to turn this off when acked.
+     */
+    pci_set_word(status_reg,
+                 pci_get_word(status_reg) | PCI_STATUS_SIG_TARGET_ABORT);
+}
+
+static inline uint64_t amd_iommu_get_perms(uint64_t entry)
+{
+    return (entry >> DEV_PERM_SHIFT) & IOMMU_PERM_RW;  /* bits 61-62 -> 0-1 */
+}
+
+/* Recover the IOMMU state from the DMA device's mmu->iommu qdev pointer. */
+static inline AMDIOMMUState *amd_iommu_dma_to_state(DMADevice *dev)
+{
+    return DO_UPCAST(AMDIOMMUState, dev,
+                     DO_UPCAST(PCIDevice, qdev, dev->mmu->iommu));
+}
+
+/*
+ * Translate a DMA address issued by 'dev' through the guest's device table
+ * and I/O page tables.  Returns 0 with *paddr and *len set (identity map,
+ * *len == -1, when nothing is translated) or -EPERM after logging a fault.
+ */
+static int amd_iommu_translate(DMADevice *dev,
+                               dma_addr_t addr,
+                               dma_addr_t *paddr,
+                               dma_addr_t *len,
+                               int is_write)
+{
+    PCIDevice *pci_dev = container_of(dev, PCIDevice, dma);
+    PCIDevice *iommu_dev = DO_UPCAST(PCIDevice, qdev, dev->mmu->iommu);
+    AMDIOMMUState *s = DO_UPCAST(AMDIOMMUState, dev, iommu_dev);
+    int devfn = pci_dev->devfn, present, walked = 0;
+    uint64_t entry[4], pte, page_offset;
+    unsigned level, domid;
+    /* Checking one direction suffices: R/W maps are not supported. */
+    unsigned perms = is_write ? IOMMU_PERM_WRITE : IOMMU_PERM_READ;
+
+    if (!s->enabled) {
+        goto no_translation;
+    }
+
+    /* Get device table entry; guest memory holds it little-endian. */
+    cpu_physical_memory_read(s->devtab + devfn * DEVTAB_ENTRY_SIZE,
+                             (uint8_t *) entry, DEVTAB_ENTRY_SIZE);
+    pte = le64_to_cpu(entry[0]);
+    if (!(pte & DEV_VALID) || !(pte & DEV_TRANSLATION_VALID)) {
+        goto no_translation;
+    }
+    domid = le64_to_cpu(entry[1]) & DEV_DOMAIN_ID_MASK;
+    level = (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK;
+    for (;;) {
+        /*
+         * Check every entry on the path, the final one included: perms ->
+         * entry_perms must hold.  Level 7 faults too (shift would be 66).
+         */
+        present = pte & 1;
+        if (!present || level == 7 ||
+            perms != (perms & amd_iommu_get_perms(pte))) {
+            amd_iommu_page_fault(s, devfn, domid, addr, present, !!is_write);
+            return -EPERM;
+        }
+        if (level == 0) {
+            break;
+        }
+        /* Go to the next lower level. */
+        pte = ldq_phys((pte & DEV_PT_ROOT_MASK) +
+                       (((addr >> (3 + 9 * level)) & 0x1FF) << 3));
+        level = (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK;
+        walked = 1;
+    }
+    if (!walked) {
+        goto no_translation;    /* mode 0 in the device entry: no tables */
+    }
+    page_offset = addr & 4095;
+    *paddr = (pte & DEV_PT_ROOT_MASK) + page_offset;
+    *len = 4096 - page_offset;
+    return 0;
+
+no_translation:
+    *paddr = addr;
+    *len = -1;
+    return 0;
+}
+
+/* PCI init: set IDs, add the capability, allocate MMIO backing, hook DMA. */
+static int amd_iommu_pci_initfn(PCIDevice *dev)
+{
+    AMDIOMMUState *s = DO_UPCAST(AMDIOMMUState, dev, dev);
+    pci_config_set_vendor_id(s->dev.config, PCI_VENDOR_ID_AMD);
+    pci_config_set_device_id(s->dev.config, PCI_DEVICE_ID_AMD_IOMMU);
+    pci_config_set_class(s->dev.config, PCI_CLASS_SYSTEM_IOMMU);
+    /* Secure Device capability; a negative offset is treated as failure. */
+    s->capab_offset = pci_add_capability(&s->dev,
+                                         PCI_CAP_ID_SEC, 0, CAPAB_SIZE);
+    if (s->capab_offset < 0) {
+        return s->capab_offset;
+    }
+    s->capab = s->dev.config + s->capab_offset;
+    dev->config_write = amd_iommu_write_capab;
+
+    /* Allocate backing space for the MMIO registers. */
+    s->mmio_buf = qemu_malloc(MMIO_SIZE);
+    pci_register_iommu(dev, amd_iommu_translate);
+    return 0;
+}
+
+/* Migration description: only the embedded PCI device state is listed. */
+static const VMStateDescription vmstate_amd_iommu = {
+    .name = "amd-iommu",
+    .fields = (VMStateField []) {
+        VMSTATE_PCI_DEVICE(dev, AMDIOMMUState),
+        VMSTATE_END_OF_LIST()
+    },
+    .minimum_version_id_old = 1, .minimum_version_id = 1,
+    .version_id = 1,
+};
+
+/* Registration record tying the "amd-iommu" name to init, reset and vmsd. */
+static PCIDeviceInfo amd_iommu_pci_info = {
+    .init = amd_iommu_pci_initfn,
+    .qdev = {
+        .name = "amd-iommu", .desc = "AMD IOMMU", .reset = amd_iommu_reset,
+        .size = sizeof(AMDIOMMUState), .vmsd = &vmstate_amd_iommu,
+    },
+};
+
+static void amd_iommu_register(void)    /* passed to device_init() below */
+{
+    pci_qdev_register(&amd_iommu_pci_info);     /* register the device record */
+}
+
+device_init(amd_iommu_register);
diff --git a/hw/pc.c b/hw/pc.c
index 6939c04..b93949c 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1161,6 +1161,8 @@  void pc_pci_device_init(PCIBus *pci_bus)
     int max_bus;
     int bus;
 
+    pci_create_simple(pci_bus, -1, "amd-iommu");
+
     max_bus = drive_get_max_bus(IF_SCSI);
     for (bus = 0; bus <= max_bus; bus++) {
         pci_create_simple(pci_bus, -1, "lsi53c895a");