Patchwork [08/13] ahci: add ahci emulation

login
register
mail settings
Submitter Alexander Graf
Date Nov. 26, 2010, 7:17 p.m.
Message ID <1290799053-27282-9-git-send-email-agraf@suse.de>
Download mbox | patch
Permalink /patch/73233/
State New
Headers show

Comments

Alexander Graf - Nov. 26, 2010, 7:17 p.m.
This patch adds an emulation layer for an ICH-7M AHCI controller. For now
this controller does not do IDE legacy emulation. It is a pure AHCI controller.

Signed-off-by: Alexander Graf <agraf@suse.de>

---

v1 -> v2:

  - rename IDEExtender to IDEBusOps and make a pointer (kraxel)
  - make dma hooks explicit by putting them into ops struct (stefanha)
  - use qdev buses (kraxel)
  - minor cleanups
  - dprintf overhaul
  - add reset function

v2 -> v3:

  - add msi support (kraxel)
  - use MIN macro (kraxel)
  - add msi support (kraxel)
  - fix ncq with multiple ports
  - zap qdev properties (kraxel)
  - redesign legacy IF_SATA hooks (kraxel)
  - don't build ahci as part of target
  - move to ide/ (kwolf)

v3 -> v4:

  - prepare for endianness safety
  - add lspci dump (herbszt)
  - use ich7 instead of ich7m (herbszt)
  - fix lst+fis mapping (kraxel)
  - coding style (blue swirl)
  - explicit mmio setters/getters (blue swirl)

v4 -> v5:

  - s/H2dNcqFis/NCQFrame/g (blue swirl)
  - redo -drive magic (blue swirl)
  - bump BAR to 4k
  - ahci.c: rename to ICH7_AHCI_RAID (herbszt)

v5 -> v6:
  - PCI config space fixes (isaku)
  - remove CONFIG_AHCI from default configs
---
 Makefile.objs |    1 +
 hw/ide/ahci.c | 1380 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1381 insertions(+), 0 deletions(-)
 create mode 100644 hw/ide/ahci.c
Stefan Hajnoczi - Dec. 1, 2010, 2:31 p.m.
On Fri, Nov 26, 2010 at 7:17 PM, Alexander Graf <agraf@suse.de> wrote:
> +static void map_page(uint8_t **ptr, uint64_t addr, uint32_t wanted)
> +{
> +    target_phys_addr_t len = wanted;
> +
> +    if (*ptr) {
> +        cpu_physical_memory_unmap(*ptr, 1, len, len);
> +    }
> +
> +    *ptr = cpu_physical_memory_map(addr, &len, 1);
> +    if (len < wanted) {
> +        *ptr = NULL;

We need to unmap the subset of the requested range.

> +static void ncq_cb(void *opaque, int ret)
> +{
> +    NCQTransferState *ncq_tfs = (NCQTransferState *)opaque;
> +    IDEState *ide_state;
> +
> +    if (ret < 0) {
> +        /* error */

Error handling missing.

> +static int handle_cmd(AHCIState *s, int port, int slot)
> +{
> +    IDEState *ide_state;
> +
> +    int sglist_alloc_hint;
> +    QEMUSGList sglist;
> +    int atapi_packet_len = 0;
> +    AHCIPortRegs *pr;
> +    uint32_t opts;
> +    uint64_t tbl_addr;
> +    AHCICmdHdr *cmd;
> +    uint8_t *cmd_fis;
> +
> +    target_phys_addr_t cmd_len;
> +    int i;
> +
> +    pr = &s->dev[port].port_regs;
> +    cmd = (AHCICmdHdr *)&s->dev[port].lst[slot * 32];

This isn't much easier to read but gets rid of the sizeof(AHCICmdHdr) scaling:
cmd = &((AHCICmdHdr *)s->dev[port].lst)[slot];

> +static int pci_ahci_init(PCIDevice *dev)
> +{
> +    struct AHCIPCIState *d;
> +    d = DO_UPCAST(struct AHCIPCIState, card, dev);
> +
> +    pci_config_set_vendor_id(d->card.config, PCI_VENDOR_ID_INTEL);
> +    pci_config_set_device_id(d->card.config,
> +                             PCI_DEVICE_ID_INTEL_ICH7_AHCI_RAID);
> +    pci_set_word(d->card.config + PCI_COMMAND, PCI_COMMAND_IO |
> +                                               PCI_COMMAND_MEMORY |
> +                                               PCI_COMMAND_MASTER);

We are enabling IO and memory space decode, as well as bus master
here.  According to the AHCI spec 1.3, PCI_COMMAND_MEMORY and
PCI_COMMAND_MASTER should be 0 on reset ("2.1.2 Offset 04h: CMD -
Command").  I thought the guest's firmware or operating system would
set these bits when initializing PCI devices?

> +static int pci_ahci_uninit(PCIDevice *dev)
> +{
> +    struct AHCIPCIState *d;
> +    d = DO_UPCAST(struct AHCIPCIState, card, dev);
> +
> +    if (msi_enabled(&d->card)) {
> +        msi_uninit(dev);
> +    }

It looks like d and &d->card are not needed, we already have dev.

Stefan

Patch

diff --git a/Makefile.objs b/Makefile.objs
index 23b17ce..b1035c8 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -229,6 +229,7 @@  hw-obj-$(CONFIG_IDE_PIIX) += ide/piix.o
 hw-obj-$(CONFIG_IDE_CMD646) += ide/cmd646.o
 hw-obj-$(CONFIG_IDE_MACIO) += ide/macio.o
 hw-obj-$(CONFIG_IDE_VIA) += ide/via.o
+hw-obj-$(CONFIG_AHCI) += ide/ahci.o
 
 # SCSI layer
 hw-obj-y += lsi53c895a.o
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
new file mode 100644
index 0000000..9a55df2
--- /dev/null
+++ b/hw/ide/ahci.c
@@ -0,0 +1,1380 @@ 
+/*
+ * QEMU AHCI Emulation
+ *
+ * Copyright (c) 2010 qiaochong@loongson.cn
+ * Copyright (c) 2010 Roland Elek <elek.roland@gmail.com>
+ * Copyright (c) 2010 Sebastian Herbszt <herbszt@gmx.de>
+ * Copyright (c) 2010 Alexander Graf <agraf@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * lspci dump of a real device:
+ *
+ * 00:1f.2 Class 0104: 8086:27c3 (rev 01)
+ * Subsystem: 1734:1085
+ * Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR- FastB2B-
+ * Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
+ * Latency: 0
+ * Interrupt: pin B routed to IRQ 50
+ * Region 0: I/O ports at 3440 [size=8]
+ * Region 1: I/O ports at 3434 [size=4]
+ * Region 2: I/O ports at 3438 [size=8]
+ * Region 3: I/O ports at 3430 [size=4]
+ * Region 4: I/O ports at 3400 [size=32]
+ * Region 5: Memory at fc000400 (32-bit, non-prefetchable) [size=1K]
+ * Capabilities: [80] Message Signalled Interrupts: Mask- 64bit- Queue=0/0 Enable-
+ * Address: 00000000  Data: 0000
+ * Capabilities: [70] Power Management version 2
+ * Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot+,D3cold-)
+ * Status: D0 PME-Enable- DSel=0 DScale=0 PME-
+ * Capabilities: [a8] #12 [0010]
+ *
+ *
+ * 00:1f.2 RAID bus controller: Intel Corporation 82801GR/GH (ICH7 Family) Serial ATA Storage Controller RAID (rev 01)
+ * 00: 86 80 c3 27 47 00 b0 02 01 00 04 01 00 00 00 00
+ * 10: 41 34 00 00 35 34 00 00 39 34 00 00 31 34 00 00
+ * 20: 01 34 00 00 00 04 00 fc 00 00 00 00 34 17 85 10
+ * 30: 00 00 00 00 80 00 00 00 00 00 00 00 09 02 00 00
+ * 40: 07 a3 07 a3 00 00 00 00 0f 00 11 11 00 00 00 00
+ * 50: 00 00 00 00 ff f0 00 00 00 00 00 00 00 00 00 00
+ * 60: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ * 70: 01 a8 02 40 00 00 00 00 00 00 00 00 00 00 00 00
+ * 80: 05 70 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ * 90: 80 00 3f 00 80 01 00 00 00 00 00 00 00 00 00 00
+ * a0: 00 00 00 00 00 00 00 00 12 00 10 00 48 00 00 00
+ * b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ * c0: 00 00 00 00 05 00 00 00 00 00 00 00 00 00 00 00
+ * d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ * e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ * f0: 00 00 00 00 00 00 00 00 86 0f 01 00 00 00 00 00
+ *
+ */
+
+#include <hw/hw.h>
+#include <hw/msi.h>
+#include <hw/pc.h>
+#include <hw/pci.h>
+
+#include "monitor.h"
+#include "dma.h"
+#include "cpu-common.h"
+#include "blockdev.h"
+#include "internal.h"
+#include <hw/ide/pci.h>
+
+/* #define DEBUG_AHCI */
+
+#ifdef DEBUG_AHCI
+#define DPRINTF(port, fmt, ...) \
+do { fprintf(stderr, "ahci: %s: [%d] ", __FUNCTION__, port); \
+     fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(port, fmt, ...) do {} while(0)
+#endif
+
+#define AHCI_PCI_BAR              5
+#define AHCI_MAX_PORTS            32
+#define AHCI_MAX_SG               168 /* hardware max is 64K */
+#define AHCI_DMA_BOUNDARY         0xffffffff
+#define AHCI_USE_CLUSTERING       0
+#define AHCI_MAX_CMDS             32
+#define AHCI_CMD_SZ               32
+#define AHCI_CMD_SLOT_SZ          (AHCI_MAX_CMDS * AHCI_CMD_SZ)
+#define AHCI_RX_FIS_SZ            256
+#define AHCI_CMD_TBL_CDB          0x40
+#define AHCI_CMD_TBL_HDR_SZ       0x80
+#define AHCI_CMD_TBL_SZ           (AHCI_CMD_TBL_HDR_SZ + (AHCI_MAX_SG * 16))
+#define AHCI_CMD_TBL_AR_SZ        (AHCI_CMD_TBL_SZ * AHCI_MAX_CMDS)
+#define AHCI_PORT_PRIV_DMA_SZ     (AHCI_CMD_SLOT_SZ + AHCI_CMD_TBL_AR_SZ + \
+                                   AHCI_RX_FIS_SZ)
+
+#define AHCI_IRQ_ON_SG            (1 << 31)
+#define AHCI_CMD_ATAPI            (1 << 5)
+#define AHCI_CMD_WRITE            (1 << 6)
+#define AHCI_CMD_PREFETCH         (1 << 7)
+#define AHCI_CMD_RESET            (1 << 8)
+#define AHCI_CMD_CLR_BUSY         (1 << 10)
+
+#define RX_FIS_D2H_REG            0x40 /* offset of D2H Register FIS data */
+#define RX_FIS_SDB                0x58 /* offset of SDB FIS data */
+#define RX_FIS_UNK                0x60 /* offset of Unknown FIS data */
+
+/* global controller registers */
+#define HOST_CAP                  0x00 /* host capabilities */
+#define HOST_CTL                  0x04 /* global host control */
+#define HOST_IRQ_STAT             0x08 /* interrupt status */
+#define HOST_PORTS_IMPL           0x0c /* bitmap of implemented ports */
+#define HOST_VERSION              0x10 /* AHCI spec. version compliancy */
+
+/* HOST_CTL bits */
+#define HOST_CTL_RESET            (1 << 0)  /* reset controller; self-clear */
+#define HOST_CTL_IRQ_EN           (1 << 1)  /* global IRQ enable */
+#define HOST_CTL_AHCI_EN          (1 << 31) /* AHCI enabled */
+
+/* HOST_CAP bits */
+#define HOST_CAP_SSC              (1 << 14) /* Slumber capable */
+#define HOST_CAP_AHCI             (1 << 18) /* AHCI only */
+#define HOST_CAP_CLO              (1 << 24) /* Command List Override support */
+#define HOST_CAP_SSS              (1 << 27) /* Staggered Spin-up */
+#define HOST_CAP_NCQ              (1 << 30) /* Native Command Queueing */
+#define HOST_CAP_64               (1 << 31) /* PCI DAC (64-bit DMA) support */
+
+/* registers for each SATA port */
+#define PORT_LST_ADDR             0x00 /* command list DMA addr */
+#define PORT_LST_ADDR_HI          0x04 /* command list DMA addr hi */
+#define PORT_FIS_ADDR             0x08 /* FIS rx buf addr */
+#define PORT_FIS_ADDR_HI          0x0c /* FIS rx buf addr hi */
+#define PORT_IRQ_STAT             0x10 /* interrupt status */
+#define PORT_IRQ_MASK             0x14 /* interrupt enable/disable mask */
+#define PORT_CMD                  0x18 /* port command */
+#define PORT_TFDATA               0x20 /* taskfile data */
+#define PORT_SIG                  0x24 /* device TF signature */
+#define PORT_SCR_STAT             0x28 /* SATA phy register: SStatus */
+#define PORT_SCR_CTL              0x2c /* SATA phy register: SControl */
+#define PORT_SCR_ERR              0x30 /* SATA phy register: SError */
+#define PORT_SCR_ACT              0x34 /* SATA phy register: SActive */
+#define PORT_CMD_ISSUE            0x38 /* command issue */
+#define PORT_RESERVED             0x3c /* reserved */
+
+/* PORT_IRQ_{STAT,MASK} bits */
+#define PORT_IRQ_COLD_PRES        (1 << 31) /* cold presence detect */
+#define PORT_IRQ_TF_ERR           (1 << 30) /* task file error */
+#define PORT_IRQ_HBUS_ERR         (1 << 29) /* host bus fatal error */
+#define PORT_IRQ_HBUS_DATA_ERR    (1 << 28) /* host bus data error */
+#define PORT_IRQ_IF_ERR           (1 << 27) /* interface fatal error */
+#define PORT_IRQ_IF_NONFATAL      (1 << 26) /* interface non-fatal error */
+#define PORT_IRQ_OVERFLOW         (1 << 24) /* xfer exhausted available S/G */
+#define PORT_IRQ_BAD_PMP          (1 << 23) /* incorrect port multiplier */
+
+#define PORT_IRQ_PHYRDY           (1 << 22) /* PhyRdy changed */
+#define PORT_IRQ_DEV_ILCK         (1 << 7) /* device interlock */
+#define PORT_IRQ_CONNECT          (1 << 6) /* port connect change status */
+#define PORT_IRQ_SG_DONE          (1 << 5) /* descriptor processed */
+#define PORT_IRQ_UNK_FIS          (1 << 4) /* unknown FIS rx'd */
+#define PORT_IRQ_SDB_FIS          (1 << 3) /* Set Device Bits FIS rx'd */
+#define PORT_IRQ_DMAS_FIS         (1 << 2) /* DMA Setup FIS rx'd */
+#define PORT_IRQ_PIOS_FIS         (1 << 1) /* PIO Setup FIS rx'd */
+#define PORT_IRQ_D2H_REG_FIS      (1 << 0) /* D2H Register FIS rx'd */
+
+#define PORT_IRQ_FREEZE           (PORT_IRQ_HBUS_ERR | PORT_IRQ_IF_ERR |   \
+                                   PORT_IRQ_CONNECT | PORT_IRQ_PHYRDY |    \
+                                   PORT_IRQ_UNK_FIS)
+#define PORT_IRQ_ERROR            (PORT_IRQ_FREEZE | PORT_IRQ_TF_ERR |     \
+                                   PORT_IRQ_HBUS_DATA_ERR)
+#define DEF_PORT_IRQ              (PORT_IRQ_ERROR | PORT_IRQ_SG_DONE |     \
+                                   PORT_IRQ_SDB_FIS | PORT_IRQ_DMAS_FIS |  \
+                                   PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS)
+
+/* PORT_CMD bits */
+#define PORT_CMD_ATAPI            (1 << 24) /* Device is ATAPI */
+#define PORT_CMD_LIST_ON          (1 << 15) /* cmd list DMA engine running */
+#define PORT_CMD_FIS_ON           (1 << 14) /* FIS DMA engine running */
+#define PORT_CMD_FIS_RX           (1 << 4) /* Enable FIS receive DMA engine */
+#define PORT_CMD_CLO              (1 << 3) /* Command list override */
+#define PORT_CMD_POWER_ON         (1 << 2) /* Power up device */
+#define PORT_CMD_SPIN_UP          (1 << 1) /* Spin up device */
+#define PORT_CMD_START            (1 << 0) /* Enable port DMA engine */
+
+#define PORT_CMD_ICC_MASK         (0xf << 28) /* i/f ICC state mask */
+#define PORT_CMD_ICC_ACTIVE       (0x1 << 28) /* Put i/f in active state */
+#define PORT_CMD_ICC_PARTIAL      (0x2 << 28) /* Put i/f in partial state */
+#define PORT_CMD_ICC_SLUMBER      (0x6 << 28) /* Put i/f in slumber state */
+
+#define PORT_IRQ_STAT_DHRS        (1 << 0) /* Device to Host Register FIS */
+#define PORT_IRQ_STAT_PSS         (1 << 1) /* PIO Setup FIS */
+#define PORT_IRQ_STAT_DSS         (1 << 2) /* DMA Setup FIS */
+#define PORT_IRQ_STAT_SDBS        (1 << 3) /* Set Device Bits */
+#define PORT_IRQ_STAT_UFS         (1 << 4) /* Unknown FIS */
+#define PORT_IRQ_STAT_DPS         (1 << 5) /* Descriptor Processed */
+#define PORT_IRQ_STAT_PCS         (1 << 6) /* Port Connect Change Status */
+#define PORT_IRQ_STAT_DMPS        (1 << 7) /* Device Mechanical Presence
+                                              Status */
+#define PORT_IRQ_STAT_PRCS        (1 << 22) /* File Ready Status */
+#define PORT_IRQ_STAT_IPMS        (1 << 23) /* Incorrect Port Multiplier
+                                               Status */
+#define PORT_IRQ_STAT_OFS         (1 << 24) /* Overflow Status */
+#define PORT_IRQ_STAT_INFS        (1 << 26) /* Interface Non-Fatal Error
+                                               Status */
+#define PORT_IRQ_STAT_IFS         (1 << 27) /* Interface Fatal Error */
+#define PORT_IRQ_STAT_HBDS        (1 << 28) /* Host Bus Data Error Status */
+#define PORT_IRQ_STAT_HBFS        (1 << 29) /* Host Bus Fatal Error Status */
+#define PORT_IRQ_STAT_TFES        (1 << 30) /* Task File Error Status */
+#define PORT_IRQ_STAT_CPDS        (1 << 31) /* Code Port Detect Status */
+
+/* ap->flags bits */
+#define AHCI_FLAG_NO_NCQ                  (1 << 24)
+#define AHCI_FLAG_IGN_IRQ_IF_ERR          (1 << 25) /* ignore IRQ_IF_ERR */
+#define AHCI_FLAG_HONOR_PI                (1 << 26) /* honor PORTS_IMPL */
+#define AHCI_FLAG_IGN_SERR_INTERNAL       (1 << 27) /* ignore SERR_INTERNAL */
+#define AHCI_FLAG_32BIT_ONLY              (1 << 28) /* force 32bit */
+
+#define ATA_SRST                          (1 << 2)  /* software reset */
+
+#define STATE_RUN                         0
+#define STATE_RESET                       1
+
+#define SATA_SCR_SSTATUS_DET_NODEV        0x0
+#define SATA_SCR_SSTATUS_DET_DEV_PRESENT_PHY_UP 0x3
+
+#define SATA_SCR_SSTATUS_SPD_NODEV        0x00
+#define SATA_SCR_SSTATUS_SPD_GEN1         0x10
+
+#define SATA_SCR_SSTATUS_IPM_NODEV        0x000
+#define SATA_SCR_SSTATUS_IPM_ACTIVE       0X100
+
+#define AHCI_SCR_SCTL_DET                 0xf
+
+#define SATA_FIS_TYPE_REGISTER_H2D        0x27
+#define SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER 0x80
+
+#define AHCI_CMD_HDR_CMD_FIS_LEN           0x1f
+#define AHCI_CMD_HDR_PRDT_LEN              16
+
+#define SATA_SIGNATURE_CDROM               0xeb140000
+#define SATA_SIGNATURE_DISK                0x00000101
+
+#define AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR 0x20
+                                            /* Shouldn't this be 0x2c? */
+
+#define SATA_PORTS                         4
+
+#define AHCI_PORT_REGS_START_ADDR          0x100
+#define AHCI_PORT_REGS_END_ADDR (AHCI_PORT_REGS_START_ADDR + SATA_PORTS * 0x80)
+#define AHCI_PORT_ADDR_OFFSET_MASK         0x7f
+
+#define AHCI_NUM_COMMAND_SLOTS             31
+#define AHCI_SUPPORTED_SPEED               20
+#define AHCI_SUPPORTED_SPEED_GEN1          1
+#define AHCI_VERSION_1_0                   0x10000
+
+#define AHCI_PROGMODE_MAJOR_REV_1          1
+
+#define AHCI_COMMAND_TABLE_ACMD            0x40
+
+#define IDE_FEATURE_DMA                    1
+
+#define READ_FPDMA_QUEUED                  0x60
+#define WRITE_FPDMA_QUEUED                 0x61
+
+#define RES_FIS_DSFIS                      0x00
+#define RES_FIS_PSFIS                      0x20
+#define RES_FIS_RFIS                       0x40
+#define RES_FIS_SDBFIS                     0x58
+#define RES_FIS_UFIS                       0x60
+
+typedef struct AHCIControlRegs {
+    uint32_t    cap;
+    uint32_t    ghc;
+    uint32_t    irqstatus;
+    uint32_t    impl;
+    uint32_t    version;
+} AHCIControlRegs;
+
+typedef struct AHCIPortRegs {
+    uint32_t    lst_addr;
+    uint32_t    lst_addr_hi;
+    uint32_t    fis_addr;
+    uint32_t    fis_addr_hi;
+    uint32_t    irq_stat;
+    uint32_t    irq_mask;
+    uint32_t    cmd;
+    uint32_t    unused0;
+    uint32_t    tfdata;
+    uint32_t    sig;
+    uint32_t    scr_stat;
+    uint32_t    scr_ctl;
+    uint32_t    scr_err;
+    uint32_t    scr_act;
+    uint32_t    cmd_issue;
+    uint32_t    reserved;
+} AHCIPortRegs;
+
+typedef struct AHCICmdHdr {
+    uint32_t    opts;
+    uint32_t    status;
+    uint64_t    tbl_addr;
+    uint32_t    reserved[4];
+} __attribute__ ((packed)) AHCICmdHdr;
+
+typedef struct AHCI_SG {
+    uint64_t    addr;
+    uint32_t    reserved;
+    uint32_t    flags_size;
+} __attribute__ ((packed)) AHCI_SG;
+
+typedef struct AHCIDevice AHCIDevice;
+
+typedef struct NCQTransferState {
+    AHCIDevice *drive;
+    QEMUSGList sglist;
+    int is_read;
+    uint16_t sector_count;
+    uint64_t lba;
+    uint8_t tag;
+    int slot;
+    int used;
+} NCQTransferState;
+
+struct AHCIDevice {
+    IDEBus port;
+    BMDMAState bmdma;
+    int port_no;
+    uint32_t port_state;
+    uint32_t finished;
+    AHCIPortRegs port_regs;
+    struct AHCIState *hba;
+    uint8_t *lst;
+    uint8_t *res_fis;
+    uint8_t *cmd_fis;
+    int cmd_fis_len;
+    AHCICmdHdr *cur_cmd;
+    NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
+};
+
+typedef struct AHCIState {
+    AHCIDevice dev[SATA_PORTS];
+    AHCIControlRegs control_regs;
+    int mem;
+    qemu_irq irq;
+} AHCIState;
+
+typedef struct AHCIPCIState {
+    PCIDevice card;
+    AHCIState ahci;
+} AHCIPCIState;
+
+typedef struct NCQFrame {
+    uint8_t fis_type;
+    uint8_t c;
+    uint8_t command;
+    uint8_t sector_count_low;
+    uint8_t lba0;
+    uint8_t lba1;
+    uint8_t lba2;
+    uint8_t fua;
+    uint8_t lba3;
+    uint8_t lba4;
+    uint8_t lba5;
+    uint8_t sector_count_high;
+    uint8_t tag;
+    uint8_t reserved5;
+    uint8_t reserved6;
+    uint8_t control;
+    uint8_t reserved7;
+    uint8_t reserved8;
+    uint8_t reserved9;
+    uint8_t reserved10;
+} __attribute__ ((packed)) NCQFrame;
+
+static void ahci_irq_set_fn(IDEBus *s);
+
+static void check_cmd(AHCIState *s, int port);
+static int handle_cmd(AHCIState *s,int port,int slot);
+static void ahci_reset_port(AHCIState *s, int port);
+static void ahci_write_fis_d2h(AHCIState *s, int port, uint8_t *cmd_fis);
+
+static uint32_t  ahci_port_read(AHCIState *s, int port, int offset)
+{
+    uint32_t val;
+    AHCIPortRegs *pr;
+    pr = &s->dev[port].port_regs;
+
+    switch (offset) {
+    case PORT_LST_ADDR:
+        val = pr->lst_addr;
+        break;
+    case PORT_LST_ADDR_HI:
+        val = pr->lst_addr_hi;
+        break;
+    case PORT_FIS_ADDR:
+        val = pr->fis_addr;
+        break;
+    case PORT_FIS_ADDR_HI:
+        val = pr->fis_addr_hi;
+        break;
+    case PORT_IRQ_STAT:
+        val = pr->irq_stat;
+        break;
+    case PORT_IRQ_MASK:
+        val = pr->irq_mask;
+        break;
+    case PORT_CMD:
+        val = pr->cmd;
+        break;
+    case PORT_TFDATA:
+        val = pr->tfdata;
+        break;
+    case PORT_SIG:
+        val = pr->sig;
+        break;
+    case PORT_SCR_STAT:
+        if (s->dev[port].port.ifs[0].bs) {
+            val = SATA_SCR_SSTATUS_DET_DEV_PRESENT_PHY_UP |
+                  SATA_SCR_SSTATUS_SPD_GEN1 | SATA_SCR_SSTATUS_IPM_ACTIVE;
+        } else {
+            val = SATA_SCR_SSTATUS_DET_NODEV;
+        }
+        break;
+    case PORT_SCR_CTL:
+        val = pr->scr_ctl;
+        break;
+    case PORT_SCR_ERR:
+        val = pr->scr_err;
+        break;
+    case PORT_SCR_ACT:
+        pr->scr_act &= ~s->dev[port].finished;
+        s->dev[port].finished = 0;
+        val = pr->scr_act;
+        break;
+    case PORT_CMD_ISSUE:
+        val = pr->cmd_issue;
+        break;
+    case PORT_RESERVED:
+    default:
+        val = 0;
+    }
+    DPRINTF(port, "offset: 0x%x val: 0x%x\n", offset, val);
+    return val;
+
+}
+
+static void ahci_irq_raise(AHCIState *s, AHCIDevice *dev)
+{
+    struct AHCIPCIState *d = container_of(s, AHCIPCIState, ahci);
+
+    if (msi_enabled(&d->card)) {
+        msi_notify(&d->card, 0);
+    } else {
+        qemu_irq_raise(s->irq);
+    }
+}
+
+static void ahci_irq_lower(AHCIState *s, AHCIDevice *dev)
+{
+    struct AHCIPCIState *d = container_of(s, AHCIPCIState, ahci);
+
+    if (!msi_enabled(&d->card)) {
+        qemu_irq_lower(s->irq);
+    }
+}
+
+static void ahci_trigger_irq(AHCIState *s, AHCIDevice *d,
+                             int irq_type)
+{
+    DPRINTF(d->port_no, "trigger irq %#x -> %x\n",
+            irq_type, d->port_regs.irq_mask & irq_type);
+
+    d->port_regs.irq_stat |= irq_type;
+
+    /* Only trigger an interrupt if unmasked */
+    if (d->port_regs.irq_mask & irq_type) {
+        s->control_regs.irqstatus |= (1 << d->port_no);
+        if (s->control_regs.ghc & HOST_CTL_IRQ_EN) {
+            ahci_irq_raise(s, d);
+        }
+    }
+}
+
+static void ahci_check_irq(AHCIState *s)
+{
+    DPRINTF(-1, "check irq %#x\n", s->control_regs.irqstatus);
+
+    if (s->control_regs.irqstatus &&
+        (s->control_regs.ghc & HOST_CTL_IRQ_EN)) {
+            ahci_irq_raise(s, NULL);
+    }
+}
+
+static void map_page(uint8_t **ptr, uint64_t addr, uint32_t wanted)
+{
+    target_phys_addr_t len = wanted;
+
+    if (*ptr) {
+        cpu_physical_memory_unmap(*ptr, 1, len, len);
+    }
+
+    *ptr = cpu_physical_memory_map(addr, &len, 1);
+    if (len < wanted) {
+        *ptr = NULL;
+    }
+}
+
+static void  ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
+{
+    AHCIPortRegs *pr = &s->dev[port].port_regs;
+
+    DPRINTF(port, "offset: 0x%x val: 0x%x\n", offset, val);
+    switch (offset) {
+        case PORT_LST_ADDR:
+            pr->lst_addr = val;
+            map_page(&s->dev[port].lst,
+                     ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
+            break;
+        case PORT_LST_ADDR_HI:
+            pr->lst_addr_hi = val;
+            map_page(&s->dev[port].lst,
+                     ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
+            break;
+        case PORT_FIS_ADDR:
+            pr->fis_addr = val;
+            map_page(&s->dev[port].res_fis,
+                     ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
+            break;
+        case PORT_FIS_ADDR_HI:
+            pr->fis_addr_hi = val;
+            map_page(&s->dev[port].res_fis,
+                     ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
+            break;
+        case PORT_IRQ_STAT:
+            pr->irq_stat &= ~val;
+            break;
+        case PORT_IRQ_MASK:
+            pr->irq_mask = val & 0xfdc000ff;
+            break;
+        case PORT_CMD:
+            pr->cmd = val & ~(PORT_CMD_LIST_ON | PORT_CMD_FIS_ON);
+
+            if (pr->cmd & PORT_CMD_START) {
+                pr->cmd |= PORT_CMD_LIST_ON;
+            }
+
+            if (pr->cmd & PORT_CMD_FIS_RX) {
+                pr->cmd |= PORT_CMD_FIS_ON;
+            }
+
+            check_cmd(s, port);
+            break;
+        case PORT_TFDATA:
+            pr->tfdata = val;
+            break;
+        case PORT_SIG:
+            pr->sig = val;
+            break;
+        case PORT_SCR_STAT:
+            pr->scr_stat = val;
+            break;
+        case PORT_SCR_CTL:
+            if (((pr->scr_ctl & AHCI_SCR_SCTL_DET) == 1) &&
+                ((val & AHCI_SCR_SCTL_DET) == 0)) {
+                ahci_reset_port(s, port);
+            }
+            pr->scr_ctl = val;
+            break;
+        case PORT_SCR_ERR:
+            pr->scr_err &= ~val;
+            break;
+        case PORT_SCR_ACT:
+            /* RW1 */
+            pr->scr_act |= val;
+            break;
+        case PORT_CMD_ISSUE:
+            pr->cmd_issue |= val;
+            check_cmd(s, port);
+            break;
+        default:
+            break;
+    }
+}
+
+static uint32_t ahci_mem_readl(void *ptr, target_phys_addr_t addr)
+{
+    AHCIState *s = ptr;
+    uint32_t val = 0;
+
+    addr = addr & 0xfff;
+    if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) {
+        switch (addr) {
+        case HOST_CAP:
+            val = s->control_regs.cap;
+            break;
+        case HOST_CTL:
+            val = s->control_regs.ghc;
+            break;
+        case HOST_IRQ_STAT:
+            val = s->control_regs.irqstatus;
+            break;
+        case HOST_PORTS_IMPL:
+            val = s->control_regs.impl;
+            break;
+        case HOST_VERSION:
+            val = s->control_regs.version;
+            break;
+        }
+    } else if ((addr >= AHCI_PORT_REGS_START_ADDR) &&
+               (addr < AHCI_PORT_REGS_END_ADDR)) {
+        val = ahci_port_read(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7,
+                             addr & AHCI_PORT_ADDR_OFFSET_MASK);
+    }
+
+    DPRINTF(-1, "(addr 0x%08X), val 0x%08X\n", (unsigned) addr, val);
+
+    return val;
+}
+
+
+
+static void ahci_mem_writel(void *ptr, target_phys_addr_t addr, uint32_t val)
+{
+    AHCIState *s = ptr;
+    addr = addr & 0xfff;
+    int i;
+
+    DPRINTF(-1, "(addr 0x%08X), val 0x%08X\n", (unsigned) addr, val);
+
+    /* Only aligned reads are allowed on AHCI */
+    if (addr & 3) {
+        fprintf(stderr, "ahci: Mis-aligned write to addr 0x"
+                TARGET_FMT_plx "\n", addr);
+        return;
+    }
+
+    if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) {
+        switch (addr) {
+            case HOST_CAP: /* R/WO, RO */
+                /* FIXME handle R/WO */
+                break;
+            case HOST_CTL: /* R/W */
+                if (val & HOST_CTL_RESET) {
+                    DPRINTF(-1, "HBA Reset\n");
+                    /* FIXME reset? */
+                } else {
+                    s->control_regs.ghc = (val & 0x3) | HOST_CTL_AHCI_EN;
+                    ahci_check_irq(s);
+                }
+                break;
+            case HOST_IRQ_STAT: /* R/WC, RO */
+                s->control_regs.irqstatus &= ~val;
+                for (i = 0; i < SATA_PORTS; i++) {
+                    if (s->dev[i].port_regs.irq_stat) {
+                        s->control_regs.irqstatus |= (1 << i);
+                        ahci_irq_lower(s, &s->dev[i]);
+                        ahci_irq_raise(s, &s->dev[i]);
+                    }
+                }
+                if (!s->control_regs.irqstatus) {
+                    ahci_irq_lower(s, NULL);
+                }
+                break;
+            case HOST_PORTS_IMPL: /* R/WO, RO */
+                /* FIXME handle R/WO */
+                break;
+            case HOST_VERSION: /* RO */
+                /* FIXME report write? */
+                break;
+            default:
+                DPRINTF(-1, "write to unknown register 0x%x\n", (unsigned)addr);
+        }
+    } else if ((addr >= AHCI_PORT_REGS_START_ADDR) &&
+               (addr < AHCI_PORT_REGS_END_ADDR)) {
+        ahci_port_write(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7,
+                        addr & AHCI_PORT_ADDR_OFFSET_MASK, val);
+    }
+
+}
+
+static CPUReadMemoryFunc * const ahci_readfn[3]={
+    ahci_mem_readl,
+    ahci_mem_readl,
+    ahci_mem_readl
+};
+
+static CPUWriteMemoryFunc * const ahci_writefn[3]={
+    ahci_mem_writel,
+    ahci_mem_writel,
+    ahci_mem_writel
+};
+
+static void ahci_reg_init(AHCIState *s)
+{
+    int i;
+
+    s->control_regs.cap = (SATA_PORTS - 1) |
+                          (AHCI_NUM_COMMAND_SLOTS << 8) |
+                          (AHCI_SUPPORTED_SPEED_GEN1 << AHCI_SUPPORTED_SPEED) |
+                          HOST_CAP_NCQ | HOST_CAP_AHCI;
+
+    s->control_regs.impl = (1 << SATA_PORTS) - 1;
+
+    s->control_regs.version = AHCI_VERSION_1_0;
+
+    for (i = 0; i < SATA_PORTS; i++) {
+        s->dev[i].port_state = STATE_RUN;
+    }
+}
+
+static uint32_t write_to_sglist(uint8_t *buffer, uint32_t len,
+                                QEMUSGList *sglist)
+{
+    uint32_t i = 0;
+    uint32_t total = 0, once;
+    ScatterGatherEntry *cur_prd;
+    uint32_t sgcount;
+
+    cur_prd = sglist->sg;
+    sgcount = sglist->nsg;
+    for (i = 0; len && sgcount; i++) {
+        once = MIN(cur_prd->len + 1, len);
+        cpu_physical_memory_write(cur_prd->base, buffer, once);
+        cur_prd++;
+        sgcount--;
+        len -= once;
+        buffer += once;
+        total += once;
+    }
+
+    return total;
+}
+
+static void check_cmd(AHCIState *s, int port)
+{
+    AHCIPortRegs *pr = &s->dev[port].port_regs;
+    int slot;
+
+    if (pr->cmd & PORT_CMD_START) {
+        for (slot = 0; (slot < 32) && pr->cmd_issue; slot++) {
+            if ((pr->cmd_issue & (1 << slot)) &&
+                !handle_cmd(s, port, slot)) {
+                pr->cmd_issue &= ~(1 << slot);
+            }
+        }
+    }
+}
+
+static void ahci_reset_port(AHCIState *s, int port)
+{
+    IDEState *ide_state;
+    uint8_t init_fis[0x20];
+    uint32_t tfd;
+
+    DPRINTF(port, "reset port\n");
+
+    ide_state = &s->dev[port].port.ifs[0];
+    if (!ide_state->bs) {
+        return;
+    }
+
+    memset(init_fis, 0, sizeof(init_fis));
+    s->dev[port].port_state = STATE_RUN;
+    if (!ide_state->bs) {
+        s->dev[port].port_regs.sig = 0;
+        tfd = (1 << 8) | SEEK_STAT | WRERR_STAT;
+    } else if (ide_state->drive_kind == IDE_CD) {
+        s->dev[port].port_regs.sig = SATA_SIGNATURE_CDROM;
+        ide_state->lcyl = 0x14;
+        ide_state->hcyl = 0xeb;
+        DPRINTF(port, "set lcyl = %d\n", ide_state->lcyl);
+        init_fis[5] = ide_state->lcyl;
+        init_fis[6] = ide_state->hcyl;
+        tfd = (1 << 8) | SEEK_STAT | WRERR_STAT | READY_STAT;
+    } else {
+        s->dev[port].port_regs.sig = SATA_SIGNATURE_DISK;
+        tfd = (1 << 8) | SEEK_STAT | WRERR_STAT;
+    }
+
+    ide_state->error = 1;
+    ide_state->status = 0;
+    init_fis[4] = 1;
+    init_fis[12] = 1;
+    ahci_write_fis_d2h(s, port, init_fis);
+
+    s->dev[port].port_regs.tfdata = tfd;
+}
+
+static void debug_print_fis(uint8_t *fis, int cmd_len)
+{
+#ifdef DEBUG_AHCI
+    int i;
+
+    fprintf(stderr, "fis:");
+    for (i = 0; i < cmd_len; i++) {
+        if ((i & 0xf) == 0) {
+            fprintf(stderr, "\n%02x:",i);
+        }
+        fprintf(stderr, "%02x ",fis[i]);
+    }
+    fprintf(stderr, "\n");
+#endif
+}
+
+static void ahci_write_fis_sdb(AHCIState *s, int port, uint32_t finished)
+{
+    AHCIPortRegs *pr = &s->dev[port].port_regs;
+    IDEState *ide_state;
+    uint8_t *sdb_fis;
+
+    if (!s->dev[port].res_fis ||
+        !(pr->cmd & PORT_CMD_FIS_RX)) {
+        return;
+    }
+
+    sdb_fis = &s->dev[port].res_fis[RES_FIS_SDBFIS];
+    ide_state = &s->dev[port].port.ifs[0];
+
+    pr->tfdata = (uint16_t)ide_state->error << 8 | ide_state->status;
+
+    /* clear memory */
+    *(uint32_t*)sdb_fis = 0;
+
+    /* write values */
+    sdb_fis[0] = ide_state->error;
+    sdb_fis[2] = ide_state->status & 0x77;
+    s->dev[port].finished |= finished;
+    *(uint32_t*)(sdb_fis + 4) = cpu_to_le32(s->dev[port].finished);
+
+    ahci_trigger_irq(s, &s->dev[port], PORT_IRQ_STAT_SDBS);
+}
+
+static void ahci_write_fis_d2h(AHCIState *s, int port, uint8_t *cmd_fis)
+{
+    AHCIPortRegs *pr = &s->dev[port].port_regs;
+    uint8_t *d2h_fis;
+    int i;
+
+    if (!s->dev[port].res_fis ||
+        !(pr->cmd & PORT_CMD_FIS_RX)) {
+        return;
+    }
+
+    d2h_fis = &s->dev[port].res_fis[RES_FIS_RFIS];
+
+    d2h_fis[0] = 0x34;
+    d2h_fis[1] = (s->control_regs.irqstatus ? (1 << 6) : 0);
+    d2h_fis[2] = s->dev[port].port.ifs[0].status;
+    d2h_fis[3] = s->dev[port].port.ifs[0].error;
+
+    d2h_fis[4] = cmd_fis[4];
+    d2h_fis[5] = cmd_fis[5];
+    d2h_fis[6] = cmd_fis[6];
+    d2h_fis[7] = cmd_fis[7];
+    d2h_fis[8] = cmd_fis[8];
+    d2h_fis[9] = cmd_fis[9];
+    d2h_fis[10] = cmd_fis[10];
+    d2h_fis[11] = cmd_fis[11];
+    d2h_fis[12] = cmd_fis[12];
+    d2h_fis[13] = cmd_fis[13];
+    for (i = 14; i < 0x20; i++) {
+        d2h_fis[i] = 0;
+    }
+
+    pr->tfdata = (uint16_t)d2h_fis[3] << 8 | d2h_fis[2];
+
+    if (d2h_fis[2] & ERR_STAT) {
+        ahci_trigger_irq(s, &s->dev[port], PORT_IRQ_STAT_TFES);
+    }
+
+    ahci_trigger_irq(s, &s->dev[port], PORT_IRQ_D2H_REG_FIS);
+}
+
+static void ncq_cb(void *opaque, int ret)
+{
+    NCQTransferState *ncq_tfs = (NCQTransferState *)opaque;
+    IDEState *ide_state;
+
+    if (ret < 0) {
+        /* error */
+    }
+
+    /* Clear bit for this tag in SActive */
+    ncq_tfs->drive->port_regs.scr_act &= ~(1 << ncq_tfs->tag);
+
+    ide_state = &ncq_tfs->drive->port.ifs[0];
+    ide_state->status = READY_STAT | SEEK_STAT;
+
+    /* XXX do we send a d2h fis here? */
+    ahci_write_fis_d2h(ncq_tfs->drive->hba, ncq_tfs->drive->port_no,
+                       ncq_tfs->drive->cmd_fis);
+
+    ahci_write_fis_sdb(ncq_tfs->drive->hba, ncq_tfs->drive->port_no,
+                       (1 << ncq_tfs->tag));
+
+    DPRINTF(ncq_tfs->drive->port_no, "NCQ transfer tag %d finished\n",
+            ncq_tfs->tag);
+
+    qemu_sglist_destroy(&ncq_tfs->sglist);
+    cpu_physical_memory_unmap(ncq_tfs->drive->cmd_fis, 1,
+                              ncq_tfs->drive->cmd_fis_len,
+                              ncq_tfs->drive->cmd_fis_len);
+
+    ncq_tfs->used = 0;
+}
+
+static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
+                                int slot, QEMUSGList *sg)
+{
+    NCQFrame *ncq_fis = (NCQFrame*)cmd_fis;
+    uint8_t tag = ncq_fis->tag >> 3;
+    NCQTransferState *ncq_tfs = &s->dev[port].ncq_tfs[tag];
+
+    if (ncq_tfs->used) {
+        /* error - already in use */
+        fprintf(stderr, "%s: tag %d already used\n", __FUNCTION__, tag);
+        return;
+    }
+
+    ncq_tfs->used = 1;
+    ncq_tfs->drive = &s->dev[port];
+    ncq_tfs->drive->cmd_fis = cmd_fis;
+    ncq_tfs->drive->cmd_fis_len = 0x20;
+    ncq_tfs->slot = slot;
+    ncq_tfs->lba = ((uint64_t)ncq_fis->lba5 << 40) |
+                   ((uint64_t)ncq_fis->lba4 << 32) |
+                   ((uint64_t)ncq_fis->lba3 << 24) |
+                   ((uint64_t)ncq_fis->lba2 << 16) |
+                   ((uint64_t)ncq_fis->lba1 << 8) |
+                   (uint64_t)ncq_fis->lba0;
+
+    /* Note: We calculate the sector count, but don't currently rely on it.
+     * The total size of the DMA buffer tells us the transfer size instead. */
+    ncq_tfs->sector_count = ((uint16_t)ncq_fis->sector_count_high << 8) |
+                                ncq_fis->sector_count_low;
+
+    DPRINTF(port, "NCQ transfer LBA from %ld to %ld, drive max %ld\n",
+            ncq_tfs->lba, ncq_tfs->lba + ncq_tfs->sector_count - 2,
+            s->dev[port].port.ifs[0].nb_sectors - 1);
+
+    ncq_tfs->sglist = *sg;
+    ncq_tfs->tag = tag;
+
+    switch(ncq_fis->command) {
+        case READ_FPDMA_QUEUED:
+            DPRINTF(port, "NCQ reading %d sectors from LBA %ld, tag %d\n",
+                    ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag);
+            ncq_tfs->is_read = 1;
+
+            /* XXX: The specification is unclear about whether the DMA Setup
+             * FIS here should have the I bit set, but it suggest that it should
+             * not. Linux works without this interrupt, so I disabled it.
+             * If someone knows if it is needed, please tell me, or fix this. */
+
+            /* ahci_trigger_irq(s,s->dev[port],PORT_IRQ_STAT_DSS); */
+            DPRINTF(port, "tag %d aio read %ld\n", ncq_tfs->tag, ncq_tfs->lba);
+            dma_bdrv_read(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->sglist,
+                          ncq_tfs->lba, ncq_cb, ncq_tfs);
+            break;
+        case WRITE_FPDMA_QUEUED:
+            DPRINTF(port, "NCQ writing %d sectors to LBA %ld, tag %d\n",
+                    ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag);
+            ncq_tfs->is_read = 0;
+            /* ahci_trigger_irq(s,s->dev[port],PORT_IRQ_STAT_DSS); */
+            DPRINTF(port, "tag %d aio write %ld\n", ncq_tfs->tag, ncq_tfs->lba);
+            dma_bdrv_write(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->sglist,
+                           ncq_tfs->lba, ncq_cb, ncq_tfs);
+            break;
+        default:
+            hw_error("ahci: tried to process non-NCQ command as NCQ\n");
+            break;
+    }
+}
+
+static int handle_cmd(AHCIState *s, int port, int slot)
+{
+    IDEState *ide_state;
+
+    int sglist_alloc_hint;
+    QEMUSGList sglist;
+    int atapi_packet_len = 0;
+    AHCIPortRegs *pr;
+    uint32_t opts;
+    uint64_t tbl_addr;
+    AHCICmdHdr *cmd;
+    uint8_t *cmd_fis;
+
+    target_phys_addr_t cmd_len;
+    int i;
+
+    pr = &s->dev[port].port_regs;
+    cmd = (AHCICmdHdr *)&s->dev[port].lst[slot * 32];
+
+    if (!s->dev[port].lst) {
+        hw_error("%s: lst not given but cmd handled", __FUNCTION__);
+    }
+
+    opts = le32_to_cpu(cmd->opts);
+    tbl_addr = le64_to_cpu(cmd->tbl_addr);
+
+    sglist_alloc_hint = opts >> AHCI_CMD_HDR_PRDT_LEN;
+    cmd_len = 0x80 + (sglist_alloc_hint * sizeof(AHCI_SG));
+    cmd_fis = cpu_physical_memory_map(tbl_addr, &cmd_len, 1);
+
+    /* The device we are working for */
+    ide_state = &s->dev[port].port.ifs[0];
+
+    if (!ide_state->bs) {
+        hw_error("%s: guest accessed unused port", __FUNCTION__);
+    }
+
+    /* Get number of entries in the PRDT, init a qemu sglist accordingly */
+    memset(&sglist, 0, sizeof(sglist));
+
+    if (sglist_alloc_hint > 0) {
+        AHCI_SG *tbl = (AHCI_SG *)(&cmd_fis[0x80]);
+
+        qemu_sglist_init(&sglist, sglist_alloc_hint);
+        /* Parse the PRDs and create qemu sglist entries accordingly */
+        for (i = 0; i < sglist_alloc_hint; i++) {
+            /* flags_size is zero-based */
+            qemu_sglist_add(&sglist, le64_to_cpu(tbl[i].addr),
+                            le32_to_cpu(tbl[i].flags_size) + 1);
+        }
+    }
+
+    debug_print_fis(cmd_fis, (opts & AHCI_CMD_HDR_CMD_FIS_LEN) * 4);
+
+    switch (cmd_fis[0]) {
+        case SATA_FIS_TYPE_REGISTER_H2D:
+            break;
+        default:
+            hw_error("unknown command cmd_fis[0]=%02x cmd_fis[1]=%02x cmd_fis[2]=%02x\n",
+                     cmd_fis[0], cmd_fis[1], cmd_fis[2]);
+            break;
+    }
+
+    switch (cmd_fis[1]) {
+        case SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER:
+            break;
+        case 0:
+            break;
+        default:
+            hw_error("unknown command cmd_fis[0]=%02x cmd_fis[1]=%02x cmd_fis[2]=%02x\n",
+                     cmd_fis[0], cmd_fis[1], cmd_fis[2]);
+            break;
+    }
+
+    switch (s->dev[port].port_state) {
+        case STATE_RUN:
+            if (cmd_fis[15] & ATA_SRST) {
+                s->dev[port].port_state = STATE_RESET;
+            }
+            break;
+        case STATE_RESET:
+            if (!(cmd_fis[15] & ATA_SRST)) {
+                ahci_reset_port(s, port);
+            }
+            break;
+    }
+
+    if (cmd_fis[1] == SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER) {
+
+        /* Check for NCQ command */
+        if ((cmd_fis[2] == READ_FPDMA_QUEUED) ||
+            (cmd_fis[2] == WRITE_FPDMA_QUEUED)) {
+            process_ncq_command(s, port, cmd_fis, slot, &sglist);
+            goto out;
+        }
+
+        /* If the command is not NCQ, the sglist is needed in the core */
+        ide_state->sg = sglist;
+
+        /* Decompose the FIS  */
+        ide_state->nsector = (int64_t)((cmd_fis[13] << 8) | cmd_fis[12]);
+        if (!ide_state->nsector) {
+            ide_state->nsector = 256;
+        }
+
+        if (ide_state->drive_kind != IDE_CD) {
+            ide_set_sector(ide_state, (cmd_fis[6] << 16) | (cmd_fis[5] << 8) |
+                           cmd_fis[4]);
+        }
+
+        /* Copy the ACMD field (ATAPI packet, if any) from the AHCI command
+         * table to ide_state->io_buffer
+         */
+        if (opts & AHCI_CMD_ATAPI) {
+            atapi_packet_len = ((ide_state->hcyl) << 8) + ide_state->lcyl;
+            memcpy(ide_state->io_buffer, &cmd_fis[AHCI_COMMAND_TABLE_ACMD], 0x10);
+        }
+
+        ide_state->error = 0;
+        s->dev[port].cur_cmd = cmd;
+
+        /* We're ready to process the command in FIS byte 2. */
+        ide_exec_cmd(&s->dev[port].port, cmd_fis[2]);
+
+        /* we're DMA'ing, so we're not ready yet, postpone cleanup to later */
+        if (s->dev[port].bmdma.status & BM_STATUS_DMAING) {
+            cmd->status = 0;
+            s->dev[port].cmd_fis = cmd_fis;
+            s->dev[port].cmd_fis_len = cmd_len;
+            return 0;
+        }
+
+        ahci_write_fis_d2h(s, port, cmd_fis);
+    }
+
+out:
+    cpu_physical_memory_unmap(cmd_fis, 1, cmd_len, cmd_len);
+
+    return 0;
+}
+
+static void ahci_transfer_start(IDEState *s, uint8_t *buf, int size,
+                                EndTransferFunc *end_transfer_func)
+{
+    AHCIDevice *ad;
+    AHCIState *as;
+
+    s->end_transfer_func = end_transfer_func;
+
+    ad = DO_UPCAST(AHCIDevice, port, s->bus);
+    as = ad->hba;
+
+    write_to_sglist(buf, (uint32_t)size, &s->sg);
+
+    /* update number of transferred bytes */
+    ad->cur_cmd->status = cpu_to_le32(le32_to_cpu(ad->cur_cmd->status) + size);
+
+    end_transfer_func(s);
+}
+
+static void ahci_dma_start_fn(IDEState *s, BlockDriverCompletionFunc *dma_cb)
+{
+    AHCIDevice *ad;
+    AHCIState *as;
+    BMDMAState *bm = s->bus->bmdma;
+
+    ad = DO_UPCAST(AHCIDevice, port, s->bus);
+    as = ad->hba;
+
+    if (!bm) {
+        return;
+    }
+
+    bm->unit = s->unit;
+    bm->dma_cb = dma_cb;
+    bm->cur_prd_last = 0;
+    bm->cur_prd_addr = 0;
+    bm->cur_prd_len = 0;
+    bm->cur_addr = 0;
+    bm->sector_num = ide_get_sector(s);
+    bm->nsector = s->nsector;
+    bmdma_cmd_writeb(bm, 0, 1);
+}
+
+static int ahci_dma_buf_prepare(BMDMAState *bm, int is_write)
+{
+    IDEState *s = bmdma_active_if(bm);
+    int i;
+
+    s->io_buffer_size = 0;
+    for (i = 0; i < s->sg.nsg; i++) {
+        s->io_buffer_size += s->sg.sg[i].len;
+    }
+
+    DPRINTF(-1, "len=%#x\n", s->io_buffer_size);
+    return s->io_buffer_size != 0;
+}
+
+static int ahci_dma_buf_rw(BMDMAState *bm, int is_write)
+{
+    IDEState *s = bmdma_active_if(bm);
+    int l, len;
+
+    for (;;) {
+        l = s->io_buffer_size - s->io_buffer_index;
+
+        DPRINTF(-1, "size=%#x idx=%#x l=%#x\n",
+                s->io_buffer_size, s->io_buffer_index, l);
+
+        if (l <= 0) {
+            break;
+        }
+
+        if (bm->cur_prd_len == 0) {
+            /* end of table */
+            if (bm->cur_prd_last) {
+                bm->cur_addr = 0;
+                return 0;
+            }
+
+            len = s->sg.sg[bm->cur_addr].len;
+            bm->cur_prd_len = len;
+            bm->cur_prd_addr = s->sg.sg[bm->cur_addr].base;
+
+            DPRINTF(-1, "[%d] base=%#x len=%#x\n",
+                    bm->cur_addr, bm->cur_prd_addr, len);
+
+            bm->cur_addr++;
+            bm->cur_prd_last = (bm->cur_addr == s->sg.nsg);
+        }
+
+        if (l > bm->cur_prd_len) {
+            l = bm->cur_prd_len;
+        }
+
+        if (l > 0) {
+            if (is_write) {
+                cpu_physical_memory_write(bm->cur_prd_addr,
+                                          s->io_buffer + s->io_buffer_index, l);
+            } else {
+                cpu_physical_memory_read(bm->cur_prd_addr,
+                                          s->io_buffer + s->io_buffer_index, l);
+            }
+            bm->cur_prd_addr += l;
+            bm->cur_prd_len -= l;
+            s->io_buffer_index += l;
+        }
+    }
+
+    return 1;
+}
+
+static void ahci_irq_set_fn(IDEBus *s)
+{
+    AHCIDevice *ad;
+    AHCIState *as;
+
+    ad = DO_UPCAST(AHCIDevice, port, s);
+    as = ad->hba;
+
+    /* error interrupts will be triggered later */
+    if (ad->port.ifs[0].status & ERR_STAT) {
+        return;
+    }
+
+    /* DMA is done */
+    /* XXX find actual end point of a DMA and only do then */
+    if (!(ad->bmdma.status & BM_STATUS_DMAING)) {
+        ahci_trigger_irq(as, ad, PORT_IRQ_STAT_DSS);
+    }
+
+    /* update d2h status */
+    if (ad->cmd_fis) {
+        ahci_write_fis_d2h(as, ad->port_no, ad->cmd_fis);
+        cpu_physical_memory_unmap(ad->cmd_fis, 1, ad->cmd_fis_len, ad->cmd_fis_len);
+        ad->cmd_fis = NULL;
+    }
+}
+
+static IDEBusOps ahci_bus_ops = {
+    .transfer_start_fn = ahci_transfer_start,
+    .irq_set_fn = ahci_irq_set_fn,
+    .dma_start_fn = ahci_dma_start_fn,
+    .dma_prepare_fn = ahci_dma_buf_prepare,
+    .dma_rw_fn = ahci_dma_buf_rw,
+};
+
+static void ahci_init(AHCIState *s, DeviceState *qdev)
+{
+    int i;
+
+    ahci_reg_init(s);
+    s->mem = cpu_register_io_memory(ahci_readfn, ahci_writefn, s);
+
+    for (i = 0; i < SATA_PORTS; i++) {
+        AHCIDevice *ad = &s->dev[i];
+
+        ide_bus_new(&ad->port, qdev);
+        ide_init2(&ad->port, 0);
+
+        ad->hba = s;
+        ad->port_no = i;
+        ad->port.bmdma = &ad->bmdma;
+        ad->bmdma.bus = &ad->port;
+        ad->port.ops = &ahci_bus_ops;
+        ad->port_regs.cmd = PORT_CMD_SPIN_UP | PORT_CMD_POWER_ON;
+    }
+}
+
+static void ahci_pci_map(PCIDevice *pci_dev, int region_num,
+        pcibus_t addr, pcibus_t size, int type)
+{
+    struct AHCIPCIState *d = (struct AHCIPCIState *)pci_dev;
+    AHCIState *s = &d->ahci;
+
+    cpu_register_physical_memory(addr, size, s->mem);
+}
+
+static void ahci_reset(void *opaque)
+{
+    struct AHCIPCIState *d = opaque;
+    int i;
+
+    for (i = 0; i < SATA_PORTS; i++) {
+        AHCIDevice *ad = &d->ahci.dev[i];
+
+        ide_bus_reset(&d->ahci.dev[i].port);
+        ide_dma_reset(&d->ahci.dev[i].bmdma);
+
+        ad->port.ifs[0].feature |= IDE_FEATURE_DMA;
+        ad->port.ifs[0].ncq_queues = AHCI_MAX_CMDS;
+    }
+}
+
+static int pci_ahci_init(PCIDevice *dev)
+{
+    struct AHCIPCIState *d;
+    d = DO_UPCAST(struct AHCIPCIState, card, dev);
+
+    pci_config_set_vendor_id(d->card.config, PCI_VENDOR_ID_INTEL);
+    pci_config_set_device_id(d->card.config,
+                             PCI_DEVICE_ID_INTEL_ICH7_AHCI_RAID);
+    pci_set_word(d->card.config + PCI_COMMAND, PCI_COMMAND_IO |
+                                               PCI_COMMAND_MEMORY |
+                                               PCI_COMMAND_MASTER);
+
+    pci_config_set_class(d->card.config, PCI_CLASS_STORAGE_SATA);
+    pci_config_set_prog_interface(d->card.config, AHCI_PROGMODE_MAJOR_REV_1);
+
+    d->card.config[PCI_CACHE_LINE_SIZE] = 0x08;  /* Cache line size */
+    d->card.config[PCI_LATENCY_TIMER]   = 0x00;  /* Latency timer */
+    pci_config_set_interrupt_pin(d->card.config, 1);
+
+    qemu_register_reset(ahci_reset, d);
+
+    /* XXX BAR size should be 1k, but that breaks, so bump it to 4k for now */
+    pci_register_bar(&d->card, 5, 0x1000, PCI_BASE_ADDRESS_SPACE_MEMORY,
+                     ahci_pci_map);
+
+    msi_init(dev, 0x50, 1, true, false);
+
+    ahci_init(&d->ahci, &dev->qdev);
+    d->ahci.irq = d->card.irq[0];
+
+    return 0;
+}
+
+static int pci_ahci_uninit(PCIDevice *dev)
+{
+    struct AHCIPCIState *d;
+    d = DO_UPCAST(struct AHCIPCIState, card, dev);
+
+    if (msi_enabled(&d->card)) {
+        msi_uninit(dev);
+    }
+
+    return 0;
+}
+
+static void pci_ahci_write_config(PCIDevice *pci, uint32_t addr,
+                                  uint32_t val, int len)
+{
+    pci_default_write_config(pci, addr, val, len);
+    msi_write_config(pci, addr, val, len);
+}
+
+static PCIDeviceInfo ahci_info = {
+    .qdev.name  = "ahci",
+    .qdev.size  = sizeof(AHCIPCIState),
+    .init       = pci_ahci_init,
+    .exit       = pci_ahci_uninit,
+    .config_write = pci_ahci_write_config,
+};
+
+static void ahci_pci_register_devices(void)
+{
+    pci_qdev_register(&ahci_info);
+}
+
+device_init(ahci_pci_register_devices)