Patchwork [08/10] ahci: add ahci emulation

login
register
mail settings
Submitter Alexander Graf
Date Nov. 18, 2010, 3:27 a.m.
Message ID <1290050875-23848-9-git-send-email-agraf@suse.de>
Download mbox | patch
Permalink /patch/72042/
State New
Headers show

Comments

Alexander Graf - Nov. 18, 2010, 3:27 a.m.
This patch adds an emulation layer for an ICH-7M AHCI controller. For now
this controller does not do IDE legacy emulation. It is a pure AHCI controller.

Signed-off-by: Alexander Graf <agraf@suse.de>

---

v1 -> v2:

  - rename IDEExtender to IDEBusOps and make a pointer (kraxel)
  - make dma hooks explicit by putting them into ops struct (stefanha)
  - use qdev buses (kraxel)
  - minor cleanups
  - dprintf overhaul
  - add reset function
---
 Makefile.objs                      |    1 +
 Makefile.target                    |    4 +
 default-configs/i386-softmmu.mak   |    1 +
 default-configs/x86_64-softmmu.mak |    1 +
 hw/ahci.c                          | 1280 ++++++++++++++++++++++++++++++++++++
 5 files changed, 1287 insertions(+), 0 deletions(-)
 create mode 100644 hw/ahci.c
Gerd Hoffmann - Nov. 18, 2010, 8:01 a.m.
Hi,

> +static void ahci_trigger_irq(AHCIState *s, AHCIDevice *d,
> +                             int irq_type)

> +static void ahci_check_irq(AHCIState *s)

MSI support would be nice to have.

> +#ifndef min
> +#define min(a, b) ((a)<  (b) ? (a) : (b))
> +#endif

osdep.h has MIN/MAX macros.

> +static void ahci_init(AHCIState *s, DeviceState *qdev)
> +{

> +        ide_bus_new(&ad->port, qdev);
> +        ide_init2(&ad->port, 0);

Good.

> +        if (dinfo) {
> +            ide_create_drive(&ad->port, 0, dinfo);
> +        }

That doesn't belong into the qdev init function.

Look how ide/isa.c handles it:  The qdev init function (isa_ide_initfn) 
doesn't create ide-drives at all.  And there is a convinience function 
(isa_ide_init) which first creates the controller, then attaches the 
drives.  The later is called from pc_init() with the drives specified 
via -drive if=ide (or -hda).

Does AHCI support drive hotplug btw?

> +static void ahci_reset(void *opaque)
> +{

> +    pci_config_set_vendor_id(d->card.config, PCI_VENDOR_ID_INTEL);
> +    pci_config_set_device_id(d->card.config, PCI_DEVICE_ID_INTEL_ICH7M_AHCI);

> +    pci_config_set_class(d->card.config, PCI_CLASS_STORAGE_SATA);
> +    pci_config_set_prog_interface(d->card.config, AHCI_PROGMODE_MAJOR_REV_1);

> +    d->card.config[PCI_HEADER_TYPE]     = PCI_HEADER_TYPE_NORMAL;
> +    d->card.config[PCI_INTERRUPT_PIN]   = 1;     /* interrupt pin 0 */

Why is that in the reset function instead of init?  It should never ever 
change, should it?

> +static PCIDeviceInfo ahci_info = {
> +    .qdev.name  = "ahci",
> +    .qdev.size  = sizeof(AHCIPciState),
> +    .init       = pci_ahci_init,
> +    .exit       = pci_ahci_uninit,
> +    .qdev.props = (Property[]) {
> +        DEFINE_PROP_END_OF_LIST(),
> +    }

If there are no properties you can zap the last tree lines altogether ;)

cheers,
   Gerd
Alexander Graf - Nov. 18, 2010, 6:05 p.m.
Gerd Hoffmann wrote:
>   Hi,
>
>> +static void ahci_trigger_irq(AHCIState *s, AHCIDevice *d,
>> +                             int irq_type)
>
>> +static void ahci_check_irq(AHCIState *s)
>
> MSI support would be nice to have.

Alrighty, I added MSI support. But I only have a single vector in use
atm as nvec > 0 doesn't get written to the PCI config space so the code
trips on that later on.

[...]

>
>> +        if (dinfo) {
>> +            ide_create_drive(&ad->port, 0, dinfo);
>> +        }
>
> That doesn't belong into the qdev init function.
>
> Look how ide/isa.c handles it:  The qdev init function
> (isa_ide_initfn) doesn't create ide-drives at all.  And there is a
> convinience function (isa_ide_init) which first creates the
> controller, then attaches the drives.  The later is called from
> pc_init() with the drives specified via -drive if=ide (or -hda).

Hrm. I guess I'll just put it into the PC specific initialization
function :).

>
> Does AHCI support drive hotplug btw?

AHCI does, my code does not yet :). Feel free to add support for it! :D


Alex
Gerd Hoffmann - Nov. 19, 2010, 9:12 a.m.
>>> +static void ahci_check_irq(AHCIState *s)
>>
>> MSI support would be nice to have.
>
> Alrighty, I added MSI support. But I only have a single vector in use
> atm as nvec>  0 doesn't get written to the PCI config space so the code
> trips on that later on.

For multiple vectors MSI-X is the best choice as it fixes a few design 
issues MSI has when it comes to multiple vectors.  I think linux never 
ever uses MSI with multiple vectors anyway.

cheers,
   Gerd
Joerg Roedel - Nov. 19, 2010, 10:08 a.m.
On Fri, Nov 19, 2010 at 04:12:43AM -0500, Gerd Hoffmann wrote:
> >>> +static void ahci_check_irq(AHCIState *s)
> >>
> >> MSI support would be nice to have.
> >
> > Alrighty, I added MSI support. But I only have a single vector in use
> > atm as nvec>  0 doesn't get written to the PCI config space so the code
> > trips on that later on.
> 
> For multiple vectors MSI-X is the best choice as it fixes a few design 
> issues MSI has when it comes to multiple vectors.  I think linux never 
> ever uses MSI with multiple vectors anyway.

Linux doesn't even support MSI with multiple vectors today.

	Joerg

Patch

diff --git a/Makefile.objs b/Makefile.objs
index 15569af..8dfcefa 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -229,6 +229,7 @@  hw-obj-$(CONFIG_IDE_PIIX) += ide/piix.o
 hw-obj-$(CONFIG_IDE_CMD646) += ide/cmd646.o
 hw-obj-$(CONFIG_IDE_MACIO) += ide/macio.o
 hw-obj-$(CONFIG_IDE_VIA) += ide/via.o
+hw-obj-$(CONFIG_AHCI) += ahci.o
 
 # SCSI layer
 hw-obj-y += lsi53c895a.o
diff --git a/Makefile.target b/Makefile.target
index 91e6e74..bd899cd 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -195,6 +195,10 @@  obj-y += e1000.o
 # Inter-VM PCI shared memory
 obj-$(CONFIG_KVM) += ivshmem.o
 
+# ahci
+#
+obj-$(CONFIG_AHCI) += ahci.o
+
 # Hardware support
 obj-i386-y += vga.o
 obj-i386-y += mc146818rtc.o i8259.o pc.o
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index ed00471..66b92af 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -19,6 +19,7 @@  CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
 CONFIG_IDE_ISA=y
 CONFIG_IDE_PIIX=y
+CONFIG_AHCI=y
 CONFIG_NE2000_ISA=y
 CONFIG_PIIX_PCI=y
 CONFIG_SOUND=y
diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak
index 5183203..508e843 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -19,6 +19,7 @@  CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
 CONFIG_IDE_ISA=y
 CONFIG_IDE_PIIX=y
+CONFIG_AHCI=y
 CONFIG_NE2000_ISA=y
 CONFIG_PIIX_PCI=y
 CONFIG_SOUND=y
diff --git a/hw/ahci.c b/hw/ahci.c
new file mode 100644
index 0000000..0ee021e
--- /dev/null
+++ b/hw/ahci.c
@@ -0,0 +1,1280 @@ 
+/*
+ * QEMU AHCI Emulation
+ *
+ * Copyright (c) 2010 qiaochong@loongson.cn
+ * Copyright (c) 2010 Roland Elek <elek.roland@gmail.com>
+ * Copyright (c) 2010 Sebastian Herbszt <herbszt@gmx.de>
+ * Copyright (c) 2010 Alexander Graf <agraf@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "hw.h"
+#include "monitor.h"
+#include "pci.h"
+#include "dma.h"
+#include "cpu-common.h"
+#include "scsi-defs.h"
+#include "scsi.h"
+#include "blockdev.h"
+#include <hw/ide/internal.h>
+#include <hw/ide/pci.h>
+
+/* #define DEBUG_AHCI */
+
+#ifdef DEBUG_AHCI
+#define DPRINTF(port, fmt, ...) \
+do { fprintf(stderr, "ahci: %s: [%d] ", __FUNCTION__, port); \
+     fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(port, fmt, ...) do {} while(0)
+#endif
+
+#define AHCI_PCI_BAR              5
+#define AHCI_MAX_PORTS            32
+#define AHCI_MAX_SG               168 /* hardware max is 64K */
+#define AHCI_DMA_BOUNDARY         0xffffffff
+#define AHCI_USE_CLUSTERING       0
+#define AHCI_MAX_CMDS             32
+#define AHCI_CMD_SZ               32
+#define AHCI_CMD_SLOT_SZ          (AHCI_MAX_CMDS * AHCI_CMD_SZ)
+#define AHCI_RX_FIS_SZ            256
+#define AHCI_CMD_TBL_CDB          0x40
+#define AHCI_CMD_TBL_HDR_SZ       0x80
+#define AHCI_CMD_TBL_SZ           (AHCI_CMD_TBL_HDR_SZ + (AHCI_MAX_SG * 16))
+#define AHCI_CMD_TBL_AR_SZ        (AHCI_CMD_TBL_SZ * AHCI_MAX_CMDS)
+#define AHCI_PORT_PRIV_DMA_SZ     (AHCI_CMD_SLOT_SZ + AHCI_CMD_TBL_AR_SZ + \
+                                   AHCI_RX_FIS_SZ)
+
+#define AHCI_IRQ_ON_SG            (1 << 31)
+#define AHCI_CMD_ATAPI            (1 << 5)
+#define AHCI_CMD_WRITE            (1 << 6)
+#define AHCI_CMD_PREFETCH         (1 << 7)
+#define AHCI_CMD_RESET            (1 << 8)
+#define AHCI_CMD_CLR_BUSY         (1 << 10)
+
+#define RX_FIS_D2H_REG            0x40 /* offset of D2H Register FIS data */
+#define RX_FIS_SDB                0x58 /* offset of SDB FIS data */
+#define RX_FIS_UNK                0x60 /* offset of Unknown FIS data */
+
+/* global controller registers */
+#define HOST_CAP                  0x00 /* host capabilities */
+#define HOST_CTL                  0x04 /* global host control */
+#define HOST_IRQ_STAT             0x08 /* interrupt status */
+#define HOST_PORTS_IMPL           0x0c /* bitmap of implemented ports */
+#define HOST_VERSION              0x10 /* AHCI spec. version compliancy */
+
+/* HOST_CTL bits */
+#define HOST_CTL_RESET            (1 << 0)  /* reset controller; self-clear */
+#define HOST_CTL_IRQ_EN           (1 << 1)  /* global IRQ enable */
+#define HOST_CTL_AHCI_EN          (1 << 31) /* AHCI enabled */
+
+/* HOST_CAP bits */
+#define HOST_CAP_SSC              (1 << 14) /* Slumber capable */
+#define HOST_CAP_AHCI             (1 << 18) /* AHCI only */
+#define HOST_CAP_CLO              (1 << 24) /* Command List Override support */
+#define HOST_CAP_SSS              (1 << 27) /* Staggered Spin-up */
+#define HOST_CAP_NCQ              (1 << 30) /* Native Command Queueing */
+#define HOST_CAP_64               (1 << 31) /* PCI DAC (64-bit DMA) support */
+
+/* registers for each SATA port */
+#define PORT_LST_ADDR             0x00 /* command list DMA addr */
+#define PORT_LST_ADDR_HI          0x04 /* command list DMA addr hi */
+#define PORT_FIS_ADDR             0x08 /* FIS rx buf addr */
+#define PORT_FIS_ADDR_HI          0x0c /* FIS rx buf addr hi */
+#define PORT_IRQ_STAT             0x10 /* interrupt status */
+#define PORT_IRQ_MASK             0x14 /* interrupt enable/disable mask */
+#define PORT_CMD                  0x18 /* port command */
+#define PORT_TFDATA               0x20 /* taskfile data */
+#define PORT_SIG                  0x24 /* device TF signature */
+#define PORT_SCR_STAT             0x28 /* SATA phy register: SStatus */
+#define PORT_SCR_CTL              0x2c /* SATA phy register: SControl */
+#define PORT_SCR_ERR              0x30 /* SATA phy register: SError */
+#define PORT_SCR_ACT              0x34 /* SATA phy register: SActive */
+#define PORT_CMD_ISSUE            0x38 /* command issue */
+#define PORT_RESERVED             0x3c /* reserved */
+
+/* PORT_IRQ_{STAT,MASK} bits */
+#define PORT_IRQ_COLD_PRES        (1 << 31) /* cold presence detect */
+#define PORT_IRQ_TF_ERR           (1 << 30) /* task file error */
+#define PORT_IRQ_HBUS_ERR         (1 << 29) /* host bus fatal error */
+#define PORT_IRQ_HBUS_DATA_ERR    (1 << 28) /* host bus data error */
+#define PORT_IRQ_IF_ERR           (1 << 27) /* interface fatal error */
+#define PORT_IRQ_IF_NONFATAL      (1 << 26) /* interface non-fatal error */
+#define PORT_IRQ_OVERFLOW         (1 << 24) /* xfer exhausted available S/G */
+#define PORT_IRQ_BAD_PMP          (1 << 23) /* incorrect port multiplier */
+
+#define PORT_IRQ_PHYRDY           (1 << 22) /* PhyRdy changed */
+#define PORT_IRQ_DEV_ILCK         (1 << 7) /* device interlock */
+#define PORT_IRQ_CONNECT          (1 << 6) /* port connect change status */
+#define PORT_IRQ_SG_DONE          (1 << 5) /* descriptor processed */
+#define PORT_IRQ_UNK_FIS          (1 << 4) /* unknown FIS rx'd */
+#define PORT_IRQ_SDB_FIS          (1 << 3) /* Set Device Bits FIS rx'd */
+#define PORT_IRQ_DMAS_FIS         (1 << 2) /* DMA Setup FIS rx'd */
+#define PORT_IRQ_PIOS_FIS         (1 << 1) /* PIO Setup FIS rx'd */
+#define PORT_IRQ_D2H_REG_FIS      (1 << 0) /* D2H Register FIS rx'd */
+
+#define PORT_IRQ_FREEZE           (PORT_IRQ_HBUS_ERR | PORT_IRQ_IF_ERR |   \
+                                   PORT_IRQ_CONNECT | PORT_IRQ_PHYRDY |    \
+                                   PORT_IRQ_UNK_FIS)
+#define PORT_IRQ_ERROR            (PORT_IRQ_FREEZE | PORT_IRQ_TF_ERR |     \
+                                   PORT_IRQ_HBUS_DATA_ERR)
+#define DEF_PORT_IRQ              (PORT_IRQ_ERROR | PORT_IRQ_SG_DONE |     \
+                                   PORT_IRQ_SDB_FIS | PORT_IRQ_DMAS_FIS |  \
+                                   PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS)
+
+/* PORT_CMD bits */
+#define PORT_CMD_ATAPI            (1 << 24) /* Device is ATAPI */
+#define PORT_CMD_LIST_ON          (1 << 15) /* cmd list DMA engine running */
+#define PORT_CMD_FIS_ON           (1 << 14) /* FIS DMA engine running */
+#define PORT_CMD_FIS_RX           (1 << 4) /* Enable FIS receive DMA engine */
+#define PORT_CMD_CLO              (1 << 3) /* Command list override */
+#define PORT_CMD_POWER_ON         (1 << 2) /* Power up device */
+#define PORT_CMD_SPIN_UP          (1 << 1) /* Spin up device */
+#define PORT_CMD_START            (1 << 0) /* Enable port DMA engine */
+
+#define PORT_CMD_ICC_MASK         (0xf << 28) /* i/f ICC state mask */
+#define PORT_CMD_ICC_ACTIVE       (0x1 << 28) /* Put i/f in active state */
+#define PORT_CMD_ICC_PARTIAL      (0x2 << 28) /* Put i/f in partial state */
+#define PORT_CMD_ICC_SLUMBER      (0x6 << 28) /* Put i/f in slumber state */
+
+#define PORT_IRQ_STAT_DHRS        (1 << 0) /* Device to Host Register FIS */
+#define PORT_IRQ_STAT_PSS         (1 << 1) /* PIO Setup FIS */
+#define PORT_IRQ_STAT_DSS         (1 << 2) /* DMA Setup FIS */
+#define PORT_IRQ_STAT_SDBS        (1 << 3) /* Set Device Bits */
+#define PORT_IRQ_STAT_UFS         (1 << 4) /* Unknown FIS */
+#define PORT_IRQ_STAT_DPS         (1 << 5) /* Descriptor Processed */
+#define PORT_IRQ_STAT_PCS         (1 << 6) /* Port Connect Change Status */
+#define PORT_IRQ_STAT_DMPS        (1 << 7) /* Device Mechanical Presence
+                                              Status */
+#define PORT_IRQ_STAT_PRCS        (1 << 22) /* File Ready Status */
+#define PORT_IRQ_STAT_IPMS        (1 << 23) /* Incorrect Port Multiplier
+                                               Status */
+#define PORT_IRQ_STAT_OFS         (1 << 24) /* Overflow Status */
+#define PORT_IRQ_STAT_INFS        (1 << 26) /* Interface Non-Fatal Error
+                                               Status */
+#define PORT_IRQ_STAT_IFS         (1 << 27) /* Interface Fatal Error */
+#define PORT_IRQ_STAT_HBDS        (1 << 28) /* Host Bus Data Error Status */
+#define PORT_IRQ_STAT_HBFS        (1 << 29) /* Host Bus Fatal Error Status */
+#define PORT_IRQ_STAT_TFES        (1 << 30) /* Task File Error Status */
+#define PORT_IRQ_STAT_CPDS        (1 << 31) /* Code Port Detect Status */
+
+/* ap->flags bits */
+#define AHCI_FLAG_NO_NCQ                  (1 << 24)
+#define AHCI_FLAG_IGN_IRQ_IF_ERR          (1 << 25) /* ignore IRQ_IF_ERR */
+#define AHCI_FLAG_HONOR_PI                (1 << 26) /* honor PORTS_IMPL */
+#define AHCI_FLAG_IGN_SERR_INTERNAL       (1 << 27) /* ignore SERR_INTERNAL */
+#define AHCI_FLAG_32BIT_ONLY              (1 << 28) /* force 32bit */
+
+#define ATA_SRST                          (1 << 2)  /* software reset */
+
+#define STATE_RUN                         0
+#define STATE_RESET                       1
+
+#define SATA_SCR_SSTATUS_DET_NODEV        0x0
+#define SATA_SCR_SSTATUS_DET_DEV_PRESENT_PHY_UP 0x3
+
+#define SATA_SCR_SSTATUS_SPD_NODEV        0x00
+#define SATA_SCR_SSTATUS_SPD_GEN1         0x10
+
+#define SATA_SCR_SSTATUS_IPM_NODEV        0x000
+#define SATA_SCR_SSTATUS_IPM_ACTIVE       0X100
+
+#define AHCI_SCR_SCTL_DET                 0xf
+
+#define SATA_FIS_TYPE_REGISTER_H2D        0x27
+#define SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER 0x80
+
+#define AHCI_CMD_HDR_CMD_FIS_LEN           0x1f
+#define AHCI_CMD_HDR_PRDT_LEN              16
+
+#define SATA_SIGNATURE_CDROM               0xeb140000
+#define SATA_SIGNATURE_DISK                0x00000101
+
+#define AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR 0x20
+                                            /* Shouldn't this be 0x2c? */
+
+#define SATA_PORTS                         4
+
+#define AHCI_PORT_REGS_START_ADDR          0x100
+#define AHCI_PORT_REGS_END_ADDR (AHCI_PORT_REGS_START_ADDR + SATA_PORTS * 0x80)
+#define AHCI_PORT_ADDR_OFFSET_MASK         0x7f
+
+#define AHCI_NUM_COMMAND_SLOTS             31
+#define AHCI_SUPPORTED_SPEED               20
+#define AHCI_SUPPORTED_SPEED_GEN1          1
+#define AHCI_VERSION_1_0                   0x10000
+
+#define AHCI_PROGMODE_MAJOR_REV_1          1
+
+#define AHCI_COMMAND_TABLE_ACMD            0x40
+
+#define IDE_FEATURE_DMA                    1
+
+#define READ_FPDMA_QUEUED                  0x60
+#define WRITE_FPDMA_QUEUED                 0x61
+
+#define RES_FIS_DSFIS                      0x00
+#define RES_FIS_PSFIS                      0x20
+#define RES_FIS_RFIS                       0x40
+#define RES_FIS_SDBFIS                     0x58
+#define RES_FIS_UFIS                       0x60
+
+typedef struct AHCIControlRegs {
+    uint32_t    cap;
+    uint32_t    ghc;
+    uint32_t    irqstatus;
+    uint32_t    impl;
+    uint32_t    version;
+} __attribute__ ((packed)) AHCIControlRegs;
+
+typedef struct AHCIPortRegs {
+    uint32_t    lst_addr;
+    uint32_t    lst_addr_hi;
+    uint32_t    fis_addr;
+    uint32_t    fis_addr_hi;
+    uint32_t    irq_stat;
+    uint32_t    irq_mask;
+    uint32_t    cmd;
+    uint32_t    unused0;
+    uint32_t    tfdata;
+    uint32_t    sig;
+    uint32_t    scr_stat;
+    uint32_t    scr_ctl;
+    uint32_t    scr_err;
+    uint32_t    scr_act;
+    uint32_t    cmd_issue;
+    uint32_t    reserved;
+} __attribute__ ((packed)) AHCIPortRegs;
+
+typedef struct AHCICmdHdr {
+    uint32_t    opts;
+    uint32_t    status;
+    uint64_t    tbl_addr;
+    uint32_t    reserved[4];
+} __attribute__ ((packed)) AHCICmdHdr;
+
+typedef struct AHCI_SG {
+    uint32_t    addr;
+    uint32_t    addr_hi;
+    uint32_t    reserved;
+    uint32_t    flags_size;
+} __attribute__ ((packed)) AHCI_SG;
+
+typedef struct AHCIDevice AHCIDevice;
+
+struct AHCIDevice {
+    IDEBus port;
+    BMDMAState bmdma;
+    int port_no;
+    uint32_t port_state;
+    uint32_t finished;
+    AHCIPortRegs port_regs;
+    struct AHCIState *hba;
+    uint8_t *lst;
+    uint8_t *res_fis;
+    uint8_t *cmd_fis;
+    int cmd_fis_len;
+    AHCICmdHdr *cur_cmd;
+};
+
+typedef struct NCQTransferState {
+    AHCIDevice *drive;
+    QEMUSGList sglist;
+    int is_read;
+    uint16_t sector_count;
+    uint64_t lba;
+    uint8_t tag;
+    int slot;
+    int used;
+} NCQTransferState;
+
+typedef struct AHCIState{
+    AHCIDevice dev[SATA_PORTS];
+    AHCIControlRegs control_regs;
+    int mem;
+    qemu_irq irq;
+    NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
+} AHCIState;
+
+typedef struct AHCIPciState {
+    PCIDevice card;
+    AHCIState ahci;
+} AHCIPciState;
+
+typedef struct H2D_NCQ_FIS {
+    uint8_t fis_type;
+    uint8_t c;
+    uint8_t command;
+    uint8_t sector_count_low;
+    uint8_t lba0;
+    uint8_t lba1;
+    uint8_t lba2;
+    uint8_t fua;
+    uint8_t lba3;
+    uint8_t lba4;
+    uint8_t lba5;
+    uint8_t sector_count_high;
+    uint8_t tag;
+    uint8_t reserved5;
+    uint8_t reserved6;
+    uint8_t control;
+    uint8_t reserved7;
+    uint8_t reserved8;
+    uint8_t reserved9;
+    uint8_t reserved10;
+} __attribute__ ((packed)) H2D_NCQ_FIS;
+
+static void ahci_irq_set_fn(IDEBus *s);
+
+static void check_cmd(AHCIState *s, int port);
+static int handle_cmd(AHCIState *s,int port,int slot);
+static void ahci_reset_port(AHCIState *s, int port);
+static void ahci_write_fis_d2h(AHCIState *s, int port, uint8_t *cmd_fis);
+
+static uint32_t  ahci_port_read(AHCIState *s, int port, int offset)
+{
+    uint32_t val;
+    uint32_t *p;
+    AHCIPortRegs *pr;
+    pr = &s->dev[port].port_regs;
+
+    switch (offset) {
+        case PORT_SCR_STAT:
+            if (s->dev[port].port.ifs[0].bs) {
+                val = SATA_SCR_SSTATUS_DET_DEV_PRESENT_PHY_UP |
+                      SATA_SCR_SSTATUS_SPD_GEN1 | SATA_SCR_SSTATUS_IPM_ACTIVE;
+            } else {
+                val = SATA_SCR_SSTATUS_DET_NODEV;
+            }
+            break;
+        case PORT_IRQ_STAT:
+            val = pr->irq_stat;
+            break;
+        case PORT_CMD_ISSUE:
+            val = 0;
+            break;
+        case PORT_SCR_ACT:
+            pr->scr_act &= ~s->dev[port].finished;
+            s->dev[port].finished = 0;
+            val = pr->scr_act;
+            break;
+        case PORT_TFDATA:
+        case PORT_SIG:
+        case PORT_SCR_CTL:
+        case PORT_SCR_ERR:
+        default:
+            p = (uint32_t *)&s->dev[port].port_regs;
+            val = p[offset / sizeof(*p)];
+            break;
+    }
+    DPRINTF(port, "offset: 0x%x val: 0x%x\n", offset, val);
+    return val;
+
+}
+
+static void ahci_trigger_irq(AHCIState *s, AHCIDevice *d,
+                             int irq_type)
+{
+    DPRINTF(d->port_no, "trigger irq %#x -> %x\n",
+            irq_type, d->port_regs.irq_mask & irq_type);
+
+    d->port_regs.irq_stat |= irq_type;
+
+    /* Only trigger an interrupt if unmasked */
+    if (d->port_regs.irq_mask & irq_type) {
+        s->control_regs.irqstatus |= (1 << d->port_no);
+        if (s->control_regs.ghc & HOST_CTL_IRQ_EN) {
+            qemu_irq_raise(s->irq);
+        }
+    }
+}
+
+static void ahci_check_irq(AHCIState *s)
+{
+    DPRINTF(-1, "check irq %#x\n", s->control_regs.irqstatus);
+
+    if (s->control_regs.irqstatus &&
+        (s->control_regs.ghc & HOST_CTL_IRQ_EN)) {
+            qemu_irq_raise(s->irq);
+    }
+}
+
+static void map_page(uint8_t **ptr, uint64_t addr)
+{
+    target_phys_addr_t len = 4096;
+
+    if (*ptr) {
+        cpu_physical_memory_unmap(*ptr, 1, len, len);
+    }
+
+    *ptr = cpu_physical_memory_map(addr, &len, 1);
+    if (len < 4096) {
+        *ptr = NULL;
+    }
+}
+
+static void  ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
+{
+    AHCIPortRegs *pr = &s->dev[port].port_regs;
+    uint32_t *p;
+
+    DPRINTF(port, "offset: 0x%x val: 0x%x\n", offset, val);
+    switch (offset) {
+        case PORT_LST_ADDR:
+            pr->lst_addr = val;
+            map_page(&s->dev[port].lst,
+                     ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr);
+            break;
+        case PORT_LST_ADDR_HI:
+            pr->lst_addr_hi = val;
+            map_page(&s->dev[port].lst,
+                     ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr);
+            break;
+        case PORT_FIS_ADDR:
+            pr->fis_addr = val;
+            map_page(&s->dev[port].res_fis,
+                     ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr);
+            break;
+        case PORT_FIS_ADDR_HI:
+            pr->fis_addr_hi = val;
+            map_page(&s->dev[port].res_fis,
+                     ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr);
+            break;
+        case PORT_IRQ_STAT:
+            pr->irq_stat &= ~val;
+            break;
+        case PORT_IRQ_MASK:
+            pr->irq_mask = val & 0xfdc000ff;
+            break;
+        case PORT_CMD:
+            pr->cmd = val & ~(PORT_CMD_LIST_ON | PORT_CMD_FIS_ON);
+
+            if (pr->cmd & PORT_CMD_START) {
+                pr->cmd |= PORT_CMD_LIST_ON;
+            }
+
+            if (pr->cmd & PORT_CMD_FIS_RX) {
+                pr->cmd |= PORT_CMD_FIS_ON;
+            }
+
+            check_cmd(s, port);
+            break;
+        case PORT_CMD_ISSUE:
+            pr->cmd_issue |= val;
+            check_cmd(s, port);
+            break;
+        case PORT_SCR_ERR:
+            pr->scr_err &= ~val;
+            break;
+        case PORT_SCR_ACT:
+            /* RW1 */
+            pr->scr_act |= val;
+            break;
+        case PORT_SCR_CTL:
+            if (((pr->scr_ctl & AHCI_SCR_SCTL_DET) == 1) &&
+                ((val & AHCI_SCR_SCTL_DET) == 0)) {
+                ahci_reset_port(s, port);
+            }
+            pr->scr_ctl = val;
+            break;
+        case PORT_TFDATA:
+        case PORT_SIG:
+        case PORT_SCR_STAT:
+        default:
+            p = (uint32_t *)pr;
+            p[offset / sizeof(*p)] = val;
+            break;
+    }
+
+}
+
+static uint32_t ahci_mem_readl(void *ptr, target_phys_addr_t addr)
+{
+    AHCIState *s = ptr;
+    uint32_t val;
+    uint32_t *p;
+    addr = addr & 0xfff;
+    if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) {
+        switch (addr) {
+            case HOST_IRQ_STAT:
+            default:
+                /* genernal host control */
+                p = (uint32_t *)&s->control_regs;
+                val = p[addr / sizeof(*p)];
+        }
+    } else if((addr >= AHCI_PORT_REGS_START_ADDR) &&
+              (addr < AHCI_PORT_REGS_END_ADDR)) {
+        val = ahci_port_read(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7,
+                             addr & AHCI_PORT_ADDR_OFFSET_MASK);
+    } else {
+        val = 0;
+    }
+
+    DPRINTF(-1, "(addr 0x%08X), val 0x%08X\n", (unsigned) addr, val);
+
+    return val;
+}
+
+
+
+static void ahci_mem_writel(void *ptr, target_phys_addr_t addr, uint32_t val)
+{
+    AHCIState *s = ptr;
+    addr = addr & 0xfff;
+    int i;
+
+    /* Only aligned reads are allowed on AHCI */
+    if (addr & 3) {
+        fprintf(stderr, "ahci: Mis-aligned write to addr 0x"
+                TARGET_FMT_plx "\n", addr);
+        return;
+    }
+
+    if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) {
+        switch (addr) {
+            case HOST_CAP: /* R/WO, RO */
+                /* FIXME handle R/WO */
+                break;
+            case HOST_CTL: /* R/W */
+                if (val & HOST_CTL_RESET) {
+                    DPRINTF(-1, "HBA Reset\n");
+                    /* FIXME reset? */
+                } else {
+                    s->control_regs.ghc = (val & 0x3) | HOST_CTL_AHCI_EN;
+                    ahci_check_irq(s);
+                }
+                break;
+            case HOST_IRQ_STAT: /* R/WC, RO */
+                s->control_regs.irqstatus &= ~val;
+                for (i = 0; i < SATA_PORTS; i++) {
+                    if (s->dev[i].port_regs.irq_stat) {
+                        s->control_regs.irqstatus |= (1 << i);
+                        qemu_irq_lower(s->irq);
+                        qemu_irq_raise(s->irq);
+                    }
+                }
+                if (!s->control_regs.irqstatus) {
+                    qemu_irq_lower(s->irq);
+                }
+                break;
+            case HOST_PORTS_IMPL: /* R/WO, RO */
+                /* FIXME handle R/WO */
+                break;
+            case HOST_VERSION: /* RO */
+                /* FIXME report write? */
+                break;
+            default:
+                DPRINTF(-1, "write to unknown register 0x%x\n", (unsigned)addr);
+        }
+    } else if((addr >= AHCI_PORT_REGS_START_ADDR) &&
+              (addr < AHCI_PORT_REGS_END_ADDR)) {
+        ahci_port_write(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7,
+                        addr & AHCI_PORT_ADDR_OFFSET_MASK, val);
+    }
+
+    DPRINTF(-1, "(addr 0x%08X), val 0x%08X\n", (unsigned) addr, val);
+
+}
+
+static CPUReadMemoryFunc *ahci_readfn[3]={
+    ahci_mem_readl,
+    ahci_mem_readl,
+    ahci_mem_readl
+};
+
+static CPUWriteMemoryFunc *ahci_writefn[3]={
+    ahci_mem_writel,
+    ahci_mem_writel,
+    ahci_mem_writel
+};
+
+static void ahci_reg_init(AHCIState *s)
+{
+    int i;
+
+    s->control_regs.cap = (SATA_PORTS - 1) |
+                          (AHCI_NUM_COMMAND_SLOTS << 8) |
+                          (AHCI_SUPPORTED_SPEED_GEN1 << AHCI_SUPPORTED_SPEED) |
+                          HOST_CAP_NCQ | HOST_CAP_AHCI;
+
+    s->control_regs.impl = (1 << SATA_PORTS) - 1;
+
+    s->control_regs.version = AHCI_VERSION_1_0;
+
+    for (i = 0; i < SATA_PORTS; i++) {
+        s->dev[i].port_state = STATE_RUN;
+    }
+}
+
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+static uint32_t write_to_sglist(uint8_t *buffer, uint32_t len,
+                                QEMUSGList *sglist)
+{
+    uint32_t i = 0;
+    uint32_t total = 0, once;
+    ScatterGatherEntry *cur_prd;
+    uint32_t sgcount;
+
+    cur_prd = sglist->sg;
+    sgcount = sglist->nsg;
+    for (i = 0; len && sgcount; i++) {
+        once = min(cur_prd->len + 1, len);
+        cpu_physical_memory_write(cur_prd->base, buffer, once);
+        cur_prd++;
+        sgcount--;
+        len -= once;
+        buffer += once;
+        total += once;
+    }
+
+    return total;
+}
+
+static void check_cmd(AHCIState *s, int port)
+{
+    AHCIPortRegs *pr = &s->dev[port].port_regs;
+    int slot;
+
+    if (pr->cmd & PORT_CMD_START) {
+        for (slot = 0; (slot < 32) && pr->cmd_issue; slot++) {
+            if ((pr->cmd_issue & (1 << slot)) &&
+                !handle_cmd(s, port, slot)) {
+                pr->cmd_issue &= ~(1 << slot);
+            }
+        }
+    }
+}
+
+static void ahci_reset_port(AHCIState *s, int port)
+{
+    IDEState *ide_state;
+    uint8_t init_fis[0x20];
+    uint32_t tfd;
+
+    DPRINTF(port, "reset port\n");
+
+    ide_state = &s->dev[port].port.ifs[0];
+    if (!ide_state->bs) {
+        return;
+    }
+
+    memset(init_fis, 0, sizeof(init_fis));
+    s->dev[port].port_state = STATE_RUN;
+    if (!ide_state->bs) {
+        s->dev[port].port_regs.sig = 0;
+        tfd = (1 << 8) | SEEK_STAT | WRERR_STAT;
+    } else if (ide_state->drive_kind == IDE_CD) {
+        s->dev[port].port_regs.sig = SATA_SIGNATURE_CDROM;
+        ide_state->lcyl = 0x14;
+        ide_state->hcyl = 0xeb;
+        DPRINTF(port, "set lcyl = %d\n", ide_state->lcyl);
+        init_fis[5] = ide_state->lcyl;
+        init_fis[6] = ide_state->hcyl;
+        tfd = (1 << 8) | SEEK_STAT | WRERR_STAT | READY_STAT;
+    } else {
+        s->dev[port].port_regs.sig = SATA_SIGNATURE_DISK;
+        tfd = (1 << 8) | SEEK_STAT | WRERR_STAT;
+    }
+
+    ide_state->error = 1;
+    ide_state->status = 0;
+    init_fis[4] = 1;
+    init_fis[12] = 1;
+    ahci_write_fis_d2h(s, port, init_fis);
+
+    s->dev[port].port_regs.tfdata = tfd;
+}
+
+static void debug_print_fis(uint8_t *fis, int cmd_len)
+{
+#ifdef DEBUG_AHCI
+    int i;
+
+    fprintf(stderr, "fis:");
+    for (i = 0; i < cmd_len; i++) {
+        if ((i & 0xf) == 0) {
+            fprintf(stderr, "\n%02x:",i);
+        }
+        fprintf(stderr, "%02x ",fis[i]);
+    }
+    fprintf(stderr, "\n");
+#endif
+}
+
+static void ahci_write_fis_sdb(AHCIState *s, int port, uint32_t finished)
+{
+    AHCIPortRegs *pr = &s->dev[port].port_regs;
+    IDEState *ide_state;
+    uint8_t *sdb_fis;
+
+    if (!s->dev[port].res_fis ||
+        !(pr->cmd & PORT_CMD_FIS_RX)) {
+        return;
+    }
+
+    sdb_fis = &s->dev[port].res_fis[RES_FIS_SDBFIS];
+    ide_state = &s->dev[port].port.ifs[0];
+
+    pr->tfdata = (uint16_t)ide_state->error << 8 | ide_state->status;
+
+    /* clear memory */
+    *(uint32_t*)sdb_fis = 0;
+
+    /* write values */
+    sdb_fis[0] = ide_state->error;
+    sdb_fis[2] = ide_state->status & 0x77;
+    s->dev[port].finished |= finished;
+    *(uint32_t*)(sdb_fis + 4) = cpu_to_le32(s->dev[port].finished);
+
+    ahci_trigger_irq(s, &s->dev[port], PORT_IRQ_STAT_SDBS);
+}
+
+static void ahci_write_fis_d2h(AHCIState *s, int port, uint8_t *cmd_fis)
+{
+    AHCIPortRegs *pr = &s->dev[port].port_regs;
+    uint8_t *d2h_fis;
+    int i;
+
+    if (!s->dev[port].res_fis ||
+        !(pr->cmd & PORT_CMD_FIS_RX)) {
+        return;
+    }
+
+    d2h_fis = &s->dev[port].res_fis[RES_FIS_RFIS];
+
+    d2h_fis[0] = 0x34;
+    d2h_fis[1] = (s->control_regs.irqstatus ? (1 << 6) : 0);
+    d2h_fis[2] = s->dev[port].port.ifs[0].status;
+    d2h_fis[3] = s->dev[port].port.ifs[0].error;
+
+    d2h_fis[4] = cmd_fis[4];
+    d2h_fis[5] = cmd_fis[5];
+    d2h_fis[6] = cmd_fis[6];
+    d2h_fis[7] = cmd_fis[7];
+    d2h_fis[8] = cmd_fis[8];
+    d2h_fis[9] = cmd_fis[9];
+    d2h_fis[10] = cmd_fis[10];
+    d2h_fis[11] = cmd_fis[11];
+    d2h_fis[12] = cmd_fis[12];
+    d2h_fis[13] = cmd_fis[13];
+    for (i = 14; i < 0x20; i++) {
+        d2h_fis[i] = 0;
+    }
+
+    pr->tfdata = (uint16_t)d2h_fis[3] << 8 | d2h_fis[2];
+
+    if (d2h_fis[2] & ERR_STAT) {
+        ahci_trigger_irq(s, &s->dev[port], PORT_IRQ_STAT_TFES);
+    }
+
+    ahci_trigger_irq(s, &s->dev[port], PORT_IRQ_D2H_REG_FIS);
+}
+
+static void ncq_cb(void *opaque, int ret)
+{
+    NCQTransferState *ncq_tfs = (NCQTransferState *)opaque;
+    IDEState *ide_state;
+
+    if (ret < 0) {
+        /* error */
+    }
+
+    /* Clear bit for this tag in SActive */
+    ncq_tfs->drive->port_regs.scr_act &= ~(1 << ncq_tfs->tag);
+
+    ide_state = &ncq_tfs->drive->port.ifs[0];
+    ide_state->status = READY_STAT | SEEK_STAT;
+
+    /* XXX do we send a d2h fis here? */
+    ahci_write_fis_d2h(ncq_tfs->drive->hba, ncq_tfs->drive->port_no,
+                       ncq_tfs->drive->cmd_fis);
+
+    ahci_write_fis_sdb(ncq_tfs->drive->hba, ncq_tfs->drive->port_no,
+                       (1 << ncq_tfs->tag));
+
+    DPRINTF(ncq_tfs->drive->port_no, "NCQ transfer tag %d finished\n",
+            ncq_tfs->tag);
+
+    qemu_sglist_destroy(&ncq_tfs->sglist);
+    cpu_physical_memory_unmap(ncq_tfs->drive->cmd_fis, 1,
+                              ncq_tfs->drive->cmd_fis_len,
+                              ncq_tfs->drive->cmd_fis_len);
+
+    ncq_tfs->used = 0;
+}
+
+static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
+                                int slot, QEMUSGList *sg)
+{
+    H2D_NCQ_FIS *ncq_fis = (H2D_NCQ_FIS*)cmd_fis;
+    uint8_t tag = ncq_fis->tag >> 3;
+    NCQTransferState *ncq_tfs = &s->ncq_tfs[tag];
+
+    if (ncq_tfs->used) {
+        /* error - already in use */
+        fprintf(stderr, "%s: tag %d already used\n", __FUNCTION__, tag);
+        return;
+    }
+
+    ncq_tfs->used = 1;
+    ncq_tfs->drive = &s->dev[port];
+    ncq_tfs->drive->cmd_fis = cmd_fis;
+    ncq_tfs->drive->cmd_fis_len = 0x20;
+    ncq_tfs->slot = slot;
+    ncq_tfs->lba = ((uint64_t)ncq_fis->lba5 << 40) |
+                   ((uint64_t)ncq_fis->lba4 << 32) |
+                   ((uint64_t)ncq_fis->lba3 << 24) |
+                   ((uint64_t)ncq_fis->lba2 << 16) |
+                   ((uint64_t)ncq_fis->lba1 << 8) |
+                   (uint64_t)ncq_fis->lba0;
+
+    /* Note: We calculate the sector count, but don't currently rely on it.
+     * The total size of the DMA buffer tells us the transfer size instead. */
+    ncq_tfs->sector_count = ((uint16_t)ncq_fis->sector_count_high << 8) |
+                                ncq_fis->sector_count_low;
+
+    DPRINTF(port, "NCQ transfer LBA from %ld to %ld, drive max %ld\n",
+            ncq_tfs->lba, ncq_tfs->lba + ncq_tfs->sector_count - 2,
+            s->dev[port].port.ifs[0].nb_sectors - 1);
+
+    ncq_tfs->sglist = *sg;
+    ncq_tfs->tag = tag;
+
+    switch(ncq_fis->command) {
+        case READ_FPDMA_QUEUED:
+            DPRINTF(port, "NCQ reading %d sectors from LBA %ld, tag %d\n",
+                    ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag);
+            ncq_tfs->is_read = 1;
+
+            /* XXX: The specification is unclear about whether the DMA Setup
+             * FIS here should have the I bit set, but it suggest that it should
+             * not. Linux works without this interrupt, so I disabled it.
+             * If someone knows if it is needed, please tell me, or fix this. */
+
+            /* ahci_trigger_irq(s,s->dev[port],PORT_IRQ_STAT_DSS); */
+            DPRINTF(port, "tag %d aio read %ld\n", ncq_tfs->tag, ncq_tfs->lba);
+            dma_bdrv_read(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->sglist,
+                          ncq_tfs->lba, ncq_cb, ncq_tfs);
+            break;
+        case WRITE_FPDMA_QUEUED:
+            DPRINTF(port, "NCQ writing %d sectors to LBA %ld, tag %d\n",
+                    ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag);
+            ncq_tfs->is_read = 0;
+            /* ahci_trigger_irq(s,s->dev[port],PORT_IRQ_STAT_DSS); */
+            DPRINTF(port, "tag %d aio write %ld\n", ncq_tfs->tag, ncq_tfs->lba);
+            dma_bdrv_write(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->sglist,
+                           ncq_tfs->lba, ncq_cb, ncq_tfs);
+            break;
+        default:
+            hw_error("ahci: tried to process non-NCQ command as NCQ\n");
+            break;
+    }
+}
+
+static int handle_cmd(AHCIState *s, int port, int slot)
+{
+    IDEState *ide_state;
+
+    int sglist_alloc_hint;
+    QEMUSGList sglist;
+    int atapi_packet_len = 0;
+    AHCIPortRegs *pr;
+    uint32_t opts;
+    uint64_t tbl_addr;
+    AHCICmdHdr *cmd;
+    uint8_t *cmd_fis;
+
+    target_phys_addr_t cmd_len;
+    int i;
+
+    pr = &s->dev[port].port_regs;
+    cmd = (AHCICmdHdr *)&s->dev[port].lst[slot * 32];
+
+    if (!s->dev[port].lst) {
+        hw_error("%s: lst not given but cmd handled", __FUNCTION__);
+    }
+
+    opts = le32_to_cpu(cmd->opts);
+    tbl_addr = le64_to_cpu(cmd->tbl_addr);
+
+    cmd_len = (opts & AHCI_CMD_HDR_CMD_FIS_LEN) * 4;
+    cmd_fis = cpu_physical_memory_map(tbl_addr, &cmd_len, 1);
+
+    /* The device we are working for */
+    ide_state = &s->dev[port].port.ifs[0];
+
+    if (!ide_state->bs) {
+        hw_error("%s: guest accessed unused port", __FUNCTION__);
+    }
+
+    /* Get number of entries in the PRDT, init a qemu sglist accordingly */
+    sglist_alloc_hint = opts >> AHCI_CMD_HDR_PRDT_LEN;
+    memset(&sglist, 0, sizeof(sglist));
+
+    if (sglist_alloc_hint > 0) {
+        qemu_sglist_init(&sglist, sglist_alloc_hint);
+        /* Parse the PRDs and create qemu sglist entries accordingly */
+        for (i = 0; i < sglist_alloc_hint; i++) {
+            target_phys_addr_t cur_prd_addr;
+
+            cur_prd_addr = tbl_addr + 0x80 + i * sizeof(AHCI_SG);
+            /* flags_size is zero-based */
+            qemu_sglist_add(&sglist,
+                    ldl_phys(cur_prd_addr + offsetof(AHCI_SG, addr)),
+                    ldl_phys(cur_prd_addr + offsetof(AHCI_SG, flags_size)) + 1);
+        }
+    }
+
+    debug_print_fis(cmd_fis, cmd_len);
+
+    switch (cmd_fis[0]) {
+        case SATA_FIS_TYPE_REGISTER_H2D:
+            break;
+        default:
+            hw_error("unknown command cmd_fis[0]=%02x cmd_fis[1]=%02x cmd_fis[2]=%02x\n",
+                     cmd_fis[0], cmd_fis[1], cmd_fis[2]);
+            break;
+    }
+
+    switch (cmd_fis[1]) {
+        case SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER:
+            break;
+        case 0:
+            break;
+        default:
+            hw_error("unknown command cmd_fis[0]=%02x cmd_fis[1]=%02x cmd_fis[2]=%02x\n",
+                     cmd_fis[0], cmd_fis[1], cmd_fis[2]);
+            break;
+    }
+
+    switch (s->dev[port].port_state) {
+        case STATE_RUN:
+            if (cmd_fis[15] & ATA_SRST) {
+                s->dev[port].port_state = STATE_RESET;
+            }
+            break;
+        case STATE_RESET:
+            if (!(cmd_fis[15] & ATA_SRST)) {
+                ahci_reset_port(s, port);
+            }
+            break;
+    }
+
+    if (cmd_fis[1] == SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER) {
+
+        /* Check for NCQ command */
+        if ((cmd_fis[2] == READ_FPDMA_QUEUED) ||
+            (cmd_fis[2] == WRITE_FPDMA_QUEUED)) {
+            process_ncq_command(s, port, cmd_fis, slot, &sglist);
+            goto out;
+        }
+
+        /* If the command is not NCQ, the sglist is needed in the core */
+        ide_state->sg = sglist;
+
+        /* Decompose the FIS  */
+        ide_state->nsector = (int64_t)((cmd_fis[13] << 8) | cmd_fis[12]);
+        if (!ide_state->nsector) {
+            ide_state->nsector = 256;
+        }
+
+        if (ide_state->drive_kind != IDE_CD) {
+            ide_set_sector(ide_state, (cmd_fis[6] << 16) | (cmd_fis[5] << 8) |
+                           cmd_fis[4]);
+        }
+
+        /* Copy the ACMD field (ATAPI packet, if any) from the AHCI command
+         * table to ide_state->io_buffer
+         */
+        if (opts & AHCI_CMD_ATAPI) {
+            atapi_packet_len = ((ide_state->hcyl) << 8) + ide_state->lcyl;
+            cpu_physical_memory_read(tbl_addr + AHCI_COMMAND_TABLE_ACMD,
+                                     ide_state->io_buffer, 0x10);
+        }
+
+        ide_state->error = 0;
+        s->dev[port].cur_cmd = cmd;
+
+        /* We're ready to process the command in FIS byte 2. */
+        ide_exec_cmd(&s->dev[port].port, cmd_fis[2]);
+
+        /* we're DMA'ing, so we're not ready yet, postpone cleanup to later */
+        if (s->dev[port].bmdma.status & BM_STATUS_DMAING) {
+            cmd->status = 0;
+            s->dev[port].cmd_fis = cmd_fis;
+            s->dev[port].cmd_fis_len = cmd_len;
+            return 0;
+        }
+
+        ahci_write_fis_d2h(s, port, cmd_fis);
+    }
+
+out:
+    cpu_physical_memory_unmap(cmd_fis, 1, cmd_len, cmd_len);
+
+    return 0;
+}
+
+static void ahci_transfer_start(IDEState *s, uint8_t *buf, int size,
+                                EndTransferFunc *end_transfer_func)
+{
+    AHCIDevice *ad;
+    AHCIState *as;
+
+    s->end_transfer_func = end_transfer_func;
+
+    ad = DO_UPCAST(AHCIDevice, port, s->bus);
+    as = ad->hba;
+
+    write_to_sglist(buf, (uint32_t)size, &s->sg);
+
+    /* update number of transferred bytes */
+    ad->cur_cmd->status = cpu_to_le32(le32_to_cpu(ad->cur_cmd->status) + size);
+
+    end_transfer_func(s);
+}
+
+static void ahci_dma_start_fn(IDEState *s, BlockDriverCompletionFunc *dma_cb)
+{
+    AHCIDevice *ad;
+    AHCIState *as;
+    BMDMAState *bm = s->bus->bmdma;
+
+    ad = DO_UPCAST(AHCIDevice, port, s->bus);
+    as = ad->hba;
+
+    if (!bm) {
+        return;
+    }
+
+    bm->unit = s->unit;
+    bm->dma_cb = dma_cb;
+    bm->cur_prd_last = 0;
+    bm->cur_prd_addr = 0;
+    bm->cur_prd_len = 0;
+    bm->cur_addr = 0;
+    bm->sector_num = ide_get_sector(s);
+    bm->nsector = s->nsector;
+    bmdma_cmd_writeb(bm, 0, 1);
+}
+
+static int ahci_dma_buf_prepare(BMDMAState *bm, int is_write)
+{
+    IDEState *s = bmdma_active_if(bm);
+    int i;
+
+    s->io_buffer_size = 0;
+    for (i = 0; i < s->sg.nsg; i++) {
+        s->io_buffer_size += s->sg.sg[i].len;
+    }
+
+    DPRINTF(-1, "len=%#x\n", s->io_buffer_size);
+    return s->io_buffer_size != 0;
+}
+
+static int ahci_dma_buf_rw(BMDMAState *bm, int is_write)
+{
+    IDEState *s = bmdma_active_if(bm);
+    int l, len;
+
+    for (;;) {
+        l = s->io_buffer_size - s->io_buffer_index;
+
+        DPRINTF(-1, "size=%#x idx=%#x l=%#x\n",
+                s->io_buffer_size, s->io_buffer_index, l);
+
+        if (l <= 0) {
+            break;
+        }
+
+        if (bm->cur_prd_len == 0) {
+            /* end of table */
+            if (bm->cur_prd_last) {
+                bm->cur_addr = 0;
+                return 0;
+            }
+
+            len = s->sg.sg[bm->cur_addr].len;
+            bm->cur_prd_len = len;
+            bm->cur_prd_addr = s->sg.sg[bm->cur_addr].base;
+
+            DPRINTF(-1, "[%d] base=%#x len=%#x\n",
+                    bm->cur_addr, bm->cur_prd_addr, len);
+
+            bm->cur_addr++;
+            bm->cur_prd_last = (bm->cur_addr == s->sg.nsg);
+        }
+
+        if (l > bm->cur_prd_len) {
+            l = bm->cur_prd_len;
+        }
+
+        if (l > 0) {
+            if (is_write) {
+                cpu_physical_memory_write(bm->cur_prd_addr,
+                                          s->io_buffer + s->io_buffer_index, l);
+            } else {
+                cpu_physical_memory_read(bm->cur_prd_addr,
+                                          s->io_buffer + s->io_buffer_index, l);
+            }
+            bm->cur_prd_addr += l;
+            bm->cur_prd_len -= l;
+            s->io_buffer_index += l;
+        }
+    }
+
+    return 1;
+}
+
+static void ahci_irq_set_fn(IDEBus *s)
+{
+    AHCIDevice *ad;
+    AHCIState *as;
+
+    ad = DO_UPCAST(AHCIDevice, port, s);
+    as = ad->hba;
+
+    /* error interrupts will be triggered later */
+    if (ad->port.ifs[0].status & ERR_STAT) {
+        return;
+    }
+
+    /* DMA is done */
+    /* XXX find actual end point of a DMA and only do then */
+    if (!(ad->bmdma.status & BM_STATUS_DMAING)) {
+        ahci_trigger_irq(as, ad, PORT_IRQ_STAT_DSS);
+    }
+
+    /* update d2h status */
+    if (ad->cmd_fis) {
+        ahci_write_fis_d2h(as, ad->port_no, ad->cmd_fis);
+        cpu_physical_memory_unmap(ad->cmd_fis, 1, ad->cmd_fis_len, ad->cmd_fis_len);
+        ad->cmd_fis = NULL;
+    }
+}
+
+static IDEBusOps ahci_bus_ops = {
+    .transfer_start_fn = ahci_transfer_start,
+    .irq_set_fn = ahci_irq_set_fn,
+    .dma_start_fn = ahci_dma_start_fn,
+    .dma_prepare_fn = ahci_dma_buf_prepare,
+    .dma_rw_fn = ahci_dma_buf_rw,
+};
+
+static void ahci_init(AHCIState *s, DeviceState *qdev)
+{
+    int i;
+
+    ahci_reg_init(s);
+    s->mem = cpu_register_io_memory(ahci_readfn, ahci_writefn, s);
+
+    for (i = 0; i < SATA_PORTS; i++) {
+        DriveInfo *dinfo = drive_get(IF_SATA, 0, i);
+        AHCIDevice *ad = &s->dev[i];
+
+        ide_bus_new(&ad->port, qdev);
+        ide_init2(&ad->port, 0);
+
+        if (dinfo) {
+            ide_create_drive(&ad->port, 0, dinfo);
+        }
+
+        ad->hba = s;
+        ad->port_no = i;
+        ad->port.bmdma = &ad->bmdma;
+        ad->bmdma.bus = &ad->port;
+        ad->port.ops = &ahci_bus_ops;
+        ad->port_regs.cmd = PORT_CMD_SPIN_UP | PORT_CMD_POWER_ON;
+    }
+}
+
+static void ahci_pci_map(PCIDevice *pci_dev, int region_num,
+        pcibus_t addr, pcibus_t size, int type)
+{
+    struct AHCIPciState *d = (struct AHCIPciState *)pci_dev;
+    AHCIState *s = &d->ahci;
+
+    cpu_register_physical_memory(addr, size, s->mem);
+}
+
+static void ahci_reset(void *opaque)
+{
+    struct AHCIPciState *d = opaque;
+    int i;
+
+    for (i = 0; i < SATA_PORTS; i++) {
+        AHCIDevice *ad = &d->ahci.dev[i];
+
+        ide_bus_reset(&d->ahci.dev[i].port);
+        ide_dma_reset(&d->ahci.dev[i].bmdma);
+
+        ad->port.ifs[0].feature |= IDE_FEATURE_DMA;
+        ad->port.ifs[0].ncq_queues = AHCI_MAX_CMDS;
+    }
+
+    pci_config_set_vendor_id(d->card.config, PCI_VENDOR_ID_INTEL);
+    pci_config_set_device_id(d->card.config, PCI_DEVICE_ID_INTEL_ICH7M_AHCI);
+    d->card.config[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
+                                  PCI_COMMAND_MASTER;
+
+    pci_config_set_class(d->card.config, PCI_CLASS_STORAGE_SATA);
+    pci_config_set_prog_interface(d->card.config, AHCI_PROGMODE_MAJOR_REV_1);
+
+    d->card.config[PCI_CACHE_LINE_SIZE] = 0x08;  /* Cache line size */
+    d->card.config[PCI_LATENCY_TIMER]   = 0x00;  /* Latency timer */
+    d->card.config[PCI_HEADER_TYPE]     = PCI_HEADER_TYPE_NORMAL;
+    d->card.config[PCI_INTERRUPT_PIN]   = 1;     /* interrupt pin 0 */
+}
+
+static int pci_ahci_init(PCIDevice *dev)
+{
+    struct AHCIPciState *d;
+    d = DO_UPCAST(struct AHCIPciState, card, dev);
+
+    qemu_register_reset(ahci_reset, d);
+
+    pci_register_bar(&d->card, 5, 0x400, PCI_BASE_ADDRESS_SPACE_MEMORY,
+                     ahci_pci_map);
+
+    ahci_init(&d->ahci, &dev->qdev);
+    d->ahci.irq = d->card.irq[0];
+
+    return 0;
+}
+
+static int pci_ahci_uninit(PCIDevice *dev)
+{
+    return 0;
+}
+
+static PCIDeviceInfo ahci_info = {
+    .qdev.name  = "ahci",
+    .qdev.size  = sizeof(AHCIPciState),
+    .init       = pci_ahci_init,
+    .exit       = pci_ahci_uninit,
+    .qdev.props = (Property[]) {
+        DEFINE_PROP_END_OF_LIST(),
+    }
+};
+
+static void ahci_pci_register_devices(void)
+{
+    pci_qdev_register(&ahci_info);
+}
+
+device_init(ahci_pci_register_devices)