diff mbox

[4/4,V3] PVCSI paravirtualized device implementation

Message ID CABg=H3NObiotJOTFrv_f7Y92h1EsU+So84jqOVZA6eeazDjb8w@mail.gmail.com
State New
Headers show

Commit Message

Deep Debroy July 7, 2012, 5:07 a.m. UTC
Signed-off-by: Deep Debroy <ddebroy@gmail.com>
---
 default-configs/pci.mak    |    1 +
 docs/specs/pvscsi-spec.txt |   92 ++++
 hw/Makefile.objs           |    1 +
 hw/pci.h                   |    1 +
 hw/pvscsi.c                | 1260 ++++++++++++++++++++++++++++++++++++++++++++
 hw/pvscsi.h                |  442 ++++++++++++++++
 6 files changed, 1797 insertions(+)
 create mode 100644 docs/specs/pvscsi-spec.txt
 create mode 100644 hw/pvscsi.c
 create mode 100644 hw/pvscsi.h

Comments

Blue Swirl July 7, 2012, 8:38 a.m. UTC | #1
On Sat, Jul 7, 2012 at 5:07 AM, Deep Debroy <ddebroy@gmail.com> wrote:
> Signed-off-by: Deep Debroy <ddebroy@gmail.com>
> ---
>  default-configs/pci.mak    |    1 +
>  docs/specs/pvscsi-spec.txt |   92 ++++
>  hw/Makefile.objs           |    1 +
>  hw/pci.h                   |    1 +
>  hw/pvscsi.c                | 1260 ++++++++++++++++++++++++++++++++++++++++++++
>  hw/pvscsi.h                |  442 ++++++++++++++++
>  6 files changed, 1797 insertions(+)
>  create mode 100644 docs/specs/pvscsi-spec.txt
>  create mode 100644 hw/pvscsi.c
>  create mode 100644 hw/pvscsi.h
>
> diff --git a/default-configs/pci.mak b/default-configs/pci.mak
> index 9d3e1db..9fd4896 100644
> --- a/default-configs/pci.mak
> +++ b/default-configs/pci.mak
> @@ -10,6 +10,7 @@ CONFIG_EEPRO100_PCI=y
>  CONFIG_PCNET_PCI=y
>  CONFIG_PCNET_COMMON=y
>  CONFIG_LSI_SCSI_PCI=y
> +CONFIG_PVSCSI_SCSI_PCI=y
>  CONFIG_RTL8139_PCI=y
>  CONFIG_E1000_PCI=y
>  CONFIG_IDE_CORE=y
> diff --git a/docs/specs/pvscsi-spec.txt b/docs/specs/pvscsi-spec.txt
> new file mode 100644
> index 0000000..27f5529
> --- /dev/null
> +++ b/docs/specs/pvscsi-spec.txt
> @@ -0,0 +1,92 @@
> +General Description
> +===================
> +
> +This document describes VMWare PVSCSI device interface specification.
> +Created by Dmitry Fleytman (dmitry@daynix.com), Daynix Computing LTD.
> +Based on source code of PVSCSI Linux driver from kernel 3.0.4
> +
> +PVSCSI Device Interface Overview
> +================================
> +
> +The interface is based on memory area shared between hypervisor and VM.
> +Memory area is obtained by driver as device IO memory resource of
> +PVSCSI_MEM_SPACE_SIZE length.
> +The shared memory consists of registers area and rings area.
> +The registers area is used to raise hypervisorinterrupts and issue device

hypervisor interrupts

> +commands. The rings area is used to transfer data descruiptors and SCSI

descriptors

> +commands from VM to hypervisor and to transfer messages produced by
> +hypervisor to VM. Data itself is transferred via virtual scatter-gather DMA.
> +
> +PVSCSI Device Registers
> +=======================
> +
> +Registers area length is 1 page (PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES).
> +Registers area structure is described by PVSCSIRegOffset enumeration.
> +There are registers to issue device command (with optional short data),
> +issue device interrupt, control interrupts masking.
> +
> +PVSCSI Device Rings
> +===================
> +
> +There are three rings in shared memory:
> +
> +    1. Request ring (struct PVSCSIRingReqDesc *req_ring)
> +        - ring for OS to device requests
> +    2. Completion ring (struct PVSCSIRingCmpDesc *cmp_ring)
> +        - ring for device request completions
> +    3. Message ring (struct PVSCSIRingMsgDesc *msg_ring)
> +        - ring for messages from device.
> +       This ring is optional and may be not configured.
> +There is a control area (struct PVSCSIRingsState *rings_state) used to control
> +rings operation.
> +
> +PVSCSI Device to Host Interrupts
> +================================
> +There are following interrupt types supported by PVSCSI device:
> +    1. Completion interrupts (completion ring notifications):
> +        PVSCSI_INTR_CMPL_0
> +        PVSCSI_INTR_CMPL_1
> +    2. Message interrupts (message ring notifications):
> +        PVSCSI_INTR_MSG_0
> +        PVSCSI_INTR_MSG_1
> +
> +Interrupts are controlled via PVSCSI_REG_OFFSET_INTR_MASK register
> +Bit set means interrupt enabled, bit cleared - disabled
> +
> +Interrupt modes supported are legacy, MSI and MSI-X
> +In case of legacy interrupts register PVSCSI_REG_OFFSET_INTR_STATUS
> +used to verify interrupt arrival and to clear interrupt state
> +Interrupts are cleared by writing processed bits back
> +to interrupt status register.
> +
> +PVSCSI Device Operation Sequences
> +=================================
> +
> +1. Startup sequence:
> +    a. Issue PVSCSI_CMD_ADAPTER_RESET command;
> +    aa. Windows driver reads interrupt status register here;
> +    b. Issue PVSCSI_CMD_SETUP_MSG_RING command with no additional data,
> +       check status and disable device messages if error returned;
> +       (Omitted if device messages disabled by driver configuration)
> +    c. Issue PVSCSI_CMD_SETUP_RINGS command, provide rings configuration
> +       as struct PVSCSICmdDescSetupRings;
> +    d. Issue PVSCSI_CMD_SETUP_MSG_RING command again, provide
> +       rings configuration as struct PVSCSICmdDescSetupMsgRing;
> +    e. Unmask completion and message (if device messages enabled) interrupts.
> +
> +2. Shutdown sequences
> +    a. Mask interrupts;
> +    b. Flush request ring using PVSCSI_REG_OFFSET_KICK_NON_RW_IO;
> +    c. Issue PVSCSI_CMD_ADAPTER_RESET command.
> +
> +3. Send request
> +    a. Fill next free request ring descriptor;
> +    b. Issue PVSCSI_REG_OFFSET_KICK_RW_IO for R/W operations;
> +       or PVSCSI_REG_OFFSET_KICK_NON_RW_IO for other operations.
> +
> +4. Abort command
> +    a. Issue PVSCSI_CMD_ABORT_CMD command;
> +
> +5. Request completion processing
> +    a. Upon completion interrupt arrival process completion
> +       and message (if enabled) rings.
> diff --git a/hw/Makefile.objs b/hw/Makefile.objs
> index 3d77259..6dd1eea 100644
> --- a/hw/Makefile.objs
> +++ b/hw/Makefile.objs
> @@ -86,6 +86,7 @@ hw-obj-$(CONFIG_OPENCORES_ETH) += opencores_eth.o
>
>  # SCSI layer
>  hw-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
> +hw-obj-$(CONFIG_PVSCSI_SCSI_PCI) += pvscsi.o
>  hw-obj-$(CONFIG_ESP) += esp.o
>
>  hw-obj-y += sysbus.o isa-bus.o
> diff --git a/hw/pci.h b/hw/pci.h
> index 79d38fd..3c197d9 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -60,6 +60,7 @@
>  #define PCI_DEVICE_ID_VMWARE_NET         0x0720
>  #define PCI_DEVICE_ID_VMWARE_SCSI        0x0730
>  #define PCI_DEVICE_ID_VMWARE_IDE         0x1729
> +#define PCI_DEVICE_ID_VMWARE_PVSCSI      0x07C0

Please retain numeric order.

>
>  /* Intel (0x8086) */
>  #define PCI_DEVICE_ID_INTEL_82551IT      0x1209
> diff --git a/hw/pvscsi.c b/hw/pvscsi.c
> new file mode 100644
> index 0000000..719ea98
> --- /dev/null
> +++ b/hw/pvscsi.c
> @@ -0,0 +1,1260 @@
> +/*
> + * QEMU VMWARE PVSCSI paravirtual SCSI bus
> + *
> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
> + *
> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> + *
> + * Based on implementation by Paolo Bonzini
> + * http://lists.gnu.org/archive/html/qemu-devel/2011-08/msg00729.html
> + *
> + * Authors:
> + * Paolo Bonzini <pbonzini@redhat.com>
> + * Dmitry Fleytman <dmitry@daynix.com>
> + * Yan Vugenfirer <yvugenfi@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#define DEBUG_PVSCSI_WARNINGS
> +#define DEBUG_PVSCSI_ERRORS
> +/*
> +#define DEBUG_PVSCSI_CALLBACKS
> +#define DEBUG_PVSCSI_RINGS
> +#define DEBUG_PVSCSI_COMMANDS
> +#define DEBUG_PVSCSI_INTERRUPTS
> +#define DEBUG_PVSCSI_SHMEM_ACCESS
> +#define DEBUG_PVSCSI_SHMEM_ACCESS
> +#define DEBUG_SCSI_EXCHANGE
> +*/

Please use //, it's easier to enable one line in the middle. With
tracepoints there would be no need.

> +
> +#ifdef DEBUG_PVSCSI_SHMEM_ACCESS
> +#define DSHPRINTF(fmt, ...)                                                 \
> +    do {                                                                    \
> +        printf("[pvscsi][SH][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
> +    } while (0)
> +#else
> +#define DSHPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#ifdef DEBUG_PVSCSI_CALLBACKS
> +#define DCBPRINTF(fmt, ...)                                                 \
> +    do {                                                                    \
> +        printf("[pvscsi][CB][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
> +    } while (0)
> +#else
> +#define DCBPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#ifdef DEBUG_PVSCSI_WARNINGS
> +#define DWRPRINTF(fmt, ...)                                                 \
> +    do {                                                                    \
> +        printf("[pvscsi][WR][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
> +    } while (0)
> +#else
> +#define DWRPRINTF(fmt, ...) do {} while (0)
> +#endif

Please check if qemu_log(LOG_UNIMP,...) would make sense for the cases
where the guest may be issuing commands which QEMU doesn't implement
yet.

> +
> +#ifdef DEBUG_PVSCSI_ERRORS
> +#define DERPRINTF(fmt, ...)                                                 \
> +    do {                                                                    \
> +        printf("[pvscsi][ER][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
> +    } while (0)
> +#else
> +#define DERPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#ifdef DEBUG_PVSCSI_COMMANDS
> +#define DCMPRINTF(fmt, ...)                                                 \
> +    do {                                                                    \
> +        printf("[pvscsi][CM][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
> +    } while (0)
> +#else
> +#define DCMPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#ifdef DEBUG_SCSI_EXCHANGE
> +#define DSXPRINTF(fmt, ...)                                                 \
> +    do {                                                                    \
> +        printf("[pvscsi][SX][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
> +    } while (0)
> +#else
> +#define DSXPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#ifdef DEBUG_PVSCSI_RINGS
> +#define DRIPRINTF(fmt, ...)                                                 \
> +    do {                                                                    \
> +        printf("[pvscsi][RI][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
> +    } while (0)
> +#else
> +#define DRIPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#ifdef DEBUG_PVSCSI_INTERRUPTS
> +#define DIRPRINTF(fmt, ...)                                                 \
> +    do {                                                                    \
> +        printf("[pvscsi][IR][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
> +    } while (0)
> +#else
> +#define DIRPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +/*
> + * MSI-X support is disabled because it leads Windows OS to crash on startup.
> + * The crash happens because Windows driver requires MSI-X shared memory
> + * to be part of the same BAR used for rings state, registers, etc.
> + * This is not supported by QEMU infrastructure so separate BAR created from
> + * MSI-X purposes. Windows driver fails to deal with 2 BARs.
> + * Linux works fine with current MSI-X implementation enabled.
> + */
> +#undef PVSCSI_ENABLE_MSIX
> +
> +#include "softfloat.h"
> +#include "compiler.h"
> +#include "scsi-defs.h"
> +#include "hw/hw.h"
> +#include "hw/pci.h"
> +#include "hw/scsi.h"
> +#ifdef PVSCSI_ENABLE_MSIX

Please remove.

> +#include "msix.h"
> +#endif
> +#include "msi.h"
> +#include "qemu-queue.h"
> +#include "pvscsi.h"
> +#include "vmware_utils.h"

The include lines should be just after the copyright header.

> +
> +#define PVSCSI_MAX_DEVS                   (64)
> +#define PVSCSI_MSIX_NUM_VECTORS           (1)
> +
> +typedef struct {

typedef struct PVSCSIRingsMgr {

> +    uint64_t            rs_pa;
> +
> +    uint32_t            txr_len_mask;
> +    uint32_t            rxr_len_mask;
> +    uint64_t            req_ring_pages_pa[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
> +    uint64_t            cmp_ring_pages_pa[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
> +    uint64_t            consumed_ptr;
> +    uint64_t            filled_cmp_ptr;
> +} PVSCSIRingsMgr;
> +
> +#define _RS_GET_FIELD(rs_pa, field) \
> +    (vmw_shmem_ld32(rs_pa + offsetof(struct PVSCSIRingsState, field)))
> +#define _RS_SET_FIELD(rs_pa, field, val) \
> +    (vmw_shmem_st32(rs_pa + offsetof(struct PVSCSIRingsState, field), val))

Don't use identifiers with leading underscores.

> +
> +/* Integer binary logarithm */
> +static int
> +pvscsi_log2(uint32_t input)
> +{
> +    int log = 0;
> +    assert(input > 0);
> +    while (input >> ++log) {
> +
> +        };

Useless semicolon.

> +    return log;
> +}
> +
> +static void
> +pvscsi_rings_mgr_init(PVSCSIRingsMgr *m, struct PVSCSICmdDescSetupRings *ri)
> +{
> +    int i;
> +    uint32_t txr_len_log2, rxr_len_log2;
> +    uint32_t req_ring_size, cmp_ring_size;
> +    m->rs_pa = ri->ringsStatePPN << PAGE_SHIFT;
> +
> +    req_ring_size = ri->reqRingNumPages * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
> +    cmp_ring_size = ri->cmpRingNumPages * PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
> +    txr_len_log2 = pvscsi_log2(req_ring_size - 1);
> +    rxr_len_log2 = pvscsi_log2(cmp_ring_size - 1);
> +
> +    m->txr_len_mask = MASK(txr_len_log2);
> +    m->rxr_len_mask = MASK(rxr_len_log2);
> +
> +    m->consumed_ptr = 0;
> +    m->filled_cmp_ptr = 0;
> +
> +    for (i = 0; i < ri->reqRingNumPages; i++) {
> +        m->req_ring_pages_pa[i] = ri->reqRingPPNs[i] << PAGE_SHIFT;
> +    }
> +
> +    for (i = 0; i < ri->cmpRingNumPages; i++) {
> +        m->cmp_ring_pages_pa[i] = ri->cmpRingPPNs[i] << PAGE_SHIFT;
> +    }
> +
> +    _RS_SET_FIELD(m->rs_pa, reqProdIdx, 0);
> +    _RS_SET_FIELD(m->rs_pa, reqConsIdx, 0);
> +    _RS_SET_FIELD(m->rs_pa, reqNumEntriesLog2, txr_len_log2);
> +
> +    _RS_SET_FIELD(m->rs_pa, cmpProdIdx, 0);
> +    _RS_SET_FIELD(m->rs_pa, cmpConsIdx, 0);
> +    _RS_SET_FIELD(m->rs_pa, cmpNumEntriesLog2, rxr_len_log2);
> +
> +    _RS_SET_FIELD(m->rs_pa, msgProdIdx, 0);
> +    _RS_SET_FIELD(m->rs_pa, msgConsIdx, 0);
> +    _RS_SET_FIELD(m->rs_pa, msgNumEntriesLog2, 0);
> +
> +    DRIPRINTF("TX/RX rings logarithms set to %d/%d",
> +              txr_len_log2, rxr_len_log2);
> +
> +    /* Flush ring state page changes */
> +    smp_wmb();
> +}
> +
> +static void
> +pvscsi_rings_mgr_cleanup(PVSCSIRingsMgr *mgr)
> +{
> +    mgr->rs_pa = 0;
> +    mgr->txr_len_mask = 0;
> +    mgr->consumed_ptr = 0;
> +    mgr->filled_cmp_ptr = 0;
> +    memset(mgr->req_ring_pages_pa, 0, sizeof(mgr->req_ring_pages_pa));
> +    memset(mgr->cmp_ring_pages_pa, 0, sizeof(mgr->cmp_ring_pages_pa));
> +}
> +
> +static inline target_phys_addr_t

'inline' may be premature optimization.

> +pvscsi_rings_mgr_pop_req_descr(PVSCSIRingsMgr *mgr)
> +{
> +    uint32_t ready_prt = _RS_GET_FIELD(mgr->rs_pa, reqProdIdx);
> +
> +    if (ready_prt != mgr->consumed_ptr) {
> +        uint32_t next_ready_ptr =
> +            mgr->consumed_ptr++ & mgr->txr_len_mask;
> +        uint32_t next_ready_page =
> +            next_ready_ptr / PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
> +        uint32_t inpage_idx =
> +            next_ready_ptr % PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
> +
> +        return mgr->req_ring_pages_pa[next_ready_page] +
> +               inpage_idx * sizeof(PVSCSIRingReqDesc);
> +    } else {
> +        return 0;
> +    }
> +}
> +
> +static inline void
> +pvscsi_rings_mgr_flush_req_ring(PVSCSIRingsMgr *mgr)
> +{
> +    _RS_SET_FIELD(mgr->rs_pa, reqConsIdx, mgr->consumed_ptr);
> +}
> +
> +static inline target_phys_addr_t
> +pvscsi_rings_mgr_pop_cmp_descr(PVSCSIRingsMgr *mgr)
> +{
> +    /*
> +     * According to Linux driver code it explicitly verifies that number
> +     * of requests being processed by device is less then the size of
> +     * completion queue, so device may omit completion queue overflow
> +     * conditions check. We assume that this is true for other (Windows)
> +     * drivers as well.
> +     */
> +
> +    uint32_t free_cmp_ptr =
> +        mgr->filled_cmp_ptr++ & mgr->rxr_len_mask;
> +    uint32_t free_cmp_page =
> +        free_cmp_ptr / PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
> +    uint32_t inpage_idx =
> +        free_cmp_ptr % PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
> +    return mgr->cmp_ring_pages_pa[free_cmp_page] +
> +           inpage_idx * sizeof(PVSCSIRingCmpDesc);
> +}
> +
> +static inline void
> +pvscsi_rings_mgr_flush_cmp_ring(PVSCSIRingsMgr *mgr)
> +{
> +    /* Flush descriptor changes */
> +    smp_wmb();
> +
> +    DRIPRINTF("New production counter of completion ring is %" PRIx64,
> +              mgr->filled_cmp_ptr);
> +
> +    _RS_SET_FIELD(mgr->rs_pa, cmpProdIdx, mgr->filled_cmp_ptr);
> +}
> +
> +typedef QTAILQ_HEAD(, PVSCSIRequest) PVSCSIRequestList;
> +#define PVSCSI_MAX_CMD_DATA_WORDS \
> +    (sizeof(struct PVSCSICmdDescSetupRings)/sizeof(uint32_t))
> +
> +typedef struct {
> +    PCIDevice dev;
> +    MemoryRegion io_space;
> +    SCSIBus bus;
> +    QEMUBH *completion_worker;
> +    PVSCSIRequestList pending_queue;
> +    PVSCSIRequestList completion_queue;
> +
> +#ifdef PVSCSI_ENABLE_MSIX

I think this can be removed.

> +    MemoryRegion msix_space;
> +    uint8_t msix_used;   /* Whether MSI-X support was installed successfully */
> +#endif
> +    uint8_t msi_used;    /* Whether MSI support was installed successfully   */
> +    uint64_t reg_interrupt_status;        /* Interrupt status register value */
> +    uint64_t reg_interrupt_enabled;       /* Interrupt mask register value   */
> +    uint64_t reg_command_status;          /* Command status register value   */
> +
> +    /* Command data adoption mechanism */
> +    uint64_t curr_cmd;                   /* Last command arrived             */
> +    uint32_t curr_cmd_data_cntr;      /* Amount of data for last command  */
> +
> +    /* Collector for current command data */
> +    uint32_t curr_cmd_data[PVSCSI_MAX_CMD_DATA_WORDS];
> +
> +    uint8_t rings_info_valid;            /* Whether rings are initialized    */
> +    PVSCSIRingsMgr rings;                /* Data transfer rings manager      */
> +} PVSCSI_State;

The order of the fields could be optimized, there are a lot of
structure holes due to alignment.

> +
> +static void
> +pvscsi_reset_state(PVSCSI_State *s)
> +{
> +    s->curr_cmd = PVSCSI_CMD_FIRST;
> +    s->curr_cmd_data_cntr = 0;
> +    s->reg_command_status = PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
> +    s->reg_interrupt_status = 0;
> +    pvscsi_rings_mgr_cleanup(&s->rings);
> +    s->rings_info_valid = FALSE;
> +    QTAILQ_INIT(&s->pending_queue);
> +    QTAILQ_INIT(&s->completion_queue);
> +}
> +
> +typedef struct PVSCSISGState {
> +    target_phys_addr_t elemAddr;
> +    target_phys_addr_t dataAddr;
> +    uint32_t resid;
> +} PVSCSISGState;
> +
> +typedef struct PVSCSIRequest {
> +    SCSIRequest *sreq;
> +    uint8_t sense_key;
> +    uint8_t completed;
> +    int lun;
> +    QEMUSGList sgl;
> +    PVSCSISGState sg;
> +    struct PVSCSIRingReqDesc req;
> +    struct PVSCSIRingCmpDesc cmp;
> +    QTAILQ_ENTRY(PVSCSIRequest) next;
> +} PVSCSIRequest;
> +
> +static void
> +pvscsi_free_queue(PVSCSIRequestList *req_list)
> +{
> +    PVSCSIRequest *pvscsi_req;
> +
> +    while (!QTAILQ_EMPTY(req_list)) {
> +        pvscsi_req = QTAILQ_FIRST(req_list);
> +        QTAILQ_REMOVE(req_list, pvscsi_req, next);
> +        g_free(pvscsi_req);
> +    }
> +}
> +
> +static void
> +pvscsi_reset_adapter(PVSCSI_State *s)
> +{
> +    qbus_reset_all_fn(&s->bus);
> +    pvscsi_free_queue(&s->completion_queue);
> +    assert(QTAILQ_EMPTY(&s->pending_queue));
> +    pvscsi_reset_state(s);
> +}
> +
> +static inline void
> +pvscsi_update_irq_status(PVSCSI_State *s)
> +{
> +    int should_raise = !!(s->reg_interrupt_enabled & s->reg_interrupt_status);

bool

> +
> +    DIRPRINTF("Interrupt level set to %d (MASK: 0x%llX, STATUS: 0x%llx)",
> +              should_raise,
> +              (long long unsigned int) s->reg_interrupt_enabled,
> +              (long long unsigned int) s->reg_interrupt_status);

Just use PRIx64 and remove the casts.

> +
> +#ifdef PVSCSI_ENABLE_MSIX
> +    if (s->msix_used && msix_enabled(&s->dev)) {
> +        if (should_raise) {
> +            DIRPRINTF("Sending MSI-X notification");
> +            msix_notify(&s->dev, PVSCSI_VECTOR_COMPLETION);
> +        }
> +        return;
> +    }
> +#endif
> +
> +    if (s->msi_used && msi_enabled(&s->dev)) {
> +        if (should_raise) {
> +            DIRPRINTF("Sending MSI notification");
> +            msi_notify(&s->dev, PVSCSI_VECTOR_COMPLETION);
> +        }
> +        return;
> +    }
> +
> +    qemu_set_irq(s->dev.irq[0], should_raise);
> +}
> +
> +static inline void
> +pvscsi_raise_completion_interrupt(PVSCSI_State *s)
> +{
> +    s->reg_interrupt_status |= PVSCSI_INTR_CMPL_0;
> +
> +    /* Memory barrier to flush interrupt status register changes*/
> +    smp_wmb();
> +
> +    pvscsi_update_irq_status(s);
> +}
> +
> +static void
> +pvscsi_cmp_ring_put(PVSCSI_State *s, struct PVSCSIRingCmpDesc *cmp_desc)
> +{
> +    target_phys_addr_t cmp_descr_pa;
> +
> +    cmp_descr_pa = pvscsi_rings_mgr_pop_cmp_descr(&s->rings);
> +    DRIPRINTF("Got completion descriptor %"PRIx64, cmp_descr_pa);
> +
> +    vmw_shmem_write(cmp_descr_pa, (void *)cmp_desc, sizeof(*cmp_desc));
> +}
> +
> +static void
> +pvscsi_process_completion_queue(void *opaque)
> +{
> +    PVSCSI_State *s = opaque;
> +    PVSCSIRequest *pvscsi_req;
> +    bool has_completed = false;
> +
> +    while (!QTAILQ_EMPTY(&s->completion_queue)) {
> +        pvscsi_req = QTAILQ_FIRST(&s->completion_queue);
> +        QTAILQ_REMOVE(&s->completion_queue, pvscsi_req, next);
> +        pvscsi_cmp_ring_put(s, &pvscsi_req->cmp);
> +        g_free(pvscsi_req);
> +        has_completed++;
> +    }
> +
> +    if (has_completed) {
> +        pvscsi_rings_mgr_flush_cmp_ring(&s->rings);
> +        pvscsi_raise_completion_interrupt(s);
> +    }
> +}
> +
> +static inline void
> +pvscsi_schedule_completion_processing(PVSCSI_State *s)
> +{
> +    /* Try putting more complete requests on the ring. */
> +    if (!QTAILQ_EMPTY(&s->completion_queue)) {
> +        qemu_bh_schedule(s->completion_worker);
> +    }
> +}
> +
> +static inline void
> +pvscsi_complete_request(PVSCSI_State *s, PVSCSIRequest *r)
> +{
> +    assert(!r->completed);
> +    DSXPRINTF("Completion: ctx: %"PRIx64", len: %"PRIx64", sense key: %u",
> +              r->cmp.context, r->cmp.dataLen, r->sense_key);
> +    if (r->sreq != NULL) {
> +        scsi_req_unref(r->sreq);
> +        r->sreq = NULL;
> +    }
> +    r->completed = 1;
> +    QTAILQ_REMOVE(&s->pending_queue, r, next);
> +    QTAILQ_INSERT_TAIL(&s->completion_queue, r, next);
> +    pvscsi_schedule_completion_processing(s);
> +}
> +
> +static QEMUSGList *pvscsi_get_sg_list(SCSIRequest *r)
> +{
> +    PVSCSIRequest *req = r->hba_private;
> +
> +    DSXPRINTF("Get SG list: depth: %u, size: %lu",
> +              req->sgl.nsg, req->sgl.size);
> +
> +    return &req->sgl;
> +}
> +
> +static void
> +pvscsi_get_next_sg_elem(PVSCSISGState *sg)
> +{
> +    struct PVSCSISGElement elem;
> +
> +    for (;; sg->elemAddr = elem.addr) {
> +        vmw_shmem_read(sg->elemAddr, (void *)&elem, sizeof(elem));
> +        if (0 != (elem.flags & ~PVSCSI_KNOWN_FLAGS)) {

The order in the expression is unnatural, please reverse.

> +            /*
> +             * There is PVSCSI_SGE_FLAG_CHAIN_ELEMENT flag described in
> +             * header file but its value is unknown. This flag requires
> +             * additional processing, so we put warning here to catch it
> +             * some day and make proper implementation
> +             */
> +            DWRPRINTF("Unknown flags in SG element (val: 0x%X)", elem.flags);

Lowercase hex, please.

> +        }
> +        break;
> +    }
> +
> +    sg->elemAddr += sizeof(elem);
> +    sg->dataAddr = elem.addr;
> +    sg->resid = elem.length;
> +}
> +
> +static inline void
> +pvscsi_write_sense(PVSCSIRequest *r, uint8_t *sense, int len)
> +{
> +    r->cmp.senseLen = MIN(r->req.senseLen, len);
> +    r->sense_key = sense[2];
> +    vmw_shmem_write(r->req.senseAddr, sense, r->cmp.senseLen);
> +}
> +
> +static void
> +pvscsi_command_complete(SCSIRequest *req, uint32_t status, size_t resid)
> +{
> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev.qdev, req->bus->qbus.parent);
> +    PVSCSIRequest *pvscsi_req = req->hba_private;
> +
> +    if (!pvscsi_req) {
> +        DERPRINTF("PVSCSI: Can't find request for tag 0x%x", req->tag);
> +        return;
> +    }
> +
> +    if (resid) {
> +        /* Short transfer.  */
> +        DSXPRINTF("Not all data required for command transferred.");
> +        pvscsi_req->cmp.hostStatus = BTSTAT_DATARUN;
> +    }
> +
> +    pvscsi_req->cmp.scsiStatus = status;
> +    if (CHECK_CONDITION == pvscsi_req->cmp.scsiStatus) {

Order

> +        uint8_t sense[SCSI_SENSE_BUF_SIZE];
> +        int sense_len =
> +            scsi_req_get_sense(pvscsi_req->sreq, sense, sizeof(sense));
> +
> +        DSXPRINTF("Sense information length is %d bytes", sense_len);
> +        pvscsi_write_sense(pvscsi_req, sense, sense_len);
> +    }
> +    qemu_sglist_destroy(&pvscsi_req->sgl);
> +    pvscsi_complete_request(s, pvscsi_req);
> +}
> +
> +static void
> +pvscsi_request_cancelled(SCSIRequest *req)
> +{
> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev.qdev, req->bus->qbus.parent);
> +    PVSCSIRequest *pvscsi_req = req->hba_private;
> +
> +    if (BTSTAT_SUCCESS == pvscsi_req->cmp.hostStatus) {
> +        pvscsi_req->cmp.hostStatus = BTSTAT_ABORTQUEUE;
> +    }
> +    pvscsi_complete_request(s, pvscsi_req);
> +}
> +
> +static inline SCSIDevice*
> +pvscsi_device_find(PVSCSI_State *s, int channel, int target,
> +                   uint8_t *requested_lun, uint8_t *target_lun)
> +{
> +    if (requested_lun[0] || requested_lun[2] || requested_lun[3] ||
> +        requested_lun[4] || requested_lun[5] || requested_lun[6] ||
> +        requested_lun[7] || (PVSCSI_MAX_DEVS < target)) {
> +        return NULL;
> +    } else {
> +        *target_lun = requested_lun[1];
> +        return scsi_device_find(&s->bus, channel, target, *target_lun);
> +    }
> +}
> +
> +static PVSCSIRequest *
> +pvscsi_queue_pending_descriptor(PVSCSI_State *s, SCSIDevice **d,
> +                                struct PVSCSIRingReqDesc *descr)
> +{
> +    PVSCSIRequest *pvscsi_req;
> +    uint8_t lun;
> +
> +    pvscsi_req = g_malloc0(sizeof(*pvscsi_req));
> +    pvscsi_req->req = *descr;
> +    pvscsi_req->cmp.context = pvscsi_req->req.context;
> +    QTAILQ_INSERT_TAIL(&s->pending_queue, pvscsi_req, next);
> +
> +    *d = pvscsi_device_find(s, descr->bus, descr->target, descr->lun, &lun);
> +    if (!*d) {
> +        return pvscsi_req;
> +    }
> +
> +    pvscsi_req->lun = lun;
> +    return pvscsi_req;
> +}
> +
> +static void
> +pvscsi_convert_sglist(PVSCSIRequest *r)
> +{
> +    int chunk_size;
> +    uint64_t data_length = r->req.dataLen;
> +    PVSCSISGState sg = r->sg;
> +    while (data_length) {
> +        while (!sg.resid) {
> +            pvscsi_get_next_sg_elem(&sg);
> +            DCMPRINTF("Element: ctx: %"PRIx64", addr: %"PRIx64", len: 0x%ul",
> +                      r->req.context, (uint64_t) r->sg.dataAddr, r->sg.resid);

TARGET_FMT_plx? Didn't check the type.

> +        }
> +        assert(data_length > 0);
> +        chunk_size = MIN((unsigned) data_length, sg.resid);
> +        if (chunk_size) {
> +            qemu_sglist_add(&r->sgl, sg.dataAddr, chunk_size);
> +        }
> +
> +        sg.dataAddr += chunk_size;
> +        data_length -= chunk_size;
> +        sg.resid -= chunk_size;
> +    }
> +}
> +
> +static inline void
> +pvscsi_build_sglist(PVSCSIRequest *r)
> +{
> +    qemu_sglist_init(&r->sgl, 1, NULL);
> +    if (r->req.flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) {
> +        pvscsi_convert_sglist(r);
> +    } else {
> +        qemu_sglist_add(&r->sgl, r->req.dataAddr, r->req.dataLen);
> +    }
> +}
> +
> +static void
> +pvscsi_process_request_descriptor(PVSCSI_State *s,
> +                                  struct PVSCSIRingReqDesc *descr)
> +{
> +    SCSIDevice *d;
> +    PVSCSIRequest *r = pvscsi_queue_pending_descriptor(s, &d, descr);
> +    int64_t n;
> +
> +    DSXPRINTF("SCSI cmd 0x%X, ctx: %" PRIx64, descr->cdb[0], descr->context);
> +
> +    if (!d) {
> +        r->cmp.hostStatus = BTSTAT_SELTIMEO;
> +        DSXPRINTF("Command directed to unknown device rejected.");
> +        pvscsi_complete_request(s, r);
> +        return;
> +    }
> +
> +    if (descr->flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) {
> +        r->sg.elemAddr = descr->dataAddr;
> +    }
> +
> +    r->sreq = scsi_req_new(d, descr->context, r->lun, descr->cdb, r);
> +    if (r->sreq->cmd.mode == SCSI_XFER_FROM_DEV &&
> +        (descr->flags & PVSCSI_FLAG_CMD_DIR_TODEVICE)) {
> +        r->cmp.hostStatus = BTSTAT_BADMSG;
> +        DSXPRINTF("Command with invalid transfer direction rejected.");
> +        scsi_req_cancel(r->sreq);
> +        return;
> +    }
> +    if (r->sreq->cmd.mode == SCSI_XFER_TO_DEV &&
> +        (descr->flags & PVSCSI_FLAG_CMD_DIR_TOHOST)) {
> +        r->cmp.hostStatus = BTSTAT_BADMSG;
> +        DSXPRINTF("Command with invalid transfer direction rejected.");
> +        scsi_req_cancel(r->sreq);
> +        return;
> +    }
> +
> +    pvscsi_build_sglist(r);
> +    n = scsi_req_enqueue(r->sreq);
> +
> +    if (n) {
> +        scsi_req_continue(r->sreq);
> +    }
> +}
> +
> +static void
> +pvscsi_process_io(PVSCSI_State *s)
> +{
> +    PVSCSIRingReqDesc descr;
> +    target_phys_addr_t next_descr_pa;
> +
> +    assert(s->rings_info_valid);
> +    while (0 != (next_descr_pa = pvscsi_rings_mgr_pop_req_descr(&s->rings))) {

Order

> +        DRIPRINTF("Got descriptor %"PRIx64, next_descr_pa);
> +        vmw_shmem_read(next_descr_pa, &descr, sizeof(descr));
> +        pvscsi_process_request_descriptor(s, &descr);
> +    }
> +
> +    pvscsi_rings_mgr_flush_req_ring(&s->rings);
> +}
> +
> +static void
> +pvscsi_dbg_dump_tx_rings_config(struct PVSCSICmdDescSetupRings *rc)
> +{
> +#ifdef DEBUG_PVSCSI_RINGS
> +    int i;
> +    DRIPRINTF("TX Rings configuration received:");

Since the code is surrounded by #ifdeffery, you can just use fprintf.
With tracepoints, the #ifdeffery could be removed.

> +    DRIPRINTF("Rings state page: %"PRIx64":", rc->ringsStatePPN);
> +
> +    DRIPRINTF("Request ring pages: %u:", rc->reqRingNumPages);
> +    for (i = 0; i < rc->reqRingNumPages; i++) {
> +        DRIPRINTF("\t%"PRIx64" ", rc->reqRingPPNs[i]);
> +    }
> +
> +    DRIPRINTF("Confirm ring pages: %u:", rc->cmpRingNumPages);
> +    for (i = 0; i < rc->cmpRingNumPages; i++) {
> +        DRIPRINTF("\t%"PRIx64" ", rc->cmpRingPPNs[i]);
> +    }
> +#endif
> +}
> +
> +static uint64_t
> +pvscsi_on_cmd_config(PVSCSI_State *s)
> +{
> +    DWRPRINTF("Unimplemented command PVSCSI_CMD_CONFIG ignored.");
> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
> +}
> +
> +static uint64_t
> +pvscsi_on_cmd_unplug(PVSCSI_State *s)
> +{
> +    DWRPRINTF("Unimplemented command PVSCSI_CMD_DEVICE_UNPLUG ignored.");
> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
> +}
> +
> +static uint64_t
> +pvscsi_on_issue_scsi(PVSCSI_State *s)
> +{
> +    DWRPRINTF("Unimplemented command PVSCSI_CMD_ISSUE_SCSI ignored.");
> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
> +}
> +
> +static uint64_t
> +pvscsi_on_cmd_setup_rings(PVSCSI_State *s)
> +{
> +    struct PVSCSICmdDescSetupRings *rc =
> +        (struct PVSCSICmdDescSetupRings *) s->curr_cmd_data;
> +
> +    DCMPRINTF("Command PVSCSI_CMD_SETUP_RINGS arrived");
> +
> +    pvscsi_dbg_dump_tx_rings_config(rc);
> +    pvscsi_rings_mgr_init(&s->rings, rc);
> +    s->rings_info_valid = TRUE;
> +    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
> +}
> +
> +static uint64_t
> +pvscsi_on_cmd_abort(PVSCSI_State *s)
> +{
> +#ifdef DEBUG_PVSCSI_COMMANDS
> +    struct PVSCSICmdDescAbortCmd *data =
> +        (struct PVSCSICmdDescAbortCmd *) s->curr_cmd_data;
> +
> +    DCMPRINTF("PVSCSI_CMD_ABORT_CMD for ctx %"PRIx64", target 0x%X",
> +              data->context, data->target);
> +#endif
> +
> +    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
> +}
> +
> +static uint64_t
> +pvscsi_on_cmd_unknown(PVSCSI_State *s)
> +{
> +    DWRPRINTF("Data for unknown command : 0x%X", s->curr_cmd_data[0]);
> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
> +}
> +
> +static uint64_t
> +pvscsi_on_cmd_reset_device(PVSCSI_State *s)
> +{
> +    uint8_t target_lun = 0;
> +    struct PVSCSICmdDescResetDevice *cmd =
> +        (struct PVSCSICmdDescResetDevice *) s->curr_cmd_data;
> +    SCSIDevice *sdev;
> +
> +    sdev = pvscsi_device_find(s, 0, cmd->target, cmd->lun, &target_lun);
> +
> +    DWRPRINTF("PVSCSI_CMD_RESET_DEVICE[target 0x%u lun 0x%d (dev 0x%p)]",
> +              cmd->target, (int) target_lun, sdev);
> +
> +    if (sdev != NULL) {
> +        device_reset(&sdev->qdev);
> +        return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
> +    }
> +
> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
> +}
> +
> +static uint64_t
> +pvscsi_on_cmd_reset_bus(PVSCSI_State *s)
> +{
> +    DCMPRINTF("Command PVSCSI_CMD_RESET_BUS arrived");
> +
> +    qbus_reset_all_fn(&s->bus);
> +    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
> +}
> +
> +static uint64_t
> +pvscsi_on_cmd_setup_msg_ring(PVSCSI_State *s)
> +{
> +    /* No message ring needed for HDD devices */
> +    DCMPRINTF("Command PVSCSI_CMD_SETUP_MSG_RING arrived");
> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
> +}
> +
> +static uint64_t
> +pvscsi_on_cmd_adapter_reset(PVSCSI_State *s)
> +{
> +    /* No message ring needed for HDD devices */
> +    DCMPRINTF("Command PVSCSI_CMD_ADAPTER_RESET arrived");
> +
> +    pvscsi_reset_adapter(s);
> +    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
> +}
> +
> +struct {

'static const'

> +    int       data_size;
> +    uint64_t  (*handler_fn)(PVSCSI_State *s);
> +} pvscsi_commands[] = {
> +    [PVSCSI_CMD_FIRST] = {
> +        .data_size = 0,
> +        .handler_fn = pvscsi_on_cmd_unknown

Comma at the end of line is more friendly for future patches.

> +    },
> +
> +    /* Not implemented, data size defined based on what arrives on windows */
> +    [PVSCSI_CMD_CONFIG] = {
> +        .data_size = 6*sizeof(uint32_t),

Missing spaces around '*'. Please use checkpatch.pl to catch these.

> +        .handler_fn = pvscsi_on_cmd_config
> +    },
> +
> +    /* Command not implemented, data size is unknown */
> +    [PVSCSI_CMD_ISSUE_SCSI] = {
> +        .data_size = 0,
> +        .handler_fn = pvscsi_on_issue_scsi
> +    },
> +
> +    /* Command not implemented, data size is unknown */
> +    [PVSCSI_CMD_DEVICE_UNPLUG] = {
> +        .data_size = 0,
> +        .handler_fn = pvscsi_on_cmd_unplug
> +    },
> +
> +    [PVSCSI_CMD_SETUP_RINGS] = {
> +        .data_size = sizeof(struct PVSCSICmdDescSetupRings),
> +        .handler_fn = pvscsi_on_cmd_setup_rings
> +    },
> +
> +    [PVSCSI_CMD_RESET_DEVICE] = {
> +        .data_size = sizeof(struct PVSCSICmdDescResetDevice),
> +        .handler_fn = pvscsi_on_cmd_reset_device
> +    },
> +
> +    [PVSCSI_CMD_RESET_BUS] = {
> +        .data_size = 0,
> +        .handler_fn = pvscsi_on_cmd_reset_bus
> +    },
> +
> +    [PVSCSI_CMD_SETUP_MSG_RING] = {
> +        .data_size = 0,
> +        .handler_fn = pvscsi_on_cmd_setup_msg_ring
> +    },
> +
> +    [PVSCSI_CMD_ADAPTER_RESET] = {
> +        .data_size = 0,
> +        .handler_fn = pvscsi_on_cmd_adapter_reset
> +    },
> +
> +    [PVSCSI_CMD_ABORT_CMD] = {
> +        .data_size = sizeof(struct PVSCSICmdDescAbortCmd),
> +        .handler_fn = pvscsi_on_cmd_abort
> +    }
> +};
> +
> +static void
> +pvscsi_do_command_processing(PVSCSI_State *s)
> +{
> +    size_t bytes_arrived = s->curr_cmd_data_cntr*sizeof(uint32_t);

Spacing.

> +
> +    if (bytes_arrived >= pvscsi_commands[s->curr_cmd].data_size) {
> +        s->reg_command_status = pvscsi_commands[s->curr_cmd].handler_fn(s);
> +        s->curr_cmd = PVSCSI_CMD_FIRST;
> +        s->curr_cmd_data_cntr   = 0;
> +    }
> +}
> +
> +static void
> +pvscsi_on_command_data(PVSCSI_State *s, uint32_t value)
> +{
> +    size_t bytes_arrived = s->curr_cmd_data_cntr*sizeof(uint32_t);
> +
> +    assert(bytes_arrived < sizeof(s->curr_cmd_data));
> +    s->curr_cmd_data[s->curr_cmd_data_cntr++] = value;
> +
> +    pvscsi_do_command_processing(s);
> +}
> +
> +static void
> +pvscsi_on_command(PVSCSI_State *s, uint64_t cmd_id)
> +{
> +    if ((cmd_id > PVSCSI_CMD_FIRST) && (cmd_id < PVSCSI_CMD_LAST)) {
> +        s->curr_cmd = cmd_id;
> +    } else {
> +        s->curr_cmd = PVSCSI_CMD_FIRST;
> +        DWRPRINTF("Unknown command %"PRIx64, cmd_id);
> +    }
> +
> +    s->curr_cmd_data_cntr = 0;
> +    s->reg_command_status = PVSCSI_COMMAND_PROCESSING_FAILED;
> +
> +    pvscsi_do_command_processing(s);
> +}
> +
> +static void
> +pvscsi_io_write(void *opaque, target_phys_addr_t addr,
> +                uint64_t val, unsigned size)
> +{
> +    PVSCSI_State *s = opaque;
> +
> +    switch (addr) {
> +    case PVSCSI_REG_OFFSET_COMMAND:
> +        pvscsi_on_command(s, val);
> +        break;
> +
> +    case PVSCSI_REG_OFFSET_COMMAND_DATA:
> +        pvscsi_on_command_data(s, (uint32_t) val);
> +        break;
> +
> +    case PVSCSI_REG_OFFSET_INTR_STATUS:
> +        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_STATUS write: %"PRIx64, val);
> +        s->reg_interrupt_status &= ~val;
> +        pvscsi_update_irq_status(s);
> +        pvscsi_schedule_completion_processing(s);
> +        break;
> +
> +    case PVSCSI_REG_OFFSET_INTR_MASK:
> +        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_MASK write: %"PRIx64, val);
> +        s->reg_interrupt_enabled = val;
> +        pvscsi_update_irq_status(s);
> +        break;
> +
> +    case PVSCSI_REG_OFFSET_KICK_NON_RW_IO:
> +        DCBPRINTF("PVSCSI_REG_OFFSET_KICK_NON_RW_IO write: %"PRIx64, val);
> +        pvscsi_process_io(s);
> +        break;
> +
> +    case PVSCSI_REG_OFFSET_KICK_RW_IO:
> +        DCBPRINTF("PVSCSI_REG_OFFSET_KICK_RW_IO write: %"PRIx64, val);
> +        pvscsi_process_io(s);
> +        break;
> +
> +    case PVSCSI_REG_OFFSET_DEBUG:
> +        DCBPRINTF("PVSCSI_REG_OFFSET_DEBUG write: %"PRIx64, val);
> +        break;
> +
> +    default:
> +        DWRPRINTF("Unknown write: %" PRIx64 "%u bytes, val %" PRIx64,
> +                  (uint64_t) addr, size, val);

TARGET_FMT_plx

> +        break;
> +    }
> +
> +}
> +
> +static uint64_t
> +pvscsi_io_read(void *opaque, target_phys_addr_t addr, unsigned size)
> +{
> +    PVSCSI_State *s = opaque;
> +
> +    switch (addr) {
> +    case PVSCSI_REG_OFFSET_INTR_STATUS:
> +        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_STATUS read: %"PRIx64,
> +                  s->reg_interrupt_status);
> +        return s->reg_interrupt_status;
> +
> +    case PVSCSI_REG_OFFSET_INTR_MASK:
> +        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_MASK read: %"PRIx64,
> +                  s->reg_interrupt_enabled);
> +        return s->reg_interrupt_enabled;
> +
> +    case PVSCSI_REG_OFFSET_COMMAND_STATUS:
> +        DCBPRINTF("PVSCSI_REG_OFFSET_COMMAND_STATUS read: %"PRIx64,
> +                  s->reg_command_status);
> +        return s->reg_command_status;
> +
> +    default:
> +        DWRPRINTF("Unknown read, address %" PRIx64 ", %d bytes",
> +                  (uint64_t) addr, size);

Ditto

> +        return 0;
> +    }
> +}
> +
> +#ifdef PVSCSI_ENABLE_MSIX
> +static bool
> +pvscsi_init_msix(PVSCSI_State *s)
> +{
> +    int res;
> +    memory_region_init(&s->msix_space, "pvscsi-msix", PAGE_SIZE);
> +    res = msix_init(&s->dev, PVSCSI_MSIX_NUM_VECTORS, &s->msix_space, 1, 0);
> +    if (0 > res) {

Unnatural order again, also below.

> +        DWRPRINTF("Failed to initialize MSI-X, error %d", res);
> +        memory_region_destroy(&s->msix_space);
> +        s->msix_used = false;
> +    } else {
> +        res = msix_vector_use(&s->dev, PVSCSI_VECTOR_COMPLETION);
> +        if (0 > res) {
> +            DWRPRINTF("Failed to use MSI-X vector, error %d", res);
> +            msix_uninit(&s->dev, &s->msix_space);
> +            memory_region_destroy(&s->msix_space);
> +            s->msix_used = false;
> +        } else {
> +            pci_register_bar(&s->dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY,
> +                             &s->msix_space);
> +            s->msix_used = true;
> +        }
> +    }

How about moving the #ifdef lower and add here
#else
  s->msix_used = false;
#endif

> +    return s->msix_used;
> +}
> +
> +static void
> +pvscsi_cleanup_msix(PVSCSI_State *s)
> +{

and here #ifdef to make the function empty on !msix?

> +    if (s->msix_used) {
> +        msix_vector_unuse(&s->dev, PVSCSI_VECTOR_COMPLETION);
> +        msix_uninit(&s->dev, &s->msix_space);
> +        memory_region_destroy(&s->msix_space);
> +    }
> +}
> +#endif
> +
> +static bool
> +pvscsi_init_msi(PVSCSI_State *s)
> +{
> +#define PVSCSI_MSI_OFFSET        (0x50)
> +#define PVSCSI_USE_64BIT         (true)
> +#define PVSCSI_PER_VECTOR_MASK   (false)

#defines should go to top of the file.

> +
> +    int res;
> +    res = msi_init(&s->dev, PVSCSI_MSI_OFFSET, PVSCSI_MSIX_NUM_VECTORS,
> +                   PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK);
> +    if (0 > res) {

Order

> +        DWRPRINTF("Failed to initialize MSI, error %d", res);
> +        s->msi_used = false;
> +    } else {
> +        s->msi_used = true;
> +    }
> +
> +    return s->msi_used;
> +}
> +
> +static void
> +pvscsi_cleanup_msi(PVSCSI_State *s)
> +{
> +    if (s->msi_used) {
> +        msi_uninit(&s->dev);
> +    }
> +}
> +
> +#ifdef PVSCSI_ENABLE_MSIX
> +
> +static void
> +pvscsi_msix_save(QEMUFile *f, void *opaque)
> +{
> +    msix_save(&((PVSCSI_State *)opaque)->dev, f);
> +}
> +
> +static int
> +pvscsi_msix_load(QEMUFile *f, void *opaque, int version_id)
> +{
> +    msix_load(&((PVSCSI_State *)opaque)->dev, f);
> +    return 0;
> +}
> +
> +#endif
> +
> +static int
> +pvscsi_init(PCIDevice *dev)
> +{
> +    static const MemoryRegionOps pv_scsi_ops = {
> +            .read = pvscsi_io_read,
> +            .write = pvscsi_io_write,
> +            .endianness = DEVICE_NATIVE_ENDIAN,
> +            .impl = {
> +                    .min_access_size = 4,
> +                    .max_access_size = 4,
> +            },
> +    };
> +
> +    static const struct SCSIBusInfo pvscsi_scsi_info = {
> +            .tcq = true,
> +            .max_target = PVSCSI_MAX_DEVS,
> +            .max_channel = 0,
> +            .max_lun = 0,
> +
> +            .get_sg_list = pvscsi_get_sg_list,
> +            .complete = pvscsi_command_complete,
> +            .cancel = pvscsi_request_cancelled

comma

> +    };
> +
> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev, dev);
> +
> +    DCBPRINTF("Starting init...");
> +
> +    /* PCI subsystem ID */
> +    s->dev.config[PCI_SUBSYSTEM_ID] = 0x00;
> +    s->dev.config[PCI_SUBSYSTEM_ID + 1] = 0x10;
> +
> +    /* PCI latency timer = 255 */
> +    s->dev.config[PCI_LATENCY_TIMER] = 0xff;
> +
> +    /* Interrupt pin A */
> +    s->dev.config[PCI_INTERRUPT_PIN] = 0x01;
> +
> +    memory_region_init_io(&s->io_space, &pv_scsi_ops, s,
> +                          "pvscsi-io", PVSCSI_MEM_SPACE_SIZE);
> +    pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->io_space);
> +
> +#ifdef PVSCSI_ENABLE_MSIX
> +    register_savevm(&dev->qdev, "pvscsi-msix", -1, 1,
> +                    pvscsi_msix_save, pvscsi_msix_load, s);
> +
> +    if (!pvscsi_init_msix(s)) {
> +        DWRPRINTF("Failed to initialize MSI-X.");
> +    }
> +#endif

With the adjusted #ifdeffery for the functions themselves, this code
could be enabled always.

> +    if (!pvscsi_init_msi(s)) {
> +        DWRPRINTF("Failed to initialize MSI.");
> +    }
> +
> +    s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s);
> +    if (!s->completion_worker) {
> +        pvscsi_cleanup_msi(s);
> +#ifdef PVSCSI_ENABLE_MSIX
> +        pvscsi_cleanup_msix(s);
> +#endif
> +        memory_region_destroy(&s->io_space);
> +        return -ENOMEM;
> +    }
> +
> +    scsi_bus_new(&s->bus, &dev->qdev, &pvscsi_scsi_info);
> +    pvscsi_reset_state(s);
> +
> +    return 0;
> +}
> +
> +static int
> +pvscsi_uninit(PCIDevice *dev)
> +{
> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev, dev);
> +
> +    DCBPRINTF("Starting uninit...");
> +    qemu_bh_delete(s->completion_worker);
> +
> +#ifdef PVSCSI_ENABLE_MSIX
> +    unregister_savevm(&dev->qdev, "pvscsi-msix", s);
> +    pvscsi_cleanup_msix(s);
> +#endif
> +    pvscsi_cleanup_msi(s);
> +
> +    memory_region_destroy(&s->io_space);
> +
> +    return 0;
> +}
> +
> +static void
> +pvscsi_reset(DeviceState *dev)
> +{
> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev.qdev, dev);
> +    DCBPRINTF("Starting reset...");
> +    pvscsi_reset_adapter(s);
> +}
> +
> +static void
> +pvscsi_pre_save(void *opaque)
> +{
> +    PVSCSI_State *s = (PVSCSI_State *) opaque;
> +
> +    DCBPRINTF("Starting presave...");
> +
> +    assert(QTAILQ_EMPTY(&s->pending_queue));
> +    assert(QTAILQ_EMPTY(&s->completion_queue));
> +}
> +
> +static int
> +pvscsi_post_load(void *opaque, int version_id)
> +{
> +#ifdef PVSCSI_ENABLE_MSIX
> +    PVSCSI_State *s = (PVSCSI_State *) opaque;

Useless cast.

> +#endif
> +    DCBPRINTF("Starting postload...");
> +
> +#ifdef PVSCSI_ENABLE_MSIX
> +    if (s->msix_used) {
> +        if (!msix_vector_use(&s->dev, PVSCSI_VECTOR_COMPLETION)) {
> +            DERPRINTF("Failed to re-use MSI-X vectors");
> +            return -1;
> +        }
> +    }
> +#endif
> +    return 0;
> +}
> +
> +static VMStateDescription vmstate_pvscsi = {
> +        .name = "pvscsi",
> +        .version_id = 0,
> +        .minimum_version_id = 0,
> +        .minimum_version_id_old = 0,
> +        .pre_save = pvscsi_pre_save,
> +        .post_load = pvscsi_post_load,
> +        .fields      = (VMStateField[]) {
> +                VMSTATE_PCI_DEVICE(dev, PVSCSI_State),
> +#ifdef PVSCSI_ENABLE_MSIX
> +                VMSTATE_UINT8(msix_used, PVSCSI_State),
> +#endif
> +                VMSTATE_UINT8(msi_used, PVSCSI_State),
> +                VMSTATE_UINT64(reg_interrupt_status, PVSCSI_State),
> +                VMSTATE_UINT64(reg_interrupt_enabled, PVSCSI_State),
> +                VMSTATE_UINT64(reg_command_status, PVSCSI_State),
> +                VMSTATE_UINT64(curr_cmd, PVSCSI_State),
> +                VMSTATE_UINT32(curr_cmd_data_cntr, PVSCSI_State),
> +                VMSTATE_UINT32_ARRAY(curr_cmd_data, PVSCSI_State,
> +                    ARRAY_SIZE(((PVSCSI_State *)NULL)->curr_cmd_data)),
> +                VMSTATE_UINT8(rings_info_valid, PVSCSI_State),
> +
> +                VMSTATE_UINT64(rings.rs_pa, PVSCSI_State),
> +                VMSTATE_UINT32(rings.txr_len_mask, PVSCSI_State),
> +                VMSTATE_UINT32(rings.rxr_len_mask, PVSCSI_State),
> +                VMSTATE_UINT64_ARRAY(rings.req_ring_pages_pa, PVSCSI_State,
> +                    PVSCSI_SETUP_RINGS_MAX_NUM_PAGES),
> +                VMSTATE_UINT64_ARRAY(rings.cmp_ring_pages_pa, PVSCSI_State,
> +                    PVSCSI_SETUP_RINGS_MAX_NUM_PAGES),
> +                VMSTATE_UINT64(rings.consumed_ptr, PVSCSI_State),
> +                VMSTATE_UINT64(rings.filled_cmp_ptr, PVSCSI_State),
> +
> +                VMSTATE_END_OF_LIST()
> +            }
> +};
> +
> +static void
> +pvscsi_write_config(PCIDevice *pci, uint32_t addr, uint32_t val, int len)
> +{
> +    pci_default_write_config(pci, addr, val, len);
> +    msi_write_config(pci, addr, val, len);
> +#if defined(PVSCSI_ENABLE_MSIX)
> +    msix_write_config(pci, addr, val, len);
> +#endif
> +}
> +
> +static void pvscsi_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
> +
> +    k->init = pvscsi_init;
> +    k->exit = pvscsi_uninit;
> +    k->vendor_id = PCI_VENDOR_ID_VMWARE;
> +    k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI;
> +    k->class_id = PCI_CLASS_STORAGE_SCSI;
> +    k->subsystem_id = 0x1000;
> +    dc->reset = pvscsi_reset;
> +    dc->vmsd = &vmstate_pvscsi;
> +    k->config_write = pvscsi_write_config;
> +}
> +
> +static TypeInfo pvscsi_info = {
> +    .name          = "pvscsi",
> +    .parent        = TYPE_PCI_DEVICE,
> +    .instance_size = sizeof(PVSCSI_State),
> +    .class_init    = pvscsi_class_init,
> +};
> +
> +static void
> +pvscsi_register_types(void)
> +{
> +    type_register_static(&pvscsi_info);
> +
> +    DCBPRINTF("PVSCSI QEMU device emulation registered");
> +}
> +
> +type_init(pvscsi_register_types);
> diff --git a/hw/pvscsi.h b/hw/pvscsi.h
> new file mode 100644
> index 0000000..611c549
> --- /dev/null
> +++ b/hw/pvscsi.h
> @@ -0,0 +1,442 @@
> +/*
> + * QEMU VMWARE PVSCSI paravirtual SCSI bus
> + *
> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
> + *
> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> + *
> + * Based on implementation by Paolo Bonzini
> + * http://lists.gnu.org/archive/html/qemu-devel/2011-08/msg00729.html
> + *
> + * Authors:
> + * Paolo Bonzini <pbonzini@redhat.com>
> + * Dmitry Fleytman <dmitry@daynix.com>
> + * Yan Vugenfirer <yvugenfi@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef _PVSCSI_H_
> +#define _PVSCSI_H_

Underscores

> +
> +#define PAGE_SIZE (4096)
> +#define PAGE_SHIFT (12)

These probably conflict with system defines, please rename. Also,
define size using shift.

> +
> +#define PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT 128
> +
> +#define MASK(n)        ((1 << (n)) - 1)        /* make an n-bit mask */
> +
> +/*
> + * Following is an interface definition for
> + * PVSCSI device as provided by VMWARE
> + * See original copyright from Linux kernel v3.2.8
> + * header file drivers/scsi/vmw_pvscsi.h below.
> + */
> +
> + /*
> +  * VMware PVSCSI header file
> +  *
> +  * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
> +  *
> +  * This program is free software; you can redistribute it and/or modify it
> +  * under the terms of the GNU General Public License as published by the
> +  * Free Software Foundation; version 2 of the License and no later version.

GPLv2only conflicts with GPLv2+.

> +  *
> +  * This program is distributed in the hope that it will be useful, but
> +  * WITHOUT ANY WARRANTY; without even the implied warranty of
> +  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
> +  * NON INFRINGEMENT.  See the GNU General Public License for more
> +  * details.
> +  *
> +  * You should have received a copy of the GNU General Public License
> +  * along with this program; if not, write to the Free Software
> +  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

Web version, please.

> +  *
> +  * Maintained by: Alok N Kataria <address@hidden>

<address@hidden>?

> +  *
> +  */
> +
> +/*
> + * host adapter status/error codes
> + */
> +enum HostBusAdapterStatus {
> +   BTSTAT_SUCCESS       = 0x00,  /* CCB complete normally with no errors */
> +   BTSTAT_LINKED_COMMAND_COMPLETED           = 0x0a,
> +   BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG = 0x0b,
> +   BTSTAT_DATA_UNDERRUN = 0x0c,
> +   BTSTAT_SELTIMEO      = 0x11,  /* SCSI selection timeout */
> +   BTSTAT_DATARUN       = 0x12,  /* data overrun/underrun */
> +   BTSTAT_BUSFREE       = 0x13,  /* unexpected bus free */
> +   BTSTAT_INVPHASE      = 0x14,  /* invalid bus phase or sequence */
> +                                 /* requested by target           */
> +   BTSTAT_LUNMISMATCH   = 0x17,  /* linked CCB has different LUN  */
> +                                 /* from first CCB                */
> +   BTSTAT_SENSFAILED    = 0x1b,  /* auto request sense failed */
> +   BTSTAT_TAGREJECT     = 0x1c,  /* SCSI II tagged queueing message */
> +                                 /* rejected by target              */
> +   BTSTAT_BADMSG        = 0x1d,  /* unsupported message received by */
> +                                 /* the host adapter                */
> +   BTSTAT_HAHARDWARE    = 0x20,  /* host adapter hardware failed */
> +   BTSTAT_NORESPONSE    = 0x21,  /* target did not respond to SCSI ATN, */
> +                                 /* sent a SCSI RST                     */
> +   BTSTAT_SENTRST       = 0x22,  /* host adapter asserted a SCSI RST */
> +   BTSTAT_RECVRST       = 0x23,  /* other SCSI devices asserted a SCSI RST */
> +   BTSTAT_DISCONNECT    = 0x24,  /* target device reconnected improperly */
> +                                 /* (w/o tag)                            */
> +   BTSTAT_BUSRESET      = 0x25,  /* host adapter issued BUS device reset */
> +   BTSTAT_ABORTQUEUE    = 0x26,  /* abort queue generated */
> +   BTSTAT_HASOFTWARE    = 0x27,  /* host adapter software error */
> +   BTSTAT_HATIMEOUT     = 0x30,  /* host adapter hardware timeout error */
> +   BTSTAT_SCSIPARITY    = 0x34,  /* SCSI parity error detected */
> +};
> +
> +/*
> + * Register offsets.
> + *
> + * These registers are accessible both via i/o space and mm i/o.
> + */
> +
> +enum PVSCSIRegOffset {
> +    PVSCSI_REG_OFFSET_COMMAND        =    0x0,
> +    PVSCSI_REG_OFFSET_COMMAND_DATA   =    0x4,
> +    PVSCSI_REG_OFFSET_COMMAND_STATUS =    0x8,
> +    PVSCSI_REG_OFFSET_LAST_STS_0     =  0x100,
> +    PVSCSI_REG_OFFSET_LAST_STS_1     =  0x104,
> +    PVSCSI_REG_OFFSET_LAST_STS_2     =  0x108,
> +    PVSCSI_REG_OFFSET_LAST_STS_3     =  0x10c,
> +    PVSCSI_REG_OFFSET_INTR_STATUS    = 0x100c,
> +    PVSCSI_REG_OFFSET_INTR_MASK      = 0x2010,
> +    PVSCSI_REG_OFFSET_KICK_NON_RW_IO = 0x3014,
> +    PVSCSI_REG_OFFSET_DEBUG          = 0x3018,
> +    PVSCSI_REG_OFFSET_KICK_RW_IO     = 0x4018,
> +};
> +
> +/*
> + * Virtual h/w commands.
> + */
> +
> +enum PVSCSICommands {
> +    PVSCSI_CMD_FIRST             = 0, /* has to be first */
> +
> +    PVSCSI_CMD_ADAPTER_RESET     = 1,
> +    PVSCSI_CMD_ISSUE_SCSI        = 2,
> +    PVSCSI_CMD_SETUP_RINGS       = 3,
> +    PVSCSI_CMD_RESET_BUS         = 4,
> +    PVSCSI_CMD_RESET_DEVICE      = 5,
> +    PVSCSI_CMD_ABORT_CMD         = 6,
> +    PVSCSI_CMD_CONFIG            = 7,
> +    PVSCSI_CMD_SETUP_MSG_RING    = 8,
> +    PVSCSI_CMD_DEVICE_UNPLUG     = 9,
> +
> +    PVSCSI_CMD_LAST              = 10  /* has to be last */
> +};
> +
> +#define PVSCSI_COMMAND_PROCESSING_SUCCEEDED   (0)
> +#define PVSCSI_COMMAND_PROCESSING_FAILED     (-1)
> +
> +/*
> + * Command descriptor for PVSCSI_CMD_RESET_DEVICE --
> + */
> +
> +struct PVSCSICmdDescResetDevice {
> +    uint32_t    target;
> +    uint8_t     lun[8];
> +} QEMU_PACKED;

Typedefs missing (also below), please add.

> +
> +/*
> + * Command descriptor for PVSCSI_CMD_ABORT_CMD --
> + *
> + * - currently does not support specifying the LUN.
> + * - _pad should be 0.
> + */
> +
> +struct PVSCSICmdDescAbortCmd {
> +    uint64_t    context;
> +    uint32_t    target;
> +    uint32_t    _pad;
> +} QEMU_PACKED;
> +
> +/*
> + * Command descriptor for PVSCSI_CMD_SETUP_RINGS --
> + *
> + * Notes:
> + * - reqRingNumPages and cmpRingNumPages need to be power of two.
> + * - reqRingNumPages and cmpRingNumPages need to be different from 0,
> + * - reqRingNumPages and cmpRingNumPages need to be inferior to
> + *   PVSCSI_SETUP_RINGS_MAX_NUM_PAGES.
> + */
> +
> +#define PVSCSI_SETUP_RINGS_MAX_NUM_PAGES        32
> +struct PVSCSICmdDescSetupRings {
> +    uint32_t    reqRingNumPages;
> +    uint32_t    cmpRingNumPages;
> +    uint64_t    ringsStatePPN;
> +    uint64_t    reqRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
> +    uint64_t    cmpRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
> +} QEMU_PACKED;
> +
> +/*
> + * Command descriptor for PVSCSI_CMD_SETUP_MSG_RING --
> + *
> + * Notes:
> + * - this command was not supported in the initial revision of the h/w
> + *   interface. Before using it, you need to check that it is supported by
> + *   writing PVSCSI_CMD_SETUP_MSG_RING to the 'command' register, then
> + *   immediately after read the 'command status' register:
> + *       * a value of -1 means that the cmd is NOT supported,
> + *       * a value != -1 means that the cmd IS supported.
> + *   If it's supported the 'command status' register should return:
> + *      sizeof(PVSCSICmdDescSetupMsgRing) / sizeof(uint32_t).
> + * - this command should be issued _after_ the usual SETUP_RINGS so that the
> + *   RingsState page is already setup. If not, the command is a nop.
> + * - numPages needs to be a power of two,
> + * - numPages needs to be different from 0,
> + * - _pad should be zero.
> + */
> +
> +#define PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES  16
> +
> +struct PVSCSICmdDescSetupMsgRing {
> +    uint32_t    numPages;
> +    uint32_t    _pad;

Underscore

> +    uint64_t    ringPPNs[PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES];
> +} QEMU_PACKED;
> +
> +enum PVSCSIMsgType {
> +    PVSCSI_MSG_DEV_ADDED          = 0,
> +    PVSCSI_MSG_DEV_REMOVED        = 1,
> +    PVSCSI_MSG_LAST               = 2,
> +};
> +
> +/*
> + * Msg descriptor.
> + *
> + * sizeof(struct PVSCSIRingMsgDesc) == 128.
> + *
> + * - type is of type enum PVSCSIMsgType.
> + * - the content of args depend on the type of event being delivered.
> + */
> +
> +struct PVSCSIRingMsgDesc {
> +    uint32_t    type;
> +    uint32_t    args[31];
> +} QEMU_PACKED;
> +
> +struct PVSCSIMsgDescDevStatusChanged {
> +    uint32_t    type;  /* PVSCSI_MSG_DEV _ADDED / _REMOVED */
> +    uint32_t    bus;
> +    uint32_t    target;
> +    uint8_t     lun[8];
> +    uint32_t    pad[27];
> +} QEMU_PACKED;
> +
> +/*
> + * Rings state.
> + *
> + * - the fields:
> + *    . msgProdIdx,
> + *    . msgConsIdx,
> + *    . msgNumEntriesLog2,
> + *   .. are only used once the SETUP_MSG_RING cmd has been issued.
> + * - '_pad' helps to ensure that the msg related fields are on their own
> + *   cache-line.
> + */
> +
> +struct PVSCSIRingsState {
> +    uint32_t    reqProdIdx;
> +    uint32_t    reqConsIdx;
> +    uint32_t    reqNumEntriesLog2;
> +
> +    uint32_t    cmpProdIdx;
> +    uint32_t    cmpConsIdx;
> +    uint32_t    cmpNumEntriesLog2;
> +
> +    uint8_t     _pad[104];

Underscore.

> +
> +    uint32_t    msgProdIdx;
> +    uint32_t    msgConsIdx;
> +    uint32_t    msgNumEntriesLog2;
> +} QEMU_PACKED;
> +
> +/*
> + * Request descriptor.
> + *
> + * sizeof(RingReqDesc) = 128
> + *
> + * - context: is a unique identifier of a command. It could normally be any
> + *   64bit value, however we currently store it in the serialNumber variable
> + *   of struct SCSI_Command, so we have the following restrictions due to the
> + *   way this field is handled in the vmkernel storage stack:
> + *    * this value can't be 0,
> + *    * the upper 32bit need to be 0 since serialNumber is as a uint32_t.
> + *   Currently tracked as PR 292060.
> + * - dataLen: contains the total number of bytes that need to be transferred.
> + * - dataAddr:
> + *   * if PVSCSI_FLAG_CMD_WITH_SG_LIST is set: dataAddr is the PA of the first
> + *     s/g table segment, each s/g segment is entirely contained on a single
> + *     page of physical memory,
> + *   * if PVSCSI_FLAG_CMD_WITH_SG_LIST is NOT set, then dataAddr is the PA of
> + *     the buffer used for the DMA transfer,
> + * - flags:
> + *   * PVSCSI_FLAG_CMD_WITH_SG_LIST: see dataAddr above,
> + *   * PVSCSI_FLAG_CMD_DIR_NONE: no DMA involved,
> + *   * PVSCSI_FLAG_CMD_DIR_TOHOST: transfer from device to main memory,
> + *   * PVSCSI_FLAG_CMD_DIR_TODEVICE: transfer from main memory to device,
> + *   * PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB: reserved to handle CDBs larger than
> + *     16bytes. To be specified.
> + * - vcpuHint: vcpuId of the processor that will be most likely waiting for the
> + *   completion of the i/o. For guest OSes that use lowest priority message
> + *   delivery mode (such as windows), we use this "hint" to deliver the
> + *   completion action to the proper vcpu. For now, we can use the vcpuId of
> + *   the processor that initiated the i/o as a likely candidate for the vcpu
> + *   that will be waiting for the completion..
> + * - bus should be 0: we currently only support bus 0 for now.
> + * - unused should be zero'd.
> + */
> +
> +#define PVSCSI_FLAG_CMD_WITH_SG_LIST        (1 << 0)
> +#define PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB     (1 << 1)
> +#define PVSCSI_FLAG_CMD_DIR_NONE            (1 << 2)
> +#define PVSCSI_FLAG_CMD_DIR_TOHOST          (1 << 3)
> +#define PVSCSI_FLAG_CMD_DIR_TODEVICE        (1 << 4)
> +
> +#define PVSCSI_KNOWN_FLAGS \
> +  (PVSCSI_FLAG_CMD_WITH_SG_LIST     | \
> +   PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB  | \
> +   PVSCSI_FLAG_CMD_DIR_NONE         | \
> +   PVSCSI_FLAG_CMD_DIR_TOHOST       | \
> +   PVSCSI_FLAG_CMD_DIR_TODEVICE)
> +
> +struct PVSCSIRingReqDesc {
> +    uint64_t    context;
> +    uint64_t    dataAddr;
> +    uint64_t    dataLen;
> +    uint64_t    senseAddr;
> +    uint32_t    senseLen;
> +    uint32_t    flags;
> +    uint8_t     cdb[16];
> +    uint8_t     cdbLen;
> +    uint8_t     lun[8];
> +    uint8_t     tag;
> +    uint8_t     bus;
> +    uint8_t     target;
> +    uint8_t     vcpuHint;
> +    uint8_t     unused[59];
> +} QEMU_PACKED;
> +
> +typedef struct PVSCSIRingReqDesc PVSCSIRingReqDesc;
> +
> +/*
> + * Scatter-gather list management.
> + *
> + * As described above, when PVSCSI_FLAG_CMD_WITH_SG_LIST is set in the
> + * RingReqDesc.flags, then RingReqDesc.dataAddr is the PA of the first s/g
> + * table segment.
> + *
> + * - each segment of the s/g table contain a succession of struct
> + *   PVSCSISGElement.
> + * - each segment is entirely contained on a single physical page of memory.
> + * - a "chain" s/g element has the flag PVSCSI_SGE_FLAG_CHAIN_ELEMENT set in
> + *   PVSCSISGElement.flags and in this case:
> + *     * addr is the PA of the next s/g segment,
> + *     * length is undefined, assumed to be 0.
> + */
> +
> +struct PVSCSISGElement {
> +    uint64_t    addr;
> +    uint32_t    length;
> +    uint32_t    flags;
> +} QEMU_PACKED;
> +
> +/*
> + * Completion descriptor.
> + *
> + * sizeof(RingCmpDesc) = 32
> + *
> + * - context: identifier of the command. The same thing that was specified
> + *   under "context" as part of struct RingReqDesc at initiation time,
> + * - dataLen: number of bytes transferred for the actual i/o operation,
> + * - senseLen: number of bytes written into the sense buffer,
> + * - hostStatus: adapter status,
> + * - scsiStatus: device status,
> + * - _pad should be zero.
> + */
> +
> +struct PVSCSIRingCmpDesc {
> +    uint64_t    context;
> +    uint64_t    dataLen;
> +    uint32_t    senseLen;
> +    uint16_t    hostStatus;
> +    uint16_t    scsiStatus;
> +    uint32_t    _pad[2];
> +} QEMU_PACKED;
> +typedef struct PVSCSIRingCmpDesc PVSCSIRingCmpDesc;
> +
> +/*
> + * Interrupt status / IRQ bits.
> + */
> +
> +#define PVSCSI_INTR_CMPL_0                 (1 << 0)
> +#define PVSCSI_INTR_CMPL_1                 (1 << 1)
> +#define PVSCSI_INTR_CMPL_MASK              MASK(2)
> +
> +#define PVSCSI_INTR_MSG_0                  (1 << 2)
> +#define PVSCSI_INTR_MSG_1                  (1 << 3)
> +#define PVSCSI_INTR_MSG_MASK               (MASK(2) << 2)
> +
> +#define PVSCSI_INTR_ALL_SUPPORTED          MASK(4)
> +
> +/*
> + * Number of MSI-X vectors supported.
> + */
> +#define PVSCSI_MAX_INTRS        24
> +
> +/*
> + * Enumeration of supported MSI-X vectors
> + */
> +#define PVSCSI_VECTOR_COMPLETION   0
> +
> +/*
> + * Misc constants for the rings.
> + */
> +
> +#define PVSCSI_MAX_NUM_PAGES_REQ_RING   PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
> +#define PVSCSI_MAX_NUM_PAGES_CMP_RING   PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
> +#define PVSCSI_MAX_NUM_PAGES_MSG_RING   PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES
> +
> +#define PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE \
> +                (PAGE_SIZE / sizeof(struct PVSCSIRingReqDesc))
> +
> +#define PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE \
> +                (PAGE_SIZE / sizeof(PVSCSIRingCmpDesc))
> +
> +#define PVSCSI_MAX_REQ_QUEUE_DEPTH \
> +    (PVSCSI_MAX_NUM_PAGES_REQ_RING * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE)
> +
> +#define PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES     1
> +#define PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES 1
> +#define PVSCSI_MEM_SPACE_MISC_NUM_PAGES        2
> +#define PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES     2
> +#define PVSCSI_MEM_SPACE_MSIX_NUM_PAGES        2
> +
> +enum PVSCSIMemSpace {
> +    PVSCSI_MEM_SPACE_COMMAND_PAGE       = 0,
> +    PVSCSI_MEM_SPACE_INTR_STATUS_PAGE   = 1,
> +    PVSCSI_MEM_SPACE_MISC_PAGE          = 2,
> +    PVSCSI_MEM_SPACE_KICK_IO_PAGE       = 4,
> +    PVSCSI_MEM_SPACE_MSIX_TABLE_PAGE    = 6,
> +    PVSCSI_MEM_SPACE_MSIX_PBA_PAGE      = 7,
> +};
> +
> +#define PVSCSI_MEM_SPACE_NUM_PAGES \
> +    (PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES +       \
> +     PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES +   \
> +     PVSCSI_MEM_SPACE_MISC_NUM_PAGES +          \
> +     PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES +       \
> +     PVSCSI_MEM_SPACE_MSIX_NUM_PAGES)
> +
> +#define PVSCSI_MEM_SPACE_SIZE        (PVSCSI_MEM_SPACE_NUM_PAGES * PAGE_SIZE)
> +
> +#endif /* _VMW_PVSCSI_H_ */
> --
> 1.7.9.5
>
Deep Debroy July 9, 2012, 7:31 a.m. UTC | #2
On Sat, Jul 7, 2012 at 1:38 AM, Blue Swirl <blauwirbel@gmail.com> wrote:
> On Sat, Jul 7, 2012 at 5:07 AM, Deep Debroy <ddebroy@gmail.com> wrote:
>> Signed-off-by: Deep Debroy <ddebroy@gmail.com>
>> ---
>>  default-configs/pci.mak    |    1 +
>>  docs/specs/pvscsi-spec.txt |   92 ++++
>>  hw/Makefile.objs           |    1 +
>>  hw/pci.h                   |    1 +
>>  hw/pvscsi.c                | 1260 ++++++++++++++++++++++++++++++++++++++++++++
>>  hw/pvscsi.h                |  442 ++++++++++++++++
>>  6 files changed, 1797 insertions(+)
>>  create mode 100644 docs/specs/pvscsi-spec.txt
>>  create mode 100644 hw/pvscsi.c
>>  create mode 100644 hw/pvscsi.h
>>
>> diff --git a/default-configs/pci.mak b/default-configs/pci.mak
>> index 9d3e1db..9fd4896 100644
>> --- a/default-configs/pci.mak
>> +++ b/default-configs/pci.mak
>> @@ -10,6 +10,7 @@ CONFIG_EEPRO100_PCI=y
>>  CONFIG_PCNET_PCI=y
>>  CONFIG_PCNET_COMMON=y
>>  CONFIG_LSI_SCSI_PCI=y
>> +CONFIG_PVSCSI_SCSI_PCI=y
>>  CONFIG_RTL8139_PCI=y
>>  CONFIG_E1000_PCI=y
>>  CONFIG_IDE_CORE=y
>> diff --git a/docs/specs/pvscsi-spec.txt b/docs/specs/pvscsi-spec.txt
>> new file mode 100644
>> index 0000000..27f5529
>> --- /dev/null
>> +++ b/docs/specs/pvscsi-spec.txt
>> @@ -0,0 +1,92 @@
>> +General Description
>> +===================
>> +
>> +This document describes VMWare PVSCSI device interface specification.
>> +Created by Dmitry Fleytman (dmitry@daynix.com), Daynix Computing LTD.
>> +Based on source code of PVSCSI Linux driver from kernel 3.0.4
>> +
>> +PVSCSI Device Interface Overview
>> +================================
>> +
>> +The interface is based on memory area shared between hypervisor and VM.
>> +Memory area is obtained by driver as device IO memory resource of
>> +PVSCSI_MEM_SPACE_SIZE length.
>> +The shared memory consists of registers area and rings area.
>> +The registers area is used to raise hypervisorinterrupts and issue device
>
> hypervisor interrupts
>
>> +commands. The rings area is used to transfer data descruiptors and SCSI
>
> descriptors
>
>> +commands from VM to hypervisor and to transfer messages produced by
>> +hypervisor to VM. Data itself is transferred via virtual scatter-gather DMA.
>> +
>> +PVSCSI Device Registers
>> +=======================
>> +
>> +Registers area length is 1 page (PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES).
>> +Registers area structure is described by PVSCSIRegOffset enumeration.
>> +There are registers to issue device command (with optional short data),
>> +issue device interrupt, control interrupts masking.
>> +
>> +PVSCSI Device Rings
>> +===================
>> +
>> +There are three rings in shared memory:
>> +
>> +    1. Request ring (struct PVSCSIRingReqDesc *req_ring)
>> +        - ring for OS to device requests
>> +    2. Completion ring (struct PVSCSIRingCmpDesc *cmp_ring)
>> +        - ring for device request completions
>> +    3. Message ring (struct PVSCSIRingMsgDesc *msg_ring)
>> +        - ring for messages from device.
>> +       This ring is optional and may be not configured.
>> +There is a control area (struct PVSCSIRingsState *rings_state) used to control
>> +rings operation.
>> +
>> +PVSCSI Device to Host Interrupts
>> +================================
>> +There are following interrupt types supported by PVSCSI device:
>> +    1. Completion interrupts (completion ring notifications):
>> +        PVSCSI_INTR_CMPL_0
>> +        PVSCSI_INTR_CMPL_1
>> +    2. Message interrupts (message ring notifications):
>> +        PVSCSI_INTR_MSG_0
>> +        PVSCSI_INTR_MSG_1
>> +
>> +Interrupts are controlled via PVSCSI_REG_OFFSET_INTR_MASK register
>> +Bit set means interrupt enabled, bit cleared - disabled
>> +
>> +Interrupt modes supported are legacy, MSI and MSI-X
>> +In case of legacy interrupts register PVSCSI_REG_OFFSET_INTR_STATUS
>> +used to verify interrupt arrival and to clear interrupt state
>> +Interrupts are cleared by writing processed bits back
>> +to interrupt status register.
>> +
>> +PVSCSI Device Operation Sequences
>> +=================================
>> +
>> +1. Startup sequence:
>> +    a. Issue PVSCSI_CMD_ADAPTER_RESET command;
>> +    aa. Windows driver reads interrupt status register here;
>> +    b. Issue PVSCSI_CMD_SETUP_MSG_RING command with no additional data,
>> +       check status and disable device messages if error returned;
>> +       (Omitted if device messages disabled by driver configuration)
>> +    c. Issue PVSCSI_CMD_SETUP_RINGS command, provide rings configuration
>> +       as struct PVSCSICmdDescSetupRings;
>> +    d. Issue PVSCSI_CMD_SETUP_MSG_RING command again, provide
>> +       rings configuration as struct PVSCSICmdDescSetupMsgRing;
>> +    e. Unmask completion and message (if device messages enabled) interrupts.
>> +
>> +2. Shutdown sequences
>> +    a. Mask interrupts;
>> +    b. Flush request ring using PVSCSI_REG_OFFSET_KICK_NON_RW_IO;
>> +    c. Issue PVSCSI_CMD_ADAPTER_RESET command.
>> +
>> +3. Send request
>> +    a. Fill next free request ring descriptor;
>> +    b. Issue PVSCSI_REG_OFFSET_KICK_RW_IO for R/W operations;
>> +       or PVSCSI_REG_OFFSET_KICK_NON_RW_IO for other operations.
>> +
>> +4. Abort command
>> +    a. Issue PVSCSI_CMD_ABORT_CMD command;
>> +
>> +5. Request completion processing
>> +    a. Upon completion interrupt arrival process completion
>> +       and message (if enabled) rings.
>> diff --git a/hw/Makefile.objs b/hw/Makefile.objs
>> index 3d77259..6dd1eea 100644
>> --- a/hw/Makefile.objs
>> +++ b/hw/Makefile.objs
>> @@ -86,6 +86,7 @@ hw-obj-$(CONFIG_OPENCORES_ETH) += opencores_eth.o
>>
>>  # SCSI layer
>>  hw-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
>> +hw-obj-$(CONFIG_PVSCSI_SCSI_PCI) += pvscsi.o
>>  hw-obj-$(CONFIG_ESP) += esp.o
>>
>>  hw-obj-y += sysbus.o isa-bus.o
>> diff --git a/hw/pci.h b/hw/pci.h
>> index 79d38fd..3c197d9 100644
>> --- a/hw/pci.h
>> +++ b/hw/pci.h
>> @@ -60,6 +60,7 @@
>>  #define PCI_DEVICE_ID_VMWARE_NET         0x0720
>>  #define PCI_DEVICE_ID_VMWARE_SCSI        0x0730
>>  #define PCI_DEVICE_ID_VMWARE_IDE         0x1729
>> +#define PCI_DEVICE_ID_VMWARE_PVSCSI      0x07C0
>
> Please retain numeric order.

The pvscsi driver in the linux guests use the 0x7C0 ID as part of
MODULE_DEVICE_TABLE. So I will have to keep that intact. The other IDs
(_NET, _SCSI and _IDE) don't appear to be used by anything in qemu.
Should I remove them?
>
>>
>>  /* Intel (0x8086) */
>>  #define PCI_DEVICE_ID_INTEL_82551IT      0x1209
>> diff --git a/hw/pvscsi.c b/hw/pvscsi.c
>> new file mode 100644
>> index 0000000..719ea98
>> --- /dev/null
>> +++ b/hw/pvscsi.c
>> @@ -0,0 +1,1260 @@
>> +/*
>> + * QEMU VMWARE PVSCSI paravirtual SCSI bus
>> + *
>> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
>> + *
>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
>> + *
>> + * Based on implementation by Paolo Bonzini
>> + * http://lists.gnu.org/archive/html/qemu-devel/2011-08/msg00729.html
>> + *
>> + * Authors:
>> + * Paolo Bonzini <pbonzini@redhat.com>
>> + * Dmitry Fleytman <dmitry@daynix.com>
>> + * Yan Vugenfirer <yvugenfi@redhat.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#define DEBUG_PVSCSI_WARNINGS
>> +#define DEBUG_PVSCSI_ERRORS
>> +/*
>> +#define DEBUG_PVSCSI_CALLBACKS
>> +#define DEBUG_PVSCSI_RINGS
>> +#define DEBUG_PVSCSI_COMMANDS
>> +#define DEBUG_PVSCSI_INTERRUPTS
>> +#define DEBUG_PVSCSI_SHMEM_ACCESS
>> +#define DEBUG_PVSCSI_SHMEM_ACCESS
>> +#define DEBUG_SCSI_EXCHANGE
>> +*/
>
> Please use //, it's easier to enable one line in the middle. With
> tracepoints there would be no need.

Sounds good. Will modify the code to use tracepoints.
>
>> +
>> +#ifdef DEBUG_PVSCSI_SHMEM_ACCESS
>> +#define DSHPRINTF(fmt, ...)                                                 \
>> +    do {                                                                    \
>> +        printf("[pvscsi][SH][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>> +    } while (0)
>> +#else
>> +#define DSHPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#ifdef DEBUG_PVSCSI_CALLBACKS
>> +#define DCBPRINTF(fmt, ...)                                                 \
>> +    do {                                                                    \
>> +        printf("[pvscsi][CB][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>> +    } while (0)
>> +#else
>> +#define DCBPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#ifdef DEBUG_PVSCSI_WARNINGS
>> +#define DWRPRINTF(fmt, ...)                                                 \
>> +    do {                                                                    \
>> +        printf("[pvscsi][WR][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>> +    } while (0)
>> +#else
>> +#define DWRPRINTF(fmt, ...) do {} while (0)
>> +#endif
>
> Please check if qemu_log(LOG_UNIMP,...) would make sense for the cases
> where the guest may be issuing commands which QEMU doesn't implement
> yet.
>
>> +
>> +#ifdef DEBUG_PVSCSI_ERRORS
>> +#define DERPRINTF(fmt, ...)                                                 \
>> +    do {                                                                    \
>> +        printf("[pvscsi][ER][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>> +    } while (0)
>> +#else
>> +#define DERPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#ifdef DEBUG_PVSCSI_COMMANDS
>> +#define DCMPRINTF(fmt, ...)                                                 \
>> +    do {                                                                    \
>> +        printf("[pvscsi][CM][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>> +    } while (0)
>> +#else
>> +#define DCMPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#ifdef DEBUG_SCSI_EXCHANGE
>> +#define DSXPRINTF(fmt, ...)                                                 \
>> +    do {                                                                    \
>> +        printf("[pvscsi][SX][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>> +    } while (0)
>> +#else
>> +#define DSXPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#ifdef DEBUG_PVSCSI_RINGS
>> +#define DRIPRINTF(fmt, ...)                                                 \
>> +    do {                                                                    \
>> +        printf("[pvscsi][RI][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>> +    } while (0)
>> +#else
>> +#define DRIPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#ifdef DEBUG_PVSCSI_INTERRUPTS
>> +#define DIRPRINTF(fmt, ...)                                                 \
>> +    do {                                                                    \
>> +        printf("[pvscsi][IR][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>> +    } while (0)
>> +#else
>> +#define DIRPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +/*
>> + * MSI-X support is disabled because it leads Windows OS to crash on startup.
>> + * The crash happens because Windows driver requires MSI-X shared memory
>> + * to be part of the same BAR used for rings state, registers, etc.
>> + * This is not supported by QEMU infrastructure so separate BAR created from
>> + * MSI-X purposes. Windows driver fails to deal with 2 BARs.
>> + * Linux works fine with current MSI-X implementation enabled.
>> + */
>> +#undef PVSCSI_ENABLE_MSIX
>> +
>> +#include "softfloat.h"
>> +#include "compiler.h"
>> +#include "scsi-defs.h"
>> +#include "hw/hw.h"
>> +#include "hw/pci.h"
>> +#include "hw/scsi.h"
>> +#ifdef PVSCSI_ENABLE_MSIX
>
> Please remove.

For now, I think I will remove MSIX support altogether from pvscsi on
the qemu side so that both Linux and Windows guests can work using
regular MSI. If I can figure out a way to get the Windows driver to
work with MSIX, I will introduce that later. Does that sound good?
>
>> +#include "msix.h"
>> +#endif
>> +#include "msi.h"
>> +#include "qemu-queue.h"
>> +#include "pvscsi.h"
>> +#include "vmware_utils.h"
>
> The include lines should be just after the copyright header.
>
>> +
>> +#define PVSCSI_MAX_DEVS                   (64)
>> +#define PVSCSI_MSIX_NUM_VECTORS           (1)
>> +
>> +typedef struct {
>
> typedef struct PVSCSIRingsMgr {
>
>> +    uint64_t            rs_pa;
>> +
>> +    uint32_t            txr_len_mask;
>> +    uint32_t            rxr_len_mask;
>> +    uint64_t            req_ring_pages_pa[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
>> +    uint64_t            cmp_ring_pages_pa[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
>> +    uint64_t            consumed_ptr;
>> +    uint64_t            filled_cmp_ptr;
>> +} PVSCSIRingsMgr;
>> +
>> +#define _RS_GET_FIELD(rs_pa, field) \
>> +    (vmw_shmem_ld32(rs_pa + offsetof(struct PVSCSIRingsState, field)))
>> +#define _RS_SET_FIELD(rs_pa, field, val) \
>> +    (vmw_shmem_st32(rs_pa + offsetof(struct PVSCSIRingsState, field), val))
>
> Don't use identifiers with leading underscores.
>
>> +
>> +/* Integer binary logarithm */
>> +static int
>> +pvscsi_log2(uint32_t input)
>> +{
>> +    int log = 0;
>> +    assert(input > 0);
>> +    while (input >> ++log) {
>> +
>> +        };
>
> Useless semicolon.
>
>> +    return log;
>> +}
>> +
>> +static void
>> +pvscsi_rings_mgr_init(PVSCSIRingsMgr *m, struct PVSCSICmdDescSetupRings *ri)
>> +{
>> +    int i;
>> +    uint32_t txr_len_log2, rxr_len_log2;
>> +    uint32_t req_ring_size, cmp_ring_size;
>> +    m->rs_pa = ri->ringsStatePPN << PAGE_SHIFT;
>> +
>> +    req_ring_size = ri->reqRingNumPages * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
>> +    cmp_ring_size = ri->cmpRingNumPages * PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
>> +    txr_len_log2 = pvscsi_log2(req_ring_size - 1);
>> +    rxr_len_log2 = pvscsi_log2(cmp_ring_size - 1);
>> +
>> +    m->txr_len_mask = MASK(txr_len_log2);
>> +    m->rxr_len_mask = MASK(rxr_len_log2);
>> +
>> +    m->consumed_ptr = 0;
>> +    m->filled_cmp_ptr = 0;
>> +
>> +    for (i = 0; i < ri->reqRingNumPages; i++) {
>> +        m->req_ring_pages_pa[i] = ri->reqRingPPNs[i] << PAGE_SHIFT;
>> +    }
>> +
>> +    for (i = 0; i < ri->cmpRingNumPages; i++) {
>> +        m->cmp_ring_pages_pa[i] = ri->cmpRingPPNs[i] << PAGE_SHIFT;
>> +    }
>> +
>> +    _RS_SET_FIELD(m->rs_pa, reqProdIdx, 0);
>> +    _RS_SET_FIELD(m->rs_pa, reqConsIdx, 0);
>> +    _RS_SET_FIELD(m->rs_pa, reqNumEntriesLog2, txr_len_log2);
>> +
>> +    _RS_SET_FIELD(m->rs_pa, cmpProdIdx, 0);
>> +    _RS_SET_FIELD(m->rs_pa, cmpConsIdx, 0);
>> +    _RS_SET_FIELD(m->rs_pa, cmpNumEntriesLog2, rxr_len_log2);
>> +
>> +    _RS_SET_FIELD(m->rs_pa, msgProdIdx, 0);
>> +    _RS_SET_FIELD(m->rs_pa, msgConsIdx, 0);
>> +    _RS_SET_FIELD(m->rs_pa, msgNumEntriesLog2, 0);
>> +
>> +    DRIPRINTF("TX/RX rings logarithms set to %d/%d",
>> +              txr_len_log2, rxr_len_log2);
>> +
>> +    /* Flush ring state page changes */
>> +    smp_wmb();
>> +}
>> +
>> +static void
>> +pvscsi_rings_mgr_cleanup(PVSCSIRingsMgr *mgr)
>> +{
>> +    mgr->rs_pa = 0;
>> +    mgr->txr_len_mask = 0;
>> +    mgr->consumed_ptr = 0;
>> +    mgr->filled_cmp_ptr = 0;
>> +    memset(mgr->req_ring_pages_pa, 0, sizeof(mgr->req_ring_pages_pa));
>> +    memset(mgr->cmp_ring_pages_pa, 0, sizeof(mgr->cmp_ring_pages_pa));
>> +}
>> +
>> +static inline target_phys_addr_t
>
> 'inline' may be premature optimization.
>
>> +pvscsi_rings_mgr_pop_req_descr(PVSCSIRingsMgr *mgr)
>> +{
>> +    uint32_t ready_prt = _RS_GET_FIELD(mgr->rs_pa, reqProdIdx);
>> +
>> +    if (ready_prt != mgr->consumed_ptr) {
>> +        uint32_t next_ready_ptr =
>> +            mgr->consumed_ptr++ & mgr->txr_len_mask;
>> +        uint32_t next_ready_page =
>> +            next_ready_ptr / PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
>> +        uint32_t inpage_idx =
>> +            next_ready_ptr % PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
>> +
>> +        return mgr->req_ring_pages_pa[next_ready_page] +
>> +               inpage_idx * sizeof(PVSCSIRingReqDesc);
>> +    } else {
>> +        return 0;
>> +    }
>> +}
>> +
>> +static inline void
>> +pvscsi_rings_mgr_flush_req_ring(PVSCSIRingsMgr *mgr)
>> +{
>> +    _RS_SET_FIELD(mgr->rs_pa, reqConsIdx, mgr->consumed_ptr);
>> +}
>> +
>> +static inline target_phys_addr_t
>> +pvscsi_rings_mgr_pop_cmp_descr(PVSCSIRingsMgr *mgr)
>> +{
>> +    /*
>> +     * According to Linux driver code it explicitly verifies that number
>> +     * of requests being processed by device is less then the size of
>> +     * completion queue, so device may omit completion queue overflow
>> +     * conditions check. We assume that this is true for other (Windows)
>> +     * drivers as well.
>> +     */
>> +
>> +    uint32_t free_cmp_ptr =
>> +        mgr->filled_cmp_ptr++ & mgr->rxr_len_mask;
>> +    uint32_t free_cmp_page =
>> +        free_cmp_ptr / PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
>> +    uint32_t inpage_idx =
>> +        free_cmp_ptr % PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
>> +    return mgr->cmp_ring_pages_pa[free_cmp_page] +
>> +           inpage_idx * sizeof(PVSCSIRingCmpDesc);
>> +}
>> +
>> +static inline void
>> +pvscsi_rings_mgr_flush_cmp_ring(PVSCSIRingsMgr *mgr)
>> +{
>> +    /* Flush descriptor changes */
>> +    smp_wmb();
>> +
>> +    DRIPRINTF("New production counter of completion ring is %" PRIx64,
>> +              mgr->filled_cmp_ptr);
>> +
>> +    _RS_SET_FIELD(mgr->rs_pa, cmpProdIdx, mgr->filled_cmp_ptr);
>> +}
>> +
>> +typedef QTAILQ_HEAD(, PVSCSIRequest) PVSCSIRequestList;
>> +#define PVSCSI_MAX_CMD_DATA_WORDS \
>> +    (sizeof(struct PVSCSICmdDescSetupRings)/sizeof(uint32_t))
>> +
>> +typedef struct {
>> +    PCIDevice dev;
>> +    MemoryRegion io_space;
>> +    SCSIBus bus;
>> +    QEMUBH *completion_worker;
>> +    PVSCSIRequestList pending_queue;
>> +    PVSCSIRequestList completion_queue;
>> +
>> +#ifdef PVSCSI_ENABLE_MSIX
>
> I think this can be removed.
>
>> +    MemoryRegion msix_space;
>> +    uint8_t msix_used;   /* Whether MSI-X support was installed successfully */
>> +#endif
>> +    uint8_t msi_used;    /* Whether MSI support was installed successfully   */
>> +    uint64_t reg_interrupt_status;        /* Interrupt status register value */
>> +    uint64_t reg_interrupt_enabled;       /* Interrupt mask register value   */
>> +    uint64_t reg_command_status;          /* Command status register value   */
>> +
>> +    /* Command data adoption mechanism */
>> +    uint64_t curr_cmd;                   /* Last command arrived             */
>> +    uint32_t curr_cmd_data_cntr;      /* Amount of data for last command  */
>> +
>> +    /* Collector for current command data */
>> +    uint32_t curr_cmd_data[PVSCSI_MAX_CMD_DATA_WORDS];
>> +
>> +    uint8_t rings_info_valid;            /* Whether rings are initialized    */
>> +    PVSCSIRingsMgr rings;                /* Data transfer rings manager      */
>> +} PVSCSI_State;
>
> The order of the fields could be optimized, there are a lot of
> structure holes due to alignment.
>
>> +
>> +static void
>> +pvscsi_reset_state(PVSCSI_State *s)
>> +{
>> +    s->curr_cmd = PVSCSI_CMD_FIRST;
>> +    s->curr_cmd_data_cntr = 0;
>> +    s->reg_command_status = PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
>> +    s->reg_interrupt_status = 0;
>> +    pvscsi_rings_mgr_cleanup(&s->rings);
>> +    s->rings_info_valid = FALSE;
>> +    QTAILQ_INIT(&s->pending_queue);
>> +    QTAILQ_INIT(&s->completion_queue);
>> +}
>> +
>> +typedef struct PVSCSISGState {
>> +    target_phys_addr_t elemAddr;
>> +    target_phys_addr_t dataAddr;
>> +    uint32_t resid;
>> +} PVSCSISGState;
>> +
>> +typedef struct PVSCSIRequest {
>> +    SCSIRequest *sreq;
>> +    uint8_t sense_key;
>> +    uint8_t completed;
>> +    int lun;
>> +    QEMUSGList sgl;
>> +    PVSCSISGState sg;
>> +    struct PVSCSIRingReqDesc req;
>> +    struct PVSCSIRingCmpDesc cmp;
>> +    QTAILQ_ENTRY(PVSCSIRequest) next;
>> +} PVSCSIRequest;
>> +
>> +static void
>> +pvscsi_free_queue(PVSCSIRequestList *req_list)
>> +{
>> +    PVSCSIRequest *pvscsi_req;
>> +
>> +    while (!QTAILQ_EMPTY(req_list)) {
>> +        pvscsi_req = QTAILQ_FIRST(req_list);
>> +        QTAILQ_REMOVE(req_list, pvscsi_req, next);
>> +        g_free(pvscsi_req);
>> +    }
>> +}
>> +
>> +static void
>> +pvscsi_reset_adapter(PVSCSI_State *s)
>> +{
>> +    qbus_reset_all_fn(&s->bus);
>> +    pvscsi_free_queue(&s->completion_queue);
>> +    assert(QTAILQ_EMPTY(&s->pending_queue));
>> +    pvscsi_reset_state(s);
>> +}
>> +
>> +static inline void
>> +pvscsi_update_irq_status(PVSCSI_State *s)
>> +{
>> +    int should_raise = !!(s->reg_interrupt_enabled & s->reg_interrupt_status);
>
> bool
>
>> +
>> +    DIRPRINTF("Interrupt level set to %d (MASK: 0x%llX, STATUS: 0x%llx)",
>> +              should_raise,
>> +              (long long unsigned int) s->reg_interrupt_enabled,
>> +              (long long unsigned int) s->reg_interrupt_status);
>
> Just use PRIx64 and remove the casts.
>
>> +
>> +#ifdef PVSCSI_ENABLE_MSIX
>> +    if (s->msix_used && msix_enabled(&s->dev)) {
>> +        if (should_raise) {
>> +            DIRPRINTF("Sending MSI-X notification");
>> +            msix_notify(&s->dev, PVSCSI_VECTOR_COMPLETION);
>> +        }
>> +        return;
>> +    }
>> +#endif
>> +
>> +    if (s->msi_used && msi_enabled(&s->dev)) {
>> +        if (should_raise) {
>> +            DIRPRINTF("Sending MSI notification");
>> +            msi_notify(&s->dev, PVSCSI_VECTOR_COMPLETION);
>> +        }
>> +        return;
>> +    }
>> +
>> +    qemu_set_irq(s->dev.irq[0], should_raise);
>> +}
>> +
>> +static inline void
>> +pvscsi_raise_completion_interrupt(PVSCSI_State *s)
>> +{
>> +    s->reg_interrupt_status |= PVSCSI_INTR_CMPL_0;
>> +
>> +    /* Memory barrier to flush interrupt status register changes*/
>> +    smp_wmb();
>> +
>> +    pvscsi_update_irq_status(s);
>> +}
>> +
>> +static void
>> +pvscsi_cmp_ring_put(PVSCSI_State *s, struct PVSCSIRingCmpDesc *cmp_desc)
>> +{
>> +    target_phys_addr_t cmp_descr_pa;
>> +
>> +    cmp_descr_pa = pvscsi_rings_mgr_pop_cmp_descr(&s->rings);
>> +    DRIPRINTF("Got completion descriptor %"PRIx64, cmp_descr_pa);
>> +
>> +    vmw_shmem_write(cmp_descr_pa, (void *)cmp_desc, sizeof(*cmp_desc));
>> +}
>> +
>> +static void
>> +pvscsi_process_completion_queue(void *opaque)
>> +{
>> +    PVSCSI_State *s = opaque;
>> +    PVSCSIRequest *pvscsi_req;
>> +    bool has_completed = false;
>> +
>> +    while (!QTAILQ_EMPTY(&s->completion_queue)) {
>> +        pvscsi_req = QTAILQ_FIRST(&s->completion_queue);
>> +        QTAILQ_REMOVE(&s->completion_queue, pvscsi_req, next);
>> +        pvscsi_cmp_ring_put(s, &pvscsi_req->cmp);
>> +        g_free(pvscsi_req);
>> +        has_completed++;
>> +    }
>> +
>> +    if (has_completed) {
>> +        pvscsi_rings_mgr_flush_cmp_ring(&s->rings);
>> +        pvscsi_raise_completion_interrupt(s);
>> +    }
>> +}
>> +
>> +static inline void
>> +pvscsi_schedule_completion_processing(PVSCSI_State *s)
>> +{
>> +    /* Try putting more complete requests on the ring. */
>> +    if (!QTAILQ_EMPTY(&s->completion_queue)) {
>> +        qemu_bh_schedule(s->completion_worker);
>> +    }
>> +}
>> +
>> +static inline void
>> +pvscsi_complete_request(PVSCSI_State *s, PVSCSIRequest *r)
>> +{
>> +    assert(!r->completed);
>> +    DSXPRINTF("Completion: ctx: %"PRIx64", len: %"PRIx64", sense key: %u",
>> +              r->cmp.context, r->cmp.dataLen, r->sense_key);
>> +    if (r->sreq != NULL) {
>> +        scsi_req_unref(r->sreq);
>> +        r->sreq = NULL;
>> +    }
>> +    r->completed = 1;
>> +    QTAILQ_REMOVE(&s->pending_queue, r, next);
>> +    QTAILQ_INSERT_TAIL(&s->completion_queue, r, next);
>> +    pvscsi_schedule_completion_processing(s);
>> +}
>> +
>> +static QEMUSGList *pvscsi_get_sg_list(SCSIRequest *r)
>> +{
>> +    PVSCSIRequest *req = r->hba_private;
>> +
>> +    DSXPRINTF("Get SG list: depth: %u, size: %lu",
>> +              req->sgl.nsg, req->sgl.size);
>> +
>> +    return &req->sgl;
>> +}
>> +
>> +static void
>> +pvscsi_get_next_sg_elem(PVSCSISGState *sg)
>> +{
>> +    struct PVSCSISGElement elem;
>> +
>> +    for (;; sg->elemAddr = elem.addr) {
>> +        vmw_shmem_read(sg->elemAddr, (void *)&elem, sizeof(elem));
>> +        if (0 != (elem.flags & ~PVSCSI_KNOWN_FLAGS)) {
>
> The order in the expression is unnatural, please reverse.
>
>> +            /*
>> +             * There is PVSCSI_SGE_FLAG_CHAIN_ELEMENT flag described in
>> +             * header file but its value is unknown. This flag requires
>> +             * additional processing, so we put warning here to catch it
>> +             * some day and make proper implementation
>> +             */
>> +            DWRPRINTF("Unknown flags in SG element (val: 0x%X)", elem.flags);
>
> Lowercase hex, please.
>
>> +        }
>> +        break;
>> +    }
>> +
>> +    sg->elemAddr += sizeof(elem);
>> +    sg->dataAddr = elem.addr;
>> +    sg->resid = elem.length;
>> +}
>> +
>> +static inline void
>> +pvscsi_write_sense(PVSCSIRequest *r, uint8_t *sense, int len)
>> +{
>> +    r->cmp.senseLen = MIN(r->req.senseLen, len);
>> +    r->sense_key = sense[2];
>> +    vmw_shmem_write(r->req.senseAddr, sense, r->cmp.senseLen);
>> +}
>> +
>> +static void
>> +pvscsi_command_complete(SCSIRequest *req, uint32_t status, size_t resid)
>> +{
>> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev.qdev, req->bus->qbus.parent);
>> +    PVSCSIRequest *pvscsi_req = req->hba_private;
>> +
>> +    if (!pvscsi_req) {
>> +        DERPRINTF("PVSCSI: Can't find request for tag 0x%x", req->tag);
>> +        return;
>> +    }
>> +
>> +    if (resid) {
>> +        /* Short transfer.  */
>> +        DSXPRINTF("Not all data required for command transferred.");
>> +        pvscsi_req->cmp.hostStatus = BTSTAT_DATARUN;
>> +    }
>> +
>> +    pvscsi_req->cmp.scsiStatus = status;
>> +    if (CHECK_CONDITION == pvscsi_req->cmp.scsiStatus) {
>
> Order
>
>> +        uint8_t sense[SCSI_SENSE_BUF_SIZE];
>> +        int sense_len =
>> +            scsi_req_get_sense(pvscsi_req->sreq, sense, sizeof(sense));
>> +
>> +        DSXPRINTF("Sense information length is %d bytes", sense_len);
>> +        pvscsi_write_sense(pvscsi_req, sense, sense_len);
>> +    }
>> +    qemu_sglist_destroy(&pvscsi_req->sgl);
>> +    pvscsi_complete_request(s, pvscsi_req);
>> +}
>> +
>> +static void
>> +pvscsi_request_cancelled(SCSIRequest *req)
>> +{
>> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev.qdev, req->bus->qbus.parent);
>> +    PVSCSIRequest *pvscsi_req = req->hba_private;
>> +
>> +    if (BTSTAT_SUCCESS == pvscsi_req->cmp.hostStatus) {
>> +        pvscsi_req->cmp.hostStatus = BTSTAT_ABORTQUEUE;
>> +    }
>> +    pvscsi_complete_request(s, pvscsi_req);
>> +}
>> +
>> +static inline SCSIDevice*
>> +pvscsi_device_find(PVSCSI_State *s, int channel, int target,
>> +                   uint8_t *requested_lun, uint8_t *target_lun)
>> +{
>> +    if (requested_lun[0] || requested_lun[2] || requested_lun[3] ||
>> +        requested_lun[4] || requested_lun[5] || requested_lun[6] ||
>> +        requested_lun[7] || (PVSCSI_MAX_DEVS < target)) {
>> +        return NULL;
>> +    } else {
>> +        *target_lun = requested_lun[1];
>> +        return scsi_device_find(&s->bus, channel, target, *target_lun);
>> +    }
>> +}
>> +
>> +static PVSCSIRequest *
>> +pvscsi_queue_pending_descriptor(PVSCSI_State *s, SCSIDevice **d,
>> +                                struct PVSCSIRingReqDesc *descr)
>> +{
>> +    PVSCSIRequest *pvscsi_req;
>> +    uint8_t lun;
>> +
>> +    pvscsi_req = g_malloc0(sizeof(*pvscsi_req));
>> +    pvscsi_req->req = *descr;
>> +    pvscsi_req->cmp.context = pvscsi_req->req.context;
>> +    QTAILQ_INSERT_TAIL(&s->pending_queue, pvscsi_req, next);
>> +
>> +    *d = pvscsi_device_find(s, descr->bus, descr->target, descr->lun, &lun);
>> +    if (!*d) {
>> +        return pvscsi_req;
>> +    }
>> +
>> +    pvscsi_req->lun = lun;
>> +    return pvscsi_req;
>> +}
>> +
>> +static void
>> +pvscsi_convert_sglist(PVSCSIRequest *r)
>> +{
>> +    int chunk_size;
>> +    uint64_t data_length = r->req.dataLen;
>> +    PVSCSISGState sg = r->sg;
>> +    while (data_length) {
>> +        while (!sg.resid) {
>> +            pvscsi_get_next_sg_elem(&sg);
>> +            DCMPRINTF("Element: ctx: %"PRIx64", addr: %"PRIx64", len: 0x%ul",
>> +                      r->req.context, (uint64_t) r->sg.dataAddr, r->sg.resid);
>
> TARGET_FMT_plx? Didn't check the type.
>
>> +        }
>> +        assert(data_length > 0);
>> +        chunk_size = MIN((unsigned) data_length, sg.resid);
>> +        if (chunk_size) {
>> +            qemu_sglist_add(&r->sgl, sg.dataAddr, chunk_size);
>> +        }
>> +
>> +        sg.dataAddr += chunk_size;
>> +        data_length -= chunk_size;
>> +        sg.resid -= chunk_size;
>> +    }
>> +}
>> +
>> +static inline void
>> +pvscsi_build_sglist(PVSCSIRequest *r)
>> +{
>> +    qemu_sglist_init(&r->sgl, 1, NULL);
>> +    if (r->req.flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) {
>> +        pvscsi_convert_sglist(r);
>> +    } else {
>> +        qemu_sglist_add(&r->sgl, r->req.dataAddr, r->req.dataLen);
>> +    }
>> +}
>> +
>> +static void
>> +pvscsi_process_request_descriptor(PVSCSI_State *s,
>> +                                  struct PVSCSIRingReqDesc *descr)
>> +{
>> +    SCSIDevice *d;
>> +    PVSCSIRequest *r = pvscsi_queue_pending_descriptor(s, &d, descr);
>> +    int64_t n;
>> +
>> +    DSXPRINTF("SCSI cmd 0x%X, ctx: %" PRIx64, descr->cdb[0], descr->context);
>> +
>> +    if (!d) {
>> +        r->cmp.hostStatus = BTSTAT_SELTIMEO;
>> +        DSXPRINTF("Command directed to unknown device rejected.");
>> +        pvscsi_complete_request(s, r);
>> +        return;
>> +    }
>> +
>> +    if (descr->flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) {
>> +        r->sg.elemAddr = descr->dataAddr;
>> +    }
>> +
>> +    r->sreq = scsi_req_new(d, descr->context, r->lun, descr->cdb, r);
>> +    if (r->sreq->cmd.mode == SCSI_XFER_FROM_DEV &&
>> +        (descr->flags & PVSCSI_FLAG_CMD_DIR_TODEVICE)) {
>> +        r->cmp.hostStatus = BTSTAT_BADMSG;
>> +        DSXPRINTF("Command with invalid transfer direction rejected.");
>> +        scsi_req_cancel(r->sreq);
>> +        return;
>> +    }
>> +    if (r->sreq->cmd.mode == SCSI_XFER_TO_DEV &&
>> +        (descr->flags & PVSCSI_FLAG_CMD_DIR_TOHOST)) {
>> +        r->cmp.hostStatus = BTSTAT_BADMSG;
>> +        DSXPRINTF("Command with invalid transfer direction rejected.");
>> +        scsi_req_cancel(r->sreq);
>> +        return;
>> +    }
>> +
>> +    pvscsi_build_sglist(r);
>> +    n = scsi_req_enqueue(r->sreq);
>> +
>> +    if (n) {
>> +        scsi_req_continue(r->sreq);
>> +    }
>> +}
>> +
>> +static void
>> +pvscsi_process_io(PVSCSI_State *s)
>> +{
>> +    PVSCSIRingReqDesc descr;
>> +    target_phys_addr_t next_descr_pa;
>> +
>> +    assert(s->rings_info_valid);
>> +    while (0 != (next_descr_pa = pvscsi_rings_mgr_pop_req_descr(&s->rings))) {
>
> Order
>
>> +        DRIPRINTF("Got descriptor %"PRIx64, next_descr_pa);
>> +        vmw_shmem_read(next_descr_pa, &descr, sizeof(descr));
>> +        pvscsi_process_request_descriptor(s, &descr);
>> +    }
>> +
>> +    pvscsi_rings_mgr_flush_req_ring(&s->rings);
>> +}
>> +
>> +static void
>> +pvscsi_dbg_dump_tx_rings_config(struct PVSCSICmdDescSetupRings *rc)
>> +{
>> +#ifdef DEBUG_PVSCSI_RINGS
>> +    int i;
>> +    DRIPRINTF("TX Rings configuration received:");
>
> Since the code is surrounded by #ifdeffery, you can just use fprintf.
> With tracepoints, the #ifdeffery could be removed.
>
>> +    DRIPRINTF("Rings state page: %"PRIx64":", rc->ringsStatePPN);
>> +
>> +    DRIPRINTF("Request ring pages: %u:", rc->reqRingNumPages);
>> +    for (i = 0; i < rc->reqRingNumPages; i++) {
>> +        DRIPRINTF("\t%"PRIx64" ", rc->reqRingPPNs[i]);
>> +    }
>> +
>> +    DRIPRINTF("Confirm ring pages: %u:", rc->cmpRingNumPages);
>> +    for (i = 0; i < rc->cmpRingNumPages; i++) {
>> +        DRIPRINTF("\t%"PRIx64" ", rc->cmpRingPPNs[i]);
>> +    }
>> +#endif
>> +}
>> +
>> +static uint64_t
>> +pvscsi_on_cmd_config(PVSCSI_State *s)
>> +{
>> +    DWRPRINTF("Unimplemented command PVSCSI_CMD_CONFIG ignored.");
>> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
>> +}
>> +
>> +static uint64_t
>> +pvscsi_on_cmd_unplug(PVSCSI_State *s)
>> +{
>> +    DWRPRINTF("Unimplemented command PVSCSI_CMD_DEVICE_UNPLUG ignored.");
>> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
>> +}
>> +
>> +static uint64_t
>> +pvscsi_on_issue_scsi(PVSCSI_State *s)
>> +{
>> +    DWRPRINTF("Unimplemented command PVSCSI_CMD_ISSUE_SCSI ignored.");
>> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
>> +}
>> +
>> +static uint64_t
>> +pvscsi_on_cmd_setup_rings(PVSCSI_State *s)
>> +{
>> +    struct PVSCSICmdDescSetupRings *rc =
>> +        (struct PVSCSICmdDescSetupRings *) s->curr_cmd_data;
>> +
>> +    DCMPRINTF("Command PVSCSI_CMD_SETUP_RINGS arrived");
>> +
>> +    pvscsi_dbg_dump_tx_rings_config(rc);
>> +    pvscsi_rings_mgr_init(&s->rings, rc);
>> +    s->rings_info_valid = TRUE;
>> +    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
>> +}
>> +
>> +static uint64_t
>> +pvscsi_on_cmd_abort(PVSCSI_State *s)
>> +{
>> +#ifdef DEBUG_PVSCSI_COMMANDS
>> +    struct PVSCSICmdDescAbortCmd *data =
>> +        (struct PVSCSICmdDescAbortCmd *) s->curr_cmd_data;
>> +
>> +    DCMPRINTF("PVSCSI_CMD_ABORT_CMD for ctx %"PRIx64", target 0x%X",
>> +              data->context, data->target);
>> +#endif
>> +
>> +    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
>> +}
>> +
>> +static uint64_t
>> +pvscsi_on_cmd_unknown(PVSCSI_State *s)
>> +{
>> +    DWRPRINTF("Data for unknown command : 0x%X", s->curr_cmd_data[0]);
>> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
>> +}
>> +
>> +static uint64_t
>> +pvscsi_on_cmd_reset_device(PVSCSI_State *s)
>> +{
>> +    uint8_t target_lun = 0;
>> +    struct PVSCSICmdDescResetDevice *cmd =
>> +        (struct PVSCSICmdDescResetDevice *) s->curr_cmd_data;
>> +    SCSIDevice *sdev;
>> +
>> +    sdev = pvscsi_device_find(s, 0, cmd->target, cmd->lun, &target_lun);
>> +
>> +    DWRPRINTF("PVSCSI_CMD_RESET_DEVICE[target 0x%u lun 0x%d (dev 0x%p)]",
>> +              cmd->target, (int) target_lun, sdev);
>> +
>> +    if (sdev != NULL) {
>> +        device_reset(&sdev->qdev);
>> +        return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
>> +    }
>> +
>> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
>> +}
>> +
>> +static uint64_t
>> +pvscsi_on_cmd_reset_bus(PVSCSI_State *s)
>> +{
>> +    DCMPRINTF("Command PVSCSI_CMD_RESET_BUS arrived");
>> +
>> +    qbus_reset_all_fn(&s->bus);
>> +    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
>> +}
>> +
>> +static uint64_t
>> +pvscsi_on_cmd_setup_msg_ring(PVSCSI_State *s)
>> +{
>> +    /* No message ring needed for HDD devices */
>> +    DCMPRINTF("Command PVSCSI_CMD_SETUP_MSG_RING arrived");
>> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
>> +}
>> +
>> +static uint64_t
>> +pvscsi_on_cmd_adapter_reset(PVSCSI_State *s)
>> +{
>> +    /* No message ring needed for HDD devices */
>> +    DCMPRINTF("Command PVSCSI_CMD_ADAPTER_RESET arrived");
>> +
>> +    pvscsi_reset_adapter(s);
>> +    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
>> +}
>> +
>> +struct {
>
> 'static const'
>
>> +    int       data_size;
>> +    uint64_t  (*handler_fn)(PVSCSI_State *s);
>> +} pvscsi_commands[] = {
>> +    [PVSCSI_CMD_FIRST] = {
>> +        .data_size = 0,
>> +        .handler_fn = pvscsi_on_cmd_unknown
>
> Comma at the end of line is more friendly for future patches.
>
>> +    },
>> +
>> +    /* Not implemented, data size defined based on what arrives on windows */
>> +    [PVSCSI_CMD_CONFIG] = {
>> +        .data_size = 6*sizeof(uint32_t),
>
> Missing spaces around '*'. Please use checkpatch.pl to catch these.
>
>> +        .handler_fn = pvscsi_on_cmd_config
>> +    },
>> +
>> +    /* Command not implemented, data size is unknown */
>> +    [PVSCSI_CMD_ISSUE_SCSI] = {
>> +        .data_size = 0,
>> +        .handler_fn = pvscsi_on_issue_scsi
>> +    },
>> +
>> +    /* Command not implemented, data size is unknown */
>> +    [PVSCSI_CMD_DEVICE_UNPLUG] = {
>> +        .data_size = 0,
>> +        .handler_fn = pvscsi_on_cmd_unplug
>> +    },
>> +
>> +    [PVSCSI_CMD_SETUP_RINGS] = {
>> +        .data_size = sizeof(struct PVSCSICmdDescSetupRings),
>> +        .handler_fn = pvscsi_on_cmd_setup_rings
>> +    },
>> +
>> +    [PVSCSI_CMD_RESET_DEVICE] = {
>> +        .data_size = sizeof(struct PVSCSICmdDescResetDevice),
>> +        .handler_fn = pvscsi_on_cmd_reset_device
>> +    },
>> +
>> +    [PVSCSI_CMD_RESET_BUS] = {
>> +        .data_size = 0,
>> +        .handler_fn = pvscsi_on_cmd_reset_bus
>> +    },
>> +
>> +    [PVSCSI_CMD_SETUP_MSG_RING] = {
>> +        .data_size = 0,
>> +        .handler_fn = pvscsi_on_cmd_setup_msg_ring
>> +    },
>> +
>> +    [PVSCSI_CMD_ADAPTER_RESET] = {
>> +        .data_size = 0,
>> +        .handler_fn = pvscsi_on_cmd_adapter_reset
>> +    },
>> +
>> +    [PVSCSI_CMD_ABORT_CMD] = {
>> +        .data_size = sizeof(struct PVSCSICmdDescAbortCmd),
>> +        .handler_fn = pvscsi_on_cmd_abort
>> +    }
>> +};
>> +
>> +static void
>> +pvscsi_do_command_processing(PVSCSI_State *s)
>> +{
>> +    size_t bytes_arrived = s->curr_cmd_data_cntr*sizeof(uint32_t);
>
> Spacing.
>
>> +
>> +    if (bytes_arrived >= pvscsi_commands[s->curr_cmd].data_size) {
>> +        s->reg_command_status = pvscsi_commands[s->curr_cmd].handler_fn(s);
>> +        s->curr_cmd = PVSCSI_CMD_FIRST;
>> +        s->curr_cmd_data_cntr   = 0;
>> +    }
>> +}
>> +
>> +static void
>> +pvscsi_on_command_data(PVSCSI_State *s, uint32_t value)
>> +{
>> +    size_t bytes_arrived = s->curr_cmd_data_cntr*sizeof(uint32_t);
>> +
>> +    assert(bytes_arrived < sizeof(s->curr_cmd_data));
>> +    s->curr_cmd_data[s->curr_cmd_data_cntr++] = value;
>> +
>> +    pvscsi_do_command_processing(s);
>> +}
>> +
>> +static void
>> +pvscsi_on_command(PVSCSI_State *s, uint64_t cmd_id)
>> +{
>> +    if ((cmd_id > PVSCSI_CMD_FIRST) && (cmd_id < PVSCSI_CMD_LAST)) {
>> +        s->curr_cmd = cmd_id;
>> +    } else {
>> +        s->curr_cmd = PVSCSI_CMD_FIRST;
>> +        DWRPRINTF("Unknown command %"PRIx64, cmd_id);
>> +    }
>> +
>> +    s->curr_cmd_data_cntr = 0;
>> +    s->reg_command_status = PVSCSI_COMMAND_PROCESSING_FAILED;
>> +
>> +    pvscsi_do_command_processing(s);
>> +}
>> +
>> +static void
>> +pvscsi_io_write(void *opaque, target_phys_addr_t addr,
>> +                uint64_t val, unsigned size)
>> +{
>> +    PVSCSI_State *s = opaque;
>> +
>> +    switch (addr) {
>> +    case PVSCSI_REG_OFFSET_COMMAND:
>> +        pvscsi_on_command(s, val);
>> +        break;
>> +
>> +    case PVSCSI_REG_OFFSET_COMMAND_DATA:
>> +        pvscsi_on_command_data(s, (uint32_t) val);
>> +        break;
>> +
>> +    case PVSCSI_REG_OFFSET_INTR_STATUS:
>> +        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_STATUS write: %"PRIx64, val);
>> +        s->reg_interrupt_status &= ~val;
>> +        pvscsi_update_irq_status(s);
>> +        pvscsi_schedule_completion_processing(s);
>> +        break;
>> +
>> +    case PVSCSI_REG_OFFSET_INTR_MASK:
>> +        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_MASK write: %"PRIx64, val);
>> +        s->reg_interrupt_enabled = val;
>> +        pvscsi_update_irq_status(s);
>> +        break;
>> +
>> +    case PVSCSI_REG_OFFSET_KICK_NON_RW_IO:
>> +        DCBPRINTF("PVSCSI_REG_OFFSET_KICK_NON_RW_IO write: %"PRIx64, val);
>> +        pvscsi_process_io(s);
>> +        break;
>> +
>> +    case PVSCSI_REG_OFFSET_KICK_RW_IO:
>> +        DCBPRINTF("PVSCSI_REG_OFFSET_KICK_RW_IO write: %"PRIx64, val);
>> +        pvscsi_process_io(s);
>> +        break;
>> +
>> +    case PVSCSI_REG_OFFSET_DEBUG:
>> +        DCBPRINTF("PVSCSI_REG_OFFSET_DEBUG write: %"PRIx64, val);
>> +        break;
>> +
>> +    default:
>> +        DWRPRINTF("Unknown write: %" PRIx64 "%u bytes, val %" PRIx64,
>> +                  (uint64_t) addr, size, val);
>
> TARGET_FMT_plx
>
>> +        break;
>> +    }
>> +
>> +}
>> +
>> +static uint64_t
>> +pvscsi_io_read(void *opaque, target_phys_addr_t addr, unsigned size)
>> +{
>> +    PVSCSI_State *s = opaque;
>> +
>> +    switch (addr) {
>> +    case PVSCSI_REG_OFFSET_INTR_STATUS:
>> +        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_STATUS read: %"PRIx64,
>> +                  s->reg_interrupt_status);
>> +        return s->reg_interrupt_status;
>> +
>> +    case PVSCSI_REG_OFFSET_INTR_MASK:
>> +        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_MASK read: %"PRIx64,
>> +                  s->reg_interrupt_enabled);
>> +        return s->reg_interrupt_enabled;
>> +
>> +    case PVSCSI_REG_OFFSET_COMMAND_STATUS:
>> +        DCBPRINTF("PVSCSI_REG_OFFSET_COMMAND_STATUS read: %"PRIx64,
>> +                  s->reg_command_status);
>> +        return s->reg_command_status;
>> +
>> +    default:
>> +        DWRPRINTF("Unknown read, address %" PRIx64 ", %d bytes",
>> +                  (uint64_t) addr, size);
>
> Ditto
>
>> +        return 0;
>> +    }
>> +}
>> +
>> +#ifdef PVSCSI_ENABLE_MSIX
>> +static bool
>> +pvscsi_init_msix(PVSCSI_State *s)
>> +{
>> +    int res;
>> +    memory_region_init(&s->msix_space, "pvscsi-msix", PAGE_SIZE);
>> +    res = msix_init(&s->dev, PVSCSI_MSIX_NUM_VECTORS, &s->msix_space, 1, 0);
>> +    if (0 > res) {
>
> Unnatural order again, also below.
>
>> +        DWRPRINTF("Failed to initialize MSI-X, error %d", res);
>> +        memory_region_destroy(&s->msix_space);
>> +        s->msix_used = false;
>> +    } else {
>> +        res = msix_vector_use(&s->dev, PVSCSI_VECTOR_COMPLETION);
>> +        if (0 > res) {
>> +            DWRPRINTF("Failed to use MSI-X vector, error %d", res);
>> +            msix_uninit(&s->dev, &s->msix_space);
>> +            memory_region_destroy(&s->msix_space);
>> +            s->msix_used = false;
>> +        } else {
>> +            pci_register_bar(&s->dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY,
>> +                             &s->msix_space);
>> +            s->msix_used = true;
>> +        }
>> +    }
>
> How about moving the #ifdef lower and add here
> #else
>   s->msix_used = false;
> #endif
>
>> +    return s->msix_used;
>> +}
>> +
>> +static void
>> +pvscsi_cleanup_msix(PVSCSI_State *s)
>> +{
>
> and here #ifdef to make the function empty on !msix?
>
>> +    if (s->msix_used) {
>> +        msix_vector_unuse(&s->dev, PVSCSI_VECTOR_COMPLETION);
>> +        msix_uninit(&s->dev, &s->msix_space);
>> +        memory_region_destroy(&s->msix_space);
>> +    }
>> +}
>> +#endif
>> +
>> +static bool
>> +pvscsi_init_msi(PVSCSI_State *s)
>> +{
>> +#define PVSCSI_MSI_OFFSET        (0x50)
>> +#define PVSCSI_USE_64BIT         (true)
>> +#define PVSCSI_PER_VECTOR_MASK   (false)
>
> #defines should go to top of the file.
>
>> +
>> +    int res;
>> +    res = msi_init(&s->dev, PVSCSI_MSI_OFFSET, PVSCSI_MSIX_NUM_VECTORS,
>> +                   PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK);
>> +    if (0 > res) {
>
> Order
>
>> +        DWRPRINTF("Failed to initialize MSI, error %d", res);
>> +        s->msi_used = false;
>> +    } else {
>> +        s->msi_used = true;
>> +    }
>> +
>> +    return s->msi_used;
>> +}
>> +
>> +static void
>> +pvscsi_cleanup_msi(PVSCSI_State *s)
>> +{
>> +    if (s->msi_used) {
>> +        msi_uninit(&s->dev);
>> +    }
>> +}
>> +
>> +#ifdef PVSCSI_ENABLE_MSIX
>> +
>> +static void
>> +pvscsi_msix_save(QEMUFile *f, void *opaque)
>> +{
>> +    msix_save(&((PVSCSI_State *)opaque)->dev, f);
>> +}
>> +
>> +static int
>> +pvscsi_msix_load(QEMUFile *f, void *opaque, int version_id)
>> +{
>> +    msix_load(&((PVSCSI_State *)opaque)->dev, f);
>> +    return 0;
>> +}
>> +
>> +#endif
>> +
>> +static int
>> +pvscsi_init(PCIDevice *dev)
>> +{
>> +    static const MemoryRegionOps pv_scsi_ops = {
>> +            .read = pvscsi_io_read,
>> +            .write = pvscsi_io_write,
>> +            .endianness = DEVICE_NATIVE_ENDIAN,
>> +            .impl = {
>> +                    .min_access_size = 4,
>> +                    .max_access_size = 4,
>> +            },
>> +    };
>> +
>> +    static const struct SCSIBusInfo pvscsi_scsi_info = {
>> +            .tcq = true,
>> +            .max_target = PVSCSI_MAX_DEVS,
>> +            .max_channel = 0,
>> +            .max_lun = 0,
>> +
>> +            .get_sg_list = pvscsi_get_sg_list,
>> +            .complete = pvscsi_command_complete,
>> +            .cancel = pvscsi_request_cancelled
>
> comma
>
>> +    };
>> +
>> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev, dev);
>> +
>> +    DCBPRINTF("Starting init...");
>> +
>> +    /* PCI subsystem ID */
>> +    s->dev.config[PCI_SUBSYSTEM_ID] = 0x00;
>> +    s->dev.config[PCI_SUBSYSTEM_ID + 1] = 0x10;
>> +
>> +    /* PCI latency timer = 255 */
>> +    s->dev.config[PCI_LATENCY_TIMER] = 0xff;
>> +
>> +    /* Interrupt pin A */
>> +    s->dev.config[PCI_INTERRUPT_PIN] = 0x01;
>> +
>> +    memory_region_init_io(&s->io_space, &pv_scsi_ops, s,
>> +                          "pvscsi-io", PVSCSI_MEM_SPACE_SIZE);
>> +    pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->io_space);
>> +
>> +#ifdef PVSCSI_ENABLE_MSIX
>> +    register_savevm(&dev->qdev, "pvscsi-msix", -1, 1,
>> +                    pvscsi_msix_save, pvscsi_msix_load, s);
>> +
>> +    if (!pvscsi_init_msix(s)) {
>> +        DWRPRINTF("Failed to initialize MSI-X.");
>> +    }
>> +#endif
>
> With the adjusted #ifdeffery for the functions themselves, this code
> could be enabled always.
>
>> +    if (!pvscsi_init_msi(s)) {
>> +        DWRPRINTF("Failed to initialize MSI.");
>> +    }
>> +
>> +    s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s);
>> +    if (!s->completion_worker) {
>> +        pvscsi_cleanup_msi(s);
>> +#ifdef PVSCSI_ENABLE_MSIX
>> +        pvscsi_cleanup_msix(s);
>> +#endif
>> +        memory_region_destroy(&s->io_space);
>> +        return -ENOMEM;
>> +    }
>> +
>> +    scsi_bus_new(&s->bus, &dev->qdev, &pvscsi_scsi_info);
>> +    pvscsi_reset_state(s);
>> +
>> +    return 0;
>> +}
>> +
>> +static int
>> +pvscsi_uninit(PCIDevice *dev)
>> +{
>> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev, dev);
>> +
>> +    DCBPRINTF("Starting uninit...");
>> +    qemu_bh_delete(s->completion_worker);
>> +
>> +#ifdef PVSCSI_ENABLE_MSIX
>> +    unregister_savevm(&dev->qdev, "pvscsi-msix", s);
>> +    pvscsi_cleanup_msix(s);
>> +#endif
>> +    pvscsi_cleanup_msi(s);
>> +
>> +    memory_region_destroy(&s->io_space);
>> +
>> +    return 0;
>> +}
>> +
>> +static void
>> +pvscsi_reset(DeviceState *dev)
>> +{
>> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev.qdev, dev);
>> +    DCBPRINTF("Starting reset...");
>> +    pvscsi_reset_adapter(s);
>> +}
>> +
>> +static void
>> +pvscsi_pre_save(void *opaque)
>> +{
>> +    PVSCSI_State *s = (PVSCSI_State *) opaque;
>> +
>> +    DCBPRINTF("Starting presave...");
>> +
>> +    assert(QTAILQ_EMPTY(&s->pending_queue));
>> +    assert(QTAILQ_EMPTY(&s->completion_queue));
>> +}
>> +
>> +static int
>> +pvscsi_post_load(void *opaque, int version_id)
>> +{
>> +#ifdef PVSCSI_ENABLE_MSIX
>> +    PVSCSI_State *s = (PVSCSI_State *) opaque;
>
> Useless cast.
>
>> +#endif
>> +    DCBPRINTF("Starting postload...");
>> +
>> +#ifdef PVSCSI_ENABLE_MSIX
>> +    if (s->msix_used) {
>> +        if (!msix_vector_use(&s->dev, PVSCSI_VECTOR_COMPLETION)) {
>> +            DERPRINTF("Failed to re-use MSI-X vectors");
>> +            return -1;
>> +        }
>> +    }
>> +#endif
>> +    return 0;
>> +}
>> +
>> +static VMStateDescription vmstate_pvscsi = {
>> +        .name = "pvscsi",
>> +        .version_id = 0,
>> +        .minimum_version_id = 0,
>> +        .minimum_version_id_old = 0,
>> +        .pre_save = pvscsi_pre_save,
>> +        .post_load = pvscsi_post_load,
>> +        .fields      = (VMStateField[]) {
>> +                VMSTATE_PCI_DEVICE(dev, PVSCSI_State),
>> +#ifdef PVSCSI_ENABLE_MSIX
>> +                VMSTATE_UINT8(msix_used, PVSCSI_State),
>> +#endif
>> +                VMSTATE_UINT8(msi_used, PVSCSI_State),
>> +                VMSTATE_UINT64(reg_interrupt_status, PVSCSI_State),
>> +                VMSTATE_UINT64(reg_interrupt_enabled, PVSCSI_State),
>> +                VMSTATE_UINT64(reg_command_status, PVSCSI_State),
>> +                VMSTATE_UINT64(curr_cmd, PVSCSI_State),
>> +                VMSTATE_UINT32(curr_cmd_data_cntr, PVSCSI_State),
>> +                VMSTATE_UINT32_ARRAY(curr_cmd_data, PVSCSI_State,
>> +                    ARRAY_SIZE(((PVSCSI_State *)NULL)->curr_cmd_data)),
>> +                VMSTATE_UINT8(rings_info_valid, PVSCSI_State),
>> +
>> +                VMSTATE_UINT64(rings.rs_pa, PVSCSI_State),
>> +                VMSTATE_UINT32(rings.txr_len_mask, PVSCSI_State),
>> +                VMSTATE_UINT32(rings.rxr_len_mask, PVSCSI_State),
>> +                VMSTATE_UINT64_ARRAY(rings.req_ring_pages_pa, PVSCSI_State,
>> +                    PVSCSI_SETUP_RINGS_MAX_NUM_PAGES),
>> +                VMSTATE_UINT64_ARRAY(rings.cmp_ring_pages_pa, PVSCSI_State,
>> +                    PVSCSI_SETUP_RINGS_MAX_NUM_PAGES),
>> +                VMSTATE_UINT64(rings.consumed_ptr, PVSCSI_State),
>> +                VMSTATE_UINT64(rings.filled_cmp_ptr, PVSCSI_State),
>> +
>> +                VMSTATE_END_OF_LIST()
>> +            }
>> +};
>> +
>> +static void
>> +pvscsi_write_config(PCIDevice *pci, uint32_t addr, uint32_t val, int len)
>> +{
>> +    pci_default_write_config(pci, addr, val, len);
>> +    msi_write_config(pci, addr, val, len);
>> +#if defined(PVSCSI_ENABLE_MSIX)
>> +    msix_write_config(pci, addr, val, len);
>> +#endif
>> +}
>> +
>> +static void pvscsi_class_init(ObjectClass *klass, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>> +    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
>> +
>> +    k->init = pvscsi_init;
>> +    k->exit = pvscsi_uninit;
>> +    k->vendor_id = PCI_VENDOR_ID_VMWARE;
>> +    k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI;
>> +    k->class_id = PCI_CLASS_STORAGE_SCSI;
>> +    k->subsystem_id = 0x1000;
>> +    dc->reset = pvscsi_reset;
>> +    dc->vmsd = &vmstate_pvscsi;
>> +    k->config_write = pvscsi_write_config;
>> +}
>> +
>> +static TypeInfo pvscsi_info = {
>> +    .name          = "pvscsi",
>> +    .parent        = TYPE_PCI_DEVICE,
>> +    .instance_size = sizeof(PVSCSI_State),
>> +    .class_init    = pvscsi_class_init,
>> +};
>> +
>> +static void
>> +pvscsi_register_types(void)
>> +{
>> +    type_register_static(&pvscsi_info);
>> +
>> +    DCBPRINTF("PVSCSI QEMU device emulation registered");
>> +}
>> +
>> +type_init(pvscsi_register_types);
>> diff --git a/hw/pvscsi.h b/hw/pvscsi.h
>> new file mode 100644
>> index 0000000..611c549
>> --- /dev/null
>> +++ b/hw/pvscsi.h
>> @@ -0,0 +1,442 @@
>> +/*
>> + * QEMU VMWARE PVSCSI paravirtual SCSI bus
>> + *
>> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
>> + *
>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
>> + *
>> + * Based on implementation by Paolo Bonzini
>> + * http://lists.gnu.org/archive/html/qemu-devel/2011-08/msg00729.html
>> + *
>> + * Authors:
>> + * Paolo Bonzini <pbonzini@redhat.com>
>> + * Dmitry Fleytman <dmitry@daynix.com>
>> + * Yan Vugenfirer <yvugenfi@redhat.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#ifndef _PVSCSI_H_
>> +#define _PVSCSI_H_
>
> Underscores
>
>> +
>> +#define PAGE_SIZE (4096)
>> +#define PAGE_SHIFT (12)
>
> These probably conflict with system defines, please rename. Also,
> define size using shift.
>
>> +
>> +#define PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT 128
>> +
>> +#define MASK(n)        ((1 << (n)) - 1)        /* make an n-bit mask */
>> +
>> +/*
>> + * Following is an interface definition for
>> + * PVSCSI device as provided by VMWARE
>> + * See original copyright from Linux kernel v3.2.8
>> + * header file drivers/scsi/vmw_pvscsi.h below.
>> + */
>> +
>> + /*
>> +  * VMware PVSCSI header file
>> +  *
>> +  * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
>> +  *
>> +  * This program is free software; you can redistribute it and/or modify it
>> +  * under the terms of the GNU General Public License as published by the
>> +  * Free Software Foundation; version 2 of the License and no later version.
>
> GPLv2only conflicts with GPLv2+.

Indeed. Please note that this blurb was earlier copy-pasted from the
driver's code checked into the linux kernel (and it still seems to say
v2). I will update this with the latest maintainer's name and address.
However, I was wondering if the copyright blurb needs to be
copy-pasted here at all or is a reference to the source's location in
the linux kernel tree good enough?
>
>> +  *
>> +  * This program is distributed in the hope that it will be useful, but
>> +  * WITHOUT ANY WARRANTY; without even the implied warranty of
>> +  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
>> +  * NON INFRINGEMENT.  See the GNU General Public License for more
>> +  * details.
>> +  *
>> +  * You should have received a copy of the GNU General Public License
>> +  * along with this program; if not, write to the Free Software
>> +  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
>
> Web version, please.
>
>> +  *
>> +  * Maintained by: Alok N Kataria <address@hidden>
>
> <address@hidden>?
>
>> +  *
>> +  */
>> +
>> +/*
>> + * host adapter status/error codes
>> + */
>> +enum HostBusAdapterStatus {
>> +   BTSTAT_SUCCESS       = 0x00,  /* CCB complete normally with no errors */
>> +   BTSTAT_LINKED_COMMAND_COMPLETED           = 0x0a,
>> +   BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG = 0x0b,
>> +   BTSTAT_DATA_UNDERRUN = 0x0c,
>> +   BTSTAT_SELTIMEO      = 0x11,  /* SCSI selection timeout */
>> +   BTSTAT_DATARUN       = 0x12,  /* data overrun/underrun */
>> +   BTSTAT_BUSFREE       = 0x13,  /* unexpected bus free */
>> +   BTSTAT_INVPHASE      = 0x14,  /* invalid bus phase or sequence */
>> +                                 /* requested by target           */
>> +   BTSTAT_LUNMISMATCH   = 0x17,  /* linked CCB has different LUN  */
>> +                                 /* from first CCB                */
>> +   BTSTAT_SENSFAILED    = 0x1b,  /* auto request sense failed */
>> +   BTSTAT_TAGREJECT     = 0x1c,  /* SCSI II tagged queueing message */
>> +                                 /* rejected by target              */
>> +   BTSTAT_BADMSG        = 0x1d,  /* unsupported message received by */
>> +                                 /* the host adapter                */
>> +   BTSTAT_HAHARDWARE    = 0x20,  /* host adapter hardware failed */
>> +   BTSTAT_NORESPONSE    = 0x21,  /* target did not respond to SCSI ATN, */
>> +                                 /* sent a SCSI RST                     */
>> +   BTSTAT_SENTRST       = 0x22,  /* host adapter asserted a SCSI RST */
>> +   BTSTAT_RECVRST       = 0x23,  /* other SCSI devices asserted a SCSI RST */
>> +   BTSTAT_DISCONNECT    = 0x24,  /* target device reconnected improperly */
>> +                                 /* (w/o tag)                            */
>> +   BTSTAT_BUSRESET      = 0x25,  /* host adapter issued BUS device reset */
>> +   BTSTAT_ABORTQUEUE    = 0x26,  /* abort queue generated */
>> +   BTSTAT_HASOFTWARE    = 0x27,  /* host adapter software error */
>> +   BTSTAT_HATIMEOUT     = 0x30,  /* host adapter hardware timeout error */
>> +   BTSTAT_SCSIPARITY    = 0x34,  /* SCSI parity error detected */
>> +};
>> +
>> +/*
>> + * Register offsets.
>> + *
>> + * These registers are accessible both via i/o space and mm i/o.
>> + */
>> +
>> +enum PVSCSIRegOffset {
>> +    PVSCSI_REG_OFFSET_COMMAND        =    0x0,
>> +    PVSCSI_REG_OFFSET_COMMAND_DATA   =    0x4,
>> +    PVSCSI_REG_OFFSET_COMMAND_STATUS =    0x8,
>> +    PVSCSI_REG_OFFSET_LAST_STS_0     =  0x100,
>> +    PVSCSI_REG_OFFSET_LAST_STS_1     =  0x104,
>> +    PVSCSI_REG_OFFSET_LAST_STS_2     =  0x108,
>> +    PVSCSI_REG_OFFSET_LAST_STS_3     =  0x10c,
>> +    PVSCSI_REG_OFFSET_INTR_STATUS    = 0x100c,
>> +    PVSCSI_REG_OFFSET_INTR_MASK      = 0x2010,
>> +    PVSCSI_REG_OFFSET_KICK_NON_RW_IO = 0x3014,
>> +    PVSCSI_REG_OFFSET_DEBUG          = 0x3018,
>> +    PVSCSI_REG_OFFSET_KICK_RW_IO     = 0x4018,
>> +};
>> +
>> +/*
>> + * Virtual h/w commands.
>> + */
>> +
>> +enum PVSCSICommands {
>> +    PVSCSI_CMD_FIRST             = 0, /* has to be first */
>> +
>> +    PVSCSI_CMD_ADAPTER_RESET     = 1,
>> +    PVSCSI_CMD_ISSUE_SCSI        = 2,
>> +    PVSCSI_CMD_SETUP_RINGS       = 3,
>> +    PVSCSI_CMD_RESET_BUS         = 4,
>> +    PVSCSI_CMD_RESET_DEVICE      = 5,
>> +    PVSCSI_CMD_ABORT_CMD         = 6,
>> +    PVSCSI_CMD_CONFIG            = 7,
>> +    PVSCSI_CMD_SETUP_MSG_RING    = 8,
>> +    PVSCSI_CMD_DEVICE_UNPLUG     = 9,
>> +
>> +    PVSCSI_CMD_LAST              = 10  /* has to be last */
>> +};
>> +
>> +#define PVSCSI_COMMAND_PROCESSING_SUCCEEDED   (0)
>> +#define PVSCSI_COMMAND_PROCESSING_FAILED     (-1)
>> +
>> +/*
>> + * Command descriptor for PVSCSI_CMD_RESET_DEVICE --
>> + */
>> +
>> +struct PVSCSICmdDescResetDevice {
>> +    uint32_t    target;
>> +    uint8_t     lun[8];
>> +} QEMU_PACKED;
>
> Typedefs missing (also below), please add.
>
>> +
>> +/*
>> + * Command descriptor for PVSCSI_CMD_ABORT_CMD --
>> + *
>> + * - currently does not support specifying the LUN.
>> + * - _pad should be 0.
>> + */
>> +
>> +struct PVSCSICmdDescAbortCmd {
>> +    uint64_t    context;
>> +    uint32_t    target;
>> +    uint32_t    _pad;
>> +} QEMU_PACKED;
>> +
>> +/*
>> + * Command descriptor for PVSCSI_CMD_SETUP_RINGS --
>> + *
>> + * Notes:
>> + * - reqRingNumPages and cmpRingNumPages need to be power of two.
>> + * - reqRingNumPages and cmpRingNumPages need to be different from 0,
>> + * - reqRingNumPages and cmpRingNumPages need to be inferior to
>> + *   PVSCSI_SETUP_RINGS_MAX_NUM_PAGES.
>> + */
>> +
>> +#define PVSCSI_SETUP_RINGS_MAX_NUM_PAGES        32
>> +struct PVSCSICmdDescSetupRings {
>> +    uint32_t    reqRingNumPages;
>> +    uint32_t    cmpRingNumPages;
>> +    uint64_t    ringsStatePPN;
>> +    uint64_t    reqRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
>> +    uint64_t    cmpRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
>> +} QEMU_PACKED;
>> +
>> +/*
>> + * Command descriptor for PVSCSI_CMD_SETUP_MSG_RING --
>> + *
>> + * Notes:
>> + * - this command was not supported in the initial revision of the h/w
>> + *   interface. Before using it, you need to check that it is supported by
>> + *   writing PVSCSI_CMD_SETUP_MSG_RING to the 'command' register, then
>> + *   immediately after read the 'command status' register:
>> + *       * a value of -1 means that the cmd is NOT supported,
>> + *       * a value != -1 means that the cmd IS supported.
>> + *   If it's supported the 'command status' register should return:
>> + *      sizeof(PVSCSICmdDescSetupMsgRing) / sizeof(uint32_t).
>> + * - this command should be issued _after_ the usual SETUP_RINGS so that the
>> + *   RingsState page is already setup. If not, the command is a nop.
>> + * - numPages needs to be a power of two,
>> + * - numPages needs to be different from 0,
>> + * - _pad should be zero.
>> + */
>> +
>> +#define PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES  16
>> +
>> +struct PVSCSICmdDescSetupMsgRing {
>> +    uint32_t    numPages;
>> +    uint32_t    _pad;
>
> Underscore
>
>> +    uint64_t    ringPPNs[PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES];
>> +} QEMU_PACKED;
>> +
>> +enum PVSCSIMsgType {
>> +    PVSCSI_MSG_DEV_ADDED          = 0,
>> +    PVSCSI_MSG_DEV_REMOVED        = 1,
>> +    PVSCSI_MSG_LAST               = 2,
>> +};
>> +
>> +/*
>> + * Msg descriptor.
>> + *
>> + * sizeof(struct PVSCSIRingMsgDesc) == 128.
>> + *
>> + * - type is of type enum PVSCSIMsgType.
>> + * - the content of args depend on the type of event being delivered.
>> + */
>> +
>> +struct PVSCSIRingMsgDesc {
>> +    uint32_t    type;
>> +    uint32_t    args[31];
>> +} QEMU_PACKED;
>> +
>> +struct PVSCSIMsgDescDevStatusChanged {
>> +    uint32_t    type;  /* PVSCSI_MSG_DEV _ADDED / _REMOVED */
>> +    uint32_t    bus;
>> +    uint32_t    target;
>> +    uint8_t     lun[8];
>> +    uint32_t    pad[27];
>> +} QEMU_PACKED;
>> +
>> +/*
>> + * Rings state.
>> + *
>> + * - the fields:
>> + *    . msgProdIdx,
>> + *    . msgConsIdx,
>> + *    . msgNumEntriesLog2,
>> + *   .. are only used once the SETUP_MSG_RING cmd has been issued.
>> + * - '_pad' helps to ensure that the msg related fields are on their own
>> + *   cache-line.
>> + */
>> +
>> +struct PVSCSIRingsState {
>> +    uint32_t    reqProdIdx;
>> +    uint32_t    reqConsIdx;
>> +    uint32_t    reqNumEntriesLog2;
>> +
>> +    uint32_t    cmpProdIdx;
>> +    uint32_t    cmpConsIdx;
>> +    uint32_t    cmpNumEntriesLog2;
>> +
>> +    uint8_t     _pad[104];
>
> Underscore.
>
>> +
>> +    uint32_t    msgProdIdx;
>> +    uint32_t    msgConsIdx;
>> +    uint32_t    msgNumEntriesLog2;
>> +} QEMU_PACKED;
>> +
>> +/*
>> + * Request descriptor.
>> + *
>> + * sizeof(RingReqDesc) = 128
>> + *
>> + * - context: is a unique identifier of a command. It could normally be any
>> + *   64bit value, however we currently store it in the serialNumber variable
>> + *   of struct SCSI_Command, so we have the following restrictions due to the
>> + *   way this field is handled in the vmkernel storage stack:
>> + *    * this value can't be 0,
>> + *    * the upper 32bit need to be 0 since serialNumber is as a uint32_t.
>> + *   Currently tracked as PR 292060.
>> + * - dataLen: contains the total number of bytes that need to be transferred.
>> + * - dataAddr:
>> + *   * if PVSCSI_FLAG_CMD_WITH_SG_LIST is set: dataAddr is the PA of the first
>> + *     s/g table segment, each s/g segment is entirely contained on a single
>> + *     page of physical memory,
>> + *   * if PVSCSI_FLAG_CMD_WITH_SG_LIST is NOT set, then dataAddr is the PA of
>> + *     the buffer used for the DMA transfer,
>> + * - flags:
>> + *   * PVSCSI_FLAG_CMD_WITH_SG_LIST: see dataAddr above,
>> + *   * PVSCSI_FLAG_CMD_DIR_NONE: no DMA involved,
>> + *   * PVSCSI_FLAG_CMD_DIR_TOHOST: transfer from device to main memory,
>> + *   * PVSCSI_FLAG_CMD_DIR_TODEVICE: transfer from main memory to device,
>> + *   * PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB: reserved to handle CDBs larger than
>> + *     16bytes. To be specified.
>> + * - vcpuHint: vcpuId of the processor that will be most likely waiting for the
>> + *   completion of the i/o. For guest OSes that use lowest priority message
>> + *   delivery mode (such as windows), we use this "hint" to deliver the
>> + *   completion action to the proper vcpu. For now, we can use the vcpuId of
>> + *   the processor that initiated the i/o as a likely candidate for the vcpu
>> + *   that will be waiting for the completion..
>> + * - bus should be 0: we currently only support bus 0 for now.
>> + * - unused should be zero'd.
>> + */
>> +
>> +#define PVSCSI_FLAG_CMD_WITH_SG_LIST        (1 << 0)
>> +#define PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB     (1 << 1)
>> +#define PVSCSI_FLAG_CMD_DIR_NONE            (1 << 2)
>> +#define PVSCSI_FLAG_CMD_DIR_TOHOST          (1 << 3)
>> +#define PVSCSI_FLAG_CMD_DIR_TODEVICE        (1 << 4)
>> +
>> +#define PVSCSI_KNOWN_FLAGS \
>> +  (PVSCSI_FLAG_CMD_WITH_SG_LIST     | \
>> +   PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB  | \
>> +   PVSCSI_FLAG_CMD_DIR_NONE         | \
>> +   PVSCSI_FLAG_CMD_DIR_TOHOST       | \
>> +   PVSCSI_FLAG_CMD_DIR_TODEVICE)
>> +
>> +struct PVSCSIRingReqDesc {
>> +    uint64_t    context;
>> +    uint64_t    dataAddr;
>> +    uint64_t    dataLen;
>> +    uint64_t    senseAddr;
>> +    uint32_t    senseLen;
>> +    uint32_t    flags;
>> +    uint8_t     cdb[16];
>> +    uint8_t     cdbLen;
>> +    uint8_t     lun[8];
>> +    uint8_t     tag;
>> +    uint8_t     bus;
>> +    uint8_t     target;
>> +    uint8_t     vcpuHint;
>> +    uint8_t     unused[59];
>> +} QEMU_PACKED;
>> +
>> +typedef struct PVSCSIRingReqDesc PVSCSIRingReqDesc;
>> +
>> +/*
>> + * Scatter-gather list management.
>> + *
>> + * As described above, when PVSCSI_FLAG_CMD_WITH_SG_LIST is set in the
>> + * RingReqDesc.flags, then RingReqDesc.dataAddr is the PA of the first s/g
>> + * table segment.
>> + *
>> + * - each segment of the s/g table contain a succession of struct
>> + *   PVSCSISGElement.
>> + * - each segment is entirely contained on a single physical page of memory.
>> + * - a "chain" s/g element has the flag PVSCSI_SGE_FLAG_CHAIN_ELEMENT set in
>> + *   PVSCSISGElement.flags and in this case:
>> + *     * addr is the PA of the next s/g segment,
>> + *     * length is undefined, assumed to be 0.
>> + */
>> +
>> +struct PVSCSISGElement {
>> +    uint64_t    addr;
>> +    uint32_t    length;
>> +    uint32_t    flags;
>> +} QEMU_PACKED;
>> +
>> +/*
>> + * Completion descriptor.
>> + *
>> + * sizeof(RingCmpDesc) = 32
>> + *
>> + * - context: identifier of the command. The same thing that was specified
>> + *   under "context" as part of struct RingReqDesc at initiation time,
>> + * - dataLen: number of bytes transferred for the actual i/o operation,
>> + * - senseLen: number of bytes written into the sense buffer,
>> + * - hostStatus: adapter status,
>> + * - scsiStatus: device status,
>> + * - _pad should be zero.
>> + */
>> +
>> +struct PVSCSIRingCmpDesc {
>> +    uint64_t    context;
>> +    uint64_t    dataLen;
>> +    uint32_t    senseLen;
>> +    uint16_t    hostStatus;
>> +    uint16_t    scsiStatus;
>> +    uint32_t    _pad[2];
>> +} QEMU_PACKED;
>> +typedef struct PVSCSIRingCmpDesc PVSCSIRingCmpDesc;
>> +
>> +/*
>> + * Interrupt status / IRQ bits.
>> + */
>> +
>> +#define PVSCSI_INTR_CMPL_0                 (1 << 0)
>> +#define PVSCSI_INTR_CMPL_1                 (1 << 1)
>> +#define PVSCSI_INTR_CMPL_MASK              MASK(2)
>> +
>> +#define PVSCSI_INTR_MSG_0                  (1 << 2)
>> +#define PVSCSI_INTR_MSG_1                  (1 << 3)
>> +#define PVSCSI_INTR_MSG_MASK               (MASK(2) << 2)
>> +
>> +#define PVSCSI_INTR_ALL_SUPPORTED          MASK(4)
>> +
>> +/*
>> + * Number of MSI-X vectors supported.
>> + */
>> +#define PVSCSI_MAX_INTRS        24
>> +
>> +/*
>> + * Enumeration of supported MSI-X vectors
>> + */
>> +#define PVSCSI_VECTOR_COMPLETION   0
>> +
>> +/*
>> + * Misc constants for the rings.
>> + */
>> +
>> +#define PVSCSI_MAX_NUM_PAGES_REQ_RING   PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
>> +#define PVSCSI_MAX_NUM_PAGES_CMP_RING   PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
>> +#define PVSCSI_MAX_NUM_PAGES_MSG_RING   PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES
>> +
>> +#define PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE \
>> +                (PAGE_SIZE / sizeof(struct PVSCSIRingReqDesc))
>> +
>> +#define PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE \
>> +                (PAGE_SIZE / sizeof(PVSCSIRingCmpDesc))
>> +
>> +#define PVSCSI_MAX_REQ_QUEUE_DEPTH \
>> +    (PVSCSI_MAX_NUM_PAGES_REQ_RING * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE)
>> +
>> +#define PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES     1
>> +#define PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES 1
>> +#define PVSCSI_MEM_SPACE_MISC_NUM_PAGES        2
>> +#define PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES     2
>> +#define PVSCSI_MEM_SPACE_MSIX_NUM_PAGES        2
>> +
>> +enum PVSCSIMemSpace {
>> +    PVSCSI_MEM_SPACE_COMMAND_PAGE       = 0,
>> +    PVSCSI_MEM_SPACE_INTR_STATUS_PAGE   = 1,
>> +    PVSCSI_MEM_SPACE_MISC_PAGE          = 2,
>> +    PVSCSI_MEM_SPACE_KICK_IO_PAGE       = 4,
>> +    PVSCSI_MEM_SPACE_MSIX_TABLE_PAGE    = 6,
>> +    PVSCSI_MEM_SPACE_MSIX_PBA_PAGE      = 7,
>> +};
>> +
>> +#define PVSCSI_MEM_SPACE_NUM_PAGES \
>> +    (PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES +       \
>> +     PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES +   \
>> +     PVSCSI_MEM_SPACE_MISC_NUM_PAGES +          \
>> +     PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES +       \
>> +     PVSCSI_MEM_SPACE_MSIX_NUM_PAGES)
>> +
>> +#define PVSCSI_MEM_SPACE_SIZE        (PVSCSI_MEM_SPACE_NUM_PAGES * PAGE_SIZE)
>> +
>> +#endif /* _VMW_PVSCSI_H_ */
>> --
>> 1.7.9.5
>>

Thanks for the detailed review. I will address all the rest of the
items pointed out in the next version.

Thanks,
Deep
Blue Swirl July 10, 2012, 6:57 p.m. UTC | #3
On Mon, Jul 9, 2012 at 7:31 AM, Deep Debroy <ddebroy@gmail.com> wrote:
> On Sat, Jul 7, 2012 at 1:38 AM, Blue Swirl <blauwirbel@gmail.com> wrote:
>> On Sat, Jul 7, 2012 at 5:07 AM, Deep Debroy <ddebroy@gmail.com> wrote:
>>> Signed-off-by: Deep Debroy <ddebroy@gmail.com>
>>> ---
>>>  default-configs/pci.mak    |    1 +
>>>  docs/specs/pvscsi-spec.txt |   92 ++++
>>>  hw/Makefile.objs           |    1 +
>>>  hw/pci.h                   |    1 +
>>>  hw/pvscsi.c                | 1260 ++++++++++++++++++++++++++++++++++++++++++++
>>>  hw/pvscsi.h                |  442 ++++++++++++++++
>>>  6 files changed, 1797 insertions(+)
>>>  create mode 100644 docs/specs/pvscsi-spec.txt
>>>  create mode 100644 hw/pvscsi.c
>>>  create mode 100644 hw/pvscsi.h
>>>
>>> diff --git a/default-configs/pci.mak b/default-configs/pci.mak
>>> index 9d3e1db..9fd4896 100644
>>> --- a/default-configs/pci.mak
>>> +++ b/default-configs/pci.mak
>>> @@ -10,6 +10,7 @@ CONFIG_EEPRO100_PCI=y
>>>  CONFIG_PCNET_PCI=y
>>>  CONFIG_PCNET_COMMON=y
>>>  CONFIG_LSI_SCSI_PCI=y
>>> +CONFIG_PVSCSI_SCSI_PCI=y
>>>  CONFIG_RTL8139_PCI=y
>>>  CONFIG_E1000_PCI=y
>>>  CONFIG_IDE_CORE=y
>>> diff --git a/docs/specs/pvscsi-spec.txt b/docs/specs/pvscsi-spec.txt
>>> new file mode 100644
>>> index 0000000..27f5529
>>> --- /dev/null
>>> +++ b/docs/specs/pvscsi-spec.txt
>>> @@ -0,0 +1,92 @@
>>> +General Description
>>> +===================
>>> +
>>> +This document describes VMWare PVSCSI device interface specification.
>>> +Created by Dmitry Fleytman (dmitry@daynix.com), Daynix Computing LTD.
>>> +Based on source code of PVSCSI Linux driver from kernel 3.0.4
>>> +
>>> +PVSCSI Device Interface Overview
>>> +================================
>>> +
>>> +The interface is based on memory area shared between hypervisor and VM.
>>> +Memory area is obtained by driver as device IO memory resource of
>>> +PVSCSI_MEM_SPACE_SIZE length.
>>> +The shared memory consists of registers area and rings area.
>>> +The registers area is used to raise hypervisorinterrupts and issue device
>>
>> hypervisor interrupts
>>
>>> +commands. The rings area is used to transfer data descruiptors and SCSI
>>
>> descriptors
>>
>>> +commands from VM to hypervisor and to transfer messages produced by
>>> +hypervisor to VM. Data itself is transferred via virtual scatter-gather DMA.
>>> +
>>> +PVSCSI Device Registers
>>> +=======================
>>> +
>>> +Registers area length is 1 page (PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES).
>>> +Registers area structure is described by PVSCSIRegOffset enumeration.
>>> +There are registers to issue device command (with optional short data),
>>> +issue device interrupt, control interrupts masking.
>>> +
>>> +PVSCSI Device Rings
>>> +===================
>>> +
>>> +There are three rings in shared memory:
>>> +
>>> +    1. Request ring (struct PVSCSIRingReqDesc *req_ring)
>>> +        - ring for OS to device requests
>>> +    2. Completion ring (struct PVSCSIRingCmpDesc *cmp_ring)
>>> +        - ring for device request completions
>>> +    3. Message ring (struct PVSCSIRingMsgDesc *msg_ring)
>>> +        - ring for messages from device.
>>> +       This ring is optional and may be not configured.
>>> +There is a control area (struct PVSCSIRingsState *rings_state) used to control
>>> +rings operation.
>>> +
>>> +PVSCSI Device to Host Interrupts
>>> +================================
>>> +There are following interrupt types supported by PVSCSI device:
>>> +    1. Completion interrupts (completion ring notifications):
>>> +        PVSCSI_INTR_CMPL_0
>>> +        PVSCSI_INTR_CMPL_1
>>> +    2. Message interrupts (message ring notifications):
>>> +        PVSCSI_INTR_MSG_0
>>> +        PVSCSI_INTR_MSG_1
>>> +
>>> +Interrupts are controlled via PVSCSI_REG_OFFSET_INTR_MASK register
>>> +Bit set means interrupt enabled, bit cleared - disabled
>>> +
>>> +Interrupt modes supported are legacy, MSI and MSI-X
>>> +In case of legacy interrupts register PVSCSI_REG_OFFSET_INTR_STATUS
>>> +used to verify interrupt arrival and to clear interrupt state
>>> +Interrupts are cleared by writing processed bits back
>>> +to interrupt status register.
>>> +
>>> +PVSCSI Device Operation Sequences
>>> +=================================
>>> +
>>> +1. Startup sequence:
>>> +    a. Issue PVSCSI_CMD_ADAPTER_RESET command;
>>> +    aa. Windows driver reads interrupt status register here;
>>> +    b. Issue PVSCSI_CMD_SETUP_MSG_RING command with no additional data,
>>> +       check status and disable device messages if error returned;
>>> +       (Omitted if device messages disabled by driver configuration)
>>> +    c. Issue PVSCSI_CMD_SETUP_RINGS command, provide rings configuration
>>> +       as struct PVSCSICmdDescSetupRings;
>>> +    d. Issue PVSCSI_CMD_SETUP_MSG_RING command again, provide
>>> +       rings configuration as struct PVSCSICmdDescSetupMsgRing;
>>> +    e. Unmask completion and message (if device messages enabled) interrupts.
>>> +
>>> +2. Shutdown sequences
>>> +    a. Mask interrupts;
>>> +    b. Flush request ring using PVSCSI_REG_OFFSET_KICK_NON_RW_IO;
>>> +    c. Issue PVSCSI_CMD_ADAPTER_RESET command.
>>> +
>>> +3. Send request
>>> +    a. Fill next free request ring descriptor;
>>> +    b. Issue PVSCSI_REG_OFFSET_KICK_RW_IO for R/W operations;
>>> +       or PVSCSI_REG_OFFSET_KICK_NON_RW_IO for other operations.
>>> +
>>> +4. Abort command
>>> +    a. Issue PVSCSI_CMD_ABORT_CMD command;
>>> +
>>> +5. Request completion processing
>>> +    a. Upon completion interrupt arrival process completion
>>> +       and message (if enabled) rings.
>>> diff --git a/hw/Makefile.objs b/hw/Makefile.objs
>>> index 3d77259..6dd1eea 100644
>>> --- a/hw/Makefile.objs
>>> +++ b/hw/Makefile.objs
>>> @@ -86,6 +86,7 @@ hw-obj-$(CONFIG_OPENCORES_ETH) += opencores_eth.o
>>>
>>>  # SCSI layer
>>>  hw-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
>>> +hw-obj-$(CONFIG_PVSCSI_SCSI_PCI) += pvscsi.o
>>>  hw-obj-$(CONFIG_ESP) += esp.o
>>>
>>>  hw-obj-y += sysbus.o isa-bus.o
>>> diff --git a/hw/pci.h b/hw/pci.h
>>> index 79d38fd..3c197d9 100644
>>> --- a/hw/pci.h
>>> +++ b/hw/pci.h
>>> @@ -60,6 +60,7 @@
>>>  #define PCI_DEVICE_ID_VMWARE_NET         0x0720
>>>  #define PCI_DEVICE_ID_VMWARE_SCSI        0x0730
>>>  #define PCI_DEVICE_ID_VMWARE_IDE         0x1729
>>> +#define PCI_DEVICE_ID_VMWARE_PVSCSI      0x07C0
>>
>> Please retain numeric order.
>
> The pvscsi driver in the linux guests use the 0x7C0 ID as part of
> MODULE_DEVICE_TABLE. So I will have to keep that intact. The other IDs
> (_NET, _SCSI and _IDE) don't appear to be used by anything in qemu.
> Should I remove them?

I meant that the numbers should be monotonically increasing:
#define PCI_DEVICE_ID_VMWARE_NET         0x0720
#define PCI_DEVICE_ID_VMWARE_SCSI        0x0730
#define PCI_DEVICE_ID_VMWARE_PVSCSI      0x07C0
#define PCI_DEVICE_ID_VMWARE_IDE         0x1729

>>
>>>
>>>  /* Intel (0x8086) */
>>>  #define PCI_DEVICE_ID_INTEL_82551IT      0x1209
>>> diff --git a/hw/pvscsi.c b/hw/pvscsi.c
>>> new file mode 100644
>>> index 0000000..719ea98
>>> --- /dev/null
>>> +++ b/hw/pvscsi.c
>>> @@ -0,0 +1,1260 @@
>>> +/*
>>> + * QEMU VMWARE PVSCSI paravirtual SCSI bus
>>> + *
>>> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
>>> + *
>>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
>>> + *
>>> + * Based on implementation by Paolo Bonzini
>>> + * http://lists.gnu.org/archive/html/qemu-devel/2011-08/msg00729.html
>>> + *
>>> + * Authors:
>>> + * Paolo Bonzini <pbonzini@redhat.com>
>>> + * Dmitry Fleytman <dmitry@daynix.com>
>>> + * Yan Vugenfirer <yvugenfi@redhat.com>
>>> + *
>>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>>> + * See the COPYING file in the top-level directory.
>>> + *
>>> + */
>>> +
>>> +#define DEBUG_PVSCSI_WARNINGS
>>> +#define DEBUG_PVSCSI_ERRORS
>>> +/*
>>> +#define DEBUG_PVSCSI_CALLBACKS
>>> +#define DEBUG_PVSCSI_RINGS
>>> +#define DEBUG_PVSCSI_COMMANDS
>>> +#define DEBUG_PVSCSI_INTERRUPTS
>>> +#define DEBUG_PVSCSI_SHMEM_ACCESS
>>> +#define DEBUG_PVSCSI_SHMEM_ACCESS
>>> +#define DEBUG_SCSI_EXCHANGE
>>> +*/
>>
>> Please use //, it's easier to enable one line in the middle. With
>> tracepoints there would be no need.
>
> Sounds good. Will modify the code to use tracepoints.
>>
>>> +
>>> +#ifdef DEBUG_PVSCSI_SHMEM_ACCESS
>>> +#define DSHPRINTF(fmt, ...)                                                 \
>>> +    do {                                                                    \
>>> +        printf("[pvscsi][SH][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>>> +    } while (0)
>>> +#else
>>> +#define DSHPRINTF(fmt, ...) do {} while (0)
>>> +#endif
>>> +
>>> +#ifdef DEBUG_PVSCSI_CALLBACKS
>>> +#define DCBPRINTF(fmt, ...)                                                 \
>>> +    do {                                                                    \
>>> +        printf("[pvscsi][CB][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>>> +    } while (0)
>>> +#else
>>> +#define DCBPRINTF(fmt, ...) do {} while (0)
>>> +#endif
>>> +
>>> +#ifdef DEBUG_PVSCSI_WARNINGS
>>> +#define DWRPRINTF(fmt, ...)                                                 \
>>> +    do {                                                                    \
>>> +        printf("[pvscsi][WR][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>>> +    } while (0)
>>> +#else
>>> +#define DWRPRINTF(fmt, ...) do {} while (0)
>>> +#endif
>>
>> Please check if qemu_log(LOG_UNIMP,...) would make sense for the cases
>> where the guest may be issuing commands which QEMU doesn't implement
>> yet.
>>
>>> +
>>> +#ifdef DEBUG_PVSCSI_ERRORS
>>> +#define DERPRINTF(fmt, ...)                                                 \
>>> +    do {                                                                    \
>>> +        printf("[pvscsi][ER][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>>> +    } while (0)
>>> +#else
>>> +#define DERPRINTF(fmt, ...) do {} while (0)
>>> +#endif
>>> +
>>> +#ifdef DEBUG_PVSCSI_COMMANDS
>>> +#define DCMPRINTF(fmt, ...)                                                 \
>>> +    do {                                                                    \
>>> +        printf("[pvscsi][CM][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>>> +    } while (0)
>>> +#else
>>> +#define DCMPRINTF(fmt, ...) do {} while (0)
>>> +#endif
>>> +
>>> +#ifdef DEBUG_SCSI_EXCHANGE
>>> +#define DSXPRINTF(fmt, ...)                                                 \
>>> +    do {                                                                    \
>>> +        printf("[pvscsi][SX][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>>> +    } while (0)
>>> +#else
>>> +#define DSXPRINTF(fmt, ...) do {} while (0)
>>> +#endif
>>> +
>>> +#ifdef DEBUG_PVSCSI_RINGS
>>> +#define DRIPRINTF(fmt, ...)                                                 \
>>> +    do {                                                                    \
>>> +        printf("[pvscsi][RI][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>>> +    } while (0)
>>> +#else
>>> +#define DRIPRINTF(fmt, ...) do {} while (0)
>>> +#endif
>>> +
>>> +#ifdef DEBUG_PVSCSI_INTERRUPTS
>>> +#define DIRPRINTF(fmt, ...)                                                 \
>>> +    do {                                                                    \
>>> +        printf("[pvscsi][IR][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
>>> +    } while (0)
>>> +#else
>>> +#define DIRPRINTF(fmt, ...) do {} while (0)
>>> +#endif
>>> +
>>> +/*
>>> + * MSI-X support is disabled because it leads Windows OS to crash on startup.
>>> + * The crash happens because Windows driver requires MSI-X shared memory
>>> + * to be part of the same BAR used for rings state, registers, etc.
>>> + * This is not supported by QEMU infrastructure so separate BAR created from
>>> + * MSI-X purposes. Windows driver fails to deal with 2 BARs.
>>> + * Linux works fine with current MSI-X implementation enabled.
>>> + */
>>> +#undef PVSCSI_ENABLE_MSIX
>>> +
>>> +#include "softfloat.h"
>>> +#include "compiler.h"
>>> +#include "scsi-defs.h"
>>> +#include "hw/hw.h"
>>> +#include "hw/pci.h"
>>> +#include "hw/scsi.h"
>>> +#ifdef PVSCSI_ENABLE_MSIX
>>
>> Please remove.
>
> For now, I think I will remove MSIX support altogether from pvscsi on
> the qemu side so that both Linux and Windows guests can work using
> regular MSI. If I can figure out a way to get the Windows driver to
> work with MSIX, I will introduce that later. Does that sound good?

That's OK too. I was proposing that the MSIX #ifdeffery should be kept minimal.

>>
>>> +#include "msix.h"
>>> +#endif
>>> +#include "msi.h"
>>> +#include "qemu-queue.h"
>>> +#include "pvscsi.h"
>>> +#include "vmware_utils.h"
>>
>> The include lines should be just after the copyright header.
>>
>>> +
>>> +#define PVSCSI_MAX_DEVS                   (64)
>>> +#define PVSCSI_MSIX_NUM_VECTORS           (1)
>>> +
>>> +typedef struct {
>>
>> typedef struct PVSCSIRingsMgr {
>>
>>> +    uint64_t            rs_pa;
>>> +
>>> +    uint32_t            txr_len_mask;
>>> +    uint32_t            rxr_len_mask;
>>> +    uint64_t            req_ring_pages_pa[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
>>> +    uint64_t            cmp_ring_pages_pa[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
>>> +    uint64_t            consumed_ptr;
>>> +    uint64_t            filled_cmp_ptr;
>>> +} PVSCSIRingsMgr;
>>> +
>>> +#define _RS_GET_FIELD(rs_pa, field) \
>>> +    (vmw_shmem_ld32(rs_pa + offsetof(struct PVSCSIRingsState, field)))
>>> +#define _RS_SET_FIELD(rs_pa, field, val) \
>>> +    (vmw_shmem_st32(rs_pa + offsetof(struct PVSCSIRingsState, field), val))
>>
>> Don't use identifiers with leading underscores.
>>
>>> +
>>> +/* Integer binary logarithm */
>>> +static int
>>> +pvscsi_log2(uint32_t input)
>>> +{
>>> +    int log = 0;
>>> +    assert(input > 0);
>>> +    while (input >> ++log) {
>>> +
>>> +        };
>>
>> Useless semicolon.
>>
>>> +    return log;
>>> +}
>>> +
>>> +static void
>>> +pvscsi_rings_mgr_init(PVSCSIRingsMgr *m, struct PVSCSICmdDescSetupRings *ri)
>>> +{
>>> +    int i;
>>> +    uint32_t txr_len_log2, rxr_len_log2;
>>> +    uint32_t req_ring_size, cmp_ring_size;
>>> +    m->rs_pa = ri->ringsStatePPN << PAGE_SHIFT;
>>> +
>>> +    req_ring_size = ri->reqRingNumPages * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
>>> +    cmp_ring_size = ri->cmpRingNumPages * PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
>>> +    txr_len_log2 = pvscsi_log2(req_ring_size - 1);
>>> +    rxr_len_log2 = pvscsi_log2(cmp_ring_size - 1);
>>> +
>>> +    m->txr_len_mask = MASK(txr_len_log2);
>>> +    m->rxr_len_mask = MASK(rxr_len_log2);
>>> +
>>> +    m->consumed_ptr = 0;
>>> +    m->filled_cmp_ptr = 0;
>>> +
>>> +    for (i = 0; i < ri->reqRingNumPages; i++) {
>>> +        m->req_ring_pages_pa[i] = ri->reqRingPPNs[i] << PAGE_SHIFT;
>>> +    }
>>> +
>>> +    for (i = 0; i < ri->cmpRingNumPages; i++) {
>>> +        m->cmp_ring_pages_pa[i] = ri->cmpRingPPNs[i] << PAGE_SHIFT;
>>> +    }
>>> +
>>> +    _RS_SET_FIELD(m->rs_pa, reqProdIdx, 0);
>>> +    _RS_SET_FIELD(m->rs_pa, reqConsIdx, 0);
>>> +    _RS_SET_FIELD(m->rs_pa, reqNumEntriesLog2, txr_len_log2);
>>> +
>>> +    _RS_SET_FIELD(m->rs_pa, cmpProdIdx, 0);
>>> +    _RS_SET_FIELD(m->rs_pa, cmpConsIdx, 0);
>>> +    _RS_SET_FIELD(m->rs_pa, cmpNumEntriesLog2, rxr_len_log2);
>>> +
>>> +    _RS_SET_FIELD(m->rs_pa, msgProdIdx, 0);
>>> +    _RS_SET_FIELD(m->rs_pa, msgConsIdx, 0);
>>> +    _RS_SET_FIELD(m->rs_pa, msgNumEntriesLog2, 0);
>>> +
>>> +    DRIPRINTF("TX/RX rings logarithms set to %d/%d",
>>> +              txr_len_log2, rxr_len_log2);
>>> +
>>> +    /* Flush ring state page changes */
>>> +    smp_wmb();
>>> +}
>>> +
>>> +static void
>>> +pvscsi_rings_mgr_cleanup(PVSCSIRingsMgr *mgr)
>>> +{
>>> +    mgr->rs_pa = 0;
>>> +    mgr->txr_len_mask = 0;
>>> +    mgr->consumed_ptr = 0;
>>> +    mgr->filled_cmp_ptr = 0;
>>> +    memset(mgr->req_ring_pages_pa, 0, sizeof(mgr->req_ring_pages_pa));
>>> +    memset(mgr->cmp_ring_pages_pa, 0, sizeof(mgr->cmp_ring_pages_pa));
>>> +}
>>> +
>>> +static inline target_phys_addr_t
>>
>> 'inline' may be premature optimization.
>>
>>> +pvscsi_rings_mgr_pop_req_descr(PVSCSIRingsMgr *mgr)
>>> +{
>>> +    uint32_t ready_prt = _RS_GET_FIELD(mgr->rs_pa, reqProdIdx);
>>> +
>>> +    if (ready_prt != mgr->consumed_ptr) {
>>> +        uint32_t next_ready_ptr =
>>> +            mgr->consumed_ptr++ & mgr->txr_len_mask;
>>> +        uint32_t next_ready_page =
>>> +            next_ready_ptr / PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
>>> +        uint32_t inpage_idx =
>>> +            next_ready_ptr % PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
>>> +
>>> +        return mgr->req_ring_pages_pa[next_ready_page] +
>>> +               inpage_idx * sizeof(PVSCSIRingReqDesc);
>>> +    } else {
>>> +        return 0;
>>> +    }
>>> +}
>>> +
>>> +static inline void
>>> +pvscsi_rings_mgr_flush_req_ring(PVSCSIRingsMgr *mgr)
>>> +{
>>> +    _RS_SET_FIELD(mgr->rs_pa, reqConsIdx, mgr->consumed_ptr);
>>> +}
>>> +
>>> +static inline target_phys_addr_t
>>> +pvscsi_rings_mgr_pop_cmp_descr(PVSCSIRingsMgr *mgr)
>>> +{
>>> +    /*
>>> +     * According to Linux driver code it explicitly verifies that number
>>> +     * of requests being processed by device is less then the size of
>>> +     * completion queue, so device may omit completion queue overflow
>>> +     * conditions check. We assume that this is true for other (Windows)
>>> +     * drivers as well.
>>> +     */
>>> +
>>> +    uint32_t free_cmp_ptr =
>>> +        mgr->filled_cmp_ptr++ & mgr->rxr_len_mask;
>>> +    uint32_t free_cmp_page =
>>> +        free_cmp_ptr / PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
>>> +    uint32_t inpage_idx =
>>> +        free_cmp_ptr % PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
>>> +    return mgr->cmp_ring_pages_pa[free_cmp_page] +
>>> +           inpage_idx * sizeof(PVSCSIRingCmpDesc);
>>> +}
>>> +
>>> +static inline void
>>> +pvscsi_rings_mgr_flush_cmp_ring(PVSCSIRingsMgr *mgr)
>>> +{
>>> +    /* Flush descriptor changes */
>>> +    smp_wmb();
>>> +
>>> +    DRIPRINTF("New production counter of completion ring is %" PRIx64,
>>> +              mgr->filled_cmp_ptr);
>>> +
>>> +    _RS_SET_FIELD(mgr->rs_pa, cmpProdIdx, mgr->filled_cmp_ptr);
>>> +}
>>> +
>>> +typedef QTAILQ_HEAD(, PVSCSIRequest) PVSCSIRequestList;
>>> +#define PVSCSI_MAX_CMD_DATA_WORDS \
>>> +    (sizeof(struct PVSCSICmdDescSetupRings)/sizeof(uint32_t))
>>> +
>>> +typedef struct {
>>> +    PCIDevice dev;
>>> +    MemoryRegion io_space;
>>> +    SCSIBus bus;
>>> +    QEMUBH *completion_worker;
>>> +    PVSCSIRequestList pending_queue;
>>> +    PVSCSIRequestList completion_queue;
>>> +
>>> +#ifdef PVSCSI_ENABLE_MSIX
>>
>> I think this can be removed.
>>
>>> +    MemoryRegion msix_space;
>>> +    uint8_t msix_used;   /* Whether MSI-X support was installed successfully */
>>> +#endif
>>> +    uint8_t msi_used;    /* Whether MSI support was installed successfully   */
>>> +    uint64_t reg_interrupt_status;        /* Interrupt status register value */
>>> +    uint64_t reg_interrupt_enabled;       /* Interrupt mask register value   */
>>> +    uint64_t reg_command_status;          /* Command status register value   */
>>> +
>>> +    /* Command data adoption mechanism */
>>> +    uint64_t curr_cmd;                   /* Last command arrived             */
>>> +    uint32_t curr_cmd_data_cntr;      /* Amount of data for last command  */
>>> +
>>> +    /* Collector for current command data */
>>> +    uint32_t curr_cmd_data[PVSCSI_MAX_CMD_DATA_WORDS];
>>> +
>>> +    uint8_t rings_info_valid;            /* Whether rings are initialized    */
>>> +    PVSCSIRingsMgr rings;                /* Data transfer rings manager      */
>>> +} PVSCSI_State;
>>
>> The order of the fields could be optimized, there are a lot of
>> structure holes due to alignment.
>>
>>> +
>>> +static void
>>> +pvscsi_reset_state(PVSCSI_State *s)
>>> +{
>>> +    s->curr_cmd = PVSCSI_CMD_FIRST;
>>> +    s->curr_cmd_data_cntr = 0;
>>> +    s->reg_command_status = PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
>>> +    s->reg_interrupt_status = 0;
>>> +    pvscsi_rings_mgr_cleanup(&s->rings);
>>> +    s->rings_info_valid = FALSE;
>>> +    QTAILQ_INIT(&s->pending_queue);
>>> +    QTAILQ_INIT(&s->completion_queue);
>>> +}
>>> +
>>> +typedef struct PVSCSISGState {
>>> +    target_phys_addr_t elemAddr;
>>> +    target_phys_addr_t dataAddr;
>>> +    uint32_t resid;
>>> +} PVSCSISGState;
>>> +
>>> +typedef struct PVSCSIRequest {
>>> +    SCSIRequest *sreq;
>>> +    uint8_t sense_key;
>>> +    uint8_t completed;
>>> +    int lun;
>>> +    QEMUSGList sgl;
>>> +    PVSCSISGState sg;
>>> +    struct PVSCSIRingReqDesc req;
>>> +    struct PVSCSIRingCmpDesc cmp;
>>> +    QTAILQ_ENTRY(PVSCSIRequest) next;
>>> +} PVSCSIRequest;
>>> +
>>> +static void
>>> +pvscsi_free_queue(PVSCSIRequestList *req_list)
>>> +{
>>> +    PVSCSIRequest *pvscsi_req;
>>> +
>>> +    while (!QTAILQ_EMPTY(req_list)) {
>>> +        pvscsi_req = QTAILQ_FIRST(req_list);
>>> +        QTAILQ_REMOVE(req_list, pvscsi_req, next);
>>> +        g_free(pvscsi_req);
>>> +    }
>>> +}
>>> +
>>> +static void
>>> +pvscsi_reset_adapter(PVSCSI_State *s)
>>> +{
>>> +    qbus_reset_all_fn(&s->bus);
>>> +    pvscsi_free_queue(&s->completion_queue);
>>> +    assert(QTAILQ_EMPTY(&s->pending_queue));
>>> +    pvscsi_reset_state(s);
>>> +}
>>> +
>>> +static inline void
>>> +pvscsi_update_irq_status(PVSCSI_State *s)
>>> +{
>>> +    int should_raise = !!(s->reg_interrupt_enabled & s->reg_interrupt_status);
>>
>> bool
>>
>>> +
>>> +    DIRPRINTF("Interrupt level set to %d (MASK: 0x%llX, STATUS: 0x%llx)",
>>> +              should_raise,
>>> +              (long long unsigned int) s->reg_interrupt_enabled,
>>> +              (long long unsigned int) s->reg_interrupt_status);
>>
>> Just use PRIx64 and remove the casts.
>>
>>> +
>>> +#ifdef PVSCSI_ENABLE_MSIX
>>> +    if (s->msix_used && msix_enabled(&s->dev)) {
>>> +        if (should_raise) {
>>> +            DIRPRINTF("Sending MSI-X notification");
>>> +            msix_notify(&s->dev, PVSCSI_VECTOR_COMPLETION);
>>> +        }
>>> +        return;
>>> +    }
>>> +#endif
>>> +
>>> +    if (s->msi_used && msi_enabled(&s->dev)) {
>>> +        if (should_raise) {
>>> +            DIRPRINTF("Sending MSI notification");
>>> +            msi_notify(&s->dev, PVSCSI_VECTOR_COMPLETION);
>>> +        }
>>> +        return;
>>> +    }
>>> +
>>> +    qemu_set_irq(s->dev.irq[0], should_raise);
>>> +}
>>> +
>>> +static inline void
>>> +pvscsi_raise_completion_interrupt(PVSCSI_State *s)
>>> +{
>>> +    s->reg_interrupt_status |= PVSCSI_INTR_CMPL_0;
>>> +
>>> +    /* Memory barrier to flush interrupt status register changes*/
>>> +    smp_wmb();
>>> +
>>> +    pvscsi_update_irq_status(s);
>>> +}
>>> +
>>> +static void
>>> +pvscsi_cmp_ring_put(PVSCSI_State *s, struct PVSCSIRingCmpDesc *cmp_desc)
>>> +{
>>> +    target_phys_addr_t cmp_descr_pa;
>>> +
>>> +    cmp_descr_pa = pvscsi_rings_mgr_pop_cmp_descr(&s->rings);
>>> +    DRIPRINTF("Got completion descriptor %"PRIx64, cmp_descr_pa);
>>> +
>>> +    vmw_shmem_write(cmp_descr_pa, (void *)cmp_desc, sizeof(*cmp_desc));
>>> +}
>>> +
>>> +static void
>>> +pvscsi_process_completion_queue(void *opaque)
>>> +{
>>> +    PVSCSI_State *s = opaque;
>>> +    PVSCSIRequest *pvscsi_req;
>>> +    bool has_completed = false;
>>> +
>>> +    while (!QTAILQ_EMPTY(&s->completion_queue)) {
>>> +        pvscsi_req = QTAILQ_FIRST(&s->completion_queue);
>>> +        QTAILQ_REMOVE(&s->completion_queue, pvscsi_req, next);
>>> +        pvscsi_cmp_ring_put(s, &pvscsi_req->cmp);
>>> +        g_free(pvscsi_req);
>>> +        has_completed++;
>>> +    }
>>> +
>>> +    if (has_completed) {
>>> +        pvscsi_rings_mgr_flush_cmp_ring(&s->rings);
>>> +        pvscsi_raise_completion_interrupt(s);
>>> +    }
>>> +}
>>> +
>>> +static inline void
>>> +pvscsi_schedule_completion_processing(PVSCSI_State *s)
>>> +{
>>> +    /* Try putting more complete requests on the ring. */
>>> +    if (!QTAILQ_EMPTY(&s->completion_queue)) {
>>> +        qemu_bh_schedule(s->completion_worker);
>>> +    }
>>> +}
>>> +
>>> +static inline void
>>> +pvscsi_complete_request(PVSCSI_State *s, PVSCSIRequest *r)
>>> +{
>>> +    assert(!r->completed);
>>> +    DSXPRINTF("Completion: ctx: %"PRIx64", len: %"PRIx64", sense key: %u",
>>> +              r->cmp.context, r->cmp.dataLen, r->sense_key);
>>> +    if (r->sreq != NULL) {
>>> +        scsi_req_unref(r->sreq);
>>> +        r->sreq = NULL;
>>> +    }
>>> +    r->completed = 1;
>>> +    QTAILQ_REMOVE(&s->pending_queue, r, next);
>>> +    QTAILQ_INSERT_TAIL(&s->completion_queue, r, next);
>>> +    pvscsi_schedule_completion_processing(s);
>>> +}
>>> +
>>> +static QEMUSGList *pvscsi_get_sg_list(SCSIRequest *r)
>>> +{
>>> +    PVSCSIRequest *req = r->hba_private;
>>> +
>>> +    DSXPRINTF("Get SG list: depth: %u, size: %lu",
>>> +              req->sgl.nsg, req->sgl.size);
>>> +
>>> +    return &req->sgl;
>>> +}
>>> +
>>> +static void
>>> +pvscsi_get_next_sg_elem(PVSCSISGState *sg)
>>> +{
>>> +    struct PVSCSISGElement elem;
>>> +
>>> +    for (;; sg->elemAddr = elem.addr) {
>>> +        vmw_shmem_read(sg->elemAddr, (void *)&elem, sizeof(elem));
>>> +        if (0 != (elem.flags & ~PVSCSI_KNOWN_FLAGS)) {
>>
>> The order in the expression is unnatural, please reverse.
>>
>>> +            /*
>>> +             * There is PVSCSI_SGE_FLAG_CHAIN_ELEMENT flag described in
>>> +             * header file but its value is unknown. This flag requires
>>> +             * additional processing, so we put warning here to catch it
>>> +             * some day and make proper implementation
>>> +             */
>>> +            DWRPRINTF("Unknown flags in SG element (val: 0x%X)", elem.flags);
>>
>> Lowercase hex, please.
>>
>>> +        }
>>> +        break;
>>> +    }
>>> +
>>> +    sg->elemAddr += sizeof(elem);
>>> +    sg->dataAddr = elem.addr;
>>> +    sg->resid = elem.length;
>>> +}
>>> +
>>> +static inline void
>>> +pvscsi_write_sense(PVSCSIRequest *r, uint8_t *sense, int len)
>>> +{
>>> +    r->cmp.senseLen = MIN(r->req.senseLen, len);
>>> +    r->sense_key = sense[2];
>>> +    vmw_shmem_write(r->req.senseAddr, sense, r->cmp.senseLen);
>>> +}
>>> +
>>> +static void
>>> +pvscsi_command_complete(SCSIRequest *req, uint32_t status, size_t resid)
>>> +{
>>> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev.qdev, req->bus->qbus.parent);
>>> +    PVSCSIRequest *pvscsi_req = req->hba_private;
>>> +
>>> +    if (!pvscsi_req) {
>>> +        DERPRINTF("PVSCSI: Can't find request for tag 0x%x", req->tag);
>>> +        return;
>>> +    }
>>> +
>>> +    if (resid) {
>>> +        /* Short transfer.  */
>>> +        DSXPRINTF("Not all data required for command transferred.");
>>> +        pvscsi_req->cmp.hostStatus = BTSTAT_DATARUN;
>>> +    }
>>> +
>>> +    pvscsi_req->cmp.scsiStatus = status;
>>> +    if (CHECK_CONDITION == pvscsi_req->cmp.scsiStatus) {
>>
>> Order
>>
>>> +        uint8_t sense[SCSI_SENSE_BUF_SIZE];
>>> +        int sense_len =
>>> +            scsi_req_get_sense(pvscsi_req->sreq, sense, sizeof(sense));
>>> +
>>> +        DSXPRINTF("Sense information length is %d bytes", sense_len);
>>> +        pvscsi_write_sense(pvscsi_req, sense, sense_len);
>>> +    }
>>> +    qemu_sglist_destroy(&pvscsi_req->sgl);
>>> +    pvscsi_complete_request(s, pvscsi_req);
>>> +}
>>> +
>>> +static void
>>> +pvscsi_request_cancelled(SCSIRequest *req)
>>> +{
>>> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev.qdev, req->bus->qbus.parent);
>>> +    PVSCSIRequest *pvscsi_req = req->hba_private;
>>> +
>>> +    if (BTSTAT_SUCCESS == pvscsi_req->cmp.hostStatus) {
>>> +        pvscsi_req->cmp.hostStatus = BTSTAT_ABORTQUEUE;
>>> +    }
>>> +    pvscsi_complete_request(s, pvscsi_req);
>>> +}
>>> +
>>> +static inline SCSIDevice*
>>> +pvscsi_device_find(PVSCSI_State *s, int channel, int target,
>>> +                   uint8_t *requested_lun, uint8_t *target_lun)
>>> +{
>>> +    if (requested_lun[0] || requested_lun[2] || requested_lun[3] ||
>>> +        requested_lun[4] || requested_lun[5] || requested_lun[6] ||
>>> +        requested_lun[7] || (PVSCSI_MAX_DEVS < target)) {
>>> +        return NULL;
>>> +    } else {
>>> +        *target_lun = requested_lun[1];
>>> +        return scsi_device_find(&s->bus, channel, target, *target_lun);
>>> +    }
>>> +}
>>> +
>>> +static PVSCSIRequest *
>>> +pvscsi_queue_pending_descriptor(PVSCSI_State *s, SCSIDevice **d,
>>> +                                struct PVSCSIRingReqDesc *descr)
>>> +{
>>> +    PVSCSIRequest *pvscsi_req;
>>> +    uint8_t lun;
>>> +
>>> +    pvscsi_req = g_malloc0(sizeof(*pvscsi_req));
>>> +    pvscsi_req->req = *descr;
>>> +    pvscsi_req->cmp.context = pvscsi_req->req.context;
>>> +    QTAILQ_INSERT_TAIL(&s->pending_queue, pvscsi_req, next);
>>> +
>>> +    *d = pvscsi_device_find(s, descr->bus, descr->target, descr->lun, &lun);
>>> +    if (!*d) {
>>> +        return pvscsi_req;
>>> +    }
>>> +
>>> +    pvscsi_req->lun = lun;
>>> +    return pvscsi_req;
>>> +}
>>> +
>>> +static void
>>> +pvscsi_convert_sglist(PVSCSIRequest *r)
>>> +{
>>> +    int chunk_size;
>>> +    uint64_t data_length = r->req.dataLen;
>>> +    PVSCSISGState sg = r->sg;
>>> +    while (data_length) {
>>> +        while (!sg.resid) {
>>> +            pvscsi_get_next_sg_elem(&sg);
>>> +            DCMPRINTF("Element: ctx: %"PRIx64", addr: %"PRIx64", len: 0x%ul",
>>> +                      r->req.context, (uint64_t) r->sg.dataAddr, r->sg.resid);
>>
>> TARGET_FMT_plx? Didn't check the type.
>>
>>> +        }
>>> +        assert(data_length > 0);
>>> +        chunk_size = MIN((unsigned) data_length, sg.resid);
>>> +        if (chunk_size) {
>>> +            qemu_sglist_add(&r->sgl, sg.dataAddr, chunk_size);
>>> +        }
>>> +
>>> +        sg.dataAddr += chunk_size;
>>> +        data_length -= chunk_size;
>>> +        sg.resid -= chunk_size;
>>> +    }
>>> +}
>>> +
>>> +static inline void
>>> +pvscsi_build_sglist(PVSCSIRequest *r)
>>> +{
>>> +    qemu_sglist_init(&r->sgl, 1, NULL);
>>> +    if (r->req.flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) {
>>> +        pvscsi_convert_sglist(r);
>>> +    } else {
>>> +        qemu_sglist_add(&r->sgl, r->req.dataAddr, r->req.dataLen);
>>> +    }
>>> +}
>>> +
>>> +static void
>>> +pvscsi_process_request_descriptor(PVSCSI_State *s,
>>> +                                  struct PVSCSIRingReqDesc *descr)
>>> +{
>>> +    SCSIDevice *d;
>>> +    PVSCSIRequest *r = pvscsi_queue_pending_descriptor(s, &d, descr);
>>> +    int64_t n;
>>> +
>>> +    DSXPRINTF("SCSI cmd 0x%X, ctx: %" PRIx64, descr->cdb[0], descr->context);
>>> +
>>> +    if (!d) {
>>> +        r->cmp.hostStatus = BTSTAT_SELTIMEO;
>>> +        DSXPRINTF("Command directed to unknown device rejected.");
>>> +        pvscsi_complete_request(s, r);
>>> +        return;
>>> +    }
>>> +
>>> +    if (descr->flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) {
>>> +        r->sg.elemAddr = descr->dataAddr;
>>> +    }
>>> +
>>> +    r->sreq = scsi_req_new(d, descr->context, r->lun, descr->cdb, r);
>>> +    if (r->sreq->cmd.mode == SCSI_XFER_FROM_DEV &&
>>> +        (descr->flags & PVSCSI_FLAG_CMD_DIR_TODEVICE)) {
>>> +        r->cmp.hostStatus = BTSTAT_BADMSG;
>>> +        DSXPRINTF("Command with invalid transfer direction rejected.");
>>> +        scsi_req_cancel(r->sreq);
>>> +        return;
>>> +    }
>>> +    if (r->sreq->cmd.mode == SCSI_XFER_TO_DEV &&
>>> +        (descr->flags & PVSCSI_FLAG_CMD_DIR_TOHOST)) {
>>> +        r->cmp.hostStatus = BTSTAT_BADMSG;
>>> +        DSXPRINTF("Command with invalid transfer direction rejected.");
>>> +        scsi_req_cancel(r->sreq);
>>> +        return;
>>> +    }
>>> +
>>> +    pvscsi_build_sglist(r);
>>> +    n = scsi_req_enqueue(r->sreq);
>>> +
>>> +    if (n) {
>>> +        scsi_req_continue(r->sreq);
>>> +    }
>>> +}
>>> +
>>> +static void
>>> +pvscsi_process_io(PVSCSI_State *s)
>>> +{
>>> +    PVSCSIRingReqDesc descr;
>>> +    target_phys_addr_t next_descr_pa;
>>> +
>>> +    assert(s->rings_info_valid);
>>> +    while (0 != (next_descr_pa = pvscsi_rings_mgr_pop_req_descr(&s->rings))) {
>>
>> Order
>>
>>> +        DRIPRINTF("Got descriptor %"PRIx64, next_descr_pa);
>>> +        vmw_shmem_read(next_descr_pa, &descr, sizeof(descr));
>>> +        pvscsi_process_request_descriptor(s, &descr);
>>> +    }
>>> +
>>> +    pvscsi_rings_mgr_flush_req_ring(&s->rings);
>>> +}
>>> +
>>> +static void
>>> +pvscsi_dbg_dump_tx_rings_config(struct PVSCSICmdDescSetupRings *rc)
>>> +{
>>> +#ifdef DEBUG_PVSCSI_RINGS
>>> +    int i;
>>> +    DRIPRINTF("TX Rings configuration received:");
>>
>> Since the code is surrounded by #ifdeffery, you can just use fprintf.
>> With tracepoints, the #ifdeffery could be removed.
>>
>>> +    DRIPRINTF("Rings state page: %"PRIx64":", rc->ringsStatePPN);
>>> +
>>> +    DRIPRINTF("Request ring pages: %u:", rc->reqRingNumPages);
>>> +    for (i = 0; i < rc->reqRingNumPages; i++) {
>>> +        DRIPRINTF("\t%"PRIx64" ", rc->reqRingPPNs[i]);
>>> +    }
>>> +
>>> +    DRIPRINTF("Confirm ring pages: %u:", rc->cmpRingNumPages);
>>> +    for (i = 0; i < rc->cmpRingNumPages; i++) {
>>> +        DRIPRINTF("\t%"PRIx64" ", rc->cmpRingPPNs[i]);
>>> +    }
>>> +#endif
>>> +}
>>> +
>>> +static uint64_t
>>> +pvscsi_on_cmd_config(PVSCSI_State *s)
>>> +{
>>> +    DWRPRINTF("Unimplemented command PVSCSI_CMD_CONFIG ignored.");
>>> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
>>> +}
>>> +
>>> +static uint64_t
>>> +pvscsi_on_cmd_unplug(PVSCSI_State *s)
>>> +{
>>> +    DWRPRINTF("Unimplemented command PVSCSI_CMD_DEVICE_UNPLUG ignored.");
>>> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
>>> +}
>>> +
>>> +static uint64_t
>>> +pvscsi_on_issue_scsi(PVSCSI_State *s)
>>> +{
>>> +    DWRPRINTF("Unimplemented command PVSCSI_CMD_ISSUE_SCSI ignored.");
>>> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
>>> +}
>>> +
>>> +static uint64_t
>>> +pvscsi_on_cmd_setup_rings(PVSCSI_State *s)
>>> +{
>>> +    struct PVSCSICmdDescSetupRings *rc =
>>> +        (struct PVSCSICmdDescSetupRings *) s->curr_cmd_data;
>>> +
>>> +    DCMPRINTF("Command PVSCSI_CMD_SETUP_RINGS arrived");
>>> +
>>> +    pvscsi_dbg_dump_tx_rings_config(rc);
>>> +    pvscsi_rings_mgr_init(&s->rings, rc);
>>> +    s->rings_info_valid = TRUE;
>>> +    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
>>> +}
>>> +
>>> +static uint64_t
>>> +pvscsi_on_cmd_abort(PVSCSI_State *s)
>>> +{
>>> +#ifdef DEBUG_PVSCSI_COMMANDS
>>> +    struct PVSCSICmdDescAbortCmd *data =
>>> +        (struct PVSCSICmdDescAbortCmd *) s->curr_cmd_data;
>>> +
>>> +    DCMPRINTF("PVSCSI_CMD_ABORT_CMD for ctx %"PRIx64", target 0x%X",
>>> +              data->context, data->target);
>>> +#endif
>>> +
>>> +    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
>>> +}
>>> +
>>> +static uint64_t
>>> +pvscsi_on_cmd_unknown(PVSCSI_State *s)
>>> +{
>>> +    DWRPRINTF("Data for unknown command : 0x%X", s->curr_cmd_data[0]);
>>> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
>>> +}
>>> +
>>> +static uint64_t
>>> +pvscsi_on_cmd_reset_device(PVSCSI_State *s)
>>> +{
>>> +    uint8_t target_lun = 0;
>>> +    struct PVSCSICmdDescResetDevice *cmd =
>>> +        (struct PVSCSICmdDescResetDevice *) s->curr_cmd_data;
>>> +    SCSIDevice *sdev;
>>> +
>>> +    sdev = pvscsi_device_find(s, 0, cmd->target, cmd->lun, &target_lun);
>>> +
>>> +    DWRPRINTF("PVSCSI_CMD_RESET_DEVICE[target 0x%u lun 0x%d (dev 0x%p)]",
>>> +              cmd->target, (int) target_lun, sdev);
>>> +
>>> +    if (sdev != NULL) {
>>> +        device_reset(&sdev->qdev);
>>> +        return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
>>> +    }
>>> +
>>> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
>>> +}
>>> +
>>> +static uint64_t
>>> +pvscsi_on_cmd_reset_bus(PVSCSI_State *s)
>>> +{
>>> +    DCMPRINTF("Command PVSCSI_CMD_RESET_BUS arrived");
>>> +
>>> +    qbus_reset_all_fn(&s->bus);
>>> +    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
>>> +}
>>> +
>>> +static uint64_t
>>> +pvscsi_on_cmd_setup_msg_ring(PVSCSI_State *s)
>>> +{
>>> +    /* No message ring needed for HDD devices */
>>> +    DCMPRINTF("Command PVSCSI_CMD_SETUP_MSG_RING arrived");
>>> +    return PVSCSI_COMMAND_PROCESSING_FAILED;
>>> +}
>>> +
>>> +static uint64_t
>>> +pvscsi_on_cmd_adapter_reset(PVSCSI_State *s)
>>> +{
>>> +    /* No message ring needed for HDD devices */
>>> +    DCMPRINTF("Command PVSCSI_CMD_ADAPTER_RESET arrived");
>>> +
>>> +    pvscsi_reset_adapter(s);
>>> +    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
>>> +}
>>> +
>>> +struct {
>>
>> 'static const'
>>
>>> +    int       data_size;
>>> +    uint64_t  (*handler_fn)(PVSCSI_State *s);
>>> +} pvscsi_commands[] = {
>>> +    [PVSCSI_CMD_FIRST] = {
>>> +        .data_size = 0,
>>> +        .handler_fn = pvscsi_on_cmd_unknown
>>
>> Comma at the end of line is more friendly for future patches.
>>
>>> +    },
>>> +
>>> +    /* Not implemented, data size defined based on what arrives on windows */
>>> +    [PVSCSI_CMD_CONFIG] = {
>>> +        .data_size = 6*sizeof(uint32_t),
>>
>> Missing spaces around '*'. Please use checkpatch.pl to catch these.
>>
>>> +        .handler_fn = pvscsi_on_cmd_config
>>> +    },
>>> +
>>> +    /* Command not implemented, data size is unknown */
>>> +    [PVSCSI_CMD_ISSUE_SCSI] = {
>>> +        .data_size = 0,
>>> +        .handler_fn = pvscsi_on_issue_scsi
>>> +    },
>>> +
>>> +    /* Command not implemented, data size is unknown */
>>> +    [PVSCSI_CMD_DEVICE_UNPLUG] = {
>>> +        .data_size = 0,
>>> +        .handler_fn = pvscsi_on_cmd_unplug
>>> +    },
>>> +
>>> +    [PVSCSI_CMD_SETUP_RINGS] = {
>>> +        .data_size = sizeof(struct PVSCSICmdDescSetupRings),
>>> +        .handler_fn = pvscsi_on_cmd_setup_rings
>>> +    },
>>> +
>>> +    [PVSCSI_CMD_RESET_DEVICE] = {
>>> +        .data_size = sizeof(struct PVSCSICmdDescResetDevice),
>>> +        .handler_fn = pvscsi_on_cmd_reset_device
>>> +    },
>>> +
>>> +    [PVSCSI_CMD_RESET_BUS] = {
>>> +        .data_size = 0,
>>> +        .handler_fn = pvscsi_on_cmd_reset_bus
>>> +    },
>>> +
>>> +    [PVSCSI_CMD_SETUP_MSG_RING] = {
>>> +        .data_size = 0,
>>> +        .handler_fn = pvscsi_on_cmd_setup_msg_ring
>>> +    },
>>> +
>>> +    [PVSCSI_CMD_ADAPTER_RESET] = {
>>> +        .data_size = 0,
>>> +        .handler_fn = pvscsi_on_cmd_adapter_reset
>>> +    },
>>> +
>>> +    [PVSCSI_CMD_ABORT_CMD] = {
>>> +        .data_size = sizeof(struct PVSCSICmdDescAbortCmd),
>>> +        .handler_fn = pvscsi_on_cmd_abort
>>> +    }
>>> +};
>>> +
>>> +static void
>>> +pvscsi_do_command_processing(PVSCSI_State *s)
>>> +{
>>> +    size_t bytes_arrived = s->curr_cmd_data_cntr*sizeof(uint32_t);
>>
>> Spacing.
>>
>>> +
>>> +    if (bytes_arrived >= pvscsi_commands[s->curr_cmd].data_size) {
>>> +        s->reg_command_status = pvscsi_commands[s->curr_cmd].handler_fn(s);
>>> +        s->curr_cmd = PVSCSI_CMD_FIRST;
>>> +        s->curr_cmd_data_cntr   = 0;
>>> +    }
>>> +}
>>> +
>>> +static void
>>> +pvscsi_on_command_data(PVSCSI_State *s, uint32_t value)
>>> +{
>>> +    size_t bytes_arrived = s->curr_cmd_data_cntr*sizeof(uint32_t);
>>> +
>>> +    assert(bytes_arrived < sizeof(s->curr_cmd_data));
>>> +    s->curr_cmd_data[s->curr_cmd_data_cntr++] = value;
>>> +
>>> +    pvscsi_do_command_processing(s);
>>> +}
>>> +
>>> +static void
>>> +pvscsi_on_command(PVSCSI_State *s, uint64_t cmd_id)
>>> +{
>>> +    if ((cmd_id > PVSCSI_CMD_FIRST) && (cmd_id < PVSCSI_CMD_LAST)) {
>>> +        s->curr_cmd = cmd_id;
>>> +    } else {
>>> +        s->curr_cmd = PVSCSI_CMD_FIRST;
>>> +        DWRPRINTF("Unknown command %"PRIx64, cmd_id);
>>> +    }
>>> +
>>> +    s->curr_cmd_data_cntr = 0;
>>> +    s->reg_command_status = PVSCSI_COMMAND_PROCESSING_FAILED;
>>> +
>>> +    pvscsi_do_command_processing(s);
>>> +}
>>> +
>>> +static void
>>> +pvscsi_io_write(void *opaque, target_phys_addr_t addr,
>>> +                uint64_t val, unsigned size)
>>> +{
>>> +    PVSCSI_State *s = opaque;
>>> +
>>> +    switch (addr) {
>>> +    case PVSCSI_REG_OFFSET_COMMAND:
>>> +        pvscsi_on_command(s, val);
>>> +        break;
>>> +
>>> +    case PVSCSI_REG_OFFSET_COMMAND_DATA:
>>> +        pvscsi_on_command_data(s, (uint32_t) val);
>>> +        break;
>>> +
>>> +    case PVSCSI_REG_OFFSET_INTR_STATUS:
>>> +        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_STATUS write: %"PRIx64, val);
>>> +        s->reg_interrupt_status &= ~val;
>>> +        pvscsi_update_irq_status(s);
>>> +        pvscsi_schedule_completion_processing(s);
>>> +        break;
>>> +
>>> +    case PVSCSI_REG_OFFSET_INTR_MASK:
>>> +        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_MASK write: %"PRIx64, val);
>>> +        s->reg_interrupt_enabled = val;
>>> +        pvscsi_update_irq_status(s);
>>> +        break;
>>> +
>>> +    case PVSCSI_REG_OFFSET_KICK_NON_RW_IO:
>>> +        DCBPRINTF("PVSCSI_REG_OFFSET_KICK_NON_RW_IO write: %"PRIx64, val);
>>> +        pvscsi_process_io(s);
>>> +        break;
>>> +
>>> +    case PVSCSI_REG_OFFSET_KICK_RW_IO:
>>> +        DCBPRINTF("PVSCSI_REG_OFFSET_KICK_RW_IO write: %"PRIx64, val);
>>> +        pvscsi_process_io(s);
>>> +        break;
>>> +
>>> +    case PVSCSI_REG_OFFSET_DEBUG:
>>> +        DCBPRINTF("PVSCSI_REG_OFFSET_DEBUG write: %"PRIx64, val);
>>> +        break;
>>> +
>>> +    default:
>>> +        DWRPRINTF("Unknown write: %" PRIx64 "%u bytes, val %" PRIx64,
>>> +                  (uint64_t) addr, size, val);
>>
>> TARGET_FMT_plx
>>
>>> +        break;
>>> +    }
>>> +
>>> +}
>>> +
>>> +static uint64_t
>>> +pvscsi_io_read(void *opaque, target_phys_addr_t addr, unsigned size)
>>> +{
>>> +    PVSCSI_State *s = opaque;
>>> +
>>> +    switch (addr) {
>>> +    case PVSCSI_REG_OFFSET_INTR_STATUS:
>>> +        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_STATUS read: %"PRIx64,
>>> +                  s->reg_interrupt_status);
>>> +        return s->reg_interrupt_status;
>>> +
>>> +    case PVSCSI_REG_OFFSET_INTR_MASK:
>>> +        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_MASK read: %"PRIx64,
>>> +                  s->reg_interrupt_enabled);
>>> +        return s->reg_interrupt_enabled;
>>> +
>>> +    case PVSCSI_REG_OFFSET_COMMAND_STATUS:
>>> +        DCBPRINTF("PVSCSI_REG_OFFSET_COMMAND_STATUS read: %"PRIx64,
>>> +                  s->reg_command_status);
>>> +        return s->reg_command_status;
>>> +
>>> +    default:
>>> +        DWRPRINTF("Unknown read, address %" PRIx64 ", %d bytes",
>>> +                  (uint64_t) addr, size);
>>
>> Ditto
>>
>>> +        return 0;
>>> +    }
>>> +}
>>> +
>>> +#ifdef PVSCSI_ENABLE_MSIX
>>> +static bool
>>> +pvscsi_init_msix(PVSCSI_State *s)
>>> +{
>>> +    int res;
>>> +    memory_region_init(&s->msix_space, "pvscsi-msix", PAGE_SIZE);
>>> +    res = msix_init(&s->dev, PVSCSI_MSIX_NUM_VECTORS, &s->msix_space, 1, 0);
>>> +    if (0 > res) {
>>
>> Unnatural order again, also below.
>>
>>> +        DWRPRINTF("Failed to initialize MSI-X, error %d", res);
>>> +        memory_region_destroy(&s->msix_space);
>>> +        s->msix_used = false;
>>> +    } else {
>>> +        res = msix_vector_use(&s->dev, PVSCSI_VECTOR_COMPLETION);
>>> +        if (0 > res) {
>>> +            DWRPRINTF("Failed to use MSI-X vector, error %d", res);
>>> +            msix_uninit(&s->dev, &s->msix_space);
>>> +            memory_region_destroy(&s->msix_space);
>>> +            s->msix_used = false;
>>> +        } else {
>>> +            pci_register_bar(&s->dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY,
>>> +                             &s->msix_space);
>>> +            s->msix_used = true;
>>> +        }
>>> +    }
>>
>> How about moving the #ifdef lower and add here
>> #else
>>   s->msix_used = false;
>> #endif
>>
>>> +    return s->msix_used;
>>> +}
>>> +
>>> +static void
>>> +pvscsi_cleanup_msix(PVSCSI_State *s)
>>> +{
>>
>> and here #ifdef to make the function empty on !msix?
>>
>>> +    if (s->msix_used) {
>>> +        msix_vector_unuse(&s->dev, PVSCSI_VECTOR_COMPLETION);
>>> +        msix_uninit(&s->dev, &s->msix_space);
>>> +        memory_region_destroy(&s->msix_space);
>>> +    }
>>> +}
>>> +#endif
>>> +
>>> +static bool
>>> +pvscsi_init_msi(PVSCSI_State *s)
>>> +{
>>> +#define PVSCSI_MSI_OFFSET        (0x50)
>>> +#define PVSCSI_USE_64BIT         (true)
>>> +#define PVSCSI_PER_VECTOR_MASK   (false)
>>
>> #defines should go to top of the file.
>>
>>> +
>>> +    int res;
>>> +    res = msi_init(&s->dev, PVSCSI_MSI_OFFSET, PVSCSI_MSIX_NUM_VECTORS,
>>> +                   PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK);
>>> +    if (0 > res) {
>>
>> Order
>>
>>> +        DWRPRINTF("Failed to initialize MSI, error %d", res);
>>> +        s->msi_used = false;
>>> +    } else {
>>> +        s->msi_used = true;
>>> +    }
>>> +
>>> +    return s->msi_used;
>>> +}
>>> +
>>> +static void
>>> +pvscsi_cleanup_msi(PVSCSI_State *s)
>>> +{
>>> +    if (s->msi_used) {
>>> +        msi_uninit(&s->dev);
>>> +    }
>>> +}
>>> +
>>> +#ifdef PVSCSI_ENABLE_MSIX
>>> +
>>> +static void
>>> +pvscsi_msix_save(QEMUFile *f, void *opaque)
>>> +{
>>> +    msix_save(&((PVSCSI_State *)opaque)->dev, f);
>>> +}
>>> +
>>> +static int
>>> +pvscsi_msix_load(QEMUFile *f, void *opaque, int version_id)
>>> +{
>>> +    msix_load(&((PVSCSI_State *)opaque)->dev, f);
>>> +    return 0;
>>> +}
>>> +
>>> +#endif
>>> +
>>> +static int
>>> +pvscsi_init(PCIDevice *dev)
>>> +{
>>> +    static const MemoryRegionOps pv_scsi_ops = {
>>> +            .read = pvscsi_io_read,
>>> +            .write = pvscsi_io_write,
>>> +            .endianness = DEVICE_NATIVE_ENDIAN,
>>> +            .impl = {
>>> +                    .min_access_size = 4,
>>> +                    .max_access_size = 4,
>>> +            },
>>> +    };
>>> +
>>> +    static const struct SCSIBusInfo pvscsi_scsi_info = {
>>> +            .tcq = true,
>>> +            .max_target = PVSCSI_MAX_DEVS,
>>> +            .max_channel = 0,
>>> +            .max_lun = 0,
>>> +
>>> +            .get_sg_list = pvscsi_get_sg_list,
>>> +            .complete = pvscsi_command_complete,
>>> +            .cancel = pvscsi_request_cancelled
>>
>> comma
>>
>>> +    };
>>> +
>>> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev, dev);
>>> +
>>> +    DCBPRINTF("Starting init...");
>>> +
>>> +    /* PCI subsystem ID */
>>> +    s->dev.config[PCI_SUBSYSTEM_ID] = 0x00;
>>> +    s->dev.config[PCI_SUBSYSTEM_ID + 1] = 0x10;
>>> +
>>> +    /* PCI latency timer = 255 */
>>> +    s->dev.config[PCI_LATENCY_TIMER] = 0xff;
>>> +
>>> +    /* Interrupt pin A */
>>> +    s->dev.config[PCI_INTERRUPT_PIN] = 0x01;
>>> +
>>> +    memory_region_init_io(&s->io_space, &pv_scsi_ops, s,
>>> +                          "pvscsi-io", PVSCSI_MEM_SPACE_SIZE);
>>> +    pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->io_space);
>>> +
>>> +#ifdef PVSCSI_ENABLE_MSIX
>>> +    register_savevm(&dev->qdev, "pvscsi-msix", -1, 1,
>>> +                    pvscsi_msix_save, pvscsi_msix_load, s);
>>> +
>>> +    if (!pvscsi_init_msix(s)) {
>>> +        DWRPRINTF("Failed to initialize MSI-X.");
>>> +    }
>>> +#endif
>>
>> With the adjusted #ifdeffery for the functions themselves, this code
>> could be enabled always.
>>
>>> +    if (!pvscsi_init_msi(s)) {
>>> +        DWRPRINTF("Failed to initialize MSI.");
>>> +    }
>>> +
>>> +    s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s);
>>> +    if (!s->completion_worker) {
>>> +        pvscsi_cleanup_msi(s);
>>> +#ifdef PVSCSI_ENABLE_MSIX
>>> +        pvscsi_cleanup_msix(s);
>>> +#endif
>>> +        memory_region_destroy(&s->io_space);
>>> +        return -ENOMEM;
>>> +    }
>>> +
>>> +    scsi_bus_new(&s->bus, &dev->qdev, &pvscsi_scsi_info);
>>> +    pvscsi_reset_state(s);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static int
>>> +pvscsi_uninit(PCIDevice *dev)
>>> +{
>>> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev, dev);
>>> +
>>> +    DCBPRINTF("Starting uninit...");
>>> +    qemu_bh_delete(s->completion_worker);
>>> +
>>> +#ifdef PVSCSI_ENABLE_MSIX
>>> +    unregister_savevm(&dev->qdev, "pvscsi-msix", s);
>>> +    pvscsi_cleanup_msix(s);
>>> +#endif
>>> +    pvscsi_cleanup_msi(s);
>>> +
>>> +    memory_region_destroy(&s->io_space);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static void
>>> +pvscsi_reset(DeviceState *dev)
>>> +{
>>> +    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev.qdev, dev);
>>> +    DCBPRINTF("Starting reset...");
>>> +    pvscsi_reset_adapter(s);
>>> +}
>>> +
>>> +static void
>>> +pvscsi_pre_save(void *opaque)
>>> +{
>>> +    PVSCSI_State *s = (PVSCSI_State *) opaque;
>>> +
>>> +    DCBPRINTF("Starting presave...");
>>> +
>>> +    assert(QTAILQ_EMPTY(&s->pending_queue));
>>> +    assert(QTAILQ_EMPTY(&s->completion_queue));
>>> +}
>>> +
>>> +static int
>>> +pvscsi_post_load(void *opaque, int version_id)
>>> +{
>>> +#ifdef PVSCSI_ENABLE_MSIX
>>> +    PVSCSI_State *s = (PVSCSI_State *) opaque;
>>
>> Useless cast.
>>
>>> +#endif
>>> +    DCBPRINTF("Starting postload...");
>>> +
>>> +#ifdef PVSCSI_ENABLE_MSIX
>>> +    if (s->msix_used) {
>>> +        if (!msix_vector_use(&s->dev, PVSCSI_VECTOR_COMPLETION)) {
>>> +            DERPRINTF("Failed to re-use MSI-X vectors");
>>> +            return -1;
>>> +        }
>>> +    }
>>> +#endif
>>> +    return 0;
>>> +}
>>> +
>>> +static VMStateDescription vmstate_pvscsi = {
>>> +        .name = "pvscsi",
>>> +        .version_id = 0,
>>> +        .minimum_version_id = 0,
>>> +        .minimum_version_id_old = 0,
>>> +        .pre_save = pvscsi_pre_save,
>>> +        .post_load = pvscsi_post_load,
>>> +        .fields      = (VMStateField[]) {
>>> +                VMSTATE_PCI_DEVICE(dev, PVSCSI_State),
>>> +#ifdef PVSCSI_ENABLE_MSIX
>>> +                VMSTATE_UINT8(msix_used, PVSCSI_State),
>>> +#endif
>>> +                VMSTATE_UINT8(msi_used, PVSCSI_State),
>>> +                VMSTATE_UINT64(reg_interrupt_status, PVSCSI_State),
>>> +                VMSTATE_UINT64(reg_interrupt_enabled, PVSCSI_State),
>>> +                VMSTATE_UINT64(reg_command_status, PVSCSI_State),
>>> +                VMSTATE_UINT64(curr_cmd, PVSCSI_State),
>>> +                VMSTATE_UINT32(curr_cmd_data_cntr, PVSCSI_State),
>>> +                VMSTATE_UINT32_ARRAY(curr_cmd_data, PVSCSI_State,
>>> +                    ARRAY_SIZE(((PVSCSI_State *)NULL)->curr_cmd_data)),
>>> +                VMSTATE_UINT8(rings_info_valid, PVSCSI_State),
>>> +
>>> +                VMSTATE_UINT64(rings.rs_pa, PVSCSI_State),
>>> +                VMSTATE_UINT32(rings.txr_len_mask, PVSCSI_State),
>>> +                VMSTATE_UINT32(rings.rxr_len_mask, PVSCSI_State),
>>> +                VMSTATE_UINT64_ARRAY(rings.req_ring_pages_pa, PVSCSI_State,
>>> +                    PVSCSI_SETUP_RINGS_MAX_NUM_PAGES),
>>> +                VMSTATE_UINT64_ARRAY(rings.cmp_ring_pages_pa, PVSCSI_State,
>>> +                    PVSCSI_SETUP_RINGS_MAX_NUM_PAGES),
>>> +                VMSTATE_UINT64(rings.consumed_ptr, PVSCSI_State),
>>> +                VMSTATE_UINT64(rings.filled_cmp_ptr, PVSCSI_State),
>>> +
>>> +                VMSTATE_END_OF_LIST()
>>> +            }
>>> +};
>>> +
>>> +static void
>>> +pvscsi_write_config(PCIDevice *pci, uint32_t addr, uint32_t val, int len)
>>> +{
>>> +    pci_default_write_config(pci, addr, val, len);
>>> +    msi_write_config(pci, addr, val, len);
>>> +#if defined(PVSCSI_ENABLE_MSIX)
>>> +    msix_write_config(pci, addr, val, len);
>>> +#endif
>>> +}
>>> +
>>> +static void pvscsi_class_init(ObjectClass *klass, void *data)
>>> +{
>>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>>> +    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
>>> +
>>> +    k->init = pvscsi_init;
>>> +    k->exit = pvscsi_uninit;
>>> +    k->vendor_id = PCI_VENDOR_ID_VMWARE;
>>> +    k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI;
>>> +    k->class_id = PCI_CLASS_STORAGE_SCSI;
>>> +    k->subsystem_id = 0x1000;
>>> +    dc->reset = pvscsi_reset;
>>> +    dc->vmsd = &vmstate_pvscsi;
>>> +    k->config_write = pvscsi_write_config;
>>> +}
>>> +
>>> +static TypeInfo pvscsi_info = {
>>> +    .name          = "pvscsi",
>>> +    .parent        = TYPE_PCI_DEVICE,
>>> +    .instance_size = sizeof(PVSCSI_State),
>>> +    .class_init    = pvscsi_class_init,
>>> +};
>>> +
>>> +static void
>>> +pvscsi_register_types(void)
>>> +{
>>> +    type_register_static(&pvscsi_info);
>>> +
>>> +    DCBPRINTF("PVSCSI QEMU device emulation registered");
>>> +}
>>> +
>>> +type_init(pvscsi_register_types);
>>> diff --git a/hw/pvscsi.h b/hw/pvscsi.h
>>> new file mode 100644
>>> index 0000000..611c549
>>> --- /dev/null
>>> +++ b/hw/pvscsi.h
>>> @@ -0,0 +1,442 @@
>>> +/*
>>> + * QEMU VMWARE PVSCSI paravirtual SCSI bus
>>> + *
>>> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
>>> + *
>>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
>>> + *
>>> + * Based on implementation by Paolo Bonzini
>>> + * http://lists.gnu.org/archive/html/qemu-devel/2011-08/msg00729.html
>>> + *
>>> + * Authors:
>>> + * Paolo Bonzini <pbonzini@redhat.com>
>>> + * Dmitry Fleytman <dmitry@daynix.com>
>>> + * Yan Vugenfirer <yvugenfi@redhat.com>
>>> + *
>>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>>> + * See the COPYING file in the top-level directory.
>>> + *
>>> + */
>>> +
>>> +#ifndef _PVSCSI_H_
>>> +#define _PVSCSI_H_
>>
>> Underscores
>>
>>> +
>>> +#define PAGE_SIZE (4096)
>>> +#define PAGE_SHIFT (12)
>>
>> These probably conflict with system defines, please rename. Also,
>> define size using shift.
>>
>>> +
>>> +#define PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT 128
>>> +
>>> +#define MASK(n)        ((1 << (n)) - 1)        /* make an n-bit mask */
>>> +
>>> +/*
>>> + * Following is an interface definition for
>>> + * PVSCSI device as provided by VMWARE
>>> + * See original copyright from Linux kernel v3.2.8
>>> + * header file drivers/scsi/vmw_pvscsi.h below.
>>> + */
>>> +
>>> + /*
>>> +  * VMware PVSCSI header file
>>> +  *
>>> +  * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
>>> +  *
>>> +  * This program is free software; you can redistribute it and/or modify it
>>> +  * under the terms of the GNU General Public License as published by the
>>> +  * Free Software Foundation; version 2 of the License and no later version.
>>
>> GPLv2only conflicts with GPLv2+.
>
> Indeed. Please note that this blurb was earlier copy-pasted from the
> driver's code checked into the linux kernel (and it still seems to say
> v2). I will update this with the latest maintainer's name and address.
> However, I was wondering if the copyright blurb needs to be
> copy-pasted here at all or is a reference to the source's location in
> the linux kernel tree good enough?

I'd keep the blurb here. But what I meant is that earlier in the file
you stated the license as GPLv2+, but now this part of the file
conflicts with that. Perhaps the VMware header file should be
separate? Stage the file so that the first commit is original
unmodified VMware file, then a separate commit adds your changes?

>>
>>> +  *
>>> +  * This program is distributed in the hope that it will be useful, but
>>> +  * WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
>>> +  * NON INFRINGEMENT.  See the GNU General Public License for more
>>> +  * details.
>>> +  *
>>> +  * You should have received a copy of the GNU General Public License
>>> +  * along with this program; if not, write to the Free Software
>>> +  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
>>
>> Web version, please.
>>
>>> +  *
>>> +  * Maintained by: Alok N Kataria <address@hidden>
>>
>> <address@hidden>?
>>
>>> +  *
>>> +  */
>>> +
>>> +/*
>>> + * host adapter status/error codes
>>> + */
>>> +enum HostBusAdapterStatus {
>>> +   BTSTAT_SUCCESS       = 0x00,  /* CCB complete normally with no errors */
>>> +   BTSTAT_LINKED_COMMAND_COMPLETED           = 0x0a,
>>> +   BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG = 0x0b,
>>> +   BTSTAT_DATA_UNDERRUN = 0x0c,
>>> +   BTSTAT_SELTIMEO      = 0x11,  /* SCSI selection timeout */
>>> +   BTSTAT_DATARUN       = 0x12,  /* data overrun/underrun */
>>> +   BTSTAT_BUSFREE       = 0x13,  /* unexpected bus free */
>>> +   BTSTAT_INVPHASE      = 0x14,  /* invalid bus phase or sequence */
>>> +                                 /* requested by target           */
>>> +   BTSTAT_LUNMISMATCH   = 0x17,  /* linked CCB has different LUN  */
>>> +                                 /* from first CCB                */
>>> +   BTSTAT_SENSFAILED    = 0x1b,  /* auto request sense failed */
>>> +   BTSTAT_TAGREJECT     = 0x1c,  /* SCSI II tagged queueing message */
>>> +                                 /* rejected by target              */
>>> +   BTSTAT_BADMSG        = 0x1d,  /* unsupported message received by */
>>> +                                 /* the host adapter                */
>>> +   BTSTAT_HAHARDWARE    = 0x20,  /* host adapter hardware failed */
>>> +   BTSTAT_NORESPONSE    = 0x21,  /* target did not respond to SCSI ATN, */
>>> +                                 /* sent a SCSI RST                     */
>>> +   BTSTAT_SENTRST       = 0x22,  /* host adapter asserted a SCSI RST */
>>> +   BTSTAT_RECVRST       = 0x23,  /* other SCSI devices asserted a SCSI RST */
>>> +   BTSTAT_DISCONNECT    = 0x24,  /* target device reconnected improperly */
>>> +                                 /* (w/o tag)                            */
>>> +   BTSTAT_BUSRESET      = 0x25,  /* host adapter issued BUS device reset */
>>> +   BTSTAT_ABORTQUEUE    = 0x26,  /* abort queue generated */
>>> +   BTSTAT_HASOFTWARE    = 0x27,  /* host adapter software error */
>>> +   BTSTAT_HATIMEOUT     = 0x30,  /* host adapter hardware timeout error */
>>> +   BTSTAT_SCSIPARITY    = 0x34,  /* SCSI parity error detected */
>>> +};
>>> +
>>> +/*
>>> + * Register offsets.
>>> + *
>>> + * These registers are accessible both via i/o space and mm i/o.
>>> + */
>>> +
>>> +enum PVSCSIRegOffset {
>>> +    PVSCSI_REG_OFFSET_COMMAND        =    0x0,
>>> +    PVSCSI_REG_OFFSET_COMMAND_DATA   =    0x4,
>>> +    PVSCSI_REG_OFFSET_COMMAND_STATUS =    0x8,
>>> +    PVSCSI_REG_OFFSET_LAST_STS_0     =  0x100,
>>> +    PVSCSI_REG_OFFSET_LAST_STS_1     =  0x104,
>>> +    PVSCSI_REG_OFFSET_LAST_STS_2     =  0x108,
>>> +    PVSCSI_REG_OFFSET_LAST_STS_3     =  0x10c,
>>> +    PVSCSI_REG_OFFSET_INTR_STATUS    = 0x100c,
>>> +    PVSCSI_REG_OFFSET_INTR_MASK      = 0x2010,
>>> +    PVSCSI_REG_OFFSET_KICK_NON_RW_IO = 0x3014,
>>> +    PVSCSI_REG_OFFSET_DEBUG          = 0x3018,
>>> +    PVSCSI_REG_OFFSET_KICK_RW_IO     = 0x4018,
>>> +};
>>> +
>>> +/*
>>> + * Virtual h/w commands.
>>> + */
>>> +
>>> +enum PVSCSICommands {
>>> +    PVSCSI_CMD_FIRST             = 0, /* has to be first */
>>> +
>>> +    PVSCSI_CMD_ADAPTER_RESET     = 1,
>>> +    PVSCSI_CMD_ISSUE_SCSI        = 2,
>>> +    PVSCSI_CMD_SETUP_RINGS       = 3,
>>> +    PVSCSI_CMD_RESET_BUS         = 4,
>>> +    PVSCSI_CMD_RESET_DEVICE      = 5,
>>> +    PVSCSI_CMD_ABORT_CMD         = 6,
>>> +    PVSCSI_CMD_CONFIG            = 7,
>>> +    PVSCSI_CMD_SETUP_MSG_RING    = 8,
>>> +    PVSCSI_CMD_DEVICE_UNPLUG     = 9,
>>> +
>>> +    PVSCSI_CMD_LAST              = 10  /* has to be last */
>>> +};
>>> +
>>> +#define PVSCSI_COMMAND_PROCESSING_SUCCEEDED   (0)
>>> +#define PVSCSI_COMMAND_PROCESSING_FAILED     (-1)
>>> +
>>> +/*
>>> + * Command descriptor for PVSCSI_CMD_RESET_DEVICE --
>>> + */
>>> +
>>> +struct PVSCSICmdDescResetDevice {
>>> +    uint32_t    target;
>>> +    uint8_t     lun[8];
>>> +} QEMU_PACKED;
>>
>> Typedefs missing (also below), please add.
>>
>>> +
>>> +/*
>>> + * Command descriptor for PVSCSI_CMD_ABORT_CMD --
>>> + *
>>> + * - currently does not support specifying the LUN.
>>> + * - _pad should be 0.
>>> + */
>>> +
>>> +struct PVSCSICmdDescAbortCmd {
>>> +    uint64_t    context;
>>> +    uint32_t    target;
>>> +    uint32_t    _pad;
>>> +} QEMU_PACKED;
>>> +
>>> +/*
>>> + * Command descriptor for PVSCSI_CMD_SETUP_RINGS --
>>> + *
>>> + * Notes:
>>> + * - reqRingNumPages and cmpRingNumPages need to be power of two.
>>> + * - reqRingNumPages and cmpRingNumPages need to be different from 0,
>>> + * - reqRingNumPages and cmpRingNumPages need to be inferior to
>>> + *   PVSCSI_SETUP_RINGS_MAX_NUM_PAGES.
>>> + */
>>> +
>>> +#define PVSCSI_SETUP_RINGS_MAX_NUM_PAGES        32
>>> +struct PVSCSICmdDescSetupRings {
>>> +    uint32_t    reqRingNumPages;
>>> +    uint32_t    cmpRingNumPages;
>>> +    uint64_t    ringsStatePPN;
>>> +    uint64_t    reqRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
>>> +    uint64_t    cmpRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
>>> +} QEMU_PACKED;
>>> +
>>> +/*
>>> + * Command descriptor for PVSCSI_CMD_SETUP_MSG_RING --
>>> + *
>>> + * Notes:
>>> + * - this command was not supported in the initial revision of the h/w
>>> + *   interface. Before using it, you need to check that it is supported by
>>> + *   writing PVSCSI_CMD_SETUP_MSG_RING to the 'command' register, then
>>> + *   immediately after read the 'command status' register:
>>> + *       * a value of -1 means that the cmd is NOT supported,
>>> + *       * a value != -1 means that the cmd IS supported.
>>> + *   If it's supported the 'command status' register should return:
>>> + *      sizeof(PVSCSICmdDescSetupMsgRing) / sizeof(uint32_t).
>>> + * - this command should be issued _after_ the usual SETUP_RINGS so that the
>>> + *   RingsState page is already setup. If not, the command is a nop.
>>> + * - numPages needs to be a power of two,
>>> + * - numPages needs to be different from 0,
>>> + * - _pad should be zero.
>>> + */
>>> +
>>> +#define PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES  16
>>> +
>>> +struct PVSCSICmdDescSetupMsgRing {
>>> +    uint32_t    numPages;
>>> +    uint32_t    _pad;
>>
>> Underscore
>>
>>> +    uint64_t    ringPPNs[PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES];
>>> +} QEMU_PACKED;
>>> +
>>> +enum PVSCSIMsgType {
>>> +    PVSCSI_MSG_DEV_ADDED          = 0,
>>> +    PVSCSI_MSG_DEV_REMOVED        = 1,
>>> +    PVSCSI_MSG_LAST               = 2,
>>> +};
>>> +
>>> +/*
>>> + * Msg descriptor.
>>> + *
>>> + * sizeof(struct PVSCSIRingMsgDesc) == 128.
>>> + *
>>> + * - type is of type enum PVSCSIMsgType.
>>> + * - the content of args depend on the type of event being delivered.
>>> + */
>>> +
>>> +struct PVSCSIRingMsgDesc {
>>> +    uint32_t    type;
>>> +    uint32_t    args[31];
>>> +} QEMU_PACKED;
>>> +
>>> +struct PVSCSIMsgDescDevStatusChanged {
>>> +    uint32_t    type;  /* PVSCSI_MSG_DEV _ADDED / _REMOVED */
>>> +    uint32_t    bus;
>>> +    uint32_t    target;
>>> +    uint8_t     lun[8];
>>> +    uint32_t    pad[27];
>>> +} QEMU_PACKED;
>>> +
>>> +/*
>>> + * Rings state.
>>> + *
>>> + * - the fields:
>>> + *    . msgProdIdx,
>>> + *    . msgConsIdx,
>>> + *    . msgNumEntriesLog2,
>>> + *   .. are only used once the SETUP_MSG_RING cmd has been issued.
>>> + * - '_pad' helps to ensure that the msg related fields are on their own
>>> + *   cache-line.
>>> + */
>>> +
>>> +struct PVSCSIRingsState {
>>> +    uint32_t    reqProdIdx;
>>> +    uint32_t    reqConsIdx;
>>> +    uint32_t    reqNumEntriesLog2;
>>> +
>>> +    uint32_t    cmpProdIdx;
>>> +    uint32_t    cmpConsIdx;
>>> +    uint32_t    cmpNumEntriesLog2;
>>> +
>>> +    uint8_t     _pad[104];
>>
>> Underscore.
>>
>>> +
>>> +    uint32_t    msgProdIdx;
>>> +    uint32_t    msgConsIdx;
>>> +    uint32_t    msgNumEntriesLog2;
>>> +} QEMU_PACKED;
>>> +
>>> +/*
>>> + * Request descriptor.
>>> + *
>>> + * sizeof(RingReqDesc) = 128
>>> + *
>>> + * - context: is a unique identifier of a command. It could normally be any
>>> + *   64bit value, however we currently store it in the serialNumber variable
>>> + *   of struct SCSI_Command, so we have the following restrictions due to the
>>> + *   way this field is handled in the vmkernel storage stack:
>>> + *    * this value can't be 0,
>>> + *    * the upper 32bit need to be 0 since serialNumber is as a uint32_t.
>>> + *   Currently tracked as PR 292060.
>>> + * - dataLen: contains the total number of bytes that need to be transferred.
>>> + * - dataAddr:
>>> + *   * if PVSCSI_FLAG_CMD_WITH_SG_LIST is set: dataAddr is the PA of the first
>>> + *     s/g table segment, each s/g segment is entirely contained on a single
>>> + *     page of physical memory,
>>> + *   * if PVSCSI_FLAG_CMD_WITH_SG_LIST is NOT set, then dataAddr is the PA of
>>> + *     the buffer used for the DMA transfer,
>>> + * - flags:
>>> + *   * PVSCSI_FLAG_CMD_WITH_SG_LIST: see dataAddr above,
>>> + *   * PVSCSI_FLAG_CMD_DIR_NONE: no DMA involved,
>>> + *   * PVSCSI_FLAG_CMD_DIR_TOHOST: transfer from device to main memory,
>>> + *   * PVSCSI_FLAG_CMD_DIR_TODEVICE: transfer from main memory to device,
>>> + *   * PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB: reserved to handle CDBs larger than
>>> + *     16bytes. To be specified.
>>> + * - vcpuHint: vcpuId of the processor that will be most likely waiting for the
>>> + *   completion of the i/o. For guest OSes that use lowest priority message
>>> + *   delivery mode (such as windows), we use this "hint" to deliver the
>>> + *   completion action to the proper vcpu. For now, we can use the vcpuId of
>>> + *   the processor that initiated the i/o as a likely candidate for the vcpu
>>> + *   that will be waiting for the completion..
>>> + * - bus should be 0: we currently only support bus 0 for now.
>>> + * - unused should be zero'd.
>>> + */
>>> +
>>> +#define PVSCSI_FLAG_CMD_WITH_SG_LIST        (1 << 0)
>>> +#define PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB     (1 << 1)
>>> +#define PVSCSI_FLAG_CMD_DIR_NONE            (1 << 2)
>>> +#define PVSCSI_FLAG_CMD_DIR_TOHOST          (1 << 3)
>>> +#define PVSCSI_FLAG_CMD_DIR_TODEVICE        (1 << 4)
>>> +
>>> +#define PVSCSI_KNOWN_FLAGS \
>>> +  (PVSCSI_FLAG_CMD_WITH_SG_LIST     | \
>>> +   PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB  | \
>>> +   PVSCSI_FLAG_CMD_DIR_NONE         | \
>>> +   PVSCSI_FLAG_CMD_DIR_TOHOST       | \
>>> +   PVSCSI_FLAG_CMD_DIR_TODEVICE)
>>> +
>>> +struct PVSCSIRingReqDesc {
>>> +    uint64_t    context;
>>> +    uint64_t    dataAddr;
>>> +    uint64_t    dataLen;
>>> +    uint64_t    senseAddr;
>>> +    uint32_t    senseLen;
>>> +    uint32_t    flags;
>>> +    uint8_t     cdb[16];
>>> +    uint8_t     cdbLen;
>>> +    uint8_t     lun[8];
>>> +    uint8_t     tag;
>>> +    uint8_t     bus;
>>> +    uint8_t     target;
>>> +    uint8_t     vcpuHint;
>>> +    uint8_t     unused[59];
>>> +} QEMU_PACKED;
>>> +
>>> +typedef struct PVSCSIRingReqDesc PVSCSIRingReqDesc;
>>> +
>>> +/*
>>> + * Scatter-gather list management.
>>> + *
>>> + * As described above, when PVSCSI_FLAG_CMD_WITH_SG_LIST is set in the
>>> + * RingReqDesc.flags, then RingReqDesc.dataAddr is the PA of the first s/g
>>> + * table segment.
>>> + *
>>> + * - each segment of the s/g table contain a succession of struct
>>> + *   PVSCSISGElement.
>>> + * - each segment is entirely contained on a single physical page of memory.
>>> + * - a "chain" s/g element has the flag PVSCSI_SGE_FLAG_CHAIN_ELEMENT set in
>>> + *   PVSCSISGElement.flags and in this case:
>>> + *     * addr is the PA of the next s/g segment,
>>> + *     * length is undefined, assumed to be 0.
>>> + */
>>> +
>>> +struct PVSCSISGElement {
>>> +    uint64_t    addr;
>>> +    uint32_t    length;
>>> +    uint32_t    flags;
>>> +} QEMU_PACKED;
>>> +
>>> +/*
>>> + * Completion descriptor.
>>> + *
>>> + * sizeof(RingCmpDesc) = 32
>>> + *
>>> + * - context: identifier of the command. The same thing that was specified
>>> + *   under "context" as part of struct RingReqDesc at initiation time,
>>> + * - dataLen: number of bytes transferred for the actual i/o operation,
>>> + * - senseLen: number of bytes written into the sense buffer,
>>> + * - hostStatus: adapter status,
>>> + * - scsiStatus: device status,
>>> + * - _pad should be zero.
>>> + */
>>> +
>>> +struct PVSCSIRingCmpDesc {
>>> +    uint64_t    context;
>>> +    uint64_t    dataLen;
>>> +    uint32_t    senseLen;
>>> +    uint16_t    hostStatus;
>>> +    uint16_t    scsiStatus;
>>> +    uint32_t    _pad[2];
>>> +} QEMU_PACKED;
>>> +typedef struct PVSCSIRingCmpDesc PVSCSIRingCmpDesc;
>>> +
>>> +/*
>>> + * Interrupt status / IRQ bits.
>>> + */
>>> +
>>> +#define PVSCSI_INTR_CMPL_0                 (1 << 0)
>>> +#define PVSCSI_INTR_CMPL_1                 (1 << 1)
>>> +#define PVSCSI_INTR_CMPL_MASK              MASK(2)
>>> +
>>> +#define PVSCSI_INTR_MSG_0                  (1 << 2)
>>> +#define PVSCSI_INTR_MSG_1                  (1 << 3)
>>> +#define PVSCSI_INTR_MSG_MASK               (MASK(2) << 2)
>>> +
>>> +#define PVSCSI_INTR_ALL_SUPPORTED          MASK(4)
>>> +
>>> +/*
>>> + * Number of MSI-X vectors supported.
>>> + */
>>> +#define PVSCSI_MAX_INTRS        24
>>> +
>>> +/*
>>> + * Enumeration of supported MSI-X vectors
>>> + */
>>> +#define PVSCSI_VECTOR_COMPLETION   0
>>> +
>>> +/*
>>> + * Misc constants for the rings.
>>> + */
>>> +
>>> +#define PVSCSI_MAX_NUM_PAGES_REQ_RING   PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
>>> +#define PVSCSI_MAX_NUM_PAGES_CMP_RING   PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
>>> +#define PVSCSI_MAX_NUM_PAGES_MSG_RING   PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES
>>> +
>>> +#define PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE \
>>> +                (PAGE_SIZE / sizeof(struct PVSCSIRingReqDesc))
>>> +
>>> +#define PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE \
>>> +                (PAGE_SIZE / sizeof(PVSCSIRingCmpDesc))
>>> +
>>> +#define PVSCSI_MAX_REQ_QUEUE_DEPTH \
>>> +    (PVSCSI_MAX_NUM_PAGES_REQ_RING * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE)
>>> +
>>> +#define PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES     1
>>> +#define PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES 1
>>> +#define PVSCSI_MEM_SPACE_MISC_NUM_PAGES        2
>>> +#define PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES     2
>>> +#define PVSCSI_MEM_SPACE_MSIX_NUM_PAGES        2
>>> +
>>> +enum PVSCSIMemSpace {
>>> +    PVSCSI_MEM_SPACE_COMMAND_PAGE       = 0,
>>> +    PVSCSI_MEM_SPACE_INTR_STATUS_PAGE   = 1,
>>> +    PVSCSI_MEM_SPACE_MISC_PAGE          = 2,
>>> +    PVSCSI_MEM_SPACE_KICK_IO_PAGE       = 4,
>>> +    PVSCSI_MEM_SPACE_MSIX_TABLE_PAGE    = 6,
>>> +    PVSCSI_MEM_SPACE_MSIX_PBA_PAGE      = 7,
>>> +};
>>> +
>>> +#define PVSCSI_MEM_SPACE_NUM_PAGES \
>>> +    (PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES +       \
>>> +     PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES +   \
>>> +     PVSCSI_MEM_SPACE_MISC_NUM_PAGES +          \
>>> +     PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES +       \
>>> +     PVSCSI_MEM_SPACE_MSIX_NUM_PAGES)
>>> +
>>> +#define PVSCSI_MEM_SPACE_SIZE        (PVSCSI_MEM_SPACE_NUM_PAGES * PAGE_SIZE)
>>> +
>>> +#endif /* _VMW_PVSCSI_H_ */
>>> --
>>> 1.7.9.5
>>>
>
> Thanks for the detailed review. I will address all the rest of the
> items pointed out in the next version.
>
> Thanks,
> Deep
diff mbox

Patch

diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index 9d3e1db..9fd4896 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -10,6 +10,7 @@  CONFIG_EEPRO100_PCI=y
 CONFIG_PCNET_PCI=y
 CONFIG_PCNET_COMMON=y
 CONFIG_LSI_SCSI_PCI=y
+CONFIG_PVSCSI_SCSI_PCI=y
 CONFIG_RTL8139_PCI=y
 CONFIG_E1000_PCI=y
 CONFIG_IDE_CORE=y
diff --git a/docs/specs/pvscsi-spec.txt b/docs/specs/pvscsi-spec.txt
new file mode 100644
index 0000000..27f5529
--- /dev/null
+++ b/docs/specs/pvscsi-spec.txt
@@ -0,0 +1,92 @@ 
+General Description
+===================
+
+This document describes VMWare PVSCSI device interface specification.
+Created by Dmitry Fleytman (dmitry@daynix.com), Daynix Computing LTD.
+Based on source code of PVSCSI Linux driver from kernel 3.0.4
+
+PVSCSI Device Interface Overview
+================================
+
+The interface is based on memory area shared between hypervisor and VM.
+Memory area is obtained by driver as device IO memory resource of
+PVSCSI_MEM_SPACE_SIZE length.
+The shared memory consists of registers area and rings area.
+The registers area is used to raise hypervisorinterrupts and issue device
+commands. The rings area is used to transfer data descruiptors and SCSI
+commands from VM to hypervisor and to transfer messages produced by
+hypervisor to VM. Data itself is transferred via virtual scatter-gather DMA.
+
+PVSCSI Device Registers
+=======================
+
+Registers area length is 1 page (PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES).
+Registers area structure is described by PVSCSIRegOffset enumeration.
+There are registers to issue device command (with optional short data),
+issue device interrupt, control interrupts masking.
+
+PVSCSI Device Rings
+===================
+
+There are three rings in shared memory:
+
+    1. Request ring (struct PVSCSIRingReqDesc *req_ring)
+        - ring for OS to device requests
+    2. Completion ring (struct PVSCSIRingCmpDesc *cmp_ring)
+        - ring for device request completions
+    3. Message ring (struct PVSCSIRingMsgDesc *msg_ring)
+        - ring for messages from device.
+       This ring is optional and may be not configured.
+There is a control area (struct PVSCSIRingsState *rings_state) used to control
+rings operation.
+
+PVSCSI Device to Host Interrupts
+================================
+There are following interrupt types supported by PVSCSI device:
+    1. Completion interrupts (completion ring notifications):
+        PVSCSI_INTR_CMPL_0
+        PVSCSI_INTR_CMPL_1
+    2. Message interrupts (message ring notifications):
+        PVSCSI_INTR_MSG_0
+        PVSCSI_INTR_MSG_1
+
+Interrupts are controlled via PVSCSI_REG_OFFSET_INTR_MASK register
+Bit set means interrupt enabled, bit cleared - disabled
+
+Interrupt modes supported are legacy, MSI and MSI-X
+In case of legacy interrupts register PVSCSI_REG_OFFSET_INTR_STATUS
+used to verify interrupt arrival and to clear interrupt state
+Interrupts are cleared by writing processed bits back
+to interrupt status register.
+
+PVSCSI Device Operation Sequences
+=================================
+
+1. Startup sequence:
+    a. Issue PVSCSI_CMD_ADAPTER_RESET command;
+    aa. Windows driver reads interrupt status register here;
+    b. Issue PVSCSI_CMD_SETUP_MSG_RING command with no additional data,
+       check status and disable device messages if error returned;
+       (Omitted if device messages disabled by driver configuration)
+    c. Issue PVSCSI_CMD_SETUP_RINGS command, provide rings configuration
+       as struct PVSCSICmdDescSetupRings;
+    d. Issue PVSCSI_CMD_SETUP_MSG_RING command again, provide
+       rings configuration as struct PVSCSICmdDescSetupMsgRing;
+    e. Unmask completion and message (if device messages enabled) interrupts.
+
+2. Shutdown sequences
+    a. Mask interrupts;
+    b. Flush request ring using PVSCSI_REG_OFFSET_KICK_NON_RW_IO;
+    c. Issue PVSCSI_CMD_ADAPTER_RESET command.
+
+3. Send request
+    a. Fill next free request ring descriptor;
+    b. Issue PVSCSI_REG_OFFSET_KICK_RW_IO for R/W operations;
+       or PVSCSI_REG_OFFSET_KICK_NON_RW_IO for other operations.
+
+4. Abort command
+    a. Issue PVSCSI_CMD_ABORT_CMD command;
+
+5. Request completion processing
+    a. Upon completion interrupt arrival process completion
+       and message (if enabled) rings.
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index 3d77259..6dd1eea 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -86,6 +86,7 @@  hw-obj-$(CONFIG_OPENCORES_ETH) += opencores_eth.o

 # SCSI layer
 hw-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
+hw-obj-$(CONFIG_PVSCSI_SCSI_PCI) += pvscsi.o
 hw-obj-$(CONFIG_ESP) += esp.o

 hw-obj-y += sysbus.o isa-bus.o
diff --git a/hw/pci.h b/hw/pci.h
index 79d38fd..3c197d9 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -60,6 +60,7 @@ 
 #define PCI_DEVICE_ID_VMWARE_NET         0x0720
 #define PCI_DEVICE_ID_VMWARE_SCSI        0x0730
 #define PCI_DEVICE_ID_VMWARE_IDE         0x1729
+#define PCI_DEVICE_ID_VMWARE_PVSCSI      0x07C0

 /* Intel (0x8086) */
 #define PCI_DEVICE_ID_INTEL_82551IT      0x1209
diff --git a/hw/pvscsi.c b/hw/pvscsi.c
new file mode 100644
index 0000000..719ea98
--- /dev/null
+++ b/hw/pvscsi.c
@@ -0,0 +1,1260 @@ 
+/*
+ * QEMU VMWARE PVSCSI paravirtual SCSI bus
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Based on implementation by Paolo Bonzini
+ * http://lists.gnu.org/archive/html/qemu-devel/2011-08/msg00729.html
+ *
+ * Authors:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Yan Vugenfirer <yvugenfi@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#define DEBUG_PVSCSI_WARNINGS
+#define DEBUG_PVSCSI_ERRORS
+/*
+#define DEBUG_PVSCSI_CALLBACKS
+#define DEBUG_PVSCSI_RINGS
+#define DEBUG_PVSCSI_COMMANDS
+#define DEBUG_PVSCSI_INTERRUPTS
+#define DEBUG_PVSCSI_SHMEM_ACCESS
+#define DEBUG_PVSCSI_SHMEM_ACCESS
+#define DEBUG_SCSI_EXCHANGE
+*/
+
+#ifdef DEBUG_PVSCSI_SHMEM_ACCESS
+#define DSHPRINTF(fmt, ...)                                                 \
+    do {                                                                    \
+        printf("[pvscsi][SH][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
+    } while (0)
+#else
+#define DSHPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_PVSCSI_CALLBACKS
+#define DCBPRINTF(fmt, ...)                                                 \
+    do {                                                                    \
+        printf("[pvscsi][CB][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
+    } while (0)
+#else
+#define DCBPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_PVSCSI_WARNINGS
+#define DWRPRINTF(fmt, ...)                                                 \
+    do {                                                                    \
+        printf("[pvscsi][WR][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
+    } while (0)
+#else
+#define DWRPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_PVSCSI_ERRORS
+#define DERPRINTF(fmt, ...)                                                 \
+    do {                                                                    \
+        printf("[pvscsi][ER][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
+    } while (0)
+#else
+#define DERPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_PVSCSI_COMMANDS
+#define DCMPRINTF(fmt, ...)                                                 \
+    do {                                                                    \
+        printf("[pvscsi][CM][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
+    } while (0)
+#else
+#define DCMPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_SCSI_EXCHANGE
+#define DSXPRINTF(fmt, ...)                                                 \
+    do {                                                                    \
+        printf("[pvscsi][SX][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
+    } while (0)
+#else
+#define DSXPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_PVSCSI_RINGS
+#define DRIPRINTF(fmt, ...)                                                 \
+    do {                                                                    \
+        printf("[pvscsi][RI][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
+    } while (0)
+#else
+#define DRIPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_PVSCSI_INTERRUPTS
+#define DIRPRINTF(fmt, ...)                                                 \
+    do {                                                                    \
+        printf("[pvscsi][IR][%s]: " fmt "\n", __func__, ## __VA_ARGS__);    \
+    } while (0)
+#else
+#define DIRPRINTF(fmt, ...) do {} while (0)
+#endif
+
+/*
+ * MSI-X support is disabled because it leads Windows OS to crash on startup.
+ * The crash happens because Windows driver requires MSI-X shared memory
+ * to be part of the same BAR used for rings state, registers, etc.
+ * This is not supported by QEMU infrastructure so separate BAR created from
+ * MSI-X purposes. Windows driver fails to deal with 2 BARs.
+ * Linux works fine with current MSI-X implementation enabled.
+ */
+#undef PVSCSI_ENABLE_MSIX
+
+#include "softfloat.h"
+#include "compiler.h"
+#include "scsi-defs.h"
+#include "hw/hw.h"
+#include "hw/pci.h"
+#include "hw/scsi.h"
+#ifdef PVSCSI_ENABLE_MSIX
+#include "msix.h"
+#endif
+#include "msi.h"
+#include "qemu-queue.h"
+#include "pvscsi.h"
+#include "vmware_utils.h"
+
+#define PVSCSI_MAX_DEVS                   (64)
+#define PVSCSI_MSIX_NUM_VECTORS           (1)
+
+typedef struct {
+    uint64_t            rs_pa;
+
+    uint32_t            txr_len_mask;
+    uint32_t            rxr_len_mask;
+    uint64_t            req_ring_pages_pa[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
+    uint64_t            cmp_ring_pages_pa[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
+    uint64_t            consumed_ptr;
+    uint64_t            filled_cmp_ptr;
+} PVSCSIRingsMgr;
+
+#define _RS_GET_FIELD(rs_pa, field) \
+    (vmw_shmem_ld32(rs_pa + offsetof(struct PVSCSIRingsState, field)))
+#define _RS_SET_FIELD(rs_pa, field, val) \
+    (vmw_shmem_st32(rs_pa + offsetof(struct PVSCSIRingsState, field), val))
+
+/* Integer binary logarithm */
+static int
+pvscsi_log2(uint32_t input)
+{
+    int log = 0;
+    assert(input > 0);
+    while (input >> ++log) {
+
+        };
+    return log;
+}
+
+static void
+pvscsi_rings_mgr_init(PVSCSIRingsMgr *m, struct PVSCSICmdDescSetupRings *ri)
+{
+    int i;
+    uint32_t txr_len_log2, rxr_len_log2;
+    uint32_t req_ring_size, cmp_ring_size;
+    m->rs_pa = ri->ringsStatePPN << PAGE_SHIFT;
+
+    req_ring_size = ri->reqRingNumPages * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
+    cmp_ring_size = ri->cmpRingNumPages * PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
+    txr_len_log2 = pvscsi_log2(req_ring_size - 1);
+    rxr_len_log2 = pvscsi_log2(cmp_ring_size - 1);
+
+    m->txr_len_mask = MASK(txr_len_log2);
+    m->rxr_len_mask = MASK(rxr_len_log2);
+
+    m->consumed_ptr = 0;
+    m->filled_cmp_ptr = 0;
+
+    for (i = 0; i < ri->reqRingNumPages; i++) {
+        m->req_ring_pages_pa[i] = ri->reqRingPPNs[i] << PAGE_SHIFT;
+    }
+
+    for (i = 0; i < ri->cmpRingNumPages; i++) {
+        m->cmp_ring_pages_pa[i] = ri->cmpRingPPNs[i] << PAGE_SHIFT;
+    }
+
+    _RS_SET_FIELD(m->rs_pa, reqProdIdx, 0);
+    _RS_SET_FIELD(m->rs_pa, reqConsIdx, 0);
+    _RS_SET_FIELD(m->rs_pa, reqNumEntriesLog2, txr_len_log2);
+
+    _RS_SET_FIELD(m->rs_pa, cmpProdIdx, 0);
+    _RS_SET_FIELD(m->rs_pa, cmpConsIdx, 0);
+    _RS_SET_FIELD(m->rs_pa, cmpNumEntriesLog2, rxr_len_log2);
+
+    _RS_SET_FIELD(m->rs_pa, msgProdIdx, 0);
+    _RS_SET_FIELD(m->rs_pa, msgConsIdx, 0);
+    _RS_SET_FIELD(m->rs_pa, msgNumEntriesLog2, 0);
+
+    DRIPRINTF("TX/RX rings logarithms set to %d/%d",
+              txr_len_log2, rxr_len_log2);
+
+    /* Flush ring state page changes */
+    smp_wmb();
+}
+
+static void
+pvscsi_rings_mgr_cleanup(PVSCSIRingsMgr *mgr)
+{
+    mgr->rs_pa = 0;
+    mgr->txr_len_mask = 0;
+    mgr->consumed_ptr = 0;
+    mgr->filled_cmp_ptr = 0;
+    memset(mgr->req_ring_pages_pa, 0, sizeof(mgr->req_ring_pages_pa));
+    memset(mgr->cmp_ring_pages_pa, 0, sizeof(mgr->cmp_ring_pages_pa));
+}
+
+static inline target_phys_addr_t
+pvscsi_rings_mgr_pop_req_descr(PVSCSIRingsMgr *mgr)
+{
+    uint32_t ready_prt = _RS_GET_FIELD(mgr->rs_pa, reqProdIdx);
+
+    if (ready_prt != mgr->consumed_ptr) {
+        uint32_t next_ready_ptr =
+            mgr->consumed_ptr++ & mgr->txr_len_mask;
+        uint32_t next_ready_page =
+            next_ready_ptr / PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
+        uint32_t inpage_idx =
+            next_ready_ptr % PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
+
+        return mgr->req_ring_pages_pa[next_ready_page] +
+               inpage_idx * sizeof(PVSCSIRingReqDesc);
+    } else {
+        return 0;
+    }
+}
+
+static inline void
+pvscsi_rings_mgr_flush_req_ring(PVSCSIRingsMgr *mgr)
+{
+    _RS_SET_FIELD(mgr->rs_pa, reqConsIdx, mgr->consumed_ptr);
+}
+
+static inline target_phys_addr_t
+pvscsi_rings_mgr_pop_cmp_descr(PVSCSIRingsMgr *mgr)
+{
+    /*
+     * According to Linux driver code it explicitly verifies that number
+     * of requests being processed by device is less then the size of
+     * completion queue, so device may omit completion queue overflow
+     * conditions check. We assume that this is true for other (Windows)
+     * drivers as well.
+     */
+
+    uint32_t free_cmp_ptr =
+        mgr->filled_cmp_ptr++ & mgr->rxr_len_mask;
+    uint32_t free_cmp_page =
+        free_cmp_ptr / PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
+    uint32_t inpage_idx =
+        free_cmp_ptr % PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
+    return mgr->cmp_ring_pages_pa[free_cmp_page] +
+           inpage_idx * sizeof(PVSCSIRingCmpDesc);
+}
+
+static inline void
+pvscsi_rings_mgr_flush_cmp_ring(PVSCSIRingsMgr *mgr)
+{
+    /* Flush descriptor changes */
+    smp_wmb();
+
+    DRIPRINTF("New production counter of completion ring is %" PRIx64,
+              mgr->filled_cmp_ptr);
+
+    _RS_SET_FIELD(mgr->rs_pa, cmpProdIdx, mgr->filled_cmp_ptr);
+}
+
+typedef QTAILQ_HEAD(, PVSCSIRequest) PVSCSIRequestList;
+#define PVSCSI_MAX_CMD_DATA_WORDS \
+    (sizeof(struct PVSCSICmdDescSetupRings)/sizeof(uint32_t))
+
+typedef struct {
+    PCIDevice dev;
+    MemoryRegion io_space;
+    SCSIBus bus;
+    QEMUBH *completion_worker;
+    PVSCSIRequestList pending_queue;
+    PVSCSIRequestList completion_queue;
+
+#ifdef PVSCSI_ENABLE_MSIX
+    MemoryRegion msix_space;
+    uint8_t msix_used;   /* Whether MSI-X support was installed successfully */
+#endif
+    uint8_t msi_used;    /* Whether MSI support was installed successfully   */
+    uint64_t reg_interrupt_status;        /* Interrupt status register value */
+    uint64_t reg_interrupt_enabled;       /* Interrupt mask register value   */
+    uint64_t reg_command_status;          /* Command status register value   */
+
+    /* Command data adoption mechanism */
+    uint64_t curr_cmd;                   /* Last command arrived             */
+    uint32_t curr_cmd_data_cntr;      /* Amount of data for last command  */
+
+    /* Collector for current command data */
+    uint32_t curr_cmd_data[PVSCSI_MAX_CMD_DATA_WORDS];
+
+    uint8_t rings_info_valid;            /* Whether rings are initialized    */
+    PVSCSIRingsMgr rings;                /* Data transfer rings manager      */
+} PVSCSI_State;
+
+static void
+pvscsi_reset_state(PVSCSI_State *s)
+{
+    s->curr_cmd = PVSCSI_CMD_FIRST;
+    s->curr_cmd_data_cntr = 0;
+    s->reg_command_status = PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
+    s->reg_interrupt_status = 0;
+    pvscsi_rings_mgr_cleanup(&s->rings);
+    s->rings_info_valid = FALSE;
+    QTAILQ_INIT(&s->pending_queue);
+    QTAILQ_INIT(&s->completion_queue);
+}
+
+typedef struct PVSCSISGState {
+    target_phys_addr_t elemAddr;
+    target_phys_addr_t dataAddr;
+    uint32_t resid;
+} PVSCSISGState;
+
+typedef struct PVSCSIRequest {
+    SCSIRequest *sreq;
+    uint8_t sense_key;
+    uint8_t completed;
+    int lun;
+    QEMUSGList sgl;
+    PVSCSISGState sg;
+    struct PVSCSIRingReqDesc req;
+    struct PVSCSIRingCmpDesc cmp;
+    QTAILQ_ENTRY(PVSCSIRequest) next;
+} PVSCSIRequest;
+
+static void
+pvscsi_free_queue(PVSCSIRequestList *req_list)
+{
+    PVSCSIRequest *pvscsi_req;
+
+    while (!QTAILQ_EMPTY(req_list)) {
+        pvscsi_req = QTAILQ_FIRST(req_list);
+        QTAILQ_REMOVE(req_list, pvscsi_req, next);
+        g_free(pvscsi_req);
+    }
+}
+
+static void
+pvscsi_reset_adapter(PVSCSI_State *s)
+{
+    qbus_reset_all_fn(&s->bus);
+    pvscsi_free_queue(&s->completion_queue);
+    assert(QTAILQ_EMPTY(&s->pending_queue));
+    pvscsi_reset_state(s);
+}
+
+static inline void
+pvscsi_update_irq_status(PVSCSI_State *s)
+{
+    int should_raise = !!(s->reg_interrupt_enabled & s->reg_interrupt_status);
+
+    DIRPRINTF("Interrupt level set to %d (MASK: 0x%llX, STATUS: 0x%llx)",
+              should_raise,
+              (long long unsigned int) s->reg_interrupt_enabled,
+              (long long unsigned int) s->reg_interrupt_status);
+
+#ifdef PVSCSI_ENABLE_MSIX
+    if (s->msix_used && msix_enabled(&s->dev)) {
+        if (should_raise) {
+            DIRPRINTF("Sending MSI-X notification");
+            msix_notify(&s->dev, PVSCSI_VECTOR_COMPLETION);
+        }
+        return;
+    }
+#endif
+
+    if (s->msi_used && msi_enabled(&s->dev)) {
+        if (should_raise) {
+            DIRPRINTF("Sending MSI notification");
+            msi_notify(&s->dev, PVSCSI_VECTOR_COMPLETION);
+        }
+        return;
+    }
+
+    qemu_set_irq(s->dev.irq[0], should_raise);
+}
+
+static inline void
+pvscsi_raise_completion_interrupt(PVSCSI_State *s)
+{
+    s->reg_interrupt_status |= PVSCSI_INTR_CMPL_0;
+
+    /* Memory barrier to flush interrupt status register changes*/
+    smp_wmb();
+
+    pvscsi_update_irq_status(s);
+}
+
+static void
+pvscsi_cmp_ring_put(PVSCSI_State *s, struct PVSCSIRingCmpDesc *cmp_desc)
+{
+    target_phys_addr_t cmp_descr_pa;
+
+    cmp_descr_pa = pvscsi_rings_mgr_pop_cmp_descr(&s->rings);
+    DRIPRINTF("Got completion descriptor %"PRIx64, cmp_descr_pa);
+
+    vmw_shmem_write(cmp_descr_pa, (void *)cmp_desc, sizeof(*cmp_desc));
+}
+
+static void
+pvscsi_process_completion_queue(void *opaque)
+{
+    PVSCSI_State *s = opaque;
+    PVSCSIRequest *pvscsi_req;
+    bool has_completed = false;
+
+    while (!QTAILQ_EMPTY(&s->completion_queue)) {
+        pvscsi_req = QTAILQ_FIRST(&s->completion_queue);
+        QTAILQ_REMOVE(&s->completion_queue, pvscsi_req, next);
+        pvscsi_cmp_ring_put(s, &pvscsi_req->cmp);
+        g_free(pvscsi_req);
+        has_completed++;
+    }
+
+    if (has_completed) {
+        pvscsi_rings_mgr_flush_cmp_ring(&s->rings);
+        pvscsi_raise_completion_interrupt(s);
+    }
+}
+
+static inline void
+pvscsi_schedule_completion_processing(PVSCSI_State *s)
+{
+    /* Try putting more complete requests on the ring. */
+    if (!QTAILQ_EMPTY(&s->completion_queue)) {
+        qemu_bh_schedule(s->completion_worker);
+    }
+}
+
+static inline void
+pvscsi_complete_request(PVSCSI_State *s, PVSCSIRequest *r)
+{
+    assert(!r->completed);
+    DSXPRINTF("Completion: ctx: %"PRIx64", len: %"PRIx64", sense key: %u",
+              r->cmp.context, r->cmp.dataLen, r->sense_key);
+    if (r->sreq != NULL) {
+        scsi_req_unref(r->sreq);
+        r->sreq = NULL;
+    }
+    r->completed = 1;
+    QTAILQ_REMOVE(&s->pending_queue, r, next);
+    QTAILQ_INSERT_TAIL(&s->completion_queue, r, next);
+    pvscsi_schedule_completion_processing(s);
+}
+
+static QEMUSGList *pvscsi_get_sg_list(SCSIRequest *r)
+{
+    PVSCSIRequest *req = r->hba_private;
+
+    DSXPRINTF("Get SG list: depth: %u, size: %lu",
+              req->sgl.nsg, req->sgl.size);
+
+    return &req->sgl;
+}
+
+static void
+pvscsi_get_next_sg_elem(PVSCSISGState *sg)
+{
+    struct PVSCSISGElement elem;
+
+    for (;; sg->elemAddr = elem.addr) {
+        vmw_shmem_read(sg->elemAddr, (void *)&elem, sizeof(elem));
+        if (0 != (elem.flags & ~PVSCSI_KNOWN_FLAGS)) {
+            /*
+             * There is PVSCSI_SGE_FLAG_CHAIN_ELEMENT flag described in
+             * header file but its value is unknown. This flag requires
+             * additional processing, so we put warning here to catch it
+             * some day and make proper implementation
+             */
+            DWRPRINTF("Unknown flags in SG element (val: 0x%X)", elem.flags);
+        }
+        break;
+    }
+
+    sg->elemAddr += sizeof(elem);
+    sg->dataAddr = elem.addr;
+    sg->resid = elem.length;
+}
+
+static inline void
+pvscsi_write_sense(PVSCSIRequest *r, uint8_t *sense, int len)
+{
+    r->cmp.senseLen = MIN(r->req.senseLen, len);
+    r->sense_key = sense[2];
+    vmw_shmem_write(r->req.senseAddr, sense, r->cmp.senseLen);
+}
+
+static void
+pvscsi_command_complete(SCSIRequest *req, uint32_t status, size_t resid)
+{
+    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev.qdev, req->bus->qbus.parent);
+    PVSCSIRequest *pvscsi_req = req->hba_private;
+
+    if (!pvscsi_req) {
+        DERPRINTF("PVSCSI: Can't find request for tag 0x%x", req->tag);
+        return;
+    }
+
+    if (resid) {
+        /* Short transfer.  */
+        DSXPRINTF("Not all data required for command transferred.");
+        pvscsi_req->cmp.hostStatus = BTSTAT_DATARUN;
+    }
+
+    pvscsi_req->cmp.scsiStatus = status;
+    if (CHECK_CONDITION == pvscsi_req->cmp.scsiStatus) {
+        uint8_t sense[SCSI_SENSE_BUF_SIZE];
+        int sense_len =
+            scsi_req_get_sense(pvscsi_req->sreq, sense, sizeof(sense));
+
+        DSXPRINTF("Sense information length is %d bytes", sense_len);
+        pvscsi_write_sense(pvscsi_req, sense, sense_len);
+    }
+    qemu_sglist_destroy(&pvscsi_req->sgl);
+    pvscsi_complete_request(s, pvscsi_req);
+}
+
+static void
+pvscsi_request_cancelled(SCSIRequest *req)
+{
+    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev.qdev, req->bus->qbus.parent);
+    PVSCSIRequest *pvscsi_req = req->hba_private;
+
+    if (BTSTAT_SUCCESS == pvscsi_req->cmp.hostStatus) {
+        pvscsi_req->cmp.hostStatus = BTSTAT_ABORTQUEUE;
+    }
+    pvscsi_complete_request(s, pvscsi_req);
+}
+
+static inline SCSIDevice*
+pvscsi_device_find(PVSCSI_State *s, int channel, int target,
+                   uint8_t *requested_lun, uint8_t *target_lun)
+{
+    if (requested_lun[0] || requested_lun[2] || requested_lun[3] ||
+        requested_lun[4] || requested_lun[5] || requested_lun[6] ||
+        requested_lun[7] || (PVSCSI_MAX_DEVS < target)) {
+        return NULL;
+    } else {
+        *target_lun = requested_lun[1];
+        return scsi_device_find(&s->bus, channel, target, *target_lun);
+    }
+}
+
+static PVSCSIRequest *
+pvscsi_queue_pending_descriptor(PVSCSI_State *s, SCSIDevice **d,
+                                struct PVSCSIRingReqDesc *descr)
+{
+    PVSCSIRequest *pvscsi_req;
+    uint8_t lun;
+
+    pvscsi_req = g_malloc0(sizeof(*pvscsi_req));
+    pvscsi_req->req = *descr;
+    pvscsi_req->cmp.context = pvscsi_req->req.context;
+    QTAILQ_INSERT_TAIL(&s->pending_queue, pvscsi_req, next);
+
+    *d = pvscsi_device_find(s, descr->bus, descr->target, descr->lun, &lun);
+    if (!*d) {
+        return pvscsi_req;
+    }
+
+    pvscsi_req->lun = lun;
+    return pvscsi_req;
+}
+
+static void
+pvscsi_convert_sglist(PVSCSIRequest *r)
+{
+    int chunk_size;
+    uint64_t data_length = r->req.dataLen;
+    PVSCSISGState sg = r->sg;
+    while (data_length) {
+        while (!sg.resid) {
+            pvscsi_get_next_sg_elem(&sg);
+            DCMPRINTF("Element: ctx: %"PRIx64", addr: %"PRIx64", len: 0x%ul",
+                      r->req.context, (uint64_t) r->sg.dataAddr, r->sg.resid);
+        }
+        assert(data_length > 0);
+        chunk_size = MIN((unsigned) data_length, sg.resid);
+        if (chunk_size) {
+            qemu_sglist_add(&r->sgl, sg.dataAddr, chunk_size);
+        }
+
+        sg.dataAddr += chunk_size;
+        data_length -= chunk_size;
+        sg.resid -= chunk_size;
+    }
+}
+
+static inline void
+pvscsi_build_sglist(PVSCSIRequest *r)
+{
+    qemu_sglist_init(&r->sgl, 1, NULL);
+    if (r->req.flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) {
+        pvscsi_convert_sglist(r);
+    } else {
+        qemu_sglist_add(&r->sgl, r->req.dataAddr, r->req.dataLen);
+    }
+}
+
+static void
+pvscsi_process_request_descriptor(PVSCSI_State *s,
+                                  struct PVSCSIRingReqDesc *descr)
+{
+    SCSIDevice *d;
+    PVSCSIRequest *r = pvscsi_queue_pending_descriptor(s, &d, descr);
+    int64_t n;
+
+    DSXPRINTF("SCSI cmd 0x%X, ctx: %" PRIx64, descr->cdb[0], descr->context);
+
+    if (!d) {
+        r->cmp.hostStatus = BTSTAT_SELTIMEO;
+        DSXPRINTF("Command directed to unknown device rejected.");
+        pvscsi_complete_request(s, r);
+        return;
+    }
+
+    if (descr->flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) {
+        r->sg.elemAddr = descr->dataAddr;
+    }
+
+    r->sreq = scsi_req_new(d, descr->context, r->lun, descr->cdb, r);
+    if (r->sreq->cmd.mode == SCSI_XFER_FROM_DEV &&
+        (descr->flags & PVSCSI_FLAG_CMD_DIR_TODEVICE)) {
+        r->cmp.hostStatus = BTSTAT_BADMSG;
+        DSXPRINTF("Command with invalid transfer direction rejected.");
+        scsi_req_cancel(r->sreq);
+        return;
+    }
+    if (r->sreq->cmd.mode == SCSI_XFER_TO_DEV &&
+        (descr->flags & PVSCSI_FLAG_CMD_DIR_TOHOST)) {
+        r->cmp.hostStatus = BTSTAT_BADMSG;
+        DSXPRINTF("Command with invalid transfer direction rejected.");
+        scsi_req_cancel(r->sreq);
+        return;
+    }
+
+    pvscsi_build_sglist(r);
+    n = scsi_req_enqueue(r->sreq);
+
+    if (n) {
+        scsi_req_continue(r->sreq);
+    }
+}
+
+static void
+pvscsi_process_io(PVSCSI_State *s)
+{
+    PVSCSIRingReqDesc descr;
+    target_phys_addr_t next_descr_pa;
+
+    assert(s->rings_info_valid);
+    while (0 != (next_descr_pa = pvscsi_rings_mgr_pop_req_descr(&s->rings))) {
+        DRIPRINTF("Got descriptor %"PRIx64, next_descr_pa);
+        vmw_shmem_read(next_descr_pa, &descr, sizeof(descr));
+        pvscsi_process_request_descriptor(s, &descr);
+    }
+
+    pvscsi_rings_mgr_flush_req_ring(&s->rings);
+}
+
+static void
+pvscsi_dbg_dump_tx_rings_config(struct PVSCSICmdDescSetupRings *rc)
+{
+#ifdef DEBUG_PVSCSI_RINGS
+    int i;
+    DRIPRINTF("TX Rings configuration received:");
+    DRIPRINTF("Rings state page: %"PRIx64":", rc->ringsStatePPN);
+
+    DRIPRINTF("Request ring pages: %u:", rc->reqRingNumPages);
+    for (i = 0; i < rc->reqRingNumPages; i++) {
+        DRIPRINTF("\t%"PRIx64" ", rc->reqRingPPNs[i]);
+    }
+
+    DRIPRINTF("Confirm ring pages: %u:", rc->cmpRingNumPages);
+    for (i = 0; i < rc->cmpRingNumPages; i++) {
+        DRIPRINTF("\t%"PRIx64" ", rc->cmpRingPPNs[i]);
+    }
+#endif
+}
+
+static uint64_t
+pvscsi_on_cmd_config(PVSCSI_State *s)
+{
+    DWRPRINTF("Unimplemented command PVSCSI_CMD_CONFIG ignored.");
+    return PVSCSI_COMMAND_PROCESSING_FAILED;
+}
+
+static uint64_t
+pvscsi_on_cmd_unplug(PVSCSI_State *s)
+{
+    DWRPRINTF("Unimplemented command PVSCSI_CMD_DEVICE_UNPLUG ignored.");
+    return PVSCSI_COMMAND_PROCESSING_FAILED;
+}
+
+static uint64_t
+pvscsi_on_issue_scsi(PVSCSI_State *s)
+{
+    DWRPRINTF("Unimplemented command PVSCSI_CMD_ISSUE_SCSI ignored.");
+    return PVSCSI_COMMAND_PROCESSING_FAILED;
+}
+
+static uint64_t
+pvscsi_on_cmd_setup_rings(PVSCSI_State *s)
+{
+    struct PVSCSICmdDescSetupRings *rc =
+        (struct PVSCSICmdDescSetupRings *) s->curr_cmd_data;
+
+    DCMPRINTF("Command PVSCSI_CMD_SETUP_RINGS arrived");
+
+    pvscsi_dbg_dump_tx_rings_config(rc);
+    pvscsi_rings_mgr_init(&s->rings, rc);
+    s->rings_info_valid = TRUE;
+    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
+}
+
+static uint64_t
+pvscsi_on_cmd_abort(PVSCSI_State *s)
+{
+#ifdef DEBUG_PVSCSI_COMMANDS
+    struct PVSCSICmdDescAbortCmd *data =
+        (struct PVSCSICmdDescAbortCmd *) s->curr_cmd_data;
+
+    DCMPRINTF("PVSCSI_CMD_ABORT_CMD for ctx %"PRIx64", target 0x%X",
+              data->context, data->target);
+#endif
+
+    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
+}
+
+static uint64_t
+pvscsi_on_cmd_unknown(PVSCSI_State *s)
+{
+    DWRPRINTF("Data for unknown command : 0x%X", s->curr_cmd_data[0]);
+    return PVSCSI_COMMAND_PROCESSING_FAILED;
+}
+
+static uint64_t
+pvscsi_on_cmd_reset_device(PVSCSI_State *s)
+{
+    uint8_t target_lun = 0;
+    struct PVSCSICmdDescResetDevice *cmd =
+        (struct PVSCSICmdDescResetDevice *) s->curr_cmd_data;
+    SCSIDevice *sdev;
+
+    sdev = pvscsi_device_find(s, 0, cmd->target, cmd->lun, &target_lun);
+
+    DWRPRINTF("PVSCSI_CMD_RESET_DEVICE[target 0x%u lun 0x%d (dev 0x%p)]",
+              cmd->target, (int) target_lun, sdev);
+
+    if (sdev != NULL) {
+        device_reset(&sdev->qdev);
+        return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
+    }
+
+    return PVSCSI_COMMAND_PROCESSING_FAILED;
+}
+
+static uint64_t
+pvscsi_on_cmd_reset_bus(PVSCSI_State *s)
+{
+    DCMPRINTF("Command PVSCSI_CMD_RESET_BUS arrived");
+
+    qbus_reset_all_fn(&s->bus);
+    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
+}
+
+static uint64_t
+pvscsi_on_cmd_setup_msg_ring(PVSCSI_State *s)
+{
+    /* No message ring needed for HDD devices */
+    DCMPRINTF("Command PVSCSI_CMD_SETUP_MSG_RING arrived");
+    return PVSCSI_COMMAND_PROCESSING_FAILED;
+}
+
+static uint64_t
+pvscsi_on_cmd_adapter_reset(PVSCSI_State *s)
+{
+    /* No message ring needed for HDD devices */
+    DCMPRINTF("Command PVSCSI_CMD_ADAPTER_RESET arrived");
+
+    pvscsi_reset_adapter(s);
+    return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
+}
+
+struct {
+    int       data_size;
+    uint64_t  (*handler_fn)(PVSCSI_State *s);
+} pvscsi_commands[] = {
+    [PVSCSI_CMD_FIRST] = {
+        .data_size = 0,
+        .handler_fn = pvscsi_on_cmd_unknown
+    },
+
+    /* Not implemented, data size defined based on what arrives on windows */
+    [PVSCSI_CMD_CONFIG] = {
+        .data_size = 6*sizeof(uint32_t),
+        .handler_fn = pvscsi_on_cmd_config
+    },
+
+    /* Command not implemented, data size is unknown */
+    [PVSCSI_CMD_ISSUE_SCSI] = {
+        .data_size = 0,
+        .handler_fn = pvscsi_on_issue_scsi
+    },
+
+    /* Command not implemented, data size is unknown */
+    [PVSCSI_CMD_DEVICE_UNPLUG] = {
+        .data_size = 0,
+        .handler_fn = pvscsi_on_cmd_unplug
+    },
+
+    [PVSCSI_CMD_SETUP_RINGS] = {
+        .data_size = sizeof(struct PVSCSICmdDescSetupRings),
+        .handler_fn = pvscsi_on_cmd_setup_rings
+    },
+
+    [PVSCSI_CMD_RESET_DEVICE] = {
+        .data_size = sizeof(struct PVSCSICmdDescResetDevice),
+        .handler_fn = pvscsi_on_cmd_reset_device
+    },
+
+    [PVSCSI_CMD_RESET_BUS] = {
+        .data_size = 0,
+        .handler_fn = pvscsi_on_cmd_reset_bus
+    },
+
+    [PVSCSI_CMD_SETUP_MSG_RING] = {
+        .data_size = 0,
+        .handler_fn = pvscsi_on_cmd_setup_msg_ring
+    },
+
+    [PVSCSI_CMD_ADAPTER_RESET] = {
+        .data_size = 0,
+        .handler_fn = pvscsi_on_cmd_adapter_reset
+    },
+
+    [PVSCSI_CMD_ABORT_CMD] = {
+        .data_size = sizeof(struct PVSCSICmdDescAbortCmd),
+        .handler_fn = pvscsi_on_cmd_abort
+    }
+};
+
+static void
+pvscsi_do_command_processing(PVSCSI_State *s)
+{
+    size_t bytes_arrived = s->curr_cmd_data_cntr*sizeof(uint32_t);
+
+    if (bytes_arrived >= pvscsi_commands[s->curr_cmd].data_size) {
+        s->reg_command_status = pvscsi_commands[s->curr_cmd].handler_fn(s);
+        s->curr_cmd = PVSCSI_CMD_FIRST;
+        s->curr_cmd_data_cntr   = 0;
+    }
+}
+
+static void
+pvscsi_on_command_data(PVSCSI_State *s, uint32_t value)
+{
+    size_t bytes_arrived = s->curr_cmd_data_cntr*sizeof(uint32_t);
+
+    assert(bytes_arrived < sizeof(s->curr_cmd_data));
+    s->curr_cmd_data[s->curr_cmd_data_cntr++] = value;
+
+    pvscsi_do_command_processing(s);
+}
+
+static void
+pvscsi_on_command(PVSCSI_State *s, uint64_t cmd_id)
+{
+    if ((cmd_id > PVSCSI_CMD_FIRST) && (cmd_id < PVSCSI_CMD_LAST)) {
+        s->curr_cmd = cmd_id;
+    } else {
+        s->curr_cmd = PVSCSI_CMD_FIRST;
+        DWRPRINTF("Unknown command %"PRIx64, cmd_id);
+    }
+
+    s->curr_cmd_data_cntr = 0;
+    s->reg_command_status = PVSCSI_COMMAND_PROCESSING_FAILED;
+
+    pvscsi_do_command_processing(s);
+}
+
+static void
+pvscsi_io_write(void *opaque, target_phys_addr_t addr,
+                uint64_t val, unsigned size)
+{
+    PVSCSI_State *s = opaque;
+
+    switch (addr) {
+    case PVSCSI_REG_OFFSET_COMMAND:
+        pvscsi_on_command(s, val);
+        break;
+
+    case PVSCSI_REG_OFFSET_COMMAND_DATA:
+        pvscsi_on_command_data(s, (uint32_t) val);
+        break;
+
+    case PVSCSI_REG_OFFSET_INTR_STATUS:
+        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_STATUS write: %"PRIx64, val);
+        s->reg_interrupt_status &= ~val;
+        pvscsi_update_irq_status(s);
+        pvscsi_schedule_completion_processing(s);
+        break;
+
+    case PVSCSI_REG_OFFSET_INTR_MASK:
+        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_MASK write: %"PRIx64, val);
+        s->reg_interrupt_enabled = val;
+        pvscsi_update_irq_status(s);
+        break;
+
+    case PVSCSI_REG_OFFSET_KICK_NON_RW_IO:
+        DCBPRINTF("PVSCSI_REG_OFFSET_KICK_NON_RW_IO write: %"PRIx64, val);
+        pvscsi_process_io(s);
+        break;
+
+    case PVSCSI_REG_OFFSET_KICK_RW_IO:
+        DCBPRINTF("PVSCSI_REG_OFFSET_KICK_RW_IO write: %"PRIx64, val);
+        pvscsi_process_io(s);
+        break;
+
+    case PVSCSI_REG_OFFSET_DEBUG:
+        DCBPRINTF("PVSCSI_REG_OFFSET_DEBUG write: %"PRIx64, val);
+        break;
+
+    default:
+        DWRPRINTF("Unknown write: %" PRIx64 "%u bytes, val %" PRIx64,
+                  (uint64_t) addr, size, val);
+        break;
+    }
+
+}
+
+static uint64_t
+pvscsi_io_read(void *opaque, target_phys_addr_t addr, unsigned size)
+{
+    PVSCSI_State *s = opaque;
+
+    switch (addr) {
+    case PVSCSI_REG_OFFSET_INTR_STATUS:
+        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_STATUS read: %"PRIx64,
+                  s->reg_interrupt_status);
+        return s->reg_interrupt_status;
+
+    case PVSCSI_REG_OFFSET_INTR_MASK:
+        DCBPRINTF("PVSCSI_REG_OFFSET_INTR_MASK read: %"PRIx64,
+                  s->reg_interrupt_enabled);
+        return s->reg_interrupt_enabled;
+
+    case PVSCSI_REG_OFFSET_COMMAND_STATUS:
+        DCBPRINTF("PVSCSI_REG_OFFSET_COMMAND_STATUS read: %"PRIx64,
+                  s->reg_command_status);
+        return s->reg_command_status;
+
+    default:
+        DWRPRINTF("Unknown read, address %" PRIx64 ", %d bytes",
+                  (uint64_t) addr, size);
+        return 0;
+    }
+}
+
+#ifdef PVSCSI_ENABLE_MSIX
+static bool
+pvscsi_init_msix(PVSCSI_State *s)
+{
+    int res;
+    memory_region_init(&s->msix_space, "pvscsi-msix", PAGE_SIZE);
+    res = msix_init(&s->dev, PVSCSI_MSIX_NUM_VECTORS, &s->msix_space, 1, 0);
+    if (0 > res) {
+        DWRPRINTF("Failed to initialize MSI-X, error %d", res);
+        memory_region_destroy(&s->msix_space);
+        s->msix_used = false;
+    } else {
+        res = msix_vector_use(&s->dev, PVSCSI_VECTOR_COMPLETION);
+        if (0 > res) {
+            DWRPRINTF("Failed to use MSI-X vector, error %d", res);
+            msix_uninit(&s->dev, &s->msix_space);
+            memory_region_destroy(&s->msix_space);
+            s->msix_used = false;
+        } else {
+            pci_register_bar(&s->dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY,
+                             &s->msix_space);
+            s->msix_used = true;
+        }
+    }
+    return s->msix_used;
+}
+
+static void
+pvscsi_cleanup_msix(PVSCSI_State *s)
+{
+    if (s->msix_used) {
+        msix_vector_unuse(&s->dev, PVSCSI_VECTOR_COMPLETION);
+        msix_uninit(&s->dev, &s->msix_space);
+        memory_region_destroy(&s->msix_space);
+    }
+}
+#endif
+
+static bool
+pvscsi_init_msi(PVSCSI_State *s)
+{
+#define PVSCSI_MSI_OFFSET        (0x50)
+#define PVSCSI_USE_64BIT         (true)
+#define PVSCSI_PER_VECTOR_MASK   (false)
+
+    int res;
+    res = msi_init(&s->dev, PVSCSI_MSI_OFFSET, PVSCSI_MSIX_NUM_VECTORS,
+                   PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK);
+    if (0 > res) {
+        DWRPRINTF("Failed to initialize MSI, error %d", res);
+        s->msi_used = false;
+    } else {
+        s->msi_used = true;
+    }
+
+    return s->msi_used;
+}
+
+static void
+pvscsi_cleanup_msi(PVSCSI_State *s)
+{
+    if (s->msi_used) {
+        msi_uninit(&s->dev);
+    }
+}
+
+#ifdef PVSCSI_ENABLE_MSIX
+
+static void
+pvscsi_msix_save(QEMUFile *f, void *opaque)
+{
+    msix_save(&((PVSCSI_State *)opaque)->dev, f);
+}
+
+static int
+pvscsi_msix_load(QEMUFile *f, void *opaque, int version_id)
+{
+    msix_load(&((PVSCSI_State *)opaque)->dev, f);
+    return 0;
+}
+
+#endif
+
+static int
+pvscsi_init(PCIDevice *dev)
+{
+    static const MemoryRegionOps pv_scsi_ops = {
+            .read = pvscsi_io_read,
+            .write = pvscsi_io_write,
+            .endianness = DEVICE_NATIVE_ENDIAN,
+            .impl = {
+                    .min_access_size = 4,
+                    .max_access_size = 4,
+            },
+    };
+
+    static const struct SCSIBusInfo pvscsi_scsi_info = {
+            .tcq = true,
+            .max_target = PVSCSI_MAX_DEVS,
+            .max_channel = 0,
+            .max_lun = 0,
+
+            .get_sg_list = pvscsi_get_sg_list,
+            .complete = pvscsi_command_complete,
+            .cancel = pvscsi_request_cancelled
+    };
+
+    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev, dev);
+
+    DCBPRINTF("Starting init...");
+
+    /* PCI subsystem ID */
+    s->dev.config[PCI_SUBSYSTEM_ID] = 0x00;
+    s->dev.config[PCI_SUBSYSTEM_ID + 1] = 0x10;
+
+    /* PCI latency timer = 255 */
+    s->dev.config[PCI_LATENCY_TIMER] = 0xff;
+
+    /* Interrupt pin A */
+    s->dev.config[PCI_INTERRUPT_PIN] = 0x01;
+
+    memory_region_init_io(&s->io_space, &pv_scsi_ops, s,
+                          "pvscsi-io", PVSCSI_MEM_SPACE_SIZE);
+    pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->io_space);
+
+#ifdef PVSCSI_ENABLE_MSIX
+    register_savevm(&dev->qdev, "pvscsi-msix", -1, 1,
+                    pvscsi_msix_save, pvscsi_msix_load, s);
+
+    if (!pvscsi_init_msix(s)) {
+        DWRPRINTF("Failed to initialize MSI-X.");
+    }
+#endif
+    if (!pvscsi_init_msi(s)) {
+        DWRPRINTF("Failed to initialize MSI.");
+    }
+
+    s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s);
+    if (!s->completion_worker) {
+        pvscsi_cleanup_msi(s);
+#ifdef PVSCSI_ENABLE_MSIX
+        pvscsi_cleanup_msix(s);
+#endif
+        memory_region_destroy(&s->io_space);
+        return -ENOMEM;
+    }
+
+    scsi_bus_new(&s->bus, &dev->qdev, &pvscsi_scsi_info);
+    pvscsi_reset_state(s);
+
+    return 0;
+}
+
+static int
+pvscsi_uninit(PCIDevice *dev)
+{
+    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev, dev);
+
+    DCBPRINTF("Starting uninit...");
+    qemu_bh_delete(s->completion_worker);
+
+#ifdef PVSCSI_ENABLE_MSIX
+    unregister_savevm(&dev->qdev, "pvscsi-msix", s);
+    pvscsi_cleanup_msix(s);
+#endif
+    pvscsi_cleanup_msi(s);
+
+    memory_region_destroy(&s->io_space);
+
+    return 0;
+}
+
+static void
+pvscsi_reset(DeviceState *dev)
+{
+    PVSCSI_State *s = DO_UPCAST(PVSCSI_State, dev.qdev, dev);
+    DCBPRINTF("Starting reset...");
+    pvscsi_reset_adapter(s);
+}
+
+static void
+pvscsi_pre_save(void *opaque)
+{
+    PVSCSI_State *s = (PVSCSI_State *) opaque;
+
+    DCBPRINTF("Starting presave...");
+
+    assert(QTAILQ_EMPTY(&s->pending_queue));
+    assert(QTAILQ_EMPTY(&s->completion_queue));
+}
+
+static int
+pvscsi_post_load(void *opaque, int version_id)
+{
+#ifdef PVSCSI_ENABLE_MSIX
+    PVSCSI_State *s = (PVSCSI_State *) opaque;
+#endif
+    DCBPRINTF("Starting postload...");
+
+#ifdef PVSCSI_ENABLE_MSIX
+    if (s->msix_used) {
+        if (!msix_vector_use(&s->dev, PVSCSI_VECTOR_COMPLETION)) {
+            DERPRINTF("Failed to re-use MSI-X vectors");
+            return -1;
+        }
+    }
+#endif
+    return 0;
+}
+
+static VMStateDescription vmstate_pvscsi = {
+        .name = "pvscsi",
+        .version_id = 0,
+        .minimum_version_id = 0,
+        .minimum_version_id_old = 0,
+        .pre_save = pvscsi_pre_save,
+        .post_load = pvscsi_post_load,
+        .fields      = (VMStateField[]) {
+                VMSTATE_PCI_DEVICE(dev, PVSCSI_State),
+#ifdef PVSCSI_ENABLE_MSIX
+                VMSTATE_UINT8(msix_used, PVSCSI_State),
+#endif
+                VMSTATE_UINT8(msi_used, PVSCSI_State),
+                VMSTATE_UINT64(reg_interrupt_status, PVSCSI_State),
+                VMSTATE_UINT64(reg_interrupt_enabled, PVSCSI_State),
+                VMSTATE_UINT64(reg_command_status, PVSCSI_State),
+                VMSTATE_UINT64(curr_cmd, PVSCSI_State),
+                VMSTATE_UINT32(curr_cmd_data_cntr, PVSCSI_State),
+                VMSTATE_UINT32_ARRAY(curr_cmd_data, PVSCSI_State,
+                    ARRAY_SIZE(((PVSCSI_State *)NULL)->curr_cmd_data)),
+                VMSTATE_UINT8(rings_info_valid, PVSCSI_State),
+
+                VMSTATE_UINT64(rings.rs_pa, PVSCSI_State),
+                VMSTATE_UINT32(rings.txr_len_mask, PVSCSI_State),
+                VMSTATE_UINT32(rings.rxr_len_mask, PVSCSI_State),
+                VMSTATE_UINT64_ARRAY(rings.req_ring_pages_pa, PVSCSI_State,
+                    PVSCSI_SETUP_RINGS_MAX_NUM_PAGES),
+                VMSTATE_UINT64_ARRAY(rings.cmp_ring_pages_pa, PVSCSI_State,
+                    PVSCSI_SETUP_RINGS_MAX_NUM_PAGES),
+                VMSTATE_UINT64(rings.consumed_ptr, PVSCSI_State),
+                VMSTATE_UINT64(rings.filled_cmp_ptr, PVSCSI_State),
+
+                VMSTATE_END_OF_LIST()
+            }
+};
+
+static void
+pvscsi_write_config(PCIDevice *pci, uint32_t addr, uint32_t val, int len)
+{
+    pci_default_write_config(pci, addr, val, len);
+    msi_write_config(pci, addr, val, len);
+#if defined(PVSCSI_ENABLE_MSIX)
+    msix_write_config(pci, addr, val, len);
+#endif
+}
+
+static void pvscsi_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+    k->init = pvscsi_init;
+    k->exit = pvscsi_uninit;
+    k->vendor_id = PCI_VENDOR_ID_VMWARE;
+    k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI;
+    k->class_id = PCI_CLASS_STORAGE_SCSI;
+    k->subsystem_id = 0x1000;
+    dc->reset = pvscsi_reset;
+    dc->vmsd = &vmstate_pvscsi;
+    k->config_write = pvscsi_write_config;
+}
+
+static TypeInfo pvscsi_info = {
+    .name          = "pvscsi",
+    .parent        = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(PVSCSI_State),
+    .class_init    = pvscsi_class_init,
+};
+
+static void
+pvscsi_register_types(void)
+{
+    type_register_static(&pvscsi_info);
+
+    DCBPRINTF("PVSCSI QEMU device emulation registered");
+}
+
+type_init(pvscsi_register_types);
diff --git a/hw/pvscsi.h b/hw/pvscsi.h
new file mode 100644
index 0000000..611c549
--- /dev/null
+++ b/hw/pvscsi.h
@@ -0,0 +1,442 @@ 
+/*
+ * QEMU VMWARE PVSCSI paravirtual SCSI bus
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Based on implementation by Paolo Bonzini
+ * http://lists.gnu.org/archive/html/qemu-devel/2011-08/msg00729.html
+ *
+ * Authors:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Yan Vugenfirer <yvugenfi@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _PVSCSI_H_
+#define _PVSCSI_H_
+
+#define PAGE_SIZE (4096)
+#define PAGE_SHIFT (12)
+
+#define PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT 128
+
+#define MASK(n)        ((1 << (n)) - 1)        /* make an n-bit mask */
+
+/*
+ * Following is an interface definition for
+ * PVSCSI device as provided by VMWARE
+ * See original copyright from Linux kernel v3.2.8
+ * header file drivers/scsi/vmw_pvscsi.h below.
+ */
+
+ /*
+  * VMware PVSCSI header file
+  *
+  * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
+  *
+  * This program is free software; you can redistribute it and/or modify it
+  * under the terms of the GNU General Public License as published by the
+  * Free Software Foundation; version 2 of the License and no later version.
+  *
+  * This program is distributed in the hope that it will be useful, but
+  * WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+  * NON INFRINGEMENT.  See the GNU General Public License for more
+  * details.
+  *
+  * You should have received a copy of the GNU General Public License
+  * along with this program; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+  *
+  * Maintained by: Alok N Kataria <address@hidden>
+  *
+  */
+
+/*
+ * host adapter status/error codes
+ */
+enum HostBusAdapterStatus {
+   BTSTAT_SUCCESS       = 0x00,  /* CCB complete normally with no errors */
+   BTSTAT_LINKED_COMMAND_COMPLETED           = 0x0a,
+   BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG = 0x0b,
+   BTSTAT_DATA_UNDERRUN = 0x0c,
+   BTSTAT_SELTIMEO      = 0x11,  /* SCSI selection timeout */
+   BTSTAT_DATARUN       = 0x12,  /* data overrun/underrun */
+   BTSTAT_BUSFREE       = 0x13,  /* unexpected bus free */
+   BTSTAT_INVPHASE      = 0x14,  /* invalid bus phase or sequence */
+                                 /* requested by target           */
+   BTSTAT_LUNMISMATCH   = 0x17,  /* linked CCB has different LUN  */
+                                 /* from first CCB                */
+   BTSTAT_SENSFAILED    = 0x1b,  /* auto request sense failed */
+   BTSTAT_TAGREJECT     = 0x1c,  /* SCSI II tagged queueing message */
+                                 /* rejected by target              */
+   BTSTAT_BADMSG        = 0x1d,  /* unsupported message received by */
+                                 /* the host adapter                */
+   BTSTAT_HAHARDWARE    = 0x20,  /* host adapter hardware failed */
+   BTSTAT_NORESPONSE    = 0x21,  /* target did not respond to SCSI ATN, */
+                                 /* sent a SCSI RST                     */
+   BTSTAT_SENTRST       = 0x22,  /* host adapter asserted a SCSI RST */
+   BTSTAT_RECVRST       = 0x23,  /* other SCSI devices asserted a SCSI RST */
+   BTSTAT_DISCONNECT    = 0x24,  /* target device reconnected improperly */
+                                 /* (w/o tag)                            */
+   BTSTAT_BUSRESET      = 0x25,  /* host adapter issued BUS device reset */
+   BTSTAT_ABORTQUEUE    = 0x26,  /* abort queue generated */
+   BTSTAT_HASOFTWARE    = 0x27,  /* host adapter software error */
+   BTSTAT_HATIMEOUT     = 0x30,  /* host adapter hardware timeout error */
+   BTSTAT_SCSIPARITY    = 0x34,  /* SCSI parity error detected */
+};
+
+/*
+ * Register offsets.
+ *
+ * These registers are accessible both via i/o space and mm i/o.
+ */
+
+enum PVSCSIRegOffset {
+    PVSCSI_REG_OFFSET_COMMAND        =    0x0,
+    PVSCSI_REG_OFFSET_COMMAND_DATA   =    0x4,
+    PVSCSI_REG_OFFSET_COMMAND_STATUS =    0x8,
+    PVSCSI_REG_OFFSET_LAST_STS_0     =  0x100,
+    PVSCSI_REG_OFFSET_LAST_STS_1     =  0x104,
+    PVSCSI_REG_OFFSET_LAST_STS_2     =  0x108,
+    PVSCSI_REG_OFFSET_LAST_STS_3     =  0x10c,
+    PVSCSI_REG_OFFSET_INTR_STATUS    = 0x100c,
+    PVSCSI_REG_OFFSET_INTR_MASK      = 0x2010,
+    PVSCSI_REG_OFFSET_KICK_NON_RW_IO = 0x3014,
+    PVSCSI_REG_OFFSET_DEBUG          = 0x3018,
+    PVSCSI_REG_OFFSET_KICK_RW_IO     = 0x4018,
+};
+
+/*
+ * Virtual h/w commands.
+ */
+
+enum PVSCSICommands {
+    PVSCSI_CMD_FIRST             = 0, /* has to be first */
+
+    PVSCSI_CMD_ADAPTER_RESET     = 1,
+    PVSCSI_CMD_ISSUE_SCSI        = 2,
+    PVSCSI_CMD_SETUP_RINGS       = 3,
+    PVSCSI_CMD_RESET_BUS         = 4,
+    PVSCSI_CMD_RESET_DEVICE      = 5,
+    PVSCSI_CMD_ABORT_CMD         = 6,
+    PVSCSI_CMD_CONFIG            = 7,
+    PVSCSI_CMD_SETUP_MSG_RING    = 8,
+    PVSCSI_CMD_DEVICE_UNPLUG     = 9,
+
+    PVSCSI_CMD_LAST              = 10  /* has to be last */
+};
+
+#define PVSCSI_COMMAND_PROCESSING_SUCCEEDED   (0)
+#define PVSCSI_COMMAND_PROCESSING_FAILED     (-1)
+
+/*
+ * Command descriptor for PVSCSI_CMD_RESET_DEVICE --
+ */
+
+struct PVSCSICmdDescResetDevice {
+    uint32_t    target;
+    uint8_t     lun[8];
+} QEMU_PACKED;
+
+/*
+ * Command descriptor for PVSCSI_CMD_ABORT_CMD --
+ *
+ * - currently does not support specifying the LUN.
+ * - _pad should be 0.
+ */
+
+struct PVSCSICmdDescAbortCmd {
+    uint64_t    context;
+    uint32_t    target;
+    uint32_t    _pad;
+} QEMU_PACKED;
+
+/*
+ * Command descriptor for PVSCSI_CMD_SETUP_RINGS --
+ *
+ * Notes:
+ * - reqRingNumPages and cmpRingNumPages need to be power of two.
+ * - reqRingNumPages and cmpRingNumPages need to be different from 0,
+ * - reqRingNumPages and cmpRingNumPages need to be inferior to
+ *   PVSCSI_SETUP_RINGS_MAX_NUM_PAGES.
+ */
+
+#define PVSCSI_SETUP_RINGS_MAX_NUM_PAGES        32
+struct PVSCSICmdDescSetupRings {
+    uint32_t    reqRingNumPages;
+    uint32_t    cmpRingNumPages;
+    uint64_t    ringsStatePPN;
+    uint64_t    reqRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
+    uint64_t    cmpRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
+} QEMU_PACKED;
+
+/*
+ * Command descriptor for PVSCSI_CMD_SETUP_MSG_RING --
+ *
+ * Notes:
+ * - this command was not supported in the initial revision of the h/w
+ *   interface. Before using it, you need to check that it is supported by
+ *   writing PVSCSI_CMD_SETUP_MSG_RING to the 'command' register, then
+ *   immediately after read the 'command status' register:
+ *       * a value of -1 means that the cmd is NOT supported,
+ *       * a value != -1 means that the cmd IS supported.
+ *   If it's supported the 'command status' register should return:
+ *      sizeof(PVSCSICmdDescSetupMsgRing) / sizeof(uint32_t).
+ * - this command should be issued _after_ the usual SETUP_RINGS so that the
+ *   RingsState page is already setup. If not, the command is a nop.
+ * - numPages needs to be a power of two,
+ * - numPages needs to be different from 0,
+ * - _pad should be zero.
+ */
+
+#define PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES  16
+
+struct PVSCSICmdDescSetupMsgRing {
+    uint32_t    numPages;
+    uint32_t    _pad;
+    uint64_t    ringPPNs[PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES];
+} QEMU_PACKED;
+
+enum PVSCSIMsgType {
+    PVSCSI_MSG_DEV_ADDED          = 0,
+    PVSCSI_MSG_DEV_REMOVED        = 1,
+    PVSCSI_MSG_LAST               = 2,
+};
+
+/*
+ * Msg descriptor.
+ *
+ * sizeof(struct PVSCSIRingMsgDesc) == 128.
+ *
+ * - type is of type enum PVSCSIMsgType.
+ * - the content of args depend on the type of event being delivered.
+ */
+
+struct PVSCSIRingMsgDesc {
+    uint32_t    type;
+    uint32_t    args[31];
+} QEMU_PACKED;
+
+struct PVSCSIMsgDescDevStatusChanged {
+    uint32_t    type;  /* PVSCSI_MSG_DEV _ADDED / _REMOVED */
+    uint32_t    bus;
+    uint32_t    target;
+    uint8_t     lun[8];
+    uint32_t    pad[27];
+} QEMU_PACKED;
+
+/*
+ * Rings state.
+ *
+ * - the fields:
+ *    . msgProdIdx,
+ *    . msgConsIdx,
+ *    . msgNumEntriesLog2,
+ *   .. are only used once the SETUP_MSG_RING cmd has been issued.
+ * - '_pad' helps to ensure that the msg related fields are on their own
+ *   cache-line.
+ */
+
+struct PVSCSIRingsState {
+    uint32_t    reqProdIdx;
+    uint32_t    reqConsIdx;
+    uint32_t    reqNumEntriesLog2;
+
+    uint32_t    cmpProdIdx;
+    uint32_t    cmpConsIdx;
+    uint32_t    cmpNumEntriesLog2;
+
+    uint8_t     _pad[104];
+
+    uint32_t    msgProdIdx;
+    uint32_t    msgConsIdx;
+    uint32_t    msgNumEntriesLog2;
+} QEMU_PACKED;
+
+/*
+ * Request descriptor.
+ *
+ * sizeof(RingReqDesc) = 128
+ *
+ * - context: is a unique identifier of a command. It could normally be any
+ *   64bit value, however we currently store it in the serialNumber variable
+ *   of struct SCSI_Command, so we have the following restrictions due to the
+ *   way this field is handled in the vmkernel storage stack:
+ *    * this value can't be 0,
+ *    * the upper 32bit need to be 0 since serialNumber is as a uint32_t.
+ *   Currently tracked as PR 292060.
+ * - dataLen: contains the total number of bytes that need to be transferred.
+ * - dataAddr:
+ *   * if PVSCSI_FLAG_CMD_WITH_SG_LIST is set: dataAddr is the PA of the first
+ *     s/g table segment, each s/g segment is entirely contained on a single
+ *     page of physical memory,
+ *   * if PVSCSI_FLAG_CMD_WITH_SG_LIST is NOT set, then dataAddr is the PA of
+ *     the buffer used for the DMA transfer,
+ * - flags:
+ *   * PVSCSI_FLAG_CMD_WITH_SG_LIST: see dataAddr above,
+ *   * PVSCSI_FLAG_CMD_DIR_NONE: no DMA involved,
+ *   * PVSCSI_FLAG_CMD_DIR_TOHOST: transfer from device to main memory,
+ *   * PVSCSI_FLAG_CMD_DIR_TODEVICE: transfer from main memory to device,
+ *   * PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB: reserved to handle CDBs larger than
+ *     16bytes. To be specified.
+ * - vcpuHint: vcpuId of the processor that will be most likely waiting for the
+ *   completion of the i/o. For guest OSes that use lowest priority message
+ *   delivery mode (such as windows), we use this "hint" to deliver the
+ *   completion action to the proper vcpu. For now, we can use the vcpuId of
+ *   the processor that initiated the i/o as a likely candidate for the vcpu
+ *   that will be waiting for the completion..
+ * - bus should be 0: we currently only support bus 0 for now.
+ * - unused should be zero'd.
+ */
+
+#define PVSCSI_FLAG_CMD_WITH_SG_LIST        (1 << 0)
+#define PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB     (1 << 1)
+#define PVSCSI_FLAG_CMD_DIR_NONE            (1 << 2)
+#define PVSCSI_FLAG_CMD_DIR_TOHOST          (1 << 3)
+#define PVSCSI_FLAG_CMD_DIR_TODEVICE        (1 << 4)
+
+#define PVSCSI_KNOWN_FLAGS \
+  (PVSCSI_FLAG_CMD_WITH_SG_LIST     | \
+   PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB  | \
+   PVSCSI_FLAG_CMD_DIR_NONE         | \
+   PVSCSI_FLAG_CMD_DIR_TOHOST       | \
+   PVSCSI_FLAG_CMD_DIR_TODEVICE)
+
+struct PVSCSIRingReqDesc {
+    uint64_t    context;
+    uint64_t    dataAddr;
+    uint64_t    dataLen;
+    uint64_t    senseAddr;
+    uint32_t    senseLen;
+    uint32_t    flags;
+    uint8_t     cdb[16];
+    uint8_t     cdbLen;
+    uint8_t     lun[8];
+    uint8_t     tag;
+    uint8_t     bus;
+    uint8_t     target;
+    uint8_t     vcpuHint;
+    uint8_t     unused[59];
+} QEMU_PACKED;
+
+typedef struct PVSCSIRingReqDesc PVSCSIRingReqDesc;
+
+/*
+ * Scatter-gather list management.
+ *
+ * As described above, when PVSCSI_FLAG_CMD_WITH_SG_LIST is set in the
+ * RingReqDesc.flags, then RingReqDesc.dataAddr is the PA of the first s/g
+ * table segment.
+ *
+ * - each segment of the s/g table contain a succession of struct
+ *   PVSCSISGElement.
+ * - each segment is entirely contained on a single physical page of memory.
+ * - a "chain" s/g element has the flag PVSCSI_SGE_FLAG_CHAIN_ELEMENT set in
+ *   PVSCSISGElement.flags and in this case:
+ *     * addr is the PA of the next s/g segment,
+ *     * length is undefined, assumed to be 0.
+ */
+
+struct PVSCSISGElement {
+    uint64_t    addr;
+    uint32_t    length;
+    uint32_t    flags;
+} QEMU_PACKED;
+
+/*
+ * Completion descriptor.
+ *
+ * sizeof(RingCmpDesc) = 32
+ *
+ * - context: identifier of the command. The same thing that was specified
+ *   under "context" as part of struct RingReqDesc at initiation time,
+ * - dataLen: number of bytes transferred for the actual i/o operation,
+ * - senseLen: number of bytes written into the sense buffer,
+ * - hostStatus: adapter status,
+ * - scsiStatus: device status,
+ * - _pad should be zero.
+ */
+
+struct PVSCSIRingCmpDesc {
+    uint64_t    context;
+    uint64_t    dataLen;
+    uint32_t    senseLen;
+    uint16_t    hostStatus;
+    uint16_t    scsiStatus;
+    uint32_t    _pad[2];
+} QEMU_PACKED;
+typedef struct PVSCSIRingCmpDesc PVSCSIRingCmpDesc;
+
+/*
+ * Interrupt status / IRQ bits.
+ */
+
+#define PVSCSI_INTR_CMPL_0                 (1 << 0)
+#define PVSCSI_INTR_CMPL_1                 (1 << 1)
+#define PVSCSI_INTR_CMPL_MASK              MASK(2)
+
+#define PVSCSI_INTR_MSG_0                  (1 << 2)
+#define PVSCSI_INTR_MSG_1                  (1 << 3)
+#define PVSCSI_INTR_MSG_MASK               (MASK(2) << 2)
+
+#define PVSCSI_INTR_ALL_SUPPORTED          MASK(4)
+
+/*
+ * Number of MSI-X vectors supported.
+ */
+#define PVSCSI_MAX_INTRS        24
+
+/*
+ * Enumeration of supported MSI-X vectors
+ */
+#define PVSCSI_VECTOR_COMPLETION   0
+
+/*
+ * Misc constants for the rings.
+ */
+
+#define PVSCSI_MAX_NUM_PAGES_REQ_RING   PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
+#define PVSCSI_MAX_NUM_PAGES_CMP_RING   PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
+#define PVSCSI_MAX_NUM_PAGES_MSG_RING   PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES
+
+#define PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE \
+                (PAGE_SIZE / sizeof(struct PVSCSIRingReqDesc))
+
+#define PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE \
+                (PAGE_SIZE / sizeof(PVSCSIRingCmpDesc))
+
+#define PVSCSI_MAX_REQ_QUEUE_DEPTH \
+    (PVSCSI_MAX_NUM_PAGES_REQ_RING * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE)
+
+#define PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES     1
+#define PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES 1
+#define PVSCSI_MEM_SPACE_MISC_NUM_PAGES        2
+#define PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES     2
+#define PVSCSI_MEM_SPACE_MSIX_NUM_PAGES        2
+
+enum PVSCSIMemSpace {
+    PVSCSI_MEM_SPACE_COMMAND_PAGE       = 0,
+    PVSCSI_MEM_SPACE_INTR_STATUS_PAGE   = 1,
+    PVSCSI_MEM_SPACE_MISC_PAGE          = 2,
+    PVSCSI_MEM_SPACE_KICK_IO_PAGE       = 4,
+    PVSCSI_MEM_SPACE_MSIX_TABLE_PAGE    = 6,
+    PVSCSI_MEM_SPACE_MSIX_PBA_PAGE      = 7,
+};
+
+#define PVSCSI_MEM_SPACE_NUM_PAGES \
+    (PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES +       \
+     PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES +   \
+     PVSCSI_MEM_SPACE_MISC_NUM_PAGES +          \
+     PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES +       \
+     PVSCSI_MEM_SPACE_MSIX_NUM_PAGES)
+
+#define PVSCSI_MEM_SPACE_SIZE        (PVSCSI_MEM_SPACE_NUM_PAGES * PAGE_SIZE)
+
+#endif /* _VMW_PVSCSI_H_ */