@@ -255,6 +255,7 @@ hw-obj-$(CONFIG_AHCI) += ide/ich.o
# SCSI layer
hw-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
+hw-obj-$(CONFIG_VMWARE_PVSCSI_PCI) += vmw_pvscsi.o
hw-obj-$(CONFIG_ESP) += esp.o
hw-obj-y += dma-helpers.o sysbus.o isa-bus.o
@@ -8,6 +8,7 @@ CONFIG_EEPRO100_PCI=y
CONFIG_PCNET_PCI=y
CONFIG_PCNET_COMMON=y
CONFIG_LSI_SCSI_PCI=y
+CONFIG_VMWARE_PVSCSI_PCI=y
CONFIG_RTL8139_PCI=y
CONFIG_E1000_PCI=y
CONFIG_IDE_CORE=y
@@ -59,6 +59,7 @@
#define PCI_DEVICE_ID_VMWARE_NET 0x0720
#define PCI_DEVICE_ID_VMWARE_SCSI 0x0730
#define PCI_DEVICE_ID_VMWARE_IDE 0x1729
+#define PCI_DEVICE_ID_VMWARE_PVSCSI 0x07c0
/* Intel (0x8086) */
#define PCI_DEVICE_ID_INTEL_82551IT 0x1209
new file mode 100644
@@ -0,0 +1,911 @@
+/*
+ * VMware Paravirtualized SCSI Host Bus Adapter emulation
+ *
+ * Copyright (c) 2011 Red Hat, Inc.
+ * Written by Paolo Bonzini
+ *
+ * This code is licensed under GPLv2 or later.
+ */
+
+#include <assert.h>
+
+#include "hw.h"
+#include "pci.h"
+#include "scsi.h"
+#include "scsi-defs.h"
+#include "vmw_pvscsi.h"
+#include "block_int.h"
+#include "host-utils.h"
+#include "trace.h"
+
+#define PVSCSI_MAX_DEVS 127
+#define PAGE_SIZE 4096
+#define PAGE_SHIFT 12
+
+/*
+ * Per-command state for one descriptor taken from the guest request ring.
+ * A request is linked through 'next' on the adapter's pending_queue until
+ * pvscsi_complete_req() moves it to complete_queue.
+ */
+typedef struct PVSCSIRequest {
+ SCSIDevice *sdev; /* target device; left NULL when the lookup failed */
+ uint8_t sensing; /* 0: normal; 1: REQUEST SENSE issued; 2: sense written back */
+ uint8_t sense_key; /* byte 2 of the fetched sense data */
+ uint8_t completed; /* set once by pvscsi_complete_req() */
+ int lun;
+ target_phys_addr_t sg_current_addr; /* guest address of the next s/g element */
+ target_phys_addr_t sg_current_dataAddr; /* guest address inside current s/g element */
+ uint32_t sg_current_resid; /* bytes left in the current s/g element */
+ uint64_t resid; /* bytes still allowed to be transferred */
+ struct PVSCSIRingReqDesc req; /* private copy of the guest's request */
+ struct PVSCSIRingCmpDesc cmp; /* completion descriptor being built */
+ QTAILQ_ENTRY(PVSCSIRequest) next;
+} PVSCSIRequest;
+
+/* Queue head type used for both the pending and the completed lists. */
+typedef QTAILQ_HEAD(, PVSCSIRequest) PVSCSIRequestList;
+
+/*
+ * Device state of the emulated PVSCSI adapter.  Everything from 'cmd_latch'
+ * to the end of the structure is cleared by pvscsi_soft_reset(), so fields
+ * that must survive a reset have to be placed before it.
+ */
+typedef struct {
+ PCIDevice dev;
+ SCSIBus bus;
+ QEMUBH *complete_reqs_bh; /* runs pvscsi_complete_reqs() */
+
+ int mmio_io_addr; /* handle returned by cpu_register_io_memory() */
+
+ /* zeroed on reset */
+ uint32_t cmd_latch; /* last value written to the COMMAND register */
+ uint32_t cmd_buffer[sizeof(struct PVSCSICmdDescSetupRings)
+ / sizeof(uint32_t)]; /* argument words from COMMAND_DATA */
+ uint32_t cmd_ptr; /* number of argument words collected so far */
+ uint32_t cmd_status; /* result of the last command handler */
+ uint32_t intr_status;
+ uint32_t intr_mask;
+ uint32_t intr_cmpl; /* completion interrupt bit currently in use */
+ uint32_t intr_msg; /* message interrupt bit currently in use */
+ struct PVSCSICmdDescSetupRings rings;
+ struct PVSCSICmdDescSetupMsgRing msgRing;
+ uint32_t reqNumEntriesLog2;
+ uint32_t cmpNumEntriesLog2;
+ uint32_t msgNumEntriesLog2;
+
+ PVSCSIRequestList pending_queue; /* requests handed to the SCSI layer */
+ PVSCSIRequestList complete_queue; /* requests waiting for a completion ring slot */
+} PVSCSIState;
+
+
+/*
+ * Decode the 8-byte LUN field of a descriptor.
+ *
+ * The field is read as a big-endian 64-bit integer.  Returns the LUN
+ * (0..255) when the value fits in eight bits, or -1 otherwise.
+ *
+ * The bytes must be combined with bitwise OR; logical OR would collapse
+ * the whole value to 0 or 1.
+ *
+ * NOTE(review): confirm against the guest driver which byte carries a
+ * single-level LUN; this keeps the layout the original expression implied.
+ */
+static inline int pvscsi_get_lun(uint8_t *lun)
+{
+    uint64_t lunval = 0;
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        lunval = (lunval << 8) | lun[i];
+    }
+    if ((lunval & ~(uint64_t) 255) != 0) {
+        return -1;
+    }
+    return lunval & 255;
+}
+
+/*
+ * Look up the SCSI device addressed by (target, lun).
+ *
+ * On success *sdev is the device and the decoded LUN (>= 0) is returned.
+ * On failure *sdev is NULL and -1 is returned; this covers a target
+ * outside the bus, an undecodable LUN and an empty target slot.
+ */
+static inline int pvscsi_get_dev_lun(PVSCSIState *s,
+                                     uint8_t *lun, uint32_t target,
+                                     SCSIDevice **sdev)
+{
+    int lunval;
+
+    *sdev = NULL;
+    /* The bus is created with PVSCSI_MAX_DEVS targets: 0 .. MAX - 1. */
+    if (target >= PVSCSI_MAX_DEVS) {
+        return -1;
+    }
+    lunval = pvscsi_get_lun(lun);
+    if (lunval < 0) {
+        return -1;
+    }
+    *sdev = s->bus.devs[target];
+    /* Test the looked-up device, not the (never NULL) out-pointer. */
+    if (!*sdev) {
+        return -1;
+    }
+    return lunval;
+}
+
+
+/* Add a command to the pending queue. */
+/*
+ * Allocate a request for the descriptor 'req', append it to the pending
+ * queue and resolve its target device.  The request's sdev stays NULL
+ * when no device could be found.
+ */
+static PVSCSIRequest *pvscsi_queue_request(PVSCSIState *s,
+                                           struct PVSCSIRingReqDesc *req)
+{
+    PVSCSIRequest *entry;
+    SCSIDevice *dev;
+    int lun_nr;
+
+    trace_pvscsi_queue_request(req->context, req->cdb[0], req->dataLen);
+
+    entry = qemu_mallocz(sizeof(*entry));
+    entry->req = *req;
+    entry->cmp.context = entry->req.context;
+    QTAILQ_INSERT_TAIL(&s->pending_queue, entry, next);
+
+    lun_nr = pvscsi_get_dev_lun(s, req->lun, req->target, &dev);
+    if (dev) {
+        entry->lun = lun_nr;
+        entry->sdev = dev;
+    }
+    return entry;
+}
+
+/* Get PVSCSIRequest for this tag. */
+/*
+ * Return the pending request whose context equals 'tag', or NULL when
+ * there is none.
+ */
+static PVSCSIRequest *pvscsi_find_request(PVSCSIState *s, uint32_t tag)
+{
+    PVSCSIRequest *cur = QTAILQ_FIRST(&s->pending_queue);
+
+    while (cur && cur->req.context != tag) {
+        cur = QTAILQ_NEXT(cur, next);
+    }
+    return cur;
+}
+
+/* Unlink and free every request on the queue 'q'. */
+static void pvscsi_free_queue(PVSCSIRequestList *q)
+{
+    PVSCSIRequest *head;
+
+    while ((head = QTAILQ_FIRST(q)) != NULL) {
+        QTAILQ_REMOVE(q, head, next);
+        qemu_free(head);
+    }
+}
+
+/*
+ * Reset the adapter: reset the devices on the bus, drop completions that
+ * were not delivered yet and clear all state from cmd_latch onwards.
+ */
+static void pvscsi_soft_reset(PVSCSIState *s)
+{
+ qbus_reset_all_fn(&s->bus);
+ pvscsi_free_queue(&s->complete_queue);
+ /* NOTE(review): presumably the bus reset above is expected to have
+ * finished all in-flight requests; otherwise this assertion fires. */
+ assert(QTAILQ_EMPTY(&s->pending_queue));
+ /* Clears every field from cmd_latch to the end, queue heads included. */
+ memset(&s->cmd_latch, 0, sizeof(*s) - offsetof(PVSCSIState, cmd_latch));
+ s->intr_cmpl = PVSCSI_INTR_CMPL_0;
+ s->intr_msg = PVSCSI_INTR_MSG_0;
+ /* The memset zeroed the queue heads, so re-initialize them. */
+ QTAILQ_INIT(&s->pending_queue);
+ QTAILQ_INIT(&s->complete_queue);
+}
+
+
+/*
+ * Set the bits of 'mask' in the interrupt status register and assert the
+ * IRQ line if any newly set bit is enabled in the interrupt mask.
+ */
+static void pvscsi_raise_intr(PVSCSIState *s, int mask)
+{
+    int newly_set = mask & ~s->intr_status;
+    bool deliver = (newly_set & s->intr_mask) != 0;
+
+    s->intr_status |= mask;
+    trace_pvscsi_raise_intr(newly_set, deliver ? "" : "masked");
+    if (deliver) {
+        qemu_set_irq(s->dev.irq[0], 1);
+    }
+}
+
+/*
+ * Clear the bits of 'mask' in the interrupt status.  Acknowledging the
+ * completion or message bit currently in use switches to the other bit
+ * of its pair; the IRQ line is lowered once no enabled bit is pending.
+ */
+static void pvscsi_acknowledge_intr(PVSCSIState *s, int mask)
+{
+    trace_pvscsi_acknowledge_intr(mask);
+    s->intr_status &= ~mask;
+
+    if (s->intr_cmpl == mask) {
+        s->intr_cmpl ^= PVSCSI_INTR_CMPL_MASK;
+
+        /* Completions may have been held back; try to post them now. */
+        if (!QTAILQ_EMPTY(&s->complete_queue)) {
+            qemu_bh_schedule(s->complete_reqs_bh);
+        }
+    }
+    if (s->intr_msg == mask) {
+        s->intr_msg ^= PVSCSI_INTR_MSG_MASK;
+    }
+
+    if (!(s->intr_status & s->intr_mask)) {
+        qemu_set_irq(s->dev.irq[0], 0);
+    }
+}
+
+/*
+ * Install a new interrupt mask.  The IRQ line is asserted when a pending
+ * bit just became enabled and lowered when no pending bit is enabled.
+ */
+static void pvscsi_set_intr_mask(PVSCSIState *s, int mask)
+{
+    int newly_enabled = mask & ~s->intr_mask;
+
+    s->intr_mask = mask;
+    /* newly_enabled is a subset of mask, so the two cases are exclusive. */
+    if (s->intr_status & newly_enabled) {
+        qemu_set_irq(s->dev.irq[0], 1);
+    } else if (!(s->intr_status & mask)) {
+        qemu_set_irq(s->dev.irq[0], 0);
+    }
+}
+
+
+/*
+ * Load/store one 32-bit field of the PVSCSIRingsState page in guest
+ * memory.  The macro arguments are parenthesized so that arbitrary
+ * expressions can be passed for 's' and 'val'.
+ */
+#define pvscsi_ld_ring_state(s, field) \
+    ldl_phys((s)->rings.ringsStatePPN * PAGE_SIZE + \
+             offsetof(struct PVSCSIRingsState, field))
+
+#define pvscsi_st_ring_state(s, field, val) \
+    stl_phys((s)->rings.ringsStatePPN * PAGE_SIZE + \
+             offsetof(struct PVSCSIRingsState, field), (val))
+
+/* Return number of free elements in the completion ring. */
+/*
+ * Number of completion ring slots the device may still fill; one slot
+ * is always kept unused.
+ */
+static inline int pvscsi_cmp_free(PVSCSIState *s)
+{
+    uint32_t prod = pvscsi_ld_ring_state(s, cmpProdIdx);
+    uint32_t cons = pvscsi_ld_ring_state(s, cmpConsIdx);
+
+    return (1 << s->cmpNumEntriesLog2) - 1 - (prod - cons);
+}
+
+/* Return number of pending elements in the request ring. */
+/* Number of request ring entries produced by the guest but not consumed. */
+static inline int pvscsi_req_pending(PVSCSIState *s)
+{
+    uint32_t prod = pvscsi_ld_ring_state(s, reqProdIdx);
+    uint32_t cons = pvscsi_ld_ring_state(s, reqConsIdx);
+
+    return prod - cons;
+}
+
+/* Return the physical address of the idx-th element in the ring
+ * whose physical page numbers are given by ppn. Each element in
+ * the ring has size bytes. */
+/*
+ * Translate ring element 'idx' (each element is 'size' bytes) into a
+ * guest physical address, using the page number array 'ppn' that backs
+ * the ring.
+ */
+static target_phys_addr_t pvscsi_get_ring_addr(PVSCSIState *s, int idx,
+                                               int size, uint64_t *ppn)
+{
+    uint32_t byte_ofs = idx * size;
+    uint64_t page_base = ppn[byte_ofs >> PAGE_SHIFT] * PAGE_SIZE;
+
+    return page_base | (byte_ofs & (PAGE_SIZE - 1));
+}
+
+
+/* Expands to nothing; it only marks where ring accesses must stay ordered. */
+#define barrier()
+
+/* Copy cmp_desc on the completion ring, assuming there is a free entry. */
+/*
+ * Write 'cmp_desc' into the next completion ring slot and then publish
+ * it by advancing cmpProdIdx.  The caller guarantees a free slot.
+ */
+static void pvscsi_cmp_ring_put(PVSCSIState *s,
+                                struct PVSCSIRingCmpDesc *cmp_desc)
+{
+    uint32_t log2_entries = s->cmpNumEntriesLog2;
+    uint32_t prod = pvscsi_ld_ring_state(s, cmpProdIdx);
+    uint32_t slot = prod & MASK(log2_entries);
+    target_phys_addr_t slot_addr;
+
+    trace_pvscsi_cmp_ring_put(cmp_desc->context);
+    slot_addr = pvscsi_get_ring_addr(s, slot,
+                                     sizeof(struct PVSCSIRingCmpDesc),
+                                     s->rings.cmpRingPPNs);
+
+    barrier();
+    /* The descriptor must be in guest memory before the index moves. */
+    cpu_physical_memory_write(slot_addr, (void *)cmp_desc, sizeof(*cmp_desc));
+    barrier();
+    pvscsi_st_ring_state(s, cmpProdIdx, prod + 1);
+}
+
+/* Put all completed requests on the completion ring. */
+/*
+ * Bottom half: move as many completed requests as fit onto the
+ * completion ring and raise the completion interrupt if any were posted.
+ */
+static void pvscsi_complete_reqs(void *opaque)
+{
+    PVSCSIState *s = opaque;
+    int room = pvscsi_cmp_free(s);
+    int posted = 0;
+
+    for (; room > 0 && !QTAILQ_EMPTY(&s->complete_queue); room--, posted++) {
+        PVSCSIRequest *head = QTAILQ_FIRST(&s->complete_queue);
+
+        QTAILQ_REMOVE(&s->complete_queue, head, next);
+        pvscsi_cmp_ring_put(s, &head->cmp);
+        qemu_free(head);
+    }
+    if (posted) {
+        pvscsi_raise_intr(s, s->intr_cmpl);
+    }
+}
+
+/* Prepare to put r on the completion ring. */
+/*
+ * Mark request 'p' as completed: move it from the pending queue to the
+ * complete queue and schedule the bottom half that fills the ring.
+ */
+static void pvscsi_complete_req(PVSCSIState *s, PVSCSIRequest *p)
+{
+    assert(!p->completed);
+    trace_pvscsi_complete_req(p->cmp.context, p->cmp.dataLen, p->sense_key);
+
+    p->completed = 1;
+    QTAILQ_REMOVE(&s->pending_queue, p, next);
+    QTAILQ_INSERT_TAIL(&s->complete_queue, p, next);
+
+    qemu_bh_schedule(s->complete_reqs_bh);
+}
+
+/* Fetch sense data for a completed request. */
+/*
+ * Issue a REQUEST SENSE (opcode 3, allocation length 96) for 'tag'.
+ * Returns true when the device has sense data to transfer, in which case
+ * the read has been started; returns false otherwise.
+ */
+static bool pvscsi_send_request_sense(SCSIDevice *sdev, int tag, int lun)
+{
+    uint8_t cdb[6] = { 3, lun << 5, 0, 0, 96, 0 };
+    int xfer;
+
+    trace_pvscsi_request_sense(tag, lun);
+    xfer = sdev->info->send_command(sdev, tag, cdb, lun);
+    if (xfer > 0) {
+        sdev->info->read_data(sdev, tag);
+        return true;
+    }
+    if (xfer < 0) {
+        /* should not happen, just leave sense data empty in this case. */
+        sdev->info->cancel_io(sdev, tag);
+    }
+    return false;
+}
+
+/* Write sense data for a completed request. */
+/*
+ * Record the sense key from 'buf' and copy the sense data to the guest's
+ * sense buffer, truncated to the length the guest provided.
+ */
+static void pvscsi_write_sense(PVSCSIRequest *p, uint8_t *buf, int len)
+{
+    p->sense_key = buf[2];
+    p->cmp.senseLen = MIN(p->req.senseLen, len);
+    cpu_physical_memory_write(p->req.senseAddr, buf, p->cmp.senseLen);
+}
+
+/*
+ * Copy 'len' bytes between 'buf' and the request's flat guest buffer,
+ * then advance the buffer address and the transfer accounting.
+ */
+static void pvscsi_transfer_data_with_buffer(PVSCSIRequest *p, bool to_host,
+                                             uint8_t *buf, int len)
+{
+    if (!len) {
+        return;
+    }
+
+    cpu_physical_memory_rw(p->req.dataAddr, buf, len, to_host);
+    p->req.dataAddr += len;
+    p->cmp.dataLen += len;
+    p->resid -= len;
+}
+
+/*
+ * Read the scatter/gather element at p->sg_current_addr from guest memory
+ * and make it the current element, then step sg_current_addr past it.
+ *
+ * With the chain handling compiled out the loop body runs exactly once;
+ * chain elements are therefore treated as ordinary data elements.
+ */
+static void pvscsi_get_next_sg_elem(struct PVSCSIRequest *p)
+{
+ struct PVSCSISGElement elem;
+
+ for (;; p->sg_current_addr = elem.addr) {
+ cpu_physical_memory_read(p->sg_current_addr, (void *)&elem,
+ sizeof(elem));
+#if 0
+ /* PVSCSI_SGE_FLAG_CHAIN_ELEMENT not in the header file! */
+ if ((elem.flags & PVSCSI_SGE_FLAG_CHAIN_ELEMENT) == 0) {
+ break;
+ }
+#else
+ break;
+#endif
+ }
+
+ /* Point at the element following the one just consumed. */
+ p->sg_current_addr += sizeof(elem);
+ p->sg_current_dataAddr = elem.addr;
+ p->sg_current_resid = elem.length;
+ trace_pvscsi_sg_elem(p->req.context, elem.addr, elem.length);
+}
+
+/*
+ * Copy 'len' bytes between 'buf' and the guest memory described by the
+ * request's scatter/gather list, fetching further elements as each one
+ * is used up.
+ */
+static void pvscsi_transfer_data_with_sg_list(PVSCSIRequest *p, bool to_host,
+                                              uint8_t *buf, int len)
+{
+    while (len) {
+        int chunk;
+
+        /* Skip over exhausted (or empty) elements. */
+        while (p->sg_current_resid == 0) {
+            pvscsi_get_next_sg_elem(p);
+        }
+        assert(len > 0);
+
+        chunk = MIN((unsigned) len, p->sg_current_resid);
+        if (chunk) {
+            cpu_physical_memory_rw(p->sg_current_dataAddr, buf, chunk,
+                                   to_host);
+        }
+
+        buf += chunk;
+        len -= chunk;
+
+        p->sg_current_dataAddr += chunk;
+        p->sg_current_resid -= chunk;
+        p->cmp.dataLen += chunk;
+        p->resid -= chunk;
+    }
+}
+
+/*
+ * Move 'len' bytes between 'buf' and guest memory for request 'p'.
+ * Returns false without transferring anything when 'len' exceeds the
+ * bytes remaining for the request, true otherwise.
+ */
+static bool pvscsi_transfer_data(PVSCSIRequest *p, void *buf, int len)
+{
+    int to_host = (p->req.flags & PVSCSI_FLAG_CMD_DIR_TOHOST) != 0;
+
+    if (p->resid < len) {
+        /* Do nothing upon underrun. */
+        return false;
+    }
+
+    trace_pvscsi_transfer_data(p->req.context, len);
+    if (!(p->req.flags & PVSCSI_FLAG_CMD_WITH_SG_LIST)) {
+        pvscsi_transfer_data_with_buffer(p, to_host, buf, len);
+    } else {
+        pvscsi_transfer_data_with_sg_list(p, to_host, buf, len);
+    }
+    return true;
+}
+
+/*
+ * Ask the SCSI device to continue the request: write_data for transfers
+ * flagged as to-device, read_data for everything else.
+ */
+static void pvscsi_kick_device(PVSCSIRequest *p)
+{
+    SCSIDevice *dev = p->sdev;
+
+    if (!(p->req.flags & PVSCSI_FLAG_CMD_DIR_TODEVICE)) {
+        dev->info->read_data(dev, p->req.context);
+    } else {
+        dev->info->write_data(dev, p->req.context);
+    }
+}
+
+/* Callback to indicate that the SCSI layer has completed a transfer. */
+/*
+ * Callback to indicate that the SCSI layer has completed a transfer.
+ *
+ * 'tag' selects the pending request.  For SCSI_REASON_DATA, 'arg' is the
+ * number of bytes available in the device buffer; for SCSI_REASON_DONE it
+ * is the SCSI status.  A CHECK CONDITION status triggers an automatic
+ * REQUEST SENSE, tracked by p->sensing, before the request is completed.
+ */
+static void pvscsi_command_complete(SCSIBus *bus, int reason, uint32_t tag,
+ uint32_t arg)
+{
+ PVSCSIState *s = DO_UPCAST(PVSCSIState, dev.qdev, bus->qbus.parent);
+ PVSCSIRequest *p = pvscsi_find_request(s, tag);
+ SCSIDevice *sdev;
+ uint8_t *buf;
+
+ if (!p) {
+ fprintf(stderr, "PVSCSI: Can't find request for tag 0x%x\n", tag);
+ return;
+ }
+
+ sdev = p->sdev;
+ /* Data phase of the guest's own command (not of our REQUEST SENSE). */
+ if (reason == SCSI_REASON_DATA && !p->sensing) {
+ assert(p->resid);
+ if (!arg) {
+ /* Short transfer. */
+ sdev->info->cancel_io(sdev, tag);
+ p->cmp.hostStatus = BTSTAT_DATARUN;
+ p->cmp.scsiStatus = CHECK_CONDITION;
+ goto complete;
+ }
+
+ buf = sdev->info->get_buf(sdev, tag);
+ if (!pvscsi_transfer_data(p, buf, arg)) {
+ /* Small buffer. */
+ sdev->info->cancel_io(sdev, tag);
+ p->cmp.hostStatus = BTSTAT_DATARUN;
+ p->cmp.scsiStatus = CHECK_CONDITION;
+ goto complete;
+ }
+
+ pvscsi_kick_device(p);
+
+ /* We'll be called back asynchronously, exit. */
+ return;
+ }
+
+ /* Here to complete the request. */
+ if (reason == SCSI_REASON_DONE) {
+ p->cmp.scsiStatus = arg;
+ }
+
+complete:
+ if (p->sensing == 0 && p->cmp.scsiStatus == CHECK_CONDITION) {
+ /* First CHECK CONDITION: fetch sense data before completing. */
+ p->sensing = 1;
+ if (pvscsi_send_request_sense(sdev, tag, p->lun)) {
+ return;
+ }
+
+ } else if (p->sensing == 1 && reason == SCSI_REASON_DATA) {
+ /* Got sense data. Write it back and kick the device to complete
+ * the request. */
+ if (arg) {
+ buf = sdev->info->get_buf(sdev, tag);
+ pvscsi_write_sense(p, buf, arg);
+ if (buf[2] == NO_SENSE) {
+ p->cmp.scsiStatus = GOOD;
+ }
+ }
+ p->sensing = 2;
+ pvscsi_kick_device(p);
+ return;
+ }
+
+ pvscsi_complete_req(s, p);
+}
+
+
+/* Process a request from the request ring. */
+/*
+ * Process a request from the request ring.
+ *
+ * The request is queued and its CDB handed to the target device.  The
+ * device's answer 'n' gives the transfer size: positive for a transfer
+ * to the host, negative for a transfer to the device, zero for none.
+ * A direction that contradicts the guest's flags fails the request with
+ * BTSTAT_BADMSG; a missing device fails it with BTSTAT_SELTIMEO.
+ */
+static void pvscsi_process_req(PVSCSIState *s, struct PVSCSIRingReqDesc *r)
+{
+    PVSCSIRequest *p = pvscsi_queue_request(s, r);
+    int64_t datalen, n;
+
+    if (!p->sdev) {
+        p->cmp.hostStatus = BTSTAT_SELTIMEO;
+        goto fail_nocancel;
+    }
+
+    if (r->flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) {
+        p->sg_current_addr = r->dataAddr;
+    }
+
+    n = p->sdev->info->send_command(p->sdev, r->context, r->cdb, p->lun);
+    if ((n > 0) && (r->flags & PVSCSI_FLAG_CMD_DIR_TODEVICE)) {
+        p->cmp.hostStatus = BTSTAT_BADMSG;
+        goto fail;
+    }
+    if ((n < 0) && (r->flags & PVSCSI_FLAG_CMD_DIR_TOHOST)) {
+        p->cmp.hostStatus = BTSTAT_BADMSG;
+        goto fail;
+    }
+
+    /*
+     * Bound the transfer by the magnitude of the device's answer.  Using
+     * the signed value here would turn a negative (to-device) length into
+     * a huge unsigned number when compared with the 64-bit dataLen.
+     */
+    datalen = (n < 0 ? -n : n);
+    p->resid = MIN((uint64_t) datalen, r->dataLen);
+    if (n) {
+        pvscsi_kick_device(p);
+    }
+    return;
+
+fail:
+    p->sdev->info->cancel_io(p->sdev, r->context);
+fail_nocancel:
+    pvscsi_complete_req(s, p);
+}
+
+/* Process pending requests on the request ring. */
+/*
+ * Consume every descriptor the guest has produced on the request ring,
+ * advancing reqConsIdx after each one has been processed.
+ */
+static void pvscsi_process_req_ring(PVSCSIState *s)
+{
+    uint32_t log2_entries = s->reqNumEntriesLog2;
+
+    trace_pvscsi_kick_io();
+    while (pvscsi_req_pending(s)) {
+        struct PVSCSIRingReqDesc desc;
+        uint32_t cons = pvscsi_ld_ring_state(s, reqConsIdx);
+        uint32_t slot = cons & MASK(log2_entries);
+        target_phys_addr_t slot_addr =
+            pvscsi_get_ring_addr(s, slot, sizeof(struct PVSCSIRingReqDesc),
+                                 s->rings.reqRingPPNs);
+
+        barrier();
+        cpu_physical_memory_read(slot_addr, (void *)&desc, sizeof(desc));
+        pvscsi_process_req(s, &desc);
+        barrier();
+        pvscsi_st_ring_state(s, reqConsIdx, cons + 1);
+    }
+}
+
+
+/* Handler for invalid command numbers: log the latched value, return -1. */
+static int32_t pvscsi_cmd_bad(PVSCSIState *s)
+{
+ fprintf(stderr, "vmw_pvscsi: bad command %d\n", s->cmd_latch);
+ return -1;
+}
+
+/* Handler for known but unimplemented commands: log and return -1. */
+static int32_t pvscsi_cmd_unimpl(PVSCSIState *s)
+{
+ fprintf(stderr, "vmw_pvscsi: unimplemented command %d\n", s->cmd_latch);
+ return -1;
+}
+
+/* PVSCSI_CMD_ADAPTER_RESET: perform a soft reset of the adapter. */
+static int32_t pvscsi_cmd_adapter_reset(PVSCSIState *s)
+{
+ pvscsi_soft_reset(s);
+ return 0;
+}
+
+/* Integer base-2 logarithm of x, rounded down; x must be non-zero. */
+static int floor_log2(int x)
+{
+ assert(x);
+ return 31 - clz32(x);
+}
+
+/* Setup ring buffers and initialize the ring state page. */
+/*
+ * PVSCSI_CMD_SETUP_RINGS: set up the ring buffers and initialize the
+ * ring state page.
+ *
+ * The descriptor comes from the guest, so both page counts are checked
+ * against the size of the PPN arrays before it is accepted; a larger
+ * count would let ring indexing run past reqRingPPNs/cmpRingPPNs.
+ * Returns 0 on success and -1 for an invalid descriptor, in which case
+ * the previous ring configuration is left untouched.
+ */
+static int32_t pvscsi_cmd_setup_rings(PVSCSIState *s)
+{
+    struct PVSCSICmdDescSetupRings rings;
+
+    memcpy(&rings, s->cmd_buffer, sizeof(rings));
+    if (rings.reqRingNumPages == 0 ||
+        rings.cmpRingNumPages == 0 ||
+        rings.reqRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES ||
+        rings.cmpRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES) {
+        return -1;
+    }
+    s->rings = rings;
+
+    /* Page counts that are not a power of two are rounded down. */
+    s->reqNumEntriesLog2 = floor_log2(s->rings.reqRingNumPages * PAGE_SIZE
+                                      / sizeof(struct PVSCSIRingReqDesc));
+    s->cmpNumEntriesLog2 = floor_log2(s->rings.cmpRingNumPages * PAGE_SIZE
+                                      / sizeof(struct PVSCSIRingCmpDesc));
+
+    trace_pvscsi_setup_req_ring(s->rings.reqRingNumPages,
+                                1 << s->reqNumEntriesLog2);
+    trace_pvscsi_setup_cmp_ring(s->rings.cmpRingNumPages,
+                                1 << s->cmpNumEntriesLog2);
+
+    pvscsi_st_ring_state(s, reqNumEntriesLog2, s->reqNumEntriesLog2);
+    pvscsi_st_ring_state(s, cmpNumEntriesLog2, s->cmpNumEntriesLog2);
+    pvscsi_st_ring_state(s, cmpProdIdx, 0);
+    pvscsi_st_ring_state(s, cmpConsIdx, 0);
+    pvscsi_st_ring_state(s, reqProdIdx, 0);
+    pvscsi_st_ring_state(s, reqConsIdx, 0);
+    return 0;
+}
+
+/* PVSCSI_CMD_RESET_BUS: reset every device attached to the SCSI bus. */
+static int32_t pvscsi_cmd_reset_bus(PVSCSIState *s)
+{
+ qbus_reset_all_fn(&s->bus);
+ return 0;
+}
+
+/*
+ * PVSCSI_CMD_RESET_DEVICE: reset the device addressed by the command
+ * descriptor, if it exists and has a reset handler.  Always returns 0.
+ */
+static int32_t pvscsi_cmd_reset_device(PVSCSIState *s)
+{
+    struct PVSCSICmdDescResetDevice *desc =
+        (struct PVSCSICmdDescResetDevice *) s->cmd_buffer;
+    SCSIDevice *dev;
+
+    pvscsi_get_dev_lun(s, desc->lun, desc->target, &dev);
+    if (dev == NULL || !dev->info->qdev.reset) {
+        return 0;
+    }
+    dev->info->qdev.reset(&dev->qdev);
+    return 0;
+}
+
+/* PVSCSI_CMD_ABORT_CMD: stub; nothing is aborted and 0 is returned. */
+static int32_t pvscsi_cmd_abort_cmd(PVSCSIState *s)
+{
+ return 0;
+}
+
+/*
+ * PVSCSI_CMD_SETUP_MSG_RING: set up the message ring and initialize its
+ * indices in the ring state page.
+ *
+ * numPages comes from the guest and is checked against the size of the
+ * ringPPNs array.  Returns 0 on success and -1 for an invalid
+ * descriptor, in which case the previous configuration is kept.
+ */
+static int32_t pvscsi_cmd_setup_msg_ring(PVSCSIState *s)
+{
+    struct PVSCSICmdDescSetupMsgRing msg_ring;
+
+    memcpy(&msg_ring, s->cmd_buffer, sizeof(msg_ring));
+    if (msg_ring.numPages == 0 ||
+        msg_ring.numPages > PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES) {
+        return -1;
+    }
+    s->msgRing = msg_ring;
+
+    s->msgNumEntriesLog2 = floor_log2(s->msgRing.numPages * PAGE_SIZE
+                                      / sizeof(struct PVSCSIRingMsgDesc));
+
+    trace_pvscsi_setup_msg_ring(s->msgRing.numPages,
+                                1 << s->msgNumEntriesLog2);
+
+    pvscsi_st_ring_state(s, msgNumEntriesLog2, s->msgNumEntriesLog2);
+    pvscsi_st_ring_state(s, msgProdIdx, 0);
+    pvscsi_st_ring_state(s, msgConsIdx, 0);
+    return 0;
+}
+
+/* Dispatch entry for one adapter command. */
+typedef struct {
+ int nargs; /* 32-bit COMMAND_DATA words to collect before calling fn */
+ int32_t (*fn)(PVSCSIState *); /* handler; its result becomes cmd_status */
+} PVSCSICmd;
+
+/*
+ * Command table indexed by the value written to the COMMAND register.
+ * Out-of-range values are mapped to the PVSCSI_CMD_FIRST entry by
+ * pvscsi_maybe_do_cmd().  An entry with nargs == 0 runs as soon as the
+ * command is latched.
+ */
+static const PVSCSICmd pvscsi_commands[PVSCSI_CMD_LAST] = {
+ [PVSCSI_CMD_FIRST] = {
+ .nargs = 0,
+ .fn = pvscsi_cmd_bad,
+ },
+ [PVSCSI_CMD_ADAPTER_RESET] = {
+ .nargs = 0,
+ .fn = pvscsi_cmd_adapter_reset
+ },
+ [PVSCSI_CMD_ISSUE_SCSI] = {
+ .nargs = 0, /* unknown */
+ .fn = pvscsi_cmd_unimpl
+ },
+ [PVSCSI_CMD_SETUP_RINGS] = {
+ .nargs = sizeof(struct PVSCSICmdDescSetupRings) / sizeof(uint32_t),
+ .fn = pvscsi_cmd_setup_rings
+ },
+ [PVSCSI_CMD_RESET_BUS] = {
+ .nargs = 0,
+ .fn = pvscsi_cmd_reset_bus
+ },
+ [PVSCSI_CMD_RESET_DEVICE] = {
+ .nargs = sizeof(struct PVSCSICmdDescResetDevice) / sizeof(uint32_t),
+ .fn = pvscsi_cmd_reset_device
+ },
+ [PVSCSI_CMD_ABORT_CMD] = {
+ .nargs = sizeof(struct PVSCSICmdDescAbortCmd) / sizeof(uint32_t),
+ .fn = pvscsi_cmd_abort_cmd
+ },
+ [PVSCSI_CMD_CONFIG] = {
+ .nargs = 0, /* unknown */
+ .fn = pvscsi_cmd_unimpl
+ },
+ [PVSCSI_CMD_SETUP_MSG_RING] = {
+ .nargs = sizeof(struct PVSCSICmdDescSetupMsgRing) / sizeof(uint32_t),
+ .fn = pvscsi_cmd_setup_msg_ring
+ },
+ [PVSCSI_CMD_DEVICE_UNPLUG] = {
+ .nargs = 0, /* unknown */
+ .fn = pvscsi_cmd_unimpl
+ }
+};
+
+
+/*
+ * Run the latched command once all of its argument words have arrived,
+ * then clear the latch and the argument counter.  Command numbers
+ * outside the table are dispatched to the PVSCSI_CMD_FIRST entry.
+ */
+static void pvscsi_maybe_do_cmd(PVSCSIState *s)
+{
+    const PVSCSICmd *entry;
+    int idx = PVSCSI_CMD_FIRST;
+
+    if (s->cmd_latch < PVSCSI_CMD_LAST) {
+        idx = s->cmd_latch;
+    }
+    entry = &pvscsi_commands[idx];
+
+    if (s->cmd_ptr < entry->nargs) {
+        /* Still waiting for more argument words. */
+        return;
+    }
+    s->cmd_status = entry->fn(s);
+    s->cmd_latch = 0;
+    s->cmd_ptr = 0;
+}
+
+/*
+ * 32-bit register read.  Returns the command status, interrupt status
+ * or interrupt mask; every other register reads as 0, with a message on
+ * stderr for write-only and unsupported registers.
+ */
+static uint32_t pvscsi_reg_readl(PVSCSIState *s, int offset)
+{
+    uint32_t value = 0;
+
+    switch (offset) {
+    case PVSCSI_REG_OFFSET_COMMAND_STATUS:
+        value = s->cmd_status;
+        break;
+    case PVSCSI_REG_OFFSET_INTR_STATUS:
+        value = s->intr_status;
+        break;
+    case PVSCSI_REG_OFFSET_INTR_MASK:
+        value = s->intr_mask;
+        break;
+    case PVSCSI_REG_OFFSET_COMMAND:
+    case PVSCSI_REG_OFFSET_COMMAND_DATA:
+    case PVSCSI_REG_OFFSET_KICK_NON_RW_IO:
+    case PVSCSI_REG_OFFSET_KICK_RW_IO:
+        fprintf(stderr, "vmw_pvscsi: read to write-only register %x\n", offset);
+        break;
+    case PVSCSI_REG_OFFSET_LAST_STS_0:
+    case PVSCSI_REG_OFFSET_LAST_STS_1:
+    case PVSCSI_REG_OFFSET_LAST_STS_2:
+    case PVSCSI_REG_OFFSET_LAST_STS_3:
+    case PVSCSI_REG_OFFSET_DEBUG:
+        fprintf(stderr, "vmw_pvscsi: read from unsupported register %x\n", offset);
+        break;
+    default:
+        break;
+    }
+    return value;
+}
+
+/*
+ * Register write of 'size' bytes at 'offset'.
+ *
+ * The command and interrupt registers only accept 32-bit accesses.  A
+ * narrower write to one of them is logged and ignored rather than
+ * aborting, because the access is under guest control and must not be
+ * able to terminate the emulator.
+ */
+static void pvscsi_reg_write(PVSCSIState *s, int offset, uint32_t val, int size)
+{
+    if (size != 4) {
+        switch (offset) {
+        case PVSCSI_REG_OFFSET_COMMAND:
+        case PVSCSI_REG_OFFSET_COMMAND_DATA:
+        case PVSCSI_REG_OFFSET_COMMAND_STATUS:
+        case PVSCSI_REG_OFFSET_INTR_STATUS:
+        case PVSCSI_REG_OFFSET_INTR_MASK:
+            fprintf(stderr, "vmw_pvscsi: ignoring %d-byte write to register %x\n",
+                    size, offset);
+            return;
+        default:
+            break;
+        }
+    }
+
+    switch (offset) {
+    case PVSCSI_REG_OFFSET_COMMAND:
+        trace_pvscsi_cmd(val);
+        s->cmd_latch = val;
+        s->cmd_ptr = 0;
+        pvscsi_maybe_do_cmd(s);
+        break;
+    case PVSCSI_REG_OFFSET_COMMAND_DATA:
+        /*
+         * pvscsi_maybe_do_cmd() resets cmd_ptr before it can exceed the
+         * largest nargs; the explicit bound keeps the store safe even if
+         * the command table changes.
+         */
+        if (s->cmd_ptr < sizeof(s->cmd_buffer) / sizeof(s->cmd_buffer[0])) {
+            s->cmd_buffer[s->cmd_ptr++] = val;
+        }
+        pvscsi_maybe_do_cmd(s);
+        break;
+    case PVSCSI_REG_OFFSET_COMMAND_STATUS:
+        fprintf(stderr, "vmw_pvscsi: write to read-only register %x\n", offset);
+        break;
+    case PVSCSI_REG_OFFSET_INTR_STATUS:
+        pvscsi_acknowledge_intr(s, val);
+        break;
+    case PVSCSI_REG_OFFSET_INTR_MASK:
+        pvscsi_set_intr_mask(s, val);
+        break;
+    case PVSCSI_REG_OFFSET_KICK_NON_RW_IO:
+    case PVSCSI_REG_OFFSET_KICK_RW_IO:
+        pvscsi_process_req_ring(s);
+        break;
+
+    case PVSCSI_REG_OFFSET_LAST_STS_0:
+    case PVSCSI_REG_OFFSET_LAST_STS_1:
+    case PVSCSI_REG_OFFSET_LAST_STS_2:
+    case PVSCSI_REG_OFFSET_LAST_STS_3:
+    case PVSCSI_REG_OFFSET_DEBUG:
+        fprintf(stderr, "vmw_pvscsi: write to unsupported register %x\n", offset);
+        break;
+    default:
+        break;
+    }
+}
+
+/* MMIO byte write: forward to pvscsi_reg_write() with size 1. */
+static void pvscsi_mmio_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+    pvscsi_reg_write(opaque, addr & (PVSCSI_MEM_SPACE_SIZE - 1), val, 1);
+}
+
+/* MMIO 16-bit write: forward to pvscsi_reg_write() with size 2. */
+static void pvscsi_mmio_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+    pvscsi_reg_write(opaque, addr & (PVSCSI_MEM_SPACE_SIZE - 1), val, 2);
+}
+
+/* MMIO 32-bit write: forward to pvscsi_reg_write() with size 4. */
+static void pvscsi_mmio_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+    pvscsi_reg_write(opaque, addr & (PVSCSI_MEM_SPACE_SIZE - 1), val, 4);
+}
+
+/*
+ * MMIO byte read.  Only 32-bit reads are implemented; a narrower read is
+ * logged and returns 0 instead of aborting, since the access is under
+ * guest control.
+ */
+static uint32_t pvscsi_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+    fprintf(stderr, "vmw_pvscsi: unsupported 1-byte read from register %x\n",
+            (unsigned) (addr & (PVSCSI_MEM_SPACE_SIZE - 1)));
+    return 0;
+}
+
+/*
+ * MMIO 16-bit read.  Only 32-bit reads are implemented; a narrower read
+ * is logged and returns 0 instead of aborting, since the access is under
+ * guest control.
+ */
+static uint32_t pvscsi_mmio_readw(void *opaque, target_phys_addr_t addr)
+{
+    fprintf(stderr, "vmw_pvscsi: unsupported 2-byte read from register %x\n",
+            (unsigned) (addr & (PVSCSI_MEM_SPACE_SIZE - 1)));
+    return 0;
+}
+
+/* MMIO 32-bit read: forward the masked offset to pvscsi_reg_readl(). */
+static uint32_t pvscsi_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    return pvscsi_reg_readl(opaque, addr & (PVSCSI_MEM_SPACE_SIZE - 1));
+}
+
+/* MMIO read handlers, in the order byte, 16-bit, 32-bit. */
+static CPUReadMemoryFunc * const pvscsi_mmio_readfn[3] = {
+ pvscsi_mmio_readb,
+ pvscsi_mmio_readw,
+ pvscsi_mmio_readl,
+};
+
+/* MMIO write handlers, in the order byte, 16-bit, 32-bit. */
+static CPUWriteMemoryFunc * const pvscsi_mmio_writefn[3] = {
+ pvscsi_mmio_writeb,
+ pvscsi_mmio_writew,
+ pvscsi_mmio_writel,
+};
+
+/*
+ * BAR map callback: back PVSCSI_MEM_SPACE_SIZE bytes at 'addr' with the
+ * device's MMIO handlers.  region_num, size and type are not used.
+ */
+static void pvscsi_mmio_mapfunc(PCIDevice *pci_dev, int region_num,
+ pcibus_t addr, pcibus_t size, int type)
+{
+ PVSCSIState *s = DO_UPCAST(PVSCSIState, dev, pci_dev);
+
+ cpu_register_physical_memory(addr, PVSCSI_MEM_SPACE_SIZE, s->mmio_io_addr);
+}
+
+/* qdev reset handler: same effect as a guest-requested adapter reset. */
+static void pvscsi_reset(DeviceState *dev)
+{
+ PVSCSIState *s = DO_UPCAST(PVSCSIState, dev.qdev, dev);
+
+ pvscsi_soft_reset(s);
+}
+
+/*
+ * PCI exit handler: release what pvscsi_init() and normal operation
+ * allocated.  The bottom half is deleted first so that it cannot run
+ * afterwards, then completions that were never delivered are freed.
+ */
+static int pvscsi_uninit(PCIDevice *d)
+{
+    PVSCSIState *s = DO_UPCAST(PVSCSIState, dev, d);
+
+    qemu_bh_delete(s->complete_reqs_bh);
+    pvscsi_free_queue(&s->complete_queue);
+    cpu_unregister_io_memory(s->mmio_io_addr);
+
+    return 0;
+}
+
+/*
+ * PCI init handler: fill in the configuration space, register the MMIO
+ * BAR, create the completion bottom half and the SCSI bus.
+ *
+ * Returns 0 on success, or the result of attaching the drives given on
+ * the command line when the device is not hotplugged.
+ */
+static int pvscsi_init(PCIDevice *dev)
+{
+    PVSCSIState *s = DO_UPCAST(PVSCSIState, dev, dev);
+    uint8_t *pci_conf;
+
+    pci_conf = s->dev.config;
+
+    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_VMWARE);
+    pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_VMWARE_PVSCSI);
+    pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_SCSI);
+
+    /* PCI subsystem ID */
+    pci_conf[PCI_SUBSYSTEM_ID] = 0x00;
+    pci_conf[PCI_SUBSYSTEM_ID + 1] = 0x10;
+
+    /* PCI latency timer = 255 */
+    pci_conf[PCI_LATENCY_TIMER] = 0xff;
+
+    /* Interrupt pin 1 */
+    pci_conf[PCI_INTERRUPT_PIN] = 0x01;
+
+    /*
+     * Give the queue heads a valid state right away instead of relying
+     * on the first reset to do it.
+     */
+    QTAILQ_INIT(&s->pending_queue);
+    QTAILQ_INIT(&s->complete_queue);
+
+    s->mmio_io_addr = cpu_register_io_memory(pvscsi_mmio_readfn,
+                                             pvscsi_mmio_writefn, s,
+                                             DEVICE_NATIVE_ENDIAN);
+    pci_register_bar(&s->dev, 0, PVSCSI_MEM_SPACE_SIZE,
+                     PCI_BASE_ADDRESS_SPACE_MEMORY, pvscsi_mmio_mapfunc);
+
+    /* TODO: the I/O-space view of the registers (BAR 1) is not implemented. */
+
+    s->complete_reqs_bh = qemu_bh_new(pvscsi_complete_reqs, s);
+
+    scsi_bus_new(&s->bus, &dev->qdev, 1, PVSCSI_MAX_DEVS,
+                 pvscsi_command_complete);
+    if (!dev->qdev.hotplugged) {
+        return scsi_bus_legacy_handle_cmdline(&s->bus);
+    }
+    return 0;
+}
+
+/* qdev/PCI description of the device, registered under "vmw_pvscsi". */
+static PCIDeviceInfo pvscsi_info = {
+ .qdev.name = "vmw_pvscsi",
+ .qdev.size = sizeof(PVSCSIState),
+ .qdev.reset = pvscsi_reset,
+ .init = pvscsi_init,
+ .exit = pvscsi_uninit,
+};
+
+/* Register the PVSCSI device model with the PCI qdev layer. */
+static void vmw_pvscsi_register_devices(void)
+{
+ pci_qdev_register(&pvscsi_info);
+}
+
+device_init(vmw_pvscsi_register_devices);
new file mode 100644
@@ -0,0 +1,389 @@
+/*
+ * VMware PVSCSI header file
+ *
+ * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _VMW_PVSCSI_H_
+#define _VMW_PVSCSI_H_
+
+#define PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT 128
+
+/* The shift is done on a signed int, so n must stay below 31. */
+#define MASK(n) ((1 << (n)) - 1) /* make an n-bit mask */
+
+/* Shorthand for the packed attribute used on all wire-format structures. */
+#define __packed __attribute__((packed))
+
+/*
+ * host adapter status/error codes
+ */
+/* Values reported in the hostStatus field of a completion descriptor. */
+enum HostBusAdapterStatus {
+ BTSTAT_SUCCESS = 0x00, /* CCB complete normally with no errors */
+ BTSTAT_LINKED_COMMAND_COMPLETED = 0x0a,
+ BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG = 0x0b,
+ BTSTAT_DATA_UNDERRUN = 0x0c,
+ BTSTAT_SELTIMEO = 0x11, /* SCSI selection timeout */
+ BTSTAT_DATARUN = 0x12, /* data overrun/underrun */
+ BTSTAT_BUSFREE = 0x13, /* unexpected bus free */
+ BTSTAT_INVPHASE = 0x14, /* invalid bus phase or sequence requested by target */
+ BTSTAT_LUNMISMATCH = 0x17, /* linked CCB has different LUN from first CCB */
+ BTSTAT_SENSFAILED = 0x1b, /* auto request sense failed */
+ BTSTAT_TAGREJECT = 0x1c, /* SCSI II tagged queueing message rejected by target */
+ BTSTAT_BADMSG = 0x1d, /* unsupported message received by the host adapter */
+ BTSTAT_HAHARDWARE = 0x20, /* host adapter hardware failed */
+ BTSTAT_NORESPONSE = 0x21, /* target did not respond to SCSI ATN, sent a SCSI RST */
+ BTSTAT_SENTRST = 0x22, /* host adapter asserted a SCSI RST */
+ BTSTAT_RECVRST = 0x23, /* other SCSI devices asserted a SCSI RST */
+ BTSTAT_DISCONNECT = 0x24, /* target device reconnected improperly (w/o tag) */
+ BTSTAT_BUSRESET = 0x25, /* host adapter issued BUS device reset */
+ BTSTAT_ABORTQUEUE = 0x26, /* abort queue generated */
+ BTSTAT_HASOFTWARE = 0x27, /* host adapter software error */
+ BTSTAT_HATIMEOUT = 0x30, /* host adapter hardware timeout error */
+ BTSTAT_SCSIPARITY = 0x34, /* SCSI parity error detected */
+};
+
+/*
+ * Register offsets.
+ *
+ * These registers are accessible both via i/o space and mm i/o.
+ */
+
+/* Byte offsets of the registers within the device's register space. */
+enum PVSCSIRegOffset {
+ PVSCSI_REG_OFFSET_COMMAND = 0x0,
+ PVSCSI_REG_OFFSET_COMMAND_DATA = 0x4,
+ PVSCSI_REG_OFFSET_COMMAND_STATUS = 0x8,
+ PVSCSI_REG_OFFSET_LAST_STS_0 = 0x100,
+ PVSCSI_REG_OFFSET_LAST_STS_1 = 0x104,
+ PVSCSI_REG_OFFSET_LAST_STS_2 = 0x108,
+ PVSCSI_REG_OFFSET_LAST_STS_3 = 0x10c,
+ PVSCSI_REG_OFFSET_INTR_STATUS = 0x100c,
+ PVSCSI_REG_OFFSET_INTR_MASK = 0x2010,
+ PVSCSI_REG_OFFSET_KICK_NON_RW_IO = 0x3014,
+ PVSCSI_REG_OFFSET_DEBUG = 0x3018,
+ PVSCSI_REG_OFFSET_KICK_RW_IO = 0x4018,
+};
+
+/*
+ * Virtual h/w commands.
+ */
+
+/* Command numbers accepted by the COMMAND register. */
+enum PVSCSICommands {
+ PVSCSI_CMD_FIRST = 0, /* has to be first */
+
+ PVSCSI_CMD_ADAPTER_RESET = 1,
+ PVSCSI_CMD_ISSUE_SCSI = 2,
+ PVSCSI_CMD_SETUP_RINGS = 3,
+ PVSCSI_CMD_RESET_BUS = 4,
+ PVSCSI_CMD_RESET_DEVICE = 5,
+ PVSCSI_CMD_ABORT_CMD = 6,
+ PVSCSI_CMD_CONFIG = 7,
+ PVSCSI_CMD_SETUP_MSG_RING = 8,
+ PVSCSI_CMD_DEVICE_UNPLUG = 9,
+
+ PVSCSI_CMD_LAST = 10 /* has to be last */
+};
+
+/*
+ * Command descriptor for PVSCSI_CMD_RESET_DEVICE --
+ */
+
+/* 12 bytes, i.e. three 32-bit COMMAND_DATA words. */
+struct PVSCSICmdDescResetDevice {
+ uint32_t target;
+ uint8_t lun[8];
+} __packed;
+
+/*
+ * Command descriptor for PVSCSI_CMD_ABORT_CMD --
+ *
+ * - currently does not support specifying the LUN.
+ * - _pad should be 0.
+ */
+
+/* 16 bytes, i.e. four 32-bit COMMAND_DATA words. */
+struct PVSCSICmdDescAbortCmd {
+ uint64_t context;
+ uint32_t target;
+ uint32_t _pad;
+} __packed;
+
+/*
+ * Command descriptor for PVSCSI_CMD_SETUP_RINGS --
+ *
+ * Notes:
+ * - reqRingNumPages and cmpRingNumPages need to be power of two.
+ * - reqRingNumPages and cmpRingNumPages need to be different from 0,
+ * - reqRingNumPages and cmpRingNumPages need to be inferior to
+ * PVSCSI_SETUP_RINGS_MAX_NUM_PAGES.
+ */
+
+/* Capacity of each of the two PPN arrays below. */
+#define PVSCSI_SETUP_RINGS_MAX_NUM_PAGES 32
+struct PVSCSICmdDescSetupRings {
+ uint32_t reqRingNumPages;
+ uint32_t cmpRingNumPages;
+ uint64_t ringsStatePPN; /* page number of the PVSCSIRingsState page */
+ uint64_t reqRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
+ uint64_t cmpRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
+} __packed;
+
+/*
+ * Command descriptor for PVSCSI_CMD_SETUP_MSG_RING --
+ *
+ * Notes:
+ * - this command was not supported in the initial revision of the h/w
+ * interface. Before using it, you need to check that it is supported by
+ * writing PVSCSI_CMD_SETUP_MSG_RING to the 'command' register, then
+ * immediately after read the 'command status' register:
+ * * a value of -1 means that the cmd is NOT supported,
+ * * a value != -1 means that the cmd IS supported.
+ * If it's supported the 'command status' register should return:
+ * sizeof(PVSCSICmdDescSetupMsgRing) / sizeof(uint32_t).
+ * - this command should be issued _after_ the usual SETUP_RINGS so that the
+ * RingsState page is already setup. If not, the command is a nop.
+ * - numPages needs to be a power of two,
+ * - numPages needs to be different from 0,
+ * - _pad should be zero.
+ */
+
+/* Capacity of the ringPPNs array below. */
+#define PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES 16
+
+struct PVSCSICmdDescSetupMsgRing {
+ uint32_t numPages;
+ uint32_t _pad;
+ uint64_t ringPPNs[PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES];
+} __packed;
+
+/* Message types carried in the 'type' field of a message descriptor. */
+enum PVSCSIMsgType {
+ PVSCSI_MSG_DEV_ADDED = 0,
+ PVSCSI_MSG_DEV_REMOVED = 1,
+ PVSCSI_MSG_LAST = 2,
+};
+
+/*
+ * Msg descriptor.
+ *
+ * sizeof(struct PVSCSIRingMsgDesc) == 128.
+ *
+ * - type is of type enum PVSCSIMsgType.
+ * - the content of args depend on the type of event being delivered.
+ */
+
+/* Generic message ring entry: 4 + 31 * 4 = 128 bytes. */
+struct PVSCSIRingMsgDesc {
+ uint32_t type;
+ uint32_t args[31];
+} __packed;
+
+/* Device added/removed message; same 128-byte size as PVSCSIRingMsgDesc. */
+struct PVSCSIMsgDescDevStatusChanged {
+ uint32_t type; /* PVSCSI_MSG_DEV _ADDED / _REMOVED */
+ uint32_t bus;
+ uint32_t target;
+ uint8_t lun[8];
+ uint32_t pad[27];
+} __packed;
+
+/*
+ * Rings state.
+ *
+ * - the fields:
+ * . msgProdIdx,
+ * . msgConsIdx,
+ * . msgNumEntriesLog2,
+ * .. are only used once the SETUP_MSG_RING cmd has been issued.
+ * - '_pad' helps to ensure that the msg related fields are on their own
+ * cache-line.
+ */
+
+/*
+ * Layout of the shared ring state page.  The six request/completion
+ * fields take 24 bytes and '_pad' 104, so the msg fields start at
+ * byte offset 128.
+ */
+struct PVSCSIRingsState {
+ uint32_t reqProdIdx;
+ uint32_t reqConsIdx;
+ uint32_t reqNumEntriesLog2;
+
+ uint32_t cmpProdIdx;
+ uint32_t cmpConsIdx;
+ uint32_t cmpNumEntriesLog2;
+
+ uint8_t _pad[104];
+
+ uint32_t msgProdIdx;
+ uint32_t msgConsIdx;
+ uint32_t msgNumEntriesLog2;
+} __packed;
+
+/*
+ * Request descriptor.
+ *
+ * sizeof(RingReqDesc) = 128
+ *
+ * - context: is a unique identifier of a command. It could normally be any
+ * 64bit value, however we currently store it in the serialNumber variable
+ * of struct SCSI_Command, so we have the following restrictions due to the
+ * way this field is handled in the vmkernel storage stack:
+ * * this value can't be 0,
+ * * the upper 32bit need to be 0 since serialNumber is as a uint32_t.
+ * Currently tracked as PR 292060.
+ * - dataLen: contains the total number of bytes that need to be transferred.
+ * - dataAddr:
+ * * if PVSCSI_FLAG_CMD_WITH_SG_LIST is set: dataAddr is the PA of the first
+ * s/g table segment, each s/g segment is entirely contained on a single
+ * page of physical memory,
+ * * if PVSCSI_FLAG_CMD_WITH_SG_LIST is NOT set, then dataAddr is the PA of
+ * the buffer used for the DMA transfer,
+ * - flags:
+ * * PVSCSI_FLAG_CMD_WITH_SG_LIST: see dataAddr above,
+ * * PVSCSI_FLAG_CMD_DIR_NONE: no DMA involved,
+ * * PVSCSI_FLAG_CMD_DIR_TOHOST: transfer from device to main memory,
+ * * PVSCSI_FLAG_CMD_DIR_TODEVICE: transfer from main memory to device,
+ * * PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB: reserved to handle CDBs larger than
+ * 16bytes. To be specified.
+ * - vcpuHint: vcpuId of the processor that will be most likely waiting for the
+ * completion of the i/o. For guest OSes that use lowest priority message
+ * delivery mode (such as windows), we use this "hint" to deliver the
+ * completion action to the proper vcpu. For now, we can use the vcpuId of
+ * the processor that initiated the i/o as a likely candidate for the vcpu
+ * that will be waiting for the completion..
+ * - bus should be 0: we currently only support bus 0 for now.
+ * - unused should be zero'd.
+ */
+
+/* Bits of the 'flags' field of a request descriptor. */
+#define PVSCSI_FLAG_CMD_WITH_SG_LIST (1 << 0)
+#define PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB (1 << 1)
+#define PVSCSI_FLAG_CMD_DIR_NONE (1 << 2)
+#define PVSCSI_FLAG_CMD_DIR_TOHOST (1 << 3)
+#define PVSCSI_FLAG_CMD_DIR_TODEVICE (1 << 4)
+
+/* Request ring entry; the fields add up to 128 bytes. */
+struct PVSCSIRingReqDesc {
+ uint64_t context;
+ uint64_t dataAddr;
+ uint64_t dataLen;
+ uint64_t senseAddr;
+ uint32_t senseLen;
+ uint32_t flags; /* PVSCSI_FLAG_CMD_* */
+ uint8_t cdb[16];
+ uint8_t cdbLen;
+ uint8_t lun[8];
+ uint8_t tag;
+ uint8_t bus;
+ uint8_t target;
+ uint8_t vcpuHint;
+ uint8_t unused[59];
+} __packed;
+
+/*
+ * Scatter-gather list management.
+ *
+ * As described above, when PVSCSI_FLAG_CMD_WITH_SG_LIST is set in the
+ * RingReqDesc.flags, then RingReqDesc.dataAddr is the PA of the first s/g
+ * table segment.
+ *
+ * - each segment of the s/g table contain a succession of struct
+ * PVSCSISGElement.
+ * - each segment is entirely contained on a single physical page of memory.
+ * - a "chain" s/g element has the flag PVSCSI_SGE_FLAG_CHAIN_ELEMENT set in
+ * PVSCSISGElement.flags and in this case:
+ * * addr is the PA of the next s/g segment,
+ * * length is undefined, assumed to be 0.
+ */
+
+/* One scatter/gather table entry (16 bytes). */
+struct PVSCSISGElement {
+ uint64_t addr;
+ uint32_t length;
+ uint32_t flags;
+} __packed;
+
+/*
+ * Completion descriptor.
+ *
+ * sizeof(RingCmpDesc) = 32
+ *
+ * - context: identifier of the command. The same thing that was specified
+ * under "context" as part of struct RingReqDesc at initiation time,
+ * - dataLen: number of bytes transferred for the actual i/o operation,
+ * - senseLen: number of bytes written into the sense buffer,
+ * - hostStatus: adapter status,
+ * - scsiStatus: device status,
+ * - _pad should be zero.
+ */
+
+/* Completion ring entry; the fields add up to 32 bytes. */
+struct PVSCSIRingCmpDesc {
+ uint64_t context;
+ uint64_t dataLen;
+ uint32_t senseLen;
+ uint16_t hostStatus; /* enum HostBusAdapterStatus */
+ uint16_t scsiStatus;
+ uint32_t _pad[2];
+} __packed;
+
+/*
+ * Interrupt status / IRQ bits.
+ */
+
+/* Completion interrupts use bits 0-1, message interrupts bits 2-3. */
+#define PVSCSI_INTR_CMPL_0 (1 << 0)
+#define PVSCSI_INTR_CMPL_1 (1 << 1)
+#define PVSCSI_INTR_CMPL_MASK MASK(2)
+
+#define PVSCSI_INTR_MSG_0 (1 << 2)
+#define PVSCSI_INTR_MSG_1 (1 << 3)
+#define PVSCSI_INTR_MSG_MASK (MASK(2) << 2)
+
+#define PVSCSI_INTR_ALL_SUPPORTED MASK(4)
+
+/*
+ * Number of MSI-X vectors supported.
+ */
+#define PVSCSI_MAX_INTRS 24
+
+/*
+ * Enumeration of supported MSI-X vectors
+ */
+#define PVSCSI_VECTOR_COMPLETION 0
+
+/*
+ * Misc constants for the rings.
+ */
+
+#define PVSCSI_MAX_NUM_PAGES_REQ_RING PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
+#define PVSCSI_MAX_NUM_PAGES_CMP_RING PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
+#define PVSCSI_MAX_NUM_PAGES_MSG_RING PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES
+
+#define PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE \
+ (PAGE_SIZE / sizeof(struct PVSCSIRingReqDesc))
+
+#define PVSCSI_MAX_REQ_QUEUE_DEPTH \
+ (PVSCSI_MAX_NUM_PAGES_REQ_RING * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE)
+
+#define PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES 1
+#define PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES 1
+#define PVSCSI_MEM_SPACE_MISC_NUM_PAGES 2
+#define PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES 2
+#define PVSCSI_MEM_SPACE_MSIX_NUM_PAGES 2
+
+/* Index of the first page of each area within the register space. */
+enum PVSCSIMemSpace {
+ PVSCSI_MEM_SPACE_COMMAND_PAGE = 0,
+ PVSCSI_MEM_SPACE_INTR_STATUS_PAGE = 1,
+ PVSCSI_MEM_SPACE_MISC_PAGE = 2,
+ PVSCSI_MEM_SPACE_KICK_IO_PAGE = 4,
+ PVSCSI_MEM_SPACE_MSIX_TABLE_PAGE = 6,
+ PVSCSI_MEM_SPACE_MSIX_PBA_PAGE = 7,
+};
+
+/* Total size of the register space: 1 + 1 + 2 + 2 + 2 = 8 pages. */
+#define PVSCSI_MEM_SPACE_NUM_PAGES \
+ (PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES + \
+ PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES + \
+ PVSCSI_MEM_SPACE_MISC_NUM_PAGES + \
+ PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES + \
+ PVSCSI_MEM_SPACE_MSIX_NUM_PAGES)
+
+#define PVSCSI_MEM_SPACE_SIZE (PVSCSI_MEM_SPACE_NUM_PAGES * PAGE_SIZE)
+
+#endif /* _VMW_PVSCSI_H_ */
@@ -211,6 +211,21 @@ disable scsi_req_dequeue(int target, int lun, int tag) "target %d lun %d tag %d"
disable scsi_req_parsed(int target, int lun, int tag, int cmd, const char *cmdname, int mode, int xfer, uint64_t lba) "target %d lun %d tag %d command %d (%s) dir %d length %d lba %"PRIu64""
disable scsi_req_parse_bad(int target, int lun, int tag, int cmd) "target %d lun %d tag %d command %d"
+# hw/vmw_pvscsi.c
+disable pvscsi_queue_request(uint64_t context, uint8_t command, uint64_t dataLen) "context %"PRIu64" command %d length %"PRIu64""
+disable pvscsi_sg_elem(uint64_t context, uint64_t addr, uint64_t length) "context %"PRIu64" addr %"PRIu64" length %"PRIu64""
+disable pvscsi_transfer_data(uint64_t context, uint64_t length) "context %"PRIu64" length %"PRIu64""
+disable pvscsi_request_sense(uint64_t context, int lun) "context %"PRIu64" lun %d"
+disable pvscsi_kick_io(void) "kick request ring"
+disable pvscsi_complete_req(uint64_t context, uint64_t length, uint8_t sense) "context %"PRIu64" length %"PRIu64" sense %d"
+disable pvscsi_cmp_ring_put(uint64_t context) "context %"PRIu64""
+disable pvscsi_raise_intr(uint32_t intr, const char *state) "raised intr %d %s"
+disable pvscsi_acknowledge_intr(uint32_t intr) "acknowledged intr %d"
+disable pvscsi_setup_req_ring(uint32_t pages, uint32_t entries) "req ring - %d pages %d entries"
+disable pvscsi_setup_cmp_ring(uint32_t pages, uint32_t entries) "cmp ring - %d pages %d entries"
+disable pvscsi_setup_msg_ring(uint32_t pages, uint32_t entries) "msg ring - %d pages %d entries"
+disable pvscsi_cmd(int cmd) "command %d"
+
# vl.c
disable vm_state_notify(int running, int reason) "running %d reason %d"
Lightly tested with Linux guests; at least it can successfully partition and format a disk. scsi-generic also lightly tested. Doesn't do migration, doesn't do hotplug (the device would support that, but it is not 100% documented and the Linux driver in particular cannot initiate hot-unplug). I did it as a quick one-day hack to study the SCSI subsystem and it is my first real foray into device model land, please be gentle. :) vmw_pvscsi.h is taken from Linux, so it doesn't fully respect coding standards. I think that's fair. Size is curiously close to the recently added sPAPR adapter: 911 2354 25553 hw/vmw_pvscsi.c 988 3177 29628 hw/spapr_vscsi.c Sounds like that's just the amount of code it takes to implement a SCSI HBA in QEMU. :) Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Cc: Zachary Amsden <zamsden@redhat.com> --- Makefile.objs | 1 + default-configs/pci.mak | 1 + hw/pci.h | 1 + hw/vmw_pvscsi.c | 911 +++++++++++++++++++++++++++++++++++++++++++++++ hw/vmw_pvscsi.h | 389 ++++++++++++++++++++ trace-events | 15 + 6 files changed, 1318 insertions(+), 0 deletions(-) create mode 100644 hw/vmw_pvscsi.c create mode 100644 hw/vmw_pvscsi.h