Message ID | 1300251423-6715-25-git-send-email-david@gibson.dropbear.id.au |
---|---|
State | New |
Headers | show |
On 03/16/2011 05:57 AM, David Gibson wrote: > This patch implements the infrastructure and hypercalls necessary for > the PAPR specified Virtual SCSI interface. This is the normal method > for providing (virtual) disks to PAPR partitions. > > Signed-off-by: Ben Herrenschmidt<benh@kernel.crashing.org> > Signed-off-by: David Gibson<dwg@au1.ibm.com> > --- > Makefile.target | 2 +- > hw/ppc-viosrp.h | 216 ++++++++++++ > hw/spapr.c | 10 +- > hw/spapr_vio.h | 3 + > hw/spapr_vscsi.c | 960 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/srp.h | 241 ++++++++++++++ > 6 files changed, 1430 insertions(+), 2 deletions(-) > create mode 100644 hw/ppc-viosrp.h > create mode 100644 hw/spapr_vscsi.c > create mode 100644 hw/srp.h > > diff --git a/Makefile.target b/Makefile.target > index ef86d43..49f9e9a 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -233,7 +233,7 @@ obj-ppc-y += ppc_oldworld.o > obj-ppc-y += ppc_newworld.o > # IBM pSeries (sPAPR) > obj-ppc-y += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o > -obj-ppc-y += xics.o spapr_vty.o spapr_llan.o > +obj-ppc-y += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o > # PowerPC 4xx boards > obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o > obj-ppc-y += ppc440.o ppc440_bamboo.o > diff --git a/hw/ppc-viosrp.h b/hw/ppc-viosrp.h > new file mode 100644 > index 0000000..9afcf7a > --- /dev/null > +++ b/hw/ppc-viosrp.h > @@ -0,0 +1,216 @@ > +/*****************************************************************************/ > +/* srp.h -- SCSI RDMA Protocol definitions */ > +/* */ > +/* Written By: Colin Devilbis, IBM Corporation */ > +/* */ > +/* Copyright (C) 2003 IBM Corporation */ > +/* */ > +/* This program is free software; you can redistribute it and/or modify */ > +/* it under the terms of the GNU General Public License as published by */ > +/* the Free Software Foundation; either version 2 of the License, or */ > +/* (at your option) any later version. 
*/ > +/* */ > +/* This program is distributed in the hope that it will be useful, */ > +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ > +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ > +/* GNU General Public License for more details. */ > +/* */ > +/* You should have received a copy of the GNU General Public License */ > +/* along with this program; if not, write to the Free Software */ > +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ > +/* */ > +/* */ > +/* This file contains structures and definitions for IBM RPA (RS/6000 */ > +/* platform architecture) implementation of the SRP (SCSI RDMA Protocol) */ > +/* standard. SRP is used on IBM iSeries and pSeries platforms to send SCSI */ > +/* commands between logical partitions. */ > +/* */ > +/* SRP Information Units (IUs) are sent on a "Command/Response Queue" (CRQ) */ > +/* between partitions. The definitions in this file are architected, */ > +/* and cannot be changed without breaking compatibility with other versions */ > +/* of Linux and other operating systems (AIX, OS/400) that talk this protocol*/ > +/* between logical partitions */ > +/*****************************************************************************/ > +#ifndef PPC_VIOSRP_H > +#define PPC_VIOSRP_H > + > +#define SRP_VERSION "16.a" > +#define SRP_MAX_IU_LEN 256 > +#define SRP_MAX_LOC_LEN 32 > + > +union srp_iu { > + struct srp_login_req login_req; > + struct srp_login_rsp login_rsp; > + struct srp_login_rej login_rej; > + struct srp_i_logout i_logout; > + struct srp_t_logout t_logout; > + struct srp_tsk_mgmt tsk_mgmt; > + struct srp_cmd cmd; > + struct srp_rsp rsp; > + uint8_t reserved[SRP_MAX_IU_LEN]; > +}; > + > +enum viosrp_crq_formats { > + VIOSRP_SRP_FORMAT = 0x01, > + VIOSRP_MAD_FORMAT = 0x02, > + VIOSRP_OS400_FORMAT = 0x03, > + VIOSRP_AIX_FORMAT = 0x04, > + VIOSRP_LINUX_FORMAT = 0x06, > + VIOSRP_INLINE_FORMAT = 0x07 > +}; > + > +enum viosrp_crq_status { > + 
VIOSRP_OK = 0x0, > + VIOSRP_NONRECOVERABLE_ERR = 0x1, > + VIOSRP_VIOLATES_MAX_XFER = 0x2, > + VIOSRP_PARTNER_PANIC = 0x3, > + VIOSRP_DEVICE_BUSY = 0x8, > + VIOSRP_ADAPTER_FAIL = 0x10, > + VIOSRP_OK2 = 0x99, > +}; > + > +struct viosrp_crq { > + uint8_t valid; /* used by RPA */ > + uint8_t format; /* SCSI vs out-of-band */ > + uint8_t reserved; > + uint8_t status; /* non-scsi failure? (e.g. DMA failure) */ > + uint16_t timeout; /* in seconds */ > + uint16_t IU_length; /* in bytes */ > + uint64_t IU_data_ptr; /* the TCE for transferring data */ > +}; > + > +/* MADs are Management requests above and beyond the IUs defined in the SRP > + * standard. > + */ > +enum viosrp_mad_types { > + VIOSRP_EMPTY_IU_TYPE = 0x01, > + VIOSRP_ERROR_LOG_TYPE = 0x02, > + VIOSRP_ADAPTER_INFO_TYPE = 0x03, > + VIOSRP_HOST_CONFIG_TYPE = 0x04, > + VIOSRP_CAPABILITIES_TYPE = 0x05, > + VIOSRP_ENABLE_FAST_FAIL = 0x08, > +}; > + > +enum viosrp_mad_status { > + VIOSRP_MAD_SUCCESS = 0x00, > + VIOSRP_MAD_NOT_SUPPORTED = 0xF1, > + VIOSRP_MAD_FAILED = 0xF7, > +}; > + > +enum viosrp_capability_type { > + MIGRATION_CAPABILITIES = 0x01, > + RESERVATION_CAPABILITIES = 0x02, > +}; > + > +enum viosrp_capability_support { > + SERVER_DOES_NOT_SUPPORTS_CAP = 0x0, > + SERVER_SUPPORTS_CAP = 0x01, > + SERVER_CAP_DATA = 0x02, > +}; > + > +enum viosrp_reserve_type { > + CLIENT_RESERVE_SCSI_2 = 0x01, > +}; > + > +enum viosrp_capability_flag { > + CLIENT_MIGRATED = 0x01, > + CLIENT_RECONNECT = 0x02, > + CAP_LIST_SUPPORTED = 0x04, > + CAP_LIST_DATA = 0x08, > +}; > + > +/* > + * Common MAD header > + */ > +struct mad_common { > + uint32_t type; > + uint16_t status; > + uint16_t length; > + uint64_t tag; Is this an in-memory representation? If so, it should be packed, right? Same goes for the ones below. > +}; > + > +/* > + * All SRP (and MAD) requests normally flow from the > + * client to the server. There is no way for the server to send > + * an asynchronous message back to the client. 
The Empty IU is used > + * to hang out a meaningless request to the server so that it can respond > + * asynchrouously with something like a SCSI AER > + */ > +struct viosrp_empty_iu { > + struct mad_common common; > + uint64_t buffer; > + uint32_t port; > +}; > + > +struct viosrp_error_log { > + struct mad_common common; > + uint64_t buffer; > +}; > + > +struct viosrp_adapter_info { > + struct mad_common common; > + uint64_t buffer; > +}; > + > +struct viosrp_host_config { > + struct mad_common common; > + uint64_t buffer; > +}; > + > +struct viosrp_fast_fail { > + struct mad_common common; > +}; > + > +struct viosrp_capabilities { > + struct mad_common common; > + uint64_t buffer; > +}; > + > +struct mad_capability_common { > + uint32_t cap_type; > + uint16_t length; > + uint16_t server_support; > +}; > + > +struct mad_reserve_cap { > + struct mad_capability_common common; > + uint32_t type; > +}; > + > +struct mad_migration_cap { > + struct mad_capability_common common; > + uint32_t ecl; > +}; > + > +struct capabilities{ Space, but unused struct > + uint32_t flags; > + char name[SRP_MAX_LOC_LEN]; > + char loc[SRP_MAX_LOC_LEN]; > + struct mad_migration_cap migration; > + struct mad_reserve_cap reserve; > +}; > + > +union mad_iu { > + struct viosrp_empty_iu empty_iu; > + struct viosrp_error_log error_log; > + struct viosrp_adapter_info adapter_info; > + struct viosrp_host_config host_config; > + struct viosrp_fast_fail fast_fail; > + struct viosrp_capabilities capabilities; > +}; > + > +union viosrp_iu { > + union srp_iu srp; > + union mad_iu mad; > +}; > + > +struct mad_adapter_info_data { > + char srp_version[8]; > + char partition_name[96]; > + uint32_t partition_number; > + uint32_t mad_version; > + uint32_t os_type; > + uint32_t port_max_txu[8]; /* per-port maximum transfer */ > +}; > + > +#endif > diff --git a/hw/spapr.c b/hw/spapr.c > index cb97a16..5f868fc 100644 > --- a/hw/spapr.c > +++ b/hw/spapr.c > @@ -28,6 +28,7 @@ > #include "hw.h" > #include "elf.h" 
> #include "net.h" > +#include "blockdev.h" > > #include "hw/boards.h" > #include "hw/ppc.h" > @@ -316,7 +317,7 @@ static void ppc_spapr_init(ram_addr_t ram_size, > qemu_free(filename); > > /* Set up Interrupt Controller */ > - spapr->icp = xics_system_init(smp_cpus, envs, MAX_SERIAL_PORTS + nb_nics); > + spapr->icp = xics_system_init(smp_cpus, envs, MAX_SERIAL_PORTS + nb_nics + drive_get_max_bus(IF_SCSI) + 1); This looks like it's exceeding 80 characters :) > > /* Set up VIO bus */ > spapr->vio_bus = spapr_vio_bus_init(); > @@ -346,6 +347,12 @@ static void ppc_spapr_init(ram_addr_t ram_size, > } > } > > + for (i = 0; i<= drive_get_max_bus(IF_SCSI); i++) { > + spapr_vscsi_create(spapr->vio_bus, 0x2000 + i, > + xics_find_qirq(spapr->icp, irq), irq); > + irq++; > + } > + > if (kernel_filename) { > uint64_t lowaddr = 0; > > @@ -406,6 +413,7 @@ static QEMUMachine spapr_machine = { > .max_cpus = MAX_CPUS, > .no_vga = 1, > .no_parallel = 1, > + .use_scsi = 1, > }; > > static void spapr_machine_init(void) > diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h > index ba16795..b7d0daa 100644 > --- a/hw/spapr_vio.h > +++ b/hw/spapr_vio.h > @@ -101,4 +101,7 @@ void spapr_vty_create(VIOsPAPRBus *bus, > void spapr_vlan_create(VIOsPAPRBus *bus, uint32_t reg, NICInfo *nd, > qemu_irq qirq, uint32_t vio_irq_num); > > +void spapr_vscsi_create(VIOsPAPRBus *bus, uint32_t reg, > + qemu_irq qirq, uint32_t vio_irq_num); > + > #endif /* _HW_SPAPR_VIO_H */ > diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c > new file mode 100644 > index 0000000..0a67095 > --- /dev/null > +++ b/hw/spapr_vscsi.c > @@ -0,0 +1,960 @@ License header > +/* TODO: > + * > + * - Cleanups :-) > + * - Sort out better how to assign devices to VSCSI instances > + * - Fix residual counts > + * - Add indirect descriptors support > + * - Maybe do autosense (PAPR seems to mandate it, linux doesn't care) > + */ > +#include "hw.h" > +#include "scsi.h" > +#include "scsi-defs.h" > +#include "net.h" /* Remove that when we can */ > 
+#include "srp.h" > +#include "hw/qdev.h" > +#include "hw/spapr.h" > +#include "hw/spapr_vio.h" > +#include "hw/ppc-viosrp.h" > + > +#include<libfdt.h> > + > +//#define DEBUG_VSCSI > + > +#ifdef DEBUG_VSCSI > +#define dprintf(fmt, ...) \ > + do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) > +#else > +#define dprintf(fmt, ...) \ > + do { } while (0) > +#endif > + > +#define min(a, b) ((a)< (b) ? (a) : (b)) There's MIN for that > + > +/* > + * Virtual SCSI device > + */ > + > +/* Random numbers */ > +#define VSCSI_MAX_SECTORS 4096/*1024*//*256*/ Probably good to just remove the commented out ones > +#define VSCSI_REQ_LIMIT 24 > + > +#define SCSI_SENSE_BUF_SIZE 96 > +#define SRP_RSP_SENSE_DATA_LEN 18 > + > +typedef union vscsi_crq { > + struct viosrp_crq s; > + uint8_t raw[16]; > +} vscsi_crq; > + > +typedef struct vscsi_req > +{ > + vscsi_crq crq; > + union viosrp_iu iu; > + > + /* SCSI request tracking */ > + SCSIDevice *sdev; > + uint32_t qtag; /* qemu tag != srp tag */ > + int lun; > + int active; > + long data_len; > + int writing; > + int sensing; > + int senselen; > + uint8_t sense[SCSI_SENSE_BUF_SIZE]; > + > + /* RDMA related bits */ > + uint8_t dma_fmt; > + struct srp_direct_buf ext_desc; > + struct srp_direct_buf *cur_desc; > + struct srp_indirect_buf *ind_desc; > + int local_desc; > + int total_desc; > + > +} vscsi_req; > + > + > +typedef struct { > + VIOsPAPRDevice vdev; > + SCSIBus bus; > + vscsi_req reqs[VSCSI_REQ_LIMIT]; > +} VSCSIState; > + > +/* XXX Debug only */ > +static VSCSIState *dbg_vscsi_state; > + > + > +static struct vscsi_req *vscsi_get_req(VSCSIState *s) > +{ > + vscsi_req *req; > + int i; > + > + for (i = 0; i< VSCSI_REQ_LIMIT; i++) { > + req =&s->reqs[i]; > + if (!req->active) { > + memset(req, 0, sizeof(*req)); > + req->qtag = i; > + req->active = 1; > + return req; > + } > + } > + return NULL; > +} > + > +static void vscsi_put_req(VSCSIState *s, vscsi_req *req) > +{ > + req->active = 0; > +} > + > +static vscsi_req 
*vscsi_find_req(VSCSIState *s, uint32_t tag) > +{ > + if (tag>= VSCSI_REQ_LIMIT || !s->reqs[tag].active) { > + return NULL; > + } > + return&s->reqs[tag]; > +} > + > +static void vscsi_decode_id_lun(uint64_t srp_lun, int *id, int *lun) > +{ > + /* XXX Figure that one out properly ! This is crackpot */ > + *id = (srp_lun>> 56)& 0x7f; > + *lun = (srp_lun>> 48)& 0xff; > +} > + > +static int vscsi_send_iu(VSCSIState *s, vscsi_req *req, > + uint64_t length, uint8_t format) > +{ > + long rc, rc1; > + > + /* First copy the SRP */ > + rc = spapr_tce_dma_write(&s->vdev, req->crq.s.IU_data_ptr, > +&req->iu, length); > + if (rc) { > + fprintf(stderr, "vscsi_send_iu: DMA write failure !\n"); > + } > + > + req->crq.s.valid = 0x80; > + req->crq.s.format = format; > + req->crq.s.reserved = 0x00; > + req->crq.s.timeout = cpu_to_be16(0x0000); > + req->crq.s.IU_length = cpu_to_be16(length); > + req->crq.s.IU_data_ptr = req->iu.srp.rsp.tag; /* right byte order */ > + > + if (rc == 0) { > + req->crq.s.status = 0x99; /* Just needs to be non-zero */ > + } else { > + req->crq.s.status = 0x00; > + } > + > + rc1 = spapr_vio_send_crq(&s->vdev, req->crq.raw); > + if (rc1) { > + fprintf(stderr, "vscsi_send_iu: Error sending response\n"); > + return rc1; > + } > + > + return rc; > +} > + > +static void vscsi_makeup_sense(VSCSIState *s, vscsi_req *req, > + uint8_t key, uint8_t asc, uint8_t ascq) > +{ > + req->senselen = SRP_RSP_SENSE_DATA_LEN; > + > + /* Valid bit and 'current errors' */ > + req->sense[0] = (0x1<< 7 | 0x70); > + /* Sense key */ > + req->sense[2] = key; > + /* Additional sense length */ > + req->sense[7] = 0xa; /* 10 bytes */ > + /* Additional sense code */ > + req->sense[12] = asc; > + req->sense[13] = ascq; > +} > + > +static int vscsi_send_rsp(VSCSIState *s, vscsi_req *req, > + uint8_t status, int32_t res_in, int32_t res_out) > +{ > + union viosrp_iu *iu =&req->iu; > + uint64_t tag = iu->srp.rsp.tag; > + int total_len = sizeof(iu->srp.rsp); > + > + dprintf("VSCSI: Sending 
resp status: 0x%x, " > + "res_in: %d, res_out: %d \n", status, res_in, res_out); > + > + memset(iu, 0, sizeof(struct srp_rsp)); > + iu->srp.rsp.opcode = SRP_RSP; > + iu->srp.rsp.req_lim_delta = cpu_to_be32(1); > + iu->srp.rsp.tag = tag; > + > + /* Handle residuals */ > + if (res_in< 0) { > + iu->srp.rsp.flags |= SRP_RSP_FLAG_DIUNDER; > + res_in = -res_in; > + } else if (res_in) { > + iu->srp.rsp.flags |= SRP_RSP_FLAG_DIOVER; > + } > + if (res_out< 0) { > + iu->srp.rsp.flags |= SRP_RSP_FLAG_DOUNDER; > + res_out = -res_out; > + } else if (res_out) { > + iu->srp.rsp.flags |= SRP_RSP_FLAG_DOOVER; > + } > + iu->srp.rsp.data_in_res_cnt = cpu_to_be32(res_in); > + iu->srp.rsp.data_out_res_cnt = cpu_to_be32(res_out); > + > + /* We don't do response data */ > + /* iu->srp.rsp.flags&= ~SRP_RSP_FLAG_RSPVALID; */ > + iu->srp.rsp.resp_data_len = cpu_to_be32(0); > + > + /* Handle success vs. failure */ > + iu->srp.rsp.status = status; > + if (status) { > + iu->srp.rsp.sol_not = (iu->srp.cmd.sol_not& 0x04)>> 2; > + if (req->senselen) { > + req->iu.srp.rsp.flags |= SRP_RSP_FLAG_SNSVALID; > + req->iu.srp.rsp.sense_data_len = cpu_to_be32(req->senselen); > + memcpy(req->iu.srp.rsp.data, req->sense, req->senselen); > + total_len += req->senselen; > + } > + } else { > + iu->srp.rsp.sol_not = (iu->srp.cmd.sol_not& 0x02)>> 1; > + } > + > + vscsi_send_iu(s, req, total_len, VIOSRP_SRP_FORMAT); > + return 0; > +} > + > +static inline void vscsi_swap_desc(struct srp_direct_buf *desc) > +{ > + desc->va = be64_to_cpu(desc->va); > + desc->len = be32_to_cpu(desc->len); > +} > + > +static int vscsi_srp_direct_data(VSCSIState *s, vscsi_req *req, > + uint8_t *buf, uint32_t len) > +{ > + struct srp_direct_buf *md = req->cur_desc; > + uint32_t llen; > + int rc; > + > + dprintf("VSCSI: direct segment 0x%x bytes, va=0x%llx desc len=0x%x\n", > + len, (unsigned long long)md->va, md->len); > + > + llen = min(len, md->len); > + if (llen) { > + if (req->writing) { /* writing = to device = reading from memory 
*/ > + rc = spapr_tce_dma_read(&s->vdev, md->va, buf, llen); > + } else { > + rc = spapr_tce_dma_write(&s->vdev, md->va, buf, llen); > + } > + } > + md->len -= llen; > + md->va += llen; > + > + if (rc) { > + return -1; > + } > + return llen; > +} > + > +static int vscsi_srp_indirect_data(VSCSIState *s, vscsi_req *req, > + uint8_t *buf, uint32_t len) > +{ > + struct srp_direct_buf *td =&req->ind_desc->table_desc; > + struct srp_direct_buf *md = req->cur_desc; > + int rc = 0; > + uint32_t llen, total = 0; > + > + dprintf("VSCSI: indirect segment 0x%x bytes, td va=0x%llx len=0x%x\n", > + len, (unsigned long long)td->va, td->len); > + > + /* While we have data ... */ > + while(len) { > + /* If we have a descriptor but it's empty, go fetch a new one */ > + if (md&& md->len == 0) { > + /* More local available, use one */ > + if (req->local_desc) { > + md = ++req->cur_desc; > + --req->local_desc; > + --req->total_desc; > + td->va += sizeof(struct srp_direct_buf); > + } else { > + md = req->cur_desc = NULL; > + } > + } > + /* No descriptor at hand, fetch one */ > + if (!md) { > + if (!req->total_desc) { > + dprintf("VSCSI: Out of descriptors !\n"); > + break; > + } > + md = req->cur_desc =&req->ext_desc; > + dprintf("VSCSI: Reading desc from 0x%llx\n", (unsigned long long)td->va); > + rc = spapr_tce_dma_read(&s->vdev, td->va, md, sizeof(struct srp_direct_buf)); > + if (rc) { > + dprintf("VSCSI: tce_dma_read -> %d reading ext_desc\n", rc); > + break; > + } > + vscsi_swap_desc(md); > + td->va += sizeof(struct srp_direct_buf); > + --req->total_desc; > + } > + dprintf("VSCSI: [desc va=0x%llx,len=0x%x] remaining=0x%x\n", > + (unsigned long long)md->va, md->len, len); > + > + /* Perform transfer */ > + llen = min(len, md->len); > + if (req->writing) { /* writing = to device = reading from memory */ > + rc = spapr_tce_dma_read(&s->vdev, md->va, buf, llen); > + spurious line > + } else { > + rc = spapr_tce_dma_write(&s->vdev, md->va, buf, llen); > + } > + if (rc) { > + 
dprintf("VSCSI: tce_dma_r/w(%d) -> %d\n", req->writing, rc); > + break; > + } > + dprintf("VSCSI: data: %02x %02x %02x %02x...\n", > + buf[0], buf[1], buf[2], buf[3]); > + > + len -= llen; > + buf += llen; > + total += llen; > + md->va += llen; > + md->len -= llen; > + } > + return rc ? -1 : total; > +} > + > +static int vscsi_srp_transfer_data(VSCSIState *s, vscsi_req *req, > + int writing, uint8_t *buf, uint32_t len) > +{ > + int err = 0; > + > + switch (req->dma_fmt) { > + case SRP_NO_DATA_DESC: > + dprintf("VSCSI: no data desc transfer, skipping 0x%x bytes\n", len); > + break; > + case SRP_DATA_DESC_DIRECT: > + err = vscsi_srp_direct_data(s, req, buf, len); > + break; > + case SRP_DATA_DESC_INDIRECT: > + err = vscsi_srp_indirect_data(s, req, buf, len); > + break; > + } > + return err; > +} > + > +/* Bits from linux srp */ > +static int data_out_desc_size(struct srp_cmd *cmd) > +{ > + int size = 0; > + uint8_t fmt = cmd->buf_fmt>> 4; > + > + switch (fmt) { > + case SRP_NO_DATA_DESC: > + break; > + case SRP_DATA_DESC_DIRECT: > + size = sizeof(struct srp_direct_buf); > + break; > + case SRP_DATA_DESC_INDIRECT: > + size = sizeof(struct srp_indirect_buf) + > + sizeof(struct srp_direct_buf) * cmd->data_out_desc_cnt; > + break; > + default: > + break; > + } > + return size; > +} > + > +static int vscsi_preprocess_desc(vscsi_req *req) > +{ > + struct srp_cmd *cmd =&req->iu.srp.cmd; > + int offset, i; > + > + offset = cmd->add_cdb_len& ~3; > + > + if (req->writing) { > + req->dma_fmt = cmd->buf_fmt>> 4; > + } else { > + offset += data_out_desc_size(cmd); > + req->dma_fmt = cmd->buf_fmt& ((1U<< 4) - 1); > + } > + > + switch (req->dma_fmt) { > + case SRP_NO_DATA_DESC: > + break; > + case SRP_DATA_DESC_DIRECT: > + req->cur_desc = (struct srp_direct_buf *)(cmd->add_data + offset); > + req->total_desc = req->local_desc = 1; > + vscsi_swap_desc(req->cur_desc); > + dprintf("VSCSI: using direct RDMA %s, 0x%x bytes MD: 0x%llx\n", > + req->writing ? 
"write" : "read", > + req->cur_desc->len, (unsigned long long)req->cur_desc->va); > + break; > + case SRP_DATA_DESC_INDIRECT: > + req->ind_desc = (struct srp_indirect_buf *)(cmd->add_data + offset); > + vscsi_swap_desc(&req->ind_desc->table_desc); > + req->total_desc = req->ind_desc->table_desc.len / sizeof(struct srp_direct_buf); > + req->local_desc = req->writing ? cmd->data_out_desc_cnt : > + cmd->data_in_desc_cnt; > + for (i = 0; i< req->local_desc; i++) Braces > + vscsi_swap_desc(&req->ind_desc->desc_list[i]); > + req->cur_desc = req->local_desc ?&req->ind_desc->desc_list[0] : NULL; > + dprintf("VSCSI: using indirect RDMA %s, 0x%x bytes %d descs (%d local) VA: 0x%llx\n", > + req->writing ? "read" : "write", be32_to_cpu(req->ind_desc->len), > + req->total_desc, req->local_desc, > + (unsigned long long)req->ind_desc->table_desc.va); > + break; > + default: > + fprintf(stderr, > + "vscsi_preprocess_desc: Unknown format %x\n", req->dma_fmt); > + return -1; > + } > + > + return 0; > +} > + > +static void vscsi_send_request_sense(VSCSIState *s, vscsi_req *req) > +{ > + SCSIDevice *sdev = req->sdev; > + uint8_t *cdb = req->iu.srp.cmd.cdb; > + int n; > + > + cdb[0] = 3; > + cdb[1] = 0; > + cdb[2] = 0; > + cdb[3] = 0; > + cdb[4] = 96; > + cdb[5] = 0; > + req->sensing = 1; > + n = sdev->info->send_command(sdev, req->qtag, cdb, req->lun); > + dprintf("VSCSI: Queued request sense tag 0x%x \n", req->qtag); > + if (n< 0) { > + fprintf(stderr, "VSCSI: REQUEST_SENSE wants write data !?!?!?\n"); > + sdev->info->cancel_io(sdev, req->qtag); > + vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0); > + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); > + vscsi_put_req(s, req); > + return; > + } else if (n == 0) { > + return; > + } > + sdev->info->read_data(sdev, req->qtag); > +} > + > +/* Callback to indicate that the SCSI layer has completed a transfer. 
*/ > +static void vscsi_command_complete(SCSIBus *bus, int reason, uint32_t tag, > + uint32_t arg) > +{ > + VSCSIState *s = DO_UPCAST(VSCSIState, vdev.qdev, bus->qbus.parent); > + vscsi_req *req = vscsi_find_req(s, tag); > + SCSIDevice *sdev; > + uint8_t *buf; > + int32_t res_in = 0, res_out = 0; > + int len, rc = 0; > + > + dprintf("VSCSI: SCSI cmd complete, r=0x%x tag=0x%x arg=0x%x, req=%p\n", > + reason, tag, arg, req); > + if (req == NULL) { > + fprintf(stderr, "VSCSI: Can't find request for tag 0x%x\n", tag); > + return; > + } > + sdev = req->sdev; > + > + if (req->sensing) { > + if (reason == SCSI_REASON_DONE) { > + dprintf("VSCSI: Sense done !\n"); > + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); > + vscsi_put_req(s, req); > + } else { > + uint8_t *buf = sdev->info->get_buf(sdev, tag); > + > + len = min(arg, SCSI_SENSE_BUF_SIZE); > + dprintf("VSCSI: Sense data, %d bytes:\n", len); > + dprintf(" %02x %02x %02x %02x %02x %02x %02x %02x\n", > + buf[0], buf[1], buf[2], buf[3], > + buf[4], buf[5], buf[6], buf[7]); > + dprintf(" %02x %02x %02x %02x %02x %02x %02x %02x\n", > + buf[8], buf[9], buf[10], buf[11], > + buf[12], buf[13], buf[14], buf[15]); > + memcpy(req->sense, buf, len); > + req->senselen = len; > + sdev->info->read_data(sdev, req->qtag); > + } > + return; > + } > + > + if (reason == SCSI_REASON_DONE) { > + dprintf("VSCSI: Command complete err=%d\n", arg); > + if (arg == 0) { > + /* We handle overflows, not underflows for normal commands, > + * but hopefully nobody cares > + */ > + if (req->writing) Braces > + res_out = req->data_len; > + else > + res_in = req->data_len; > + vscsi_send_rsp(s, req, 0, res_in, res_out); > + } else if (arg == CHECK_CONDITION) { > + dprintf("VSCSI: Got CHECK_CONDITION, requesting sense...\n"); > + vscsi_send_request_sense(s, req); > + return; > + } else { > + vscsi_send_rsp(s, req, arg, 0, 0); > + } > + vscsi_put_req(s, req); > + return; > + } > + > + /* "arg" is how much we have read for reads and how much we want > + * 
to write for writes (ie, how much is to be DMA'd) > + */ > + if (arg) { > + buf = sdev->info->get_buf(sdev, tag); > + rc = vscsi_srp_transfer_data(s, req, req->writing, buf, arg); > + } > + if (rc< 0) { > + fprintf(stderr, "VSCSI: RDMA error rc=%d!\n", rc); > + sdev->info->cancel_io(sdev, req->qtag); > + vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0); > + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); > + vscsi_put_req(s, req); > + return; > + } > + > + /* Start next chunk */ > + req->data_len -= rc; > + if (req->writing) { > + sdev->info->write_data(sdev, req->qtag); > + } else { > + sdev->info->read_data(sdev, req->qtag); > + } > +} > + > +static void vscsi_process_login(VSCSIState *s, vscsi_req *req) > +{ > + union viosrp_iu *iu =&req->iu; > + struct srp_login_rsp *rsp =&iu->srp.login_rsp; > + uint64_t tag = iu->srp.rsp.tag; > + > + dprintf("VSCSI: Got login, sendin response !\n"); > + > + /* TODO handle case that requested size is wrong and > + * buffer format is wrong > + */ > + memset(iu, 0, sizeof(struct srp_login_rsp)); > + rsp->opcode = SRP_LOGIN_RSP; > + /* Don't advertise quite as many request as we support to > + * keep room for management stuff etc... > + */ > + rsp->req_lim_delta = cpu_to_be32(VSCSI_REQ_LIMIT-2); > + rsp->tag = tag; > + rsp->max_it_iu_len = cpu_to_be32(sizeof(union srp_iu)); > + rsp->max_ti_iu_len = cpu_to_be32(sizeof(union srp_iu)); > + /* direct and indirect */ > + rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); > + > + vscsi_send_iu(s, req, sizeof(*rsp), VIOSRP_SRP_FORMAT); > +} > + > +static void vscsi_inquiry_no_target(VSCSIState *s, vscsi_req *req) > +{ > + uint8_t *cdb = req->iu.srp.cmd.cdb; > + uint8_t resp_data[36]; > + int rc, len, alen; > + > + /* We dont do EVPD. 
Also check that page_code is 0 */ > + if ((cdb[1]& 0x01) || (cdb[1]& 0x01) || cdb[2] != 0) { > + /* Send INVALID FIELD IN CDB */ > + vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x24, 0); > + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); > + return; > + } > + alen = cdb[3]; > + alen = (alen<< 8) | cdb[4]; > + len = min(alen, 36); > + > + /* Fake up inquiry using PQ=3 */ > + memset(resp_data, 0, 36); > + resp_data[0] = 0x7f; /* Not capable of supporting a device here */ > + resp_data[2] = 0x06; /* SPS-4 */ > + resp_data[3] = 0x02; /* Resp data format */ > + resp_data[4] = 36 - 5; /* Additional length */ > + resp_data[7] = 0x10; /* Sync transfers */ > + memcpy(&resp_data[16], "QEMU EMPTY ", 16); > + memcpy(&resp_data[8], "QEMU ", 8); > + > + req->writing = 0; > + vscsi_preprocess_desc(req); > + rc = vscsi_srp_transfer_data(s, req, 0, resp_data, len); > + if (rc< 0) { > + vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0); > + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); > + } else { > + vscsi_send_rsp(s, req, 0, 36 - rc, 0); > + } > +} > + > +static int vscsi_queue_cmd(VSCSIState *s, vscsi_req *req) > +{ > + union srp_iu *srp =&req->iu.srp; > + SCSIDevice *sdev; > + int n, id, lun; > + > + vscsi_decode_id_lun(be64_to_cpu(srp->cmd.lun),&id,&lun); > + > + /* Qemu vs. linux issue with LUNs to be sorted out ... */ > + sdev = (id< 8&& lun< 16) ? 
s->bus.devs[id] : NULL; > + if (!sdev) { > + dprintf("VSCSI: Command for id %d with no drive\n", id); > + if (srp->cmd.cdb[0] == INQUIRY) { > + vscsi_inquiry_no_target(s, req); > + } else { > + vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x24, 0x00); > + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); > + } return 1; > + } > + > + req->sdev = sdev; > + req->lun = lun; > + n = sdev->info->send_command(sdev, req->qtag, srp->cmd.cdb, lun); > + > + dprintf("VSCSI: Queued command tag 0x%x CMD 0x%x ID %d LUN %d ret: %d\n", > + req->qtag, srp->cmd.cdb[0], id, lun, n); > + > + if (n) { > + /* Transfer direction must be set before preprocessing the > + * descriptors > + */ > + req->writing = (n< 1); > + > + /* Preprocess RDMA descriptors */ > + vscsi_preprocess_desc(req); > + } > + > + /* Get transfer direction and initiate transfer */ > + if (n> 0) { > + req->data_len = n; > + sdev->info->read_data(sdev, req->qtag); > + } else if (n< 0) { > + req->data_len = -n; > + sdev->info->write_data(sdev, req->qtag); > + } > + /* Don't touch req here, it may have been recycled already */ > + > + return 0; > +} > + > +static int vscsi_process_tsk_mgmt(VSCSIState *s, vscsi_req *req) > +{ > + union viosrp_iu *iu =&req->iu; > + int fn; > + > + fprintf(stderr, "vscsi_process_tsk_mgmt %02x\n", > + iu->srp.tsk_mgmt.tsk_mgmt_func); > + > + switch (iu->srp.tsk_mgmt.tsk_mgmt_func) { > +#if 0 /* We really don't deal with these for now */ > + case SRP_TSK_ABORT_TASK: > + fn = ABORT_TASK; > + break; > + case SRP_TSK_ABORT_TASK_SET: > + fn = ABORT_TASK_SET; > + break; > + case SRP_TSK_CLEAR_TASK_SET: > + fn = CLEAR_TASK_SET; > + break; > + case SRP_TSK_LUN_RESET: > + fn = LOGICAL_UNIT_RESET; > + break; > + case SRP_TSK_CLEAR_ACA: > + fn = CLEAR_ACA; > + break; > +#endif > + default: > + fn = 0; > + } > + if (fn) { > + /* XXX Send/Handle target task management */ > + ; > + } else { > + vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x20, 0); > + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); > + } > + 
return !fn; > +} > + > +static int vscsi_handle_srp_req(VSCSIState *s, vscsi_req *req) > +{ > + union srp_iu *srp =&req->iu.srp; > + int done = 1; > + uint8_t opcode = srp->rsp.opcode; > + > + switch (opcode) { > + case SRP_LOGIN_REQ: > + vscsi_process_login(s, req); > + break; > + case SRP_TSK_MGMT: > + done = vscsi_process_tsk_mgmt(s, req); > + break; > + case SRP_CMD: > + done = vscsi_queue_cmd(s, req); > + break; > + case SRP_LOGIN_RSP: > + case SRP_I_LOGOUT: > + case SRP_T_LOGOUT: > + case SRP_RSP: > + case SRP_CRED_REQ: > + case SRP_CRED_RSP: > + case SRP_AER_REQ: > + case SRP_AER_RSP: > + fprintf(stderr, "VSCSI: Unsupported opcode %02x\n", opcode); > + break; > + default: > + fprintf(stderr, "VSCSI: Unknown type %02x\n", opcode); > + } > + > + return done; > +} > + > +static int vscsi_send_adapter_info(VSCSIState *s, vscsi_req *req) > +{ > + struct viosrp_adapter_info *sinfo; > + struct mad_adapter_info_data info; > + int rc; > + > + sinfo =&req->iu.mad.adapter_info; > + > +#if 0 /* What for ? */ > + rc = spapr_tce_dma_read(&s->vdev, be64_to_cpu(sinfo->buffer), > +&info, be16_to_cpu(sinfo->common.length)); > + if (rc) { > + fprintf(stderr, "vscsi_send_adapter_info: DMA read failure !\n"); > + } > +#endif > + memset(&info, 0, sizeof(info)); > + strcpy(info.srp_version, SRP_VERSION); > + strncpy(info.partition_name, "qemu", sizeof("qemu")); > + info.partition_number = cpu_to_be32(0); > + info.mad_version = cpu_to_be32(1); > + info.os_type = cpu_to_be32(2); > + info.port_max_txu[0] = cpu_to_be32(VSCSI_MAX_SECTORS<< 9); > + > + rc = spapr_tce_dma_write(&s->vdev, be64_to_cpu(sinfo->buffer), > +&info, be16_to_cpu(sinfo->common.length)); > + if (rc) { > + fprintf(stderr, "vscsi_send_adapter_info: DMA write failure !\n"); > + } > + > + sinfo->common.status = rc ? 
cpu_to_be32(1) : 0; > + > + return vscsi_send_iu(s, req, sizeof(*sinfo), VIOSRP_MAD_FORMAT); > +} > + > +static int vscsi_handle_mad_req(VSCSIState *s, vscsi_req *req) > +{ > + union mad_iu *mad =&req->iu.mad; > + > + switch (be32_to_cpu(mad->empty_iu.common.type)) { > + case VIOSRP_EMPTY_IU_TYPE: > + fprintf(stderr, "Unsupported EMPTY MAD IU\n"); > + break; > + case VIOSRP_ERROR_LOG_TYPE: > + fprintf(stderr, "Unsupported ERROR LOG MAD IU\n"); > + mad->error_log.common.status = cpu_to_be16(1); > + vscsi_send_iu(s, req, sizeof(mad->error_log), VIOSRP_MAD_FORMAT); > + break; > + case VIOSRP_ADAPTER_INFO_TYPE: > + vscsi_send_adapter_info(s, req); > + break; > + case VIOSRP_HOST_CONFIG_TYPE: > + mad->host_config.common.status = cpu_to_be16(1); > + vscsi_send_iu(s, req, sizeof(mad->host_config), VIOSRP_MAD_FORMAT); > + break; > + default: > + fprintf(stderr, "VSCSI: Unknown MAD type %02x\n", > + be32_to_cpu(mad->empty_iu.common.type)); > + } > + > + return 1; > +} > + > +static void vscsi_got_payload(VSCSIState *s, vscsi_crq *crq) > +{ > + vscsi_req *req; > + int done; > + > + req = vscsi_get_req(s); > + if (req == NULL) { > + fprintf(stderr, "VSCSI: Failed to get a request !\n"); > + return; > + } > + > + /* We only support a limited number of descriptors, we know > + * the ibmvscsi driver uses up to 10 max, so it should fit > + * in our 256 bytes IUs. If not we'll have to increase the size > + * of the structure. > + */ > + if (crq->s.IU_length> sizeof(union viosrp_iu)) { > + fprintf(stderr, "VSCSI: SRP IU too long (%d bytes) !\n", > + crq->s.IU_length); > + return; > + } > + > + /* XXX Handle failure differently ? 
*/ > + if (spapr_tce_dma_read(&s->vdev, crq->s.IU_data_ptr,&req->iu, > + crq->s.IU_length)) { > + fprintf(stderr, "vscsi_got_payload: DMA read failure !\n"); > + qemu_free(req); > + } > + memcpy(&req->crq, crq, sizeof(vscsi_crq)); > + > + if (crq->s.format == VIOSRP_MAD_FORMAT) { > + done = vscsi_handle_mad_req(s, req); > + } else { > + done = vscsi_handle_srp_req(s, req); > + } > + > + if (done) { > + vscsi_put_req(s, req); > + } > +} > + > + > +static int vscsi_do_crq(struct VIOsPAPRDevice *dev, uint8_t *crq_data) > +{ > + VSCSIState *s = DO_UPCAST(VSCSIState, vdev, dev); > + vscsi_crq crq; > + > + memcpy(crq.raw, crq_data, 16); > + crq.s.timeout = be16_to_cpu(crq.s.timeout); > + crq.s.IU_length = be16_to_cpu(crq.s.IU_length); > + crq.s.IU_data_ptr = be64_to_cpu(crq.s.IU_data_ptr); > + > + dprintf("VSCSI: do_crq %02x %02x ...\n", crq.raw[0], crq.raw[1]); > + > + switch(crq.s.valid) { > + case 0xc0: /* Init command/response */ > + > + /* Respond to initialization request */ > + if (crq.s.format == 0x01) { > + memset(crq.raw, 0, 16); > + crq.s.valid = 0xc0; > + crq.s.format = 0x02; > + spapr_vio_send_crq(dev, crq.raw); > + } > + > + /* Note that in hotplug cases, we might get a 0x02 > + * as a result of us emitting the init request > + */ > + > + break; > + case 0xff: /* Link event */ > + > + /* Not handled for now */ > + > + break; > + case 0x80: /* Payloads */ > + switch (crq.s.format) { > + case VIOSRP_SRP_FORMAT: /* AKA VSCSI request */ > + case VIOSRP_MAD_FORMAT: /* AKA VSCSI response */ > + vscsi_got_payload(s,&crq); > + break; > + case VIOSRP_OS400_FORMAT: > + case VIOSRP_AIX_FORMAT: > + case VIOSRP_LINUX_FORMAT: > + case VIOSRP_INLINE_FORMAT: > + fprintf(stderr, "vscsi_do_srq: Unsupported payload format %02x\n", > + crq.s.format); > + break; > + default: > + fprintf(stderr, "vscsi_do_srq: Unknown payload format %02x\n", > + crq.s.format); > + } > + break; > + default: > + fprintf(stderr, "vscsi_do_crq: unknown CRQ %02x %02x ...\n", > + crq.raw[0], 
crq.raw[1]); > + }; > + > + return 0; > +} > + > +static int spapr_vscsi_init(VIOsPAPRDevice *dev) > +{ > + VSCSIState *s = DO_UPCAST(VSCSIState, vdev, dev); > + int i; > + > + dbg_vscsi_state = s; > + > + /* Initialize qemu request tags */ > + memset(s->reqs, 0, sizeof(s->reqs)); > + for (i = 0; i< VSCSI_REQ_LIMIT; i++) Braces > + s->reqs[i].qtag = i; > + > + dev->crq.SendFunc = vscsi_do_crq; > + > + scsi_bus_new(&s->bus,&dev->qdev, 1, VSCSI_REQ_LIMIT, > + vscsi_command_complete); > + if (!dev->qdev.hotplugged) { > + scsi_bus_legacy_handle_cmdline(&s->bus); > + } > + > + return 0; > +} > + > +void spapr_vscsi_create(VIOsPAPRBus *bus, uint32_t reg, > + qemu_irq qirq, uint32_t vio_irq_num) > +{ > + DeviceState *dev; > + VIOsPAPRDevice *sdev; > + > + dev = qdev_create(&bus->bus, "spapr-vscsi"); > + qdev_prop_set_uint32(dev, "reg", reg); > + > + qdev_init_nofail(dev); > + > + sdev = (VIOsPAPRDevice *)dev; > + sdev->qirq = qirq; > + sdev->vio_irq_num = vio_irq_num; > +} > + > +static int spapr_vscsi_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off) > +{ > + int ret; > + > + ret = fdt_setprop_cell(fdt, node_off, "#address-cells", 2); > + if (ret< 0) { > + return ret; > + } > + > + ret = fdt_setprop_cell(fdt, node_off, "#size-cells", 0); > + if (ret< 0) { > + return ret; > + } > + > + return 0; > +} > + > +static VIOsPAPRDeviceInfo spapr_vscsi = { > + .init = spapr_vscsi_init, > + .devnode = spapr_vscsi_devnode, > + .dt_name = "v-scsi", > + .dt_type = "vscsi", > + .dt_compatible = "IBM,v-scsi", > + .signal_mask = 0x00000001, > + .qdev.name = "spapr-vscsi", > + .qdev.size = sizeof(VSCSIState), > + .qdev.props = (Property[]) { > + DEFINE_PROP_UINT32("reg", VIOsPAPRDevice, reg, 0x2000), > + DEFINE_PROP_UINT32("dma-window", VIOsPAPRDevice, > + rtce_window_size, 0x10000000), > + DEFINE_PROP_END_OF_LIST(), > + }, > +}; > + > +static void spapr_vscsi_register(void) > +{ > + spapr_vio_bus_register_withprop(&spapr_vscsi); > +} > +device_init(spapr_vscsi_register); > diff 
--git a/hw/srp.h b/hw/srp.h > new file mode 100644 > index 0000000..9d55fc4 > --- /dev/null > +++ b/hw/srp.h > @@ -0,0 +1,241 @@ > +/* > + * Copyright (c) 2005 Cisco Systems. All rights reserved. > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + * > + * $Id$ > + */ > + > +#ifndef SCSI_SRP_H > +#define SCSI_SRP_H > + > +/* > + * Structures and constants for the SCSI RDMA Protocol (SRP) as > + * defined by the INCITS T10 committee. This file was written using > + * draft Revision 16a of the SRP standard. 
> + */ > + > +enum { > + > + SRP_LOGIN_REQ = 0x00, > + SRP_TSK_MGMT = 0x01, > + SRP_CMD = 0x02, > + SRP_I_LOGOUT = 0x03, > + SRP_LOGIN_RSP = 0xc0, > + SRP_RSP = 0xc1, > + SRP_LOGIN_REJ = 0xc2, > + SRP_T_LOGOUT = 0x80, > + SRP_CRED_REQ = 0x81, > + SRP_AER_REQ = 0x82, > + SRP_CRED_RSP = 0x41, > + SRP_AER_RSP = 0x42 > +}; > + > +enum { > + SRP_BUF_FORMAT_DIRECT = 1<< 1, > + SRP_BUF_FORMAT_INDIRECT = 1<< 2 > +}; > + > +enum { > + SRP_NO_DATA_DESC = 0, > + SRP_DATA_DESC_DIRECT = 1, > + SRP_DATA_DESC_INDIRECT = 2 > +}; > + > +enum { > + SRP_TSK_ABORT_TASK = 0x01, > + SRP_TSK_ABORT_TASK_SET = 0x02, > + SRP_TSK_CLEAR_TASK_SET = 0x04, > + SRP_TSK_LUN_RESET = 0x08, > + SRP_TSK_CLEAR_ACA = 0x40 > +}; > + > +enum srp_login_rej_reason { > + SRP_LOGIN_REJ_UNABLE_ESTABLISH_CHANNEL = 0x00010000, > + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES = 0x00010001, > + SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE = 0x00010002, > + SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL = 0x00010003, > + SRP_LOGIN_REJ_UNSUPPORTED_DESCRIPTOR_FMT = 0x00010004, > + SRP_LOGIN_REJ_MULTI_CHANNEL_UNSUPPORTED = 0x00010005, > + SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED = 0x00010006 > +}; > + > +enum { > + SRP_REV10_IB_IO_CLASS = 0xff00, > + SRP_REV16A_IB_IO_CLASS = 0x0100 > +}; > + > +struct srp_direct_buf { > + uint64_t va; > + uint32_t key; > + uint32_t len; > +}; > + > +/* > + * We need the packed attribute because the SRP spec puts the list of > + * descriptors at an offset of 20, which is not aligned to the size of > + * struct srp_direct_buf. The whole structure must be packed to avoid > + * having the 20-byte structure padded to 24 bytes on 64-bit architectures. 
> + */ > +struct srp_indirect_buf { > + struct srp_direct_buf table_desc; > + uint32_t len; > + struct srp_direct_buf desc_list[0]; > +} __attribute__((packed)); > + > +enum { > + SRP_MULTICHAN_SINGLE = 0, > + SRP_MULTICHAN_MULTI = 1 > +}; > + > +struct srp_login_req { > + uint8_t opcode; > + uint8_t reserved1[7]; > + uint64_t tag; > + uint32_t req_it_iu_len; > + uint8_t reserved2[4]; > + uint16_t req_buf_fmt; > + uint8_t req_flags; > + uint8_t reserved3[5]; > + uint8_t initiator_port_id[16]; > + uint8_t target_port_id[16]; > +}; > + > +/* > + * The SRP spec defines the size of the LOGIN_RSP structure to be 52 > + * bytes, so it needs to be packed to avoid having it padded to 56 > + * bytes on 64-bit architectures. > + */ > +struct srp_login_rsp { > + uint8_t opcode; > + uint8_t reserved1[3]; > + uint32_t req_lim_delta; > + uint64_t tag; > + uint32_t max_it_iu_len; > + uint32_t max_ti_iu_len; > + uint16_t buf_fmt; > + uint8_t rsp_flags; > + uint8_t reserved2[25]; > +} __attribute__((packed)); > + > +struct srp_login_rej { > + uint8_t opcode; > + uint8_t reserved1[3]; > + uint32_t reason; > + uint64_t tag; > + uint8_t reserved2[8]; > + uint16_t buf_fmt; > + uint8_t reserved3[6]; > +}; Why isn't this one packed? And the ones below? > + > +struct srp_i_logout { > + uint8_t opcode; > + uint8_t reserved[7]; > + uint64_t tag; > +}; > + > +struct srp_t_logout { > + uint8_t opcode; > + uint8_t sol_not; > + uint8_t reserved[2]; > + uint32_t reason; > + uint64_t tag; > +}; > + > +/* > + * We need the packed attribute because the SRP spec only aligns the > + * 8-byte LUN field to 4 bytes. 
> + */ > +struct srp_tsk_mgmt { > + uint8_t opcode; > + uint8_t sol_not; > + uint8_t reserved1[6]; > + uint64_t tag; > + uint8_t reserved2[4]; > + uint64_t lun __attribute__((packed)); > + uint8_t reserved3[2]; > + uint8_t tsk_mgmt_func; > + uint8_t reserved4; > + uint64_t task_tag; > + uint8_t reserved5[8]; > +}; > + > +/* > + * We need the packed attribute because the SRP spec only aligns the > + * 8-byte LUN field to 4 bytes. > + */ > +struct srp_cmd { > + uint8_t opcode; > + uint8_t sol_not; > + uint8_t reserved1[3]; > + uint8_t buf_fmt; > + uint8_t data_out_desc_cnt; > + uint8_t data_in_desc_cnt; > + uint64_t tag; > + uint8_t reserved2[4]; > + uint64_t lun __attribute__((packed)); > + uint8_t reserved3; > + uint8_t task_attr; > + uint8_t reserved4; > + uint8_t add_cdb_len; > + uint8_t cdb[16]; > + uint8_t add_data[0]; > +}; > + > +enum { > + SRP_RSP_FLAG_RSPVALID = 1<< 0, > + SRP_RSP_FLAG_SNSVALID = 1<< 1, > + SRP_RSP_FLAG_DOOVER = 1<< 2, > + SRP_RSP_FLAG_DOUNDER = 1<< 3, > + SRP_RSP_FLAG_DIOVER = 1<< 4, > + SRP_RSP_FLAG_DIUNDER = 1<< 5 > +}; > + > +/* > + * The SRP spec defines the size of the RSP structure to be 36 bytes, > + * so it needs to be packed to avoid having it padded to 40 bytes on > + * 64-bit architectures. > + */ > +struct srp_rsp { > + uint8_t opcode; > + uint8_t sol_not; > + uint8_t reserved1[2]; > + uint32_t req_lim_delta; > + uint64_t tag; > + uint8_t reserved2[2]; > + uint8_t flags; > + uint8_t status; > + uint32_t data_out_res_cnt; > + uint32_t data_in_res_cnt; > + uint32_t sense_data_len; > + uint32_t resp_data_len; > + uint8_t data[0]; > +} __attribute__((packed)); > + > +#endif /* SCSI_SRP_H */ Alex
On 03/16/2011 11:41 AM, Alexander Graf wrote: >> new file mode 100644 >> index 0000000..9d55fc4 >> --- /dev/null >> +++ b/hw/srp.h >> @@ -0,0 +1,241 @@ >> +/* >> + * Copyright (c) 2005 Cisco Systems. All rights reserved. >> + * >> + * This software is available to you under a choice of one of two >> + * licenses. You may choose to be licensed under the terms of the GNU >> + * General Public License (GPL) Version 2, available from the file >> + * COPYING in the main directory of this source tree, or the >> + * OpenIB.org BSD license below: >> + * >> + * Redistribution and use in source and binary forms, with or >> + * without modification, are permitted provided that the following >> + * conditions are met: >> + * >> + * - Redistributions of source code must retain the above >> + * copyright notice, this list of conditions and the following >> + * disclaimer. >> + * >> + * - Redistributions in binary form must reproduce the above >> + * copyright notice, this list of conditions and the following >> + * disclaimer in the documentation and/or other materials >> + * provided with the distribution. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS >> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN >> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN >> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE >> + * SOFTWARE. >> + * >> + * $Id$ >> + */ >> + >> +#ifndef SCSI_SRP_H >> +#define SCSI_SRP_H >> + >> +/* >> + * Structures and constants for the SCSI RDMA Protocol (SRP) as >> + * defined by the INCITS T10 committee. This file was written using >> + * draft Revision 16a of the SRP standard. 
>> + */ >> + >> +enum { >> + >> + SRP_LOGIN_REQ = 0x00, >> + SRP_TSK_MGMT = 0x01, >> + SRP_CMD = 0x02, >> + SRP_I_LOGOUT = 0x03, >> + SRP_LOGIN_RSP = 0xc0, >> + SRP_RSP = 0xc1, >> + SRP_LOGIN_REJ = 0xc2, >> + SRP_T_LOGOUT = 0x80, >> + SRP_CRED_REQ = 0x81, >> + SRP_AER_REQ = 0x82, >> + SRP_CRED_RSP = 0x41, >> + SRP_AER_RSP = 0x42 >> +}; >> + >> +enum { >> + SRP_BUF_FORMAT_DIRECT = 1<< 1, >> + SRP_BUF_FORMAT_INDIRECT = 1<< 2 >> +}; >> + >> +enum { >> + SRP_NO_DATA_DESC = 0, >> + SRP_DATA_DESC_DIRECT = 1, >> + SRP_DATA_DESC_INDIRECT = 2 >> +}; >> + >> +enum { >> + SRP_TSK_ABORT_TASK = 0x01, >> + SRP_TSK_ABORT_TASK_SET = 0x02, >> + SRP_TSK_CLEAR_TASK_SET = 0x04, >> + SRP_TSK_LUN_RESET = 0x08, >> + SRP_TSK_CLEAR_ACA = 0x40 >> +}; >> + >> +enum srp_login_rej_reason { >> + SRP_LOGIN_REJ_UNABLE_ESTABLISH_CHANNEL = 0x00010000, >> + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES = 0x00010001, >> + SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE = 0x00010002, >> + SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL = 0x00010003, >> + SRP_LOGIN_REJ_UNSUPPORTED_DESCRIPTOR_FMT = 0x00010004, >> + SRP_LOGIN_REJ_MULTI_CHANNEL_UNSUPPORTED = 0x00010005, >> + SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED = 0x00010006 >> +}; >> + >> +enum { >> + SRP_REV10_IB_IO_CLASS = 0xff00, >> + SRP_REV16A_IB_IO_CLASS = 0x0100 >> +}; >> + >> +struct srp_direct_buf { >> + uint64_t va; >> + uint32_t key; >> + uint32_t len; >> +}; >> + >> +/* >> + * We need the packed attribute because the SRP spec puts the list of >> + * descriptors at an offset of 20, which is not aligned to the size of >> + * struct srp_direct_buf. The whole structure must be packed to avoid >> + * having the 20-byte structure padded to 24 bytes on 64-bit >> architectures. 
>> + */ >> +struct srp_indirect_buf { >> + struct srp_direct_buf table_desc; >> + uint32_t len; >> + struct srp_direct_buf desc_list[0]; >> +} __attribute__((packed)); >> + >> +enum { >> + SRP_MULTICHAN_SINGLE = 0, >> + SRP_MULTICHAN_MULTI = 1 >> +}; >> + >> +struct srp_login_req { >> + uint8_t opcode; >> + uint8_t reserved1[7]; >> + uint64_t tag; >> + uint32_t req_it_iu_len; >> + uint8_t reserved2[4]; >> + uint16_t req_buf_fmt; >> + uint8_t req_flags; >> + uint8_t reserved3[5]; >> + uint8_t initiator_port_id[16]; >> + uint8_t target_port_id[16]; >> +}; >> + >> +/* >> + * The SRP spec defines the size of the LOGIN_RSP structure to be 52 >> + * bytes, so it needs to be packed to avoid having it padded to 56 >> + * bytes on 64-bit architectures. >> + */ >> +struct srp_login_rsp { >> + uint8_t opcode; >> + uint8_t reserved1[3]; >> + uint32_t req_lim_delta; >> + uint64_t tag; >> + uint32_t max_it_iu_len; >> + uint32_t max_ti_iu_len; >> + uint16_t buf_fmt; >> + uint8_t rsp_flags; >> + uint8_t reserved2[25]; >> +} __attribute__((packed)); >> + >> +struct srp_login_rej { >> + uint8_t opcode; >> + uint8_t reserved1[3]; >> + uint32_t reason; >> + uint64_t tag; >> + uint8_t reserved2[8]; >> + uint16_t buf_fmt; >> + uint8_t reserved3[6]; >> +}; > diff --git a/hw/srp.h b/hw/srp.h > > Why isn't this one packed? And the ones below? It's naturally aligned. There's no need to pack things that are naturally aligned (structure size is a multiple of 8 and each type starts at an offset that's a multiple of it's size). Regards, Anthony Liguori
On Wed, 2011-03-16 at 17:41 +0100, Alexander Graf wrote: > > +/* > > + * Common MAD header > > + */ > > +struct mad_common { > > + uint32_t type; > > + uint16_t status; > > + uint16_t length; > > + uint64_t tag; > > Is this an in-memory representation? If so, it should be packed, right? > Same goes for the ones below. Well, all the fields are naturally aligned, as is the structure itself, do we really need to pack ? Cheers, Ben.
On 03/16/2011 03:08 PM, Benjamin Herrenschmidt wrote: > On Wed, 2011-03-16 at 17:41 +0100, Alexander Graf wrote: > >>> +/* >>> + * Common MAD header >>> + */ >>> +struct mad_common { >>> + uint32_t type; >>> + uint16_t status; >>> + uint16_t length; >>> + uint64_t tag; >> Is this an in-memory representation? If so, it should be packed, right? >> Same goes for the ones below. > Well, all the fields are naturally aligned, as is the structure itself, > do we really need to pack ? No. Regards, Anthony Liguori > Cheers, > Ben. > > >
diff --git a/Makefile.target b/Makefile.target index ef86d43..49f9e9a 100644 --- a/Makefile.target +++ b/Makefile.target @@ -233,7 +233,7 @@ obj-ppc-y += ppc_oldworld.o obj-ppc-y += ppc_newworld.o # IBM pSeries (sPAPR) obj-ppc-y += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o -obj-ppc-y += xics.o spapr_vty.o spapr_llan.o +obj-ppc-y += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o # PowerPC 4xx boards obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o obj-ppc-y += ppc440.o ppc440_bamboo.o diff --git a/hw/ppc-viosrp.h b/hw/ppc-viosrp.h new file mode 100644 index 0000000..9afcf7a --- /dev/null +++ b/hw/ppc-viosrp.h @@ -0,0 +1,216 @@ +/*****************************************************************************/ +/* srp.h -- SCSI RDMA Protocol definitions */ +/* */ +/* Written By: Colin Devilbis, IBM Corporation */ +/* */ +/* Copyright (C) 2003 IBM Corporation */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* */ +/* */ +/* This file contains structures and definitions for IBM RPA (RS/6000 */ +/* platform architecture) implementation of the SRP (SCSI RDMA Protocol) */ +/* standard. SRP is used on IBM iSeries and pSeries platforms to send SCSI */ +/* commands between logical partitions. 
*/ +/* */ +/* SRP Information Units (IUs) are sent on a "Command/Response Queue" (CRQ) */ +/* between partitions. The definitions in this file are architected, */ +/* and cannot be changed without breaking compatibility with other versions */ +/* of Linux and other operating systems (AIX, OS/400) that talk this protocol*/ +/* between logical partitions */ +/*****************************************************************************/ +#ifndef PPC_VIOSRP_H +#define PPC_VIOSRP_H + +#define SRP_VERSION "16.a" +#define SRP_MAX_IU_LEN 256 +#define SRP_MAX_LOC_LEN 32 + +union srp_iu { + struct srp_login_req login_req; + struct srp_login_rsp login_rsp; + struct srp_login_rej login_rej; + struct srp_i_logout i_logout; + struct srp_t_logout t_logout; + struct srp_tsk_mgmt tsk_mgmt; + struct srp_cmd cmd; + struct srp_rsp rsp; + uint8_t reserved[SRP_MAX_IU_LEN]; +}; + +enum viosrp_crq_formats { + VIOSRP_SRP_FORMAT = 0x01, + VIOSRP_MAD_FORMAT = 0x02, + VIOSRP_OS400_FORMAT = 0x03, + VIOSRP_AIX_FORMAT = 0x04, + VIOSRP_LINUX_FORMAT = 0x06, + VIOSRP_INLINE_FORMAT = 0x07 +}; + +enum viosrp_crq_status { + VIOSRP_OK = 0x0, + VIOSRP_NONRECOVERABLE_ERR = 0x1, + VIOSRP_VIOLATES_MAX_XFER = 0x2, + VIOSRP_PARTNER_PANIC = 0x3, + VIOSRP_DEVICE_BUSY = 0x8, + VIOSRP_ADAPTER_FAIL = 0x10, + VIOSRP_OK2 = 0x99, +}; + +struct viosrp_crq { + uint8_t valid; /* used by RPA */ + uint8_t format; /* SCSI vs out-of-band */ + uint8_t reserved; + uint8_t status; /* non-scsi failure? (e.g. DMA failure) */ + uint16_t timeout; /* in seconds */ + uint16_t IU_length; /* in bytes */ + uint64_t IU_data_ptr; /* the TCE for transferring data */ +}; + +/* MADs are Management requests above and beyond the IUs defined in the SRP + * standard. 
+ */ +enum viosrp_mad_types { + VIOSRP_EMPTY_IU_TYPE = 0x01, + VIOSRP_ERROR_LOG_TYPE = 0x02, + VIOSRP_ADAPTER_INFO_TYPE = 0x03, + VIOSRP_HOST_CONFIG_TYPE = 0x04, + VIOSRP_CAPABILITIES_TYPE = 0x05, + VIOSRP_ENABLE_FAST_FAIL = 0x08, +}; + +enum viosrp_mad_status { + VIOSRP_MAD_SUCCESS = 0x00, + VIOSRP_MAD_NOT_SUPPORTED = 0xF1, + VIOSRP_MAD_FAILED = 0xF7, +}; + +enum viosrp_capability_type { + MIGRATION_CAPABILITIES = 0x01, + RESERVATION_CAPABILITIES = 0x02, +}; + +enum viosrp_capability_support { + SERVER_DOES_NOT_SUPPORTS_CAP = 0x0, + SERVER_SUPPORTS_CAP = 0x01, + SERVER_CAP_DATA = 0x02, +}; + +enum viosrp_reserve_type { + CLIENT_RESERVE_SCSI_2 = 0x01, +}; + +enum viosrp_capability_flag { + CLIENT_MIGRATED = 0x01, + CLIENT_RECONNECT = 0x02, + CAP_LIST_SUPPORTED = 0x04, + CAP_LIST_DATA = 0x08, +}; + +/* + * Common MAD header + */ +struct mad_common { + uint32_t type; + uint16_t status; + uint16_t length; + uint64_t tag; +}; + +/* + * All SRP (and MAD) requests normally flow from the + * client to the server. There is no way for the server to send + * an asynchronous message back to the client. 
The Empty IU is used + * to hang out a meaningless request to the server so that it can respond + * asynchronously with something like a SCSI AER + */
Controller */ - spapr->icp = xics_system_init(smp_cpus, envs, MAX_SERIAL_PORTS + nb_nics); + spapr->icp = xics_system_init(smp_cpus, envs, MAX_SERIAL_PORTS + nb_nics + drive_get_max_bus(IF_SCSI) + 1); /* Set up VIO bus */ spapr->vio_bus = spapr_vio_bus_init(); @@ -346,6 +347,12 @@ static void ppc_spapr_init(ram_addr_t ram_size, } } + for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) { + spapr_vscsi_create(spapr->vio_bus, 0x2000 + i, + xics_find_qirq(spapr->icp, irq), irq); + irq++; + } + if (kernel_filename) { uint64_t lowaddr = 0; @@ -406,6 +413,7 @@ static QEMUMachine spapr_machine = { .max_cpus = MAX_CPUS, .no_vga = 1, .no_parallel = 1, + .use_scsi = 1, }; static void spapr_machine_init(void) diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h index ba16795..b7d0daa 100644 --- a/hw/spapr_vio.h +++ b/hw/spapr_vio.h @@ -101,4 +101,7 @@ void spapr_vty_create(VIOsPAPRBus *bus, void spapr_vlan_create(VIOsPAPRBus *bus, uint32_t reg, NICInfo *nd, qemu_irq qirq, uint32_t vio_irq_num); +void spapr_vscsi_create(VIOsPAPRBus *bus, uint32_t reg, + qemu_irq qirq, uint32_t vio_irq_num); + #endif /* _HW_SPAPR_VIO_H */ diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c new file mode 100644 index 0000000..0a67095 --- /dev/null +++ b/hw/spapr_vscsi.c @@ -0,0 +1,960 @@ +/* TODO: + * + * - Cleanups :-) + * - Sort out better how to assign devices to VSCSI instances + * - Fix residual counts + * - Add indirect descriptors support + * - Maybe do autosense (PAPR seems to mandate it, linux doesn't care) + */ +#include "hw.h" +#include "scsi.h" +#include "scsi-defs.h" +#include "net.h" /* Remove that when we can */ +#include "srp.h" +#include "hw/qdev.h" +#include "hw/spapr.h" +#include "hw/spapr_vio.h" +#include "hw/ppc-viosrp.h" + +#include <libfdt.h> + +//#define DEBUG_VSCSI + +#ifdef DEBUG_VSCSI +#define dprintf(fmt, ...) \ + do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) +#else +#define dprintf(fmt, ...) \ + do { } while (0) +#endif + +#define min(a, b) ((a) < (b) ? 
(a) : (b)) + +/* + * Virtual SCSI device + */ + +/* Random numbers */ +#define VSCSI_MAX_SECTORS 4096/*1024*//*256*/ +#define VSCSI_REQ_LIMIT 24 + +#define SCSI_SENSE_BUF_SIZE 96 +#define SRP_RSP_SENSE_DATA_LEN 18 + +typedef union vscsi_crq { + struct viosrp_crq s; + uint8_t raw[16]; +} vscsi_crq; + +typedef struct vscsi_req +{ + vscsi_crq crq; + union viosrp_iu iu; + + /* SCSI request tracking */ + SCSIDevice *sdev; + uint32_t qtag; /* qemu tag != srp tag */ + int lun; + int active; + long data_len; + int writing; + int sensing; + int senselen; + uint8_t sense[SCSI_SENSE_BUF_SIZE]; + + /* RDMA related bits */ + uint8_t dma_fmt; + struct srp_direct_buf ext_desc; + struct srp_direct_buf *cur_desc; + struct srp_indirect_buf *ind_desc; + int local_desc; + int total_desc; + +} vscsi_req; + + +typedef struct { + VIOsPAPRDevice vdev; + SCSIBus bus; + vscsi_req reqs[VSCSI_REQ_LIMIT]; +} VSCSIState; + +/* XXX Debug only */ +static VSCSIState *dbg_vscsi_state; + + +static struct vscsi_req *vscsi_get_req(VSCSIState *s) +{ + vscsi_req *req; + int i; + + for (i = 0; i < VSCSI_REQ_LIMIT; i++) { + req = &s->reqs[i]; + if (!req->active) { + memset(req, 0, sizeof(*req)); + req->qtag = i; + req->active = 1; + return req; + } + } + return NULL; +} + +static void vscsi_put_req(VSCSIState *s, vscsi_req *req) +{ + req->active = 0; +} + +static vscsi_req *vscsi_find_req(VSCSIState *s, uint32_t tag) +{ + if (tag >= VSCSI_REQ_LIMIT || !s->reqs[tag].active) { + return NULL; + } + return &s->reqs[tag]; +} + +static void vscsi_decode_id_lun(uint64_t srp_lun, int *id, int *lun) +{ + /* XXX Figure that one out properly ! 
This is crackpot */ + *id = (srp_lun >> 56) & 0x7f; + *lun = (srp_lun >> 48) & 0xff; +} + +static int vscsi_send_iu(VSCSIState *s, vscsi_req *req, + uint64_t length, uint8_t format) +{ + long rc, rc1; + + /* First copy the SRP */ + rc = spapr_tce_dma_write(&s->vdev, req->crq.s.IU_data_ptr, + &req->iu, length); + if (rc) { + fprintf(stderr, "vscsi_send_iu: DMA write failure !\n"); + } + + req->crq.s.valid = 0x80; + req->crq.s.format = format; + req->crq.s.reserved = 0x00; + req->crq.s.timeout = cpu_to_be16(0x0000); + req->crq.s.IU_length = cpu_to_be16(length); + req->crq.s.IU_data_ptr = req->iu.srp.rsp.tag; /* right byte order */ + + if (rc == 0) { + req->crq.s.status = 0x99; /* Just needs to be non-zero */ + } else { + req->crq.s.status = 0x00; + } + + rc1 = spapr_vio_send_crq(&s->vdev, req->crq.raw); + if (rc1) { + fprintf(stderr, "vscsi_send_iu: Error sending response\n"); + return rc1; + } + + return rc; +} + +static void vscsi_makeup_sense(VSCSIState *s, vscsi_req *req, + uint8_t key, uint8_t asc, uint8_t ascq) +{ + req->senselen = SRP_RSP_SENSE_DATA_LEN; + + /* Valid bit and 'current errors' */ + req->sense[0] = (0x1 << 7 | 0x70); + /* Sense key */ + req->sense[2] = key; + /* Additional sense length */ + req->sense[7] = 0xa; /* 10 bytes */ + /* Additional sense code */ + req->sense[12] = asc; + req->sense[13] = ascq; +} + +static int vscsi_send_rsp(VSCSIState *s, vscsi_req *req, + uint8_t status, int32_t res_in, int32_t res_out) +{ + union viosrp_iu *iu = &req->iu; + uint64_t tag = iu->srp.rsp.tag; + int total_len = sizeof(iu->srp.rsp); + + dprintf("VSCSI: Sending resp status: 0x%x, " + "res_in: %d, res_out: %d \n", status, res_in, res_out); + + memset(iu, 0, sizeof(struct srp_rsp)); + iu->srp.rsp.opcode = SRP_RSP; + iu->srp.rsp.req_lim_delta = cpu_to_be32(1); + iu->srp.rsp.tag = tag; + + /* Handle residuals */ + if (res_in < 0) { + iu->srp.rsp.flags |= SRP_RSP_FLAG_DIUNDER; + res_in = -res_in; + } else if (res_in) { + iu->srp.rsp.flags |= SRP_RSP_FLAG_DIOVER; 
+ } + if (res_out < 0) { + iu->srp.rsp.flags |= SRP_RSP_FLAG_DOUNDER; + res_out = -res_out; + } else if (res_out) { + iu->srp.rsp.flags |= SRP_RSP_FLAG_DOOVER; + } + iu->srp.rsp.data_in_res_cnt = cpu_to_be32(res_in); + iu->srp.rsp.data_out_res_cnt = cpu_to_be32(res_out); + + /* We don't do response data */ + /* iu->srp.rsp.flags &= ~SRP_RSP_FLAG_RSPVALID; */ + iu->srp.rsp.resp_data_len = cpu_to_be32(0); + + /* Handle success vs. failure */ + iu->srp.rsp.status = status; + if (status) { + iu->srp.rsp.sol_not = (iu->srp.cmd.sol_not & 0x04) >> 2; + if (req->senselen) { + req->iu.srp.rsp.flags |= SRP_RSP_FLAG_SNSVALID; + req->iu.srp.rsp.sense_data_len = cpu_to_be32(req->senselen); + memcpy(req->iu.srp.rsp.data, req->sense, req->senselen); + total_len += req->senselen; + } + } else { + iu->srp.rsp.sol_not = (iu->srp.cmd.sol_not & 0x02) >> 1; + } + + vscsi_send_iu(s, req, total_len, VIOSRP_SRP_FORMAT); + return 0; +} + +static inline void vscsi_swap_desc(struct srp_direct_buf *desc) +{ + desc->va = be64_to_cpu(desc->va); + desc->len = be32_to_cpu(desc->len); +} + +static int vscsi_srp_direct_data(VSCSIState *s, vscsi_req *req, + uint8_t *buf, uint32_t len) +{ + struct srp_direct_buf *md = req->cur_desc; + uint32_t llen; + int rc; + + dprintf("VSCSI: direct segment 0x%x bytes, va=0x%llx desc len=0x%x\n", + len, (unsigned long long)md->va, md->len); + + llen = min(len, md->len); + if (llen) { + if (req->writing) { /* writing = to device = reading from memory */ + rc = spapr_tce_dma_read(&s->vdev, md->va, buf, llen); + } else { + rc = spapr_tce_dma_write(&s->vdev, md->va, buf, llen); + } + } + md->len -= llen; + md->va += llen; + + if (rc) { + return -1; + } + return llen; +} + +static int vscsi_srp_indirect_data(VSCSIState *s, vscsi_req *req, + uint8_t *buf, uint32_t len) +{ + struct srp_direct_buf *td = &req->ind_desc->table_desc; + struct srp_direct_buf *md = req->cur_desc; + int rc = 0; + uint32_t llen, total = 0; + + dprintf("VSCSI: indirect segment 0x%x bytes, td 
va=0x%llx len=0x%x\n", + len, (unsigned long long)td->va, td->len); + + /* While we have data ... */ + while(len) { + /* If we have a descriptor but it's empty, go fetch a new one */ + if (md && md->len == 0) { + /* More local available, use one */ + if (req->local_desc) { + md = ++req->cur_desc; + --req->local_desc; + --req->total_desc; + td->va += sizeof(struct srp_direct_buf); + } else { + md = req->cur_desc = NULL; + } + } + /* No descriptor at hand, fetch one */ + if (!md) { + if (!req->total_desc) { + dprintf("VSCSI: Out of descriptors !\n"); + break; + } + md = req->cur_desc = &req->ext_desc; + dprintf("VSCSI: Reading desc from 0x%llx\n", (unsigned long long)td->va); + rc = spapr_tce_dma_read(&s->vdev, td->va, md, sizeof(struct srp_direct_buf)); + if (rc) { + dprintf("VSCSI: tce_dma_read -> %d reading ext_desc\n", rc); + break; + } + vscsi_swap_desc(md); + td->va += sizeof(struct srp_direct_buf); + --req->total_desc; + } + dprintf("VSCSI: [desc va=0x%llx,len=0x%x] remaining=0x%x\n", + (unsigned long long)md->va, md->len, len); + + /* Perform transfer */ + llen = min(len, md->len); + if (req->writing) { /* writing = to device = reading from memory */ + rc = spapr_tce_dma_read(&s->vdev, md->va, buf, llen); + + } else { + rc = spapr_tce_dma_write(&s->vdev, md->va, buf, llen); + } + if (rc) { + dprintf("VSCSI: tce_dma_r/w(%d) -> %d\n", req->writing, rc); + break; + } + dprintf("VSCSI: data: %02x %02x %02x %02x...\n", + buf[0], buf[1], buf[2], buf[3]); + + len -= llen; + buf += llen; + total += llen; + md->va += llen; + md->len -= llen; + } + return rc ? 
-1 : total; +} + +static int vscsi_srp_transfer_data(VSCSIState *s, vscsi_req *req, + int writing, uint8_t *buf, uint32_t len) +{ + int err = 0; + + switch (req->dma_fmt) { + case SRP_NO_DATA_DESC: + dprintf("VSCSI: no data desc transfer, skipping 0x%x bytes\n", len); + break; + case SRP_DATA_DESC_DIRECT: + err = vscsi_srp_direct_data(s, req, buf, len); + break; + case SRP_DATA_DESC_INDIRECT: + err = vscsi_srp_indirect_data(s, req, buf, len); + break; + } + return err; +} + +/* Bits from linux srp */ +static int data_out_desc_size(struct srp_cmd *cmd) +{ + int size = 0; + uint8_t fmt = cmd->buf_fmt >> 4; + + switch (fmt) { + case SRP_NO_DATA_DESC: + break; + case SRP_DATA_DESC_DIRECT: + size = sizeof(struct srp_direct_buf); + break; + case SRP_DATA_DESC_INDIRECT: + size = sizeof(struct srp_indirect_buf) + + sizeof(struct srp_direct_buf) * cmd->data_out_desc_cnt; + break; + default: + break; + } + return size; +} + +static int vscsi_preprocess_desc(vscsi_req *req) +{ + struct srp_cmd *cmd = &req->iu.srp.cmd; + int offset, i; + + offset = cmd->add_cdb_len & ~3; + + if (req->writing) { + req->dma_fmt = cmd->buf_fmt >> 4; + } else { + offset += data_out_desc_size(cmd); + req->dma_fmt = cmd->buf_fmt & ((1U << 4) - 1); + } + + switch (req->dma_fmt) { + case SRP_NO_DATA_DESC: + break; + case SRP_DATA_DESC_DIRECT: + req->cur_desc = (struct srp_direct_buf *)(cmd->add_data + offset); + req->total_desc = req->local_desc = 1; + vscsi_swap_desc(req->cur_desc); + dprintf("VSCSI: using direct RDMA %s, 0x%x bytes MD: 0x%llx\n", + req->writing ? "write" : "read", + req->cur_desc->len, (unsigned long long)req->cur_desc->va); + break; + case SRP_DATA_DESC_INDIRECT: + req->ind_desc = (struct srp_indirect_buf *)(cmd->add_data + offset); + vscsi_swap_desc(&req->ind_desc->table_desc); + req->total_desc = req->ind_desc->table_desc.len / sizeof(struct srp_direct_buf); + req->local_desc = req->writing ? 
cmd->data_out_desc_cnt : + cmd->data_in_desc_cnt; + for (i = 0; i < req->local_desc; i++) + vscsi_swap_desc(&req->ind_desc->desc_list[i]); + req->cur_desc = req->local_desc ? &req->ind_desc->desc_list[0] : NULL; + dprintf("VSCSI: using indirect RDMA %s, 0x%x bytes %d descs (%d local) VA: 0x%llx\n", + req->writing ? "read" : "write", be32_to_cpu(req->ind_desc->len), + req->total_desc, req->local_desc, + (unsigned long long)req->ind_desc->table_desc.va); + break; + default: + fprintf(stderr, + "vscsi_preprocess_desc: Unknown format %x\n", req->dma_fmt); + return -1; + } + + return 0; +} + +static void vscsi_send_request_sense(VSCSIState *s, vscsi_req *req) +{ + SCSIDevice *sdev = req->sdev; + uint8_t *cdb = req->iu.srp.cmd.cdb; + int n; + + cdb[0] = 3; + cdb[1] = 0; + cdb[2] = 0; + cdb[3] = 0; + cdb[4] = 96; + cdb[5] = 0; + req->sensing = 1; + n = sdev->info->send_command(sdev, req->qtag, cdb, req->lun); + dprintf("VSCSI: Queued request sense tag 0x%x \n", req->qtag); + if (n < 0) { + fprintf(stderr, "VSCSI: REQUEST_SENSE wants write data !?!?!?\n"); + sdev->info->cancel_io(sdev, req->qtag); + vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0); + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); + vscsi_put_req(s, req); + return; + } else if (n == 0) { + return; + } + sdev->info->read_data(sdev, req->qtag); +} + +/* Callback to indicate that the SCSI layer has completed a transfer. 
*/ +static void vscsi_command_complete(SCSIBus *bus, int reason, uint32_t tag, + uint32_t arg) +{ + VSCSIState *s = DO_UPCAST(VSCSIState, vdev.qdev, bus->qbus.parent); + vscsi_req *req = vscsi_find_req(s, tag); + SCSIDevice *sdev; + uint8_t *buf; + int32_t res_in = 0, res_out = 0; + int len, rc = 0; + + dprintf("VSCSI: SCSI cmd complete, r=0x%x tag=0x%x arg=0x%x, req=%p\n", + reason, tag, arg, req); + if (req == NULL) { + fprintf(stderr, "VSCSI: Can't find request for tag 0x%x\n", tag); + return; + } + sdev = req->sdev; + + if (req->sensing) { + if (reason == SCSI_REASON_DONE) { + dprintf("VSCSI: Sense done !\n"); + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); + vscsi_put_req(s, req); + } else { + uint8_t *buf = sdev->info->get_buf(sdev, tag); + + len = min(arg, SCSI_SENSE_BUF_SIZE); + dprintf("VSCSI: Sense data, %d bytes:\n", len); + dprintf(" %02x %02x %02x %02x %02x %02x %02x %02x\n", + buf[0], buf[1], buf[2], buf[3], + buf[4], buf[5], buf[6], buf[7]); + dprintf(" %02x %02x %02x %02x %02x %02x %02x %02x\n", + buf[8], buf[9], buf[10], buf[11], + buf[12], buf[13], buf[14], buf[15]); + memcpy(req->sense, buf, len); + req->senselen = len; + sdev->info->read_data(sdev, req->qtag); + } + return; + } + + if (reason == SCSI_REASON_DONE) { + dprintf("VSCSI: Command complete err=%d\n", arg); + if (arg == 0) { + /* We handle overflows, not underflows for normal commands, + * but hopefully nobody cares + */ + if (req->writing) + res_out = req->data_len; + else + res_in = req->data_len; + vscsi_send_rsp(s, req, 0, res_in, res_out); + } else if (arg == CHECK_CONDITION) { + dprintf("VSCSI: Got CHECK_CONDITION, requesting sense...\n"); + vscsi_send_request_sense(s, req); + return; + } else { + vscsi_send_rsp(s, req, arg, 0, 0); + } + vscsi_put_req(s, req); + return; + } + + /* "arg" is how much we have read for reads and how much we want + * to write for writes (ie, how much is to be DMA'd) + */ + if (arg) { + buf = sdev->info->get_buf(sdev, tag); + rc = 
vscsi_srp_transfer_data(s, req, req->writing, buf, arg); + } + if (rc < 0) { + fprintf(stderr, "VSCSI: RDMA error rc=%d!\n", rc); + sdev->info->cancel_io(sdev, req->qtag); + vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0); + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); + vscsi_put_req(s, req); + return; + } + + /* Start next chunk */ + req->data_len -= rc; + if (req->writing) { + sdev->info->write_data(sdev, req->qtag); + } else { + sdev->info->read_data(sdev, req->qtag); + } +} + +static void vscsi_process_login(VSCSIState *s, vscsi_req *req) +{ + union viosrp_iu *iu = &req->iu; + struct srp_login_rsp *rsp = &iu->srp.login_rsp; + uint64_t tag = iu->srp.rsp.tag; + + dprintf("VSCSI: Got login, sendin response !\n"); + + /* TODO handle case that requested size is wrong and + * buffer format is wrong + */ + memset(iu, 0, sizeof(struct srp_login_rsp)); + rsp->opcode = SRP_LOGIN_RSP; + /* Don't advertise quite as many request as we support to + * keep room for management stuff etc... + */ + rsp->req_lim_delta = cpu_to_be32(VSCSI_REQ_LIMIT-2); + rsp->tag = tag; + rsp->max_it_iu_len = cpu_to_be32(sizeof(union srp_iu)); + rsp->max_ti_iu_len = cpu_to_be32(sizeof(union srp_iu)); + /* direct and indirect */ + rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); + + vscsi_send_iu(s, req, sizeof(*rsp), VIOSRP_SRP_FORMAT); +} + +static void vscsi_inquiry_no_target(VSCSIState *s, vscsi_req *req) +{ + uint8_t *cdb = req->iu.srp.cmd.cdb; + uint8_t resp_data[36]; + int rc, len, alen; + + /* We dont do EVPD. 
Also check that page_code is 0 */ + if ((cdb[1] & 0x01) || cdb[2] != 0) { + /* Send INVALID FIELD IN CDB */ + vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x24, 0); + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); + return; + } + alen = cdb[3]; + alen = (alen << 8) | cdb[4]; + len = min(alen, 36); + + /* Fake up inquiry using PQ=3 */ + memset(resp_data, 0, 36); + resp_data[0] = 0x7f; /* Not capable of supporting a device here */ + resp_data[2] = 0x06; /* SPC-4 */ + resp_data[3] = 0x02; /* Resp data format */ + resp_data[4] = 36 - 5; /* Additional length */ + resp_data[7] = 0x10; /* Sync transfers */ + /* Vendor (bytes 8-15) and product (bytes 16-31) ids are space-padded + * ASCII. Pad first, then copy only the characters of each literal so + * that no byte is read past the end of a string constant. + */ + memset(&resp_data[8], ' ', 8 + 16); + memcpy(&resp_data[16], "QEMU EMPTY", 10); + memcpy(&resp_data[8], "QEMU", 4); + + req->writing = 0; + vscsi_preprocess_desc(req); + rc = vscsi_srp_transfer_data(s, req, 0, resp_data, len); + if (rc < 0) { + vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0); + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); + } else { + vscsi_send_rsp(s, req, 0, 36 - rc, 0); + } +} + +static int vscsi_queue_cmd(VSCSIState *s, vscsi_req *req) +{ + union srp_iu *srp = &req->iu.srp; + SCSIDevice *sdev; + int n, id, lun; + + vscsi_decode_id_lun(be64_to_cpu(srp->cmd.lun), &id, &lun); + + /* Qemu vs. linux issue with LUNs to be sorted out ... */ + sdev = (id < 8 && lun < 16) ? 
s->bus.devs[id] : NULL; + if (!sdev) { + dprintf("VSCSI: Command for id %d with no drive\n", id); + if (srp->cmd.cdb[0] == INQUIRY) { + vscsi_inquiry_no_target(s, req); + } else { + vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x24, 0x00); + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); + } return 1; + } + + req->sdev = sdev; + req->lun = lun; + n = sdev->info->send_command(sdev, req->qtag, srp->cmd.cdb, lun); + + dprintf("VSCSI: Queued command tag 0x%x CMD 0x%x ID %d LUN %d ret: %d\n", + req->qtag, srp->cmd.cdb[0], id, lun, n); + + if (n) { + /* Transfer direction must be set before preprocessing the + * descriptors + */ + req->writing = (n < 1); + + /* Preprocess RDMA descriptors */ + vscsi_preprocess_desc(req); + } + + /* Get transfer direction and initiate transfer */ + if (n > 0) { + req->data_len = n; + sdev->info->read_data(sdev, req->qtag); + } else if (n < 0) { + req->data_len = -n; + sdev->info->write_data(sdev, req->qtag); + } + /* Don't touch req here, it may have been recycled already */ + + return 0; +} + +static int vscsi_process_tsk_mgmt(VSCSIState *s, vscsi_req *req) +{ + union viosrp_iu *iu = &req->iu; + int fn; + + fprintf(stderr, "vscsi_process_tsk_mgmt %02x\n", + iu->srp.tsk_mgmt.tsk_mgmt_func); + + switch (iu->srp.tsk_mgmt.tsk_mgmt_func) { +#if 0 /* We really don't deal with these for now */ + case SRP_TSK_ABORT_TASK: + fn = ABORT_TASK; + break; + case SRP_TSK_ABORT_TASK_SET: + fn = ABORT_TASK_SET; + break; + case SRP_TSK_CLEAR_TASK_SET: + fn = CLEAR_TASK_SET; + break; + case SRP_TSK_LUN_RESET: + fn = LOGICAL_UNIT_RESET; + break; + case SRP_TSK_CLEAR_ACA: + fn = CLEAR_ACA; + break; +#endif + default: + fn = 0; + } + if (fn) { + /* XXX Send/Handle target task management */ + ; + } else { + vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x20, 0); + vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0); + } + return !fn; +} + +static int vscsi_handle_srp_req(VSCSIState *s, vscsi_req *req) +{ + union srp_iu *srp = &req->iu.srp; + int done = 1; + uint8_t 
opcode = srp->rsp.opcode; + + switch (opcode) { + case SRP_LOGIN_REQ: + vscsi_process_login(s, req); + break; + case SRP_TSK_MGMT: + done = vscsi_process_tsk_mgmt(s, req); + break; + case SRP_CMD: + done = vscsi_queue_cmd(s, req); + break; + case SRP_LOGIN_RSP: + case SRP_I_LOGOUT: + case SRP_T_LOGOUT: + case SRP_RSP: + case SRP_CRED_REQ: + case SRP_CRED_RSP: + case SRP_AER_REQ: + case SRP_AER_RSP: + fprintf(stderr, "VSCSI: Unsupported opcode %02x\n", opcode); + break; + default: + fprintf(stderr, "VSCSI: Unknown type %02x\n", opcode); + } + + return done; +} + +/* Answer a MAD adapter-info request: fill in a local + * mad_adapter_info_data, DMA it to the guest buffer named in the + * request, then send the IU back with its status field set. + */ +static int vscsi_send_adapter_info(VSCSIState *s, vscsi_req *req) +{ + struct viosrp_adapter_info *sinfo; + struct mad_adapter_info_data info; + uint16_t len; + int rc; + + sinfo = &req->iu.mad.adapter_info; + +#if 0 /* What for ? */ + rc = spapr_tce_dma_read(&s->vdev, be64_to_cpu(sinfo->buffer), + &info, be16_to_cpu(sinfo->common.length)); + if (rc) { + fprintf(stderr, "vscsi_send_adapter_info: DMA read failure !\n"); + } +#endif + memset(&info, 0, sizeof(info)); + strcpy(info.srp_version, SRP_VERSION); + strncpy(info.partition_name, "qemu", sizeof("qemu")); + info.partition_number = cpu_to_be32(0); + info.mad_version = cpu_to_be32(1); + info.os_type = cpu_to_be32(2); + info.port_max_txu[0] = cpu_to_be32(VSCSI_MAX_SECTORS << 9); + + /* The length is supplied by the guest: never copy out more than the + * local structure holds, or stack contents beyond it would be sent. + */ + len = be16_to_cpu(sinfo->common.length); + if (len > sizeof(info)) { + len = sizeof(info); + } + + rc = spapr_tce_dma_write(&s->vdev, be64_to_cpu(sinfo->buffer), + &info, len); + if (rc) { + fprintf(stderr, "vscsi_send_adapter_info: DMA write failure !\n"); + } + + /* common.status is written with cpu_to_be16() by vscsi_handle_mad_req; + * use the same width here (NOTE(review): confirm against mad_common). + */ + sinfo->common.status = rc ? 
cpu_to_be16(1) : 0; + + return vscsi_send_iu(s, req, sizeof(*sinfo), VIOSRP_MAD_FORMAT); +} + +static int vscsi_handle_mad_req(VSCSIState *s, vscsi_req *req) +{ + union mad_iu *mad = &req->iu.mad; + + switch (be32_to_cpu(mad->empty_iu.common.type)) { + case VIOSRP_EMPTY_IU_TYPE: + fprintf(stderr, "Unsupported EMPTY MAD IU\n"); + break; + case VIOSRP_ERROR_LOG_TYPE: + fprintf(stderr, "Unsupported ERROR LOG MAD IU\n"); + mad->error_log.common.status = cpu_to_be16(1); + vscsi_send_iu(s, req, sizeof(mad->error_log), VIOSRP_MAD_FORMAT); + break; + case VIOSRP_ADAPTER_INFO_TYPE: + vscsi_send_adapter_info(s, req); + break; + case VIOSRP_HOST_CONFIG_TYPE: + mad->host_config.common.status = cpu_to_be16(1); + vscsi_send_iu(s, req, sizeof(mad->host_config), VIOSRP_MAD_FORMAT); + break; + default: + fprintf(stderr, "VSCSI: Unknown MAD type %02x\n", + be32_to_cpu(mad->empty_iu.common.type)); + } + + return 1; +} + +/* Fetch the IU referenced by a payload CRQ from guest memory into a + * request slot and hand it to the MAD or SRP handler. The slot is + * released here on every early-error path and whenever the handler + * reports the request as done. + */ +static void vscsi_got_payload(VSCSIState *s, vscsi_crq *crq) +{ + vscsi_req *req; + int done; + + req = vscsi_get_req(s); + if (req == NULL) { + fprintf(stderr, "VSCSI: Failed to get a request !\n"); + return; + } + + /* We only support a limited number of descriptors, we know + * the ibmvscsi driver uses up to 10 max, so it should fit + * in our 256 bytes IUs. If not we'll have to increase the size + * of the structure. + */ + if (crq->s.IU_length > sizeof(union viosrp_iu)) { + fprintf(stderr, "VSCSI: SRP IU too long (%d bytes) !\n", + crq->s.IU_length); + vscsi_put_req(s, req); /* give the slot back, nothing was queued */ + return; + } + + /* XXX Handle failure differently ? 
*/ + if (spapr_tce_dma_read(&s->vdev, crq->s.IU_data_ptr, &req->iu, + crq->s.IU_length)) { + fprintf(stderr, "vscsi_got_payload: DMA read failure !\n"); + /* req is a slot of s->reqs[], not a heap object: release it + * through the pool and stop, the IU contents are not valid. + */ + vscsi_put_req(s, req); + return; + } + memcpy(&req->crq, crq, sizeof(vscsi_crq)); + + if (crq->s.format == VIOSRP_MAD_FORMAT) { + done = vscsi_handle_mad_req(s, req); + } else { + done = vscsi_handle_srp_req(s, req); + } + + if (done) { + vscsi_put_req(s, req); + } +} + + +static int vscsi_do_crq(struct VIOsPAPRDevice *dev, uint8_t *crq_data) +{ + VSCSIState *s = DO_UPCAST(VSCSIState, vdev, dev); + vscsi_crq crq; + + memcpy(crq.raw, crq_data, 16); + crq.s.timeout = be16_to_cpu(crq.s.timeout); + crq.s.IU_length = be16_to_cpu(crq.s.IU_length); + crq.s.IU_data_ptr = be64_to_cpu(crq.s.IU_data_ptr); + + dprintf("VSCSI: do_crq %02x %02x ...\n", crq.raw[0], crq.raw[1]); + + switch(crq.s.valid) { + case 0xc0: /* Init command/response */ + + /* Respond to initialization request */ + if (crq.s.format == 0x01) { + memset(crq.raw, 0, 16); + crq.s.valid = 0xc0; + crq.s.format = 0x02; + spapr_vio_send_crq(dev, crq.raw); + } + + /* Note that in hotplug cases, we might get a 0x02 + * as a result of us emitting the init request + */ + + break; + case 0xff: /* Link event */ + + /* Not handled for now */ + + break; + case 0x80: /* Payloads */ + switch (crq.s.format) { + case VIOSRP_SRP_FORMAT: /* AKA VSCSI request */ + case VIOSRP_MAD_FORMAT: /* AKA VSCSI response */ + vscsi_got_payload(s, &crq); + break; + case VIOSRP_OS400_FORMAT: + case VIOSRP_AIX_FORMAT: + case VIOSRP_LINUX_FORMAT: + case VIOSRP_INLINE_FORMAT: + fprintf(stderr, "vscsi_do_srq: Unsupported payload format %02x\n", + crq.s.format); + break; + default: + fprintf(stderr, "vscsi_do_srq: Unknown payload format %02x\n", + crq.s.format); + } + break; + default: + fprintf(stderr, "vscsi_do_crq: unknown CRQ %02x %02x ...\n", + crq.raw[0], crq.raw[1]); + }; + + return 0; +} + +static int spapr_vscsi_init(VIOsPAPRDevice *dev) +{ + VSCSIState *s = DO_UPCAST(VSCSIState, vdev, dev); + int i; + 
+ dbg_vscsi_state = s; + + /* Initialize qemu request tags */ + memset(s->reqs, 0, sizeof(s->reqs)); + for (i = 0; i < VSCSI_REQ_LIMIT; i++) + s->reqs[i].qtag = i; + + dev->crq.SendFunc = vscsi_do_crq; + + scsi_bus_new(&s->bus, &dev->qdev, 1, VSCSI_REQ_LIMIT, + vscsi_command_complete); + if (!dev->qdev.hotplugged) { + scsi_bus_legacy_handle_cmdline(&s->bus); + } + + return 0; +} + +void spapr_vscsi_create(VIOsPAPRBus *bus, uint32_t reg, + qemu_irq qirq, uint32_t vio_irq_num) +{ + DeviceState *dev; + VIOsPAPRDevice *sdev; + + dev = qdev_create(&bus->bus, "spapr-vscsi"); + qdev_prop_set_uint32(dev, "reg", reg); + + qdev_init_nofail(dev); + + sdev = (VIOsPAPRDevice *)dev; + sdev->qirq = qirq; + sdev->vio_irq_num = vio_irq_num; +} + +static int spapr_vscsi_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off) +{ + int ret; + + ret = fdt_setprop_cell(fdt, node_off, "#address-cells", 2); + if (ret < 0) { + return ret; + } + + ret = fdt_setprop_cell(fdt, node_off, "#size-cells", 0); + if (ret < 0) { + return ret; + } + + return 0; +} + +static VIOsPAPRDeviceInfo spapr_vscsi = { + .init = spapr_vscsi_init, + .devnode = spapr_vscsi_devnode, + .dt_name = "v-scsi", + .dt_type = "vscsi", + .dt_compatible = "IBM,v-scsi", + .signal_mask = 0x00000001, + .qdev.name = "spapr-vscsi", + .qdev.size = sizeof(VSCSIState), + .qdev.props = (Property[]) { + DEFINE_PROP_UINT32("reg", VIOsPAPRDevice, reg, 0x2000), + DEFINE_PROP_UINT32("dma-window", VIOsPAPRDevice, + rtce_window_size, 0x10000000), + DEFINE_PROP_END_OF_LIST(), + }, +}; + +static void spapr_vscsi_register(void) +{ + spapr_vio_bus_register_withprop(&spapr_vscsi); +} +device_init(spapr_vscsi_register); diff --git a/hw/srp.h b/hw/srp.h new file mode 100644 index 0000000..9d55fc4 --- /dev/null +++ b/hw/srp.h @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef SCSI_SRP_H +#define SCSI_SRP_H + +/* + * Structures and constants for the SCSI RDMA Protocol (SRP) as + * defined by the INCITS T10 committee. This file was written using + * draft Revision 16a of the SRP standard. 
+ */ + +enum { + + SRP_LOGIN_REQ = 0x00, + SRP_TSK_MGMT = 0x01, + SRP_CMD = 0x02, + SRP_I_LOGOUT = 0x03, + SRP_LOGIN_RSP = 0xc0, + SRP_RSP = 0xc1, + SRP_LOGIN_REJ = 0xc2, + SRP_T_LOGOUT = 0x80, + SRP_CRED_REQ = 0x81, + SRP_AER_REQ = 0x82, + SRP_CRED_RSP = 0x41, + SRP_AER_RSP = 0x42 +}; + +enum { + SRP_BUF_FORMAT_DIRECT = 1 << 1, + SRP_BUF_FORMAT_INDIRECT = 1 << 2 +}; + +enum { + SRP_NO_DATA_DESC = 0, + SRP_DATA_DESC_DIRECT = 1, + SRP_DATA_DESC_INDIRECT = 2 +}; + +enum { + SRP_TSK_ABORT_TASK = 0x01, + SRP_TSK_ABORT_TASK_SET = 0x02, + SRP_TSK_CLEAR_TASK_SET = 0x04, + SRP_TSK_LUN_RESET = 0x08, + SRP_TSK_CLEAR_ACA = 0x40 +}; + +enum srp_login_rej_reason { + SRP_LOGIN_REJ_UNABLE_ESTABLISH_CHANNEL = 0x00010000, + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES = 0x00010001, + SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE = 0x00010002, + SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL = 0x00010003, + SRP_LOGIN_REJ_UNSUPPORTED_DESCRIPTOR_FMT = 0x00010004, + SRP_LOGIN_REJ_MULTI_CHANNEL_UNSUPPORTED = 0x00010005, + SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED = 0x00010006 +}; + +enum { + SRP_REV10_IB_IO_CLASS = 0xff00, + SRP_REV16A_IB_IO_CLASS = 0x0100 +}; + +struct srp_direct_buf { + uint64_t va; + uint32_t key; + uint32_t len; +}; + +/* + * We need the packed attribute because the SRP spec puts the list of + * descriptors at an offset of 20, which is not aligned to the size of + * struct srp_direct_buf. The whole structure must be packed to avoid + * having the 20-byte structure padded to 24 bytes on 64-bit architectures. 
+ */ +struct srp_indirect_buf { + struct srp_direct_buf table_desc; + uint32_t len; + struct srp_direct_buf desc_list[0]; +} __attribute__((packed)); + +enum { + SRP_MULTICHAN_SINGLE = 0, + SRP_MULTICHAN_MULTI = 1 +}; + +struct srp_login_req { + uint8_t opcode; + uint8_t reserved1[7]; + uint64_t tag; + uint32_t req_it_iu_len; + uint8_t reserved2[4]; + uint16_t req_buf_fmt; + uint8_t req_flags; + uint8_t reserved3[5]; + uint8_t initiator_port_id[16]; + uint8_t target_port_id[16]; +}; + +/* + * The SRP spec defines the size of the LOGIN_RSP structure to be 52 + * bytes, so it needs to be packed to avoid having it padded to 56 + * bytes on 64-bit architectures. + */ +struct srp_login_rsp { + uint8_t opcode; + uint8_t reserved1[3]; + uint32_t req_lim_delta; + uint64_t tag; + uint32_t max_it_iu_len; + uint32_t max_ti_iu_len; + uint16_t buf_fmt; + uint8_t rsp_flags; + uint8_t reserved2[25]; +} __attribute__((packed)); + +struct srp_login_rej { + uint8_t opcode; + uint8_t reserved1[3]; + uint32_t reason; + uint64_t tag; + uint8_t reserved2[8]; + uint16_t buf_fmt; + uint8_t reserved3[6]; +}; + +struct srp_i_logout { + uint8_t opcode; + uint8_t reserved[7]; + uint64_t tag; +}; + +struct srp_t_logout { + uint8_t opcode; + uint8_t sol_not; + uint8_t reserved[2]; + uint32_t reason; + uint64_t tag; +}; + +/* + * We need the packed attribute because the SRP spec only aligns the + * 8-byte LUN field to 4 bytes. + */ +struct srp_tsk_mgmt { + uint8_t opcode; + uint8_t sol_not; + uint8_t reserved1[6]; + uint64_t tag; + uint8_t reserved2[4]; + uint64_t lun __attribute__((packed)); + uint8_t reserved3[2]; + uint8_t tsk_mgmt_func; + uint8_t reserved4; + uint64_t task_tag; + uint8_t reserved5[8]; +}; + +/* + * We need the packed attribute because the SRP spec only aligns the + * 8-byte LUN field to 4 bytes. 
+ */ +struct srp_cmd { + uint8_t opcode; + uint8_t sol_not; + uint8_t reserved1[3]; + uint8_t buf_fmt; + uint8_t data_out_desc_cnt; + uint8_t data_in_desc_cnt; + uint64_t tag; + uint8_t reserved2[4]; + uint64_t lun __attribute__((packed)); + uint8_t reserved3; + uint8_t task_attr; + uint8_t reserved4; + uint8_t add_cdb_len; + uint8_t cdb[16]; + uint8_t add_data[0]; +}; + +enum { + SRP_RSP_FLAG_RSPVALID = 1 << 0, + SRP_RSP_FLAG_SNSVALID = 1 << 1, + SRP_RSP_FLAG_DOOVER = 1 << 2, + SRP_RSP_FLAG_DOUNDER = 1 << 3, + SRP_RSP_FLAG_DIOVER = 1 << 4, + SRP_RSP_FLAG_DIUNDER = 1 << 5 +}; + +/* + * The SRP spec defines the size of the RSP structure to be 36 bytes, + * so it needs to be packed to avoid having it padded to 40 bytes on + * 64-bit architectures. + */ +struct srp_rsp { + uint8_t opcode; + uint8_t sol_not; + uint8_t reserved1[2]; + uint32_t req_lim_delta; + uint64_t tag; + uint8_t reserved2[2]; + uint8_t flags; + uint8_t status; + uint32_t data_out_res_cnt; + uint32_t data_in_res_cnt; + uint32_t sense_data_len; + uint32_t resp_data_len; + uint8_t data[0]; +} __attribute__((packed)); + +#endif /* SCSI_SRP_H */