Patchwork [24/26] Implement PAPR virtual SCSI interface (ibmvscsi)

login
register
mail settings
Submitter David Gibson
Date March 16, 2011, 4:57 a.m.
Message ID <1300251423-6715-25-git-send-email-david@gibson.dropbear.id.au>
Download mbox | patch
Permalink /patch/87173/
State New
Headers show

Comments

David Gibson - March 16, 2011, 4:57 a.m.
This patch implements the infrastructure and hypercalls necessary for
the PAPR specified Virtual SCSI interface.  This is the normal method
for providing (virtual) disks to PAPR partitions.

Signed-off-by: Ben Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <dwg@au1.ibm.com>
---
 Makefile.target  |    2 +-
 hw/ppc-viosrp.h  |  216 ++++++++++++
 hw/spapr.c       |   10 +-
 hw/spapr_vio.h   |    3 +
 hw/spapr_vscsi.c |  960 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/srp.h         |  241 ++++++++++++++
 6 files changed, 1430 insertions(+), 2 deletions(-)
 create mode 100644 hw/ppc-viosrp.h
 create mode 100644 hw/spapr_vscsi.c
 create mode 100644 hw/srp.h
Alexander Graf - March 16, 2011, 4:41 p.m.
On 03/16/2011 05:57 AM, David Gibson wrote:
> This patch implements the infrastructure and hypercalls necessary for
> the PAPR specified Virtual SCSI interface.  This is the normal method
> for providing (virtual) disks to PAPR partitions.
>
> Signed-off-by: Ben Herrenschmidt<benh@kernel.crashing.org>
> Signed-off-by: David Gibson<dwg@au1.ibm.com>
> ---
>   Makefile.target  |    2 +-
>   hw/ppc-viosrp.h  |  216 ++++++++++++
>   hw/spapr.c       |   10 +-
>   hw/spapr_vio.h   |    3 +
>   hw/spapr_vscsi.c |  960 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   hw/srp.h         |  241 ++++++++++++++
>   6 files changed, 1430 insertions(+), 2 deletions(-)
>   create mode 100644 hw/ppc-viosrp.h
>   create mode 100644 hw/spapr_vscsi.c
>   create mode 100644 hw/srp.h
>
> diff --git a/Makefile.target b/Makefile.target
> index ef86d43..49f9e9a 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -233,7 +233,7 @@ obj-ppc-y += ppc_oldworld.o
>   obj-ppc-y += ppc_newworld.o
>   # IBM pSeries (sPAPR)
>   obj-ppc-y += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
> -obj-ppc-y += xics.o spapr_vty.o spapr_llan.o
> +obj-ppc-y += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
>   # PowerPC 4xx boards
>   obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
>   obj-ppc-y += ppc440.o ppc440_bamboo.o
> diff --git a/hw/ppc-viosrp.h b/hw/ppc-viosrp.h
> new file mode 100644
> index 0000000..9afcf7a
> --- /dev/null
> +++ b/hw/ppc-viosrp.h
> @@ -0,0 +1,216 @@
> +/*****************************************************************************/
> +/* srp.h -- SCSI RDMA Protocol definitions                                   */
> +/*                                                                           */
> +/* Written By: Colin Devilbis, IBM Corporation                               */
> +/*                                                                           */
> +/* Copyright (C) 2003 IBM Corporation                                        */
> +/*                                                                           */
> +/* This program is free software; you can redistribute it and/or modify      */
> +/* it under the terms of the GNU General Public License as published by      */
> +/* the Free Software Foundation; either version 2 of the License, or         */
> +/* (at your option) any later version.                                       */
> +/*                                                                           */
> +/* This program is distributed in the hope that it will be useful,           */
> +/* but WITHOUT ANY WARRANTY; without even the implied warranty of            */
> +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             */
> +/* GNU General Public License for more details.                              */
> +/*                                                                           */
> +/* You should have received a copy of the GNU General Public License         */
> +/* along with this program; if not, write to the Free Software               */
> +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
> +/*                                                                           */
> +/*                                                                           */
> +/* This file contains structures and definitions for IBM RPA (RS/6000        */
> +/* platform architecture) implementation of the SRP (SCSI RDMA Protocol)     */
> +/* standard.  SRP is used on IBM iSeries and pSeries platforms to send SCSI  */
> +/* commands between logical partitions.                                      */
> +/*                                                                           */
> +/* SRP Information Units (IUs) are sent on a "Command/Response Queue" (CRQ)  */
> +/* between partitions.  The definitions in this file are architected,        */
> +/* and cannot be changed without breaking compatibility with other versions  */
> +/* of Linux and other operating systems (AIX, OS/400) that talk this protocol*/
> +/* between logical partitions                                                */
> +/*****************************************************************************/
> +#ifndef PPC_VIOSRP_H
> +#define PPC_VIOSRP_H
> +
> +#define SRP_VERSION "16.a"
> +#define SRP_MAX_IU_LEN    256
> +#define SRP_MAX_LOC_LEN 32
> +
> +union srp_iu {
> +    struct srp_login_req login_req;
> +    struct srp_login_rsp login_rsp;
> +    struct srp_login_rej login_rej;
> +    struct srp_i_logout i_logout;
> +    struct srp_t_logout t_logout;
> +    struct srp_tsk_mgmt tsk_mgmt;
> +    struct srp_cmd cmd;
> +    struct srp_rsp rsp;
> +    uint8_t reserved[SRP_MAX_IU_LEN];
> +};
> +
> +enum viosrp_crq_formats {
> +    VIOSRP_SRP_FORMAT = 0x01,
> +    VIOSRP_MAD_FORMAT = 0x02,
> +    VIOSRP_OS400_FORMAT = 0x03,
> +    VIOSRP_AIX_FORMAT = 0x04,
> +    VIOSRP_LINUX_FORMAT = 0x06,
> +    VIOSRP_INLINE_FORMAT = 0x07
> +};
> +
> +enum viosrp_crq_status {
> +    VIOSRP_OK = 0x0,
> +    VIOSRP_NONRECOVERABLE_ERR = 0x1,
> +    VIOSRP_VIOLATES_MAX_XFER = 0x2,
> +    VIOSRP_PARTNER_PANIC = 0x3,
> +    VIOSRP_DEVICE_BUSY = 0x8,
> +    VIOSRP_ADAPTER_FAIL = 0x10,
> +    VIOSRP_OK2 = 0x99,
> +};
> +
> +struct viosrp_crq {
> +    uint8_t valid;        /* used by RPA */
> +    uint8_t format;        /* SCSI vs out-of-band */
> +    uint8_t reserved;
> +    uint8_t status;        /* non-scsi failure? (e.g. DMA failure) */
> +    uint16_t timeout;        /* in seconds */
> +    uint16_t IU_length;        /* in bytes */
> +    uint64_t IU_data_ptr;    /* the TCE for transferring data */
> +};
> +
> +/* MADs are Management requests above and beyond the IUs defined in the SRP
> + * standard.
> + */
> +enum viosrp_mad_types {
> +    VIOSRP_EMPTY_IU_TYPE = 0x01,
> +    VIOSRP_ERROR_LOG_TYPE = 0x02,
> +    VIOSRP_ADAPTER_INFO_TYPE = 0x03,
> +    VIOSRP_HOST_CONFIG_TYPE = 0x04,
> +    VIOSRP_CAPABILITIES_TYPE = 0x05,
> +    VIOSRP_ENABLE_FAST_FAIL = 0x08,
> +};
> +
> +enum viosrp_mad_status {
> +    VIOSRP_MAD_SUCCESS = 0x00,
> +    VIOSRP_MAD_NOT_SUPPORTED = 0xF1,
> +    VIOSRP_MAD_FAILED = 0xF7,
> +};
> +
> +enum viosrp_capability_type {
> +    MIGRATION_CAPABILITIES = 0x01,
> +    RESERVATION_CAPABILITIES = 0x02,
> +};
> +
> +enum viosrp_capability_support {
> +    SERVER_DOES_NOT_SUPPORTS_CAP = 0x0,
> +    SERVER_SUPPORTS_CAP = 0x01,
> +    SERVER_CAP_DATA = 0x02,
> +};
> +
> +enum viosrp_reserve_type {
> +    CLIENT_RESERVE_SCSI_2 = 0x01,
> +};
> +
> +enum viosrp_capability_flag {
> +    CLIENT_MIGRATED = 0x01,
> +    CLIENT_RECONNECT = 0x02,
> +    CAP_LIST_SUPPORTED = 0x04,
> +    CAP_LIST_DATA = 0x08,
> +};
> +
> +/*
> + * Common MAD header
> + */
> +struct mad_common {
> +    uint32_t type;
> +    uint16_t status;
> +    uint16_t length;
> +    uint64_t tag;

Is this an in-memory representation? If so, it should be packed, right? 
Same goes for the ones below.

> +};
> +
> +/*
> + * All SRP (and MAD) requests normally flow from the
> + * client to the server.  There is no way for the server to send
> + * an asynchronous message back to the client.  The Empty IU is used
> + * to hang out a meaningless request to the server so that it can respond
> + * asynchrouously with something like a SCSI AER
> + */
> +struct viosrp_empty_iu {
> +    struct mad_common common;
> +    uint64_t buffer;
> +    uint32_t port;
> +};
> +
> +struct viosrp_error_log {
> +    struct mad_common common;
> +    uint64_t buffer;
> +};
> +
> +struct viosrp_adapter_info {
> +    struct mad_common common;
> +    uint64_t buffer;
> +};
> +
> +struct viosrp_host_config {
> +    struct mad_common common;
> +    uint64_t buffer;
> +};
> +
> +struct viosrp_fast_fail {
> +    struct mad_common common;
> +};
> +
> +struct viosrp_capabilities {
> +    struct mad_common common;
> +    uint64_t buffer;
> +};
> +
> +struct mad_capability_common {
> +    uint32_t cap_type;
> +    uint16_t length;
> +    uint16_t server_support;
> +};
> +
> +struct mad_reserve_cap {
> +    struct mad_capability_common common;
> +    uint32_t type;
> +};
> +
> +struct mad_migration_cap {
> +    struct mad_capability_common common;
> +    uint32_t ecl;
> +};
> +
> +struct capabilities{

Missing space before the opening brace, but this struct looks unused anyway.

> +    uint32_t flags;
> +    char name[SRP_MAX_LOC_LEN];
> +    char loc[SRP_MAX_LOC_LEN];
> +    struct mad_migration_cap migration;
> +    struct mad_reserve_cap reserve;
> +};
> +
> +union mad_iu {
> +    struct viosrp_empty_iu empty_iu;
> +    struct viosrp_error_log error_log;
> +    struct viosrp_adapter_info adapter_info;
> +    struct viosrp_host_config host_config;
> +    struct viosrp_fast_fail fast_fail;
> +    struct viosrp_capabilities capabilities;
> +};
> +
> +union viosrp_iu {
> +    union srp_iu srp;
> +    union mad_iu mad;
> +};
> +
> +struct mad_adapter_info_data {
> +    char srp_version[8];
> +    char partition_name[96];
> +    uint32_t partition_number;
> +    uint32_t mad_version;
> +    uint32_t os_type;
> +    uint32_t port_max_txu[8];    /* per-port maximum transfer */
> +};
> +
> +#endif
> diff --git a/hw/spapr.c b/hw/spapr.c
> index cb97a16..5f868fc 100644
> --- a/hw/spapr.c
> +++ b/hw/spapr.c
> @@ -28,6 +28,7 @@
>   #include "hw.h"
>   #include "elf.h"
>   #include "net.h"
> +#include "blockdev.h"
>
>   #include "hw/boards.h"
>   #include "hw/ppc.h"
> @@ -316,7 +317,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>       qemu_free(filename);
>
>       /* Set up Interrupt Controller */
> -    spapr->icp = xics_system_init(smp_cpus, envs, MAX_SERIAL_PORTS + nb_nics);
> +    spapr->icp = xics_system_init(smp_cpus, envs, MAX_SERIAL_PORTS + nb_nics + drive_get_max_bus(IF_SCSI) + 1);

This looks like it's exceeding 80 characters :)

>
>       /* Set up VIO bus */
>       spapr->vio_bus = spapr_vio_bus_init();
> @@ -346,6 +347,12 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>           }
>       }
>
> +    for (i = 0; i<= drive_get_max_bus(IF_SCSI); i++) {
> +        spapr_vscsi_create(spapr->vio_bus, 0x2000 + i,
> +                           xics_find_qirq(spapr->icp, irq), irq);
> +        irq++;
> +    }
> +
>       if (kernel_filename) {
>           uint64_t lowaddr = 0;
>
> @@ -406,6 +413,7 @@ static QEMUMachine spapr_machine = {
>       .max_cpus = MAX_CPUS,
>       .no_vga = 1,
>       .no_parallel = 1,
> +    .use_scsi = 1,
>   };
>
>   static void spapr_machine_init(void)
> diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h
> index ba16795..b7d0daa 100644
> --- a/hw/spapr_vio.h
> +++ b/hw/spapr_vio.h
> @@ -101,4 +101,7 @@ void spapr_vty_create(VIOsPAPRBus *bus,
>   void spapr_vlan_create(VIOsPAPRBus *bus, uint32_t reg, NICInfo *nd,
>                          qemu_irq qirq, uint32_t vio_irq_num);
>
> +void spapr_vscsi_create(VIOsPAPRBus *bus, uint32_t reg,
> +                        qemu_irq qirq, uint32_t vio_irq_num);
> +
>   #endif /* _HW_SPAPR_VIO_H */
> diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
> new file mode 100644
> index 0000000..0a67095
> --- /dev/null
> +++ b/hw/spapr_vscsi.c
> @@ -0,0 +1,960 @@

This new file is missing a license header.

> +/* TODO:
> + *
> + *  - Cleanups :-)
> + *  - Sort out better how to assign devices to VSCSI instances
> + *  - Fix residual counts
> + *  - Add indirect descriptors support
> + *  - Maybe do autosense (PAPR seems to mandate it, linux doesn't care)
> + */
> +#include "hw.h"
> +#include "scsi.h"
> +#include "scsi-defs.h"
> +#include "net.h" /* Remove that when we can */
> +#include "srp.h"
> +#include "hw/qdev.h"
> +#include "hw/spapr.h"
> +#include "hw/spapr_vio.h"
> +#include "hw/ppc-viosrp.h"
> +
> +#include<libfdt.h>
> +
> +//#define DEBUG_VSCSI
> +
> +#ifdef DEBUG_VSCSI
> +#define dprintf(fmt, ...) \
> +    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
> +#else
> +#define dprintf(fmt, ...) \
> +    do { } while (0)
> +#endif
> +
> +#define min(a, b) ((a)<  (b) ? (a) : (b))

There's already a MIN() macro for that.

> +
> +/*
> + * Virtual SCSI device
> + */
> +
> +/* Random numbers */
> +#define VSCSI_MAX_SECTORS       4096/*1024*//*256*/

Probably good to just remove the commented-out values.

> +#define VSCSI_REQ_LIMIT         24
> +
> +#define SCSI_SENSE_BUF_SIZE     96
> +#define SRP_RSP_SENSE_DATA_LEN  18
> +
> +typedef union vscsi_crq {
> +    struct viosrp_crq s;
> +    uint8_t raw[16];
> +} vscsi_crq;
> +
> +typedef struct vscsi_req
> +{
> +    vscsi_crq               crq;
> +    union viosrp_iu         iu;
> +
> +    /* SCSI request tracking */
> +    SCSIDevice              *sdev;
> +    uint32_t                qtag; /* qemu tag != srp tag */
> +    int                     lun;
> +    int                     active;
> +    long                    data_len;
> +    int                     writing;
> +    int                     sensing;
> +    int                     senselen;
> +    uint8_t                 sense[SCSI_SENSE_BUF_SIZE];
> +
> +    /* RDMA related bits */
> +    uint8_t                 dma_fmt;
> +    struct srp_direct_buf   ext_desc;
> +    struct srp_direct_buf   *cur_desc;
> +    struct srp_indirect_buf *ind_desc;
> +    int                     local_desc;
> +    int                     total_desc;
> +
> +} vscsi_req;
> +
> +
> +typedef struct {
> +    VIOsPAPRDevice vdev;
> +    SCSIBus bus;
> +    vscsi_req reqs[VSCSI_REQ_LIMIT];
> +} VSCSIState;
> +
> +/* XXX Debug only */
> +static VSCSIState *dbg_vscsi_state;
> +
> +
> +static struct vscsi_req *vscsi_get_req(VSCSIState *s)
> +{
> +    vscsi_req *req;
> +    int i;
> +
> +    for (i = 0; i<  VSCSI_REQ_LIMIT; i++) {
> +        req =&s->reqs[i];
> +        if (!req->active) {
> +            memset(req, 0, sizeof(*req));
> +            req->qtag = i;
> +            req->active = 1;
> +            return req;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +static void vscsi_put_req(VSCSIState *s, vscsi_req *req)
> +{
> +    req->active = 0;
> +}
> +
> +static vscsi_req *vscsi_find_req(VSCSIState *s, uint32_t tag)
> +{
> +    if (tag>= VSCSI_REQ_LIMIT || !s->reqs[tag].active) {
> +        return NULL;
> +    }
> +    return&s->reqs[tag];
> +}
> +
> +static void vscsi_decode_id_lun(uint64_t srp_lun, int *id, int *lun)
> +{
> +    /* XXX Figure that one out properly ! This is crackpot */
> +    *id = (srp_lun>>  56)&  0x7f;
> +    *lun = (srp_lun>>  48)&  0xff;
> +}
> +
> +static int vscsi_send_iu(VSCSIState *s, vscsi_req *req,
> +                         uint64_t length, uint8_t format)
> +{
> +    long rc, rc1;
> +
> +    /* First copy the SRP */
> +    rc = spapr_tce_dma_write(&s->vdev, req->crq.s.IU_data_ptr,
> +&req->iu, length);
> +    if (rc) {
> +        fprintf(stderr, "vscsi_send_iu: DMA write failure !\n");
> +    }
> +
> +    req->crq.s.valid = 0x80;
> +    req->crq.s.format = format;
> +    req->crq.s.reserved = 0x00;
> +    req->crq.s.timeout = cpu_to_be16(0x0000);
> +    req->crq.s.IU_length = cpu_to_be16(length);
> +    req->crq.s.IU_data_ptr = req->iu.srp.rsp.tag; /* right byte order */
> +
> +    if (rc == 0) {
> +        req->crq.s.status = 0x99; /* Just needs to be non-zero */
> +    } else {
> +        req->crq.s.status = 0x00;
> +    }
> +
> +    rc1 = spapr_vio_send_crq(&s->vdev, req->crq.raw);
> +    if (rc1) {
> +        fprintf(stderr, "vscsi_send_iu: Error sending response\n");
> +        return rc1;
> +    }
> +
> +    return rc;
> +}
> +
> +static void vscsi_makeup_sense(VSCSIState *s, vscsi_req *req,
> +                               uint8_t key, uint8_t asc, uint8_t ascq)
> +{
> +    req->senselen = SRP_RSP_SENSE_DATA_LEN;
> +
> +    /* Valid bit and 'current errors' */
> +    req->sense[0] = (0x1<<  7 | 0x70);
> +    /* Sense key */
> +    req->sense[2] = key;
> +    /* Additional sense length */
> +    req->sense[7] = 0xa; /* 10 bytes */
> +    /* Additional sense code */
> +    req->sense[12] = asc;
> +    req->sense[13] = ascq;
> +}
> +
> +static int vscsi_send_rsp(VSCSIState *s, vscsi_req *req,
> +                          uint8_t status, int32_t res_in, int32_t res_out)
> +{
> +   union viosrp_iu *iu =&req->iu;
> +   uint64_t tag = iu->srp.rsp.tag;
> +   int total_len = sizeof(iu->srp.rsp);
> +
> +   dprintf("VSCSI: Sending resp status: 0x%x, "
> +           "res_in: %d, res_out: %d \n", status, res_in, res_out);
> +
> +   memset(iu, 0, sizeof(struct srp_rsp));
> +   iu->srp.rsp.opcode = SRP_RSP;
> +   iu->srp.rsp.req_lim_delta = cpu_to_be32(1);
> +   iu->srp.rsp.tag = tag;
> +
> +   /* Handle residuals */
> +   if (res_in<  0) {
> +       iu->srp.rsp.flags |= SRP_RSP_FLAG_DIUNDER;
> +       res_in = -res_in;
> +   } else if (res_in) {
> +       iu->srp.rsp.flags |= SRP_RSP_FLAG_DIOVER;
> +   }
> +   if (res_out<  0) {
> +       iu->srp.rsp.flags |= SRP_RSP_FLAG_DOUNDER;
> +       res_out = -res_out;
> +   } else if (res_out) {
> +       iu->srp.rsp.flags |= SRP_RSP_FLAG_DOOVER;
> +   }
> +   iu->srp.rsp.data_in_res_cnt = cpu_to_be32(res_in);
> +   iu->srp.rsp.data_out_res_cnt = cpu_to_be32(res_out);
> +
> +   /* We don't do response data */
> +   /* iu->srp.rsp.flags&= ~SRP_RSP_FLAG_RSPVALID; */
> +   iu->srp.rsp.resp_data_len = cpu_to_be32(0);
> +
> +   /* Handle success vs. failure */
> +   iu->srp.rsp.status = status;
> +   if (status) {
> +       iu->srp.rsp.sol_not = (iu->srp.cmd.sol_not&  0x04)>>  2;
> +       if (req->senselen) {
> +           req->iu.srp.rsp.flags |= SRP_RSP_FLAG_SNSVALID;
> +           req->iu.srp.rsp.sense_data_len = cpu_to_be32(req->senselen);
> +           memcpy(req->iu.srp.rsp.data, req->sense, req->senselen);
> +           total_len += req->senselen;
> +       }
> +   } else {
> +       iu->srp.rsp.sol_not = (iu->srp.cmd.sol_not&  0x02)>>  1;
> +   }
> +
> +   vscsi_send_iu(s, req, total_len, VIOSRP_SRP_FORMAT);
> +   return 0;
> +}
> +
> +static inline void vscsi_swap_desc(struct srp_direct_buf *desc)
> +{
> +    desc->va = be64_to_cpu(desc->va);
> +    desc->len = be32_to_cpu(desc->len);
> +}
> +
> +static int vscsi_srp_direct_data(VSCSIState *s, vscsi_req *req,
> +                                 uint8_t *buf, uint32_t len)
> +{
> +    struct srp_direct_buf *md = req->cur_desc;
> +    uint32_t llen;
> +    int rc;
> +
> +    dprintf("VSCSI: direct segment 0x%x bytes, va=0x%llx desc len=0x%x\n",
> +            len, (unsigned long long)md->va, md->len);
> +
> +    llen = min(len, md->len);
> +    if (llen) {
> +        if (req->writing) { /* writing = to device = reading from memory */
> +            rc = spapr_tce_dma_read(&s->vdev, md->va, buf, llen);
> +        } else {
> +            rc = spapr_tce_dma_write(&s->vdev, md->va, buf, llen);
> +        }
> +    }
> +    md->len -= llen;
> +    md->va += llen;
> +
> +    if (rc) {
> +        return -1;
> +    }
> +    return llen;
> +}
> +
> +static int vscsi_srp_indirect_data(VSCSIState *s, vscsi_req *req,
> +                                   uint8_t *buf, uint32_t len)
> +{
> +    struct srp_direct_buf *td =&req->ind_desc->table_desc;
> +    struct srp_direct_buf *md = req->cur_desc;
> +    int rc = 0;
> +    uint32_t llen, total = 0;
> +
> +    dprintf("VSCSI: indirect segment 0x%x bytes, td va=0x%llx len=0x%x\n",
> +            len, (unsigned long long)td->va, td->len);
> +
> +    /* While we have data ... */
> +    while(len) {
> +        /* If we have a descriptor but it's empty, go fetch a new one */
> +        if (md&&  md->len == 0) {
> +            /* More local available, use one */
> +            if (req->local_desc) {
> +                md = ++req->cur_desc;
> +                --req->local_desc;
> +                --req->total_desc;
> +                td->va += sizeof(struct srp_direct_buf);
> +            } else {
> +                md = req->cur_desc = NULL;
> +            }
> +        }
> +        /* No descriptor at hand, fetch one */
> +        if (!md) {
> +            if (!req->total_desc) {
> +                dprintf("VSCSI:   Out of descriptors !\n");
> +                break;
> +            }
> +            md = req->cur_desc =&req->ext_desc;
> +            dprintf("VSCSI:   Reading desc from 0x%llx\n", (unsigned long long)td->va);
> +            rc = spapr_tce_dma_read(&s->vdev, td->va, md, sizeof(struct srp_direct_buf));
> +            if (rc) {
> +                dprintf("VSCSI: tce_dma_read ->  %d reading ext_desc\n", rc);
> +                break;
> +            }
> +            vscsi_swap_desc(md);
> +            td->va += sizeof(struct srp_direct_buf);
> +            --req->total_desc;
> +        }
> +        dprintf("VSCSI:   [desc va=0x%llx,len=0x%x] remaining=0x%x\n",
> +                (unsigned long long)md->va, md->len, len);
> +
> +        /* Perform transfer */
> +        llen = min(len, md->len);
> +        if (req->writing) { /* writing = to device = reading from memory */
> +            rc = spapr_tce_dma_read(&s->vdev, md->va, buf, llen);
> +

Spurious blank line.

> +        } else {
> +            rc = spapr_tce_dma_write(&s->vdev, md->va, buf, llen);
> +        }
> +        if (rc) {
> +            dprintf("VSCSI: tce_dma_r/w(%d) ->  %d\n", req->writing, rc);
> +            break;
> +        }
> +        dprintf("VSCSI:     data: %02x %02x %02x %02x...\n",
> +                buf[0], buf[1], buf[2], buf[3]);
> +
> +        len -= llen;
> +        buf += llen;
> +        total += llen;
> +        md->va += llen;
> +        md->len -= llen;
> +    }
> +    return rc ? -1 : total;
> +}
> +
> +static int vscsi_srp_transfer_data(VSCSIState *s, vscsi_req *req,
> +                                   int writing, uint8_t *buf, uint32_t len)
> +{
> +    int err = 0;
> +
> +    switch (req->dma_fmt) {
> +    case SRP_NO_DATA_DESC:
> +        dprintf("VSCSI: no data desc transfer, skipping 0x%x bytes\n", len);
> +        break;
> +    case SRP_DATA_DESC_DIRECT:
> +        err = vscsi_srp_direct_data(s, req, buf, len);
> +        break;
> +    case SRP_DATA_DESC_INDIRECT:
> +        err = vscsi_srp_indirect_data(s, req, buf, len);
> +        break;
> +    }
> +    return err;
> +}
> +
> +/* Bits from linux srp */
> +static int data_out_desc_size(struct srp_cmd *cmd)
> +{
> +    int size = 0;
> +    uint8_t fmt = cmd->buf_fmt>>  4;
> +
> +    switch (fmt) {
> +    case SRP_NO_DATA_DESC:
> +        break;
> +    case SRP_DATA_DESC_DIRECT:
> +        size = sizeof(struct srp_direct_buf);
> +        break;
> +    case SRP_DATA_DESC_INDIRECT:
> +        size = sizeof(struct srp_indirect_buf) +
> +            sizeof(struct srp_direct_buf) * cmd->data_out_desc_cnt;
> +        break;
> +    default:
> +        break;
> +    }
> +    return size;
> +}
> +
> +static int vscsi_preprocess_desc(vscsi_req *req)
> +{
> +    struct srp_cmd *cmd =&req->iu.srp.cmd;
> +    int offset, i;
> +
> +    offset = cmd->add_cdb_len&  ~3;
> +
> +    if (req->writing) {
> +        req->dma_fmt = cmd->buf_fmt>>  4;
> +    } else {
> +        offset += data_out_desc_size(cmd);
> +        req->dma_fmt = cmd->buf_fmt&  ((1U<<  4) - 1);
> +    }
> +
> +    switch (req->dma_fmt) {
> +    case SRP_NO_DATA_DESC:
> +        break;
> +    case SRP_DATA_DESC_DIRECT:
> +        req->cur_desc = (struct srp_direct_buf *)(cmd->add_data + offset);
> +        req->total_desc = req->local_desc = 1;
> +        vscsi_swap_desc(req->cur_desc);
> +        dprintf("VSCSI: using direct RDMA %s, 0x%x bytes MD: 0x%llx\n",
> +                req->writing ? "write" : "read",
> +                req->cur_desc->len, (unsigned long long)req->cur_desc->va);
> +        break;
> +    case SRP_DATA_DESC_INDIRECT:
> +        req->ind_desc = (struct srp_indirect_buf *)(cmd->add_data + offset);
> +        vscsi_swap_desc(&req->ind_desc->table_desc);
> +        req->total_desc = req->ind_desc->table_desc.len / sizeof(struct srp_direct_buf);
> +        req->local_desc = req->writing ? cmd->data_out_desc_cnt :
> +            cmd->data_in_desc_cnt;
> +        for (i = 0; i<  req->local_desc; i++)

Missing braces around the loop body.

> +            vscsi_swap_desc(&req->ind_desc->desc_list[i]);
> +        req->cur_desc = req->local_desc ?&req->ind_desc->desc_list[0] : NULL;
> +        dprintf("VSCSI: using indirect RDMA %s, 0x%x bytes %d descs (%d local) VA: 0x%llx\n",
> +                req->writing ? "read" : "write", be32_to_cpu(req->ind_desc->len),
> +                req->total_desc, req->local_desc,
> +                (unsigned long long)req->ind_desc->table_desc.va);
> +        break;
> +    default:
> +        fprintf(stderr,
> +                "vscsi_preprocess_desc: Unknown format %x\n", req->dma_fmt);
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static void vscsi_send_request_sense(VSCSIState *s, vscsi_req *req)
> +{
> +    SCSIDevice *sdev = req->sdev;
> +    uint8_t *cdb = req->iu.srp.cmd.cdb;
> +    int n;
> +
> +    cdb[0] = 3;
> +    cdb[1] = 0;
> +    cdb[2] = 0;
> +    cdb[3] = 0;
> +    cdb[4] = 96;
> +    cdb[5] = 0;
> +    req->sensing = 1;
> +    n = sdev->info->send_command(sdev, req->qtag, cdb, req->lun);
> +    dprintf("VSCSI: Queued request sense tag 0x%x \n", req->qtag);
> +    if (n<  0) {
> +        fprintf(stderr, "VSCSI: REQUEST_SENSE wants write data !?!?!?\n");
> +        sdev->info->cancel_io(sdev, req->qtag);
> +        vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0);
> +        vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
> +        vscsi_put_req(s, req);
> +        return;
> +    } else if (n == 0) {
> +        return;
> +    }
> +    sdev->info->read_data(sdev, req->qtag);
> +}
> +
> +/* Callback to indicate that the SCSI layer has completed a transfer.  */
> +static void vscsi_command_complete(SCSIBus *bus, int reason, uint32_t tag,
> +                                   uint32_t arg)
> +{
> +    VSCSIState *s = DO_UPCAST(VSCSIState, vdev.qdev, bus->qbus.parent);
> +    vscsi_req *req = vscsi_find_req(s, tag);
> +    SCSIDevice *sdev;
> +    uint8_t *buf;
> +    int32_t res_in = 0, res_out = 0;
> +    int len, rc = 0;
> +
> +    dprintf("VSCSI: SCSI cmd complete, r=0x%x tag=0x%x arg=0x%x, req=%p\n",
> +            reason, tag, arg, req);
> +    if (req == NULL) {
> +        fprintf(stderr, "VSCSI: Can't find request for tag 0x%x\n", tag);
> +        return;
> +    }
> +    sdev = req->sdev;
> +
> +    if (req->sensing) {
> +        if (reason == SCSI_REASON_DONE) {
> +            dprintf("VSCSI: Sense done !\n");
> +            vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
> +            vscsi_put_req(s, req);
> +        } else {
> +            uint8_t *buf = sdev->info->get_buf(sdev, tag);
> +
> +            len = min(arg, SCSI_SENSE_BUF_SIZE);
> +            dprintf("VSCSI: Sense data, %d bytes:\n", len);
> +            dprintf("       %02x  %02x  %02x  %02x  %02x  %02x  %02x  %02x\n",
> +                    buf[0], buf[1], buf[2], buf[3],
> +                    buf[4], buf[5], buf[6], buf[7]);
> +            dprintf("       %02x  %02x  %02x  %02x  %02x  %02x  %02x  %02x\n",
> +                    buf[8], buf[9], buf[10], buf[11],
> +                    buf[12], buf[13], buf[14], buf[15]);
> +            memcpy(req->sense, buf, len);
> +            req->senselen = len;
> +            sdev->info->read_data(sdev, req->qtag);
> +        }
> +        return;
> +    }
> +
> +    if (reason == SCSI_REASON_DONE) {
> +        dprintf("VSCSI: Command complete err=%d\n", arg);
> +        if (arg == 0) {
> +            /* We handle overflows, not underflows for normal commands,
> +             * but hopefully nobody cares
> +             */
> +            if (req->writing)

Missing braces around the if/else bodies.

> +                res_out = req->data_len;
> +            else
> +                res_in = req->data_len;
> +            vscsi_send_rsp(s, req, 0, res_in, res_out);
> +        } else if (arg == CHECK_CONDITION) {
> +            dprintf("VSCSI: Got CHECK_CONDITION, requesting sense...\n");
> +            vscsi_send_request_sense(s, req);
> +            return;
> +        } else {
> +            vscsi_send_rsp(s, req, arg, 0, 0);
> +        }
> +        vscsi_put_req(s, req);
> +        return;
> +    }
> +
> +    /* "arg" is how much we have read for reads and how much we want
> +     * to write for writes (ie, how much is to be DMA'd)
> +     */
> +    if (arg) {
> +        buf = sdev->info->get_buf(sdev, tag);
> +        rc = vscsi_srp_transfer_data(s, req, req->writing, buf, arg);
> +    }
> +    if (rc<  0) {
> +        fprintf(stderr, "VSCSI: RDMA error rc=%d!\n", rc);
> +        sdev->info->cancel_io(sdev, req->qtag);
> +        vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0);
> +        vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
> +        vscsi_put_req(s, req);
> +        return;
> +    }
> +
> +    /* Start next chunk */
> +    req->data_len -= rc;
> +    if (req->writing) {
> +        sdev->info->write_data(sdev, req->qtag);
> +    } else {
> +        sdev->info->read_data(sdev, req->qtag);
> +    }
> +}
> +
> +static void vscsi_process_login(VSCSIState *s, vscsi_req *req)
> +{
> +    union viosrp_iu *iu =&req->iu;
> +    struct srp_login_rsp *rsp =&iu->srp.login_rsp;
> +    uint64_t tag = iu->srp.rsp.tag;
> +
> +    dprintf("VSCSI: Got login, sendin response !\n");
> +
> +    /* TODO handle case that requested size is wrong and
> +     * buffer format is wrong
> +     */
> +    memset(iu, 0, sizeof(struct srp_login_rsp));
> +    rsp->opcode = SRP_LOGIN_RSP;
> +    /* Don't advertise quite as many request as we support to
> +     * keep room for management stuff etc...
> +     */
> +    rsp->req_lim_delta = cpu_to_be32(VSCSI_REQ_LIMIT-2);
> +    rsp->tag = tag;
> +    rsp->max_it_iu_len = cpu_to_be32(sizeof(union srp_iu));
> +    rsp->max_ti_iu_len = cpu_to_be32(sizeof(union srp_iu));
> +    /* direct and indirect */
> +    rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
> +
> +    vscsi_send_iu(s, req, sizeof(*rsp), VIOSRP_SRP_FORMAT);
> +}
> +
> +static void vscsi_inquiry_no_target(VSCSIState *s, vscsi_req *req)
> +{
> +    uint8_t *cdb = req->iu.srp.cmd.cdb;
> +    uint8_t resp_data[36];
> +    int rc, len, alen;
> +
> +    /* We dont do EVPD. Also check that page_code is 0 */
> +    if ((cdb[1]&  0x01) || (cdb[1]&  0x01) || cdb[2] != 0) {
> +        /* Send INVALID FIELD IN CDB */
> +        vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x24, 0);
> +        vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
> +        return;
> +    }
> +    alen = cdb[3];
> +    alen = (alen<<  8) | cdb[4];
> +    len = min(alen, 36);
> +
> +    /* Fake up inquiry using PQ=3 */
> +    memset(resp_data, 0, 36);
> +    resp_data[0] = 0x7f;   /* Not capable of supporting a device here */
> +    resp_data[2] = 0x06;   /* SPS-4 */
> +    resp_data[3] = 0x02;   /* Resp data format */
> +    resp_data[4] = 36 - 5; /* Additional length */
> +    resp_data[7] = 0x10;   /* Sync transfers */
> +    memcpy(&resp_data[16], "QEMU EMPTY      ", 16);
> +    memcpy(&resp_data[8], "QEMU    ", 8);
> +
> +    req->writing = 0;
> +    vscsi_preprocess_desc(req);
> +    rc = vscsi_srp_transfer_data(s, req, 0, resp_data, len);
> +    if (rc<  0) {
> +        vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0);
> +        vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
> +    } else {
> +        vscsi_send_rsp(s, req, 0, 36 - rc, 0);
> +    }
> +}
> +
> +static int vscsi_queue_cmd(VSCSIState *s, vscsi_req *req)
> +{
> +    union srp_iu *srp =&req->iu.srp;
> +    SCSIDevice *sdev;
> +    int n, id, lun;
> +
> +    vscsi_decode_id_lun(be64_to_cpu(srp->cmd.lun),&id,&lun);
> +
> +    /* Qemu vs. linux issue with LUNs to be sorted out ... */
> +    sdev = (id<  8&&  lun<  16) ? s->bus.devs[id] : NULL;
> +    if (!sdev) {
> +        dprintf("VSCSI: Command for id %d with no drive\n", id);
> +        if (srp->cmd.cdb[0] == INQUIRY) {
> +            vscsi_inquiry_no_target(s, req);
> +        } else {
> +            vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x24, 0x00);
> +            vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
> +        } return 1;
> +    }
> +
> +    req->sdev = sdev;
> +    req->lun = lun;
> +    n = sdev->info->send_command(sdev, req->qtag, srp->cmd.cdb, lun);
> +
> +    dprintf("VSCSI: Queued command tag 0x%x CMD 0x%x ID %d LUN %d ret: %d\n",
> +            req->qtag, srp->cmd.cdb[0], id, lun, n);
> +
> +    if (n) {
> +        /* Transfer direction must be set before preprocessing the
> +         * descriptors
> +         */
> +        req->writing = (n<  1);
> +
> +        /* Preprocess RDMA descriptors */
> +        vscsi_preprocess_desc(req);
> +    }
> +
> +    /* Get transfer direction and initiate transfer */
> +    if (n>  0) {
> +        req->data_len = n;
> +        sdev->info->read_data(sdev, req->qtag);
> +    } else if (n<  0) {
> +        req->data_len = -n;
> +        sdev->info->write_data(sdev, req->qtag);
> +    }
> +    /* Don't touch req here, it may have been recycled already */
> +
> +    return 0;
> +}
> +
> +static int vscsi_process_tsk_mgmt(VSCSIState *s, vscsi_req *req)
> +{
> +    union viosrp_iu *iu =&req->iu;
> +    int fn;
> +
> +    fprintf(stderr, "vscsi_process_tsk_mgmt %02x\n",
> +            iu->srp.tsk_mgmt.tsk_mgmt_func);
> +
> +    switch (iu->srp.tsk_mgmt.tsk_mgmt_func) {
> +#if 0 /* We really don't deal with these for now */
> +    case SRP_TSK_ABORT_TASK:
> +        fn = ABORT_TASK;
> +        break;
> +    case SRP_TSK_ABORT_TASK_SET:
> +        fn = ABORT_TASK_SET;
> +        break;
> +    case SRP_TSK_CLEAR_TASK_SET:
> +        fn = CLEAR_TASK_SET;
> +        break;
> +    case SRP_TSK_LUN_RESET:
> +        fn = LOGICAL_UNIT_RESET;
> +        break;
> +    case SRP_TSK_CLEAR_ACA:
> +        fn = CLEAR_ACA;
> +        break;
> +#endif
> +    default:
> +        fn = 0;
> +    }
> +    if (fn) {
> +        /* XXX Send/Handle target task management */
> +        ;
> +    } else {
> +        vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x20, 0);
> +        vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
> +    }
> +    return !fn;
> +}
> +
> +static int vscsi_handle_srp_req(VSCSIState *s, vscsi_req *req)
> +{
> +    union srp_iu *srp =&req->iu.srp;
> +    int done = 1;
> +    uint8_t opcode = srp->rsp.opcode;
> +
> +    switch (opcode) {
> +    case SRP_LOGIN_REQ:
> +        vscsi_process_login(s, req);
> +        break;
> +    case SRP_TSK_MGMT:
> +        done = vscsi_process_tsk_mgmt(s, req);
> +        break;
> +    case SRP_CMD:
> +        done = vscsi_queue_cmd(s, req);
> +        break;
> +    case SRP_LOGIN_RSP:
> +    case SRP_I_LOGOUT:
> +    case SRP_T_LOGOUT:
> +    case SRP_RSP:
> +    case SRP_CRED_REQ:
> +    case SRP_CRED_RSP:
> +    case SRP_AER_REQ:
> +    case SRP_AER_RSP:
> +        fprintf(stderr, "VSCSI: Unsupported opcode %02x\n", opcode);
> +        break;
> +    default:
> +        fprintf(stderr, "VSCSI: Unknown type %02x\n", opcode);
> +    }
> +
> +    return done;
> +}
> +
> +static int vscsi_send_adapter_info(VSCSIState *s, vscsi_req *req)
> +{
> +    struct viosrp_adapter_info *sinfo;
> +    struct mad_adapter_info_data info;
> +    int rc;
> +
> +    sinfo =&req->iu.mad.adapter_info;
> +
> +#if 0 /* What for ? */
> +    rc = spapr_tce_dma_read(&s->vdev, be64_to_cpu(sinfo->buffer),
> +&info, be16_to_cpu(sinfo->common.length));
> +    if (rc) {
> +        fprintf(stderr, "vscsi_send_adapter_info: DMA read failure !\n");
> +    }
> +#endif
> +    memset(&info, 0, sizeof(info));
> +    strcpy(info.srp_version, SRP_VERSION);
> +    strncpy(info.partition_name, "qemu", sizeof("qemu"));
> +    info.partition_number = cpu_to_be32(0);
> +    info.mad_version = cpu_to_be32(1);
> +    info.os_type = cpu_to_be32(2);
> +    info.port_max_txu[0] = cpu_to_be32(VSCSI_MAX_SECTORS<<  9);
> +
> +    rc = spapr_tce_dma_write(&s->vdev, be64_to_cpu(sinfo->buffer),
> +&info, be16_to_cpu(sinfo->common.length));
> +    if (rc)  {
> +        fprintf(stderr, "vscsi_send_adapter_info: DMA write failure !\n");
> +    }
> +
> +    sinfo->common.status = rc ? cpu_to_be32(1) : 0;
> +
> +    return vscsi_send_iu(s, req, sizeof(*sinfo), VIOSRP_MAD_FORMAT);
> +}
> +
> +static int vscsi_handle_mad_req(VSCSIState *s, vscsi_req *req)
> +{
> +    union mad_iu *mad =&req->iu.mad;
> +
> +    switch (be32_to_cpu(mad->empty_iu.common.type)) {
> +    case VIOSRP_EMPTY_IU_TYPE:
> +        fprintf(stderr, "Unsupported EMPTY MAD IU\n");
> +        break;
> +    case VIOSRP_ERROR_LOG_TYPE:
> +        fprintf(stderr, "Unsupported ERROR LOG MAD IU\n");
> +        mad->error_log.common.status = cpu_to_be16(1);
> +        vscsi_send_iu(s, req, sizeof(mad->error_log), VIOSRP_MAD_FORMAT);
> +        break;
> +    case VIOSRP_ADAPTER_INFO_TYPE:
> +        vscsi_send_adapter_info(s, req);
> +        break;
> +    case VIOSRP_HOST_CONFIG_TYPE:
> +        mad->host_config.common.status = cpu_to_be16(1);
> +        vscsi_send_iu(s, req, sizeof(mad->host_config), VIOSRP_MAD_FORMAT);
> +        break;
> +    default:
> +        fprintf(stderr, "VSCSI: Unknown MAD type %02x\n",
> +                be32_to_cpu(mad->empty_iu.common.type));
> +    }
> +
> +    return 1;
> +}
> +
> +static void vscsi_got_payload(VSCSIState *s, vscsi_crq *crq)
> +{
> +    vscsi_req *req;
> +    int done;
> +
> +    req = vscsi_get_req(s);
> +    if (req == NULL) {
> +        fprintf(stderr, "VSCSI: Failed to get a request !\n");
> +        return;
> +    }
> +
> +    /* We only support a limited number of descriptors, we know
> +     * the ibmvscsi driver uses up to 10 max, so it should fit
> +     * in our 256 bytes IUs. If not we'll have to increase the size
> +     * of the structure.
> +     */
> +    if (crq->s.IU_length>  sizeof(union viosrp_iu)) {
> +        fprintf(stderr, "VSCSI: SRP IU too long (%d bytes) !\n",
> +                crq->s.IU_length);
> +        return;
> +    }
> +
> +    /* XXX Handle failure differently ? */
> +    if (spapr_tce_dma_read(&s->vdev, crq->s.IU_data_ptr,&req->iu,
> +                           crq->s.IU_length)) {
> +        fprintf(stderr, "vscsi_got_payload: DMA read failure !\n");
> +        qemu_free(req);
> +    }
> +    memcpy(&req->crq, crq, sizeof(vscsi_crq));
> +
> +    if (crq->s.format == VIOSRP_MAD_FORMAT) {
> +        done = vscsi_handle_mad_req(s, req);
> +    } else {
> +        done = vscsi_handle_srp_req(s, req);
> +    }
> +
> +    if (done) {
> +        vscsi_put_req(s, req);
> +    }
> +}
> +
> +
> +static int vscsi_do_crq(struct VIOsPAPRDevice *dev, uint8_t *crq_data)
> +{
> +    VSCSIState *s = DO_UPCAST(VSCSIState, vdev, dev);
> +    vscsi_crq crq;
> +
> +    memcpy(crq.raw, crq_data, 16);
> +    crq.s.timeout = be16_to_cpu(crq.s.timeout);
> +    crq.s.IU_length = be16_to_cpu(crq.s.IU_length);
> +    crq.s.IU_data_ptr = be64_to_cpu(crq.s.IU_data_ptr);
> +
> +    dprintf("VSCSI: do_crq %02x %02x ...\n", crq.raw[0], crq.raw[1]);
> +
> +    switch(crq.s.valid) {
> +    case 0xc0: /* Init command/response */
> +
> +        /* Respond to initialization request */
> +        if (crq.s.format == 0x01) {
> +            memset(crq.raw, 0, 16);
> +            crq.s.valid = 0xc0;
> +            crq.s.format = 0x02;
> +            spapr_vio_send_crq(dev, crq.raw);
> +        }
> +
> +        /* Note that in hotplug cases, we might get a 0x02
> +         * as a result of us emitting the init request
> +         */
> +
> +        break;
> +    case 0xff: /* Link event */
> +
> +        /* Not handled for now */
> +
> +        break;
> +    case 0x80: /* Payloads */
> +        switch (crq.s.format) {
> +        case VIOSRP_SRP_FORMAT: /* AKA VSCSI request */
> +        case VIOSRP_MAD_FORMAT: /* AKA VSCSI response */
> +            vscsi_got_payload(s,&crq);
> +            break;
> +        case VIOSRP_OS400_FORMAT:
> +        case VIOSRP_AIX_FORMAT:
> +        case VIOSRP_LINUX_FORMAT:
> +        case VIOSRP_INLINE_FORMAT:
> +            fprintf(stderr, "vscsi_do_srq: Unsupported payload format %02x\n",
> +                    crq.s.format);
> +            break;
> +        default:
> +            fprintf(stderr, "vscsi_do_srq: Unknown payload format %02x\n",
> +                    crq.s.format);
> +        }
> +        break;
> +    default:
> +        fprintf(stderr, "vscsi_do_crq: unknown CRQ %02x %02x ...\n",
> +                crq.raw[0], crq.raw[1]);
> +    };
> +
> +    return 0;
> +}
> +
> +static int spapr_vscsi_init(VIOsPAPRDevice *dev)
> +{
> +    VSCSIState *s = DO_UPCAST(VSCSIState, vdev, dev);
> +    int i;
> +
> +    dbg_vscsi_state = s;
> +
> +    /* Initialize qemu request tags */
> +    memset(s->reqs, 0, sizeof(s->reqs));
> +    for (i = 0; i<  VSCSI_REQ_LIMIT; i++)

Braces are missing around this loop body.

> +        s->reqs[i].qtag = i;
> +
> +    dev->crq.SendFunc = vscsi_do_crq;
> +
> +    scsi_bus_new(&s->bus,&dev->qdev, 1, VSCSI_REQ_LIMIT,
> +                 vscsi_command_complete);
> +    if (!dev->qdev.hotplugged) {
> +        scsi_bus_legacy_handle_cmdline(&s->bus);
> +    }
> +
> +    return 0;
> +}
> +
> +void spapr_vscsi_create(VIOsPAPRBus *bus, uint32_t reg,
> +                        qemu_irq qirq, uint32_t vio_irq_num)
> +{
> +    DeviceState *dev;
> +    VIOsPAPRDevice *sdev;
> +
> +    dev = qdev_create(&bus->bus, "spapr-vscsi");
> +    qdev_prop_set_uint32(dev, "reg", reg);
> +
> +    qdev_init_nofail(dev);
> +
> +    sdev = (VIOsPAPRDevice *)dev;
> +    sdev->qirq = qirq;
> +    sdev->vio_irq_num = vio_irq_num;
> +}
> +
> +static int spapr_vscsi_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off)
> +{
> +    int ret;
> +
> +    ret = fdt_setprop_cell(fdt, node_off, "#address-cells", 2);
> +    if (ret<  0) {
> +        return ret;
> +    }
> +
> +    ret = fdt_setprop_cell(fdt, node_off, "#size-cells", 0);
> +    if (ret<  0) {
> +        return ret;
> +    }
> +
> +    return 0;
> +}
> +
> +static VIOsPAPRDeviceInfo spapr_vscsi = {
> +    .init = spapr_vscsi_init,
> +    .devnode = spapr_vscsi_devnode,
> +    .dt_name = "v-scsi",
> +    .dt_type = "vscsi",
> +    .dt_compatible = "IBM,v-scsi",
> +    .signal_mask = 0x00000001,
> +    .qdev.name = "spapr-vscsi",
> +    .qdev.size = sizeof(VSCSIState),
> +    .qdev.props = (Property[]) {
> +        DEFINE_PROP_UINT32("reg", VIOsPAPRDevice, reg, 0x2000),
> +        DEFINE_PROP_UINT32("dma-window", VIOsPAPRDevice,
> +                           rtce_window_size, 0x10000000),
> +        DEFINE_PROP_END_OF_LIST(),
> +    },
> +};
> +
> +static void spapr_vscsi_register(void)
> +{
> +    spapr_vio_bus_register_withprop(&spapr_vscsi);
> +}
> +device_init(spapr_vscsi_register);
> diff --git a/hw/srp.h b/hw/srp.h
> new file mode 100644
> index 0000000..9d55fc4
> --- /dev/null
> +++ b/hw/srp.h
> @@ -0,0 +1,241 @@
> +/*
> + * Copyright (c) 2005 Cisco Systems.  All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + * $Id$
> + */
> +
> +#ifndef SCSI_SRP_H
> +#define SCSI_SRP_H
> +
> +/*
> + * Structures and constants for the SCSI RDMA Protocol (SRP) as
> + * defined by the INCITS T10 committee.  This file was written using
> + * draft Revision 16a of the SRP standard.
> + */
> +
> +enum {
> +
> +    SRP_LOGIN_REQ = 0x00,
> +    SRP_TSK_MGMT  = 0x01,
> +    SRP_CMD       = 0x02,
> +    SRP_I_LOGOUT  = 0x03,
> +    SRP_LOGIN_RSP = 0xc0,
> +    SRP_RSP       = 0xc1,
> +    SRP_LOGIN_REJ = 0xc2,
> +    SRP_T_LOGOUT  = 0x80,
> +    SRP_CRED_REQ  = 0x81,
> +    SRP_AER_REQ   = 0x82,
> +    SRP_CRED_RSP  = 0x41,
> +    SRP_AER_RSP   = 0x42
> +};
> +
> +enum {
> +    SRP_BUF_FORMAT_DIRECT   = 1<<  1,
> +    SRP_BUF_FORMAT_INDIRECT = 1<<  2
> +};
> +
> +enum {
> +    SRP_NO_DATA_DESC       = 0,
> +    SRP_DATA_DESC_DIRECT   = 1,
> +    SRP_DATA_DESC_INDIRECT = 2
> +};
> +
> +enum {
> +    SRP_TSK_ABORT_TASK     = 0x01,
> +    SRP_TSK_ABORT_TASK_SET = 0x02,
> +    SRP_TSK_CLEAR_TASK_SET = 0x04,
> +    SRP_TSK_LUN_RESET      = 0x08,
> +    SRP_TSK_CLEAR_ACA      = 0x40
> +};
> +
> +enum srp_login_rej_reason {
> +    SRP_LOGIN_REJ_UNABLE_ESTABLISH_CHANNEL   = 0x00010000,
> +    SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES     = 0x00010001,
> +    SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE = 0x00010002,
> +    SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL   = 0x00010003,
> +    SRP_LOGIN_REJ_UNSUPPORTED_DESCRIPTOR_FMT = 0x00010004,
> +    SRP_LOGIN_REJ_MULTI_CHANNEL_UNSUPPORTED  = 0x00010005,
> +    SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED      = 0x00010006
> +};
> +
> +enum {
> +    SRP_REV10_IB_IO_CLASS  = 0xff00,
> +    SRP_REV16A_IB_IO_CLASS = 0x0100
> +};
> +
> +struct srp_direct_buf {
> +    uint64_t    va;
> +    uint32_t    key;
> +    uint32_t    len;
> +};
> +
> +/*
> + * We need the packed attribute because the SRP spec puts the list of
> + * descriptors at an offset of 20, which is not aligned to the size of
> + * struct srp_direct_buf.  The whole structure must be packed to avoid
> + * having the 20-byte structure padded to 24 bytes on 64-bit architectures.
> + */
> +struct srp_indirect_buf {
> +    struct srp_direct_buf    table_desc;
> +    uint32_t                 len;
> +    struct srp_direct_buf    desc_list[0];
> +} __attribute__((packed));
> +
> +enum {
> +    SRP_MULTICHAN_SINGLE = 0,
> +    SRP_MULTICHAN_MULTI  = 1
> +};
> +
> +struct srp_login_req {
> +    uint8_t    opcode;
> +    uint8_t    reserved1[7];
> +    uint64_t   tag;
> +    uint32_t   req_it_iu_len;
> +    uint8_t    reserved2[4];
> +    uint16_t   req_buf_fmt;
> +    uint8_t    req_flags;
> +    uint8_t    reserved3[5];
> +    uint8_t    initiator_port_id[16];
> +    uint8_t    target_port_id[16];
> +};
> +
> +/*
> + * The SRP spec defines the size of the LOGIN_RSP structure to be 52
> + * bytes, so it needs to be packed to avoid having it padded to 56
> + * bytes on 64-bit architectures.
> + */
> +struct srp_login_rsp {
> +    uint8_t    opcode;
> +    uint8_t    reserved1[3];
> +    uint32_t   req_lim_delta;
> +    uint64_t   tag;
> +    uint32_t   max_it_iu_len;
> +    uint32_t   max_ti_iu_len;
> +    uint16_t   buf_fmt;
> +    uint8_t    rsp_flags;
> +    uint8_t    reserved2[25];
> +} __attribute__((packed));
> +
> +struct srp_login_rej {
> +    uint8_t    opcode;
> +    uint8_t    reserved1[3];
> +    uint32_t   reason;
> +    uint64_t   tag;
> +    uint8_t    reserved2[8];
> +    uint16_t   buf_fmt;
> +    uint8_t    reserved3[6];
> +};

Why isn't this one packed? And the ones below?

> +
> +struct srp_i_logout {
> +    uint8_t    opcode;
> +    uint8_t    reserved[7];
> +    uint64_t   tag;
> +};
> +
> +struct srp_t_logout {
> +    uint8_t    opcode;
> +    uint8_t    sol_not;
> +    uint8_t    reserved[2];
> +    uint32_t   reason;
> +    uint64_t   tag;
> +};
> +
> +/*
> + * We need the packed attribute because the SRP spec only aligns the
> + * 8-byte LUN field to 4 bytes.
> + */
> +struct srp_tsk_mgmt {
> +    uint8_t    opcode;
> +    uint8_t    sol_not;
> +    uint8_t    reserved1[6];
> +    uint64_t   tag;
> +    uint8_t    reserved2[4];
> +    uint64_t   lun __attribute__((packed));
> +    uint8_t    reserved3[2];
> +    uint8_t    tsk_mgmt_func;
> +    uint8_t    reserved4;
> +    uint64_t   task_tag;
> +    uint8_t    reserved5[8];
> +};
> +
> +/*
> + * We need the packed attribute because the SRP spec only aligns the
> + * 8-byte LUN field to 4 bytes.
> + */
> +struct srp_cmd {
> +    uint8_t    opcode;
> +    uint8_t    sol_not;
> +    uint8_t    reserved1[3];
> +    uint8_t    buf_fmt;
> +    uint8_t    data_out_desc_cnt;
> +    uint8_t    data_in_desc_cnt;
> +    uint64_t   tag;
> +    uint8_t    reserved2[4];
> +    uint64_t   lun __attribute__((packed));
> +    uint8_t    reserved3;
> +    uint8_t    task_attr;
> +    uint8_t    reserved4;
> +    uint8_t    add_cdb_len;
> +    uint8_t    cdb[16];
> +    uint8_t    add_data[0];
> +};
> +
> +enum {
> +    SRP_RSP_FLAG_RSPVALID = 1<<  0,
> +    SRP_RSP_FLAG_SNSVALID = 1<<  1,
> +    SRP_RSP_FLAG_DOOVER   = 1<<  2,
> +    SRP_RSP_FLAG_DOUNDER  = 1<<  3,
> +    SRP_RSP_FLAG_DIOVER   = 1<<  4,
> +    SRP_RSP_FLAG_DIUNDER  = 1<<  5
> +};
> +
> +/*
> + * The SRP spec defines the size of the RSP structure to be 36 bytes,
> + * so it needs to be packed to avoid having it padded to 40 bytes on
> + * 64-bit architectures.
> + */
> +struct srp_rsp {
> +    uint8_t    opcode;
> +    uint8_t    sol_not;
> +    uint8_t    reserved1[2];
> +    uint32_t   req_lim_delta;
> +    uint64_t   tag;
> +    uint8_t    reserved2[2];
> +    uint8_t    flags;
> +    uint8_t    status;
> +    uint32_t   data_out_res_cnt;
> +    uint32_t   data_in_res_cnt;
> +    uint32_t   sense_data_len;
> +    uint32_t   resp_data_len;
> +    uint8_t    data[0];
> +} __attribute__((packed));
> +
> +#endif /* SCSI_SRP_H */


Alex
Anthony Liguori - March 16, 2011, 4:51 p.m.
On 03/16/2011 11:41 AM, Alexander Graf wrote:
>> new file mode 100644
>> index 0000000..9d55fc4
>> --- /dev/null
>> +++ b/hw/srp.h
>> @@ -0,0 +1,241 @@
>> +/*
>> + * Copyright (c) 2005 Cisco Systems.  All rights reserved.
>> + *
>> + * This software is available to you under a choice of one of two
>> + * licenses.  You may choose to be licensed under the terms of the GNU
>> + * General Public License (GPL) Version 2, available from the file
>> + * COPYING in the main directory of this source tree, or the
>> + * OpenIB.org BSD license below:
>> + *
>> + *     Redistribution and use in source and binary forms, with or
>> + *     without modification, are permitted provided that the following
>> + *     conditions are met:
>> + *
>> + *      - Redistributions of source code must retain the above
>> + *        copyright notice, this list of conditions and the following
>> + *        disclaimer.
>> + *
>> + *      - Redistributions in binary form must reproduce the above
>> + *        copyright notice, this list of conditions and the following
>> + *        disclaimer in the documentation and/or other materials
>> + *        provided with the distribution.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
>> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
>> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
>> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
>> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
>> + * SOFTWARE.
>> + *
>> + * $Id$
>> + */
>> +
>> +#ifndef SCSI_SRP_H
>> +#define SCSI_SRP_H
>> +
>> +/*
>> + * Structures and constants for the SCSI RDMA Protocol (SRP) as
>> + * defined by the INCITS T10 committee.  This file was written using
>> + * draft Revision 16a of the SRP standard.
>> + */
>> +
>> +enum {
>> +
>> +    SRP_LOGIN_REQ = 0x00,
>> +    SRP_TSK_MGMT  = 0x01,
>> +    SRP_CMD       = 0x02,
>> +    SRP_I_LOGOUT  = 0x03,
>> +    SRP_LOGIN_RSP = 0xc0,
>> +    SRP_RSP       = 0xc1,
>> +    SRP_LOGIN_REJ = 0xc2,
>> +    SRP_T_LOGOUT  = 0x80,
>> +    SRP_CRED_REQ  = 0x81,
>> +    SRP_AER_REQ   = 0x82,
>> +    SRP_CRED_RSP  = 0x41,
>> +    SRP_AER_RSP   = 0x42
>> +};
>> +
>> +enum {
>> +    SRP_BUF_FORMAT_DIRECT   = 1<<  1,
>> +    SRP_BUF_FORMAT_INDIRECT = 1<<  2
>> +};
>> +
>> +enum {
>> +    SRP_NO_DATA_DESC       = 0,
>> +    SRP_DATA_DESC_DIRECT   = 1,
>> +    SRP_DATA_DESC_INDIRECT = 2
>> +};
>> +
>> +enum {
>> +    SRP_TSK_ABORT_TASK     = 0x01,
>> +    SRP_TSK_ABORT_TASK_SET = 0x02,
>> +    SRP_TSK_CLEAR_TASK_SET = 0x04,
>> +    SRP_TSK_LUN_RESET      = 0x08,
>> +    SRP_TSK_CLEAR_ACA      = 0x40
>> +};
>> +
>> +enum srp_login_rej_reason {
>> +    SRP_LOGIN_REJ_UNABLE_ESTABLISH_CHANNEL   = 0x00010000,
>> +    SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES     = 0x00010001,
>> +    SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE = 0x00010002,
>> +    SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL   = 0x00010003,
>> +    SRP_LOGIN_REJ_UNSUPPORTED_DESCRIPTOR_FMT = 0x00010004,
>> +    SRP_LOGIN_REJ_MULTI_CHANNEL_UNSUPPORTED  = 0x00010005,
>> +    SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED      = 0x00010006
>> +};
>> +
>> +enum {
>> +    SRP_REV10_IB_IO_CLASS  = 0xff00,
>> +    SRP_REV16A_IB_IO_CLASS = 0x0100
>> +};
>> +
>> +struct srp_direct_buf {
>> +    uint64_t    va;
>> +    uint32_t    key;
>> +    uint32_t    len;
>> +};
>> +
>> +/*
>> + * We need the packed attribute because the SRP spec puts the list of
>> + * descriptors at an offset of 20, which is not aligned to the size of
>> + * struct srp_direct_buf.  The whole structure must be packed to avoid
>> + * having the 20-byte structure padded to 24 bytes on 64-bit architectures.
>> + */
>> +struct srp_indirect_buf {
>> +    struct srp_direct_buf    table_desc;
>> +    uint32_t                 len;
>> +    struct srp_direct_buf    desc_list[0];
>> +} __attribute__((packed));
>> +
>> +enum {
>> +    SRP_MULTICHAN_SINGLE = 0,
>> +    SRP_MULTICHAN_MULTI  = 1
>> +};
>> +
>> +struct srp_login_req {
>> +    uint8_t    opcode;
>> +    uint8_t    reserved1[7];
>> +    uint64_t   tag;
>> +    uint32_t   req_it_iu_len;
>> +    uint8_t    reserved2[4];
>> +    uint16_t   req_buf_fmt;
>> +    uint8_t    req_flags;
>> +    uint8_t    reserved3[5];
>> +    uint8_t    initiator_port_id[16];
>> +    uint8_t    target_port_id[16];
>> +};
>> +
>> +/*
>> + * The SRP spec defines the size of the LOGIN_RSP structure to be 52
>> + * bytes, so it needs to be packed to avoid having it padded to 56
>> + * bytes on 64-bit architectures.
>> + */
>> +struct srp_login_rsp {
>> +    uint8_t    opcode;
>> +    uint8_t    reserved1[3];
>> +    uint32_t   req_lim_delta;
>> +    uint64_t   tag;
>> +    uint32_t   max_it_iu_len;
>> +    uint32_t   max_ti_iu_len;
>> +    uint16_t   buf_fmt;
>> +    uint8_t    rsp_flags;
>> +    uint8_t    reserved2[25];
>> +} __attribute__((packed));
>> +
>> +struct srp_login_rej {
>> +    uint8_t    opcode;
>> +    uint8_t    reserved1[3];
>> +    uint32_t   reason;
>> +    uint64_t   tag;
>> +    uint8_t    reserved2[8];
>> +    uint16_t   buf_fmt;
>> +    uint8_t    reserved3[6];
>> +};
> diff --git a/hw/srp.h b/hw/srp.h
>
> Why isn't this one packed? And the ones below?

It's naturally aligned.  There's no need to pack things that are 
naturally aligned (structure size is a multiple of 8 and each type 
starts at an offset that's a multiple of its size).

Regards,

Anthony Liguori
Benjamin Herrenschmidt - March 16, 2011, 8:08 p.m.
On Wed, 2011-03-16 at 17:41 +0100, Alexander Graf wrote:

> > +/*
> > + * Common MAD header
> > + */
> > +struct mad_common {
> > +    uint32_t type;
> > +    uint16_t status;
> > +    uint16_t length;
> > +    uint64_t tag;
> 
> Is this an in-memory representation? If so, it should be packed, right? 
> Same goes for the ones below.

Well, all the fields are naturally aligned, as is the structure itself,
so do we really need to pack?

Cheers,
Ben.
Anthony Liguori - March 16, 2011, 8:19 p.m.
On 03/16/2011 03:08 PM, Benjamin Herrenschmidt wrote:
> On Wed, 2011-03-16 at 17:41 +0100, Alexander Graf wrote:
>
>>> +/*
>>> + * Common MAD header
>>> + */
>>> +struct mad_common {
>>> +    uint32_t type;
>>> +    uint16_t status;
>>> +    uint16_t length;
>>> +    uint64_t tag;
>> Is this an in-memory representation? If so, it should be packed, right?
>> Same goes for the ones below.
> Well, all the fields are naturally aligned, as is the structure itself,
> do we really need to pack ?

No.

Regards,

Anthony Liguori

> Cheers,
> Ben.
>
>
>

Patch

diff --git a/Makefile.target b/Makefile.target
index ef86d43..49f9e9a 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -233,7 +233,7 @@  obj-ppc-y += ppc_oldworld.o
 obj-ppc-y += ppc_newworld.o
 # IBM pSeries (sPAPR)
 obj-ppc-y += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
-obj-ppc-y += xics.o spapr_vty.o spapr_llan.o
+obj-ppc-y += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
 # PowerPC 4xx boards
 obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
 obj-ppc-y += ppc440.o ppc440_bamboo.o
diff --git a/hw/ppc-viosrp.h b/hw/ppc-viosrp.h
new file mode 100644
index 0000000..9afcf7a
--- /dev/null
+++ b/hw/ppc-viosrp.h
@@ -0,0 +1,216 @@ 
+/*****************************************************************************/
+/* srp.h -- SCSI RDMA Protocol definitions                                   */
+/*                                                                           */
+/* Written By: Colin Devilbis, IBM Corporation                               */
+/*                                                                           */
+/* Copyright (C) 2003 IBM Corporation                                        */
+/*                                                                           */
+/* This program is free software; you can redistribute it and/or modify      */
+/* it under the terms of the GNU General Public License as published by      */
+/* the Free Software Foundation; either version 2 of the License, or         */
+/* (at your option) any later version.                                       */
+/*                                                                           */
+/* This program is distributed in the hope that it will be useful,           */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of            */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             */
+/* GNU General Public License for more details.                              */
+/*                                                                           */
+/* You should have received a copy of the GNU General Public License         */
+/* along with this program; if not, write to the Free Software               */
+/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+/*                                                                           */
+/*                                                                           */
+/* This file contains structures and definitions for IBM RPA (RS/6000        */
+/* platform architecture) implementation of the SRP (SCSI RDMA Protocol)     */
+/* standard.  SRP is used on IBM iSeries and pSeries platforms to send SCSI  */
+/* commands between logical partitions.                                      */
+/*                                                                           */
+/* SRP Information Units (IUs) are sent on a "Command/Response Queue" (CRQ)  */
+/* between partitions.  The definitions in this file are architected,        */
+/* and cannot be changed without breaking compatibility with other versions  */
+/* of Linux and other operating systems (AIX, OS/400) that talk this protocol*/
+/* between logical partitions                                                */
+/*****************************************************************************/
+#ifndef PPC_VIOSRP_H
+#define PPC_VIOSRP_H
+
+#define SRP_VERSION "16.a"
+#define SRP_MAX_IU_LEN    256
+#define SRP_MAX_LOC_LEN 32
+
+union srp_iu {
+    struct srp_login_req login_req;
+    struct srp_login_rsp login_rsp;
+    struct srp_login_rej login_rej;
+    struct srp_i_logout i_logout;
+    struct srp_t_logout t_logout;
+    struct srp_tsk_mgmt tsk_mgmt;
+    struct srp_cmd cmd;
+    struct srp_rsp rsp;
+    uint8_t reserved[SRP_MAX_IU_LEN];
+};
+
+enum viosrp_crq_formats {
+    VIOSRP_SRP_FORMAT = 0x01,
+    VIOSRP_MAD_FORMAT = 0x02,
+    VIOSRP_OS400_FORMAT = 0x03,
+    VIOSRP_AIX_FORMAT = 0x04,
+    VIOSRP_LINUX_FORMAT = 0x06,
+    VIOSRP_INLINE_FORMAT = 0x07
+};
+
+enum viosrp_crq_status {
+    VIOSRP_OK = 0x0,
+    VIOSRP_NONRECOVERABLE_ERR = 0x1,
+    VIOSRP_VIOLATES_MAX_XFER = 0x2,
+    VIOSRP_PARTNER_PANIC = 0x3,
+    VIOSRP_DEVICE_BUSY = 0x8,
+    VIOSRP_ADAPTER_FAIL = 0x10,
+    VIOSRP_OK2 = 0x99,
+};
+
+struct viosrp_crq {
+    uint8_t valid;        /* used by RPA */
+    uint8_t format;        /* SCSI vs out-of-band */
+    uint8_t reserved;
+    uint8_t status;        /* non-scsi failure? (e.g. DMA failure) */
+    uint16_t timeout;        /* in seconds */
+    uint16_t IU_length;        /* in bytes */
+    uint64_t IU_data_ptr;    /* the TCE for transferring data */
+};
+
+/* MADs are Management requests above and beyond the IUs defined in the SRP
+ * standard.  
+ */
+enum viosrp_mad_types {
+    VIOSRP_EMPTY_IU_TYPE = 0x01,
+    VIOSRP_ERROR_LOG_TYPE = 0x02,
+    VIOSRP_ADAPTER_INFO_TYPE = 0x03,
+    VIOSRP_HOST_CONFIG_TYPE = 0x04,
+    VIOSRP_CAPABILITIES_TYPE = 0x05,
+    VIOSRP_ENABLE_FAST_FAIL = 0x08,
+};
+
+enum viosrp_mad_status {
+    VIOSRP_MAD_SUCCESS = 0x00,
+    VIOSRP_MAD_NOT_SUPPORTED = 0xF1,
+    VIOSRP_MAD_FAILED = 0xF7,
+};
+
+enum viosrp_capability_type {
+    MIGRATION_CAPABILITIES = 0x01,
+    RESERVATION_CAPABILITIES = 0x02,
+};
+
+enum viosrp_capability_support {
+    SERVER_DOES_NOT_SUPPORTS_CAP = 0x0,
+    SERVER_SUPPORTS_CAP = 0x01,
+    SERVER_CAP_DATA = 0x02,
+};
+
+enum viosrp_reserve_type {
+    CLIENT_RESERVE_SCSI_2 = 0x01,
+};
+
+enum viosrp_capability_flag {
+    CLIENT_MIGRATED = 0x01,
+    CLIENT_RECONNECT = 0x02,
+    CAP_LIST_SUPPORTED = 0x04,
+    CAP_LIST_DATA = 0x08,
+};
+
+/* 
+ * Common MAD header
+ */
+struct mad_common {
+    uint32_t type;
+    uint16_t status;
+    uint16_t length;
+    uint64_t tag;
+};
+
+/*
+ * All SRP (and MAD) requests normally flow from the
+ * client to the server.  There is no way for the server to send
+ * an asynchronous message back to the client.  The Empty IU is used
+ * to hang out a meaningless request to the server so that it can respond
+ * asynchronously with something like a SCSI AER
+ */
+struct viosrp_empty_iu {
+    struct mad_common common;
+    uint64_t buffer;
+    uint32_t port;
+};
+
+struct viosrp_error_log {
+    struct mad_common common;
+    uint64_t buffer;
+};
+
+struct viosrp_adapter_info {
+    struct mad_common common;
+    uint64_t buffer;
+};
+
+struct viosrp_host_config {
+    struct mad_common common;
+    uint64_t buffer;
+};
+
+struct viosrp_fast_fail {
+    struct mad_common common;
+};
+
+struct viosrp_capabilities {
+    struct mad_common common;
+    uint64_t buffer;
+};
+
+struct mad_capability_common {
+    uint32_t cap_type;
+    uint16_t length;
+    uint16_t server_support;
+};
+
+struct mad_reserve_cap {
+    struct mad_capability_common common;
+    uint32_t type;
+};
+
+struct mad_migration_cap {
+    struct mad_capability_common common;
+    uint32_t ecl;
+};
+
+struct capabilities{
+    uint32_t flags;
+    char name[SRP_MAX_LOC_LEN];
+    char loc[SRP_MAX_LOC_LEN];
+    struct mad_migration_cap migration;
+    struct mad_reserve_cap reserve;
+};
+
+union mad_iu {
+    struct viosrp_empty_iu empty_iu;
+    struct viosrp_error_log error_log;
+    struct viosrp_adapter_info adapter_info;
+    struct viosrp_host_config host_config;
+    struct viosrp_fast_fail fast_fail;
+    struct viosrp_capabilities capabilities;
+};
+
+union viosrp_iu {
+    union srp_iu srp;
+    union mad_iu mad;
+};
+
+struct mad_adapter_info_data {
+    char srp_version[8];
+    char partition_name[96];
+    uint32_t partition_number;
+    uint32_t mad_version;
+    uint32_t os_type;
+    uint32_t port_max_txu[8];    /* per-port maximum transfer */
+};
+
+#endif
diff --git a/hw/spapr.c b/hw/spapr.c
index cb97a16..5f868fc 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -28,6 +28,7 @@ 
 #include "hw.h"
 #include "elf.h"
 #include "net.h"
+#include "blockdev.h"
 
 #include "hw/boards.h"
 #include "hw/ppc.h"
@@ -316,7 +317,7 @@  static void ppc_spapr_init(ram_addr_t ram_size,
     qemu_free(filename);
 
     /* Set up Interrupt Controller */
-    spapr->icp = xics_system_init(smp_cpus, envs, MAX_SERIAL_PORTS + nb_nics);
+    spapr->icp = xics_system_init(smp_cpus, envs, MAX_SERIAL_PORTS + nb_nics + drive_get_max_bus(IF_SCSI) + 1);
 
     /* Set up VIO bus */
     spapr->vio_bus = spapr_vio_bus_init();
@@ -346,6 +347,12 @@  static void ppc_spapr_init(ram_addr_t ram_size,
         }
     }
 
+    for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
+        spapr_vscsi_create(spapr->vio_bus, 0x2000 + i,
+                           xics_find_qirq(spapr->icp, irq), irq);
+        irq++;
+    }
+
     if (kernel_filename) {
         uint64_t lowaddr = 0;
 
@@ -406,6 +413,7 @@  static QEMUMachine spapr_machine = {
     .max_cpus = MAX_CPUS,
     .no_vga = 1,
     .no_parallel = 1,
+    .use_scsi = 1,
 };
 
 static void spapr_machine_init(void)
diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h
index ba16795..b7d0daa 100644
--- a/hw/spapr_vio.h
+++ b/hw/spapr_vio.h
@@ -101,4 +101,7 @@  void spapr_vty_create(VIOsPAPRBus *bus,
 void spapr_vlan_create(VIOsPAPRBus *bus, uint32_t reg, NICInfo *nd,
                        qemu_irq qirq, uint32_t vio_irq_num);
 
+void spapr_vscsi_create(VIOsPAPRBus *bus, uint32_t reg,
+                        qemu_irq qirq, uint32_t vio_irq_num);
+
 #endif /* _HW_SPAPR_VIO_H */
diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
new file mode 100644
index 0000000..0a67095
--- /dev/null
+++ b/hw/spapr_vscsi.c
@@ -0,0 +1,960 @@ 
+/* TODO:
+ *
+ *  - Cleanups :-)
+ *  - Sort out better how to assign devices to VSCSI instances
+ *  - Fix residual counts
+ *  - Add indirect descriptors support
+ *  - Maybe do autosense (PAPR seems to mandate it, linux doesn't care)
+ */
+#include "hw.h"
+#include "scsi.h"
+#include "scsi-defs.h"
+#include "net.h" /* Remove that when we can */
+#include "srp.h"
+#include "hw/qdev.h"
+#include "hw/spapr.h"
+#include "hw/spapr_vio.h"
+#include "hw/ppc-viosrp.h"
+
+#include <libfdt.h>
+
+//#define DEBUG_VSCSI
+
+#ifdef DEBUG_VSCSI
+#define dprintf(fmt, ...) \
+    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define dprintf(fmt, ...) \
+    do { } while (0)
+#endif
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+/*
+ * Virtual SCSI device
+ */
+
+/* Random numbers */
+#define VSCSI_MAX_SECTORS       4096/*1024*//*256*/
+#define VSCSI_REQ_LIMIT         24
+
+#define SCSI_SENSE_BUF_SIZE     96
+#define SRP_RSP_SENSE_DATA_LEN  18
+
+/* A 16-byte CRQ entry, viewable either as the structured viosrp_crq or as
+ * the raw bytes handed to/received from the VIO layer.
+ */
+typedef union vscsi_crq {
+    struct viosrp_crq s;
+    uint8_t raw[16];
+} vscsi_crq;
+
+/* Per-request state: the CRQ that announced the request, the IU that was
+ * DMA'd in (and is reused in place to build the response), SCSI layer
+ * bookkeeping and the RDMA descriptor cursor.
+ */
+typedef struct vscsi_req
+{
+    vscsi_crq               crq;    /* copy of the incoming CRQ entry */
+    union viosrp_iu         iu;     /* request IU, overwritten by the reply */
+
+    /* SCSI request tracking */
+    SCSIDevice              *sdev;
+    uint32_t                qtag; /* qemu tag != srp tag */
+    int                     lun;
+    int                     active;   /* non-zero while the slot is in use */
+    long                    data_len; /* bytes still expected to transfer */
+    int                     writing;  /* non-zero: data flows to the device */
+    int                     sensing;  /* non-zero: REQUEST SENSE in flight */
+    int                     senselen; /* valid bytes in sense[] */
+    uint8_t                 sense[SCSI_SENSE_BUF_SIZE];
+
+    /* RDMA related bits */
+    uint8_t                 dma_fmt;    /* SRP data descriptor format */
+    struct srp_direct_buf   ext_desc;   /* scratch for descriptors fetched by DMA */
+    struct srp_direct_buf   *cur_desc;  /* descriptor currently being consumed */
+    struct srp_indirect_buf *ind_desc;  /* indirect descriptor inside the IU */
+    int                     local_desc; /* descriptors carried inside the IU */
+    int                     total_desc; /* descriptors in the whole table */
+    
+} vscsi_req;
+
+
+/* Device state: the VIO device, the SCSI bus hanging off it and a fixed
+ * pool of VSCSI_REQ_LIMIT request slots.
+ */
+typedef struct {
+    VIOsPAPRDevice vdev;
+    SCSIBus bus;
+    vscsi_req reqs[VSCSI_REQ_LIMIT];
+} VSCSIState;
+
+/* XXX Debug only */
+static VSCSIState *dbg_vscsi_state;
+
+
+/* Claim the first idle slot of the request pool: the slot is wiped,
+ * tagged with its own index and marked active.  Returns NULL when every
+ * slot is busy.
+ */
+static struct vscsi_req *vscsi_get_req(VSCSIState *s)
+{
+    int slot = 0;
+
+    while (slot < VSCSI_REQ_LIMIT) {
+        vscsi_req *candidate = &s->reqs[slot];
+
+        if (candidate->active) {
+            slot++;
+            continue;
+        }
+        memset(candidate, 0, sizeof(*candidate));
+        candidate->active = 1;
+        candidate->qtag = slot;
+        return candidate;
+    }
+    return NULL;
+}
+
+/* Return a request slot to the pool by clearing its active flag. */
+static void vscsi_put_req(VSCSIState *s, vscsi_req *req)
+{
+    req->active = 0;
+}
+
+/* Map a qemu tag back to its request slot.  Returns NULL when the tag is
+ * out of range or the slot is not currently in use.
+ */
+static vscsi_req *vscsi_find_req(VSCSIState *s, uint32_t tag)
+{
+    vscsi_req *slot;
+
+    if (tag >= VSCSI_REQ_LIMIT) {
+        return NULL;
+    }
+    slot = &s->reqs[tag];
+    return slot->active ? slot : NULL;
+}
+
+/* Split a host-endian SRP LUN into a target id (low 7 bits of the top
+ * byte) and a lun (the second byte).
+ */
+static void vscsi_decode_id_lun(uint64_t srp_lun, int *id, int *lun)
+{
+    /* XXX Figure that one out properly ! This is crackpot */
+    uint8_t top_byte = srp_lun >> 56;
+    uint8_t second_byte = srp_lun >> 48;
+
+    *id = top_byte & 0x7f;
+    *lun = second_byte;
+}
+
+/* Send the IU held in req->iu back to the client.
+ *
+ * The first 'length' bytes of req->iu are DMA-written to the address
+ * stored in the request's CRQ, then the CRQ is rewritten as a response
+ * entry of the given 'format' and pushed with spapr_vio_send_crq().
+ *
+ * Returns the CRQ send error if sending failed, otherwise the result of
+ * the DMA write (0 on success).
+ */
+static int vscsi_send_iu(VSCSIState *s, vscsi_req *req,
+                         uint64_t length, uint8_t format)
+{
+    long rc, rc1;
+
+    /* First copy the SRP */
+    rc = spapr_tce_dma_write(&s->vdev, req->crq.s.IU_data_ptr,
+                             &req->iu, length);
+    if (rc) {
+        fprintf(stderr, "vscsi_send_iu: DMA write failure !\n");
+    }
+
+    /* IU_data_ptr has been consumed above; it is now reused to carry the
+     * tag in the response CRQ.
+     */
+    req->crq.s.valid = 0x80;
+    req->crq.s.format = format;
+    req->crq.s.reserved = 0x00;
+    req->crq.s.timeout = cpu_to_be16(0x0000);
+    req->crq.s.IU_length = cpu_to_be16(length);
+    req->crq.s.IU_data_ptr = req->iu.srp.rsp.tag; /* right byte order */
+
+    /* The CRQ is still sent when the DMA write failed, only with a
+     * different status byte.
+     */
+    if (rc == 0) {
+        req->crq.s.status = 0x99; /* Just needs to be non-zero */
+    } else {
+        req->crq.s.status = 0x00;
+    }
+
+    rc1 = spapr_vio_send_crq(&s->vdev, req->crq.raw);
+    if (rc1) {
+        fprintf(stderr, "vscsi_send_iu: Error sending response\n");
+        return rc1;
+    }
+
+    return rc;
+}
+
+/* Fabricate fixed-format sense data in req->sense from the given sense
+ * key, additional sense code and qualifier, and record its length.
+ */
+static void vscsi_makeup_sense(VSCSIState *s, vscsi_req *req,
+                               uint8_t key, uint8_t asc, uint8_t ascq)
+{
+    uint8_t *sense = req->sense;
+
+    /* Valid bit and 'current errors' */
+    sense[0] = (0x1 << 7 | 0x70);
+    /* Sense key */
+    sense[2] = key;
+    /* Additional sense length */
+    sense[7] = 0xa; /* 10 bytes */
+    /* Additional sense code and qualifier */
+    sense[12] = asc;
+    sense[13] = ascq;
+
+    req->senselen = SRP_RSP_SENSE_DATA_LEN;
+}
+
+/* Build an SRP_RSP in place over the request IU and send it.
+ *
+ * 'status' is the SCSI status to report.  'res_in' / 'res_out' are the
+ * data-in / data-out residual counts: a negative value is reported as an
+ * underflow of its magnitude, a positive value as an overflow.  When
+ * 'status' is non-zero and req->senselen is set, the sense bytes in
+ * req->sense are appended to the response.
+ *
+ * Always returns 0; the result of vscsi_send_iu() is not propagated.
+ */
+static int vscsi_send_rsp(VSCSIState *s, vscsi_req *req,
+                          uint8_t status, int32_t res_in, int32_t res_out)
+{
+    union viosrp_iu *iu = &req->iu;
+    uint64_t tag = iu->srp.rsp.tag;
+    /* The response shares storage with the request, so the solicited
+     * notification bits must be captured before the IU is cleared.
+     */
+    uint8_t sol_not = iu->srp.cmd.sol_not;
+    int total_len = sizeof(iu->srp.rsp);
+
+    dprintf("VSCSI: Sending resp status: 0x%x, "
+            "res_in: %d, res_out: %d \n", status, res_in, res_out);
+
+    memset(iu, 0, sizeof(struct srp_rsp));
+    iu->srp.rsp.opcode = SRP_RSP;
+    iu->srp.rsp.req_lim_delta = cpu_to_be32(1);
+    iu->srp.rsp.tag = tag;
+
+    /* Handle residuals */
+    if (res_in < 0) {
+        iu->srp.rsp.flags |= SRP_RSP_FLAG_DIUNDER;
+        res_in = -res_in;
+    } else if (res_in) {
+        iu->srp.rsp.flags |= SRP_RSP_FLAG_DIOVER;
+    }
+    if (res_out < 0) {
+        iu->srp.rsp.flags |= SRP_RSP_FLAG_DOUNDER;
+        res_out = -res_out;
+    } else if (res_out) {
+        iu->srp.rsp.flags |= SRP_RSP_FLAG_DOOVER;
+    }
+    iu->srp.rsp.data_in_res_cnt = cpu_to_be32(res_in);
+    iu->srp.rsp.data_out_res_cnt = cpu_to_be32(res_out);
+
+    /* We don't do response data */
+    /* iu->srp.rsp.flags &= ~SRP_RSP_FLAG_RSPVALID; */
+    iu->srp.rsp.resp_data_len = cpu_to_be32(0);
+
+    /* Handle success vs. failure */
+    iu->srp.rsp.status = status;
+    if (status) {
+        iu->srp.rsp.sol_not = (sol_not & 0x04) >> 2;
+        if (req->senselen) {
+            iu->srp.rsp.flags |= SRP_RSP_FLAG_SNSVALID;
+            iu->srp.rsp.sense_data_len = cpu_to_be32(req->senselen);
+            memcpy(iu->srp.rsp.data, req->sense, req->senselen);
+            total_len += req->senselen;
+        }
+    } else {
+        iu->srp.rsp.sol_not = (sol_not & 0x02) >> 1;
+    }
+
+    vscsi_send_iu(s, req, total_len, VIOSRP_SRP_FORMAT);
+    return 0;
+}
+
+/* Convert a direct descriptor's va and len from big-endian to host byte
+ * order, in place.  Must be applied exactly once per descriptor.
+ */
+static inline void vscsi_swap_desc(struct srp_direct_buf *desc)
+{
+    desc->va = be64_to_cpu(desc->va);
+    desc->len = be32_to_cpu(desc->len);
+}
+
+/* Move up to 'len' bytes between 'buf' and the guest memory described by
+ * the request's single direct descriptor, in the direction given by
+ * req->writing.  The descriptor is advanced by the amount attempted.
+ *
+ * Returns the number of bytes transferred (possibly 0 when the descriptor
+ * is exhausted or 'len' is 0), or -1 on DMA failure.
+ */
+static int vscsi_srp_direct_data(VSCSIState *s, vscsi_req *req,
+                                 uint8_t *buf, uint32_t len)
+{
+    struct srp_direct_buf *md = req->cur_desc;
+    uint32_t llen;
+    int rc = 0; /* stays 0 when there is nothing to transfer */
+
+    dprintf("VSCSI: direct segment 0x%x bytes, va=0x%llx desc len=0x%x\n",
+            len, (unsigned long long)md->va, md->len);
+
+    llen = min(len, md->len);
+    if (llen) {
+        if (req->writing) { /* writing = to device = reading from memory */
+            rc = spapr_tce_dma_read(&s->vdev, md->va, buf, llen);
+        } else {
+            rc = spapr_tce_dma_write(&s->vdev, md->va, buf, llen);
+        }
+    }
+    md->len -= llen;
+    md->va += llen;
+
+    if (rc) {
+        return -1;
+    }
+    return llen;
+}
+
+/* Move up to 'len' bytes between 'buf' and guest memory through an
+ * indirect descriptor table, in the direction given by req->writing.
+ *
+ * Descriptors carried inside the IU are used first; once those run out,
+ * further descriptors are fetched one at a time by DMA from the table
+ * address into req->ext_desc.  The cursor state (cur_desc, local_desc,
+ * total_desc and the table address) persists in 'req' across calls.
+ *
+ * Returns the number of bytes transferred (which may be short when the
+ * descriptors run out) or -1 on DMA failure.
+ */
+static int vscsi_srp_indirect_data(VSCSIState *s, vscsi_req *req,
+                                   uint8_t *buf, uint32_t len)
+{
+    struct srp_direct_buf *td = &req->ind_desc->table_desc;
+    struct srp_direct_buf *md = req->cur_desc;
+    int rc = 0;
+    uint32_t llen, total = 0;
+
+    dprintf("VSCSI: indirect segment 0x%x bytes, td va=0x%llx len=0x%x\n",
+            len, (unsigned long long)td->va, td->len);
+
+    /* While we have data ... */
+    while(len) {
+        /* If we have a descriptor but it's empty, go fetch a new one */
+        if (md && md->len == 0) {
+            /* More local available, use one */
+            /* NOTE(review): local_desc starts at the full count of in-IU
+             * descriptors while cur_desc already points at the first one;
+             * confirm this cannot step one entry past the local list.
+             */
+            if (req->local_desc) {
+                md = ++req->cur_desc;
+                --req->local_desc;
+                --req->total_desc;
+                td->va += sizeof(struct srp_direct_buf);
+            } else {
+                md = req->cur_desc = NULL;
+            }
+        }
+        /* No descriptor at hand, fetch one */
+        if (!md) {
+            if (!req->total_desc) {
+                dprintf("VSCSI:   Out of descriptors !\n");
+                break;
+            }
+            md = req->cur_desc = &req->ext_desc;
+            dprintf("VSCSI:   Reading desc from 0x%llx\n", (unsigned long long)td->va);
+            rc = spapr_tce_dma_read(&s->vdev, td->va, md, sizeof(struct srp_direct_buf));
+            if (rc) {
+                dprintf("VSCSI: tce_dma_read -> %d reading ext_desc\n", rc);
+                break;
+            }
+            /* Descriptors fetched from the guest are big-endian */
+            vscsi_swap_desc(md);
+            td->va += sizeof(struct srp_direct_buf);
+            --req->total_desc;
+        }
+        dprintf("VSCSI:   [desc va=0x%llx,len=0x%x] remaining=0x%x\n",
+                (unsigned long long)md->va, md->len, len);
+
+        /* Perform transfer */
+        llen = min(len, md->len);
+        if (req->writing) { /* writing = to device = reading from memory */ 
+            rc = spapr_tce_dma_read(&s->vdev, md->va, buf, llen);
+            
+        } else {
+            rc = spapr_tce_dma_write(&s->vdev, md->va, buf, llen);
+        }
+        if (rc) {
+            dprintf("VSCSI: tce_dma_r/w(%d) -> %d\n", req->writing, rc);
+            break;
+        }
+        dprintf("VSCSI:     data: %02x %02x %02x %02x...\n",
+                buf[0], buf[1], buf[2], buf[3]);
+
+        /* Advance both the caller's buffer and the descriptor */
+        len -= llen;
+        buf += llen;
+        total += llen;
+        md->va += llen;
+        md->len -= llen;
+    }
+    return rc ? -1 : total;
+}
+
+/* Dispatch a data transfer according to the descriptor format selected by
+ * vscsi_preprocess_desc().  Returns what the format-specific helper
+ * returns, or 0 when there is no data descriptor (or the format is not
+ * one handled here).  The 'writing' argument is not consulted; the
+ * helpers use req->writing.
+ */
+static int vscsi_srp_transfer_data(VSCSIState *s, vscsi_req *req,
+                                   int writing, uint8_t *buf, uint32_t len)
+{
+    if (req->dma_fmt == SRP_DATA_DESC_DIRECT) {
+        return vscsi_srp_direct_data(s, req, buf, len);
+    }
+    if (req->dma_fmt == SRP_DATA_DESC_INDIRECT) {
+        return vscsi_srp_indirect_data(s, req, buf, len);
+    }
+    if (req->dma_fmt == SRP_NO_DATA_DESC) {
+        dprintf("VSCSI: no data desc transfer, skipping 0x%x bytes\n", len);
+    }
+    return 0;
+}
+
+/* Bits from linux srp */
+/* Size in bytes of the data-out descriptor area of an SRP command, as
+ * selected by the high nibble of buf_fmt.  Unknown formats count as 0.
+ */
+static int data_out_desc_size(struct srp_cmd *cmd)
+{
+    uint8_t out_fmt = cmd->buf_fmt >> 4;
+
+    if (out_fmt == SRP_DATA_DESC_DIRECT) {
+        return sizeof(struct srp_direct_buf);
+    }
+    if (out_fmt == SRP_DATA_DESC_INDIRECT) {
+        return sizeof(struct srp_indirect_buf) +
+            sizeof(struct srp_direct_buf) * cmd->data_out_desc_cnt;
+    }
+    return 0;
+}
+
+/* Return non-zero when 'size' bytes starting 'offset' bytes into the
+ * command's additional data still lie inside the request's IU buffer.
+ */
+static int vscsi_desc_in_iu(vscsi_req *req, size_t offset, size_t size)
+{
+    struct srp_cmd *cmd = &req->iu.srp.cmd;
+    size_t start = (size_t)((uint8_t *)cmd->add_data - (uint8_t *)&req->iu)
+                   + offset;
+
+    return start <= sizeof(req->iu) && size <= sizeof(req->iu) - start;
+}
+
+/* Locate and byte-swap the RDMA descriptors of an SRP command for the
+ * direction given by req->writing (which must be set beforehand), and
+ * prime the descriptor cursor in 'req'.
+ *
+ * The offsets and descriptor counts come from the guest, so they are
+ * checked against the IU buffer before anything is touched.  If they do
+ * not fit, the request is switched to SRP_NO_DATA_DESC so that later
+ * transfers are skipped instead of walking outside the IU.
+ *
+ * Returns 0 on success, -1 on an unknown format or out-of-range
+ * descriptors.
+ */
+static int vscsi_preprocess_desc(vscsi_req *req)
+{
+    struct srp_cmd *cmd = &req->iu.srp.cmd;
+    int offset, i, local;
+
+    offset = cmd->add_cdb_len & ~3;
+
+    if (req->writing) {
+        req->dma_fmt = cmd->buf_fmt >> 4;
+    } else {
+        /* The data-in descriptors follow the data-out ones */
+        offset += data_out_desc_size(cmd);
+        req->dma_fmt = cmd->buf_fmt & ((1U << 4) - 1);
+    }
+
+    switch (req->dma_fmt) {
+    case SRP_NO_DATA_DESC:
+        break;
+    case SRP_DATA_DESC_DIRECT:
+        if (!vscsi_desc_in_iu(req, offset, sizeof(struct srp_direct_buf))) {
+            goto bad_desc;
+        }
+        req->cur_desc = (struct srp_direct_buf *)(cmd->add_data + offset);
+        req->total_desc = req->local_desc = 1;
+        vscsi_swap_desc(req->cur_desc);
+        dprintf("VSCSI: using direct RDMA %s, 0x%x bytes MD: 0x%llx\n",
+                req->writing ? "write" : "read",
+                req->cur_desc->len, (unsigned long long)req->cur_desc->va);
+        break;
+    case SRP_DATA_DESC_INDIRECT:
+        local = req->writing ? cmd->data_out_desc_cnt :
+            cmd->data_in_desc_cnt;
+        if (!vscsi_desc_in_iu(req, offset,
+                              sizeof(struct srp_indirect_buf) +
+                              sizeof(struct srp_direct_buf) * local)) {
+            goto bad_desc;
+        }
+        req->ind_desc = (struct srp_indirect_buf *)(cmd->add_data + offset);
+        vscsi_swap_desc(&req->ind_desc->table_desc);
+        req->total_desc = req->ind_desc->table_desc.len / sizeof(struct srp_direct_buf);
+        req->local_desc = local;
+        for (i = 0; i < req->local_desc; i++) {
+            vscsi_swap_desc(&req->ind_desc->desc_list[i]);
+        }
+        req->cur_desc = req->local_desc ? &req->ind_desc->desc_list[0] : NULL;
+        dprintf("VSCSI: using indirect RDMA %s, 0x%x bytes %d descs (%d local) VA: 0x%llx\n",
+                req->writing ? "read" : "write", be32_to_cpu(req->ind_desc->len),
+                req->total_desc, req->local_desc,
+                (unsigned long long)req->ind_desc->table_desc.va);
+        break;
+    default:
+        fprintf(stderr,
+                "vscsi_preprocess_desc: Unknown format %x\n", req->dma_fmt);
+        return -1;
+    }
+
+    return 0;
+
+bad_desc:
+    fprintf(stderr,
+            "vscsi_preprocess_desc: descriptors do not fit in the IU\n");
+    req->dma_fmt = SRP_NO_DATA_DESC;
+    return -1;
+}
+
+/* Issue a REQUEST SENSE to the device on behalf of a request that ended
+ * with CHECK CONDITION.  The CDB is built in place over the original
+ * command's CDB and req->sensing is set so that vscsi_command_complete()
+ * treats the following callbacks as sense data.
+ */
+static void vscsi_send_request_sense(VSCSIState *s, vscsi_req *req)
+{
+    SCSIDevice *sdev = req->sdev;
+    uint8_t *cdb = req->iu.srp.cmd.cdb;
+    int n;
+
+    cdb[0] = 3;     /* opcode */
+    cdb[1] = 0;
+    cdb[2] = 0;
+    cdb[3] = 0;
+    cdb[4] = 96;    /* allocation length, same value as SCSI_SENSE_BUF_SIZE */
+    cdb[5] = 0;
+    req->sensing = 1;
+    n = sdev->info->send_command(sdev, req->qtag, cdb, req->lun);
+    dprintf("VSCSI: Queued request sense tag 0x%x \n", req->qtag);
+    if (n < 0) {
+        /* A negative return means the device wants data written to it,
+         * which makes no sense here: give up with a made-up sense.
+         */
+        fprintf(stderr, "VSCSI: REQUEST_SENSE wants write data !?!?!?\n");
+        sdev->info->cancel_io(sdev, req->qtag);
+        vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0);
+        vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
+        vscsi_put_req(s, req);
+        return;
+    } else if (n == 0) {
+        return;
+    }
+    sdev->info->read_data(sdev, req->qtag);
+}
+
+/* Callback to indicate that the SCSI layer has completed a transfer.
+ *
+ * 'tag' is the qemu tag of the request.  When 'reason' is
+ * SCSI_REASON_DONE, 'arg' is the SCSI status of the finished command;
+ * otherwise 'arg' is the number of bytes ready to be moved.
+ *
+ * Three cases are handled: callbacks for an internally issued REQUEST
+ * SENSE (req->sensing), command completion, and a data chunk that has to
+ * be RDMA'd to or from the guest before the next chunk is started.
+ */
+static void vscsi_command_complete(SCSIBus *bus, int reason, uint32_t tag,
+                                   uint32_t arg)
+{
+    VSCSIState *s = DO_UPCAST(VSCSIState, vdev.qdev, bus->qbus.parent);
+    vscsi_req *req = vscsi_find_req(s, tag);
+    SCSIDevice *sdev;
+    uint8_t *buf;
+    int32_t res_in = 0, res_out = 0;
+    int len, rc = 0;
+
+    dprintf("VSCSI: SCSI cmd complete, r=0x%x tag=0x%x arg=0x%x, req=%p\n",
+            reason, tag, arg, req);
+    if (req == NULL) {
+        fprintf(stderr, "VSCSI: Can't find request for tag 0x%x\n", tag);
+        return;
+    }
+    sdev = req->sdev;
+
+    if (req->sensing) {
+        if (reason == SCSI_REASON_DONE) {
+            /* Sense collected: report the original CHECK CONDITION */
+            dprintf("VSCSI: Sense done !\n");
+            vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
+            vscsi_put_req(s, req);
+        } else {
+            /* Sense bytes are available: stash them in the request */
+            uint8_t *buf = sdev->info->get_buf(sdev, tag);
+
+            len = min(arg, SCSI_SENSE_BUF_SIZE);
+            dprintf("VSCSI: Sense data, %d bytes:\n", len);
+            dprintf("       %02x  %02x  %02x  %02x  %02x  %02x  %02x  %02x\n",
+                    buf[0], buf[1], buf[2], buf[3],
+                    buf[4], buf[5], buf[6], buf[7]);
+            dprintf("       %02x  %02x  %02x  %02x  %02x  %02x  %02x  %02x\n",
+                    buf[8], buf[9], buf[10], buf[11],
+                    buf[12], buf[13], buf[14], buf[15]);
+            memcpy(req->sense, buf, len);
+            req->senselen = len;
+            sdev->info->read_data(sdev, req->qtag);
+        }
+        return;
+    }
+
+    if (reason == SCSI_REASON_DONE) {
+        dprintf("VSCSI: Command complete err=%d\n", arg);
+        if (arg == 0) {
+            /* We handle overflows, not underflows for normal commands,
+             * but hopefully nobody cares
+             */
+            if (req->writing)
+                res_out = req->data_len;
+            else
+                res_in = req->data_len;
+            vscsi_send_rsp(s, req, 0, res_in, res_out);
+        } else if (arg == CHECK_CONDITION) {
+            /* The request stays allocated until the sense comes back */
+            dprintf("VSCSI: Got CHECK_CONDITION, requesting sense...\n");
+            vscsi_send_request_sense(s, req);
+            return;
+        } else {
+            vscsi_send_rsp(s, req, arg, 0, 0);
+        }
+        vscsi_put_req(s, req);
+        return;
+    }
+
+    /* "arg" is how much we have read for reads and how much we want
+     * to write for writes (ie, how much is to be DMA'd)
+     */
+    if (arg) {
+        buf = sdev->info->get_buf(sdev, tag);
+        rc = vscsi_srp_transfer_data(s, req, req->writing, buf, arg);
+    }
+    if (rc < 0) {
+        fprintf(stderr, "VSCSI: RDMA error rc=%d!\n", rc);
+        sdev->info->cancel_io(sdev, req->qtag);
+        vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0);
+        vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
+        vscsi_put_req(s, req);
+        return;
+    }
+
+    /* Start next chunk */
+    /* rc is the byte count actually moved by the RDMA above */
+    req->data_len -= rc;
+    if (req->writing) {
+        sdev->info->write_data(sdev, req->qtag);
+    } else {
+        sdev->info->read_data(sdev, req->qtag);
+    }
+}
+
+/* Answer an SRP login request: the login response is built in place over
+ * the request IU (keeping its tag) and sent back to the client.
+ */
+static void vscsi_process_login(VSCSIState *s, vscsi_req *req)
+{
+    struct srp_login_rsp *login_rsp = &req->iu.srp.login_rsp;
+    uint64_t saved_tag = req->iu.srp.rsp.tag;
+
+    dprintf("VSCSI: Got login, sendin response !\n");
+
+    /* TODO handle case that requested size is wrong and
+     * buffer format is wrong
+     */
+    memset(&req->iu, 0, sizeof(struct srp_login_rsp));
+    login_rsp->tag = saved_tag;
+    login_rsp->opcode = SRP_LOGIN_RSP;
+    /* Advertise slightly fewer requests than we can hold so that some
+     * slots stay free for management traffic.
+     */
+    login_rsp->req_lim_delta = cpu_to_be32(VSCSI_REQ_LIMIT-2);
+    login_rsp->max_ti_iu_len = cpu_to_be32(sizeof(union srp_iu));
+    login_rsp->max_it_iu_len = cpu_to_be32(sizeof(union srp_iu));
+    /* Both direct and indirect buffer formats are accepted */
+    login_rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
+                                     SRP_BUF_FORMAT_INDIRECT);
+
+    vscsi_send_iu(s, req, sizeof(*login_rsp), VIOSRP_SRP_FORMAT);
+}
+
+/* Answer an INQUIRY addressed to a target with no device behind it by
+ * faking standard inquiry data with peripheral qualifier 3.  Requests
+ * with EVPD set or a non-zero page code are rejected with INVALID FIELD
+ * IN CDB.
+ */
+static void vscsi_inquiry_no_target(VSCSIState *s, vscsi_req *req)
+{
+    uint8_t *cdb = req->iu.srp.cmd.cdb;
+    uint8_t resp_data[36];
+    int rc, len, alen;
+
+    /* We dont do EVPD. Also check that page_code is 0 */
+    if ((cdb[1] & 0x01) || cdb[2] != 0) {
+        /* Send INVALID FIELD IN CDB */
+        vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x24, 0);
+        vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
+        return;
+    }
+    /* Allocation length is the big-endian 16-bit value in bytes 3..4 */
+    alen = cdb[3];
+    alen = (alen << 8) | cdb[4];
+    len = min(alen, 36);
+
+    /* Fake up inquiry using PQ=3 */
+    memset(resp_data, 0, 36);
+    resp_data[0] = 0x7f;   /* Not capable of supporting a device here */
+    resp_data[2] = 0x06;   /* SPC-4 */
+    resp_data[3] = 0x02;   /* Resp data format */
+    resp_data[4] = 36 - 5; /* Additional length */
+    resp_data[7] = 0x10;   /* Sync transfers */
+    memcpy(&resp_data[16], "QEMU EMPTY      ", 16);
+    memcpy(&resp_data[8], "QEMU    ", 8);
+
+    /* INQUIRY data flows towards the client */
+    req->writing = 0;
+    vscsi_preprocess_desc(req);
+    rc = vscsi_srp_transfer_data(s, req, 0, resp_data, len);
+    if (rc < 0) {
+        vscsi_makeup_sense(s, req, HARDWARE_ERROR, 0, 0);
+        vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
+    } else {
+        /* Report whatever part of the 36 bytes was not delivered */
+        vscsi_send_rsp(s, req, 0, 36 - rc, 0);
+    }
+}
+
+/* Hand an SRP_CMD to the SCSI device it addresses.
+ *
+ * Returns 1 when the request was answered on the spot (no device at that
+ * address) and may be released by the caller, 0 when it has been queued
+ * and will be completed through vscsi_command_complete().
+ */
+static int vscsi_queue_cmd(VSCSIState *s, vscsi_req *req)
+{
+    union srp_iu *srp = &req->iu.srp;
+    SCSIDevice *target = NULL;
+    int id, lun;
+    int xfer;
+
+    vscsi_decode_id_lun(be64_to_cpu(srp->cmd.lun), &id, &lun);
+
+    /* Qemu vs. linux issue with LUNs to be sorted out ... */
+    if (id < 8 && lun < 16) {
+        target = s->bus.devs[id];
+    }
+    if (target == NULL) {
+        dprintf("VSCSI: Command for id %d with no drive\n", id);
+        if (srp->cmd.cdb[0] != INQUIRY) {
+            vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x24, 0x00);
+            vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
+        } else {
+            vscsi_inquiry_no_target(s, req);
+        }
+        return 1;
+    }
+
+    req->sdev = target;
+    req->lun = lun;
+    xfer = target->info->send_command(target, req->qtag, srp->cmd.cdb, lun);
+
+    dprintf("VSCSI: Queued command tag 0x%x CMD 0x%x ID %d LUN %d ret: %d\n",
+            req->qtag, srp->cmd.cdb[0], id, lun, xfer);
+
+    /* The sign of the return value gives the transfer direction, which
+     * must be recorded before the RDMA descriptors are preprocessed.
+     */
+    if (xfer > 0) {
+        req->writing = 0;
+        vscsi_preprocess_desc(req);
+        req->data_len = xfer;
+        target->info->read_data(target, req->qtag);
+    } else if (xfer < 0) {
+        req->writing = 1;
+        vscsi_preprocess_desc(req);
+        req->data_len = -xfer;
+        target->info->write_data(target, req->qtag);
+    }
+    /* Don't touch req here, it may have been recycled already */
+
+    return 0;
+}
+
+/* Handle an SRP task management request.  No function is implemented at
+ * the moment (the mapping below is compiled out), so every request is
+ * rejected with CHECK CONDITION / ILLEGAL REQUEST.
+ *
+ * Returns non-zero when the request has been answered and can be
+ * released by the caller.
+ */
+static int vscsi_process_tsk_mgmt(VSCSIState *s, vscsi_req *req)
+{
+    union viosrp_iu *iu = &req->iu;
+    int fn;
+
+    fprintf(stderr, "vscsi_process_tsk_mgmt %02x\n",
+            iu->srp.tsk_mgmt.tsk_mgmt_func);
+
+    switch (iu->srp.tsk_mgmt.tsk_mgmt_func) {
+#if 0 /* We really don't deal with these for now */
+    case SRP_TSK_ABORT_TASK:
+        fn = ABORT_TASK;
+        break;
+    case SRP_TSK_ABORT_TASK_SET:
+        fn = ABORT_TASK_SET;
+        break;
+    case SRP_TSK_CLEAR_TASK_SET:
+        fn = CLEAR_TASK_SET;
+        break;
+    case SRP_TSK_LUN_RESET:
+        fn = LOGICAL_UNIT_RESET;
+        break;
+    case SRP_TSK_CLEAR_ACA:
+        fn = CLEAR_ACA;
+        break;
+#endif
+    default:
+        fn = 0;
+    }
+    if (fn) {
+        /* XXX Send/Handle target task management */
+        ;
+    } else {
+        vscsi_makeup_sense(s, req, ILLEGAL_REQUEST, 0x20, 0);
+        vscsi_send_rsp(s, req, CHECK_CONDITION, 0, 0);
+    }
+    return !fn;
+}
+
+/* Dispatch an SRP-format IU on its opcode.  Returns non-zero when the
+ * request is finished and its slot can be released by the caller.
+ */
+static int vscsi_handle_srp_req(VSCSIState *s, vscsi_req *req)
+{
+    uint8_t opcode = req->iu.srp.rsp.opcode;
+
+    switch (opcode) {
+    case SRP_CMD:
+        return vscsi_queue_cmd(s, req);
+    case SRP_TSK_MGMT:
+        return vscsi_process_tsk_mgmt(s, req);
+    case SRP_LOGIN_REQ:
+        vscsi_process_login(s, req);
+        return 1;
+    case SRP_LOGIN_RSP:
+    case SRP_I_LOGOUT:
+    case SRP_T_LOGOUT:
+    case SRP_RSP:
+    case SRP_CRED_REQ:
+    case SRP_CRED_RSP:
+    case SRP_AER_REQ:
+    case SRP_AER_RSP:
+        /* Known opcodes we have no handling for */
+        fprintf(stderr, "VSCSI: Unsupported opcode %02x\n", opcode);
+        return 1;
+    default:
+        fprintf(stderr, "VSCSI: Unknown type %02x\n", opcode);
+        return 1;
+    }
+}
+
+/* Answer an adapter-info MAD: fill in a mad_adapter_info_data block,
+ * DMA it to the buffer named in the request and send the MAD back with
+ * its status set.
+ *
+ * The length requested by the client is clamped to the size of the info
+ * block so that no host memory beyond it is copied out.
+ *
+ * Returns the result of vscsi_send_iu().
+ */
+static int vscsi_send_adapter_info(VSCSIState *s, vscsi_req *req)
+{
+    struct viosrp_adapter_info *sinfo;
+    struct mad_adapter_info_data info;
+    uint16_t len;
+    int rc;
+
+    sinfo = &req->iu.mad.adapter_info;
+
+#if 0 /* What for ? */
+    rc = spapr_tce_dma_read(&s->vdev, be64_to_cpu(sinfo->buffer),
+                            &info, be16_to_cpu(sinfo->common.length));
+    if (rc) {
+        fprintf(stderr, "vscsi_send_adapter_info: DMA read failure !\n");
+    }
+#endif
+    memset(&info, 0, sizeof(info));
+    strcpy(info.srp_version, SRP_VERSION);
+    /* info is zeroed above, so the name stays NUL-terminated */
+    strncpy(info.partition_name, "qemu", sizeof(info.partition_name) - 1);
+    info.partition_number = cpu_to_be32(0);
+    info.mad_version = cpu_to_be32(1);
+    info.os_type = cpu_to_be32(2);
+    info.port_max_txu[0] = cpu_to_be32(VSCSI_MAX_SECTORS << 9);
+
+    /* Never copy out more than we actually have */
+    len = be16_to_cpu(sinfo->common.length);
+    if (len > sizeof(info)) {
+        len = sizeof(info);
+    }
+
+    rc = spapr_tce_dma_write(&s->vdev, be64_to_cpu(sinfo->buffer),
+                             &info, len);
+    if (rc)  {
+        fprintf(stderr, "vscsi_send_adapter_info: DMA write failure !\n");
+    }
+
+    /* Same 16-bit status encoding as the other MAD handlers use */
+    sinfo->common.status = rc ? cpu_to_be16(1) : 0;
+
+    return vscsi_send_iu(s, req, sizeof(*sinfo), VIOSRP_MAD_FORMAT);
+}
+
+static int vscsi_handle_mad_req(VSCSIState *s, vscsi_req *req)
+{
+    union mad_iu *mad = &req->iu.mad;
+
+    switch (be32_to_cpu(mad->empty_iu.common.type)) {
+    case VIOSRP_EMPTY_IU_TYPE:
+        fprintf(stderr, "Unsupported EMPTY MAD IU\n");
+        break;
+    case VIOSRP_ERROR_LOG_TYPE:
+        fprintf(stderr, "Unsupported ERROR LOG MAD IU\n");
+        mad->error_log.common.status = cpu_to_be16(1);
+        vscsi_send_iu(s, req, sizeof(mad->error_log), VIOSRP_MAD_FORMAT);
+        break;
+    case VIOSRP_ADAPTER_INFO_TYPE:
+        vscsi_send_adapter_info(s, req);
+        break;
+    case VIOSRP_HOST_CONFIG_TYPE:
+        mad->host_config.common.status = cpu_to_be16(1);
+        vscsi_send_iu(s, req, sizeof(mad->host_config), VIOSRP_MAD_FORMAT);
+        break;
+    default:
+        fprintf(stderr, "VSCSI: Unknown MAD type %02x\n",
+                be32_to_cpu(mad->empty_iu.common.type));
+    }
+
+    return 1;
+}
+
+/* Handle a payload CRQ: claim a request slot, DMA the IU in from the
+ * client and dispatch it as a MAD or SRP request.  'crq' is expected in
+ * host byte order (vscsi_do_crq() converts it).
+ *
+ * The slot is released here whenever the request is dropped or the
+ * handler reports it as finished.
+ */
+static void vscsi_got_payload(VSCSIState *s, vscsi_crq *crq)
+{
+    vscsi_req *req;
+    int done;
+
+    /* We only support a limited number of descriptors, we know
+     * the ibmvscsi driver uses up to 10 max, so it should fit
+     * in our 256 bytes IUs. If not we'll have to increase the size
+     * of the structure.
+     *
+     * Checked before a slot is claimed so that nothing needs undoing.
+     */
+    if (crq->s.IU_length > sizeof(union viosrp_iu)) {
+        fprintf(stderr, "VSCSI: SRP IU too long (%d bytes) !\n",
+                crq->s.IU_length);
+        return;
+    }
+
+    req = vscsi_get_req(s);
+    if (req == NULL) {
+        fprintf(stderr, "VSCSI: Failed to get a request !\n");
+        return;
+    }
+
+    /* XXX Handle failure differently ? */
+    if (spapr_tce_dma_read(&s->vdev, crq->s.IU_data_ptr, &req->iu,
+                           crq->s.IU_length)) {
+        fprintf(stderr, "vscsi_got_payload: DMA read failure !\n");
+        /* req is a slot of s->reqs[], not a heap object: release it
+         * rather than freeing it, and do not process a garbage IU.
+         */
+        vscsi_put_req(s, req);
+        return;
+    }
+    memcpy(&req->crq, crq, sizeof(vscsi_crq));
+
+    if (crq->s.format == VIOSRP_MAD_FORMAT) {
+        done = vscsi_handle_mad_req(s, req);
+    } else {
+        done = vscsi_handle_srp_req(s, req);
+    }
+
+    if (done) {
+        vscsi_put_req(s, req);
+    }
+}
+
+
+/* CRQ entry point, installed as the device's crq.SendFunc.
+ *
+ * Copies the 16-byte entry, converts its multi-byte fields to host byte
+ * order and dispatches on the 'valid' byte: initialisation handshake,
+ * link events (ignored) or payloads.  Always returns 0.
+ */
+static int vscsi_do_crq(struct VIOsPAPRDevice *dev, uint8_t *crq_data)
+{
+    VSCSIState *s = DO_UPCAST(VSCSIState, vdev, dev);
+    vscsi_crq crq;
+    
+    memcpy(crq.raw, crq_data, 16);
+    crq.s.timeout = be16_to_cpu(crq.s.timeout);
+    crq.s.IU_length = be16_to_cpu(crq.s.IU_length);
+    crq.s.IU_data_ptr = be64_to_cpu(crq.s.IU_data_ptr);
+
+    dprintf("VSCSI: do_crq %02x %02x ...\n", crq.raw[0], crq.raw[1]);
+
+    switch(crq.s.valid) {
+    case 0xc0: /* Init command/response */
+
+        /* Respond to initialization request */
+        if (crq.s.format == 0x01) {
+            memset(crq.raw, 0, 16);
+            crq.s.valid = 0xc0;
+            crq.s.format = 0x02;
+            spapr_vio_send_crq(dev, crq.raw);
+        }
+
+        /* Note that in hotplug cases, we might get a 0x02
+         * as a result of us emitting the init request
+         */
+
+        break;
+    case 0xff: /* Link event */
+
+        /* Not handled for now */
+
+        break;
+    case 0x80: /* Payloads */
+        switch (crq.s.format) {
+        case VIOSRP_SRP_FORMAT: /* AKA VSCSI request */
+        case VIOSRP_MAD_FORMAT: /* AKA VSCSI response */
+            /* Both formats are told apart inside vscsi_got_payload() */
+            vscsi_got_payload(s, &crq);
+            break;
+        case VIOSRP_OS400_FORMAT:
+        case VIOSRP_AIX_FORMAT:
+        case VIOSRP_LINUX_FORMAT:
+        case VIOSRP_INLINE_FORMAT:
+            fprintf(stderr, "vscsi_do_srq: Unsupported payload format %02x\n",
+                    crq.s.format);
+            break;
+        default:
+            fprintf(stderr, "vscsi_do_srq: Unknown payload format %02x\n",
+                    crq.s.format);
+        }
+        break;
+    default:
+        fprintf(stderr, "vscsi_do_crq: unknown CRQ %02x %02x ...\n",
+                crq.raw[0], crq.raw[1]);
+    };
+
+    return 0;
+}
+
+/* qdev init hook: reset the request pool, install the CRQ handler and
+ * create the SCSI bus (attaching command-line drives unless the device
+ * was hotplugged).  Always returns 0.
+ */
+static int spapr_vscsi_init(VIOsPAPRDevice *dev)
+{
+    VSCSIState *s = DO_UPCAST(VSCSIState, vdev, dev);
+    int slot;
+
+    dbg_vscsi_state = s;
+
+    /* Initialize qemu request tags */
+    memset(s->reqs, 0, sizeof(s->reqs));
+    for (slot = 0; slot < VSCSI_REQ_LIMIT; slot++) {
+        s->reqs[slot].qtag = slot;
+    }
+
+    dev->crq.SendFunc = vscsi_do_crq;
+
+    scsi_bus_new(&s->bus, &dev->qdev, 1, VSCSI_REQ_LIMIT,
+                 vscsi_command_complete);
+    if (dev->qdev.hotplugged) {
+        return 0;
+    }
+    scsi_bus_legacy_handle_cmdline(&s->bus);
+
+    return 0;
+}
+
+/* Create and initialise a "spapr-vscsi" device on the given VIO bus.
+ *
+ * 'reg' is set as the device's "reg" property; 'qirq' and 'vio_irq_num'
+ * are stored in the device after it has been initialised.
+ */
+void spapr_vscsi_create(VIOsPAPRBus *bus, uint32_t reg,
+                        qemu_irq qirq, uint32_t vio_irq_num)
+{
+    DeviceState *dev;
+    VIOsPAPRDevice *sdev;
+
+    dev = qdev_create(&bus->bus, "spapr-vscsi");
+    qdev_prop_set_uint32(dev, "reg", reg);
+
+    qdev_init_nofail(dev);
+
+    /* Interrupt routing is filled in once the device exists */
+    sdev = (VIOsPAPRDevice *)dev;
+    sdev->qirq = qirq;
+    sdev->vio_irq_num = vio_irq_num;
+}
+
+/* Device-tree hook: give the node "#address-cells" = 2 and
+ * "#size-cells" = 0.  Returns 0 on success or the negative libfdt error
+ * of the first property that could not be set.
+ */
+static int spapr_vscsi_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off)
+{
+    int err = fdt_setprop_cell(fdt, node_off, "#address-cells", 2);
+
+    if (err >= 0) {
+        err = fdt_setprop_cell(fdt, node_off, "#size-cells", 0);
+    }
+
+    return err < 0 ? err : 0;
+}
+
+/* VIO device description for the virtual SCSI adapter: init and
+ * device-tree hooks, the device-tree naming strings and the qdev
+ * properties ("reg" and "dma-window") with their defaults.
+ */
+static VIOsPAPRDeviceInfo spapr_vscsi = {
+    .init = spapr_vscsi_init,
+    .devnode = spapr_vscsi_devnode,
+    .dt_name = "v-scsi",
+    .dt_type = "vscsi",
+    .dt_compatible = "IBM,v-scsi",
+    .signal_mask = 0x00000001,
+    .qdev.name = "spapr-vscsi",
+    .qdev.size = sizeof(VSCSIState),
+    .qdev.props = (Property[]) {
+        DEFINE_PROP_UINT32("reg", VIOsPAPRDevice, reg, 0x2000),
+        DEFINE_PROP_UINT32("dma-window", VIOsPAPRDevice,
+                           rtce_window_size, 0x10000000),
+        DEFINE_PROP_END_OF_LIST(),
+    },
+};
+
+/* Register the vscsi device description with the VIO bus; hooked up via
+ * device_init() below.
+ */
+static void spapr_vscsi_register(void)
+{
+    spapr_vio_bus_register_withprop(&spapr_vscsi);
+}
+device_init(spapr_vscsi_register);
diff --git a/hw/srp.h b/hw/srp.h
new file mode 100644
index 0000000..9d55fc4
--- /dev/null
+++ b/hw/srp.h
@@ -0,0 +1,241 @@ 
+/*
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#ifndef SCSI_SRP_H
+#define SCSI_SRP_H
+
+/*
+ * Structures and constants for the SCSI RDMA Protocol (SRP) as
+ * defined by the INCITS T10 committee.  This file was written using
+ * draft Revision 16a of the SRP standard.
+ */
+
+enum { /* NOTE(review): presumably the values of each structure's `opcode` byte - confirm */
+
+    SRP_LOGIN_REQ = 0x00, /* cf. struct srp_login_req */
+    SRP_TSK_MGMT  = 0x01, /* cf. struct srp_tsk_mgmt */
+    SRP_CMD       = 0x02, /* cf. struct srp_cmd */
+    SRP_I_LOGOUT  = 0x03, /* cf. struct srp_i_logout */
+    SRP_LOGIN_RSP = 0xc0, /* cf. struct srp_login_rsp */
+    SRP_RSP       = 0xc1, /* cf. struct srp_rsp */
+    SRP_LOGIN_REJ = 0xc2, /* cf. struct srp_login_rej */
+    SRP_T_LOGOUT  = 0x80, /* cf. struct srp_t_logout */
+    SRP_CRED_REQ  = 0x81, /* no matching structure in this header */
+    SRP_AER_REQ   = 0x82, /* no matching structure in this header */
+    SRP_CRED_RSP  = 0x41, /* no matching structure in this header */
+    SRP_AER_RSP   = 0x42 /* no matching structure in this header */
+};
+
+enum { /* bit flags; presumably used in the req_buf_fmt / buf_fmt fields - TODO confirm */
+    SRP_BUF_FORMAT_DIRECT   = 1 << 1, /* 0x2 */
+    SRP_BUF_FORMAT_INDIRECT = 1 << 2 /* 0x4 */
+};
+
+enum { /* NOTE(review): presumably descriptor-format codes for srp_cmd.buf_fmt - confirm */
+    SRP_NO_DATA_DESC       = 0,
+    SRP_DATA_DESC_DIRECT   = 1, /* cf. struct srp_direct_buf */
+    SRP_DATA_DESC_INDIRECT = 2 /* cf. struct srp_indirect_buf */
+};
+
+enum { /* NOTE(review): presumably values for srp_tsk_mgmt.tsk_mgmt_func - confirm */
+    SRP_TSK_ABORT_TASK     = 0x01,
+    SRP_TSK_ABORT_TASK_SET = 0x02,
+    SRP_TSK_CLEAR_TASK_SET = 0x04,
+    SRP_TSK_LUN_RESET      = 0x08,
+    SRP_TSK_CLEAR_ACA      = 0x40
+};
+
+enum srp_login_rej_reason { /* presumably values for srp_login_rej.reason - TODO confirm */
+    SRP_LOGIN_REJ_UNABLE_ESTABLISH_CHANNEL   = 0x00010000, /* all codes share the 0x0001xxxx range */
+    SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES     = 0x00010001,
+    SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE = 0x00010002,
+    SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL   = 0x00010003,
+    SRP_LOGIN_REJ_UNSUPPORTED_DESCRIPTOR_FMT = 0x00010004,
+    SRP_LOGIN_REJ_MULTI_CHANNEL_UNSUPPORTED  = 0x00010005,
+    SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED      = 0x00010006
+};
+
+enum { /* NOTE(review): not referenced by any structure in this header */
+    SRP_REV10_IB_IO_CLASS  = 0xff00,
+    SRP_REV16A_IB_IO_CLASS = 0x0100
+};
+
+struct srp_direct_buf { /* 16 bytes: 8 + 4 + 4, no padding needed */
+    uint64_t    va; /* offset 0; presumably the buffer address - TODO confirm */
+    uint32_t    key; /* offset 8 */
+    uint32_t    len; /* offset 12; presumably the buffer length - TODO confirm */
+};
+
+/*
+ * We need the packed attribute because the SRP spec puts the list of
+ * descriptors at an offset of 20, which is not aligned to the size of
+ * struct srp_direct_buf.  The whole structure must be packed to avoid
+ * having the 20-byte structure padded to 24 bytes on 64-bit architectures.
+ */
+struct srp_indirect_buf { /* 20-byte fixed part followed by a descriptor list */
+    struct srp_direct_buf    table_desc; /* offset 0, 16 bytes */
+    uint32_t                 len; /* offset 16 */
+    struct srp_direct_buf    desc_list[0]; /* offset 20; zero-length trailing array */
+} __attribute__((packed));
+
+enum { /* NOTE(review): presumably carried in srp_login_req.req_flags - confirm */
+    SRP_MULTICHAN_SINGLE = 0,
+    SRP_MULTICHAN_MULTI  = 1
+};
+
+struct srp_login_req { /* fields sum to 64 bytes; every member is naturally aligned */
+    uint8_t    opcode; /* offset 0 */
+    uint8_t    reserved1[7]; /* offset 1 */
+    uint64_t   tag; /* offset 8 */
+    uint32_t   req_it_iu_len; /* offset 16 */
+    uint8_t    reserved2[4]; /* offset 20 */
+    uint16_t   req_buf_fmt; /* offset 24; presumably SRP_BUF_FORMAT_* bits - TODO confirm */
+    uint8_t    req_flags; /* offset 26 */
+    uint8_t    reserved3[5]; /* offset 27 */
+    uint8_t    initiator_port_id[16]; /* offset 32 */
+    uint8_t    target_port_id[16]; /* offset 48 */
+};
+
+/*
+ * The SRP spec defines the size of the LOGIN_RSP structure to be 52
+ * bytes, so it needs to be packed to avoid having it padded to 56
+ * bytes on 64-bit architectures.
+ */
+struct srp_login_rsp { /* fields sum to 52 bytes; packed so no tail padding is added */
+    uint8_t    opcode; /* offset 0 */
+    uint8_t    reserved1[3]; /* offset 1 */
+    uint32_t   req_lim_delta; /* offset 4 */
+    uint64_t   tag; /* offset 8 */
+    uint32_t   max_it_iu_len; /* offset 16 */
+    uint32_t   max_ti_iu_len; /* offset 20 */
+    uint16_t   buf_fmt; /* offset 24; presumably SRP_BUF_FORMAT_* bits - TODO confirm */
+    uint8_t    rsp_flags; /* offset 26 */
+    uint8_t    reserved2[25]; /* offset 27, runs to byte 51 */
+} __attribute__((packed));
+
+struct srp_login_rej { /* fields sum to 32 bytes; every member is naturally aligned */
+    uint8_t    opcode; /* offset 0 */
+    uint8_t    reserved1[3]; /* offset 1 */
+    uint32_t   reason; /* offset 4; presumably an enum srp_login_rej_reason value - TODO confirm */
+    uint64_t   tag; /* offset 8 */
+    uint8_t    reserved2[8]; /* offset 16 */
+    uint16_t   buf_fmt; /* offset 24 */
+    uint8_t    reserved3[6]; /* offset 26 */
+};
+
+struct srp_i_logout { /* fields sum to 16 bytes */
+    uint8_t    opcode; /* offset 0 */
+    uint8_t    reserved[7]; /* offset 1 */
+    uint64_t   tag; /* offset 8 */
+};
+
+struct srp_t_logout { /* fields sum to 16 bytes */
+    uint8_t    opcode; /* offset 0 */
+    uint8_t    sol_not; /* offset 1 */
+    uint8_t    reserved[2]; /* offset 2 */
+    uint32_t   reason; /* offset 4 */
+    uint64_t   tag; /* offset 8 */
+};
+
+/*
+ * We need the packed attribute because the SRP spec only aligns the
+ * 8-byte LUN field to 4 bytes.
+ */
+struct srp_tsk_mgmt { /* fields sum to 48 bytes; only `lun` is packed */
+    uint8_t    opcode; /* offset 0 */
+    uint8_t    sol_not; /* offset 1 */
+    uint8_t    reserved1[6]; /* offset 2 */
+    uint64_t   tag; /* offset 8 */
+    uint8_t    reserved2[4]; /* offset 16 */
+    uint64_t   lun __attribute__((packed)); /* offset 20: 4-byte aligned only, hence packed */
+    uint8_t    reserved3[2]; /* offset 28 */
+    uint8_t    tsk_mgmt_func; /* offset 30; presumably an SRP_TSK_* value - TODO confirm */
+    uint8_t    reserved4; /* offset 31 */
+    uint64_t   task_tag; /* offset 32 */
+    uint8_t    reserved5[8]; /* offset 40 */
+};
+
+/*
+ * We need the packed attribute because the SRP spec only aligns the
+ * 8-byte LUN field to 4 bytes.
+ */
+struct srp_cmd { /* 48-byte fixed part followed by add_data; only `lun` is packed */
+    uint8_t    opcode; /* offset 0 */
+    uint8_t    sol_not; /* offset 1 */
+    uint8_t    reserved1[3]; /* offset 2 */
+    uint8_t    buf_fmt; /* offset 5 */
+    uint8_t    data_out_desc_cnt; /* offset 6 */
+    uint8_t    data_in_desc_cnt; /* offset 7 */
+    uint64_t   tag; /* offset 8 */
+    uint8_t    reserved2[4]; /* offset 16 */
+    uint64_t   lun __attribute__((packed)); /* offset 20: 4-byte aligned only, hence packed */
+    uint8_t    reserved3; /* offset 28 */
+    uint8_t    task_attr; /* offset 29 */
+    uint8_t    reserved4; /* offset 30 */
+    uint8_t    add_cdb_len; /* offset 31 */
+    uint8_t    cdb[16]; /* offset 32 */
+    uint8_t    add_data[0]; /* offset 48; zero-length trailing array */
+};
+
+enum { /* single-bit flags; presumably for srp_rsp.flags - TODO confirm */
+    SRP_RSP_FLAG_RSPVALID = 1 << 0, /* 0x01 */
+    SRP_RSP_FLAG_SNSVALID = 1 << 1, /* 0x02 */
+    SRP_RSP_FLAG_DOOVER   = 1 << 2, /* 0x04 */
+    SRP_RSP_FLAG_DOUNDER  = 1 << 3, /* 0x08 */
+    SRP_RSP_FLAG_DIOVER   = 1 << 4, /* 0x10 */
+    SRP_RSP_FLAG_DIUNDER  = 1 << 5 /* 0x20 */
+};
+
+/*
+ * The SRP spec defines the size of the RSP structure to be 36 bytes,
+ * so it needs to be packed to avoid having it padded to 40 bytes on
+ * 64-bit architectures.
+ */
+struct srp_rsp { /* 36-byte fixed part followed by data; packed so no tail padding is added */
+    uint8_t    opcode; /* offset 0 */
+    uint8_t    sol_not; /* offset 1 */
+    uint8_t    reserved1[2]; /* offset 2 */
+    uint32_t   req_lim_delta; /* offset 4 */
+    uint64_t   tag; /* offset 8 */
+    uint8_t    reserved2[2]; /* offset 16 */
+    uint8_t    flags; /* offset 18; presumably SRP_RSP_FLAG_* bits - TODO confirm */
+    uint8_t    status; /* offset 19 */
+    uint32_t   data_out_res_cnt; /* offset 20 */
+    uint32_t   data_in_res_cnt; /* offset 24 */
+    uint32_t   sense_data_len; /* offset 28 */
+    uint32_t   resp_data_len; /* offset 32 */
+    uint8_t    data[0]; /* offset 36; zero-length trailing array */
+} __attribute__((packed));
+
+#endif /* SCSI_SRP_H */