[1/2] nvram: Add TPM NVRAM implementation

Message ID 1370369921-14925-2-git-send-email-coreyb@linux.vnet.ibm.com
State New

Commit Message

Corey Bryant June 4, 2013, 6:18 p.m. UTC
Provides a TPM NVRAM implementation that enables storing TPM
NVRAM data in a persistent image file.  The block driver is
used to read/write the drive image.  This will enable, for
example, an encrypted QCOW2 image to be used to store sensitive
keys.

This patch provides APIs that a TPM backend can use to read and
write data.

Signed-off-by: Corey Bryant <coreyb@linux.vnet.ibm.com>
---
 hw/tpm/Makefile.objs |    1 +
 hw/tpm/tpm_nvram.c   |  399 ++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/tpm/tpm_nvram.h   |   25 +++
 3 files changed, 425 insertions(+), 0 deletions(-)
 create mode 100644 hw/tpm/tpm_nvram.c
 create mode 100644 hw/tpm/tpm_nvram.h

Comments

Stefan Hajnoczi June 5, 2013, 9:05 a.m. UTC | #1
On Tue, Jun 04, 2013 at 02:18:40PM -0400, Corey Bryant wrote:
> Provides a TPM NVRAM implementation that enables storing TPM
> NVRAM data in a persistent image file.  The block driver is
> used to read/write the drive image.  This will enable, for
> example, an encrypted QCOW2 image to be used to store sensitive
> keys.
> 
> This patch provides APIs that a TPM backend can use to read and
> write data.
> 
> Signed-off-by: Corey Bryant <coreyb@linux.vnet.ibm.com>
> ---
>  hw/tpm/Makefile.objs |    1 +
>  hw/tpm/tpm_nvram.c   |  399 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/tpm/tpm_nvram.h   |   25 +++
>  3 files changed, 425 insertions(+), 0 deletions(-)
>  create mode 100644 hw/tpm/tpm_nvram.c
>  create mode 100644 hw/tpm/tpm_nvram.h
> 
> diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs
> index 99f5983..49faef4 100644
> --- a/hw/tpm/Makefile.objs
> +++ b/hw/tpm/Makefile.objs
> @@ -1,2 +1,3 @@
>  common-obj-$(CONFIG_TPM_TIS) += tpm_tis.o
> +common-obj-$(CONFIG_TPM_TIS) += tpm_nvram.o
>  common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o
> diff --git a/hw/tpm/tpm_nvram.c b/hw/tpm/tpm_nvram.c
> new file mode 100644
> index 0000000..95ff396
> --- /dev/null
> +++ b/hw/tpm/tpm_nvram.c
> @@ -0,0 +1,399 @@
> +/*
> + * TPM NVRAM - enables storage of persistent NVRAM data on an image file
> + *
> + * Copyright (C) 2013 IBM Corporation
> + *
> + * Authors:
> + *  Stefan Berger    <stefanb@us.ibm.com>
> + *  Corey Bryant     <coreyb@linux.vnet.ibm.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "tpm_nvram.h"
> +#include "block/block_int.h"
> +#include "qemu/thread.h"
> +#include "sysemu/sysemu.h"
> +
> +/* #define TPM_NVRAM_DEBUG */
> +
> +#ifdef TPM_NVRAM_DEBUG
> +#define DPRINTF(fmt, ...) \
> +    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
> +#else
> +#define DPRINTF(fmt, ...) \
> +    do { } while (0)
> +#endif

I suggest:

#define TPM_NVRAM_DEBUG 0
#define DPRINTF(fmt, ...) \
    do { \
        if (TPM_NVRAM_DEBUG) { \
            fprintf(stderr, fmt, ## __VA_ARGS__); \
        } \
    } while (0)

This approach prevents bitrot since the compiler always parses the
printf() whether TPM_NVRAM_DEBUG is 0 or 1.  If you #ifdef out the code
completely, like above, then you don't notice compiler warnings/errors
until you actually #define TPM_NVRAM_DEBUG (i.e. prone to bitrot).

> +
> +/* Round a value up to the next SIZE */
> +#define ROUNDUP(VAL, SIZE) \
> +    (((VAL)+(SIZE)-1) & ~((SIZE)-1))

Please drop this macro and use include/qemu/osdep.h:ROUND_UP()
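E.g. ROUND_UP(size, BDRV_SECTOR_SIZE).  Note that ROUND_UP() expects a
power-of-two alignment, which BDRV_SECTOR_SIZE is.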

> +
> +/* Get the number of sectors required to contain SIZE bytes */
> +#define NUM_SECTORS(SIZE) \
> +    (ROUNDUP(SIZE, BDRV_SECTOR_SIZE) / BDRV_SECTOR_SIZE)

Please drop this macro and use include/qemu/osdep.h:DIV_ROUND_UP() instead.
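Untested, but the call sites should then reduce to something like:

rwr->num_sectors = DIV_ROUND_UP(size, BDRV_SECTOR_SIZE);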

> +
> +/* Read/write request data */
> +typedef struct TPMNvramRWRequest {
> +    BlockDriverState *bdrv;
> +    bool is_write;
> +    uint64_t sector_num;
> +    int num_sectors;
> +    uint8_t **blob_r;
> +    uint8_t *blob_w;
> +    uint32_t size;
> +    QEMUIOVector *qiov;
> +    bool done;
> +    int rc;
> +
> +    QemuMutex completion_mutex;
> +    QemuCond completion;
> +
> +    QSIMPLEQ_ENTRY(TPMNvramRWRequest) list;
> +} TPMNvramRWRequest;
> +
> +/* Mutex protected queue of read/write requests */
> +static QemuMutex tpm_nvram_rwrequests_mutex;
> +static QSIMPLEQ_HEAD(, TPMNvramRWRequest) tpm_nvram_rwrequests =
> +    QSIMPLEQ_HEAD_INITIALIZER(tpm_nvram_rwrequests);
> +
> +static QEMUBH *tpm_nvram_bh;
> +
> +/*
> + * Increase the drive size if it's too small to store the blob
> + */
> +static int tpm_nvram_adjust_size(BlockDriverState *bdrv, uint64_t sector_num,
> +                                 int num_sectors)
> +{
> +    int rc = 0;
> +    int64_t drive_size, required_size;
> +
> +    drive_size = bdrv_getlength(bdrv);
> +    if (drive_size < 0) {
> +        DPRINTF("%s: Unable to determine TPM NVRAM drive size\n", __func__);
> +        rc = drive_size;
> +        goto err_exit;
> +    }
> +
> +    required_size = (sector_num + num_sectors) * BDRV_SECTOR_SIZE;
> +
> +    if (drive_size < required_size) {
> +        rc = bdrv_truncate(bdrv, required_size);
> +        if (rc < 0) {
> +            DPRINTF("%s: TPM NVRAM drive too small\n", __func__);
> +        }
> +    }
> +
> +err_exit:
> +    return rc;
> +}
> +
> +/*
> + * Coroutine that reads a blob from the drive asynchronously
> + */
> +static void coroutine_fn tpm_nvram_co_read(void *opaque)
> +{
> +    TPMNvramRWRequest *rwr = opaque;
> +
> +    rwr->rc = bdrv_co_readv(rwr->bdrv,
> +                            rwr->sector_num,
> +                            rwr->num_sectors,
> +                            rwr->qiov);
> +    rwr->done = true;
> +}
> +
> +/*
> + * Coroutine that writes a blob to the drive asynchronously
> + */
> +static void coroutine_fn tpm_nvram_co_write(void *opaque)
> +{
> +    TPMNvramRWRequest *rwr = opaque;
> +
> +    rwr->rc = bdrv_co_writev(rwr->bdrv,
> +                             rwr->sector_num,
> +                             rwr->num_sectors,
> +                             rwr->qiov);
> +    rwr->done = true;
> +}
> +
> +/*
> + * Prepare for and enter a coroutine to read a blob from the drive
> + */
> +static void tpm_nvram_do_co_read(TPMNvramRWRequest *rwr)
> +{
> +    Coroutine *co;
> +    size_t buf_len = rwr->num_sectors * BDRV_SECTOR_SIZE;
> +    uint8_t *buf = g_malloc(buf_len);
> +
> +    memset(buf, 0x0, buf_len);
> +
> +    struct iovec iov = {
> +        .iov_base = (void *)buf,
> +        .iov_len = rwr->size,
> +    };
> +
> +    qemu_iovec_init_external(rwr->qiov, &iov, 1);
> +
> +    co = qemu_coroutine_create(tpm_nvram_co_read);
> +    qemu_coroutine_enter(co, rwr);
> +
> +    while (!rwr->done) {
> +        qemu_aio_wait();
> +    }

The qemu_aio_wait() loop makes this function synchronous - it will block
the current thread until the request completes.

You need to use bdrv_aio_readv()/bdrv_aio_writev() or let the
tpm_nvram_co_read()/tpm_nvram_co_write() coroutine perform the
completion code path instead of waiting for it here.
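
A rough, untested sketch of the aio variant, with the completion moved
into a callback (callback name made up here):

static void tpm_nvram_rw_cb(void *opaque, int ret)
{
    TPMNvramRWRequest *rwr = opaque;

    rwr->rc = ret;
    /* signal completion / free the request from here */
}

...
bdrv_aio_readv(rwr->bdrv, rwr->sector_num, rwr->qiov,
               rwr->num_sectors, tpm_nvram_rw_cb, rwr);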

> +
> +    if (rwr->rc == 0) {
> +        rwr->rc = rwr->num_sectors;
> +        *rwr->blob_r = g_malloc(rwr->size);
> +        memcpy(*rwr->blob_r, buf, rwr->size);

Use bdrv_pread()/bdrv_pwrite() for byte-granularity I/O instead of
duplicating the buffering yourself.
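
E.g. (untested):

*rwr->blob_r = g_malloc(rwr->size);
rwr->rc = bdrv_pread(rwr->bdrv, rwr->sector_num * BDRV_SECTOR_SIZE,
                     *rwr->blob_r, rwr->size);

That drops the sector-sized bounce buffer and the memcpy() entirely.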

> +    } else {
> +        *rwr->blob_r = NULL;
> +    }
> +
> +    g_free(buf);
> +}
> +
> +/*
> + * Prepare for and enter a coroutine to write a blob to the drive
> + */
> +static void tpm_nvram_do_co_write(TPMNvramRWRequest *rwr)
> +{
> +    Coroutine *co;
> +    size_t buf_len = rwr->num_sectors * BDRV_SECTOR_SIZE;
> +    uint8_t *buf = g_malloc(buf_len);
> +
> +    memset(buf, 0x0, buf_len);
> +    memcpy(buf, rwr->blob_w, rwr->size);
> +
> +    struct iovec iov = {
> +        .iov_base = (void *)buf,
> +        .iov_len = rwr->size,
> +    };
> +
> +    qemu_iovec_init_external(rwr->qiov, &iov, 1);
> +
> +    rwr->rc = tpm_nvram_adjust_size(rwr->bdrv, rwr->sector_num,
> +                                    rwr->num_sectors);
> +    if (rwr->rc < 0) {
> +        goto err_exit;
> +    }
> +
> +    co = qemu_coroutine_create(tpm_nvram_co_write);
> +    qemu_coroutine_enter(co, rwr);
> +
> +    while (!rwr->done) {
> +        qemu_aio_wait();
> +    }
> +
> +    if (rwr->rc == 0) {
> +        rwr->rc = rwr->num_sectors;
> +    }
> +
> +err_exit:
> +    g_free(buf);
> +}
> +
> +/*
> + * Initialization for read requests
> + */
> +static TPMNvramRWRequest *tpm_nvram_rwrequest_init_read(BlockDriverState *bdrv,
> +                                                        int64_t sector_num,
> +                                                        uint8_t **blob,
> +                                                        uint32_t size)
> +{
> +    TPMNvramRWRequest *rwr;
> +
> +    rwr = g_new0(TPMNvramRWRequest, 1);
> +    rwr->bdrv = bdrv;
> +    rwr->is_write = false;
> +    rwr->sector_num = sector_num;
> +    rwr->num_sectors = NUM_SECTORS(size);
> +    rwr->blob_r = blob;
> +    rwr->size = size;
> +    rwr->qiov = g_new0(QEMUIOVector, 1);

Why allocate qiov on the heap instead of making it a TPMNvramRWRequest field?
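I.e. a "QEMUIOVector qiov;" member plus
qemu_iovec_init_external(&rwr->qiov, &iov, 1) at the call sites.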

> +    rwr->done = false;
> +
> +    return rwr;
> +}
> +
> +/*
> + * Initialization for write requests
> + */
> +static TPMNvramRWRequest *tpm_nvram_rwrequest_init_write(BlockDriverState *bdrv,
> +                                                         int64_t sector_num,
> +                                                         uint8_t *blob,
> +                                                         uint32_t size)
> +{
> +    TPMNvramRWRequest *rwr;
> +
> +    rwr = g_new0(TPMNvramRWRequest, 1);
> +    rwr->bdrv = bdrv;
> +    rwr->is_write = true;
> +    rwr->sector_num = sector_num;
> +    rwr->num_sectors = NUM_SECTORS(size);
> +    rwr->blob_w = blob;
> +    rwr->size = size;
> +    rwr->qiov = g_new0(QEMUIOVector, 1);
> +    rwr->done = false;
> +
> +    return rwr;
> +}
> +
> +/*
> + * Free read/write request memory
> + */
> +static void tpm_nvram_rwrequest_free(TPMNvramRWRequest *rwr)
> +{
> +    g_free(rwr->qiov);
> +    g_free(rwr);
> +}
> +
> +/*
> + * Execute a read or write of TPM NVRAM blob data
> + */
> +static void tpm_nvram_rwrequest_exec(TPMNvramRWRequest *rwr)
> +{
> +    if (rwr->is_write) {
> +        tpm_nvram_do_co_write(rwr);
> +    } else {
> +        tpm_nvram_do_co_read(rwr);
> +    }
> +
> +    qemu_mutex_lock(&rwr->completion_mutex);
> +    qemu_cond_signal(&rwr->completion);
> +    qemu_mutex_unlock(&rwr->completion_mutex);
> +}
> +
> +/*
> + * Bottom-half callback that is invoked by QEMU's main thread to
> + * process TPM NVRAM read/write requests.
> + */
> +static void tpm_nvram_rwrequest_callback(void *opaque)
> +{
> +    TPMNvramRWRequest *rwr, *next;
> +
> +    qemu_mutex_lock(&tpm_nvram_rwrequests_mutex);
> +
> +    QSIMPLEQ_FOREACH_SAFE(rwr, &tpm_nvram_rwrequests, list, next) {
> +        QSIMPLEQ_REMOVE(&tpm_nvram_rwrequests, rwr, TPMNvramRWRequest, list);
> +
> +        qemu_mutex_unlock(&tpm_nvram_rwrequests_mutex);
> +        tpm_nvram_rwrequest_exec(rwr);
> +        qemu_mutex_lock(&tpm_nvram_rwrequests_mutex);
> +    }
> +
> +    qemu_mutex_unlock(&tpm_nvram_rwrequests_mutex);
> +}
> +
> +/*
> + * Schedule a bottom-half to read or write a blob to the TPM NVRAM drive
> + */
> +static void tpm_nvram_rwrequest_schedule(TPMNvramRWRequest *rwr)
> +{
> +    qemu_mutex_lock(&tpm_nvram_rwrequests_mutex);
> +    QSIMPLEQ_INSERT_TAIL(&tpm_nvram_rwrequests, rwr, list);
> +    qemu_mutex_unlock(&tpm_nvram_rwrequests_mutex);
> +
> +    qemu_bh_schedule(tpm_nvram_bh);
> +
> +    /* Wait for completion of the read/write request */
> +    qemu_mutex_lock(&rwr->completion_mutex);
> +    qemu_cond_wait(&rwr->completion, &rwr->completion_mutex);
> +    qemu_mutex_unlock(&rwr->completion_mutex);

Race condition: what if the request completes before we reach
qemu_cond_wait()?  I suggest initializing rwr->rc to -EINPROGRESS and
doing:

qemu_mutex_lock(&rwr->completion_mutex);
while (rwr->rc == -EINPROGRESS) {
    qemu_cond_wait(&rwr->completion, &rwr->completion_mutex);
}
qemu_mutex_unlock(&rwr->completion_mutex);

> +}
> +
> +/*
> + * Initialize a TPM NVRAM drive
> + */
> +int tpm_nvram_init(BlockDriverState *bdrv)
> +{
> +    qemu_mutex_init(&tpm_nvram_rwrequests_mutex);
> +    tpm_nvram_bh = qemu_bh_new(tpm_nvram_rwrequest_callback, NULL);
> +
> +    if (bdrv_is_read_only(bdrv)) {
> +        DPRINTF("%s: TPM NVRAM drive '%s' is read-only\n", __func__,
> +                bdrv->filename);
> +        return -EPERM;
> +    }
> +
> +    bdrv_lock_medium(bdrv, true);
> +
> +    DPRINTF("%s: TPM NVRAM drive '%s' initialized successfully\n", __func__,
> +            bdrv->filename);
> +
> +    return 0;
> +}
> +
> +/*
> + * Read a TPM NVRAM blob from the drive.  On success, returns the
> + * number of sectors used by this blob.
> + */
> +int tpm_nvram_read(BlockDriverState *bdrv, int64_t sector_num,
> +                   uint8_t **blob, uint32_t size)
> +{
> +    int rc;
> +    TPMNvramRWRequest *rwr;
> +
> +    *blob = NULL;
> +
> +    if (!bdrv) {
> +        return -EPERM;
> +    }
> +
> +    if (sector_num < 0) {
> +        return -EINVAL;
> +    }

Can you let block.c handle these checks to avoid duplication?

> +
> +    rwr = tpm_nvram_rwrequest_init_read(bdrv, sector_num, blob, size);
> +    tpm_nvram_rwrequest_schedule(rwr);
> +    rc = rwr->rc;
> +
> +#ifdef TPM_NVRAM_DEBUG
> +    if (rc < 0) {
> +        DPRINTF("%s: TPM NVRAM read failed\n", __func__);
> +    } else {
> +        DPRINTF("%s: TPM NVRAM read successful: sector_num=%"PRIu64", "
> +                "size=%"PRIu32", num_sectors=%d\n", __func__,
> +                rwr->sector_num, rwr->size, rwr->num_sectors);
> +    }
> +#endif

#ifdef is unnecessary here.  The compiler can drop deadcode.

> +
> +    tpm_nvram_rwrequest_free(rwr);
> +    return rc;
> +}
> +
> +/*
> + * Write a TPM NVRAM blob to the drive.  On success, returns the
> + * number of sectors used by this blob.
> + */
> +int tpm_nvram_write(BlockDriverState *bdrv, int64_t sector_num,
> +                    uint8_t *blob, uint32_t size)
> +{
> +    int rc;
> +    TPMNvramRWRequest *rwr;
> +
> +    if (!bdrv) {
> +        return -EPERM;
> +    }
> +
> +    if (sector_num < 0 || !blob) {
> +        return -EINVAL;
> +    }
> +
> +    rwr = tpm_nvram_rwrequest_init_write(bdrv, sector_num, blob, size);
> +    tpm_nvram_rwrequest_schedule(rwr);
> +    rc = rwr->rc;
> +
> +#ifdef TPM_NVRAM_DEBUG
> +    if (rc < 0) {
> +        DPRINTF("%s: TPM NVRAM write failed\n", __func__);
> +    } else {
> +        DPRINTF("%s: TPM NVRAM write successful: sector_num=%"PRIu64", "
> +                "size=%"PRIu32", num_sectors=%d\n", __func__,
> +                rwr->sector_num, rwr->size, rwr->num_sectors);
> +    }
> +#endif
> +
> +    tpm_nvram_rwrequest_free(rwr);
> +    return rc;
> +}
> diff --git a/hw/tpm/tpm_nvram.h b/hw/tpm/tpm_nvram.h
> new file mode 100644
> index 0000000..fceb4d0
> --- /dev/null
> +++ b/hw/tpm/tpm_nvram.h
> @@ -0,0 +1,25 @@
> +/*
> + * TPM NVRAM - enables storage of persistent NVRAM data on an image file
> + *
> + * Copyright (C) 2013 IBM Corporation
> + *
> + * Authors:
> + *  Stefan Berger    <stefanb@us.ibm.com>
> + *  Corey Bryant     <coreyb@linux.vnet.ibm.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef TPM_TPM_NVRAM_H
> +#define TPM_TPM_NVRAM_H
> +
> +#include "block/block.h"
> +
> +int tpm_nvram_init(BlockDriverState *bdrv);
> +int tpm_nvram_read(BlockDriverState *bdrv, int64_t sector_num,
> +                   uint8_t **blob, uint32_t size);
> +int tpm_nvram_write(BlockDriverState *bdrv, int64_t sector_num,
> +                    uint8_t *blob, uint32_t size);
> +
> +#endif
> -- 
> 1.7.1
>
Corey Bryant June 5, 2013, 1:28 p.m. UTC | #2
Thanks for reviewing!

On 06/05/2013 05:05 AM, Stefan Hajnoczi wrote:
> On Tue, Jun 04, 2013 at 02:18:40PM -0400, Corey Bryant wrote:
>> Provides a TPM NVRAM implementation that enables storing TPM
>> NVRAM data in a persistent image file.  The block driver is
>> used to read/write the drive image.  This will enable, for
>> example, an encrypted QCOW2 image to be used to store sensitive
>> keys.
>>
>> This patch provides APIs that a TPM backend can use to read and
>> write data.
>>
>> Signed-off-by: Corey Bryant <coreyb@linux.vnet.ibm.com>
>> ---
>>   hw/tpm/Makefile.objs |    1 +
>>   hw/tpm/tpm_nvram.c   |  399 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>   hw/tpm/tpm_nvram.h   |   25 +++
>>   3 files changed, 425 insertions(+), 0 deletions(-)
>>   create mode 100644 hw/tpm/tpm_nvram.c
>>   create mode 100644 hw/tpm/tpm_nvram.h
>>
>> diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs
>> index 99f5983..49faef4 100644
>> --- a/hw/tpm/Makefile.objs
>> +++ b/hw/tpm/Makefile.objs
>> @@ -1,2 +1,3 @@
>>   common-obj-$(CONFIG_TPM_TIS) += tpm_tis.o
>> +common-obj-$(CONFIG_TPM_TIS) += tpm_nvram.o
>>   common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o
>> diff --git a/hw/tpm/tpm_nvram.c b/hw/tpm/tpm_nvram.c
>> new file mode 100644
>> index 0000000..95ff396
>> --- /dev/null
>> +++ b/hw/tpm/tpm_nvram.c
>> @@ -0,0 +1,399 @@
>> +/*
>> + * TPM NVRAM - enables storage of persistent NVRAM data on an image file
>> + *
>> + * Copyright (C) 2013 IBM Corporation
>> + *
>> + * Authors:
>> + *  Stefan Berger    <stefanb@us.ibm.com>
>> + *  Corey Bryant     <coreyb@linux.vnet.ibm.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "tpm_nvram.h"
>> +#include "block/block_int.h"
>> +#include "qemu/thread.h"
>> +#include "sysemu/sysemu.h"
>> +
>> +/* #define TPM_NVRAM_DEBUG */
>> +
>> +#ifdef TPM_NVRAM_DEBUG
>> +#define DPRINTF(fmt, ...) \
>> +    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
>> +#else
>> +#define DPRINTF(fmt, ...) \
>> +    do { } while (0)
>> +#endif
>
> I suggest:
>
> #define TPM_NVRAM_DEBUG 0
> #define DPRINTF(fmt, ...) \
>     do { \
>         if (TPM_NVRAM_DEBUG) { \
>             fprintf(stderr, fmt, ## __VA_ARGS__); \
>         } \
>     } while (0)
>
> This approach prevents bitrot since the compiler always parses the
> printf() whether TPM_NVRAM_DEBUG is 0 or 1.  If you #ifdef out the code
> completely, like above, then you don't notice compiler warnings/errors
> until you actually #define TPM_NVRAM_DEBUG (i.e. prone to bitrot).
>

Ok

>> +
>> +/* Round a value up to the next SIZE */
>> +#define ROUNDUP(VAL, SIZE) \
>> +    (((VAL)+(SIZE)-1) & ~((SIZE)-1))
>
> Please drop this macro and use include/qemu/osdep.h:ROUND_UP()
>

Ok

>> +
>> +/* Get the number of sectors required to contain SIZE bytes */
>> +#define NUM_SECTORS(SIZE) \
>> +    (ROUNDUP(SIZE, BDRV_SECTOR_SIZE) / BDRV_SECTOR_SIZE)
>
> Please drop this macro and use include/qemu/osdep.h:DIV_ROUND_UP() instead.
>

Ok

>> +
>> +/* Read/write request data */
>> +typedef struct TPMNvramRWRequest {
>> +    BlockDriverState *bdrv;
>> +    bool is_write;
>> +    uint64_t sector_num;
>> +    int num_sectors;
>> +    uint8_t **blob_r;
>> +    uint8_t *blob_w;
>> +    uint32_t size;
>> +    QEMUIOVector *qiov;
>> +    bool done;
>> +    int rc;
>> +
>> +    QemuMutex completion_mutex;
>> +    QemuCond completion;
>> +
>> +    QSIMPLEQ_ENTRY(TPMNvramRWRequest) list;
>> +} TPMNvramRWRequest;
>> +
>> +/* Mutex protected queue of read/write requests */
>> +static QemuMutex tpm_nvram_rwrequests_mutex;
>> +static QSIMPLEQ_HEAD(, TPMNvramRWRequest) tpm_nvram_rwrequests =
>> +    QSIMPLEQ_HEAD_INITIALIZER(tpm_nvram_rwrequests);
>> +
>> +static QEMUBH *tpm_nvram_bh;
>> +
>> +/*
>> + * Increase the drive size if it's too small to store the blob
>> + */
>> +static int tpm_nvram_adjust_size(BlockDriverState *bdrv, uint64_t sector_num,
>> +                                 int num_sectors)
>> +{
>> +    int rc = 0;
>> +    int64_t drive_size, required_size;
>> +
>> +    drive_size = bdrv_getlength(bdrv);
>> +    if (drive_size < 0) {
>> +        DPRINTF("%s: Unable to determine TPM NVRAM drive size\n", __func__);
>> +        rc = drive_size;
>> +        goto err_exit;
>> +    }
>> +
>> +    required_size = (sector_num + num_sectors) * BDRV_SECTOR_SIZE;
>> +
>> +    if (drive_size < required_size) {
>> +        rc = bdrv_truncate(bdrv, required_size);
>> +        if (rc < 0) {
>> +            DPRINTF("%s: TPM NVRAM drive too small\n", __func__);
>> +        }
>> +    }
>> +
>> +err_exit:
>> +    return rc;
>> +}
>> +
>> +/*
>> + * Coroutine that reads a blob from the drive asynchronously
>> + */
>> +static void coroutine_fn tpm_nvram_co_read(void *opaque)
>> +{
>> +    TPMNvramRWRequest *rwr = opaque;
>> +
>> +    rwr->rc = bdrv_co_readv(rwr->bdrv,
>> +                            rwr->sector_num,
>> +                            rwr->num_sectors,
>> +                            rwr->qiov);
>> +    rwr->done = true;
>> +}
>> +
>> +/*
>> + * Coroutine that writes a blob to the drive asynchronously
>> + */
>> +static void coroutine_fn tpm_nvram_co_write(void *opaque)
>> +{
>> +    TPMNvramRWRequest *rwr = opaque;
>> +
>> +    rwr->rc = bdrv_co_writev(rwr->bdrv,
>> +                             rwr->sector_num,
>> +                             rwr->num_sectors,
>> +                             rwr->qiov);
>> +    rwr->done = true;
>> +}
>> +
>> +/*
>> + * Prepare for and enter a coroutine to read a blob from the drive
>> + */
>> +static void tpm_nvram_do_co_read(TPMNvramRWRequest *rwr)
>> +{
>> +    Coroutine *co;
>> +    size_t buf_len = rwr->num_sectors * BDRV_SECTOR_SIZE;
>> +    uint8_t *buf = g_malloc(buf_len);
>> +
>> +    memset(buf, 0x0, buf_len);
>> +
>> +    struct iovec iov = {
>> +        .iov_base = (void *)buf,
>> +        .iov_len = rwr->size,
>> +    };
>> +
>> +    qemu_iovec_init_external(rwr->qiov, &iov, 1);
>> +
>> +    co = qemu_coroutine_create(tpm_nvram_co_read);
>> +    qemu_coroutine_enter(co, rwr);
>> +
>> +    while (!rwr->done) {
>> +        qemu_aio_wait();
>> +    }
>
> The qemu_aio_wait() loop makes this function synchronous - it will block
> the current thread until the request completes.
>
> You need to use bdrv_aio_readv()/bdrv_aio_writev() or let the
> tpm_nvram_co_read()/tpm_nvram_co_write() coroutine perform the
> completion code path instead of waiting for it here.
>

Ok, I think I can just have the coroutine perform the completion path.

>> +
>> +    if (rwr->rc == 0) {
>> +        rwr->rc = rwr->num_sectors;
>> +        *rwr->blob_r = g_malloc(rwr->size);
>> +        memcpy(*rwr->blob_r, buf, rwr->size);
>
> Use bdrv_pread()/bdrv_pwrite() for byte-granularity I/O instead of
> duplicating the buffering yourself.
>

Aren't bdrv_pread()/bdrv_pwrite() synchronous?  Wouldn't using them 
block the main QEMU thread?  That is why I switched to using the 
coroutine versions.

>> +    } else {
>> +        *rwr->blob_r = NULL;
>> +    }
>> +
>> +    g_free(buf);
>> +}
>> +
>> +/*
>> + * Prepare for and enter a coroutine to write a blob to the drive
>> + */
>> +static void tpm_nvram_do_co_write(TPMNvramRWRequest *rwr)
>> +{
>> +    Coroutine *co;
>> +    size_t buf_len = rwr->num_sectors * BDRV_SECTOR_SIZE;
>> +    uint8_t *buf = g_malloc(buf_len);
>> +
>> +    memset(buf, 0x0, buf_len);
>> +    memcpy(buf, rwr->blob_w, rwr->size);
>> +
>> +    struct iovec iov = {
>> +        .iov_base = (void *)buf,
>> +        .iov_len = rwr->size,
>> +    };
>> +
>> +    qemu_iovec_init_external(rwr->qiov, &iov, 1);
>> +
>> +    rwr->rc = tpm_nvram_adjust_size(rwr->bdrv, rwr->sector_num,
>> +                                    rwr->num_sectors);
>> +    if (rwr->rc < 0) {
>> +        goto err_exit;
>> +    }
>> +
>> +    co = qemu_coroutine_create(tpm_nvram_co_write);
>> +    qemu_coroutine_enter(co, rwr);
>> +
>> +    while (!rwr->done) {
>> +        qemu_aio_wait();
>> +    }
>> +
>> +    if (rwr->rc == 0) {
>> +        rwr->rc = rwr->num_sectors;
>> +    }
>> +
>> +err_exit:
>> +    g_free(buf);
>> +}
>> +
>> +/*
>> + * Initialization for read requests
>> + */
>> +static TPMNvramRWRequest *tpm_nvram_rwrequest_init_read(BlockDriverState *bdrv,
>> +                                                        int64_t sector_num,
>> +                                                        uint8_t **blob,
>> +                                                        uint32_t size)
>> +{
>> +    TPMNvramRWRequest *rwr;
>> +
>> +    rwr = g_new0(TPMNvramRWRequest, 1);
>> +    rwr->bdrv = bdrv;
>> +    rwr->is_write = false;
>> +    rwr->sector_num = sector_num;
>> +    rwr->num_sectors = NUM_SECTORS(size);
>> +    rwr->blob_r = blob;
>> +    rwr->size = size;
>> +    rwr->qiov = g_new0(QEMUIOVector, 1);
>
> Why allocate qiov on the heap instead of making it a TPMNvramRWRequest field?
>

No reason, I'll change that.

>> +    rwr->done = false;
>> +
>> +    return rwr;
>> +}
>> +
>> +/*
>> + * Initialization for write requests
>> + */
>> +static TPMNvramRWRequest *tpm_nvram_rwrequest_init_write(BlockDriverState *bdrv,
>> +                                                         int64_t sector_num,
>> +                                                         uint8_t *blob,
>> +                                                         uint32_t size)
>> +{
>> +    TPMNvramRWRequest *rwr;
>> +
>> +    rwr = g_new0(TPMNvramRWRequest, 1);
>> +    rwr->bdrv = bdrv;
>> +    rwr->is_write = true;
>> +    rwr->sector_num = sector_num;
>> +    rwr->num_sectors = NUM_SECTORS(size);
>> +    rwr->blob_w = blob;
>> +    rwr->size = size;
>> +    rwr->qiov = g_new0(QEMUIOVector, 1);
>> +    rwr->done = false;
>> +
>> +    return rwr;
>> +}
>> +
>> +/*
>> + * Free read/write request memory
>> + */
>> +static void tpm_nvram_rwrequest_free(TPMNvramRWRequest *rwr)
>> +{
>> +    g_free(rwr->qiov);
>> +    g_free(rwr);
>> +}
>> +
>> +/*
>> + * Execute a read or write of TPM NVRAM blob data
>> + */
>> +static void tpm_nvram_rwrequest_exec(TPMNvramRWRequest *rwr)
>> +{
>> +    if (rwr->is_write) {
>> +        tpm_nvram_do_co_write(rwr);
>> +    } else {
>> +        tpm_nvram_do_co_read(rwr);
>> +    }
>> +
>> +    qemu_mutex_lock(&rwr->completion_mutex);
>> +    qemu_cond_signal(&rwr->completion);
>> +    qemu_mutex_unlock(&rwr->completion_mutex);
>> +}
>> +
>> +/*
>> + * Bottom-half callback that is invoked by QEMU's main thread to
>> + * process TPM NVRAM read/write requests.
>> + */
>> +static void tpm_nvram_rwrequest_callback(void *opaque)
>> +{
>> +    TPMNvramRWRequest *rwr, *next;
>> +
>> +    qemu_mutex_lock(&tpm_nvram_rwrequests_mutex);
>> +
>> +    QSIMPLEQ_FOREACH_SAFE(rwr, &tpm_nvram_rwrequests, list, next) {
>> +        QSIMPLEQ_REMOVE(&tpm_nvram_rwrequests, rwr, TPMNvramRWRequest, list);
>> +
>> +        qemu_mutex_unlock(&tpm_nvram_rwrequests_mutex);
>> +        tpm_nvram_rwrequest_exec(rwr);
>> +        qemu_mutex_lock(&tpm_nvram_rwrequests_mutex);
>> +    }
>> +
>> +    qemu_mutex_unlock(&tpm_nvram_rwrequests_mutex);
>> +}
>> +
>> +/*
>> + * Schedule a bottom-half to read or write a blob to the TPM NVRAM drive
>> + */
>> +static void tpm_nvram_rwrequest_schedule(TPMNvramRWRequest *rwr)
>> +{
>> +    qemu_mutex_lock(&tpm_nvram_rwrequests_mutex);
>> +    QSIMPLEQ_INSERT_TAIL(&tpm_nvram_rwrequests, rwr, list);
>> +    qemu_mutex_unlock(&tpm_nvram_rwrequests_mutex);
>> +
>> +    qemu_bh_schedule(tpm_nvram_bh);
>> +
>> +    /* Wait for completion of the read/write request */
>> +    qemu_mutex_lock(&rwr->completion_mutex);
>> +    qemu_cond_wait(&rwr->completion, &rwr->completion_mutex);
>> +    qemu_mutex_unlock(&rwr->completion_mutex);
>
> Race condition: what if the request completes before we reach
> qemu_cond_wait()?  I suggest initializing rwr->rc to -EINPROGRESS and
> doing:
>
> qemu_mutex_lock(&rwr->completion_mutex);
> while (rwr->rc == -EINPROGRESS) {
>     qemu_cond_wait(&rwr->completion, &rwr->completion_mutex);
> }
> qemu_mutex_unlock(&rwr->completion_mutex);
>

Good catch.  Thanks.

>> +}
>> +
>> +/*
>> + * Initialize a TPM NVRAM drive
>> + */
>> +int tpm_nvram_init(BlockDriverState *bdrv)
>> +{
>> +    qemu_mutex_init(&tpm_nvram_rwrequests_mutex);
>> +    tpm_nvram_bh = qemu_bh_new(tpm_nvram_rwrequest_callback, NULL);
>> +
>> +    if (bdrv_is_read_only(bdrv)) {
>> +        DPRINTF("%s: TPM NVRAM drive '%s' is read-only\n", __func__,
>> +                bdrv->filename);
>> +        return -EPERM;
>> +    }
>> +
>> +    bdrv_lock_medium(bdrv, true);
>> +
>> +    DPRINTF("%s: TPM NVRAM drive '%s' initialized successfully\n", __func__,
>> +            bdrv->filename);
>> +
>> +    return 0;
>> +}
>> +
>> +/*
>> + * Read a TPM NVRAM blob from the drive.  On success, returns the
>> + * number of sectors used by this blob.
>> + */
>> +int tpm_nvram_read(BlockDriverState *bdrv, int64_t sector_num,
>> +                   uint8_t **blob, uint32_t size)
>> +{
>> +    int rc;
>> +    TPMNvramRWRequest *rwr;
>> +
>> +    *blob = NULL;
>> +
>> +    if (!bdrv) {
>> +        return -EPERM;
>> +    }
>> +
>> +    if (sector_num < 0) {
>> +        return -EINVAL;
>> +    }
>
> Can you let block.c handle these checks to avoid duplication?
>

Most likely I can remove these.

>> +
>> +    rwr = tpm_nvram_rwrequest_init_read(bdrv, sector_num, blob, size);
>> +    tpm_nvram_rwrequest_schedule(rwr);
>> +    rc = rwr->rc;
>> +
>> +#ifdef TPM_NVRAM_DEBUG
>> +    if (rc < 0) {
>> +        DPRINTF("%s: TPM NVRAM read failed\n", __func__);
>> +    } else {
>> +        DPRINTF("%s: TPM NVRAM read successful: sector_num=%"PRIu64", "
>> +                "size=%"PRIu32", num_sectors=%d\n", __func__,
>> +                rwr->sector_num, rwr->size, rwr->num_sectors);
>> +    }
>> +#endif
>
> #ifdef is unnecessary here.  The compiler can drop deadcode.
>

Ok

>> +
>> +    tpm_nvram_rwrequest_free(rwr);
>> +    return rc;
>> +}
>> +
>> +/*
>> + * Write a TPM NVRAM blob to the drive.  On success, returns the
>> + * number of sectors used by this blob.
>> + */
>> +int tpm_nvram_write(BlockDriverState *bdrv, int64_t sector_num,
>> +                    uint8_t *blob, uint32_t size)
>> +{
>> +    int rc;
>> +    TPMNvramRWRequest *rwr;
>> +
>> +    if (!bdrv) {
>> +        return -EPERM;
>> +    }
>> +
>> +    if (sector_num < 0 || !blob) {
>> +        return -EINVAL;
>> +    }
>> +
>> +    rwr = tpm_nvram_rwrequest_init_write(bdrv, sector_num, blob, size);
>> +    tpm_nvram_rwrequest_schedule(rwr);
>> +    rc = rwr->rc;
>> +
>> +#ifdef TPM_NVRAM_DEBUG
>> +    if (rc < 0) {
>> +        DPRINTF("%s: TPM NVRAM write failed\n", __func__);
>> +    } else {
>> +        DPRINTF("%s: TPM NVRAM write successful: sector_num=%"PRIu64", "
>> +                "size=%"PRIu32", num_sectors=%d\n", __func__,
>> +                rwr->sector_num, rwr->size, rwr->num_sectors);
>> +    }
>> +#endif
>> +
>> +    tpm_nvram_rwrequest_free(rwr);
>> +    return rc;
>> +}
>> diff --git a/hw/tpm/tpm_nvram.h b/hw/tpm/tpm_nvram.h
>> new file mode 100644
>> index 0000000..fceb4d0
>> --- /dev/null
>> +++ b/hw/tpm/tpm_nvram.h
>> @@ -0,0 +1,25 @@
>> +/*
>> + * TPM NVRAM - enables storage of persistent NVRAM data on an image file
>> + *
>> + * Copyright (C) 2013 IBM Corporation
>> + *
>> + * Authors:
>> + *  Stefan Berger    <stefanb@us.ibm.com>
>> + *  Corey Bryant     <coreyb@linux.vnet.ibm.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#ifndef TPM_TPM_NVRAM_H
>> +#define TPM_TPM_NVRAM_H
>> +
>> +#include "block/block.h"
>> +
>> +int tpm_nvram_init(BlockDriverState *bdrv);
>> +int tpm_nvram_read(BlockDriverState *bdrv, int64_t sector_num,
>> +                   uint8_t **blob, uint32_t size);
>> +int tpm_nvram_write(BlockDriverState *bdrv, int64_t sector_num,
>> +                    uint8_t *blob, uint32_t size);
>> +
>> +#endif
>> --
>> 1.7.1
>>
>
>
>
Kevin Wolf June 5, 2013, 1:42 p.m. UTC | #3
On 05.06.2013 at 15:28, Corey Bryant wrote:
> >>+
> >>+    if (rwr->rc == 0) {
> >>+        rwr->rc = rwr->num_sectors;
> >>+        *rwr->blob_r = g_malloc(rwr->size);
> >>+        memcpy(*rwr->blob_r, buf, rwr->size);
> >
> >Use bdrv_pread()/bdrv_pwrite() for byte-granularity I/O instead of
> >duplicating the buffering yourself.
> >
> 
> Aren't bdrv_pread()/bdrv_pwrite() synchronous?  Wouldn't using them
> block the main QEMU thread?  That is why I switched to using the
> coroutine versions.

You need to call them from coroutine context; otherwise they spawn
their own internal coroutine and wait for it in exactly this kind of
while (!done) { qemu_aio_wait(); } loop, which blocks everything.
Called from coroutine context, they do the Right Thing, though.
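
Something along these lines (untested) should do the trick and lets you
work in bytes:

static void coroutine_fn tpm_nvram_co_read(void *opaque)
{
    TPMNvramRWRequest *rwr = opaque;

    *rwr->blob_r = g_malloc(rwr->size);
    rwr->rc = bdrv_pread(rwr->bdrv, rwr->sector_num * BDRV_SECTOR_SIZE,
                         *rwr->blob_r, rwr->size);
    /* signal completion from here instead of spinning in the caller */
}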

Kevin
Corey Bryant June 5, 2013, 1:57 p.m. UTC | #4
On 06/05/2013 09:42 AM, Kevin Wolf wrote:
> On 05.06.2013 at 15:28, Corey Bryant wrote:
>>>> +
>>>> +    if (rwr->rc == 0) {
>>>> +        rwr->rc = rwr->num_sectors;
>>>> +        *rwr->blob_r = g_malloc(rwr->size);
>>>> +        memcpy(*rwr->blob_r, buf, rwr->size);
>>>
>>> Use bdrv_pread()/bdrv_pwrite() for byte-granularity I/O instead of
>>> duplicating the buffering yourself.
>>>
>>
>> Aren't bdrv_pread()/bdrv_pwrite() synchronous?  Wouldn't using them
>> block the main QEMU thread?  That is why I switched to using the
>> coroutine versions.
>
> You need to call them from coroutine context; otherwise they spawn
> their own internal coroutine and wait for it in exactly this kind of
> while (!done) { qemu_aio_wait(); } loop, which blocks everything.
> Called from coroutine context, they do the Right Thing, though.
>
> Kevin
>
>
>

Ah, thanks for explaining.  Now I can work in bytes rather than sectors.  :)

Patch

diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs
index 99f5983..49faef4 100644
--- a/hw/tpm/Makefile.objs
+++ b/hw/tpm/Makefile.objs
@@ -1,2 +1,3 @@ 
 common-obj-$(CONFIG_TPM_TIS) += tpm_tis.o
+common-obj-$(CONFIG_TPM_TIS) += tpm_nvram.o
 common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o
diff --git a/hw/tpm/tpm_nvram.c b/hw/tpm/tpm_nvram.c
new file mode 100644
index 0000000..95ff396
--- /dev/null
+++ b/hw/tpm/tpm_nvram.c
@@ -0,0 +1,399 @@ 
+/*
+ * TPM NVRAM - enables storage of persistent NVRAM data on an image file
+ *
+ * Copyright (C) 2013 IBM Corporation
+ *
+ * Authors:
+ *  Stefan Berger    <stefanb@us.ibm.com>
+ *  Corey Bryant     <coreyb@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "tpm_nvram.h"
+#include "block/block_int.h"
+#include "qemu/thread.h"
+#include "sysemu/sysemu.h"
+
+/* #define TPM_NVRAM_DEBUG */
+
+#ifdef TPM_NVRAM_DEBUG
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+/* Round a value up to the next SIZE */
+#define ROUNDUP(VAL, SIZE) \
+    (((VAL)+(SIZE)-1) & ~((SIZE)-1))
+
+/* Get the number of sectors required to contain SIZE bytes */
+#define NUM_SECTORS(SIZE) \
+    (ROUNDUP(SIZE, BDRV_SECTOR_SIZE) / BDRV_SECTOR_SIZE)
+
+/* Read/write request data */
+typedef struct TPMNvramRWRequest {
+    BlockDriverState *bdrv;
+    bool is_write;
+    uint64_t sector_num;
+    int num_sectors;
+    uint8_t **blob_r;
+    uint8_t *blob_w;
+    uint32_t size;
+    QEMUIOVector *qiov;
+    bool done;
+    int rc;
+
+    QemuMutex completion_mutex;
+    QemuCond completion;
+
+    QSIMPLEQ_ENTRY(TPMNvramRWRequest) list;
+} TPMNvramRWRequest;
+
+/* Mutex protected queue of read/write requests */
+static QemuMutex tpm_nvram_rwrequests_mutex;
+static QSIMPLEQ_HEAD(, TPMNvramRWRequest) tpm_nvram_rwrequests =
+    QSIMPLEQ_HEAD_INITIALIZER(tpm_nvram_rwrequests);
+
+static QEMUBH *tpm_nvram_bh;
+
+/*
+ * Increase the drive size if it's too small to store the blob
+ */
+static int tpm_nvram_adjust_size(BlockDriverState *bdrv, uint64_t sector_num,
+                                 int num_sectors)
+{
+    int rc = 0;
+    int64_t drive_size, required_size;
+
+    drive_size = bdrv_getlength(bdrv);
+    if (drive_size < 0) {
+        DPRINTF("%s: Unable to determine TPM NVRAM drive size\n", __func__);
+        rc = drive_size;
+        goto err_exit;
+    }
+
+    required_size = (sector_num + num_sectors) * BDRV_SECTOR_SIZE;
+
+    if (drive_size < required_size) {
+        rc = bdrv_truncate(bdrv, required_size);
+        if (rc < 0) {
+            DPRINTF("%s: TPM NVRAM drive too small\n", __func__);
+        }
+    }
+
+err_exit:
+    return rc;
+}
+
+/*
+ * Coroutine that reads a blob from the drive asynchronously
+ */
+static void coroutine_fn tpm_nvram_co_read(void *opaque)
+{
+    TPMNvramRWRequest *rwr = opaque;
+
+    rwr->rc = bdrv_co_readv(rwr->bdrv,
+                            rwr->sector_num,
+                            rwr->num_sectors,
+                            rwr->qiov);
+    rwr->done = true;
+}
+
+/*
+ * Coroutine that writes a blob to the drive asynchronously
+ */
+static void coroutine_fn tpm_nvram_co_write(void *opaque)
+{
+    TPMNvramRWRequest *rwr = opaque;
+
+    rwr->rc = bdrv_co_writev(rwr->bdrv,
+                             rwr->sector_num,
+                             rwr->num_sectors,
+                             rwr->qiov);
+    rwr->done = true;
+}
+
+/*
+ * Prepare for and enter a coroutine to read a blob from the drive
+ */
+static void tpm_nvram_do_co_read(TPMNvramRWRequest *rwr)
+{
+    Coroutine *co;
+    size_t buf_len = rwr->num_sectors * BDRV_SECTOR_SIZE;
+    uint8_t *buf = g_malloc(buf_len);
+
+    memset(buf, 0x0, buf_len);
+
+    struct iovec iov = {
+        .iov_base = (void *)buf,
+        .iov_len = rwr->size,
+    };
+
+    qemu_iovec_init_external(rwr->qiov, &iov, 1);
+
+    co = qemu_coroutine_create(tpm_nvram_co_read);
+    qemu_coroutine_enter(co, rwr);
+
+    while (!rwr->done) {
+        qemu_aio_wait();
+    }
+
+    if (rwr->rc == 0) {
+        rwr->rc = rwr->num_sectors;
+        *rwr->blob_r = g_malloc(rwr->size);
+        memcpy(*rwr->blob_r, buf, rwr->size);
+    } else {
+        *rwr->blob_r = NULL;
+    }
+
+    g_free(buf);
+}
+
+/*
+ * Prepare for and enter a coroutine to write a blob to the drive
+ */
+static void tpm_nvram_do_co_write(TPMNvramRWRequest *rwr)
+{
+    Coroutine *co;
+    size_t buf_len = rwr->num_sectors * BDRV_SECTOR_SIZE;
+    uint8_t *buf = g_malloc(buf_len);
+
+    memset(buf, 0x0, buf_len);
+    memcpy(buf, rwr->blob_w, rwr->size);
+
+    struct iovec iov = {
+        .iov_base = (void *)buf,
+        .iov_len = rwr->size,
+    };
+
+    qemu_iovec_init_external(rwr->qiov, &iov, 1);
+
+    rwr->rc = tpm_nvram_adjust_size(rwr->bdrv, rwr->sector_num,
+                                    rwr->num_sectors);
+    if (rwr->rc < 0) {
+        goto err_exit;
+    }
+
+    co = qemu_coroutine_create(tpm_nvram_co_write);
+    qemu_coroutine_enter(co, rwr);
+
+    while (!rwr->done) {
+        qemu_aio_wait();
+    }
+
+    if (rwr->rc == 0) {
+        rwr->rc = rwr->num_sectors;
+    }
+
+err_exit:
+    g_free(buf);
+}
+
+/*
+ * Initialization for read requests
+ */
+static TPMNvramRWRequest *tpm_nvram_rwrequest_init_read(BlockDriverState *bdrv,
+                                                        int64_t sector_num,
+                                                        uint8_t **blob,
+                                                        uint32_t size)
+{
+    TPMNvramRWRequest *rwr;
+
+    rwr = g_new0(TPMNvramRWRequest, 1);
+    rwr->bdrv = bdrv;
+    rwr->is_write = false;
+    rwr->sector_num = sector_num;
+    rwr->num_sectors = NUM_SECTORS(size);
+    rwr->blob_r = blob;
+    rwr->size = size;
+    rwr->qiov = g_new0(QEMUIOVector, 1);
+    rwr->done = false;
+
+    return rwr;
+}
+
+/*
+ * Initialization for write requests
+ */
+static TPMNvramRWRequest *tpm_nvram_rwrequest_init_write(BlockDriverState *bdrv,
+                                                         int64_t sector_num,
+                                                         uint8_t *blob,
+                                                         uint32_t size)
+{
+    TPMNvramRWRequest *rwr;
+
+    rwr = g_new0(TPMNvramRWRequest, 1);
+    rwr->bdrv = bdrv;
+    rwr->is_write = true;
+    rwr->sector_num = sector_num;
+    rwr->num_sectors = NUM_SECTORS(size);
+    rwr->blob_w = blob;
+    rwr->size = size;
+    rwr->qiov = g_new0(QEMUIOVector, 1);
+    rwr->done = false;
+
+    return rwr;
+}
+
+/*
+ * Free read/write request memory
+ */
+static void tpm_nvram_rwrequest_free(TPMNvramRWRequest *rwr)
+{
+    g_free(rwr->qiov);
+    g_free(rwr);
+}
+
+/*
+ * Execute a read or write of TPM NVRAM blob data
+ */
+static void tpm_nvram_rwrequest_exec(TPMNvramRWRequest *rwr)
+{
+    if (rwr->is_write) {
+        tpm_nvram_do_co_write(rwr);
+    } else {
+        tpm_nvram_do_co_read(rwr);
+    }
+
+    qemu_mutex_lock(&rwr->completion_mutex);
+    qemu_cond_signal(&rwr->completion);
+    qemu_mutex_unlock(&rwr->completion_mutex);
+}
+
+/*
+ * Bottom-half callback that is invoked by QEMU's main thread to
+ * process TPM NVRAM read/write requests.
+ */
+static void tpm_nvram_rwrequest_callback(void *opaque)
+{
+    TPMNvramRWRequest *rwr, *next;
+
+    qemu_mutex_lock(&tpm_nvram_rwrequests_mutex);
+
+    QSIMPLEQ_FOREACH_SAFE(rwr, &tpm_nvram_rwrequests, list, next) {
+        QSIMPLEQ_REMOVE(&tpm_nvram_rwrequests, rwr, TPMNvramRWRequest, list);
+
+        qemu_mutex_unlock(&tpm_nvram_rwrequests_mutex);
+        tpm_nvram_rwrequest_exec(rwr);
+        qemu_mutex_lock(&tpm_nvram_rwrequests_mutex);
+    }
+
+    qemu_mutex_unlock(&tpm_nvram_rwrequests_mutex);
+}
+
+/*
+ * Schedule a bottom-half to read or write a blob to the TPM NVRAM drive
+ */
+static void tpm_nvram_rwrequest_schedule(TPMNvramRWRequest *rwr)
+{
+    qemu_mutex_lock(&tpm_nvram_rwrequests_mutex);
+    QSIMPLEQ_INSERT_TAIL(&tpm_nvram_rwrequests, rwr, list);
+    qemu_mutex_unlock(&tpm_nvram_rwrequests_mutex);
+
+    qemu_bh_schedule(tpm_nvram_bh);
+
+    /* Wait for completion of the read/write request */
+    qemu_mutex_lock(&rwr->completion_mutex);
+    qemu_cond_wait(&rwr->completion, &rwr->completion_mutex);
+    qemu_mutex_unlock(&rwr->completion_mutex);
+}
+
+/*
+ * Initialize a TPM NVRAM drive
+ */
+int tpm_nvram_init(BlockDriverState *bdrv)
+{
+    qemu_mutex_init(&tpm_nvram_rwrequests_mutex);
+    tpm_nvram_bh = qemu_bh_new(tpm_nvram_rwrequest_callback, NULL);
+
+    if (bdrv_is_read_only(bdrv)) {
+        DPRINTF("%s: TPM NVRAM drive '%s' is read-only\n", __func__,
+                bdrv->filename);
+        return -EPERM;
+    }
+
+    bdrv_lock_medium(bdrv, true);
+
+    DPRINTF("%s: TPM NVRAM drive '%s' initialized successfully\n", __func__,
+            bdrv->filename);
+
+    return 0;
+}
+
+/*
+ * Read a TPM NVRAM blob from the drive.  On success, returns the
+ * number of sectors used by this blob.
+ */
+int tpm_nvram_read(BlockDriverState *bdrv, int64_t sector_num,
+                   uint8_t **blob, uint32_t size)
+{
+    int rc;
+    TPMNvramRWRequest *rwr;
+
+    *blob = NULL;
+
+    if (!bdrv) {
+        return -EPERM;
+    }
+
+    if (sector_num < 0) {
+        return -EINVAL;
+    }
+
+    rwr = tpm_nvram_rwrequest_init_read(bdrv, sector_num, blob, size);
+    tpm_nvram_rwrequest_schedule(rwr);
+    rc = rwr->rc;
+
+#ifdef TPM_NVRAM_DEBUG
+    if (rc < 0) {
+        DPRINTF("%s: TPM NVRAM read failed\n", __func__);
+    } else {
+        DPRINTF("%s: TPM NVRAM read successful: sector_num=%"PRIu64", "
+                "size=%"PRIu32", num_sectors=%d\n", __func__,
+                rwr->sector_num, rwr->size, rwr->num_sectors);
+    }
+#endif
+
+    tpm_nvram_rwrequest_free(rwr);
+    return rc;
+}
+
+/*
+ * Write a TPM NVRAM blob to the drive.  On success, returns the
+ * number of sectors used by this blob.
+ */
+int tpm_nvram_write(BlockDriverState *bdrv, int64_t sector_num,
+                    uint8_t *blob, uint32_t size)
+{
+    int rc;
+    TPMNvramRWRequest *rwr;
+
+    if (!bdrv) {
+        return -EPERM;
+    }
+
+    if (sector_num < 0 || !blob) {
+        return -EINVAL;
+    }
+
+    rwr = tpm_nvram_rwrequest_init_write(bdrv, sector_num, blob, size);
+    tpm_nvram_rwrequest_schedule(rwr);
+    rc = rwr->rc;
+
+#ifdef TPM_NVRAM_DEBUG
+    if (rc < 0) {
+        DPRINTF("%s: TPM NVRAM write failed\n", __func__);
+    } else {
+        DPRINTF("%s: TPM NVRAM write successful: sector_num=%"PRIu64", "
+                "size=%"PRIu32", num_sectors=%d\n", __func__,
+                rwr->sector_num, rwr->size, rwr->num_sectors);
+    }
+#endif
+
+    tpm_nvram_rwrequest_free(rwr);
+    return rc;
+}
diff --git a/hw/tpm/tpm_nvram.h b/hw/tpm/tpm_nvram.h
new file mode 100644
index 0000000..fceb4d0
--- /dev/null
+++ b/hw/tpm/tpm_nvram.h
@@ -0,0 +1,25 @@ 
+/*
+ * TPM NVRAM - enables storage of persistent NVRAM data on an image file
+ *
+ * Copyright (C) 2013 IBM Corporation
+ *
+ * Authors:
+ *  Stefan Berger    <stefanb@us.ibm.com>
+ *  Corey Bryant     <coreyb@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TPM_TPM_NVRAM_H
+#define TPM_TPM_NVRAM_H
+
+#include "block/block.h"
+
+int tpm_nvram_init(BlockDriverState *bdrv);
+int tpm_nvram_read(BlockDriverState *bdrv, int64_t sector_num,
+                   uint8_t **blob, uint32_t size);
+int tpm_nvram_write(BlockDriverState *bdrv, int64_t sector_num,
+                    uint8_t *blob, uint32_t size);
+
+#endif