Patchwork [RFC] Writeable files in fw_cfg

login
register
mail settings
Submitter David Woodhouse
Date Jan. 26, 2013, 8:43 p.m.
Message ID <1359233008.10797.10.camel@shinybook.infradead.org>
Download mbox | patch
Permalink /patch/215934/
State New
Headers show

Comments

David Woodhouse - Jan. 26, 2013, 8:43 p.m.
For OVMF we really want to have a way to store non-volatile variables,
other than the dirty hack that currently puts them on a file in the EFI
system partition.

It looks like we've supported writing to fw_cfg items fairly much since
they were introduced, but we've never actually made use of that.

This WIP patch kills the existing fw_cfg_add_callback() because I can't
see how it would ever be useful, and it doesn't seem to have been used
for years, if ever. And adds something slightly more useful.

Other then the obvious bits that need finishing, any objections?
Blue Swirl - Jan. 27, 2013, 3:14 p.m.
On Sat, Jan 26, 2013 at 8:43 PM, David Woodhouse <dwmw2@infradead.org> wrote:
> For OVMF we really want to have a way to store non-volatile variables,
> other than the dirty hack that currently puts them on a file in the EFI
> system partition.
>
> It looks like we've supported writing to fw_cfg items fairly much since
> they were introduced, but we've never actually made use of that.
>
> This WIP patch kills the existing fw_cfg_add_callback() because I can't
> see how it would ever be useful, and it doesn't seem to have been used
> for years, if ever. And adds something slightly more useful.
>
> Other then the obvious bits that need finishing, any objections?

It looks like this duplicates rom_add_file() and fw_cfg_add_file(), so
I don't see the point.

Removing unused fw_cfg_add_callback() should be a separate patch and
that would be OK.

In general, QEMU uses different coding style from kernel, so please
read our CODING_STYLE and use checkpatch.pl to catch obvious problems,
like missing braces and C99 comments.

>
> diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
> index 3b31d77..1318a2e 100644
> --- a/hw/fw_cfg.c
> +++ b/hw/fw_cfg.c
> @@ -33,6 +33,9 @@
>  #define FW_CFG_SIZE 2
>  #define FW_CFG_DATA_SIZE 1
>
> +struct FWCfgEntry;
> +typedef void (*FWCfgCallback)(struct FWCfgState *s, struct FWCfgEntry *e, int value);
> +
>  typedef struct FWCfgEntry {
>      uint32_t len;
>      uint8_t *data;
> @@ -206,20 +209,19 @@ static void fw_cfg_write(FWCfgState *s, uint8_t value)
>
>      trace_fw_cfg_write(s, value);
>
> -    if (s->cur_entry & FW_CFG_WRITE_CHANNEL && e->callback &&
> -        s->cur_offset < e->len) {
> -        e->data[s->cur_offset++] = value;
> -        if (s->cur_offset == e->len) {
> -            e->callback(e->callback_opaque, e->data);
> -            s->cur_offset = 0;
> -        }
> -    }
> +    if (s->cur_entry & FW_CFG_WRITE_CHANNEL && e->callback)
> +        e->callback(s, e, value);
>  }
>
>  static int fw_cfg_select(FWCfgState *s, uint16_t key)
>  {
> +    int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
> +    FWCfgEntry *e = &s->entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
>      int ret;
>
> +    if (e->callback)
> +        e->callback(s, e, -1);
> +
>      s->cur_offset = 0;
>      if ((key & FW_CFG_ENTRY_MASK) >= FW_CFG_MAX_ENTRY) {
>          s->cur_entry = FW_CFG_INVALID;
> @@ -419,8 +421,8 @@ void fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value)
>      fw_cfg_add_bytes(s, key, copy, sizeof(value));
>  }
>
> -void fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
> -                         void *callback_opaque, void *data, size_t len)
> +static void fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
> +                                void *callback_opaque, void *data, size_t len)
>  {
>      int arch = !!(key & FW_CFG_ARCH_LOCAL);
>
> @@ -436,8 +438,9 @@ void fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
>      s->entries[arch][key].callback = callback;
>  }
>
> -void fw_cfg_add_file(FWCfgState *s,  const char *filename,
> -                     void *data, size_t len)
> +static void fw_cfg_add_file_writeable(FWCfgState *s,  const char *filename,
> +                                      void *data, size_t len,
> +                                     FWCfgCallback callback, void *cb_opaque)
>  {
>      int i, index;
>      size_t dsize;
> @@ -451,7 +454,8 @@ void fw_cfg_add_file(FWCfgState *s,  const char *filename,
>      index = be32_to_cpu(s->files->count);
>      assert(index < FW_CFG_FILE_SLOTS);
>
> -    fw_cfg_add_bytes(s, FW_CFG_FILE_FIRST + index, data, len);
> +    fw_cfg_add_callback(s, FW_CFG_WRITE_CHANNEL + FW_CFG_FILE_FIRST + index,
> +                        callback, cb_opaque, data, len);
>
>      pstrcpy(s->files->f[index].name, sizeof(s->files->f[index].name),
>              filename);
> @@ -464,11 +468,74 @@ void fw_cfg_add_file(FWCfgState *s,  const char *filename,
>
>      s->files->f[index].size   = cpu_to_be32(len);
>      s->files->f[index].select = cpu_to_be16(FW_CFG_FILE_FIRST + index);
> +    if (callback)
> +           s->files->f[index].select |= cpu_to_be16(FW_CFG_WRITE_CHANNEL);
>      trace_fw_cfg_add_file(s, index, s->files->f[index].name, len);
>
>      s->files->count = cpu_to_be32(index+1);
>  }
>
> +void fw_cfg_add_file(FWCfgState *s,  const char *filename,
> +                     void *data, size_t len)
> +{
> +    fw_cfg_add_file_writeable(s, filename, data, len, NULL, NULL);
> +}
> +
> +// Lifted from pc.c
> +static long get_file_size(FILE *f)
> +{
> +    long where, size;
> +
> +    /* XXX: on Unix systems, using fstat() probably makes more sense */
> +
> +    where = ftell(f);
> +    fseek(f, 0, SEEK_END);
> +    size = ftell(f);
> +    fseek(f, where, SEEK_SET);
> +
> +    return size;
> +}
> +
> +static void nvstorage_callback(struct FWCfgState *s, struct FWCfgEntry *e, int value)
> +
> +{
> +    if (value == -1) {
> +        FILE *f = fopen(e->callback_opaque, "wb");
> +        if (f) {
> +            fwrite(e->data, e->len, 1, f);
> +            fclose(f);
> +        }
> +       return;
> +    }
> +    if (s->cur_offset == e->len)
> +        e->data = realloc(e->data, ++e->len);
> +    e->data[s->cur_offset++] = value;
> +}
> +
> +void fw_cfg_add_nvstorage(FWCfgState *s, const char *filename)
> +{
> +    FILE *f;
> +    int file_size = 0;
> +    int f2;
> +    uint8_t *data = NULL;
> +
> +    f = fopen(filename, "rb");
> +    if (f) {
> +        file_size = get_file_size(f);
> +       if (file_size) {
> +            data = malloc(file_size);
> +            if ((f2=fread(data, 1, file_size, f)) != file_size) {
> +                fprintf(stderr, "qemu: Could not load non-volatile storage file '%s' %d %d: %s\n",
> +                        filename, file_size, f2, strerror(errno));
> +                exit(1);
> +            }
> +        }
> +        fclose(f);
> +    }
> +    fw_cfg_add_file_writeable(s, "etc/nvstorage", data, file_size,
> +                              nvstorage_callback, strdup(filename));
> +}
> +
>  static void fw_cfg_machine_ready(struct Notifier *n, void *data)
>  {
>      size_t len;
> @@ -507,6 +574,7 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
>      fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
>      fw_cfg_bootsplash(s);
>      fw_cfg_reboot(s);
> +    fw_cfg_add_nvstorage(s, "/tmp/nvstorage");
>
>      s->machine_ready.notify = fw_cfg_machine_ready;
>      qemu_add_machine_init_done_notifier(&s->machine_ready);
> diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
> index 05c8df1..298bc47 100644
> --- a/hw/fw_cfg.h
> +++ b/hw/fw_cfg.h
> @@ -51,20 +51,17 @@ typedef struct FWCfgFiles {
>      FWCfgFile f[];
>  } FWCfgFiles;
>
> -typedef void (*FWCfgCallback)(void *opaque, uint8_t *data);
> -
>  typedef struct FWCfgState FWCfgState;
>  void fw_cfg_add_bytes(FWCfgState *s, uint16_t key, void *data, size_t len);
>  void fw_cfg_add_string(FWCfgState *s, uint16_t key, const char *value);
>  void fw_cfg_add_i16(FWCfgState *s, uint16_t key, uint16_t value);
>  void fw_cfg_add_i32(FWCfgState *s, uint16_t key, uint32_t value);
>  void fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value);
> -void fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
> -                         void *callback_opaque, void *data, size_t len);
>  void fw_cfg_add_file(FWCfgState *s, const char *filename, void *data,
>                       size_t len);
>  FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
>                          hwaddr crl_addr, hwaddr data_addr);
> +void fw_cfg_add_nvstorage(FWCfgState *s, const char *filename);
>
>  #endif /* NO_QEMU_PROTOS */
>
>
> --
> dwmw2
>
David Woodhouse - Jan. 27, 2013, 3:50 p.m.
On Sun, 2013-01-27 at 15:14 +0000, Blue Swirl wrote:
> It looks like this duplicates rom_add_file() and fw_cfg_add_file(), so
> I don't see the point.

Both of those are read-only, surely? The firmware inside the guest can't
use them for non-volatile storage.

It doesn't duplicate fw_cfg_add_file(); it extends it to allow a
writeable mode too. fw_cfg_add_file() becomes a simple wrapper around
fw_cfg_add_file_writeable(), which actually contains most of the
contents of the original function.
Blue Swirl - Jan. 27, 2013, 4:02 p.m.
On Sun, Jan 27, 2013 at 3:50 PM, David Woodhouse <dwmw2@infradead.org> wrote:
> On Sun, 2013-01-27 at 15:14 +0000, Blue Swirl wrote:
>> It looks like this duplicates rom_add_file() and fw_cfg_add_file(), so
>> I don't see the point.
>
> Both of those are read-only, surely? The firmware inside the guest can't
> use them for non-volatile storage.

Right. But why do we need PV non-volatile storage, instead of normal flash?

>
> It doesn't duplicate fw_cfg_add_file(); it extends it to allow a
> writeable mode too. fw_cfg_add_file() becomes a simple wrapper around
> fw_cfg_add_file_writeable(), which actually contains most of the
> contents of the original function.

OK, however most of the loading logic is now in loader.c and that
should not be duplicated either.

>
> --
> dwmw2
>
Anthony Liguori - Jan. 27, 2013, 4:29 p.m.
David Woodhouse <dwmw2@infradead.org> writes:

> For OVMF we really want to have a way to store non-volatile variables,
> other than the dirty hack that currently puts them on a file in the EFI
> system partition.
>
> It looks like we've supported writing to fw_cfg items fairly much since
> they were introduced, but we've never actually made use of that.
>
> This WIP patch kills the existing fw_cfg_add_callback() because I can't
> see how it would ever be useful, and it doesn't seem to have been used
> for years, if ever. And adds something slightly more useful.
>
> Other then the obvious bits that need finishing, any objections?

The main issue is malicious guests.  The administrator has to be aware
of and in control of how much disk space the guest can use.  The
secondary issue is how to tie into the block layer so things like live
migration work.

This is why "use a flash device" is an attractive answer here because it
gives a fixed sized storage pool that's configurable by the
administrator. Since it's backed by a block device, it supports all of
the features needed for non-volatile storage (snapshotting, live copy,
etc.).

The only real downside is that it's opaque to the host.  If it's
desirable to have something that's visible to the host, then I think we
would still need to make use of BlockDriverState as the means to make it
non-volatile.

That essentially involves writing a file system in QEMU on top of
BlockDriverState and then having a PV inteface with the guest to get/set
file content.  Would be useful to have, but so far no one has cared
enough about making these stores non-opaque to do it.

Regards,

Anthony Liguori

>
> diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
> index 3b31d77..1318a2e 100644
> --- a/hw/fw_cfg.c
> +++ b/hw/fw_cfg.c
> @@ -33,6 +33,9 @@
>  #define FW_CFG_SIZE 2
>  #define FW_CFG_DATA_SIZE 1
>  
> +struct FWCfgEntry;
> +typedef void (*FWCfgCallback)(struct FWCfgState *s, struct FWCfgEntry *e, int value);
> +
>  typedef struct FWCfgEntry {
>      uint32_t len;
>      uint8_t *data;
> @@ -206,20 +209,19 @@ static void fw_cfg_write(FWCfgState *s, uint8_t value)
>  
>      trace_fw_cfg_write(s, value);
>  
> -    if (s->cur_entry & FW_CFG_WRITE_CHANNEL && e->callback &&
> -        s->cur_offset < e->len) {
> -        e->data[s->cur_offset++] = value;
> -        if (s->cur_offset == e->len) {
> -            e->callback(e->callback_opaque, e->data);
> -            s->cur_offset = 0;
> -        }
> -    }
> +    if (s->cur_entry & FW_CFG_WRITE_CHANNEL && e->callback)
> +        e->callback(s, e, value);
>  }
>  
>  static int fw_cfg_select(FWCfgState *s, uint16_t key)
>  {
> +    int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
> +    FWCfgEntry *e = &s->entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
>      int ret;
>  
> +    if (e->callback)
> +        e->callback(s, e, -1);
> +
>      s->cur_offset = 0;
>      if ((key & FW_CFG_ENTRY_MASK) >= FW_CFG_MAX_ENTRY) {
>          s->cur_entry = FW_CFG_INVALID;
> @@ -419,8 +421,8 @@ void fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value)
>      fw_cfg_add_bytes(s, key, copy, sizeof(value));
>  }
>  
> -void fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
> -                         void *callback_opaque, void *data, size_t len)
> +static void fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
> +                                void *callback_opaque, void *data, size_t len)
>  {
>      int arch = !!(key & FW_CFG_ARCH_LOCAL);
>  
> @@ -436,8 +438,9 @@ void fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
>      s->entries[arch][key].callback = callback;
>  }
>  
> -void fw_cfg_add_file(FWCfgState *s,  const char *filename,
> -                     void *data, size_t len)
> +static void fw_cfg_add_file_writeable(FWCfgState *s,  const char *filename,
> +                                      void *data, size_t len,
> +				      FWCfgCallback callback, void *cb_opaque)
>  {
>      int i, index;
>      size_t dsize;
> @@ -451,7 +454,8 @@ void fw_cfg_add_file(FWCfgState *s,  const char *filename,
>      index = be32_to_cpu(s->files->count);
>      assert(index < FW_CFG_FILE_SLOTS);
>  
> -    fw_cfg_add_bytes(s, FW_CFG_FILE_FIRST + index, data, len);
> +    fw_cfg_add_callback(s, FW_CFG_WRITE_CHANNEL + FW_CFG_FILE_FIRST + index,
> +                        callback, cb_opaque, data, len);
>  
>      pstrcpy(s->files->f[index].name, sizeof(s->files->f[index].name),
>              filename);
> @@ -464,11 +468,74 @@ void fw_cfg_add_file(FWCfgState *s,  const char *filename,
>  
>      s->files->f[index].size   = cpu_to_be32(len);
>      s->files->f[index].select = cpu_to_be16(FW_CFG_FILE_FIRST + index);
> +    if (callback)
> +	    s->files->f[index].select |= cpu_to_be16(FW_CFG_WRITE_CHANNEL);
>      trace_fw_cfg_add_file(s, index, s->files->f[index].name, len);
>  
>      s->files->count = cpu_to_be32(index+1);
>  }
>  
> +void fw_cfg_add_file(FWCfgState *s,  const char *filename,
> +                     void *data, size_t len)
> +{
> +    fw_cfg_add_file_writeable(s, filename, data, len, NULL, NULL);
> +}
> +
> +// Lifted from pc.c
> +static long get_file_size(FILE *f)
> +{
> +    long where, size;
> +
> +    /* XXX: on Unix systems, using fstat() probably makes more sense */
> +
> +    where = ftell(f);
> +    fseek(f, 0, SEEK_END);
> +    size = ftell(f);
> +    fseek(f, where, SEEK_SET);
> +
> +    return size;
> +}
> +
> +static void nvstorage_callback(struct FWCfgState *s, struct FWCfgEntry *e, int value)
> +
> +{
> +    if (value == -1) {
> +        FILE *f = fopen(e->callback_opaque, "wb");
> +        if (f) {
> +            fwrite(e->data, e->len, 1, f);
> +            fclose(f);
> +        }
> +	return;
> +    }
> +    if (s->cur_offset == e->len)
> +        e->data = realloc(e->data, ++e->len);
> +    e->data[s->cur_offset++] = value;
> +}
> +
> +void fw_cfg_add_nvstorage(FWCfgState *s, const char *filename)
> +{
> +    FILE *f;
> +    int file_size = 0;
> +    int f2;
> +    uint8_t *data = NULL;
> +
> +    f = fopen(filename, "rb");
> +    if (f) {
> +        file_size = get_file_size(f);
> +	if (file_size) {
> +            data = malloc(file_size);
> +            if ((f2=fread(data, 1, file_size, f)) != file_size) {
> +                fprintf(stderr, "qemu: Could not load non-volatile storage file '%s' %d %d: %s\n",
> +                        filename, file_size, f2, strerror(errno));
> +                exit(1);
> +            }
> +        }
> +        fclose(f);
> +    }
> +    fw_cfg_add_file_writeable(s, "etc/nvstorage", data, file_size,
> +                              nvstorage_callback, strdup(filename));
> +}
> +
>  static void fw_cfg_machine_ready(struct Notifier *n, void *data)
>  {
>      size_t len;
> @@ -507,6 +574,7 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
>      fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
>      fw_cfg_bootsplash(s);
>      fw_cfg_reboot(s);
> +    fw_cfg_add_nvstorage(s, "/tmp/nvstorage");
>  
>      s->machine_ready.notify = fw_cfg_machine_ready;
>      qemu_add_machine_init_done_notifier(&s->machine_ready);
> diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
> index 05c8df1..298bc47 100644
> --- a/hw/fw_cfg.h
> +++ b/hw/fw_cfg.h
> @@ -51,20 +51,17 @@ typedef struct FWCfgFiles {
>      FWCfgFile f[];
>  } FWCfgFiles;
>  
> -typedef void (*FWCfgCallback)(void *opaque, uint8_t *data);
> -
>  typedef struct FWCfgState FWCfgState;
>  void fw_cfg_add_bytes(FWCfgState *s, uint16_t key, void *data, size_t len);
>  void fw_cfg_add_string(FWCfgState *s, uint16_t key, const char *value);
>  void fw_cfg_add_i16(FWCfgState *s, uint16_t key, uint16_t value);
>  void fw_cfg_add_i32(FWCfgState *s, uint16_t key, uint32_t value);
>  void fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value);
> -void fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
> -                         void *callback_opaque, void *data, size_t len);
>  void fw_cfg_add_file(FWCfgState *s, const char *filename, void *data,
>                       size_t len);
>  FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
>                          hwaddr crl_addr, hwaddr data_addr);
> +void fw_cfg_add_nvstorage(FWCfgState *s, const char *filename);
>  
>  #endif /* NO_QEMU_PROTOS */
>  
>
> -- 
> dwmw2
David Woodhouse - Jan. 27, 2013, 4:38 p.m.
On Sun, 2013-01-27 at 16:02 +0000, Blue Swirl wrote:
> On Sun, Jan 27, 2013 at 3:50 PM, David Woodhouse <dwmw2@infradead.org> wrote:
> > On Sun, 2013-01-27 at 15:14 +0000, Blue Swirl wrote:
> >> It looks like this duplicates rom_add_file() and fw_cfg_add_file(), so
> >> I don't see the point.
> >
> > Both of those are read-only, surely? The firmware inside the guest can't
> > use them for non-volatile storage.
> 
> Right. But why do we need PV non-volatile storage, instead of normal flash?

People have looked at adding normal flash to the PC target, but it has a
number of problems. Firstly, it doesn't even *work* with KVM enabled.
But even if we were to somehow fix that, the simple approach of
emulating a BIOS flash is also very suboptimal; it puts the whole of the
firmware *into* the flash and lets it store its non-volatile variables
in the same emulated flash chip. So if you want to update the firmware
(which would normally be just a case of updating the bios.bin file), you
end up having to do each guest system individually *and* the update is
painful because you have to preserve the non-volatile storage while
updating the other parts.

It's much better to have a separate storage for the non-volatile
variables. Yes, we *could* teach it to use a dedicated flash chip for
that rather than using flash for the firmware *and* storage as we would
on real hardware. But if we're going to do something different for the
virtualised case, a writeable fw_cfg file seems a whole lot easier and
saner.

> > It doesn't duplicate fw_cfg_add_file(); it extends it to allow a
> > writeable mode too. fw_cfg_add_file() becomes a simple wrapper around
> > fw_cfg_add_file_writeable(), which actually contains most of the
> > contents of the original function.
> 
> OK, however most of the loading logic is now in loader.c and that
> should not be duplicated either.

I actually got file_get_size() from pc.c, and had marked it with a
horrid C99 comment so we couldn't miss the duplication. I'll sort out
that kind of detail later, if we can reach consensus on the basic
approach.
David Woodhouse - Jan. 27, 2013, 4:47 p.m.
On Sun, 2013-01-27 at 10:29 -0600, Anthony Liguori wrote:
> David Woodhouse <dwmw2@infradead.org> writes:
> 
> > For OVMF we really want to have a way to store non-volatile variables,
> > other than the dirty hack that currently puts them on a file in the EFI
> > system partition.
> >
> > It looks like we've supported writing to fw_cfg items fairly much since
> > they were introduced, but we've never actually made use of that.
> >
> > This WIP patch kills the existing fw_cfg_add_callback() because I can't
> > see how it would ever be useful, and it doesn't seem to have been used
> > for years, if ever. And adds something slightly more useful.
> >
> > Other then the obvious bits that need finishing, any objections?
> 
> The main issue is malicious guests.  The administrator has to be aware
> of and in control of how much disk space the guest can use.  The
> secondary issue is how to tie into the block layer so things like live
> migration work.
> 
> This is why "use a flash device" is an attractive answer here because it
> gives a fixed sized storage pool that's configurable by the
> administrator. 

That part is fixable simply by imposing a maximum size, surely?

> Since it's backed by a block device, it supports all of
> the features needed for non-volatile storage (snapshotting, live copy,
> etc.).
> 
> The only real downside is that it's opaque to the host.  If it's
> desirable to have something that's visible to the host, then I think we
> would still need to make use of BlockDriverState as the means to make it
> non-volatile.

I don't really care about it being visible to the host. The important
thing is that it's easily usable from the guest firmware at runtime, and
not on a device that the OS might be trying to use. Their current trick
is a file on the EFI system partition, which is definitely *not*
acceptable. It doesn't work after the OS has booted, so even basic tasks
like "install an OS and hope the installer can set the boot variables to
boot into it" are broken.

For exposing it to the guest, writeable fw_cfg definitely seems like the
most attractive answer. I'll look at backing it with BlockDriverState. I
don't even think we need a file system on it; it can just be a
fixed-size device exposed as-is, surely?
Blue Swirl - Jan. 27, 2013, 4:55 p.m.
On Sun, Jan 27, 2013 at 4:38 PM, David Woodhouse <dwmw2@infradead.org> wrote:
> On Sun, 2013-01-27 at 16:02 +0000, Blue Swirl wrote:
>> On Sun, Jan 27, 2013 at 3:50 PM, David Woodhouse <dwmw2@infradead.org> wrote:
>> > On Sun, 2013-01-27 at 15:14 +0000, Blue Swirl wrote:
>> >> It looks like this duplicates rom_add_file() and fw_cfg_add_file(), so
>> >> I don't see the point.
>> >
>> > Both of those are read-only, surely? The firmware inside the guest can't
>> > use them for non-volatile storage.
>>
>> Right. But why do we need PV non-volatile storage, instead of normal flash?
>
> People have looked at adding normal flash to the PC target, but it has a
> number of problems. Firstly, it doesn't even *work* with KVM enabled.
> But even if we were to somehow fix that, the simple approach of
> emulating a BIOS flash is also very suboptimal; it puts the whole of the
> firmware *into* the flash and lets it store its non-volatile variables
> in the same emulated flash chip. So if you want to update the firmware
> (which would normally be just a case of updating the bios.bin file), you
> end up having to do each guest system individually *and* the update is
> painful because you have to preserve the non-volatile storage while
> updating the other parts.

That problem could be easily solved by allowing a combination of two
images with different RO/RW settings, for example -bios
bios.bin[,offset=0,ro] -bios flash.img, offset=0x8000,rw.

> It's much better to have a separate storage for the non-volatile
> variables. Yes, we *could* teach it to use a dedicated flash chip for
> that rather than using flash for the firmware *and* storage as we would
> on real hardware. But if we're going to do something different for the
> virtualised case, a writeable fw_cfg file seems a whole lot easier and
> saner.
>
>> > It doesn't duplicate fw_cfg_add_file(); it extends it to allow a
>> > writeable mode too. fw_cfg_add_file() becomes a simple wrapper around
>> > fw_cfg_add_file_writeable(), which actually contains most of the
>> > contents of the original function.
>>
>> OK, however most of the loading logic is now in loader.c and that
>> should not be duplicated either.
>
> I actually got file_get_size() from pc.c, and had marked it with a
> horrid C99 comment so we couldn't miss the duplication. I'll sort out
> that kind of detail later, if we can reach consensus on the basic
> approach.

So the duplication already exists. :-(

>
> --
> dwmw2
>
David Woodhouse - Jan. 27, 2013, 4:59 p.m.
On Sun, 2013-01-27 at 16:55 +0000, Blue Swirl wrote:
> 
> 
> That problem could be easily solved by allowing a combination of two
> images with different RO/RW settings, for example -bios
> bios.bin[,offset=0,ro] -bios flash.img, offset=0x8000,rw.

/me shudders at the idea of co-ordinating that with the range of the
flash chip that the latest build of the firmware actually wants to use
for its non-volatile storage.

Seriously, keep them separate.
Blue Swirl - Jan. 27, 2013, 4:59 p.m.
On Sun, Jan 27, 2013 at 4:47 PM, David Woodhouse <dwmw2@infradead.org> wrote:
> On Sun, 2013-01-27 at 10:29 -0600, Anthony Liguori wrote:
>> David Woodhouse <dwmw2@infradead.org> writes:
>>
>> > For OVMF we really want to have a way to store non-volatile variables,
>> > other than the dirty hack that currently puts them on a file in the EFI
>> > system partition.
>> >
>> > It looks like we've supported writing to fw_cfg items fairly much since
>> > they were introduced, but we've never actually made use of that.
>> >
>> > This WIP patch kills the existing fw_cfg_add_callback() because I can't
>> > see how it would ever be useful, and it doesn't seem to have been used
>> > for years, if ever. And adds something slightly more useful.
>> >
>> > Other then the obvious bits that need finishing, any objections?
>>
>> The main issue is malicious guests.  The administrator has to be aware
>> of and in control of how much disk space the guest can use.  The
>> secondary issue is how to tie into the block layer so things like live
>> migration work.
>>
>> This is why "use a flash device" is an attractive answer here because it
>> gives a fixed sized storage pool that's configurable by the
>> administrator.
>
> That part is fixable simply by imposing a maximum size, surely?
>
>> Since it's backed by a block device, it supports all of
>> the features needed for non-volatile storage (snapshotting, live copy,
>> etc.).
>>
>> The only real downside is that it's opaque to the host.  If it's
>> desirable to have something that's visible to the host, then I think we
>> would still need to make use of BlockDriverState as the means to make it
>> non-volatile.
>
> I don't really care about it being visible to the host. The important
> thing is that it's easily usable from the guest firmware at runtime, and
> not on a device that the OS might be trying to use. Their current trick
> is a file on the EFI system partition, which is definitely *not*
> acceptable. It doesn't work after the OS has booted, so even basic tasks
> like "install an OS and hope the installer can set the boot variables to
> boot into it" are broken.
>
> For exposing it to the guest, writeable fw_cfg definitely seems like the
> most attractive answer. I'll look at backing it with BlockDriverState. I
> don't even think we need a file system on it; it can just be a
> fixed-size device exposed as-is, surely?

For example hw/spapr_nvram.c implements a similar device. If the user
does not specify any backing file for nvram, its contents will not be
saved.

>
> --
> dwmw2
>
David Woodhouse - Jan. 27, 2013, 5:05 p.m.
On Sun, 2013-01-27 at 16:59 +0000, Blue Swirl wrote:
> For example hw/spapr_nvram.c implements a similar device. If the user
> does not specify any backing file for nvram, its contents will not be
> saved.

I'll look at that; thanks.
Jordan Justen - Jan. 27, 2013, 7:35 p.m.
On Sun, Jan 27, 2013 at 8:38 AM, David Woodhouse <dwmw2@infradead.org> wrote:
> On Sun, 2013-01-27 at 16:02 +0000, Blue Swirl wrote:
>> On Sun, Jan 27, 2013 at 3:50 PM, David Woodhouse <dwmw2@infradead.org> wrote:
>> > On Sun, 2013-01-27 at 15:14 +0000, Blue Swirl wrote:
>> >> It looks like this duplicates rom_add_file() and fw_cfg_add_file(), so
>> >> I don't see the point.
>> >
>> > Both of those are read-only, surely? The firmware inside the guest can't
>> > use them for non-volatile storage.
>>
>> Right. But why do we need PV non-volatile storage, instead of normal flash?
>
> People have looked at adding normal flash to the PC target,

FWIW, I do have non-volatile variables working for OVMF using
http://wiki.qemu.org/Features/PC_System_Flash

> but it has a
> number of problems. Firstly, it doesn't even *work* with KVM enabled.

Avi tells me KVM_MEM_READONLY may be a big step in solving this. But,
I'll admit that I've not made much progress on this part of the task.

> But even if we were to somehow fix that, the simple approach of
> emulating a BIOS flash is also very suboptimal; it puts the whole of the
> firmware *into* the flash and lets it store its non-volatile variables
> in the same emulated flash chip. So if you want to update the firmware
> (which would normally be just a case of updating the bios.bin file), you
> end up having to do each guest system individually *and* the update is
> painful because you have to preserve the non-volatile storage while
> updating the other parts.

It is a good point, and I originally thought of keeping them separate,
but it seems like we ended up heading down the PC System Flash path.

I think at one point being able to keep the firmware image static when
QEMU updated might have been seen as an advantage. Based on several
years experience, I think OVMF builds have continued to work well with
older and newer versions of QEMU.

Yet, for the user that just wants to have the latest firmware, well we
all probably know how much fun bios upgrades can be. :)

Anyway, I think OVMF probably will end up supporting PC System Flash
for non-volatile variables in addition to what ever comes of this
discussion.

> It's much better to have a separate storage for the non-volatile
> variables. Yes, we *could* teach it to use a dedicated flash chip for
> that rather than using flash for the firmware *and* storage as we would
> on real hardware. But if we're going to do something different for the
> virtualised case, a writeable fw_cfg file seems a whole lot easier and
> saner.

I just wish fw_cfg had a more flash-friendly interface. The EDK II
variable storage prefers random access to the non-volatile storage
bytes. The infrastructure generally is built around random writes and
block erases.

-Jordan
Anthony Liguori - Jan. 28, 2013, 12:53 a.m.
David Woodhouse <dwmw2@infradead.org> writes:

> On Sun, 2013-01-27 at 10:29 -0600, Anthony Liguori wrote:
>> David Woodhouse <dwmw2@infradead.org> writes:
>> 
>> > For OVMF we really want to have a way to store non-volatile variables,
>> > other than the dirty hack that currently puts them on a file in the EFI
>> > system partition.
>> >
>> > It looks like we've supported writing to fw_cfg items fairly much since
>> > they were introduced, but we've never actually made use of that.
>> >
>> > This WIP patch kills the existing fw_cfg_add_callback() because I can't
>> > see how it would ever be useful, and it doesn't seem to have been used
>> > for years, if ever. And adds something slightly more useful.
>> >
>> > Other then the obvious bits that need finishing, any objections?
>> 
>> The main issue is malicious guests.  The administrator has to be aware
>> of and in control of how much disk space the guest can use.  The
>> secondary issue is how to tie into the block layer so things like live
>> migration work.
>> 
>> This is why "use a flash device" is an attractive answer here because it
>> gives a fixed sized storage pool that's configurable by the
>> administrator. 
>
> That part is fixable simply by imposing a maximum size, surely?

Are you just trying to persist a single blob of a fixed maximum size?
Why not just have a second flash device then?

>> Since it's backed by a block device, it supports all of
>> the features needed for non-volatile storage (snapshotting, live copy,
>> etc.).
>> 
>> The only real downside is that it's opaque to the host.  If it's
>> desirable to have something that's visible to the host, then I think we
>> would still need to make use of BlockDriverState as the means to make it
>> non-volatile.
>
> I don't really care about it being visible to the host. The important
> thing is that it's easily usable from the guest firmware at runtime, and
> not on a device that the OS might be trying to use. Their current trick
> is a file on the EFI system partition, which is definitely *not*
> acceptable. It doesn't work after the OS has booted, so even basic tasks
> like "install an OS and hope the installer can set the boot variables to
> boot into it" are broken.
>
> For exposing it to the guest, writeable fw_cfg definitely seems like the
> most attractive answer. I'll look at backing it with BlockDriverState. I
> don't even think we need a file system on it; it can just be a
> fixed-size device exposed as-is, surely?

If you just want to write out a blob, I'm not sure why you prefer to use
fw_cfg vs. a flash interface.

Regards,

Anthony Liguori

>
> -- 
> dwmw2
David Woodhouse - Jan. 28, 2013, 1:11 p.m.
On Sun, 2013-01-27 at 18:53 -0600, Anthony Liguori wrote:
> Are you just trying to persist a single blob of a fixed maximum size?

That would suffice.

> Why not just have a second flash device then?

Mostly because flash devices don't actually *work* with KVM.

Should I be looking at fixing *that*, instead?
Anthony Liguori - Jan. 28, 2013, 4:10 p.m.
David Woodhouse <dwmw2@infradead.org> writes:

> On Sun, 2013-01-27 at 18:53 -0600, Anthony Liguori wrote:
>> Are you just trying to persist a single blob of a fixed maximum size?
>
> That would suffice.
>
>> Why not just have a second flash device then?
>
> Mostly because flash devices don't actually *work* with KVM.

They absolutely do.  What doesn't work is executing ROM from flash if
the ROM cannot be treated as read-only memory.

That's because all we get is a PF in the kernel when trying to execute
from unmapped ROM.  There's no way to turn that into MMIO to userspace
without switching to running fully in emulation mode.  The x86 emulator
is pretty close to complete but work would be needed to fully complete
it to make this work.

We normally handle this by mapping the ROM memory read-only so it can be
executed without PF'ing but since the BIOS area is subject to PAM, we
can't use this trick for that particular ROM.

SeaBIOS has hack to just not use PAM to do BIOS shadowing when running
under KVM/QEMU but presumably OVMF lacks this.

But in this case, you're using the flash device purely for read/write,
not for execution, so there's no limitation at all.

Regards,

Anthony Liguori

>
> Should I be looking at fixing *that*, instead?
>
> -- 
> dwmw2
David Woodhouse - Jan. 28, 2013, 4:35 p.m.
On Mon, 2013-01-28 at 10:10 -0600, Anthony Liguori wrote:
> > Mostly because flash devices don't actually *work* with KVM.
> 
> They absolutely do.  What doesn't work is executing ROM from flash if
> the ROM cannot be treated as read-only memory.

Ah, great. In that case, that seems like the best approach. It should
make it easier to integrate into OVMF too, because it'll just be using
the 'standard' flash storage code with a different physical location.
Hopefully.

Can we get away with exposing an additional flash device in the physical
memory map immediately below the BIOS "ROM" and giving the firmware a
way to discover its location? Or do we need to do it "cleanly" by making
it a BAR of a PCI device?
Jordan Justen - Jan. 28, 2013, 4:36 p.m.
On Mon, Jan 28, 2013 at 8:10 AM, Anthony Liguori <anthony@codemonkey.ws> wrote:
> David Woodhouse <dwmw2@infradead.org> writes:
>
>> On Sun, 2013-01-27 at 18:53 -0600, Anthony Liguori wrote:
>>> Are you just trying to persist a single blob of a fixed maximum size?
>>
>> That would suffice.
>>
>>> Why not just have a second flash device then?
>>
>> Mostly because flash devices don't actually *work* with KVM.
>
> They absolutely do.  What doesn't work is executing ROM from flash if
> the ROM cannot be treated as read-only memory.

What is need is for pflash_cfi01 to start in plain rom/executable mode
while firmware executes from it during early boot.

Then later, after the rom has been shadowed, firmware will want to
write to that memory space to program it. At that point it no longer
needs to be executable.

So the question is, can it start out in rom/executable mode, but
change into a non-executable mode if a write occurs? Will qemu get a
chance to respond if something is written to a rom region, or is it
silently ignored?

Also, once the 'read-array' command is written to it after programming
is finished, can it revert to executable rom mode?

-Jordan

> That's because all we get is a PF in the kernel when trying to execute
> from unmapped ROM.  There's no way to turn that into MMIO to userspace
> without switching to running fully in emulation mode.  The x86 emulator
> is pretty close to complete but work would be needed to fully complete
> it to make this work.
>
> We normally handle this by mapping the ROM memory read-only so it can be
> executed without PF'ing but since the BIOS area is subject to PAM, we
> can't use this trick for that particular ROM.
>
> SeaBIOS has hack to just not use PAM to do BIOS shadowing when running
> under KVM/QEMU but presumably OVMF lacks this.
>
> But in this case, you're using the flash device purely for read/write,
> not for execution, so there's no limitation at all.
David Woodhouse - Jan. 28, 2013, 4:40 p.m.
On Mon, 2013-01-28 at 08:36 -0800, Jordan Justen wrote:
> 
> What is need is for pflash_cfi01 to start in plain rom/executable mode
> while firmware executes from it during early boot.
> 
> Then later, after the rom has been shadowed, firmware will want to
> write to that memory space to program it. At that point it no longer
> needs to be executable.
> 
> So the question is, can it start out in rom/executable mode, but
> change into a non-executable mode if a write occurs? Will qemu get a
> chance to respond if something is written to a rom region, or is it
> silently ignored?
> 
> Also, once the 'read-array' command is written to it after programming
> is finished, can it revert to executable rom mode?

We often have separate gating in hardware to enable the write line (or
Vpp) to flash chips. Can we emulate that and use it to switch the flash
between executable and MMIO mode? Rather than being able to trap the
first write and see what it was...
Anthony Liguori - Jan. 28, 2013, 5:02 p.m.
Jordan Justen <jljusten@gmail.com> writes:

> On Mon, Jan 28, 2013 at 8:10 AM, Anthony Liguori <anthony@codemonkey.ws> wrote:
>> David Woodhouse <dwmw2@infradead.org> writes:
>>
>>> On Sun, 2013-01-27 at 18:53 -0600, Anthony Liguori wrote:
>>>> Are you just trying to persist a single blob of a fixed maximum size?
>>>
>>> That would suffice.
>>>
>>>> Why not just have a second flash device then?
>>>
>>> Mostly because flash devices don't actually *work* with KVM.
>>
>> They absolutely do.  What doesn't work is executing ROM from flash if
>> the ROM cannot be treated as read-only memory.
>
> What is need is for pflash_cfi01 to start in plain rom/executable mode
> while firmware executes from it during early boot.
>
> Then later, after the rom has been shadowed, firmware will want to
> write to that memory space to program it. At that point it no longer
> needs to be executable.
>
> So the question is, can it start out in rom/executable mode, but
> change into a non-executable mode if a write occurs?

It's a matter of how you setup the memory region but yes, I see no
problem with this although it may need to be a qdev option.

> chance to respond if something is written to a rom region, or is it
> silently ignored?
>
> Also, once the 'read-array' command is written to it after programming
> is finished, can it revert to executable rom mode?

Reverting to executable means remapping the region as read-only memory
verses MMIO.  So yes, it's technically possible.

I suspect that you want to make this behavior a flag for the device
though.

Regards,

Anthony Liguori

>
> -Jordan
>
>> That's because all we get is a PF in the kernel when trying to execute
>> from unmapped ROM.  There's no way to turn that into MMIO to userspace
>> without switching to running fully in emulation mode.  The x86 emulator
>> is pretty close to complete but work would be needed to fully complete
>> it to make this work.
>>
>> We normally handle this by mapping the ROM memory read-only so it can be
>> executed without PF'ing but since the BIOS area is subject to PAM, we
>> can't use this trick for that particular ROM.
>>
>> SeaBIOS has hack to just not use PAM to do BIOS shadowing when running
>> under KVM/QEMU but presumably OVMF lacks this.
>>
>> But in this case, you're using the flash device purely for read/write,
>> not for execution, so there's no limitation at all.
Gleb Natapov - Jan. 28, 2013, 6:37 p.m.
On Mon, Jan 28, 2013 at 10:10:06AM -0600, Anthony Liguori wrote:
> David Woodhouse <dwmw2@infradead.org> writes:
> 
> > On Sun, 2013-01-27 at 18:53 -0600, Anthony Liguori wrote:
> >> Are you just trying to persist a single blob of a fixed maximum size?
> >
> > That would suffice.
> >
> >> Why not just have a second flash device then?
> >
> > Mostly because flash devices don't actually *work* with KVM.
> 
> They absolutely do.  What doesn't work is executing ROM from flash if
> the ROM cannot be treated as read-only memory.
> 
> That's because all we get is a PF in the kernel when trying to execute
> from unmapped ROM.  There's no way to turn that into MMIO to userspace
> without switching to running fully in emulation mode.  The x86 emulator
> is pretty close to complete but work would be needed to fully complete
> it to make this work.
> 
The x86 emulator cannot fetch from MMIO memory today. And protected mode
emulation is not so complete. We rarely, if ever, need to emulate
protected mode instructions without memory operands.

> We normally handle this by mapping the ROM memory read-only so it can be
> executed without PF'ing but since the BIOS area is subject to PAM, we
> can't use this trick for that particular ROM.
Up until kernel 3.7 there was no support for read-only memory slots. As
far as I know QEMU still maps ROM as regular RAM to KVM.

> 
> SeaBIOS has hack to just not use PAM to do BIOS shadowing when running
> under KVM/QEMU but presumably OVMF lacks this.
Not sure that such hack exists and why is it needed. BIOS area is always
writable in KVM. 

> 
> But in this case, you're using the flash device purely for read/write,
> not for execution, so there's no limitation at all.
> 
Yes, on newer kernel we can use read-only slots to emulate flash. They
behaves like ROMD: memory on read MMIO on write. On older kernels we can
use pure MMIO.

--
			Gleb.
Anthony Liguori - Jan. 28, 2013, 6:48 p.m.
Gleb Natapov <gleb@redhat.com> writes:

> On Mon, Jan 28, 2013 at 10:10:06AM -0600, Anthony Liguori wrote:
>> David Woodhouse <dwmw2@infradead.org> writes:
>> 
>> > On Sun, 2013-01-27 at 18:53 -0600, Anthony Liguori wrote:
>> >> Are you just trying to persist a single blob of a fixed maximum size?
>> >
>> > That would suffice.
>> >
>> >> Why not just have a second flash device then?
>> >
>> > Mostly because flash devices don't actually *work* with KVM.
>> 
>> They absolutely do.  What doesn't work is executing ROM from flash if
>> the ROM cannot be treated as read-only memory.
>> 
>> That's because all we get is a PF in the kernel when trying to execute
>> from unmapped ROM.  There's no way to turn that into MMIO to userspace
>> without switching to running fully in emulation mode.  The x86 emulator
>> is pretty close to complete but work would be needed to fully complete
>> it to make this work.
>> 
> The x86 emulator cannot fetch from MMIO memory today. And protected mode
> emulation is not so complete. We rarely, if ever, need to emulate
> protected mode instructions without memory operands.

Ack.

>
>> We normally handle this by mapping the ROM memory read-only so it can be
>> executed without PF'ing but since the BIOS area is subject to PAM, we
>> can't use this trick for that particular ROM.
> Up until kernel 3.7 there was no support for read-only memory slots. As
> far as I know QEMU still maps ROM as regular RAM to KVM.

Ack.

>> 
>> SeaBIOS has hack to just not use PAM to do BIOS shadowing when running
>> under KVM/QEMU but presumably OVMF lacks this.
> Not sure that such hack exists and why is it needed. BIOS area is always
> writable in KVM. 

shadow.c:
  static void
  make_bios_writable_intel(u16 bdf, u32 pam0)
  {
      int reg = pci_config_readb(bdf, pam0);
      if (!(reg & 0x10)) {
          // QEMU doesn't fully implement the piix shadow capabilities -
          // if ram isn't backing the bios segment when shadowing is
          // disabled, the code itself wont be in memory.  So, run the
          // code from the high-memory flash location.

Normally when shadowing, you set the PAM registers to send read requests
to ROM and write requests to RAM.  You then read the code segment (that
you're executing from) and write that out to RAM and switch to executing
from there.

That's just not possible without treating ROMs as MMIO and sending all
requests down to QEMU.

>> 
>> But in this case, you're using the flash device purely for read/write,
>> not for execution, so there's no limitation at all.
>> 
> Yes, on newer kernel we can use read-only slots to emulate flash. They
> behaves like ROMD: memory on read MMIO on write. On older kernels we can
> use pure MMIO.

Ack.

Regards,

Anthony Liguori

>
> --
> 			Gleb.
Gleb Natapov - Jan. 28, 2013, 8:37 p.m.
On Mon, Jan 28, 2013 at 12:48:43PM -0600, Anthony Liguori wrote:
> Gleb Natapov <gleb@redhat.com> writes:
> 
> > On Mon, Jan 28, 2013 at 10:10:06AM -0600, Anthony Liguori wrote:
> >> David Woodhouse <dwmw2@infradead.org> writes:
> >> 
> >> > On Sun, 2013-01-27 at 18:53 -0600, Anthony Liguori wrote:
> >> >> Are you just trying to persist a single blob of a fixed maximum size?
> >> >
> >> > That would suffice.
> >> >
> >> >> Why not just have a second flash device then?
> >> >
> >> > Mostly because flash devices don't actually *work* with KVM.
> >> 
> >> They absolutely do.  What doesn't work is executing ROM from flash if
> >> the ROM cannot be treated as read-only memory.
> >> 
> >> That's because all we get is a PF in the kernel when trying to execute
> >> from unmapped ROM.  There's no way to turn that into MMIO to userspace
> >> without switching to running fully in emulation mode.  The x86 emulator
> >> is pretty close to complete but work would be needed to fully complete
> >> it to make this work.
> >> 
> > The x86 emulator cannot fetch from MMIO memory today. And protected mode
> > emulation is not so complete. We rarely, if ever, need to emulate
> > protected mode instructions without memory operands.
> 
> Ack.
> 
> >
> >> We normally handle this by mapping the ROM memory read-only so it can be
> >> executed without PF'ing but since the BIOS area is subject to PAM, we
> >> can't use this trick for that particular ROM.
> > Up until kernel 3.7 there was no support for read-only memory slots. As
> > far as I know QEMU still maps ROM as regular RAM to KVM.
> 
> Ack.
> 
> >> 
> >> SeaBIOS has hack to just not use PAM to do BIOS shadowing when running
> >> under KVM/QEMU but presumably OVMF lacks this.
> > Not sure that such hack exists and why is it needed. BIOS area is always
> > writable in KVM. 
> 
> shadow.c:
>   static void
>   make_bios_writable_intel(u16 bdf, u32 pam0)
>   {
>       int reg = pci_config_readb(bdf, pam0);
>       if (!(reg & 0x10)) {
>           // QEMU doesn't fully implement the piix shadow capabilities -
>           // if ram isn't backing the bios segment when shadowing is
>           // disabled, the code itself wont be in memory.  So, run the
>           // code from the high-memory flash location.
> 
> Normally when shadowing, you set the PAM registers to send read requests
> to ROM and write requests to RAM.  You then read the code segment (that
> you're executing from) and write that out to RAM and switch to executing
> from there.
> 
> That's just not possible without treating ROMs as MMIO and sending all
> requests down to QEMU.
> 
Ack.

> >> 
> >> But in this case, you're using the flash device purely for read/write,
> >> not for execution, so there's no limitation at all.
> >> 
> > Yes, on newer kernel we can use read-only slots to emulate flash. They
> > behaves like ROMD: memory on read MMIO on write. On older kernels we can
> > use pure MMIO.
> 
> Ack.
> 
> Regards,
> 
> Anthony Liguori
> 
> >
> > --
> > 			Gleb.

--
			Gleb.

Patch

diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
index 3b31d77..1318a2e 100644
--- a/hw/fw_cfg.c
+++ b/hw/fw_cfg.c
@@ -33,6 +33,9 @@ 
 #define FW_CFG_SIZE 2
 #define FW_CFG_DATA_SIZE 1
 
+struct FWCfgEntry;
+typedef void (*FWCfgCallback)(struct FWCfgState *s, struct FWCfgEntry *e, int value);
+
 typedef struct FWCfgEntry {
     uint32_t len;
     uint8_t *data;
@@ -206,20 +209,19 @@  static void fw_cfg_write(FWCfgState *s, uint8_t value)
 
     trace_fw_cfg_write(s, value);
 
-    if (s->cur_entry & FW_CFG_WRITE_CHANNEL && e->callback &&
-        s->cur_offset < e->len) {
-        e->data[s->cur_offset++] = value;
-        if (s->cur_offset == e->len) {
-            e->callback(e->callback_opaque, e->data);
-            s->cur_offset = 0;
-        }
-    }
+    if (s->cur_entry & FW_CFG_WRITE_CHANNEL && e->callback)
+        e->callback(s, e, value);
 }
 
 static int fw_cfg_select(FWCfgState *s, uint16_t key)
 {
+    int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
+    FWCfgEntry *e = &s->entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
     int ret;
 
+    if (e->callback)
+        e->callback(s, e, -1);
+
     s->cur_offset = 0;
     if ((key & FW_CFG_ENTRY_MASK) >= FW_CFG_MAX_ENTRY) {
         s->cur_entry = FW_CFG_INVALID;
@@ -419,8 +421,8 @@  void fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value)
     fw_cfg_add_bytes(s, key, copy, sizeof(value));
 }
 
-void fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
-                         void *callback_opaque, void *data, size_t len)
+static void fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
+                                void *callback_opaque, void *data, size_t len)
 {
     int arch = !!(key & FW_CFG_ARCH_LOCAL);
 
@@ -436,8 +438,9 @@  void fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
     s->entries[arch][key].callback = callback;
 }
 
-void fw_cfg_add_file(FWCfgState *s,  const char *filename,
-                     void *data, size_t len)
+static void fw_cfg_add_file_writeable(FWCfgState *s,  const char *filename,
+                                      void *data, size_t len,
+				      FWCfgCallback callback, void *cb_opaque)
 {
     int i, index;
     size_t dsize;
@@ -451,7 +454,8 @@  void fw_cfg_add_file(FWCfgState *s,  const char *filename,
     index = be32_to_cpu(s->files->count);
     assert(index < FW_CFG_FILE_SLOTS);
 
-    fw_cfg_add_bytes(s, FW_CFG_FILE_FIRST + index, data, len);
+    fw_cfg_add_callback(s, FW_CFG_WRITE_CHANNEL + FW_CFG_FILE_FIRST + index,
+                        callback, cb_opaque, data, len);
 
     pstrcpy(s->files->f[index].name, sizeof(s->files->f[index].name),
             filename);
@@ -464,11 +468,74 @@  void fw_cfg_add_file(FWCfgState *s,  const char *filename,
 
     s->files->f[index].size   = cpu_to_be32(len);
     s->files->f[index].select = cpu_to_be16(FW_CFG_FILE_FIRST + index);
+    if (callback)
+	    s->files->f[index].select |= cpu_to_be16(FW_CFG_WRITE_CHANNEL);
     trace_fw_cfg_add_file(s, index, s->files->f[index].name, len);
 
     s->files->count = cpu_to_be32(index+1);
 }
 
+void fw_cfg_add_file(FWCfgState *s,  const char *filename,
+                     void *data, size_t len)
+{
+    fw_cfg_add_file_writeable(s, filename, data, len, NULL, NULL);
+}
+
+// Lifted from pc.c
+static long get_file_size(FILE *f)
+{
+    long where, size;
+
+    /* XXX: on Unix systems, using fstat() probably makes more sense */
+
+    where = ftell(f);
+    fseek(f, 0, SEEK_END);
+    size = ftell(f);
+    fseek(f, where, SEEK_SET);
+
+    return size;
+}
+
+static void nvstorage_callback(struct FWCfgState *s, struct FWCfgEntry *e, int value)
+
+{
+    if (value == -1) {
+        FILE *f = fopen(e->callback_opaque, "wb");
+        if (f) {
+            fwrite(e->data, e->len, 1, f);
+            fclose(f);
+        }
+	return;
+    }
+    if (s->cur_offset == e->len)
+        e->data = realloc(e->data, ++e->len);
+    e->data[s->cur_offset++] = value;
+}
+
+void fw_cfg_add_nvstorage(FWCfgState *s, const char *filename)
+{
+    FILE *f;
+    int file_size = 0;
+    int f2;
+    uint8_t *data = NULL;
+
+    f = fopen(filename, "rb");
+    if (f) {
+        file_size = get_file_size(f);
+	if (file_size) {
+            data = malloc(file_size);
+            if ((f2=fread(data, 1, file_size, f)) != file_size) {
+                fprintf(stderr, "qemu: Could not load non-volatile storage file '%s' %d %d: %s\n",
+                        filename, file_size, f2, strerror(errno));
+                exit(1);
+            }
+        }
+        fclose(f);
+    }
+    fw_cfg_add_file_writeable(s, "etc/nvstorage", data, file_size,
+                              nvstorage_callback, strdup(filename));
+}
+
 static void fw_cfg_machine_ready(struct Notifier *n, void *data)
 {
     size_t len;
@@ -507,6 +574,7 @@  FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
     fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
     fw_cfg_bootsplash(s);
     fw_cfg_reboot(s);
+    fw_cfg_add_nvstorage(s, "/tmp/nvstorage");
 
     s->machine_ready.notify = fw_cfg_machine_ready;
     qemu_add_machine_init_done_notifier(&s->machine_ready);
diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
index 05c8df1..298bc47 100644
--- a/hw/fw_cfg.h
+++ b/hw/fw_cfg.h
@@ -51,20 +51,17 @@  typedef struct FWCfgFiles {
     FWCfgFile f[];
 } FWCfgFiles;
 
-typedef void (*FWCfgCallback)(void *opaque, uint8_t *data);
-
 typedef struct FWCfgState FWCfgState;
 void fw_cfg_add_bytes(FWCfgState *s, uint16_t key, void *data, size_t len);
 void fw_cfg_add_string(FWCfgState *s, uint16_t key, const char *value);
 void fw_cfg_add_i16(FWCfgState *s, uint16_t key, uint16_t value);
 void fw_cfg_add_i32(FWCfgState *s, uint16_t key, uint32_t value);
 void fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value);
-void fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
-                         void *callback_opaque, void *data, size_t len);
 void fw_cfg_add_file(FWCfgState *s, const char *filename, void *data,
                      size_t len);
 FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
                         hwaddr crl_addr, hwaddr data_addr);
+void fw_cfg_add_nvstorage(FWCfgState *s, const char *filename);
 
 #endif /* NO_QEMU_PROTOS */