Patchwork [RFC] Distinguish between reset types

login
register
mail settings
Submitter David Woodhouse
Date Feb. 19, 2013, 2:40 p.m.
Message ID <1361284822.13482.209.camel@i7.infradead.org>
Download mbox | patch
Permalink /patch/221711/
State New
Headers show

Comments

David Woodhouse - Feb. 19, 2013, 2:40 p.m.
On Tue, 2013-02-19 at 13:31 +0100, Paolo Bonzini wrote:
> So you can make the hard reset line a qemu_irq (output in PIIX, input in
> i440FX) using qdev_init_gpio_in/out.  The input side in the i440FX then
> can reset the PAM registers while the output side can pulse it before
> calling qemu_system_reset_request() if RCR bit 1 is set.  Then you can
> connect it using qdev_connect_gpio_out/qdev_get_gpio_in in
> i440fx_common_init.

Like this? I've still left i440fx_reset() hooked up as dc->reset, and
conditionally do the full reset based on a flag in the state. If I
actually *do* the reset directly from i440fx_handle_reset() rather than
just setting the flag there, it might happen too early?
Paolo Bonzini - Feb. 19, 2013, 2:57 p.m.
Il 19/02/2013 15:40, David Woodhouse ha scritto:
> On Tue, 2013-02-19 at 13:31 +0100, Paolo Bonzini wrote:
>> So you can make the hard reset line a qemu_irq (output in PIIX, input in
>> i440FX) using qdev_init_gpio_in/out.  The input side in the i440FX then
>> can reset the PAM registers while the output side can pulse it before
>> calling qemu_system_reset_request() if RCR bit 1 is set.  Then you can
>> connect it using qdev_connect_gpio_out/qdev_get_gpio_in in
>> i440fx_common_init.
> 
> Like this?

Exactly.

> I've still left i440fx_reset() hooked up as dc->reset, and
> conditionally do the full reset based on a flag in the state. If I
> actually *do* the reset directly from i440fx_handle_reset() rather than
> just setting the flag there, it might happen too early?

I think it'd be okay, but without actually tracing what happens I agree
that it's safer this way.

Paolo

> diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> index 6c77e49..0c8f613 100644
> --- a/hw/piix_pci.c
> +++ b/hw/piix_pci.c
> @@ -71,6 +71,7 @@ typedef struct PIIX3State {
>      uint64_t pic_levels;
>  
>      qemu_irq *pic;
> +    qemu_irq reset_out;
>  
>      /* This member isn't used. Just for save/load compatibility */
>      int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS];
> @@ -92,6 +93,7 @@ struct PCII440FXState {
>      PAMMemoryRegion pam_regions[13];
>      MemoryRegion smram_region;
>      uint8_t smm_enabled;
> +    uint8_t hard_reset;
>  };
>  
>  
> @@ -171,6 +173,42 @@ static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
>      return 0;
>  }
>  
> +static void i440fx_reset(DeviceState *ds)
> +{
> +    PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, ds);
> +    PCII440FXState *d = DO_UPCAST(PCII440FXState, dev, dev);
> +    uint8_t *pci_conf = d->dev.config;
> +
> +    /* We only reset the PAM configuration if it was a *hard* reset,
> +     * triggered from the RCR on the PIIX3 (or from the TRCR on the
> +     * i440FX itself, but we don't implement that yet and software
> +     * is advised not to use it when there's a PIIX). */
> +    if (!d->hard_reset)
> +        return;
> +
> +    d->hard_reset = 0;
> +
> +    pci_conf[0x59] = 0x00; // Reset PAM setup
> +    pci_conf[0x5a] = 0x00;
> +    pci_conf[0x5b] = 0x00;
> +    pci_conf[0x5c] = 0x00;
> +    pci_conf[0x5d] = 0x00;
> +    pci_conf[0x5e] = 0x00;
> +    pci_conf[0x5f] = 0x00;
> +    pci_conf[0x72] = 0x02; // And SMM
> +
> +    i440fx_update_memory_mappings(d);
> +}
> +
> +static void i440fx_handle_reset(void *opaque, int n, int level)
> +{
> +    PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, opaque);
> +    PCII440FXState *d = DO_UPCAST(PCII440FXState, dev, dev);
> +
> +    if (level)
> +        d->hard_reset = 1;
> +}
> +
>  static int i440fx_post_load(void *opaque, int version_id)
>  {
>      PCII440FXState *d = opaque;
> @@ -216,6 +254,8 @@ static int i440fx_initfn(PCIDevice *dev)
>  
>      d->dev.config[I440FX_SMRAM] = 0x02;
>  
> +    qdev_init_gpio_in(&d->dev.qdev, i440fx_handle_reset, 1);
> +
>      cpu_smm_register(&i440fx_set_smm, d);
>      return 0;
>  }
> @@ -297,6 +337,8 @@ static PCIBus *i440fx_common_init(const char *device_name,
>          pci_bus_set_route_irq_fn(b, piix3_route_intx_pin_to_irq);
>      }
>      piix3->pic = pic;
> +    qdev_connect_gpio_out(&piix3->dev.qdev, 0,
> +                          qdev_get_gpio_in(&f->dev.qdev, 0));
>      *isa_bus = DO_UPCAST(ISABus, qbus,
>                           qdev_get_child_bus(&piix3->dev.qdev, "isa.0"));
>  
> @@ -521,6 +563,8 @@ static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len)
>      PIIX3State *d = opaque;
>  
>      if (val & 4) {
> +        if (val & 2)
> +            qemu_irq_pulse(d->reset_out);
>          qemu_system_reset_request();
>          return;
>      }
> @@ -550,6 +594,7 @@ static int piix3_initfn(PCIDevice *dev)
>      memory_region_add_subregion_overlap(pci_address_space_io(dev), RCR_IOPORT,
>                                          &d->rcr_mem, 1);
>  
> +    qdev_init_gpio_out(&dev->qdev, &d->reset_out, 1);
>      qemu_register_reset(piix3_reset, d);
>      return 0;
>  }
> @@ -615,6 +660,7 @@ static void i440fx_class_init(ObjectClass *klass, void *data)
>      dc->desc = "Host bridge";
>      dc->no_user = 1;
>      dc->vmsd = &vmstate_i440fx;
> +    dc->reset = i440fx_reset;
>  }
>  
>  static const TypeInfo i440fx_info = {
> 
> 
> 
>
Laszlo Ersek - Feb. 19, 2013, 5:51 p.m.
On 02/19/13 15:40, David Woodhouse wrote:
> On Tue, 2013-02-19 at 13:31 +0100, Paolo Bonzini wrote:
>> So you can make the hard reset line a qemu_irq (output in PIIX, input in
>> i440FX) using qdev_init_gpio_in/out.  The input side in the i440FX then
>> can reset the PAM registers while the output side can pulse it before
>> calling qemu_system_reset_request() if RCR bit 1 is set.  Then you can
>> connect it using qdev_connect_gpio_out/qdev_get_gpio_in in
>> i440fx_common_init.
> 
> Like this? I've still left i440fx_reset() hooked up as dc->reset, and
> conditionally do the full reset based on a flag in the state. If I
> actually *do* the reset directly from i440fx_handle_reset() rather than
> just setting the flag there, it might happen too early?
> 

> @@ -521,6 +563,8 @@ static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len)
>      PIIX3State *d = opaque;
>  
>      if (val & 4) {
> +        if (val & 2)
> +            qemu_irq_pulse(d->reset_out);
>          qemu_system_reset_request();
>          return;
>      }

We can't start to savevm between qemu_irq_pulse() and
qemu_system_reset_request(); OK.

I wonder though if we can enter do_savevm() after rcr_write() returns,
but before i440fx_reset() -- the registered reset handler -- is called.

Consider a monitor sitting on stdio. First we create the chardev:

  main() [vl.c]
    chardev_init_func()
      qemu_chr_new_from_opts() [qemu-char.c]
        qemu_chr_open_stdio() via funcptr
          qemu_chr_open_fd()
            registers "fd_chr_update_read_handler"

Then we hook it to the monitor:

  main() [vl.c]
    mon_init_func()
      qemu_chr_find() [qemu-char.c]
      monitor_init() [monitor.c]
        qemu_chr_add_handlers() [qemu-char.c]
          fd_chr_update_read_handler() via earlier registration
            qemu_set_fd_handler2() [iohandler.c]
              append to global list "io_handlers"

Then,

CPU thread:

  qemu_kvm_cpu_thread_fn() [cpus.c]
    kvm_cpu_exec() [kvm-all.c]
      qemu_mutex_lock_iothread()
      kvm_handle_io()
        ... rcr_write()
        ... loops back
      qemu_mutex_unlock_iothread()                        UNLOCK

IO thread (with kvm_enabled()==true):

  main_loop() [vl.c]
    main_loop_wait(nonblocking = false) [main-loop.c]
      os_host_main_loop_wait(timeout > 0)
        qemu_mutex_unlock_iothread()
        select()
        qemu_mutex_lock_iothread()                        LOCK
        glib_select_poll()
      qemu_iohandler_poll() [iohandler.c]
        scans global "io_handlers" list, monitor fd ready
        monitor_read() / monitor_control_read()
          handle_user_command() / handle_qmp_command()
            do_savevm() [savevm.c] via funcptr

Apparently, in theory at least, the guest can trap to rcr_write() and
set "PCII440FXState.hard_reset", then immediately qemu can save a VM
snapshot. If we restore this snapshot, instead of the hard reset we'll
do a soft reset.

Consider adding "vmstate_i440fx" to "vmstate_i440fx", perhaps? :)

Laszlo
Laszlo Ersek - Feb. 19, 2013, 5:53 p.m.
On 02/19/13 18:51, Laszlo Ersek wrote:

> Apparently, in theory at least, the guest can trap to rcr_write() and
> set "PCII440FXState.hard_reset", then immediately qemu can save a VM
> snapshot. If we restore this snapshot, instead of the hard reset we'll
> do a soft reset.
> 
> Consider adding "vmstate_i440fx" to "vmstate_i440fx", perhaps? :)

Aargh. "adding hard_reset to vmstate_i440fx" of course.
L.
Anthony Liguori - Feb. 19, 2013, 8:29 p.m.
David Woodhouse <dwmw2@infradead.org> writes:

> On Tue, 2013-02-19 at 13:31 +0100, Paolo Bonzini wrote:
>> So you can make the hard reset line a qemu_irq (output in PIIX, input in
>> i440FX) using qdev_init_gpio_in/out.  The input side in the i440FX then
>> can reset the PAM registers while the output side can pulse it before
>> calling qemu_system_reset_request() if RCR bit 1 is set.  Then you can
>> connect it using qdev_connect_gpio_out/qdev_get_gpio_in in
>> i440fx_common_init.
>
> Like this? I've still left i440fx_reset() hooked up as dc->reset, and
> conditionally do the full reset based on a flag in the state. If I
> actually *do* the reset directly from i440fx_handle_reset() rather than
> just setting the flag there, it might happen too early?
>
> diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> index 6c77e49..0c8f613 100644
> --- a/hw/piix_pci.c
> +++ b/hw/piix_pci.c
> @@ -71,6 +71,7 @@ typedef struct PIIX3State {
>      uint64_t pic_levels;
>  
>      qemu_irq *pic;
> +    qemu_irq reset_out;
>  
>      /* This member isn't used. Just for save/load compatibility */
>      int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS];
> @@ -92,6 +93,7 @@ struct PCII440FXState {
>      PAMMemoryRegion pam_regions[13];
>      MemoryRegion smram_region;
>      uint8_t smm_enabled;
> +    uint8_t hard_reset;
>  };
>  
>  
> @@ -171,6 +173,42 @@ static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
>      return 0;
>  }
>  
> +static void i440fx_reset(DeviceState *ds)
> +{
> +    PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, ds);
> +    PCII440FXState *d = DO_UPCAST(PCII440FXState, dev, dev);
> +    uint8_t *pci_conf = d->dev.config;
> +
> +    /* We only reset the PAM configuration if it was a *hard* reset,
> +     * triggered from the RCR on the PIIX3 (or from the TRCR on the
> +     * i440FX itself, but we don't implement that yet and software
> +     * is advised not to use it when there's a PIIX). */
> +    if (!d->hard_reset)
> +        return;
> +
> +    d->hard_reset = 0;
> +
> +    pci_conf[0x59] = 0x00; // Reset PAM setup
> +    pci_conf[0x5a] = 0x00;
> +    pci_conf[0x5b] = 0x00;
> +    pci_conf[0x5c] = 0x00;
> +    pci_conf[0x5d] = 0x00;
> +    pci_conf[0x5e] = 0x00;
> +    pci_conf[0x5f] = 0x00;
> +    pci_conf[0x72] = 0x02; // And SMM
> +
> +    i440fx_update_memory_mappings(d);
> +}
> +
> +static void i440fx_handle_reset(void *opaque, int n, int level)
> +{
> +    PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, opaque);
> +    PCII440FXState *d = DO_UPCAST(PCII440FXState, dev, dev);
> +
> +    if (level)
> +        d->hard_reset = 1;
> +}
> +
>  static int i440fx_post_load(void *opaque, int version_id)
>  {
>      PCII440FXState *d = opaque;
> @@ -216,6 +254,8 @@ static int i440fx_initfn(PCIDevice *dev)
>  
>      d->dev.config[I440FX_SMRAM] = 0x02;
>  
> +    qdev_init_gpio_in(&d->dev.qdev, i440fx_handle_reset, 1);
> +
>      cpu_smm_register(&i440fx_set_smm, d);
>      return 0;
>  }
> @@ -297,6 +337,8 @@ static PCIBus *i440fx_common_init(const char *device_name,
>          pci_bus_set_route_irq_fn(b, piix3_route_intx_pin_to_irq);
>      }
>      piix3->pic = pic;
> +    qdev_connect_gpio_out(&piix3->dev.qdev, 0,
> +                          qdev_get_gpio_in(&f->dev.qdev, 0));
>      *isa_bus = DO_UPCAST(ISABus, qbus,
>                           qdev_get_child_bus(&piix3->dev.qdev, "isa.0"));
>  
> @@ -521,6 +563,8 @@ static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len)
>      PIIX3State *d = opaque;
>  
>      if (val & 4) {
> +        if (val & 2)
> +            qemu_irq_pulse(d->reset_out);
>          qemu_system_reset_request();


This is a bit strange to me.  From the spec:

"Bits 1 and 2 in this register are used by PIIX4 to generate a hard reset
 or a soft reset. During a hard reset, PIIX4 asserts CPURST, PCIRST#, and
 RSTDRV, as well as reset its core and suspend well logic. During a soft
 reset, PIIX4 asserts INIT."

Bit
2   Reset CPU (RCPU). This bit is used to initiate (transitions from 0
    to 1) a hard reset (bit 1 in this register is set to 1) or a soft
    reset to the CPU. PIIX4 will also initiate a hard reset when PWROK
    is asserted. This bit cannot be read as a 1.

1   System Reset (SRST). This bit is used to select the type of reset
    generated when bit 2 in this register is set to 1. When SRST=1,
    PIIX4 initiates a hard reset to the CPU when bit 2 in this register
    transitions from 0 to 1. When SRST=0, PIIX4 initiates a soft reset
    when bit 2 in this register transitions from 0 to 1.

Now this maps to two signals: INIT# and CPURST#.  PCIRST# is strictly
about resetting the PCI bus.  PCIRST# is only asserted during hard
reset.

So should we even be resetting anything other than the CPU during soft
reset?

In the very least, shouldn't we expose qemu_irqs from the PIIX and let
the i440fx decide what to do with them?  In this case, it would be an
INIT# and CPURST# qemu_irq corresponding to soft and hard reset
respectively.

Regards,

Anthony Liguori


>          return;
>      }
> @@ -550,6 +594,7 @@ static int piix3_initfn(PCIDevice *dev)
>      memory_region_add_subregion_overlap(pci_address_space_io(dev), RCR_IOPORT,
>                                          &d->rcr_mem, 1);
>  
> +    qdev_init_gpio_out(&dev->qdev, &d->reset_out, 1);
>      qemu_register_reset(piix3_reset, d);
>      return 0;
>  }
> @@ -615,6 +660,7 @@ static void i440fx_class_init(ObjectClass *klass, void *data)
>      dc->desc = "Host bridge";
>      dc->no_user = 1;
>      dc->vmsd = &vmstate_i440fx;
> +    dc->reset = i440fx_reset;
>  }
>  
>  static const TypeInfo i440fx_info = {
>
>
>
>
> -- 
> dwmw2
Paolo Bonzini - Feb. 19, 2013, 9:06 p.m.
Il 19/02/2013 21:29, Anthony Liguori ha scritto:
> In the very least, shouldn't we expose qemu_irqs from the PIIX and let
> the i440fx decide what to do with them?  In this case, it would be an
> INIT# and CPURST# qemu_irq corresponding to soft and hard reset
> respectively.

Yes, see the other patch that David posted.  It only exposes PCIRST# but
it's a start.

Paolo
David Woodhouse - Feb. 19, 2013, 9:58 p.m.
On Tue, 2013-02-19 at 14:29 -0600, Anthony Liguori wrote:
> David Woodhouse <dwmw2@infradead.org> writes:
> >      if (val & 4) {
> > +        if (val & 2)
> > +            qemu_irq_pulse(d->reset_out);
> >          qemu_system_reset_request();
> 
> 
> This is a bit strange to me. 

The reset_out "IRQ" isn't actually what triggers the I440FX/PAM reset.
That just sets a flag to say that the coming *system* reset is a hard
reset and not a soft reset. I did comment about the possibility of doing
the reset directly from the qemu_irq handler, and why I hadn't done it
that way...

> So should we even be resetting anything other than the CPU during soft
> reset?

I suspect not. A soft reset triggered by the RCR, keyboard controller,
port 92 etc. should all just reset the CPU and nothing else.

> In the very least, shouldn't we expose qemu_irqs from the PIIX and let
> the i440fx decide what to do with them?  In this case, it would be an
> INIT# and CPURST# qemu_irq corresponding to soft and hard reset
> respectively.

How far down this road do we go? Do we end up wiring up the full reset
topology and abandoning the special-case qemu_system_reset() altogether?
Anthony Liguori - Feb. 19, 2013, 10:17 p.m.
David Woodhouse <dwmw2@infradead.org> writes:

> On Tue, 2013-02-19 at 14:29 -0600, Anthony Liguori wrote:
>> David Woodhouse <dwmw2@infradead.org> writes:
>> >      if (val & 4) {
>> > +        if (val & 2)
>> > +            qemu_irq_pulse(d->reset_out);
>> >          qemu_system_reset_request();
>> 
>> 
>> This is a bit strange to me. 
>
> The reset_out "IRQ" isn't actually what triggers the I440FX/PAM reset.

Right, this is what's strange to me.  There's no hardware analog AFAICT
so I'm not sure why we're exposing it as a qemu_irq other than we want
to jump through a function pointer invocation instead of making a
straight funciton call :-)

> That just sets a flag to say that the coming *system* reset is a hard
> reset and not a soft reset.

Yes, but this flag is in PIIX, not the i440fx.

> I did comment about the possibility of doing
> the reset directly from the qemu_irq handler, and why I hadn't done it
> that way...
>
>> So should we even be resetting anything other than the CPU during soft
>> reset?
>
> I suspect not. A soft reset triggered by the RCR, keyboard controller,
> port 92 etc. should all just reset the CPU and nothing else.

I suspect what we need to do is convert qemu_system_reset_request() into
a qemu_system_cpu_reset() that takes a callback.  Once the VCPUs have
been reset, the callback can then be used to reset all or some of the
device model.  This of course means removing the reset handlers in the
CPUs as they exist today.

Cc'ing Andreas to get his thoughts.

FWIW, I'm not expecting you to do this to fix this issue.  Just thinking
out loud here really.

>> In the very least, shouldn't we expose qemu_irqs from the PIIX and let
>> the i440fx decide what to do with them?  In this case, it would be an
>> INIT# and CPURST# qemu_irq corresponding to soft and hard reset
>> respectively.
>
> How far down this road do we go? Do we end up wiring up the full reset
> topology and abandoning the special-case qemu_system_reset()
> altogether?

Long term, yes.  Short term, whatever we need that's reasonable to get
the CSM happy without making things worse.

I'm not terribly happy exposing an IRQ that doesn't exist in real life
to "model hardware".  We could just as easily call into i440fx to set
the hard_reset flag without jumping through qemu_irq hoops if we're just
looking to make it work.  I think that's clearer if what we're doing is
essentially a short term hack.

If we were going to model an INIT# and CPURST# qemu_irq and raise them
based on what the hardware does, I'm happy with that.  But AFAICT
'reset_out' has no hardware analogy.

Regards,

Anthony Liguori

>
> -- 
> dwmw2
David Woodhouse - Feb. 19, 2013, 10:45 p.m.
On Tue, 2013-02-19 at 16:17 -0600, Anthony Liguori wrote:
> Right, this is what's strange to me.  There's no hardware analog AFAICT
> so I'm not sure why we're exposing it as a qemu_irq other than we want
> to jump through a function pointer invocation instead of making a
> straight funciton call :-)

Hey, don't ask me. I was just trying to do what Paolo said in response
to me first attempt — which just accessed the 'hard reset' flag in the
PIIX directly from the I440FX reset handler.

> > That just sets a flag to say that the coming *system* reset is a hard
> > reset and not a soft reset.
> 
> Yes, but this flag is in PIIX, not the i440fx.

Yes. The Reset Control Register is in the PIIX. And the i440fx just
happens to be the only other device that *cares* if it's a hard reset or
a soft reset. For now. Thankfully they're implemented in the same C file
and even initialised together, which lets us do a special-case hack
relatively easily.

> I suspect what we need to do is convert qemu_system_reset_request() into
> a qemu_system_cpu_reset() that takes a callback.  Once the VCPUs have
> been reset, the callback can then be used to reset all or some of the
> device model.  This of course means removing the reset handlers in the
> CPUs as they exist today.
> 
> Cc'ing Andreas to get his thoughts.
> 
> FWIW, I'm not expecting you to do this to fix this issue.  Just thinking
> out loud here really.

Sounds good to me. I'm beginning to wish I'd just ignored the fact that
we need a properly working "soft" reset to get back from 286 protected
mode to real mode, and wired up the damn PAM reset unconditionally. I'm
not convinced that the protected->real mode transition will work for
anyone anyway.

> I'm not terribly happy exposing an IRQ that doesn't exist in real life
> to "model hardware".  We could just as easily call into i440fx to set
> the hard_reset flag without jumping through qemu_irq hoops if we're just
> looking to make it work.  I think that's clearer if what we're doing is
> essentially a short term hack.

That was basically my first attempt, before Paulo's feedback? I had
i440fx calling into PIIX to *read* the flag, rather than PIIX calling
into i440fx to *set* it, but if you feel strongly it wouldn't be hard to
switch that round.
Paolo Bonzini - Feb. 19, 2013, 10:47 p.m.
Il 19/02/2013 23:17, Anthony Liguori ha scritto:
>>> >> >      if (val & 4) {
>>> >> > +        if (val & 2)
>>> >> > +            qemu_irq_pulse(d->reset_out);
>>> >> >          qemu_system_reset_request();
>> >> 
>> >> 
>> >> This is a bit strange to me. 
> >
> > The reset_out "IRQ" isn't actually what triggers the I440FX/PAM reset.
> 
> Right, this is what's strange to me.  There's no hardware analog AFAICT
> so I'm not sure why we're exposing it as a qemu_irq other than we want
> to jump through a function pointer invocation instead of making a
> straight funciton call :-)

True, OTOH I agreed with David's explanation that the hard reset could
happen too early.

IOW, doing the irq this way is a consequence of having
qemu_system_reset_request() instead of qemu_system_reset().

Paolo
Peter Maydell - Feb. 19, 2013, 10:50 p.m.
On 19 February 2013 22:17, Anthony Liguori <anthony@codemonkey.ws> wrote:
> David Woodhouse <dwmw2@infradead.org> writes:
>> On Tue, 2013-02-19 at 14:29 -0600, Anthony Liguori wrote:
>>> So should we even be resetting anything other than the CPU during soft
>>> reset?
>>
>> I suspect not. A soft reset triggered by the RCR, keyboard controller,
>> port 92 etc. should all just reset the CPU and nothing else.
>
> I suspect what we need to do is convert qemu_system_reset_request() into
> a qemu_system_cpu_reset() that takes a callback.  Once the VCPUs have
> been reset, the callback can then be used to reset all or some of the
> device model.

If we're just solving a PC problem here and it really is just
"only reset the CPU, nothing else", why don't we give the
x86 CPU a qemu_irq input for "reset this CPU core" and wire it
up to the relevant bit of hardware on the PC board? I don't
see the need for a specific 'qemu_system_cpu_reset()' here
(and not having one avoids the swamp of trying to define its
semantics...)

>> How far down this road do we go? Do we end up wiring up the full reset
>> topology and abandoning the special-case qemu_system_reset()
>> altogether?
>
> Long term, yes.  Short term, whatever we need that's reasonable to get
> the CSM happy without making things worse.

I definitely think we should be modelling reset lines, yes.
It would be nice if we could sketch a path for how we get from
here to there. Here's a strawman proposal that's probably full
of holes:

(1) we retain the existing 'reset' Device method as meaning "full
power-cycle style reset" and qemu_system_reset_request() as
meaning "power cycle entire machine". (Eventually the latter
might go away as I doubt much real hardware has a "power
cycle the world" wiring.)

(2) we recommend that for new devices etc, where the device has
one or more physical reset pins those should be modeled as
qdev_gpio input lines, with the behaviour the hardware has
when those are asserted. [Q: what do we do about logic-low-is-assert
vs logic-high-is-assert hardware?] This reset can obviously share
code with the DeviceState::reset in many cases, but it's
conceptually separate.

(3) when we need to implement a particular effect on a particular
board (as here with the PC) we do that by:
 a. making sure all affected devices implement reset
 b. wiring up reset on the board model
 c. having the implementation of the 'reset' register or whatever
    assert the irq line

(4) as and when we have time, convert existing code (ho ho)

This obviously works best when the "not actually a full power
cycle" reset you want in (3) is a very limited focus one,
like "just reset the CPU"...

It also exposes some "not there yet" features like the fact
we can't have named gpio input lines so you have to have a
numbering convention for smooshing all your inputs into a
single array. Pins, anybody? :-)

-- PMM
Peter Crosthwaite - Feb. 20, 2013, 12:21 p.m.
Hi Peter,

On Wed, Feb 20, 2013 at 8:50 AM, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 19 February 2013 22:17, Anthony Liguori <anthony@codemonkey.ws> wrote:
>> David Woodhouse <dwmw2@infradead.org> writes:
>>> On Tue, 2013-02-19 at 14:29 -0600, Anthony Liguori wrote:
>>>> So should we even be resetting anything other than the CPU during soft
>>>> reset?
>>>
>>> I suspect not. A soft reset triggered by the RCR, keyboard controller,
>>> port 92 etc. should all just reset the CPU and nothing else.
>>
>> I suspect what we need to do is convert qemu_system_reset_request() into
>> a qemu_system_cpu_reset() that takes a callback.  Once the VCPUs have
>> been reset, the callback can then be used to reset all or some of the
>> device model.
>
> If we're just solving a PC problem here and it really is just
> "only reset the CPU, nothing else", why don't we give the
> x86 CPU a qemu_irq input for "reset this CPU core" and wire it
> up to the relevant bit of hardware on the PC board? I don't
> see the need for a specific 'qemu_system_cpu_reset()' here
> (and not having one avoids the swamp of trying to define its
> semantics...)
>

Could we be more general and implement this on the TYPE_DEVICE level
(rather than X86_CPU)? I want this GPIO-as-reset feature for all Zynq
devices cpus and preihperals alike. The Zynq power controller has
software controllable individual reset for every device in the system
and my plan-A was to do it as GPIOs. To implement the reset gpio-ins
however I was thinking do it in one swift stroke by adding the single
GPIO on the TYPE_DEVICE layer that backs onto DeviceClass-.>reset.
With recent QOM efforts (making CPUs DEVICEs) this catchall will also
implment the feature for all CPUs. Power controllers define gpio_outs
and then machine model just play connect the dots.

RFC! I was planning at some stage to formally RFC this but yet to get
around to it. I bring it up because the topic is hot here.

>>> How far down this road do we go? Do we end up wiring up the full reset
>>> topology and abandoning the special-case qemu_system_reset()
>>> altogether?
>>
>> Long term, yes.  Short term, whatever we need that's reasonable to get
>> the CSM happy without making things worse.
>
> I definitely think we should be modelling reset lines, yes.
> It would be nice if we could sketch a path for how we get from
> here to there. Here's a strawman proposal that's probably full
> of holes:
>
> (1) we retain the existing 'reset' Device method as meaning "full
> power-cycle style reset" and qemu_system_reset_request() as
> meaning "power cycle entire machine". (Eventually the latter
> might go away as I doubt much real hardware has a "power
> cycle the world" wiring.)
>
> (2) we recommend that for new devices etc, where the device has
> one or more physical reset pins those should be modeled as
> qdev_gpio input lines, with the behaviour the hardware has
> when those are asserted. [Q: what do we do about logic-low-is-assert
> vs logic-high-is-assert hardware?] This reset can obviously share
> code with the DeviceState::reset in many cases, but it's
> conceptually separate.
>
> (3) when we need to implement a particular effect on a particular
> board (as here with the PC) we do that by:
>  a. making sure all affected devices implement reset
>  b. wiring up reset on the board model
>  c. having the implementation of the 'reset' register or whatever
>     assert the irq line
>
> (4) as and when we have time, convert existing code (ho ho)
>
The TYPE_DEVICE level implementation would give a reset pin to every
device that implements DeviceClass->reset which should minimise the
pain here. The hard part is devices that dont implement reset at all
which are a lost cause WRT this discussion.

> This obviously works best when the "not actually a full power
> cycle" reset you want in (3) is a very limited focus one,
> like "just reset the CPU"...
>
> It also exposes some "not there yet" features like the fact
> we can't have named gpio input lines so you have to have a
> numbering convention for smooshing all your inputs into a
> single array. Pins, anybody? :-)
>

Yes my idea requires this so would have to bite the bullet and get
this one through.

Regards,
Peter

> -- PMM
>
Peter Maydell - Feb. 20, 2013, 12:29 p.m.
On 20 February 2013 12:21, Peter Crosthwaite
<peter.crosthwaite@xilinx.com> wrote:
> On Wed, Feb 20, 2013 at 8:50 AM, Peter Maydell <peter.maydell@linaro.org> wrote:
>> If we're just solving a PC problem here and it really is just
>> "only reset the CPU, nothing else", why don't we give the
>> x86 CPU a qemu_irq input for "reset this CPU core" and wire it
>> up to the relevant bit of hardware on the PC board? I don't
>> see the need for a specific 'qemu_system_cpu_reset()' here
>> (and not having one avoids the swamp of trying to define its
>> semantics...)

> Could we be more general and implement this on the TYPE_DEVICE level
> (rather than X86_CPU)? I want this GPIO-as-reset feature for all Zynq
> devices cpus and preihperals alike. The Zynq power controller has
> software controllable individual reset for every device in the system
> and my plan-A was to do it as GPIOs. To implement the reset gpio-ins
> however I was thinking do it in one swift stroke by adding the single
> GPIO on the TYPE_DEVICE layer that backs onto DeviceClass-.>reset.

The trouble is that:
 * some devices have no reset GPIO line
 * some devices have more than one (eg a Cortex-A9MPx4 has 18 different
   reset input lines)
 * the reset line doesn't always match up with the DeviceClass::reset
   semantics

I guess maybe if there was a way to say 'this device suppresses
the default reset input implementation'.

(Plus as you note we'd have to actually support named GPIO inputs
to have the base class provide an input pin that didn't get
tangled up with the device's own inputs.)

-- PMM
Peter Crosthwaite - Feb. 20, 2013, 12:47 p.m.
On Wed, Feb 20, 2013 at 10:29 PM, Peter Maydell
<peter.maydell@linaro.org> wrote:
> On 20 February 2013 12:21, Peter Crosthwaite
> <peter.crosthwaite@xilinx.com> wrote:
>> On Wed, Feb 20, 2013 at 8:50 AM, Peter Maydell <peter.maydell@linaro.org> wrote:
>>> If we're just solving a PC problem here and it really is just
>>> "only reset the CPU, nothing else", why don't we give the
>>> x86 CPU a qemu_irq input for "reset this CPU core" and wire it
>>> up to the relevant bit of hardware on the PC board? I don't
>>> see the need for a specific 'qemu_system_cpu_reset()' here
>>> (and not having one avoids the swamp of trying to define its
>>> semantics...)
>
>> Could we be more general and implement this on the TYPE_DEVICE level
>> (rather than X86_CPU)? I want this GPIO-as-reset feature for all Zynq
>> devices cpus and preihperals alike. The Zynq power controller has
>> software controllable individual reset for every device in the system
>> and my plan-A was to do it as GPIOs. To implement the reset gpio-ins
>> however I was thinking do it in one swift stroke by adding the single
>> GPIO on the TYPE_DEVICE layer that backs onto DeviceClass-.>reset.
>
> The trouble is that:
>  * some devices have no reset GPIO line

Is there any harm in just not connecting the default reset GPIO? Or if
you are pedantic allow the class definition to opt-out and inhibit
generation of the GPIO.

>  * some devices have more than one (eg a Cortex-A9MPx4 has 18 different
>    reset input lines)

Yeh you are lost in this case. But my intended semantics for the
TYPE_DEVICE reset GPIO is it is a power on reset (PoR) with equivalent
function to DeviceClass->reset

>  * the reset line doesn't always match up with the DeviceClass::reset
>    semantics
>

Then its not a PoR equivalent (and thus from QEMUs perspective not a
"reset" at all). Its a device specific GPIO. The same applies to 18
lines of A9MPx4, althought that is a container object so im guessing
some of those 18 resets will pass through as PoR equivalents to the
subdevices?

So working on that case, suppose GIC (a subcomponent of Cortex-A9MPx4)
has a PoR equivalent wired directly as one of the 18 resets. The
container will have to explicitly define all 18 resets, however, it
can pass GICs through to the TYPE_DEVICE reset for the GIC instance,
saving on having to hack up GIC to explicitly have a reset GPIO.

It just strikes me as a workable solution for the 90% case then we can
go you full custom GPIO solution for the harder ones.

Regards,
Peter


> I guess maybe if there was a way to say 'this device suppresses
> the default reset input implementation'.
>
> (Plus as you note we'd have to actually support named GPIO inputs
> to have the base class provide an input pin that didn't get
> tangled up with the device's own inputs.)
>
> -- PMM
>
Peter Maydell - Feb. 20, 2013, 1:01 p.m.
On 20 February 2013 12:47, Peter Crosthwaite
<peter.crosthwaite@xilinx.com> wrote:
> On Wed, Feb 20, 2013 at 10:29 PM, Peter Maydell
> <peter.maydell@linaro.org> wrote:
>> On 20 February 2013 12:21, Peter Crosthwaite
>> <peter.crosthwaite@xilinx.com> wrote:
>>> Could we be more general and implement this on the TYPE_DEVICE level
>>> (rather than X86_CPU)? I want this GPIO-as-reset feature for all Zynq
>>> devices cpus and preihperals alike. The Zynq power controller has
>>> software controllable individual reset for every device in the system
>>> and my plan-A was to do it as GPIOs. To implement the reset gpio-ins
>>> however I was thinking do it in one swift stroke by adding the single
>>> GPIO on the TYPE_DEVICE layer that backs onto DeviceClass-.>reset.
>>
>> The trouble is that:
>>  * some devices have no reset GPIO line
>
> Is there any harm in just not connecting the default reset GPIO? Or if
> you are pedantic allow the class definition to opt-out and inhibit
> generation of the GPIO.
>
>>  * some devices have more than one (eg a Cortex-A9MPx4 has 18 different
>>    reset input lines)
>
> Yeh you are lost in this case. But my intended semantics for the
> TYPE_DEVICE reset GPIO is it is a power on reset (PoR) with equivalent
> function to DeviceClass->reset

I don't think this is a useful thing to model. What is actually
useful to model is "what does the device do when its hardware
reset line is asserted" (which might be the same as power on reset
but isn't required to be so). We can have a default convenience
implementation which is "reset like a power cycle", but that doesn't
mean the semantics of the reset GPIO line are *defined* to be "reset
like a power cycle". It just means that if your reset line's semantics
are different you can't use the convenience implementation. (Also
that you can't rely on every device having a 'reset' GPIO line,
if the hardware doesn't either. But that's not a problem I think.)

If you're trying to model the case of "my power controller actually
forcibly powers down subsets of the board" you should already be
able to do that by calling the relevant object reset methods
[possibly via some indirection through suitably constructed containers].
Powering down a section of the board isn't done by asserting input
lines to each affected device; the mechanism is different and the
modelling should be too.

> It just strikes me as a workable solution for the 90% case then we can
> go you full custom GPIO solution for the harder ones.

Mmm, as long as there's a way to turn it off and/or to override
the implementation it should be ok as a default/convenience
method.

-- PMM
Laszlo Ersek - Feb. 20, 2013, 3:18 p.m.
On 02/19/13 23:45, David Woodhouse wrote:

> I'm beginning to wish I'd just ignored the fact that
> we need a properly working "soft" reset to get back from 286 protected
> mode to real mode, and wired up the damn PAM reset unconditionally. I'm
> not convinced that the protected->real mode transition will work for
> anyone anyway.

I believe currently we must be somewhere "between" soft reset & hard
reset. I estimate getting closer to a well-emulated hard reset is more
important than getting closer to a soft one. If you were to extend the
i440FX reset handler so that it unconditionally resets the PAMs, I'd
give my Rb. (Take it for what it's worth :))

The long-term solution covering all targets is being designed, but a
quick shortcut would be nice, and not only for CSM development /
testing. (I gather qemu_prep_reset() in SeaBIOS would not have had to be
written that way, for example.) For CSM stuff we can still carry private
qemu patches of course...

Thanks
Laszlo
Paolo Bonzini - Feb. 20, 2013, 3:34 p.m.
Il 20/02/2013 16:18, Laszlo Ersek ha scritto:
>> > I'm beginning to wish I'd just ignored the fact that
>> > we need a properly working "soft" reset to get back from 286 protected
>> > mode to real mode, and wired up the damn PAM reset unconditionally. I'm
>> > not convinced that the protected->real mode transition will work for
>> > anyone anyway.
> I believe currently we must be somewhere "between" soft reset & hard
> reset. I estimate getting closer to a well-emulated hard reset is more
> important than getting closer to a soft one. If you were to extend the
> i440FX reset handler so that it unconditionally resets the PAMs, I'd
> give my Rb. (Take it for what it's worth :))

It would actually make sense.  The right way to do soft reset has
nothing to do with qemu_system_reset_request().

Paolo
David Woodhouse - Feb. 20, 2013, 3:37 p.m.
On Wed, 2013-02-20 at 16:34 +0100, Paolo Bonzini wrote:
> Il 20/02/2013 16:18, Laszlo Ersek ha scritto:
> >> > I'm beginning to wish I'd just ignored the fact that
> >> > we need a properly working "soft" reset to get back from 286 protected
> >> > mode to real mode, and wired up the damn PAM reset unconditionally. I'm
> >> > not convinced that the protected->real mode transition will work for
> >> > anyone anyway.
> > I believe currently we must be somewhere "between" soft reset & hard
> > reset. I estimate getting closer to a well-emulated hard reset is more
> > important than getting closer to a soft one. If you were to extend the
> > i440FX reset handler so that it unconditionally resets the PAMs, I'd
> > give my Rb. (Take it for what it's worth :))
> 
> It would actually make sense.  The right way to do soft reset has
> nothing to do with qemu_system_reset_request().

I've posted that version of the patch, with a suitable comment.

Thanks.
Anthony Liguori - Feb. 20, 2013, 6:12 p.m.
David Woodhouse <dwmw2@infradead.org> writes:

> On Tue, 2013-02-19 at 16:17 -0600, Anthony Liguori wrote:
>> Right, this is what's strange to me.  There's no hardware analog AFAICT
>> so I'm not sure why we're exposing it as a qemu_irq other than we want
>> to jump through a function pointer invocation instead of making a
>> straight funciton call :-)
>
> Hey, don't ask me. I was just trying to do what Paolo said in response
> to me first attempt — which just accessed the 'hard reset' flag in the
> PIIX directly from the I440FX reset handler.
>
>> > That just sets a flag to say that the coming *system* reset is a hard
>> > reset and not a soft reset.
>> 
>> Yes, but this flag is in PIIX, not the i440fx.
>
> Yes. The Reset Control Register is in the PIIX. And the i440fx just
> happens to be the only other device that *cares* if it's a hard reset or
> a soft reset. For now. Thankfully they're implemented in the same C file
> and even initialised together, which lets us do a special-case hack
> relatively easily.
>
>> I suspect what we need to do is convert qemu_system_reset_request() into
>> a qemu_system_cpu_reset() that takes a callback.  Once the VCPUs have
>> been reset, the callback can then be used to reset all or some of the
>> device model.  This of course means removing the reset handlers in the
>> CPUs as they exist today.
>> 
>> Cc'ing Andreas to get his thoughts.
>> 
>> FWIW, I'm not expecting you to do this to fix this issue.  Just thinking
>> out loud here really.
>
> Sounds good to me. I'm beginning to wish I'd just ignored the fact that
> we need a properly working "soft" reset to get back from 286 protected
> mode to real mode, and wired up the damn PAM reset unconditionally. I'm
> not convinced that the protected->real mode transition will work for
> anyone anyway.

Can you try out the following tree:

    git://github.com/aliguori/qemu.git soft-reset.2

Or better yet, post binaries of Tiano Core + SeaBIOS as a CSM for me to
try out?

The above implements a CPU-only soft reset that should fix the problem
you're having with PAM resetting unconditionally.  If it works, I'll
fixup the other PC callers of reset too.

Regards,

Anthony Liguori
Laszlo Ersek - Feb. 20, 2013, 11:50 p.m.
On 02/20/13 19:12, Anthony Liguori wrote:

> Or better yet, post binaries of Tiano Core + SeaBIOS as a CSM for me to
> try out?

I tested David's recent PAM-resetting series with these:

http://people.redhat.com/~lersek/csm-test.tar.xz

(Debug output of OVMF, SeaBIOS and SeaVGABIOS all goes to the qemu debug
console; -debugcon file:XXXX -global isa-debugcon.iobase=0x402.)

> The above implements a CPU-only soft reset that should fix the problem
> you're having with PAM resetting unconditionally.  If it works, I'll
> fixup the other PC callers of reset too.

The problem I was facing on my workstation is that the PAM registers
were *not* reset, unconditionally. What I needed was KVM continuing at
0xfffffff0, or to make the reset "as hard as possible": it was "too
soft". So if the linked branch makes resets softer, that's the opposite
direction for my problem.

Thanks
Laszlo
David Woodhouse - Feb. 20, 2013, 11:55 p.m.
On Thu, 2013-02-21 at 00:50 +0100, Laszlo Ersek wrote:
> On 02/20/13 19:12, Anthony Liguori wrote:
> 
> > Or better yet, post binaries of Tiano Core + SeaBIOS as a CSM for me to
> > try out?
> 
> I tested David's recent PAM-resetting series with these:
> 
> http://people.redhat.com/~lersek/csm-test.tar.xz
> 
> (Debug output of OVMF, SeaBIOS and SeaVGABIOS all goes to the qemu debug
> console; -debugcon file:XXXX -global isa-debugcon.iobase=0x402.)

Oops, sorry for not noticing that request earlier.

> > The above implements a CPU-only soft reset that should fix the problem
> > you're having with PAM resetting unconditionally.  If it works, I'll
> > fixup the other PC callers of reset too.
> 
> The problem I was facing on my workstation is that the PAM registers
> were *not* reset, unconditionally. What I needed was KVM continuing at
> 0xfffffff0, or to make the reset "as hard as possible": it was "too
> soft". So if the linked branch makes resets softer, that's the opposite
> direction for my problem.

Well... your test now works because of the bug that Anthony is trying to
fix :)

SeaBIOS is doing a keyboard-controller reset. And that's supposed to be
a soft reset and thus *shouldn't* reset the PAM configuration.

We need to fix SeaBIOS to do a "PCI" (0xcf9) reset. Or, since SeaBIOS
isn't supposed to be hardware-specific, we need to fix SeaBIOS to use
the ACPI RESET_REG, and fix OVMF to *set* the RESET_REG in the ACPI
tables it generates. I posted patches for the SeaBIOS side of that
today, if you want to do the OVMF side...
David Woodhouse - Feb. 21, 2013, 12:02 a.m.
On Wed, 2013-02-20 at 12:12 -0600, Anthony Liguori wrote:
> The above implements a CPU-only soft reset that should fix the problem
> you're having with PAM resetting unconditionally.  If it works, I'll
> fixup the other PC callers of reset too.

To be clear: that wasn't my problem :)

If people want to run OS/2, VDISK or whatever else needs to get back
from protected mode to real mode with a CPU reset but *without* a full
system reset, it's going to be *their* problem. And I suspect they have
other problems already, so I don't feel too guilty about the final patch
in my tree at git.infradead.org/users/dwmw2/qemu.git doing the PAM reset
unconditionally. That works just fine for me.


     if (val & 4) {
-        qemu_system_reset_request();
+        if (val & 2) {
+            piix_soft_reset_request();
+        } else {
+            qemu_system_reset_request();
+        }
         return;

Those are the wrong way round, I think. If bit 1 is set, that's a hard
reset and should use qemu_system_reset_request(). If it's *not* set,
then it's supposed to be a soft reset (like a port 92 or keyboard
controller reset).
Laszlo Ersek - Feb. 21, 2013, 1:10 a.m.
On 02/21/13 00:55, David Woodhouse wrote:

>>> The above implements a CPU-only soft reset that should fix the problem
>>> you're having with PAM resetting unconditionally.  If it works, I'll
>>> fixup the other PC callers of reset too.
>>
>> The problem I was facing on my workstation is that the PAM registers
>> were *not* reset, unconditionally. What I needed was KVM continuing at
>> 0xfffffff0, or to make the reset "as hard as possible": it was "too
>> soft". So if the linked branch makes resets softer, that's the opposite
>> direction for my problem.
>
> Well... your test now works because of the bug that Anthony is trying to
> fix :)

I don't believe so,

> SeaBIOS is doing a keyboard-controller reset. And that's supposed to be
> a soft reset and thus *shouldn't* reset the PAM configuration.

the reset I tested was requested by the Linux kernel (in one of the ways
we discussed elsewhere). Granted, one of those methods may have been a
keyboard-controller reset.

I booted Fedora 18 from OVMF+SeaBIOS CSM to the xfce GUI, then clicked
Reboot on the virt-manager window. That generates an ACPI event, which
starts the shutdown sequence, at the end of which the kernel reboots the
VM. SeaBIOS cannot come into the picture until after that Linux request.
(And if the host machine supports unrestricted guest, or the host kernel
has a very recent KVM, or qemu resets the PAMs, then SeaBIOS doesn't
come into the picture at all, at least before OVMF invokes again it
after reboot.)

> We need to fix SeaBIOS to do a "PCI" (0xcf9) reset. Or, since SeaBIOS
> isn't supposed to be hardware-specific, we need to fix SeaBIOS to use
> the ACPI RESET_REG, and fix OVMF to *set* the RESET_REG in the ACPI
> tables it generates. I posted patches for the SeaBIOS side of that
> today, if you want to do the OVMF side...

(adding Jordan & edk2-devel)

We might want to discuss two things here:

(1) The reset capability that OVMF exports via ACPI -- I agree that I
should be effecting the 0xCF9 thing in the appropriate table.

(2) There's also one point (that I've ever seen) where OVMF itself
resets the platform. It is on the initial config TUI; there's a Reset
System option somewhere.

... It's implemented in
"IntelFrameworkModulePkg/Universal/BdsDxe/BootMaint/BootMaint.c",
function BootMaintCallback(). It calls

      gRT->ResetSystem (EfiResetCold, EFI_SUCCESS, 0, NULL);

This runtime service comes from "PcAtChipsetPkg/KbcResetDxe" for
OvmfPkg, which delegates the task to whichever ResetSystemLib flavor the
module being built specifies. For OvmfPkg that's
"OvmfPkg/Library/ResetSystemLib", and for a cold reset it uses

  /**
    Calling this function causes a system-wide reset. This sets
    all circuitry within the system to its initial state. This type of reset
    is asynchronous to system operation and operates without regard to
    cycle boundaries.

    System reset should not return, if it returns, it means the system does
    not support cold reset.
  **/
  VOID
  EFIAPI
  ResetCold (
    VOID
    )
  {
    IoWrite8 (0x64, 0xfe);
  }

The same code is used for ResetWarm(), this is the keyboard reset.

I guess I could change it to access 0xCF9 instead, but
- that won't work on earlier qemu versions,
- it turns the intermediate module "PcAtChipsetPkg/KbcResetDxe" a
misnomer, since we wouldn't be using the keyboard controller any longer.
(Actually, in theory, "KbcResetDxe" itself is incapable of hard reset.)

OTOH if the keyboard reset gets soft in qemu, then OVMF's hard reset
(the above code) will break. Maybe I could cycle between 0xCF9 and 0x64
in ResetCold(), starting with 0xCF9. (The full logic in the Linux kernel
seems a bit too much, see the list of source files in
<http://thread.gmane.org/gmane.comp.bios.tianocore.devel/2093/focus=195441>.)


((3) Apparently a pre-DXE reset service (PPI,
Pre-EFI-Initialization-Module to Pre-Efi-Initialization-Module
Interface) exists as well, called PeiResetService(). It tries to
delegate to the architectural protocol designated with
"gEfiPeiResetPpiGuid" (MdeModulePkg/Core/Pei/Reset/Reset.c). The
protocol has one function of type EFI_PEI_RESET_SYSTEM ("This service
resets the entire platform, including all processors and devices, and
reboots the system"), but it doesn't seem to be implemented anywhere in
edk2. It's documented in Volume 1, Chapter 4.8 of the Plat. Init. Spec.
We probably don't need to provide it.)

Laszlo
Paolo Bonzini - Feb. 21, 2013, 8:36 a.m.
Il 21/02/2013 02:10, Laszlo Ersek ha scritto:
> OTOH if the keyboard reset gets soft in qemu, then OVMF's hard reset
> (the above code) will break. Maybe I could cycle between 0xCF9 and 0x64
> in ResetCold(), starting with 0xCF9.

Yes, that's the right thing to do.

Also, in QEMU you're doing:

    if (val & 4) {
        qemu_system_reset_request();
        return;
    }
    d->rcr = val & 2; /* keep System Reset type only */

It looks like d->rcr should be set first to match what hardware does
(writing 0x6 causes a cold reset, and that's what you usually find in
the ACPI tables).

Paolo
David Woodhouse - Feb. 21, 2013, 9:04 a.m.
On Thu, 2013-02-21 at 09:36 +0100, Paolo Bonzini wrote:
> Il 21/02/2013 02:10, Laszlo Ersek ha scritto:
> > OTOH if the keyboard reset gets soft in qemu, then OVMF's hard reset
> > (the above code) will break. Maybe I could cycle between 0xCF9 and 0x64
> > in ResetCold(), starting with 0xCF9.
> 
> Yes, that's the right thing to do.
> 
> Also, in QEMU you're doing:
> 
>     if (val & 4) {
>         qemu_system_reset_request();
>         return;
>     }
>     d->rcr = val & 2; /* keep System Reset type only */
> 
> It looks like d->rcr should be set first to match what hardware does
> (writing 0x6 causes a cold reset, and that's what you usually find in
> the ACPI tables).

What Laszlo implemented (above) is fairly much as the data sheet
describes it. You're supposed to write 0x02 to set the reset type, and
*then* write 0x06 to actually trigger the reset.

However, in practice on real hardware that isn't necessary. Just writing
0x06 once will do the trick. Which is just as well, because otherwise
the ACPI RESET_REG wouldn't be able to represent it; that only describes
*one* write, not two.

So yes, you're right. Once qemu starts to distinguish properly between
hard and soft reset, it should honour the value of bit 1 in the write
which actually triggers the reset. Anthony's patch set did that.. albeit
backwards, doing hard and soft reset the wrong way round.
David Woodhouse - Feb. 21, 2013, 9:15 a.m.
On Thu, 2013-02-21 at 02:10 +0100, Laszlo Ersek wrote:
> On 02/21/13 00:55, David Woodhouse wrote:
> > Well... your test now works because of the bug that Anthony is trying to
> > fix :)
> 
> I don't believe so,

Ok, for the *specific* variant of the test that you did.

But there are many tests you could have done which *do* only work
because of the bug that Anthony is trying to fix. From what you say, it
looks like if the kernel had used the EFI runtime services 'ResetSystem'
call, that "should" have failed too since it only does a KBC soft reset.

> We might want to discuss two things here:
> 
> (1) The reset capability that OVMF exports via ACPI -- I agree that I
> should be effecting the 0xCF9 thing in the appropriate table.

Right. When doing that, we should bear in mind your observation (which I
can confirm here) that boards in the field tend to have the RESET_REG
filled in to point at 0xcf9, but *without* the corresponding flag set in
the FADT flags to say that it's supported. It would be interesting to
know if there's a *reason* for that, or if it's just the typical failure
of BIOS engineers to actually read a specification or care about quality
any further than "it boots one version of Windows when the wind is in an
easterly direction".

> (2) There's also one point (that I've ever seen) where OVMF itself
> resets the platform. It is on the initial config TUI; there's a Reset
> System option somewhere.

It's a runtime services call. The kernel can use it too, and perhaps one
might even argue that the kernel *should* use it by default, in
preference to anything else? But again I suppose that's only true if
Windows uses it; if Windows doesn't then we can expect that nobody out
there bothers to *test* its implementation on real hardware :(

> OTOH if the keyboard reset gets soft in qemu, then OVMF's hard reset
> (the above code) will break. Maybe I could cycle between 0xCF9 and 0x64
> in ResetCold(), starting with 0xCF9. (The full logic in the Linux kernel
> seems a bit too much, see the list of source files in
> <http://thread.gmane.org/gmane.comp.bios.tianocore.devel/2093/focus=195441>.)

Just cf9, kbc, cf9, kbc perhaps? http://mjg59.dreamwidth.org/3561.html
Anthony Liguori - Feb. 21, 2013, 2:37 p.m.
David Woodhouse <dwmw2@infradead.org> writes:

> On Thu, 2013-02-21 at 02:10 +0100, Laszlo Ersek wrote:
>> On 02/21/13 00:55, David Woodhouse wrote:
>> > Well... your test now works because of the bug that Anthony is trying to
>> > fix :)
>> 
>> I don't believe so,
>
> Ok, for the *specific* variant of the test that you did.
>
> But there are many tests you could have done which *do* only work
> because of the bug that Anthony is trying to fix. From what you say, it
> looks like if the kernel had used the EFI runtime services 'ResetSystem'
> call, that "should" have failed too since it only does a KBC soft
> reset.

Help me understand where we're at.  If you fix the bugs in UEFI and
SeaBIOS, and I correct the reset patches I pointed you at earlier, we're
good?  Or is the lack of big real mode at startup on non-unrestricted
mode boxes also a problem?

I never quite understood how the two related to each other in this thread.

Regards,

Anthony Liguori

>
>> We might want to discuss two things here:
>> 
>> (1) The reset capability that OVMF exports via ACPI -- I agree that I
>> should be effecting the 0xCF9 thing in the appropriate table.
>
> Right. When doing that, we should bear in mind your observation (which I
> can confirm here) that boards in the field tend to have the RESET_REG
> filled in to point at 0xcf9, but *without* the corresponding flag set in
> the FADT flags to say that it's supported. It would be interesting to
> know if there's a *reason* for that, or if it's just the typical failure
> of BIOS engineers to actually read a specification or care about quality
> any further than "it boots one version of Windows when the wind is in an
> easterly direction".
>
>> (2) There's also one point (that I've ever seen) where OVMF itself
>> resets the platform. It is on the initial config TUI; there's a Reset
>> System option somewhere.
>
> It's a runtime services call. The kernel can use it too, and perhaps one
> might even argue that the kernel *should* use it by default, in
> preference to anything else? But again I suppose that's only true if
> Windows uses it; if Windows doesn't then we can expect that nobody out
> there bothers to *test* its implementation on real hardware :(
>
>> OTOH if the keyboard reset gets soft in qemu, then OVMF's hard reset
>> (the above code) will break. Maybe I could cycle between 0xCF9 and 0x64
>> in ResetCold(), starting with 0xCF9. (The full logic in the Linux kernel
>> seems a bit too much, see the list of source files in
>> <http://thread.gmane.org/gmane.comp.bios.tianocore.devel/2093/focus=195441>.)
>
> Just cf9, kbc, cf9, kbc perhaps? http://mjg59.dreamwidth.org/3561.html
>
>
> -- 
> dwmw2
David Woodhouse - Feb. 21, 2013, 2:41 p.m.
On Thu, 2013-02-21 at 08:37 -0600, Anthony Liguori wrote:
> 
> Help me understand where we're at.  If you fix the bugs in UEFI and
> SeaBIOS, and I correct the reset patches I pointed you at earlier, we're
> good? 

Yes. And merge the bit which resets the PAM configuration in the i440FX
on a hard reset. And make sure that suspend/resume, keyboard controller,
etc., are *not* hard resets.

>  Or is the lack of big real mode at startup on non-unrestricted
> mode boxes also a problem?

No.

> I never quite understood how the two related to each other in this thread.

It's one or the other. If big real mode actually worked, we'd run from
0xfffffff0 after reset (*any* reset), and we wouldn't *care* what we get
when we read from 0xffff0.

Since big real mode is broken, we *do* care what's at 0xffff0. And thus
we need to make sure that after a *hard* reset it's the ROM (OVMF), and
after a *soft* reset it's still RAM (SeaBIOS after it's done its init,
and can find the ACPI resume vector, etc.)
Laszlo Ersek - Feb. 21, 2013, 4:38 p.m.
On 02/21/13 09:36, Paolo Bonzini wrote:
> Il 21/02/2013 02:10, Laszlo Ersek ha scritto:
>> OTOH if the keyboard reset gets soft in qemu, then OVMF's hard reset
>> (the above code) will break. Maybe I could cycle between 0xCF9 and 0x64
>> in ResetCold(), starting with 0xCF9.
> 
> Yes, that's the right thing to do.
> 
> Also, in QEMU you're doing:
> 
>     if (val & 4) {
>         qemu_system_reset_request();
>         return;
>     }
>     d->rcr = val & 2; /* keep System Reset type only */
> 
> It looks like d->rcr should be set first to match what hardware does
> (writing 0x6 causes a cold reset, and that's what you usually find in
> the ACPI tables).

I was thinking about that, yes. The language in the PIIX3 spec in fact
suggested something like this to me:

    if (val & 4) {
        if ((val & 2) || (d->rcr & 2)) {
            /* Either current write or the previous setting is asking
             * for hard reset. Don't bother storing any value since the
             * register will be cleared anyway during hard reset.
             */
            request_hard_reset();
            return;
        }

        /* Soft reset requested. The reset type is stored & survives the
         * reset, but bit 2 (value 4) can never read as 1.
         *
         * Actually if we reach this point,
         * (val & 2) == 0 && (d->rcr & 2) == 0, so there's nothing to
         * store. Other bits in d->rcr don't have to be cleared because
         * we never allow them to be set.
         */
        request_soft_reset();
        return;
    }

    /* No reset requested, just save the reset type for later. */
    d->rcr = val & 2;

(Regarding the logical disjunction before the hard reset, the PIIX3 spec
says "For example, to initiate a soft reset via the CF9 Reset Control
Register, write 00h then 04h, then read the CF9 register." Ie. you can't
just blindly write 0x04, because the previously set SRST could still
force a hard reset.)

Now add to the above code: "if we reset, we always do it the hard way"
-- I took that for an "axiom":

    if (val & 4) {
        if (true) {
            request_hard_reset();
            return;
        }
        /* not reached */
    }
    d->rcr = val & 2;

And then you get what I submitted in the patch.

(Plus of course if reset callbacks actually care about d->rcr, then the
"don't bother" thing isn't valid anymore, but when I submitted the
patch, I was just adding d->rcr so nobody could have cared.)

Laszlo
Laszlo Ersek - Feb. 21, 2013, 5:10 p.m.
On 02/21/13 15:37, Anthony Liguori wrote:
> David Woodhouse <dwmw2@infradead.org> writes:
> 
>> On Thu, 2013-02-21 at 02:10 +0100, Laszlo Ersek wrote:
>>> On 02/21/13 00:55, David Woodhouse wrote:
>>>> Well... your test now works because of the bug that Anthony is trying to
>>>> fix :)
>>>
>>> I don't believe so,
>>
>> Ok, for the *specific* variant of the test that you did.
>>
>> But there are many tests you could have done which *do* only work
>> because of the bug that Anthony is trying to fix. From what you say, it
>> looks like if the kernel had used the EFI runtime services 'ResetSystem'
>> call, that "should" have failed too since it only does a KBC soft
>> reset.
> 
> Help me understand where we're at.  If you fix the bugs in UEFI and
> SeaBIOS, and I correct the reset patches I pointed you at earlier, we're
> good?  Or is the lack of big real mode at startup on non-unrestricted
> mode boxes also a problem?
> 
> I never quite understood how the two related to each other in this thread.

In addition to what David wrote, I'll try to formalize the problem I ran
into.

    assert(firmware is OVMF + SeaBIOS CSM);
    assert(at 0xFFFFFFF0, OVMF reset vector / startup code is visible);
    assert(reset, soft or hard, was requested);

    if (!kvm_enabled() ||
        (host_cpu_supports_unrestricted_guest() &&
         !host_admin_has_disabled_unrestricted_guest()) ||
        kvm_is_recent_and_doesnt_approximate_real_mode_with_vm86()) {
        jump_target = 0xFFFFFFF0;
    } else {
        /* KVM problem hits */
        jump_target = 0xFFFF0;
    }

    if (reset_kind_is_hard() &&
        qemu_is_recent_and_clears_i440FX_PAM_regs_at_hard_reset()) {
        /* workable */
        visible_at_FFFF0h = low_image_of_OVMF_reset_vector_from_ROM;
    } else {
        /* unusable under OVMF+SeaBIOS CSM */
        visible_at_FFFF0h = SeaBIOS_reset_code_retriggering_reset;
    }

    jump_to(jump_target);

If kvm is disabled, or it is enabled and the host has support for
unrestricted guest and the sysadmin hasn't disabled UG, or the host is
old or UG is disabled BUT KVM fully-emulates real mode instead of
approximating it, *then* we jump to the correct OVMF reboot code at hard
reset time, and we don't care what's visible at 0xFFFF0 at all.

If however the KVM problem affects us, then we *do* care about what's
visible at 0xFFFF0. If we want hard reset, and qemu is recent and resets
the PAM regs at hard reset, then the incorrectly low jump_target value
ultimately points into good code.

Otherwise at 0xFFFF0 we'll find a vestigial SeaBIOS reset vector from
the CSM binary, which just re-requests a reset, and we reenter the above
pseudo-code at the top. (Infinite loop, if SeaBIOS requests a soft reset
*or* it requests a hard one but qemu doesn't have the PAM fix.)

Since
- I like kvm, and
- I cannot easily change my hardware (which doesn't support UG), and
- I prefer to run the RHEL-6 kernel which has "old" KVM,
I depend on David's fix for the PAM registers.

Laszlo
David Woodhouse - Feb. 21, 2013, 5:12 p.m.
On Thu, 2013-02-21 at 18:10 +0100, Laszlo Ersek wrote:
> Since
> - I like kvm, and
> - I cannot easily change my hardware (which doesn't support UG), and
> - I prefer to run the RHEL-6 kernel which has "old" KVM,
> I depend on David's fix for the PAM registers.

Which means your suspend/resume is broken. Yay! :)

Anthony's patchset should hopefully fix that.

I wonder what it would take to work around the KVM bug in qemu though;
can we actually *emulate* 16-bit mode instead of using KVM, on the
kernels where KVM is broken? And then transition back to using KVM when
we switch to 32-bit or 64-bit mode?
Paolo Bonzini - Feb. 21, 2013, 5:19 p.m.
Il 21/02/2013 18:12, David Woodhouse ha scritto:
>> > Since
>> > - I like kvm, and
>> > - I cannot easily change my hardware (which doesn't support UG), and
>> > - I prefer to run the RHEL-6 kernel which has "old" KVM,
>> > I depend on David's fix for the PAM registers.
> Which means your suspend/resume is broken. Yay! :)
> 
> Anthony's patchset should hopefully fix that.
> 
> I wonder what it would take to work around the KVM bug in qemu though;
> can we actually *emulate* 16-bit mode instead of using KVM, on the
> kernels where KVM is broken? And then transition back to using KVM when
> we switch to 32-bit or 64-bit mode?

I don't think you want to do that.  It would be really slow, it would
trade some emulation bugs for others, and people who mind security would
not appreciate the additional attack surface of TCG.

(BTW, that's how the very first version of KVM worked.  It didn't use
hardware virtualization at all until entering 64-bit mode, IIRC).

I think we just have to fix it in 1.5, and suggest a newer kernel to
everyone else.

Paolo
Laszlo Ersek - Feb. 21, 2013, 5:22 p.m.
On 02/21/13 18:12, David Woodhouse wrote:
> On Thu, 2013-02-21 at 18:10 +0100, Laszlo Ersek wrote:
>> Since
>> - I like kvm, and
>> - I cannot easily change my hardware (which doesn't support UG), and
>> - I prefer to run the RHEL-6 kernel which has "old" KVM,
>> I depend on David's fix for the PAM registers.
> 
> Which means your suspend/resume is broken. Yay! :)
> 
> Anthony's patchset should hopefully fix that.
> 
> I wonder what it would take to work around the KVM bug in qemu though;
> can we actually *emulate* 16-bit mode instead of using KVM, on the
> kernels where KVM is broken? And then transition back to using KVM when
> we switch to 32-bit or 64-bit mode?

I can but join you in wondering! :)

Laszlo
Anthony Liguori - Feb. 21, 2013, 5:32 p.m.
David Woodhouse <dwmw2@infradead.org> writes:

> On Thu, 2013-02-21 at 18:10 +0100, Laszlo Ersek wrote:
>> Since
>> - I like kvm, and
>> - I cannot easily change my hardware (which doesn't support UG), and
>> - I prefer to run the RHEL-6 kernel which has "old" KVM,
>> I depend on David's fix for the PAM registers.
>
> Which means your suspend/resume is broken. Yay! :)
>
> Anthony's patchset should hopefully fix that.

Will post a new version later today.  Am testing now with Laszlo's mock
up from a previous note.

I'm also going to write a qtest test case to try and validate all of
this behavior (minus the KVM bits).

> I wonder what it would take to work around the KVM bug in qemu though;
> can we actually *emulate* 16-bit mode instead of using KVM, on the
> kernels where KVM is broken? And then transition back to using KVM when
> we switch to 32-bit or 64-bit mode?

It's in the realm of possibility but ugly enough that we never did it.

I even worked on this way back in the day with Xen.  It sucked pretty
bad.  The challenge in support SMP eventually scared me off of it.

http://new-wiki.xen.org/old-wiki/xenwiki/HVM/V2E.html

Regards,

Anthony Liguori

>
> -- 
> dwmw2
Laszlo Ersek - Feb. 21, 2013, 6:24 p.m.
> (1) The reset capability that OVMF exports via ACPI -- I agree that I
> should be effecting the 0xCF9 thing in the appropriate table.

On a second thought, this will require a new build -D flag, or a PCD.

I'm not worried about the ACPI 1.0 --> ACPI 2.0 change in the FADT, the
table struct itself forward compatible.

However currently we're not saying anything about the reset capabilities
of the platform. A client looking at the FADT for reset info will find
nothing and follow its own logic, which may or may not include writing
to 0xCF9, but we don't have any part in it.

If now the FADT starts to claim 0xCF9 on a qemu version that doesn't
actually support it, we could mislead the client. Unless we can
interrogate qemu about the support (and I think we can't), we'll have to
depend on a build-time option. (Or should I shoehorn it into -D CSM_ENABLE?)

Jordan, what do you think?

(Of course this *too* would go away if qemu itself prepared all ACPI
tables over fw_cfg for whichever firmware the user selects. Then we
wouldn't have to duplicate ACPI work between SeaBIOS and OVMF any
longer. This task has been on my todo list forever, but it never seems
to near the top.

Maybe moving to a hermit cabin for a month and sleeping even less would
make it happen. I have no clue how others do it.)

Thanks!
Laszlo
Anthony Liguori - Feb. 21, 2013, 7:31 p.m.
Paolo Bonzini <pbonzini@redhat.com> writes:

> Il 21/02/2013 18:12, David Woodhouse ha scritto:
>>> > Since
>>> > - I like kvm, and
>>> > - I cannot easily change my hardware (which doesn't support UG), and
>>> > - I prefer to run the RHEL-6 kernel which has "old" KVM,
>>> > I depend on David's fix for the PAM registers.
>> Which means your suspend/resume is broken. Yay! :)
>> 
>> Anthony's patchset should hopefully fix that.
>> 
>> I wonder what it would take to work around the KVM bug in qemu though;
>> can we actually *emulate* 16-bit mode instead of using KVM, on the
>> kernels where KVM is broken? And then transition back to using KVM when
>> we switch to 32-bit or 64-bit mode?
>
> I don't think you want to do that.  It would be really slow, it would
> trade some emulation bugs for others, and people who mind security would
> not appreciate the additional attack surface of TCG.
>
> (BTW, that's how the very first version of KVM worked.  It didn't use
> hardware virtualization at all until entering 64-bit mode, IIRC).

Pre-released versions, yes.  Avi started with emulating up to 64-bit and
worked up from there.  None of the public versions ever implemented
this.

Regards,

Anthony Liguori

>
> I think we just have to fix it in 1.5, and suggest a newer kernel to
> everyone else.
>
> Paolo
Paolo Bonzini - Feb. 21, 2013, 7:31 p.m.
Il 21/02/2013 19:24, Laszlo Ersek ha scritto:
>> > (1) The reset capability that OVMF exports via ACPI -- I agree that I
>> > should be effecting the 0xCF9 thing in the appropriate table.
> On a second thought, this will require a new build -D flag, or a PCD.
> 
> I'm not worried about the ACPI 1.0 --> ACPI 2.0 change in the FADT, the
> table struct itself forward compatible.
> 
> However currently we're not saying anything about the reset capabilities
> of the platform. A client looking at the FADT for reset info will find
> nothing and follow its own logic, which may or may not include writing
> to 0xCF9, but we don't have any part in it.
> 
> If now the FADT starts to claim 0xCF9 on a qemu version that doesn't
> actually support it, we could mislead the client. Unless we can
> interrogate qemu about the support (and I think we can't), we'll have to
> depend on a build-time option. (Or should I shoehorn it into -D CSM_ENABLE?)
> 
> Jordan, what do you think?

ACPI tables are hosed enough on some real system that we can assume that
all guests will fall back to something else---typically a keyboard
controller reset.

See the sequence that Windows does: ACPI, kbd, ACPI, kbd.

Paolo
Laszlo Ersek - Feb. 21, 2013, 7:52 p.m.
On 02/21/13 20:31, Paolo Bonzini wrote:
> Il 21/02/2013 19:24, Laszlo Ersek ha scritto:
>>>> (1) The reset capability that OVMF exports via ACPI -- I agree that I
>>>> should be effecting the 0xCF9 thing in the appropriate table.
>> On a second thought, this will require a new build -D flag, or a PCD.
>>
>> I'm not worried about the ACPI 1.0 --> ACPI 2.0 change in the FADT, the
>> table struct itself forward compatible.
>>
>> However currently we're not saying anything about the reset capabilities
>> of the platform. A client looking at the FADT for reset info will find
>> nothing and follow its own logic, which may or may not include writing
>> to 0xCF9, but we don't have any part in it.
>>
>> If now the FADT starts to claim 0xCF9 on a qemu version that doesn't
>> actually support it, we could mislead the client. Unless we can
>> interrogate qemu about the support (and I think we can't), we'll have to
>> depend on a build-time option. (Or should I shoehorn it into -D CSM_ENABLE?)
>>
>> Jordan, what do you think?
> 
> ACPI tables are hosed enough on some real system that we can assume that
> all guests will fall back to something else---typically a keyboard
> controller reset.

That works for me. Thx.

(BTW I can "plausibly justify" why a BIOS vendor would advertize 0xCF9
(or anything else) in RESET_REG, but deny it by clearing RESET_REG_SUP:
they're probably not sure what hardware the BIOS will be flashed to. So
RESET_REG is a hint for OSPM, but if it doesn't work, "we told you so in
RESET_REG_SUP!" :) Repudiation of responsibility. Maybe we should follow
suit :))

Laszlo
H. Peter Anvin - Feb. 28, 2013, 3 p.m.
On 02/20/2013 07:37 AM, David Woodhouse wrote:
> On Wed, 2013-02-20 at 16:34 +0100, Paolo Bonzini wrote:
>> Il 20/02/2013 16:18, Laszlo Ersek ha scritto:
>>>>> I'm beginning to wish I'd just ignored the fact that
>>>>> we need a properly working "soft" reset to get back from 286 protected
>>>>> mode to real mode, and wired up the damn PAM reset unconditionally. I'm
>>>>> not convinced that the protected->real mode transition will work for
>>>>> anyone anyway.
>>> I believe currently we must be somewhere "between" soft reset & hard
>>> reset. I estimate getting closer to a well-emulated hard reset is more
>>> important than getting closer to a soft one. If you were to extend the
>>> i440FX reset handler so that it unconditionally resets the PAMs, I'd
>>> give my Rb. (Take it for what it's worth :))
>>
>> It would actually make sense.  The right way to do soft reset has
>> nothing to do with qemu_system_reset_request().
> 
> I've posted that version of the patch, with a suitable comment.
> 

Right... the "soft reset" described above is really INIT, which isn't
even a reset in modern CPUs (it couldn't be, it has to preserve caches)
but more of a special interrupt.  It is also used during multiprocessor
init.

	-hpa

Patch

diff --git a/hw/piix_pci.c b/hw/piix_pci.c
index 6c77e49..0c8f613 100644
--- a/hw/piix_pci.c
+++ b/hw/piix_pci.c
@@ -71,6 +71,7 @@  typedef struct PIIX3State {
     uint64_t pic_levels;
 
     qemu_irq *pic;
+    qemu_irq reset_out;
 
     /* This member isn't used. Just for save/load compatibility */
     int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS];
@@ -92,6 +93,7 @@  struct PCII440FXState {
     PAMMemoryRegion pam_regions[13];
     MemoryRegion smram_region;
     uint8_t smm_enabled;
+    uint8_t hard_reset;
 };
 
 
@@ -171,6 +173,42 @@  static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
     return 0;
 }
 
+static void i440fx_reset(DeviceState *ds)
+{
+    PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, ds);
+    PCII440FXState *d = DO_UPCAST(PCII440FXState, dev, dev);
+    uint8_t *pci_conf = d->dev.config;
+
+    /* We only reset the PAM configuration if it was a *hard* reset,
+     * triggered from the RCR on the PIIX3 (or from the TRCR on the
+     * i440FX itself, but we don't implement that yet and software
+     * is advised not to use it when there's a PIIX). */
+    if (!d->hard_reset)
+        return;
+
+    d->hard_reset = 0;
+
+    pci_conf[0x59] = 0x00; // Reset PAM setup
+    pci_conf[0x5a] = 0x00;
+    pci_conf[0x5b] = 0x00;
+    pci_conf[0x5c] = 0x00;
+    pci_conf[0x5d] = 0x00;
+    pci_conf[0x5e] = 0x00;
+    pci_conf[0x5f] = 0x00;
+    pci_conf[0x72] = 0x02; // And SMM
+
+    i440fx_update_memory_mappings(d);
+}
+
+static void i440fx_handle_reset(void *opaque, int n, int level)
+{
+    PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, opaque);
+    PCII440FXState *d = DO_UPCAST(PCII440FXState, dev, dev);
+
+    if (level)
+        d->hard_reset = 1;
+}
+
 static int i440fx_post_load(void *opaque, int version_id)
 {
     PCII440FXState *d = opaque;
@@ -216,6 +254,8 @@  static int i440fx_initfn(PCIDevice *dev)
 
     d->dev.config[I440FX_SMRAM] = 0x02;
 
+    qdev_init_gpio_in(&d->dev.qdev, i440fx_handle_reset, 1);
+
     cpu_smm_register(&i440fx_set_smm, d);
     return 0;
 }
@@ -297,6 +337,8 @@  static PCIBus *i440fx_common_init(const char *device_name,
         pci_bus_set_route_irq_fn(b, piix3_route_intx_pin_to_irq);
     }
     piix3->pic = pic;
+    qdev_connect_gpio_out(&piix3->dev.qdev, 0,
+                          qdev_get_gpio_in(&f->dev.qdev, 0));
     *isa_bus = DO_UPCAST(ISABus, qbus,
                          qdev_get_child_bus(&piix3->dev.qdev, "isa.0"));
 
@@ -521,6 +563,8 @@  static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len)
     PIIX3State *d = opaque;
 
     if (val & 4) {
+        if (val & 2)
+            qemu_irq_pulse(d->reset_out);
         qemu_system_reset_request();
         return;
     }
@@ -550,6 +594,7 @@  static int piix3_initfn(PCIDevice *dev)
     memory_region_add_subregion_overlap(pci_address_space_io(dev), RCR_IOPORT,
                                         &d->rcr_mem, 1);
 
+    qdev_init_gpio_out(&dev->qdev, &d->reset_out, 1);
     qemu_register_reset(piix3_reset, d);
     return 0;
 }
@@ -615,6 +660,7 @@  static void i440fx_class_init(ObjectClass *klass, void *data)
     dc->desc = "Host bridge";
     dc->no_user = 1;
     dc->vmsd = &vmstate_i440fx;
+    dc->reset = i440fx_reset;
 }
 
 static const TypeInfo i440fx_info = {