[v10,1/8] memory: prevent dma-reentrancy issues

Message ID 20230427211013.2994127-2-alxndr@bu.edu
State New
Series: memory: prevent dma-reentrancy issues

Commit Message

Alexander Bulekov April 27, 2023, 9:10 p.m. UTC
Add a flag to DeviceState that is set when a device is engaged in PIO/MMIO/DMA.
This flag is set/checked prior to calling a device's MemoryRegion
handlers, and set when device code initiates DMA.  The purpose of this
flag is to prevent two types of DMA-based reentrancy issues:

1.) mmio -> dma -> mmio case
2.) bh -> dma write -> mmio case

These issues have led to problems such as stack-exhaustion and
use-after-frees.
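
For illustration, a minimal sketch of case 1 with invented names
(ExampleState, EXAMPLE_REG_DMA_KICK); dma_memory_write() is the regular
softmmu DMA helper:

    /* Hypothetical MMIO write handler: if the guest programs s->dma_addr
     * to point back into this device's own MMIO window, dma_memory_write()
     * dispatches straight back into example_mmio_write() before the first
     * invocation returns, recursing until the stack is exhausted. */
    static void example_mmio_write(void *opaque, hwaddr addr,
                                   uint64_t val, unsigned size)
    {
        ExampleState *s = opaque;            /* invented device state */

        if (addr == EXAMPLE_REG_DMA_KICK) {  /* invented doorbell register */
            /* mmio -> dma -> mmio */
            dma_memory_write(&address_space_memory, s->dma_addr,
                             s->buf, s->len, MEMTXATTRS_UNSPECIFIED);
        }
    }

With the guard in place, the nested dispatch finds engaged_in_io already
set on the owning device and is rejected instead of recursing.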

Summary of the problem from Peter Maydell:
https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282
Resolves: CVE-2023-0330

Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
---
 include/exec/memory.h  |  5 +++++
 include/hw/qdev-core.h |  7 +++++++
 softmmu/memory.c       | 16 ++++++++++++++++
 3 files changed, 28 insertions(+)

Comments

Thomas Huth April 28, 2023, 6:09 a.m. UTC | #1
On 27/04/2023 23.10, Alexander Bulekov wrote:
> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
...
> +    /* Do not allow more than one simultaneous access to a device's IO Regions */
> +    if (mr->dev && !mr->disable_reentrancy_guard &&
> +        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
> +        if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
> +            warn_report("Blocked re-entrant IO on "
> +                    "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX,
> +                    memory_region_name(mr), addr);

Ack, a warn_report makes sense here, at least initially, to make sure that
people become aware of related problems!

  Thomas
Daniel P. Berrangé April 28, 2023, 8:12 a.m. UTC | #2
On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote:
> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
...
> +    /* Do not allow more than one simultaneous access to a device's IO Regions */
> +    if (mr->dev && !mr->disable_reentrancy_guard &&
> +        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
> +        if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
> +            warn_report("Blocked re-entrant IO on "
> +                    "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX,
> +                    memory_region_name(mr), addr);
> +            return MEMTX_ACCESS_ERROR;

If we issue this warn_report on every invalid memory access, is this
going to become a denial of service by flooding logs, or is the
return of MEMTX_ACCESS_ERROR sufficient to ensure this is only printed
*once* in the lifetime of the QEMU process?


With regards,
Daniel
Thomas Huth April 28, 2023, 8:15 a.m. UTC | #3
On 28/04/2023 10.12, Daniel P. Berrangé wrote:
> On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote:
>> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
...
>> +    /* Do not allow more than one simultaneous access to a device's IO Regions */
>> +    if (mr->dev && !mr->disable_reentrancy_guard &&
>> +        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
>> +        if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
>> +            warn_report("Blocked re-entrant IO on "
>> +                    "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX,
>> +                    memory_region_name(mr), addr);
>> +            return MEMTX_ACCESS_ERROR;
> 
> If we issue this warn_report on every invalid memory access, is this
> going to become a denial of service by flooding logs, or is the
> return MEMTX_ACCESS_ERROR, sufficient to ensure this is only printed
> *once* in the lifetime of the QEMU process ?

Maybe it's better to use warn_report_once() here instead?

  Thomas
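
A minimal sketch of that change, assuming the message text stays as-is
(warn_report_once() takes the same printf-style arguments as warn_report()
and prints at most once per process):

    warn_report_once("Blocked re-entrant IO on "
                     "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX,
                     memory_region_name(mr), addr);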
Alexander Bulekov April 28, 2023, 9:11 a.m. UTC | #4
On 230428 1015, Thomas Huth wrote:
> On 28/04/2023 10.12, Daniel P. Berrangé wrote:
> > On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote:
> > > Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
...
> > > +    /* Do not allow more than one simultaneous access to a device's IO Regions */
> > > +    if (mr->dev && !mr->disable_reentrancy_guard &&
> > > +        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
> > > +        if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
> > > +            warn_report("Blocked re-entrant IO on "
> > > +                    "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX,
> > > +                    memory_region_name(mr), addr);
> > > +            return MEMTX_ACCESS_ERROR;
> > 
> > If we issue this warn_report on every invalid memory access, is this
> > going to become a denial of service by flooding logs, or is the
> > return MEMTX_ACCESS_ERROR, sufficient to ensure this is only printed
> > *once* in the lifetime of the QEMU process ?
> 
> Maybe it's better to use warn_report_once() here instead?

Sounds good - should I respin the series to change this?
-Alex
Thomas Huth April 28, 2023, 9:14 a.m. UTC | #5
On 28/04/2023 11.11, Alexander Bulekov wrote:
> On 230428 1015, Thomas Huth wrote:
>> On 28/04/2023 10.12, Daniel P. Berrangé wrote:
>>> On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote:
>>>> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
...
>>>> +    /* Do not allow more than one simultaneous access to a device's IO Regions */
>>>> +    if (mr->dev && !mr->disable_reentrancy_guard &&
>>>> +        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
>>>> +        if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
>>>> +            warn_report("Blocked re-entrant IO on "
>>>> +                    "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX,
>>>> +                    memory_region_name(mr), addr);
>>>> +            return MEMTX_ACCESS_ERROR;
>>>
>>> If we issue this warn_report on every invalid memory access, is this
>>> going to become a denial of service by flooding logs, or is the
>>> return MEMTX_ACCESS_ERROR, sufficient to ensure this is only printed
>>> *once* in the lifetime of the QEMU process ?
>>
>> Maybe it's better to use warn_report_once() here instead?
> 
> Sounds good - should I respin the series to change this?

Not necessary, I've got v10 already queued, I'll fix it up there

  Thomas
gaosong May 6, 2023, 9:25 a.m. UTC | #6
Hi Alexander

On 28/04/2023 17.14, Thomas Huth wrote:
> On 28/04/2023 11.11, Alexander Bulekov wrote:
>> On 230428 1015, Thomas Huth wrote:
>>> On 28/04/2023 10.12, Daniel P. Berrangé wrote:
>>>> On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote:
>>>>> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
...
>>>>> +    /* Do not allow more than one simultaneous access to a device's IO Regions */
>>>>> +    if (mr->dev && !mr->disable_reentrancy_guard &&
>>>>> +        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
>>>>> +        if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
>>>>> +            warn_report("Blocked re-entrant IO on "
>>>>> +                    "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX,
>>>>> +                    memory_region_name(mr), addr);
>>>>> +            return MEMTX_ACCESS_ERROR;
>>>>
>>>> If we issue this warn_report on every invalid memory access, is this
>>>> going to become a denial of service by flooding logs, or is the
>>>> return MEMTX_ACCESS_ERROR, sufficient to ensure this is only printed
>>>> *once* in the lifetime of the QEMU process ?
>>>
>>> Maybe it's better to use warn_report_once() here instead?
>>
>> Sounds good - should I respin the series to change this?
>
> Not necessary, I've got v10 already queued, I'll fix it up there
>
>  Thomas
>
This patch causes the LoongArch virtual machine to fail to start the
secondary CPUs.

    ./build/qemu-system-loongarch64 -machine virt -m 8G -cpu la464 \
        -smp 4 -bios QEMU_EFI.fd -kernel vmlinuz.efi -initrd ramdisk \
        -serial stdio -monitor telnet:localhost:4495,server,nowait \
        -append "root=/dev/ram rdinit=/sbin/init console=ttyS0,115200" \
        --nographic


....
qemu-system-loongarch64: warning: Blocked re-entrant IO on MemoryRegion: loongarch_ipi_iocsr at addr: 0x24

....
[    0.059284] smp: Bringing up secondary CPUs ...
[    0.062540] Booting CPU#1...
[    5.204340] CPU1: failed to start
[    5.211435] Booting CPU#2...
[   10.465762] CPU2: failed to start
[   10.467757] Booting CPU#3...
[   15.805430] CPU3: failed to start
[   15.805980] smp: Brought up 1 node, 1 CPU
[   15.818832] devtmpfs: initialized
[   15.830287] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns
[   15.830429] futex hash table entries: 1024 (order: 2, 65536 bytes, linear)
[   15.840470] NET: Registered PF_NETLINK/PF_ROUTE protocol family
[   15.845424] audit: initializing netlink subsys (disabled)
...

gdb
(gdb) n
qemu-system-loongarch64: warning: Blocked re-entrant IO on MemoryRegion: loongarch_ipi_iocsr at addr: 0x24
552                return MEMTX_ACCESS_ERROR;
(gdb) bt
#0  0x0000555555ae93aa in access_with_adjusted_size (addr=36, value=0x7fffd299f988, size=4, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn=0x555555ae98f0 <memory_region_write_accessor>, mr=0x555556e3a740, attrs=...) at ../softmmu/memory.c:552
#1  0x0000555555ae962c in memory_region_dispatch_write (mr=0x555556e3a740, addr=36, data=<optimized out>, op=<optimized out>, attrs=...) at ../softmmu/memory.c:1531
#2  0x0000555555af5739 in address_space_stl_internal (endian=DEVICE_NATIVE_ENDIAN, result=0x0, attrs=..., val=0, addr=<optimized out>, as=0x5555567b4bb8) at /home3/gaosong/code/vec/github/send/v2/qemu/include/exec/memory.h:2997
#3  0x0000555555af5739 in address_space_stl (as=0x5555567b4bb8, addr=<optimized out>, val=0, attrs=..., result=0x0) at /home3/gaosong/code/vec/github/send/v2/qemu/memory_ldst.c.inc:350
#4  0x0000555555ae99e8 in memory_region_write_accessor (mr=0x555556e3ab80, addr=0, value=<optimized out>, size=8, shift=<optimized out>, mask=<optimized out>, attrs=...) at ../softmmu/memory.c:493
#5  0x0000555555ae92ae in access_with_adjusted_size (addr=0, value=0x7fffd299fb88, size=8, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn=0x555555ae98f0 <memory_region_write_accessor>, mr=0x555556e3ab80, attrs=...) at ../softmmu/memory.c:567
#6  0x0000555555ae962c in memory_region_dispatch_write (mr=0x555556e3ab80, addr=0, data=<optimized out>, op=<optimized out>, attrs=...) at ../softmmu/memory.c:1531
#7  0x0000555555af2591 in address_space_stq_internal (as=<optimized out>, addr=<optimized out>, val=2147483652, attrs=..., result=0x0, endian=DEVICE_NATIVE_ENDIAN) at /home3/gaosong/code/vec/github/send/v2/qemu/include/exec/memory.h:2997
#8  0x00007fff8d922923 in code_gen_buffer ()
#9  0x0000555555b4b9e0 in cpu_tb_exec (tb_exit=<synthetic pointer>, itb=<optimized out>, cpu=0x5555567a5d00) at ../accel/tcg/cpu-exec.c:460
#10 0x0000555555b4b9e0 in cpu_loop_exec_tb (tb_exit=<synthetic pointer>, last_tb=<synthetic pointer>, pc=<optimized out>, tb=<optimized out>, cpu=0x5555567a5d00) at ../accel/tcg/cpu-exec.c:893
#11 0x0000555555b4b9e0 in cpu_exec_loop (cpu=0x5555567a5d00, sc=<optimized out>) at ../accel/tcg/cpu-exec.c:1013
#12 0x0000555555b4c67d in cpu_exec_setjmp (cpu=0x5555567a5d00, sc=0x7fffd29a0220) at ../accel/tcg/cpu-exec.c:1043
#13 0x0000555555b4c746 in cpu_exec (cpu=0x5555567a5d00) at ../accel/tcg/cpu-exec.c:1069
#14 0x0000555555b634bf in tcg_cpus_exec (cpu=0x5555567a5d00) at ../accel/tcg/tcg-accel-ops.c:81
#15 0x0000555555b63613 in mttcg_cpu_thread_fn (arg=0x5555567a5d00) at ../accel/tcg/tcg-accel-ops-mttcg.c:95
#16 0x0000555555cdefba in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:541
#17 0x00007fffefcb717a in start_thread () at /lib64/libpthread.so.0
#18 0x00007fffef9e6dc3 in clone () at /lib64/libc.so.6
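
The shape visible in frames #0-#4 is an MMIO write handler forwarding a
store, via address_space_stl(), to another IO region owned by the same
device; a hypothetical sketch of that pattern (the names below are
invented, this is not the actual loongarch_ipi code):

    static void example_ipi_write(void *opaque, hwaddr addr,
                                  uint64_t val, unsigned size)
    {
        ExampleIPIState *s = opaque;   /* invented state */

        /* The nested store lands on an IO region whose owner is the same
         * DeviceState, so mem_reentrancy_guard.engaged_in_io is already
         * true there and the access fails with MEMTX_ACCESS_ERROR. */
        address_space_stl(&s->iocsr_as, EXAMPLE_MAILBOX_ADDR, val,
                          MEMTXATTRS_UNSPECIFIED, NULL);
    }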

About LoongArch:
  - docs/system/loongarch/virt.rst

Thanks.
Song Gao
Thomas Huth May 8, 2023, 9:33 a.m. UTC | #7
On 06/05/2023 11.25, Song Gao wrote:
>   Hi Alexander
> 
> On 28/04/2023 17.14, Thomas Huth wrote:
...
> This patch causes the LoongArch virtual machine to fail to start the secondary CPUs.
>
>      ./build/qemu-system-loongarch64 -machine virt -m 8G -cpu la464 \
>          -smp 4 -bios QEMU_EFI.fd -kernel vmlinuz.efi -initrd ramdisk \
>          -serial stdio -monitor telnet:localhost:4495,server,nowait \
>          -append "root=/dev/ram rdinit=/sbin/init console=ttyS0,115200" --nographic
>
> ....
> qemu-system-loongarch64: warning: Blocked re-entrant IO on MemoryRegion: loongarch_ipi_iocsr at addr: 0x24

Oh, another spot that needs special handling ... I see Alexander already 
sent a patch (thanks!), but anyway, this is a good indication that we're 
missing some test coverage in the CI.

Are there any loongarch kernel images available for public download 
somewhere? If so, we really should add an avocado regression test for this - 
since as far as I can see, we don't have any  tests for loongarch in 
tests/avocado yet?

  Thomas
gaosong May 8, 2023, 1:03 p.m. UTC | #8
Hi, Thomas

On 08/05/2023 17.33, Thomas Huth wrote:
...
> Oh, another spot that needs special handling ... I see Alexander 
> already sent a patch (thanks!), but anyway, this is a good indication 
> that we're missing some test coverage in the CI.
>
> Are there any loongarch kernel images available for public download 
> somewhere? If so, we really should add an avocado regression test for 
> this - since as far as I can see, we don't have any  tests for 
> loongarch in tests/avocado yet?
>
We can get some binaries at:
https://github.com/yangxiaojuan-loongson/qemu-binary

I'm not sure that avocado testing can be done using just the kernel.

Is a full loongarch system required? If yes, unfortunately there is not
yet a well-developed loongarch system, and we are moving forward with
loongarch support for Debian systems.

There are also systems that already support loongarch, e.g. archlinux:
https://mirrors.wsyu.edu.cn/loongarch/archlinux/images/

But I briefly tested that it doesn't work in the latest qemu. I've
already pushed the maintainer to update the system.

Thanks.
Song Gao
Thomas Huth May 8, 2023, 1:12 p.m. UTC | #9
On 08/05/2023 15.03, Song Gao wrote:
> Hi, Thomas
> 
> On 08/05/2023 17.33, Thomas Huth wrote:
...
>>
>> Oh, another spot that needs special handling ... I see Alexander already 
>> sent a patch (thanks!), but anyway, this is a good indication that we're 
>> missing some test coverage in the CI.
>>
>> Are there any loongarch kernel images available for public download 
>> somewhere? If so, we really should add an avocado regression test for this 
>> - since as far as I can see, we don't have any  tests for loongarch in 
>> tests/avocado yet?
>>
> We can get some binaries at:
> https://github.com/yangxiaojuan-loongson/qemu-binary
>
> I'm not sure that avocado testing can be done using just the kernel.
>
> Is a full loongarch system required?

No, you don't need a full distro installation, just a kernel with ramdisk 
(which is also available there) is good enough for a basic test, e.g. just 
check whether the kernel boots to a certain point is good enough to provide 
a basic sanity test. If you then can also get even into a shell (of the 
ramdisk), you can check some additional stuff in the sysfs or "dmesg" 
output, see for example tests/avocado/machine_s390_ccw_virtio.py which does 
such checks with a kernel and initrd on s390x.

  Thomas
gaosong May 9, 2023, 2:13 a.m. UTC | #10
On 08/05/2023 21.12, Thomas Huth wrote:
...
>
> No, you don't need a full distro installation, just a kernel with 
> ramdisk (which is also available there) is good enough for a basic 
> test, e.g. just check whether the kernel boots to a certain point is 
> good enough to provide a basic sanity test. If you then can also get 
> even into a shell (of the ramdisk), you can check some additional 
> stuff in the sysfs or "dmesg" output, see for example 
> tests/avocado/machine_s390_ccw_virtio.py which does such checks with a 
> kernel and initrd on s390x.
>
Thanks for your suggestion.

We will add a loongarch basic test in tests/avocado.

Thanks.
Song Gao
gaosong May 10, 2023, 9:02 a.m. UTC | #11
Hi, Thomas

On 08/05/2023 21.12, Thomas Huth wrote:
...
>> We can get some binaries at:
>> https://github.com/yangxiaojuan-loongson/qemu-binary
>>
>> I'm not sure that avocado testing can be done using just the kernel.
>>
>> Is a full loongarch system required?
>
> No, you don't need a full distro installation, just a kernel with 
> ramdisk (which is also available there) is good enough for a basic 
> test, e.g. just check whether the kernel boots to a certain point is 
> good enough to provide a basic sanity test. If you then can also get 
> even into a shell (of the ramdisk), you can check some additional 
> stuff in the sysfs or "dmesg" output, see for example 
> tests/avocado/machine_s390_ccw_virtio.py which does such checks with a 
> kernel and initrd on s390x.
>
>
I have a few questions.

I run 'make check-avocado AVOCADO_TESTS=./tests/avocado/machine_s390_ccw_virtio.py V=1':

root@loongson-KVM:~/work/qemu# make check-avocado AVOCADO_TESTS=./tests/avocado/machine_s390_ccw_virtio.py V=1
changing dir to build for make "check-avocado"...
make[1]: Entering directory '/root/work/qemu/build'
(GIT="git" "/root/work/qemu/scripts/git-submodule.sh" update ui/keycodemapdb meson tests/fp/berkeley-testfloat-3 tests/fp/berkeley-softfloat-3 dtc)
/root/work/qemu/build/tests/venv/bin/python3 -m avocado vmimage get --distro=fedora --distro-version=31 --arch=s390x
The image was downloaded:
Provider Version Architecture File
fedora   31      s390x        /root/avocado/data/cache/by_location/8ee06cba5485a58b2203c2c000d6d2ff6da0f040/Fedora-Cloud-Base-31-1.9.s390x.qcow2
/root/work/qemu/build/tests/venv/bin/python3 -m avocado --show=app run --job-results-dir=/root/work/qemu/build/tests/results --filter-by-tags-include-empty --filter-by-tags-include-empty-key --max-parallel-tasks 1 -t arch:loongarch64 -t arch:s390x --failfast ./tests/avocado/machine_s390_ccw_virtio.py
...

This test downloaded the 'Fedora-Cloud-Base-31-1.9.s390x.qcow2' image,
but we don't have a 'Fedora-Cloud-Base-31-1.9.loongarch.qcow2' image.

Am I missing something?

One more question: how do I get the 'kernel_hash' and 'initrd_hash'?

Thanks.
Song Gao
Thomas Huth May 10, 2023, 12:21 p.m. UTC | #12
On 10/05/2023 11.02, Song Gao wrote:
> Hi, Thomas
> 
> On 08/05/2023 21.12, Thomas Huth wrote:
...
> I have a few questions.
>
> I run 'make check-avocado AVOCADO_TESTS=./tests/avocado/machine_s390_ccw_virtio.py V=1'
> ...
> This test downloaded the 'Fedora-Cloud-Base-31-1.9.s390x.qcow2' image,
> but we don't have a 'Fedora-Cloud-Base-31-1.9.loongarch.qcow2' image.
> 
> Am I missing something?

Hmm, that image is not required for those tests... not sure why they get 
downloaded here... I think something in 
tests/avocado/avocado_qemu/__init__.py or in tests/Makefile.include tries to 
download the cloudinit stuff in advance for other tests, but it is certainly 
unrelated to the machine_s390_ccw_virtio.py test that only uses a kernel and 
initrd.

I think you can ignore that (unless there is an error since it's trying to 
download the loongarch Cloud-Base image - then that's a bug).

> One more question: how do I get the 'kernel_hash' and 'initrd_hash'?

I think it's a SHA1 hash by default, so you can for example get it with the 
"sha1sum" tool on the command line.

But seems like it is also possible to specify different algorithms with the 
"algorithm=..." parameter of fetch_asset().

  HTH,
   Thomas
gaosong May 11, 2023, 8:53 a.m. UTC | #13
On 10/05/2023 20.21, Thomas Huth wrote:
...
>> This test downloaded   'Fedora-Cloud-Base-31-1.9.s390x.qcow2' image.
>> but we don't have a  'Fedora-Cloud-Base-31-1.9.loongarch.qcow2' image.
>>
>> Am I missing something?
>
> Hmm, that image is not required for those tests... not sure why they 
> get downloaded here... I think something in 
> tests/avocado/avocado_qemu/__init__.py or in tests/Makefile.include 
> tries to download the cloudinit stuff in advance for other tests, but 
> it is certainly unrelated to the machine_s390_ccw_virtio.py test that 
> only uses a kernel and initrd.
>
> I think you can ignore that (unless there is an error since it's 
> trying to download the loongarch Cloud-Base image - then that's a bug).
>
Yes, we can ignore it; there is no error.
>> One more question: how do I get the 'kernel_hash' and 'initrd_hash'?
>
> I think it's a SHA1 hash by default, so you can for example get it 
> with the "sha1sum" tool on the command line.
>
> But seems like it is also possible to specify different algorithms 
> with the "algorithm=..." parameter of fetch_asset().
>
Thanks for your help.

And should we add '@skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')'?

I see some tests add this.

Thanks.
Song Gao
Thomas Huth May 11, 2023, 8:58 a.m. UTC | #14
On 11/05/2023 10.53, Song Gao wrote:
...
> And should we add '@skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')'?
> 
> I see some tests add this.

No, please don't add that unless there is a good reason. That marker is only
required if the test does not work reliably on gitlab, e.g. if it sometimes
fails due to race conditions or if it takes incredibly long to finish.

  Thomas
gaosong May 11, 2023, 9:08 a.m. UTC | #15
On 11/05/2023 16.58, Thomas Huth wrote:
> On 11/05/2023 10.53, Song Gao wrote:
> ...
>> And should we add '@skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')'?
>>
>> I see some tests add this.
>
> No, please don't add that unless there is a good reason. That marker 
> is only required if the test does not work reliably on gitlab, e.g. if 
> it sometimes fails due to race conditions or if it takes incredibly 
> long to finish.
>
Ok.

Thanks.
Song Gao

Patch

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 15ade918ba..e45ce6061f 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -767,6 +767,8 @@  struct MemoryRegion {
     bool is_iommu;
     RAMBlock *ram_block;
     Object *owner;
+    /* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access hotpath */
+    DeviceState *dev;
 
     const MemoryRegionOps *ops;
     void *opaque;
@@ -791,6 +793,9 @@  struct MemoryRegion {
     unsigned ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
     RamDiscardManager *rdm; /* Only for RAM */
+
+    /* For devices designed to perform re-entrant IO into their own IO MRs */
+    bool disable_reentrancy_guard;
 };
 
 struct IOMMUMemoryRegion {
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index bd50ad5ee1..7623703943 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -162,6 +162,10 @@  struct NamedClockList {
     QLIST_ENTRY(NamedClockList) node;
 };
 
+typedef struct {
+    bool engaged_in_io;
+} MemReentrancyGuard;
+
 /**
  * DeviceState:
  * @realized: Indicates whether the device has been fully constructed.
@@ -194,6 +198,9 @@  struct DeviceState {
     int alias_required_for_version;
     ResettableState reset;
     GSList *unplug_blockers;
+
+    /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
+    MemReentrancyGuard mem_reentrancy_guard;
 };
 
 struct DeviceListener {
diff --git a/softmmu/memory.c b/softmmu/memory.c
index b1a6cae6f5..fe23f0e5ce 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -542,6 +542,18 @@  static MemTxResult access_with_adjusted_size(hwaddr addr,
         access_size_max = 4;
     }
 
+    /* Do not allow more than one simultaneous access to a device's IO Regions */
+    if (mr->dev && !mr->disable_reentrancy_guard &&
+        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
+        if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
+            warn_report("Blocked re-entrant IO on "
+                    "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX,
+                    memory_region_name(mr), addr);
+            return MEMTX_ACCESS_ERROR;
+        }
+        mr->dev->mem_reentrancy_guard.engaged_in_io = true;
+    }
+
     /* FIXME: support unaligned access? */
     access_size = MAX(MIN(size, access_size_max), access_size_min);
     access_mask = MAKE_64BIT_MASK(0, access_size * 8);
@@ -556,6 +568,9 @@  static MemTxResult access_with_adjusted_size(hwaddr addr,
                         access_mask, attrs);
         }
     }
+    if (mr->dev) {
+        mr->dev->mem_reentrancy_guard.engaged_in_io = false;
+    }
     return r;
 }
 
@@ -1170,6 +1185,7 @@  static void memory_region_do_init(MemoryRegion *mr,
     }
     mr->name = g_strdup(name);
     mr->owner = owner;
+    mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
     mr->ram_block = NULL;
 
     if (name) {
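
For devices that legitimately re-enter their own IO regions, the patch
provides a per-region opt-out; a hypothetical sketch of how a device could
use it (ExampleState, EXAMPLE() and example_ops are invented;
memory_region_init_io() and disable_reentrancy_guard are the real
interfaces touched above):

    static void example_realize(DeviceState *dev, Error **errp)
    {
        ExampleState *s = EXAMPLE(dev);   /* invented QOM cast */

        memory_region_init_io(&s->iomem, OBJECT(dev), &example_ops, s,
                              "example-mmio", 0x1000);
        /* Opt only this region out of the re-entrancy guard; any other
         * regions owned by the device remain protected. */
        s->iomem.disable_reentrancy_guard = true;
    }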