diff mbox series

[v2] e1000e: Prevent MSI/MSI-X storms

Message ID bfb309be-3ef4-bd01-b4f8-3ebdeaa64374@web.de
State New
Headers show
Series [v2] e1000e: Prevent MSI/MSI-X storms | expand

Commit Message

Jan Kiszka April 5, 2018, 5:41 p.m. UTC
From: Jan Kiszka <jan.kiszka@siemens.com>

Only signal MSI/MSI-X events on rising edges. So far we re-triggered the
interrupt sources even if the guest did no consumed the pending one,
easily causing interrupt storms.

Issue was observable with Linux 4.16 e1000e driver when MSI-X was used.
Vector 2 was causing interrupt storms after the driver activated the
device.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---

Changes in v2:
 - also update msi_causes_pending after EIAC changes (required because
   there is no e1000e_update_interrupt_state after that

 hw/net/e1000e_core.c | 11 +++++++++++
 hw/net/e1000e_core.h |  2 ++
 2 files changed, 13 insertions(+)

Comments

Jan Kiszka June 30, 2018, 6:13 a.m. UTC | #1
On 2018-04-05 19:41, Jan Kiszka wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
> 
> Only signal MSI/MSI-X events on rising edges. So far we re-triggered the
> interrupt sources even if the guest did no consumed the pending one,
> easily causing interrupt storms.
> 
> Issue was observable with Linux 4.16 e1000e driver when MSI-X was used.
> Vector 2 was causing interrupt storms after the driver activated the
> device.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
> 
> Changes in v2:
>  - also update msi_causes_pending after EIAC changes (required because
>    there is no e1000e_update_interrupt_state after that
> 
>  hw/net/e1000e_core.c | 11 +++++++++++
>  hw/net/e1000e_core.h |  2 ++
>  2 files changed, 13 insertions(+)
> 
> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
> index c93c4661ed..d6ddd59986 100644
> --- a/hw/net/e1000e_core.c
> +++ b/hw/net/e1000e_core.c
> @@ -2027,6 +2027,7 @@ e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg)
>      }
>  
>      core->mac[ICR] &= ~effective_eiac;
> +    core->msi_causes_pending &= ~effective_eiac;
>  
>      if (!(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
>          core->mac[IMS] &= ~effective_eiac;
> @@ -2123,6 +2124,13 @@ e1000e_send_msi(E1000ECore *core, bool msix)
>  {
>      uint32_t causes = core->mac[ICR] & core->mac[IMS] & ~E1000_ICR_ASSERTED;
>  
> +    core->msi_causes_pending &= causes;
> +    causes ^= core->msi_causes_pending;
> +    if (causes == 0) {
> +        return;
> +    }
> +    core->msi_causes_pending |= causes;
> +
>      if (msix) {
>          e1000e_msix_notify(core, causes);
>      } else {
> @@ -2160,6 +2168,9 @@ e1000e_update_interrupt_state(E1000ECore *core)
>      core->mac[ICS] = core->mac[ICR];
>  
>      interrupts_pending = (core->mac[IMS] & core->mac[ICR]) ? true : false;
> +    if (!interrupts_pending) {
> +        core->msi_causes_pending = 0;
> +    }
>  
>      trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS],
>                                          core->mac[ICR], core->mac[IMS]);
> diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
> index 7d8ff41890..63a15510cc 100644
> --- a/hw/net/e1000e_core.h
> +++ b/hw/net/e1000e_core.h
> @@ -109,6 +109,8 @@ struct E1000Core {
>      NICState *owner_nic;
>      PCIDevice *owner;
>      void (*owner_start_recv)(PCIDevice *d);
> +
> +    uint32_t msi_causes_pending;
>  };
>  
>  void
> 

Ping again, this one is still open.

Jan
Jason Wang July 2, 2018, 3:40 a.m. UTC | #2
On 2018年06月30日 14:13, Jan Kiszka wrote:
> On 2018-04-05 19:41, Jan Kiszka wrote:
>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>
>> Only signal MSI/MSI-X events on rising edges. So far we re-triggered the
>> interrupt sources even if the guest did no consumed the pending one,
>> easily causing interrupt storms.
>>
>> Issue was observable with Linux 4.16 e1000e driver when MSI-X was used.
>> Vector 2 was causing interrupt storms after the driver activated the
>> device.
>>
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>> ---
>>
>> Changes in v2:
>>   - also update msi_causes_pending after EIAC changes (required because
>>     there is no e1000e_update_interrupt_state after that
>>
>>   hw/net/e1000e_core.c | 11 +++++++++++
>>   hw/net/e1000e_core.h |  2 ++
>>   2 files changed, 13 insertions(+)
>>
>> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
>> index c93c4661ed..d6ddd59986 100644
>> --- a/hw/net/e1000e_core.c
>> +++ b/hw/net/e1000e_core.c
>> @@ -2027,6 +2027,7 @@ e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg)
>>       }
>>   
>>       core->mac[ICR] &= ~effective_eiac;
>> +    core->msi_causes_pending &= ~effective_eiac;
>>   
>>       if (!(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
>>           core->mac[IMS] &= ~effective_eiac;
>> @@ -2123,6 +2124,13 @@ e1000e_send_msi(E1000ECore *core, bool msix)
>>   {
>>       uint32_t causes = core->mac[ICR] & core->mac[IMS] & ~E1000_ICR_ASSERTED;
>>   
>> +    core->msi_causes_pending &= causes;
>> +    causes ^= core->msi_causes_pending;
>> +    if (causes == 0) {
>> +        return;
>> +    }
>> +    core->msi_causes_pending |= causes;
>> +
>>       if (msix) {
>>           e1000e_msix_notify(core, causes);
>>       } else {
>> @@ -2160,6 +2168,9 @@ e1000e_update_interrupt_state(E1000ECore *core)
>>       core->mac[ICS] = core->mac[ICR];
>>   
>>       interrupts_pending = (core->mac[IMS] & core->mac[ICR]) ? true : false;
>> +    if (!interrupts_pending) {
>> +        core->msi_causes_pending = 0;
>> +    }
>>   
>>       trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS],
>>                                           core->mac[ICR], core->mac[IMS]);
>> diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
>> index 7d8ff41890..63a15510cc 100644
>> --- a/hw/net/e1000e_core.h
>> +++ b/hw/net/e1000e_core.h
>> @@ -109,6 +109,8 @@ struct E1000Core {
>>       NICState *owner_nic;
>>       PCIDevice *owner;
>>       void (*owner_start_recv)(PCIDevice *d);
>> +
>> +    uint32_t msi_causes_pending;
>>   };
>>   
>>   void
>>
> Ping again, this one is still open.
>
> Jan

Sorry for the late.

Just one thing to confirm, I think the reason that we don't need to 
migrate msi_cause_pending is that it can only lead at most one more 
signal of MSI on destination?

Thanks
Jan Kiszka July 2, 2018, 5:14 a.m. UTC | #3
On 2018-07-02 05:40, Jason Wang wrote:
> 
> 
> On 2018年06月30日 14:13, Jan Kiszka wrote:
>> On 2018-04-05 19:41, Jan Kiszka wrote:
>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>
>>> Only signal MSI/MSI-X events on rising edges. So far we re-triggered the
>>> interrupt sources even if the guest did no consumed the pending one,
>>> easily causing interrupt storms.
>>>
>>> Issue was observable with Linux 4.16 e1000e driver when MSI-X was used.
>>> Vector 2 was causing interrupt storms after the driver activated the
>>> device.
>>>
>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>> ---
>>>
>>> Changes in v2:
>>>   - also update msi_causes_pending after EIAC changes (required because
>>>     there is no e1000e_update_interrupt_state after that
>>>
>>>   hw/net/e1000e_core.c | 11 +++++++++++
>>>   hw/net/e1000e_core.h |  2 ++
>>>   2 files changed, 13 insertions(+)
>>>
>>> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
>>> index c93c4661ed..d6ddd59986 100644
>>> --- a/hw/net/e1000e_core.c
>>> +++ b/hw/net/e1000e_core.c
>>> @@ -2027,6 +2027,7 @@ e1000e_msix_notify_one(E1000ECore *core,
>>> uint32_t cause, uint32_t int_cfg)
>>>       }
>>>         core->mac[ICR] &= ~effective_eiac;
>>> +    core->msi_causes_pending &= ~effective_eiac;
>>>         if (!(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
>>>           core->mac[IMS] &= ~effective_eiac;
>>> @@ -2123,6 +2124,13 @@ e1000e_send_msi(E1000ECore *core, bool msix)
>>>   {
>>>       uint32_t causes = core->mac[ICR] & core->mac[IMS] &
>>> ~E1000_ICR_ASSERTED;
>>>   +    core->msi_causes_pending &= causes;
>>> +    causes ^= core->msi_causes_pending;
>>> +    if (causes == 0) {
>>> +        return;
>>> +    }
>>> +    core->msi_causes_pending |= causes;
>>> +
>>>       if (msix) {
>>>           e1000e_msix_notify(core, causes);
>>>       } else {
>>> @@ -2160,6 +2168,9 @@ e1000e_update_interrupt_state(E1000ECore *core)
>>>       core->mac[ICS] = core->mac[ICR];
>>>         interrupts_pending = (core->mac[IMS] & core->mac[ICR]) ? true
>>> : false;
>>> +    if (!interrupts_pending) {
>>> +        core->msi_causes_pending = 0;
>>> +    }
>>>         trace_e1000e_irq_pending_interrupts(core->mac[ICR] &
>>> core->mac[IMS],
>>>                                           core->mac[ICR],
>>> core->mac[IMS]);
>>> diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
>>> index 7d8ff41890..63a15510cc 100644
>>> --- a/hw/net/e1000e_core.h
>>> +++ b/hw/net/e1000e_core.h
>>> @@ -109,6 +109,8 @@ struct E1000Core {
>>>       NICState *owner_nic;
>>>       PCIDevice *owner;
>>>       void (*owner_start_recv)(PCIDevice *d);
>>> +
>>> +    uint32_t msi_causes_pending;
>>>   };
>>>     void
>>>
>> Ping again, this one is still open.
>>
>> Jan
> 
> Sorry for the late.
> 
> Just one thing to confirm, I think the reason that we don't need to
> migrate msi_cause_pending is that it can only lead at most one more
> signal of MSI on destination?

Right, a cleared msi_causes_pending on the destination may lead to one
spurious interrupt injects per vector. That should be tolerable.

Jan
Jason Wang July 3, 2018, 4:01 a.m. UTC | #4
On 2018年07月02日 13:14, Jan Kiszka wrote:
> On 2018-07-02 05:40, Jason Wang wrote:
>>
>> On 2018年06月30日 14:13, Jan Kiszka wrote:
>>> On 2018-04-05 19:41, Jan Kiszka wrote:
>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>
>>>> Only signal MSI/MSI-X events on rising edges. So far we re-triggered the
>>>> interrupt sources even if the guest did no consumed the pending one,
>>>> easily causing interrupt storms.
>>>>
>>>> Issue was observable with Linux 4.16 e1000e driver when MSI-X was used.
>>>> Vector 2 was causing interrupt storms after the driver activated the
>>>> device.
>>>>
>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>> ---
>>>>
>>>> Changes in v2:
>>>>    - also update msi_causes_pending after EIAC changes (required because
>>>>      there is no e1000e_update_interrupt_state after that
>>>>
>>>>    hw/net/e1000e_core.c | 11 +++++++++++
>>>>    hw/net/e1000e_core.h |  2 ++
>>>>    2 files changed, 13 insertions(+)
>>>>
>>>> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
>>>> index c93c4661ed..d6ddd59986 100644
>>>> --- a/hw/net/e1000e_core.c
>>>> +++ b/hw/net/e1000e_core.c
>>>> @@ -2027,6 +2027,7 @@ e1000e_msix_notify_one(E1000ECore *core,
>>>> uint32_t cause, uint32_t int_cfg)
>>>>        }
>>>>          core->mac[ICR] &= ~effective_eiac;
>>>> +    core->msi_causes_pending &= ~effective_eiac;
>>>>          if (!(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
>>>>            core->mac[IMS] &= ~effective_eiac;
>>>> @@ -2123,6 +2124,13 @@ e1000e_send_msi(E1000ECore *core, bool msix)
>>>>    {
>>>>        uint32_t causes = core->mac[ICR] & core->mac[IMS] &
>>>> ~E1000_ICR_ASSERTED;
>>>>    +    core->msi_causes_pending &= causes;
>>>> +    causes ^= core->msi_causes_pending;
>>>> +    if (causes == 0) {
>>>> +        return;
>>>> +    }
>>>> +    core->msi_causes_pending |= causes;
>>>> +
>>>>        if (msix) {
>>>>            e1000e_msix_notify(core, causes);
>>>>        } else {
>>>> @@ -2160,6 +2168,9 @@ e1000e_update_interrupt_state(E1000ECore *core)
>>>>        core->mac[ICS] = core->mac[ICR];
>>>>          interrupts_pending = (core->mac[IMS] & core->mac[ICR]) ? true
>>>> : false;
>>>> +    if (!interrupts_pending) {
>>>> +        core->msi_causes_pending = 0;
>>>> +    }
>>>>          trace_e1000e_irq_pending_interrupts(core->mac[ICR] &
>>>> core->mac[IMS],
>>>>                                            core->mac[ICR],
>>>> core->mac[IMS]);
>>>> diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
>>>> index 7d8ff41890..63a15510cc 100644
>>>> --- a/hw/net/e1000e_core.h
>>>> +++ b/hw/net/e1000e_core.h
>>>> @@ -109,6 +109,8 @@ struct E1000Core {
>>>>        NICState *owner_nic;
>>>>        PCIDevice *owner;
>>>>        void (*owner_start_recv)(PCIDevice *d);
>>>> +
>>>> +    uint32_t msi_causes_pending;
>>>>    };
>>>>      void
>>>>
>>> Ping again, this one is still open.
>>>
>>> Jan
>> Sorry for the late.
>>
>> Just one thing to confirm, I think the reason that we don't need to
>> migrate msi_cause_pending is that it can only lead at most one more
>> signal of MSI on destination?
> Right, a cleared msi_causes_pending on the destination may lead to one
> spurious interrupt injects per vector. That should be tolerable.
>
> Jan
>

Applied.

Thanks
diff mbox series

Patch

diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index c93c4661ed..d6ddd59986 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -2027,6 +2027,7 @@  e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg)
     }
 
     core->mac[ICR] &= ~effective_eiac;
+    core->msi_causes_pending &= ~effective_eiac;
 
     if (!(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
         core->mac[IMS] &= ~effective_eiac;
@@ -2123,6 +2124,13 @@  e1000e_send_msi(E1000ECore *core, bool msix)
 {
     uint32_t causes = core->mac[ICR] & core->mac[IMS] & ~E1000_ICR_ASSERTED;
 
+    core->msi_causes_pending &= causes;
+    causes ^= core->msi_causes_pending;
+    if (causes == 0) {
+        return;
+    }
+    core->msi_causes_pending |= causes;
+
     if (msix) {
         e1000e_msix_notify(core, causes);
     } else {
@@ -2160,6 +2168,9 @@  e1000e_update_interrupt_state(E1000ECore *core)
     core->mac[ICS] = core->mac[ICR];
 
     interrupts_pending = (core->mac[IMS] & core->mac[ICR]) ? true : false;
+    if (!interrupts_pending) {
+        core->msi_causes_pending = 0;
+    }
 
     trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS],
                                         core->mac[ICR], core->mac[IMS]);
diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
index 7d8ff41890..63a15510cc 100644
--- a/hw/net/e1000e_core.h
+++ b/hw/net/e1000e_core.h
@@ -109,6 +109,8 @@  struct E1000Core {
     NICState *owner_nic;
     PCIDevice *owner;
     void (*owner_start_recv)(PCIDevice *d);
+
+    uint32_t msi_causes_pending;
 };
 
 void