[Qemu-devel] [PATCH v2] ioapic: allow buggy guests mishandling level-triggered interrupts to make progress

Vitaly Kuznetsov posted 1 patch 5 years ago
Test docker-mingw@fedora passed
Test docker-clang@ubuntu passed
Test checkpatch passed
Test asan passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20190402080215.10747-1-vkuznets@redhat.com
Maintainers: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, "Michael S. Tsirkin" <mst@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>
hw/intc/ioapic.c                  | 57 ++++++++++++++++++++++++++++---
hw/intc/trace-events              |  1 +
include/hw/i386/ioapic_internal.h |  3 ++
3 files changed, 56 insertions(+), 5 deletions(-)
[Qemu-devel] [PATCH v2] ioapic: allow buggy guests mishandling level-triggered interrupts to make progress
Posted by Vitaly Kuznetsov 5 years ago
It was found that Hyper-V 2016 on KVM in some configurations (q35 machine +
piix4-usb-uhci) hangs on boot. Root-cause was that one of Hyper-V
level-triggered interrupt handler performs EOI before fixing the cause of
the interrupt. This results in IOAPIC keep re-raising the level-triggered
interrupt after EOI because irq-line remains asserted.

Gory details: https://www.spinics.net/lists/kvm/msg184484.html
(the whole thread).

Turns out we were dealing with similar issues before; in-kernel IOAPIC
implementation has commit 184564efae4d ("kvm: ioapic: conditionally delay
irq delivery duringeoi broadcast") which describes a very similar issue.

Steal the idea from the above mentioned commit for IOAPIC implementation in
QEMU. SUCCESSIVE_IRQ_MAX_COUNT, delay and the comment are borrowed as well.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
Changes since v1:
- timer_mod() -> timer_mod_anticipate() [Paolo Bonzini]
- Massaged changelog [Liran Alon]
- Make implementation look like in-kernel one [Liran Alon]
---
 hw/intc/ioapic.c                  | 57 ++++++++++++++++++++++++++++---
 hw/intc/trace-events              |  1 +
 include/hw/i386/ioapic_internal.h |  3 ++
 3 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index 9d75f84d3b..9fb8dd3450 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -139,6 +139,15 @@ static void ioapic_service(IOAPICCommonState *s)
     }
 }
 
+#define SUCCESSIVE_IRQ_MAX_COUNT 10000
+
+static void ioapic_timer(void *opaque)
+{
+    IOAPICCommonState *s = opaque;
+
+    ioapic_service(s);
+}
+
 static void ioapic_set_irq(void *opaque, int vector, int level)
 {
     IOAPICCommonState *s = opaque;
@@ -222,13 +231,40 @@ void ioapic_eoi_broadcast(int vector)
         }
         for (n = 0; n < IOAPIC_NUM_PINS; n++) {
             entry = s->ioredtbl[n];
-            if ((entry & IOAPIC_LVT_REMOTE_IRR)
-                && (entry & IOAPIC_VECTOR_MASK) == vector) {
-                trace_ioapic_clear_remote_irr(n, vector);
-                s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
-                if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
+
+            if (((entry & IOAPIC_VECTOR_MASK) != vector) ||
+                !(entry & IOAPIC_LVT_REMOTE_IRR)) {
+                continue;
+            }
+
+            trace_ioapic_clear_remote_irr(n, vector);
+            s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
+
+            if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) !=
+                IOAPIC_TRIGGER_LEVEL) {
+                continue;
+            }
+
+            if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
+                ++s->irq_eoi[vector];
+                if (s->irq_eoi[vector] >= SUCCESSIVE_IRQ_MAX_COUNT) {
+                    /*
+                     * Real hardware does not deliver the interrupt immediately
+                     * during eoi broadcast, and this lets a buggy guest make
+                     * slow progress even if it does not correctly handle a
+                     * level-triggered interrupt. Emulate this behavior if we
+                     * detect an interrupt storm.
+                     */
+                    s->irq_eoi[vector] = 0;
+                    timer_mod_anticipate(s->timer,
+                                         qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+                                         NANOSECONDS_PER_SECOND / 100);
+                    trace_ioapic_eoi_delayed_reassert(vector);
+                } else {
                     ioapic_service(s);
                 }
+            } else {
+                s->irq_eoi[vector] = 0;
             }
         }
     }
@@ -401,6 +437,8 @@ static void ioapic_realize(DeviceState *dev, Error **errp)
     memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s,
                           "ioapic", 0x1000);
 
+    s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ioapic_timer, s);
+
     qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);
 
     ioapics[ioapic_no] = s;
@@ -408,6 +446,14 @@ static void ioapic_realize(DeviceState *dev, Error **errp)
     qemu_add_machine_init_done_notifier(&s->machine_done);
 }
 
+static void ioapic_unrealize(DeviceState *dev, Error **errp)
+{
+    IOAPICCommonState *s = IOAPIC_COMMON(dev);
+
+    timer_del(s->timer);
+    timer_free(s->timer);
+}
+
 static Property ioapic_properties[] = {
     DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF),
     DEFINE_PROP_END_OF_LIST(),
@@ -419,6 +465,7 @@ static void ioapic_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->realize = ioapic_realize;
+    k->unrealize = ioapic_unrealize;
     /*
      * If APIC is in kernel, we need to update the kernel cache after
      * migration, otherwise first 24 gsi routes will be invalid.
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index a28bdce925..90c9d07c1a 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -25,6 +25,7 @@ apic_mem_writel(uint64_t addr, uint32_t val) "0x%"PRIx64" = 0x%08x"
 ioapic_set_remote_irr(int n) "set remote irr for pin %d"
 ioapic_clear_remote_irr(int n, int vector) "clear remote irr for pin %d vector %d"
 ioapic_eoi_broadcast(int vector) "EOI broadcast for vector %d"
+ioapic_eoi_delayed_reassert(int vector) "delayed reassert on EOI broadcast for vector %d"
 ioapic_mem_read(uint8_t addr, uint8_t regsel, uint8_t size, uint32_t val) "ioapic mem read addr 0x%"PRIx8" regsel: 0x%"PRIx8" size 0x%"PRIx8" retval 0x%"PRIx32
 ioapic_mem_write(uint8_t addr, uint8_t regsel, uint8_t size, uint32_t val) "ioapic mem write addr 0x%"PRIx8" regsel: 0x%"PRIx8" size 0x%"PRIx8" val 0x%"PRIx32
 ioapic_set_irq(int vector, int level) "vector: %d level: %d"
diff --git a/include/hw/i386/ioapic_internal.h b/include/hw/i386/ioapic_internal.h
index 9848f391bb..70f9fc750a 100644
--- a/include/hw/i386/ioapic_internal.h
+++ b/include/hw/i386/ioapic_internal.h
@@ -96,6 +96,7 @@ typedef struct IOAPICCommonClass {
     SysBusDeviceClass parent_class;
 
     DeviceRealize realize;
+    DeviceUnrealize unrealize;
     void (*pre_save)(IOAPICCommonState *s);
     void (*post_load)(IOAPICCommonState *s);
 } IOAPICCommonClass;
@@ -111,6 +112,8 @@ struct IOAPICCommonState {
     uint8_t version;
     uint64_t irq_count[IOAPIC_NUM_PINS];
     int irq_level[IOAPIC_NUM_PINS];
+    int irq_eoi[IOAPIC_NUM_PINS];
+    QEMUTimer *timer;
 };
 
 void ioapic_reset_common(DeviceState *dev);
-- 
2.20.1


Re: [Qemu-devel] [PATCH v2] ioapic: allow buggy guests mishandling level-triggered interrupts to make progress
Posted by Liran Alon 5 years ago

> On 2 Apr 2019, at 11:02, Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
> 
> It was found that Hyper-V 2016 on KVM in some configurations (q35 machine +
> piix4-usb-uhci) hangs on boot. Root-cause was that one of Hyper-V
> level-triggered interrupt handler performs EOI before fixing the cause of
> the interrupt. This results in IOAPIC keep re-raising the level-triggered
> interrupt after EOI because irq-line remains asserted.
> 
> Gory details: https://urldefense.proofpoint.com/v2/url?u=https-3A__www.spinics.net_lists_kvm_msg184484.html&d=DwIDAg&c=RoP1YumCXCgaWHvlZYR8PZh8Bv7qIrMUB65eapI_JnE&r=Jk6Q8nNzkQ6LJ6g42qARkg6ryIDGQr-yKXPNGZbpTx0&m=GChegdKds--tabGxr5GuYEYOixhXJSy1NdM1z1HllzY&s=1eaLCEy8KZxs9hcxJGI2MMfmsL0jIRhem8QAeeBgehM&e=
> (the whole thread).
> 
> Turns out we were dealing with similar issues before; in-kernel IOAPIC
> implementation has commit 184564efae4d ("kvm: ioapic: conditionally delay
> irq delivery duringeoi broadcast") which describes a very similar issue.
> 
> Steal the idea from the above mentioned commit for IOAPIC implementation in
> QEMU. SUCCESSIVE_IRQ_MAX_COUNT, delay and the comment are borrowed as well.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

I have only small minor refactoring comments inline.
Reviewed-by: Liran Alon <liran.alon@oracle.com>

-Liran

> ---
> Changes since v1:
> - timer_mod() -> timer_mod_anticipate() [Paolo Bonzini]
> - Massaged changelog [Liran Alon]
> - Make implementation look like in-kernel one [Liran Alon]
> ---
> hw/intc/ioapic.c                  | 57 ++++++++++++++++++++++++++++---
> hw/intc/trace-events              |  1 +
> include/hw/i386/ioapic_internal.h |  3 ++
> 3 files changed, 56 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
> index 9d75f84d3b..9fb8dd3450 100644
> --- a/hw/intc/ioapic.c
> +++ b/hw/intc/ioapic.c
> @@ -139,6 +139,15 @@ static void ioapic_service(IOAPICCommonState *s)
>     }
> }
> 
> +#define SUCCESSIVE_IRQ_MAX_COUNT 10000
> +
> +static void ioapic_timer(void *opaque)

I suggest rename method to something such as “delayed_ioapic_service_timer_handler()”.

> +{
> +    IOAPICCommonState *s = opaque;
> +
> +    ioapic_service(s);
> +}
> +
> static void ioapic_set_irq(void *opaque, int vector, int level)
> {
>     IOAPICCommonState *s = opaque;
> @@ -222,13 +231,40 @@ void ioapic_eoi_broadcast(int vector)
>         }
>         for (n = 0; n < IOAPIC_NUM_PINS; n++) {
>             entry = s->ioredtbl[n];
> -            if ((entry & IOAPIC_LVT_REMOTE_IRR)
> -                && (entry & IOAPIC_VECTOR_MASK) == vector) {
> -                trace_ioapic_clear_remote_irr(n, vector);
> -                s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
> -                if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
> +
> +            if (((entry & IOAPIC_VECTOR_MASK) != vector) ||
> +                !(entry & IOAPIC_LVT_REMOTE_IRR)) {
> +                continue;
> +            }
> +
> +            trace_ioapic_clear_remote_irr(n, vector);
> +            s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;

remote-irr is only set for level-triggered interrupt.
Thus, I think in the “if” above you should “continue;” in case trigger-mode != IOAPIC_TRIGGER_LEVEL.
i.e. Replace condition "!(entry & IOAPIC_LVT_REMOTE_IRR)” with “trigger-mode != IOAPIC_TRIGGER_LEVEL”.
Then you can also remove the “if” below.

> +
> +            if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) !=
> +                IOAPIC_TRIGGER_LEVEL) {
> +                continue;
> +            }
> +
> +            if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
> +                ++s->irq_eoi[vector];
> +                if (s->irq_eoi[vector] >= SUCCESSIVE_IRQ_MAX_COUNT) {

This can just be “==“ as you should never reach beyond SUCCESSIVE_IRQ_MAX_COUNT.
But you can remain with “>=“ if you want to be on the safe side?

> +                    /*
> +                     * Real hardware does not deliver the interrupt immediately
> +                     * during eoi broadcast, and this lets a buggy guest make
> +                     * slow progress even if it does not correctly handle a
> +                     * level-triggered interrupt. Emulate this behavior if we
> +                     * detect an interrupt storm.
> +                     */
> +                    s->irq_eoi[vector] = 0;
> +                    timer_mod_anticipate(s->timer,
> +                                         qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
> +                                         NANOSECONDS_PER_SECOND / 100);
> +                    trace_ioapic_eoi_delayed_reassert(vector);
> +                } else {
>                     ioapic_service(s);
>                 }
> +            } else {
> +                s->irq_eoi[vector] = 0;
>             }
>         }
>     }
> @@ -401,6 +437,8 @@ static void ioapic_realize(DeviceState *dev, Error **errp)
>     memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s,
>                           "ioapic", 0x1000);
> 
> +    s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ioapic_timer, s);
> +
>     qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);
> 
>     ioapics[ioapic_no] = s;
> @@ -408,6 +446,14 @@ static void ioapic_realize(DeviceState *dev, Error **errp)
>     qemu_add_machine_init_done_notifier(&s->machine_done);
> }
> 
> +static void ioapic_unrealize(DeviceState *dev, Error **errp)
> +{
> +    IOAPICCommonState *s = IOAPIC_COMMON(dev);
> +
> +    timer_del(s->timer);
> +    timer_free(s->timer);
> +}
> +
> static Property ioapic_properties[] = {
>     DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF),
>     DEFINE_PROP_END_OF_LIST(),
> @@ -419,6 +465,7 @@ static void ioapic_class_init(ObjectClass *klass, void *data)
>     DeviceClass *dc = DEVICE_CLASS(klass);
> 
>     k->realize = ioapic_realize;
> +    k->unrealize = ioapic_unrealize;
>     /*
>      * If APIC is in kernel, we need to update the kernel cache after
>      * migration, otherwise first 24 gsi routes will be invalid.
> diff --git a/hw/intc/trace-events b/hw/intc/trace-events
> index a28bdce925..90c9d07c1a 100644
> --- a/hw/intc/trace-events
> +++ b/hw/intc/trace-events
> @@ -25,6 +25,7 @@ apic_mem_writel(uint64_t addr, uint32_t val) "0x%"PRIx64" = 0x%08x"
> ioapic_set_remote_irr(int n) "set remote irr for pin %d"
> ioapic_clear_remote_irr(int n, int vector) "clear remote irr for pin %d vector %d"
> ioapic_eoi_broadcast(int vector) "EOI broadcast for vector %d"
> +ioapic_eoi_delayed_reassert(int vector) "delayed reassert on EOI broadcast for vector %d"
> ioapic_mem_read(uint8_t addr, uint8_t regsel, uint8_t size, uint32_t val) "ioapic mem read addr 0x%"PRIx8" regsel: 0x%"PRIx8" size 0x%"PRIx8" retval 0x%"PRIx32
> ioapic_mem_write(uint8_t addr, uint8_t regsel, uint8_t size, uint32_t val) "ioapic mem write addr 0x%"PRIx8" regsel: 0x%"PRIx8" size 0x%"PRIx8" val 0x%"PRIx32
> ioapic_set_irq(int vector, int level) "vector: %d level: %d"
> diff --git a/include/hw/i386/ioapic_internal.h b/include/hw/i386/ioapic_internal.h
> index 9848f391bb..70f9fc750a 100644
> --- a/include/hw/i386/ioapic_internal.h
> +++ b/include/hw/i386/ioapic_internal.h
> @@ -96,6 +96,7 @@ typedef struct IOAPICCommonClass {
>     SysBusDeviceClass parent_class;
> 
>     DeviceRealize realize;
> +    DeviceUnrealize unrealize;
>     void (*pre_save)(IOAPICCommonState *s);
>     void (*post_load)(IOAPICCommonState *s);
> } IOAPICCommonClass;
> @@ -111,6 +112,8 @@ struct IOAPICCommonState {
>     uint8_t version;
>     uint64_t irq_count[IOAPIC_NUM_PINS];
>     int irq_level[IOAPIC_NUM_PINS];
> +    int irq_eoi[IOAPIC_NUM_PINS];
> +    QEMUTimer *timer;

I suggest rename “timer” to something more indicative as one can confuse it with a generic IOAPIC timer.
Something such as “delayed_ioapic_service_timer”.

> };
> 
> void ioapic_reset_common(DeviceState *dev);
> -- 
> 2.20.1
> 


Re: [Qemu-devel] [PATCH v2] ioapic: allow buggy guests mishandling level-triggered interrupts to make progress
Posted by Paolo Bonzini 5 years ago
On 02/04/19 10:23, Liran Alon wrote:
>> +#define SUCCESSIVE_IRQ_MAX_COUNT 10000
>> +
>> +static void ioapic_timer(void *opaque)
> I suggest rename method to something such as “delayed_ioapic_service_timer_handler()”.
> 

I renamed them to delayed_ioapic_service_{timer,cb} and queued the patch.

>>
>> +            if (((entry & IOAPIC_VECTOR_MASK) != vector) ||
>> +                !(entry & IOAPIC_LVT_REMOTE_IRR)) {
>> +                continue;
>> +            }
>> +
>> +            trace_ioapic_clear_remote_irr(n, vector);
>> +            s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
> 
> remote-irr is only set for level-triggered interrupt.
> Thus, I think in the “if” above you should “continue;” in case trigger-mode != IOAPIC_TRIGGER_LEVEL.
> i.e. Replace condition "!(entry & IOAPIC_LVT_REMOTE_IRR)” with “trigger-mode != IOAPIC_TRIGGER_LEVEL”.
> Then you can also remove the “if” below.

Like this?

@@ -232,19 +232,18 @@ void ioapic_eoi_broadcast(int vector)
         for (n = 0; n < IOAPIC_NUM_PINS; n++) {
             entry = s->ioredtbl[n];
 
-            if (((entry & IOAPIC_VECTOR_MASK) != vector) ||
-                !(entry & IOAPIC_LVT_REMOTE_IRR)) {
+            if ((entry & IOAPIC_VECTOR_MASK) != vector ||
+                ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) != IOAPIC_TRIGGER_LEVEL) {
                 continue;
             }
 
-            trace_ioapic_clear_remote_irr(n, vector);
-            s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
-
-            if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) !=
-                IOAPIC_TRIGGER_LEVEL) {
+            if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
                 continue;
             }
 
+            trace_ioapic_clear_remote_irr(n, vector);
+            s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
+
             if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
                 ++s->irq_eoi[vector];
                 if (s->irq_eoi[vector] >= SUCCESSIVE_IRQ_MAX_COUNT) {


Paolo

Re: [Qemu-devel] [PATCH v2] ioapic: allow buggy guests mishandling level-triggered interrupts to make progress
Posted by Liran Alon 5 years ago

> On 2 Apr 2019, at 12:06, Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> On 02/04/19 10:23, Liran Alon wrote:
>>> +#define SUCCESSIVE_IRQ_MAX_COUNT 10000
>>> +
>>> +static void ioapic_timer(void *opaque)
>> I suggest rename method to something such as “delayed_ioapic_service_timer_handler()”.
>> 
> 
> I renamed them to delayed_ioapic_service_{timer,cb} and queued the patch.

Thanks.

> 
>>> 
>>> +            if (((entry & IOAPIC_VECTOR_MASK) != vector) ||
>>> +                !(entry & IOAPIC_LVT_REMOTE_IRR)) {
>>> +                continue;
>>> +            }
>>> +
>>> +            trace_ioapic_clear_remote_irr(n, vector);
>>> +            s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
>> 
>> remote-irr is only set for level-triggered interrupt.
>> Thus, I think in the “if” above you should “continue;” in case trigger-mode != IOAPIC_TRIGGER_LEVEL.
>> i.e. Replace condition "!(entry & IOAPIC_LVT_REMOTE_IRR)” with “trigger-mode != IOAPIC_TRIGGER_LEVEL”.
>> Then you can also remove the “if” below.
> 
> Like this?
> 
> @@ -232,19 +232,18 @@ void ioapic_eoi_broadcast(int vector)
>         for (n = 0; n < IOAPIC_NUM_PINS; n++) {
>             entry = s->ioredtbl[n];
> 
> -            if (((entry & IOAPIC_VECTOR_MASK) != vector) ||
> -                !(entry & IOAPIC_LVT_REMOTE_IRR)) {
> +            if ((entry & IOAPIC_VECTOR_MASK) != vector ||
> +                ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) != IOAPIC_TRIGGER_LEVEL) {
>                 continue;
>             }
> 
> -            trace_ioapic_clear_remote_irr(n, vector);
> -            s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
> -
> -            if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) !=
> -                IOAPIC_TRIGGER_LEVEL) {
> +            if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
>                 continue;
>             }

I think above “if” of checking remote-irr should just be removed.
But the rest seems good :)

> 
> +            trace_ioapic_clear_remote_irr(n, vector);
> +            s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
> +
>             if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
>                 ++s->irq_eoi[vector];
>                 if (s->irq_eoi[vector] >= SUCCESSIVE_IRQ_MAX_COUNT) {
> 
> 
> Paolo


Re: [Qemu-devel] [PATCH v2] ioapic: allow buggy guests mishandling level-triggered interrupts to make progress
Posted by Paolo Bonzini 5 years ago
On 02/04/19 11:08, Liran Alon wrote:
>> -
>> -            if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) !=
>> -                IOAPIC_TRIGGER_LEVEL) {
>> +            if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
>>                 continue;
>>             }
> I think above “if” of checking remote-irr should just be removed.
> But the rest seems good :)
> 

It seems more logical, as the condition is now the opposite of
ioapic_set_irq: ioapic_set_irq services the interrupt if remote-irr = 0,
EOI does it if remote-irr = 1.

Paolo

Re: [Qemu-devel] [PATCH v2] ioapic: allow buggy guests mishandling level-triggered interrupts to make progress
Posted by Liran Alon 5 years ago

> On 2 Apr 2019, at 13:20, Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> On 02/04/19 11:08, Liran Alon wrote:
>>> -
>>> -            if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) !=
>>> -                IOAPIC_TRIGGER_LEVEL) {
>>> +            if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
>>>                continue;
>>>            }
>> I think above “if” of checking remote-irr should just be removed.
>> But the rest seems good :)
>> 
> 
> It seems more logical, as the condition is now the opposite of
> ioapic_set_irq: ioapic_set_irq services the interrupt if remote-irr = 0,
> EOI does it if remote-irr = 1.
> 
> Paolo

At this point at ioapic_eoi_broadcast(), you already know you got an EOI for a level-triggered interrupt.
Therefore, the remote-irr must be already set to 1. Otherwise, this is a bug. You can assert on this if you wish.
(Note that remote-irr is a read-only property that cannot be overwritten by guest writing to IOAPIC redirection-table)

-Liran