Interrupt throttling is broken in several ways:
- Timer expiry sends an interrupt even if there is no cause.
- (e1000e) Mitigated interrupts still auto-clear cause bits.
- Timer expiry that results in an interrupt does not re-arm the timer, so
another interrupt can be delivered immediately after the timer expiry interrupt.
To fix:
- When the throttle timer expires, check the cause bits corresponding to
the msix vector before sending an irq.
- (e1000e) Skip the auto-clear logic if an interrupt is delayed, and
send delayed irqs using e1000e_msix_notify() to perform auto-clear.
- Re-load the throttle timer when a delayed interrupt is signaled. e1000e
gets this by signaling delayed interrupts with e1000e_msix_notify(); igb
calls igb_intrmgr_rearm_timer() directly.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
hw/net/e1000e_core.c | 59 +++++++++++++++++++++++++++++++++++++++-----
hw/net/igb_core.c | 50 ++++++++++++++++++++++++-------------
2 files changed, 86 insertions(+), 23 deletions(-)
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index f8e6522f810..6fb8da32e4d 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -178,16 +178,62 @@ e1000e_intrmgr_on_throttling_timer(void *opaque)
}
}
+static uint32_t find_msix_causes(E1000ECore *core, int vec)
+{
+ uint32_t causes = 0;
+ uint32_t int_cfg;
+
+ int_cfg = E1000_IVAR_RXQ0(core->mac[IVAR]);
+ if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
+ E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
+ causes |= E1000_ICR_RXQ0;
+ }
+
+ int_cfg = E1000_IVAR_RXQ1(core->mac[IVAR]);
+ if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
+ E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
+ causes |= E1000_ICR_RXQ1;
+ }
+
+ int_cfg = E1000_IVAR_TXQ0(core->mac[IVAR]);
+ if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
+ E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
+ causes |= E1000_ICR_TXQ0;
+ }
+
+ int_cfg = E1000_IVAR_TXQ1(core->mac[IVAR]);
+ if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
+ E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
+ causes |= E1000_ICR_TXQ1;
+ }
+
+ int_cfg = E1000_IVAR_OTHER(core->mac[IVAR]);
+ if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
+ E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
+ causes |= E1000_ICR_OTHER;
+ }
+
+ return causes;
+}
+
+static void
+e1000e_msix_notify(E1000ECore *core, uint32_t causes);
+
static void
e1000e_intrmgr_on_msix_throttling_timer(void *opaque)
{
E1000IntrDelayTimer *timer = opaque;
- int idx = timer - &timer->core->eitr[0];
+ E1000ECore *core = timer->core;
+ int idx = timer - &core->eitr[0];
+ uint32_t causes;
timer->running = false;
- trace_e1000e_irq_msix_notify_postponed_vec(idx);
- msix_notify(timer->core->owner, idx);
+ causes = find_msix_causes(core, idx) & core->mac[IMS] & core->mac[ICR];
+ if (causes) {
+ trace_e1000e_irq_msix_notify_postponed_vec(idx);
+ e1000e_msix_notify(core, causes);
+ }
}
static void
@@ -1992,10 +2038,11 @@ e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg)
if (E1000_IVAR_ENTRY_VALID(int_cfg)) {
uint32_t vec = E1000_IVAR_ENTRY_VEC(int_cfg);
if (vec < E1000E_MSIX_VEC_NUM) {
- if (!e1000e_eitr_should_postpone(core, vec)) {
- trace_e1000e_irq_msix_notify_vec(vec);
- msix_notify(core->owner, vec);
+ if (e1000e_eitr_should_postpone(core, vec)) {
+ return;
}
+ trace_e1000e_irq_msix_notify_vec(vec);
+ msix_notify(core->owner, vec);
} else {
trace_e1000e_wrn_msix_vec_wrong(cause, int_cfg);
}
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 3ae3e53530b..cc25a1d5baa 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -152,11 +152,14 @@ igb_intrmgr_arm_timer(IGBIntrDelayTimer *timer, int64_t delay_ns)
static inline void
igb_intrmgr_rearm_timer(IGBIntrDelayTimer *timer)
{
- uint32_t interval = (timer->core->mac[timer->delay_reg] &
- E1000_EITR_INTERVAL) >> 2;
- int64_t delay_ns = (int64_t)interval * timer->delay_resolution_ns;
+ uint32_t eitr = timer->core->mac[timer->delay_reg];
- igb_intrmgr_arm_timer(timer, delay_ns);
+ if (eitr != 0) {
+ uint32_t interval = (eitr & E1000_EITR_INTERVAL) >> 2;
+ int64_t delay_ns = (int64_t)interval * timer->delay_resolution_ns;
+
+ igb_intrmgr_arm_timer(timer, delay_ns);
+ }
}
static void
@@ -168,16 +171,7 @@ igb_intmgr_timer_resume(IGBIntrDelayTimer *timer)
}
static void
-igb_intrmgr_on_msix_throttling_timer(void *opaque)
-{
- IGBIntrDelayTimer *timer = opaque;
- int idx = timer - &timer->core->eitr[0];
-
- timer->running = false;
-
- trace_e1000e_irq_msix_notify_postponed_vec(idx);
- igb_msix_notify(timer->core, idx);
-}
+igb_intrmgr_on_msix_throttling_timer(void *opaque);
static void
igb_intrmgr_initialize_all_timers(IGBCore *core, bool create)
@@ -2253,9 +2247,7 @@ igb_postpone_interrupt(IGBIntrDelayTimer *timer)
return true;
}
- if (timer->core->mac[timer->delay_reg] != 0) {
- igb_intrmgr_rearm_timer(timer);
- }
+ igb_intrmgr_rearm_timer(timer);
return false;
}
@@ -2279,6 +2271,30 @@ static void igb_send_msix(IGBCore *core, uint32_t causes)
}
}
+static void
+igb_intrmgr_on_msix_throttling_timer(void *opaque)
+{
+ IGBIntrDelayTimer *timer = opaque;
+ IGBCore *core = timer->core;
+ int vector = timer - &core->eitr[0];
+ uint32_t causes;
+
+ timer->running = false;
+
+ causes = core->mac[EICR] & core->mac[EIMS];
+ if (causes & BIT(vector)) {
+ /*
+ * The moderation counter is loaded with interval value whenever the
+ * interrupt is signaled. This includes when the interrupt is signaled
+ * by the counter reaching 0.
+ */
+ igb_intrmgr_rearm_timer(timer);
+
+ trace_e1000e_irq_msix_notify_postponed_vec(vector);
+ igb_msix_notify(core, vector);
+ }
+}
+
static inline void
igb_fix_icr_asserted(IGBCore *core)
{
--
2.47.1
On 2025/04/11 13:31, Nicholas Piggin wrote:
> Interrupt throttling is broken in several ways:
> - Timer expiry sends an interrupt even if there is no cause.
> - (e1000e) Mitigated interrupts still auto-clear cause bits.
> - Timer expiry that results in an interrupt does not re-arm the timer so
> an interrupt can appear immediately after the timer expiry interrupt.
>
> To fix:
>
> - When the throttle timer expires, check the cause bits corresponding to
> the msix vector before sending an irq.
> - (e1000e) Skip the auto-clear logic if an interrupt is delayed, and
> send delayed irqs using e1000e_msix_notify() to perform auto-clear.
> - Re-load the throttle timer when a delayed interrupt is signaled. e1000e
> gets this by signaling them with e1000e_msix_notify(), igb calls
> igb_intrmgr_rearm_timer() directly.
Please split this patch into independent changes.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> hw/net/e1000e_core.c | 59 +++++++++++++++++++++++++++++++++++++++-----
> hw/net/igb_core.c | 50 ++++++++++++++++++++++++-------------
> 2 files changed, 86 insertions(+), 23 deletions(-)
>
> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
> index f8e6522f810..6fb8da32e4d 100644
> --- a/hw/net/e1000e_core.c
> +++ b/hw/net/e1000e_core.c
> @@ -178,16 +178,62 @@ e1000e_intrmgr_on_throttling_timer(void *opaque)
> }
> }
>
> +static uint32_t find_msix_causes(E1000ECore *core, int vec)
> +{
> + uint32_t causes = 0;
> + uint32_t int_cfg;
> +
> + int_cfg = E1000_IVAR_RXQ0(core->mac[IVAR]);
> + if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
> + E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
> + causes |= E1000_ICR_RXQ0;
> + }
> +
> + int_cfg = E1000_IVAR_RXQ1(core->mac[IVAR]);
> + if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
> + E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
> + causes |= E1000_ICR_RXQ1;
> + }
> +
> + int_cfg = E1000_IVAR_TXQ0(core->mac[IVAR]);
> + if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
> + E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
> + causes |= E1000_ICR_TXQ0;
> + }
> +
> + int_cfg = E1000_IVAR_TXQ1(core->mac[IVAR]);
> + if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
> + E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
> + causes |= E1000_ICR_TXQ1;
> + }
> +
> + int_cfg = E1000_IVAR_OTHER(core->mac[IVAR]);
> + if (E1000_IVAR_ENTRY_VALID(int_cfg) &&
> + E1000_IVAR_ENTRY_VEC(int_cfg) == vec) {
> + causes |= E1000_ICR_OTHER;
> + }
> +
> + return causes;
> +}
> +
> +static void
> +e1000e_msix_notify(E1000ECore *core, uint32_t causes);
> +
> static void
> e1000e_intrmgr_on_msix_throttling_timer(void *opaque)
> {
> E1000IntrDelayTimer *timer = opaque;
> - int idx = timer - &timer->core->eitr[0];
> + E1000ECore *core = timer->core;
> + int idx = timer - &core->eitr[0];
> + uint32_t causes;
>
> timer->running = false;
>
> - trace_e1000e_irq_msix_notify_postponed_vec(idx);
> - msix_notify(timer->core->owner, idx);
> + causes = find_msix_causes(core, idx) & core->mac[IMS] & core->mac[ICR];
> + if (causes) {
> + trace_e1000e_irq_msix_notify_postponed_vec(idx);
> + e1000e_msix_notify(core, causes);
> + }
> }
>
> static void
> @@ -1992,10 +2038,11 @@ e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg)
> if (E1000_IVAR_ENTRY_VALID(int_cfg)) {
> uint32_t vec = E1000_IVAR_ENTRY_VEC(int_cfg);
> if (vec < E1000E_MSIX_VEC_NUM) {
> - if (!e1000e_eitr_should_postpone(core, vec)) {
> - trace_e1000e_irq_msix_notify_vec(vec);
> - msix_notify(core->owner, vec);
> + if (e1000e_eitr_should_postpone(core, vec)) {
> + return;
> }
> + trace_e1000e_irq_msix_notify_vec(vec);
> + msix_notify(core->owner, vec);
> } else {
> trace_e1000e_wrn_msix_vec_wrong(cause, int_cfg);
> }
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
> index 3ae3e53530b..cc25a1d5baa 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -152,11 +152,14 @@ igb_intrmgr_arm_timer(IGBIntrDelayTimer *timer, int64_t delay_ns)
> static inline void
> igb_intrmgr_rearm_timer(IGBIntrDelayTimer *timer)
> {
> - uint32_t interval = (timer->core->mac[timer->delay_reg] &
> - E1000_EITR_INTERVAL) >> 2;
> - int64_t delay_ns = (int64_t)interval * timer->delay_resolution_ns;
> + uint32_t eitr = timer->core->mac[timer->delay_reg];
>
> - igb_intrmgr_arm_timer(timer, delay_ns);
> + if (eitr != 0) {
> + uint32_t interval = (eitr & E1000_EITR_INTERVAL) >> 2;
> + int64_t delay_ns = (int64_t)interval * timer->delay_resolution_ns;
> +
> + igb_intrmgr_arm_timer(timer, delay_ns);
> + }
> }
>
> static void
> @@ -168,16 +171,7 @@ igb_intmgr_timer_resume(IGBIntrDelayTimer *timer)
> }
>
> static void
> -igb_intrmgr_on_msix_throttling_timer(void *opaque)
> -{
> - IGBIntrDelayTimer *timer = opaque;
> - int idx = timer - &timer->core->eitr[0];
> -
> - timer->running = false;
> -
> - trace_e1000e_irq_msix_notify_postponed_vec(idx);
> - igb_msix_notify(timer->core, idx);
> -}
> +igb_intrmgr_on_msix_throttling_timer(void *opaque);
>
> static void
> igb_intrmgr_initialize_all_timers(IGBCore *core, bool create)
> @@ -2253,9 +2247,7 @@ igb_postpone_interrupt(IGBIntrDelayTimer *timer)
> return true;
> }
>
> - if (timer->core->mac[timer->delay_reg] != 0) {
> - igb_intrmgr_rearm_timer(timer);
> - }
> + igb_intrmgr_rearm_timer(timer);
>
> return false;
> }
> @@ -2279,6 +2271,30 @@ static void igb_send_msix(IGBCore *core, uint32_t causes)
> }
> }
>
> +static void
> +igb_intrmgr_on_msix_throttling_timer(void *opaque)
> +{
> + IGBIntrDelayTimer *timer = opaque;
> + IGBCore *core = timer->core;
> + int vector = timer - &core->eitr[0];
> + uint32_t causes;
> +
> + timer->running = false;
> +
> + causes = core->mac[EICR] & core->mac[EIMS];
> + if (causes & BIT(vector)) {
> + /*
> + * The moderation counter is loaded with interval value whenever the
> + * interrupt is signaled. This includes when the interrupt is signaled
> + * by the counter reaching 0.
> + */
> + igb_intrmgr_rearm_timer(timer);
> +
> + trace_e1000e_irq_msix_notify_postponed_vec(vector);
> + igb_msix_notify(core, vector);
> + }
> +}
> +
> static inline void
> igb_fix_icr_asserted(IGBCore *core)
> {
© 2016 - 2025 Red Hat, Inc.