The AER injection path may run in forced-threaded interrupt context
under PREEMPT_RT while holding the lock with IRQs disabled.
On RT kernels, spinlocks are converted into rt_spinlocks, which may
sleep. Sleeping is not permitted in IRQ-off sections, so this may
trigger lockdep warnings such as "Invalid wait context" in [1].
raw_spin_lock_irqsave(&pci_lock);
↓
rt_spin_lock(&inject_lock); <-- not allowed
Switching inject_lock to raw_spinlock_t preserves non-sleeping locking
semantics and avoids the warning when running with PREEMPT_RT enabled.
The list protected by this lock is bounded and only used for debugging
purposes, so using a raw spinlock will not cause unbounded latencies.
[1] https://lore.kernel.org/all/20251009150651.93618-1-jckeep.cuiguangbo@gmail.com/
Acked-by: Waiman Long <longman@redhat.com>
Signed-off-by: Guangbo Cui <jckeep.cuiguangbo@gmail.com>
---
drivers/pci/pcie/aer_inject.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/drivers/pci/pcie/aer_inject.c b/drivers/pci/pcie/aer_inject.c
index 91acc7b17f68..c8d65bfb10ff 100644
--- a/drivers/pci/pcie/aer_inject.c
+++ b/drivers/pci/pcie/aer_inject.c
@@ -72,7 +72,7 @@ static LIST_HEAD(einjected);
static LIST_HEAD(pci_bus_ops_list);
/* Protect einjected and pci_bus_ops_list */
-static DEFINE_SPINLOCK(inject_lock);
+static DEFINE_RAW_SPINLOCK(inject_lock);
static void aer_error_init(struct aer_error *err, u32 domain,
unsigned int bus, unsigned int devfn,
@@ -126,12 +126,12 @@ static struct pci_bus_ops *pci_bus_ops_pop(void)
unsigned long flags;
struct pci_bus_ops *bus_ops;
- spin_lock_irqsave(&inject_lock, flags);
+ raw_spin_lock_irqsave(&inject_lock, flags);
bus_ops = list_first_entry_or_null(&pci_bus_ops_list,
struct pci_bus_ops, list);
if (bus_ops)
list_del(&bus_ops->list);
- spin_unlock_irqrestore(&inject_lock, flags);
+ raw_spin_unlock_irqrestore(&inject_lock, flags);
return bus_ops;
}
@@ -223,7 +223,7 @@ static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn,
int domain;
int rv;
- spin_lock_irqsave(&inject_lock, flags);
+ raw_spin_lock_irqsave(&inject_lock, flags);
if (size != sizeof(u32))
goto out;
domain = pci_domain_nr(bus);
@@ -236,12 +236,12 @@ static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn,
sim = find_pci_config_dword(err, where, NULL);
if (sim) {
*val = *sim;
- spin_unlock_irqrestore(&inject_lock, flags);
+ raw_spin_unlock_irqrestore(&inject_lock, flags);
return 0;
}
out:
rv = aer_inj_read(bus, devfn, where, size, val);
- spin_unlock_irqrestore(&inject_lock, flags);
+ raw_spin_unlock_irqrestore(&inject_lock, flags);
return rv;
}
@@ -255,7 +255,7 @@ static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn,
int domain;
int rv;
- spin_lock_irqsave(&inject_lock, flags);
+ raw_spin_lock_irqsave(&inject_lock, flags);
if (size != sizeof(u32))
goto out;
domain = pci_domain_nr(bus);
@@ -271,12 +271,12 @@ static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn,
*sim ^= val;
else
*sim = val;
- spin_unlock_irqrestore(&inject_lock, flags);
+ raw_spin_unlock_irqrestore(&inject_lock, flags);
return 0;
}
out:
rv = aer_inj_write(bus, devfn, where, size, val);
- spin_unlock_irqrestore(&inject_lock, flags);
+ raw_spin_unlock_irqrestore(&inject_lock, flags);
return rv;
}
@@ -304,14 +304,14 @@ static int pci_bus_set_aer_ops(struct pci_bus *bus)
if (!bus_ops)
return -ENOMEM;
ops = pci_bus_set_ops(bus, &aer_inj_pci_ops);
- spin_lock_irqsave(&inject_lock, flags);
+ raw_spin_lock_irqsave(&inject_lock, flags);
if (ops == &aer_inj_pci_ops)
goto out;
pci_bus_ops_init(bus_ops, bus, ops);
list_add(&bus_ops->list, &pci_bus_ops_list);
bus_ops = NULL;
out:
- spin_unlock_irqrestore(&inject_lock, flags);
+ raw_spin_unlock_irqrestore(&inject_lock, flags);
kfree(bus_ops);
return 0;
}
@@ -383,7 +383,7 @@ static int aer_inject(struct aer_error_inj *einj)
uncor_mask);
}
- spin_lock_irqsave(&inject_lock, flags);
+ raw_spin_lock_irqsave(&inject_lock, flags);
err = __find_aer_error_by_dev(dev);
if (!err) {
@@ -404,14 +404,14 @@ static int aer_inject(struct aer_error_inj *einj)
!(einj->cor_status & ~cor_mask)) {
ret = -EINVAL;
pci_warn(dev, "The correctable error(s) is masked by device\n");
- spin_unlock_irqrestore(&inject_lock, flags);
+ raw_spin_unlock_irqrestore(&inject_lock, flags);
goto out_put;
}
if (!aer_mask_override && einj->uncor_status &&
!(einj->uncor_status & ~uncor_mask)) {
ret = -EINVAL;
pci_warn(dev, "The uncorrectable error(s) is masked by device\n");
- spin_unlock_irqrestore(&inject_lock, flags);
+ raw_spin_unlock_irqrestore(&inject_lock, flags);
goto out_put;
}
@@ -445,7 +445,7 @@ static int aer_inject(struct aer_error_inj *einj)
rperr->source_id &= 0x0000ffff;
rperr->source_id |= PCI_DEVID(einj->bus, devfn) << 16;
}
- spin_unlock_irqrestore(&inject_lock, flags);
+ raw_spin_unlock_irqrestore(&inject_lock, flags);
if (aer_mask_override) {
pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
--
2.43.0
On 2025-10-26 04:43:34 [+0000], Guangbo Cui wrote:
> The AER injection path may run in forced-threaded interrupt context
> under PREEMPT_RT while holding the lock with IRQs disabled.
not IRQs but interrupts.
> On RT kernels, spinlocks are converted into rt_spinlocks, which may
rt_spinlocks is not a thing. An established term is sleeping spinlock.
> sleep. Sleeping is not permitted in IRQ-off sections, so this may
> trigger lockdep warnings such as "Invalid wait context" in [1].
Why that reference? You should be able to describe your change _here_.
> raw_spin_lock_irqsave(&pci_lock);
> ↓
> rt_spin_lock(&inject_lock); <-- not allowed
It is still a spin_lock() that happens. The problem is that a
spinlock_t must not nest inside a raw_spinlock_t. See
Documentation/locking/locktypes.rst, the very bottom of that file.
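To make the nesting rule concrete, here is a minimal sketch. The lock
names are made up for illustration; this is not the AER code:

	static DEFINE_RAW_SPINLOCK(outer_lock);	/* hypothetical example lock */
	static DEFINE_SPINLOCK(inner_lock);	/* hypothetical example lock */

	static void nesting_example(void)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&outer_lock, flags);
		/*
		 * Invalid: on PREEMPT_RT spinlock_t is a sleeping lock, and a
		 * sleeping lock must not be acquired inside a raw_spinlock_t
		 * section. lockdep reports this as "Invalid wait context".
		 */
		spin_lock(&inner_lock);
		spin_unlock(&inner_lock);
		raw_spin_unlock_irqrestore(&outer_lock, flags);
	}

The reverse nesting, taking a raw_spinlock_t while already holding a
spinlock_t, is fine, since spinning locks may nest inside sleeping locks.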
> Switching inject_lock to raw_spinlock_t preserves non-sleeping locking
> semantics and avoids the warning when running with PREEMPT_RT enabled.
>
> The list protected by this lock is bounded and only used for debugging
> purposes, so using a raw spinlock will not cause unbounded latencies.
This I like.
> [1] https://lore.kernel.org/all/20251009150651.93618-1-jckeep.cuiguangbo@gmail.com/
>
> Acked-by: Waiman Long <longman@redhat.com>
> Signed-off-by: Guangbo Cui <jckeep.cuiguangbo@gmail.com>
> ---
> @@ -445,7 +445,7 @@ static int aer_inject(struct aer_error_inj *einj)
> rperr->source_id &= 0x0000ffff;
> rperr->source_id |= PCI_DEVID(einj->bus, devfn) << 16;
> }
> - spin_unlock_irqrestore(&inject_lock, flags);
> + raw_spin_unlock_irqrestore(&inject_lock, flags);
>
> if (aer_mask_override) {
> pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
This is the last hunk. You're missing the module exit part. This won't
compile on its own, so it can't be applied as-is. It might be chosen for
a backport; a change must always be self-contained.
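For reference, a rough sketch of the kind of follow-up the module exit
path needs. The actual body of the exit function isn't quoted in this
thread, so the teardown loop below is only an assumption; the point is
that every remaining spin_lock_irqsave(&inject_lock, ...) caller must be
converted in the same patch:

	/* Hypothetical sketch, not the missing hunk itself */
	static void __exit aer_inject_exit(void)
	{
		struct aer_error *err, *err_next;
		unsigned long flags;

		/* was: spin_lock_irqsave(&inject_lock, flags); */
		raw_spin_lock_irqsave(&inject_lock, flags);
		list_for_each_entry_safe(err, err_next, &einjected, list) {
			list_del(&err->list);
			kfree(err);
		}
		raw_spin_unlock_irqrestore(&inject_lock, flags);
	}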
Sebastian
On Mon, Oct 27, 2025 at 09:11:25AM +0100, Sebastian Andrzej Siewior wrote:
> > @@ -445,7 +445,7 @@ static int aer_inject(struct aer_error_inj *einj)
> > rperr->source_id &= 0x0000ffff;
> > rperr->source_id |= PCI_DEVID(einj->bus, devfn) << 16;
> > }
> > - spin_unlock_irqrestore(&inject_lock, flags);
> > + raw_spin_unlock_irqrestore(&inject_lock, flags);
> >
> > if (aer_mask_override) {
> > pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
>
> This is the last hunk. You're missing the module exit part. This won't
> compile on its own, so it can't be applied as-is. It might be chosen for
> a backport; a change must always be self-contained.
I haven’t yet developed the good habit of keeping each patch fully
self-contained. I will fix this in the next version.
Best regards,
Guangbo