Using a linked-list for LPIs is less than ideal as it of course requires
iterative searches to find a particular entry. An xarray is a better
data structure for this use case, as it provides faster searches and can
still handle a potentially sparse range of INTID allocations.
Start by storing LPIs in an xarray, punting usage of the xarray to a
subsequent change.
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
arch/arm64/kvm/vgic/vgic-init.c | 3 +++
arch/arm64/kvm/vgic/vgic-its.c | 16 ++++++++++++++++
arch/arm64/kvm/vgic/vgic.c | 1 +
include/kvm/arm_vgic.h | 2 ++
4 files changed, 22 insertions(+)
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index e949e1d0fd9f..411719053107 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -56,6 +56,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
INIT_LIST_HEAD(&dist->lpi_list_head);
INIT_LIST_HEAD(&dist->lpi_translation_cache);
raw_spin_lock_init(&dist->lpi_list_lock);
+ xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
}
/* CREATION */
@@ -366,6 +367,8 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
if (vgic_supports_direct_msis(kvm))
vgic_v4_teardown(kvm);
+
+ xa_destroy(&dist->lpi_xa);
}
static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index e2764d0ffa9f..fb2d3c356984 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -52,6 +52,12 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
if (!irq)
return ERR_PTR(-ENOMEM);
+ ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
+ if (ret) {
+ kfree(irq);
+ return ERR_PTR(ret);
+ }
+
INIT_LIST_HEAD(&irq->lpi_list);
INIT_LIST_HEAD(&irq->ap_list);
raw_spin_lock_init(&irq->irq_lock);
@@ -86,12 +92,22 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
goto out_unlock;
}
+ ret = xa_err(xa_store(&dist->lpi_xa, intid, irq, 0));
+ if (ret) {
+ xa_release(&dist->lpi_xa, intid);
+ kfree(irq);
+ goto out_unlock;
+ }
+
list_add_tail(&irq->lpi_list, &dist->lpi_list_head);
dist->lpi_list_count++;
out_unlock:
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ if (ret)
+ return ERR_PTR(ret);
+
/*
* We "cache" the configuration table entries in our struct vgic_irq's.
* However we only have those structs for mapped IRQs, so we read in
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index db2a95762b1b..c126014f8395 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
return;
list_del(&irq->lpi_list);
+ xa_erase(&dist->lpi_xa, irq->intid);
dist->lpi_list_count--;
kfree(irq);
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 8cc38e836f54..795b35656b54 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -13,6 +13,7 @@
#include <linux/spinlock.h>
#include <linux/static_key.h>
#include <linux/types.h>
+#include <linux/xarray.h>
#include <kvm/iodev.h>
#include <linux/list.h>
#include <linux/jump_label.h>
@@ -275,6 +276,7 @@ struct vgic_dist {
/* Protects the lpi_list and the count value below. */
raw_spinlock_t lpi_list_lock;
+ struct xarray lpi_xa;
struct list_head lpi_list_head;
int lpi_list_count;
--
2.44.0.rc0.258.g7320e95886-goog
On 2024/2/17 02:41, Oliver Upton wrote: > Using a linked-list for LPIs is less than ideal as it of course requires > iterative searches to find a particular entry. An xarray is a better > data structure for this use case, as it provides faster searches and can > still handle a potentially sparse range of INTID allocations. > > Start by storing LPIs in an xarray, punting usage of the xarray to a > subsequent change. > > Signed-off-by: Oliver Upton <oliver.upton@linux.dev> [..] > diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c > index db2a95762b1b..c126014f8395 100644 > --- a/arch/arm64/kvm/vgic/vgic.c > +++ b/arch/arm64/kvm/vgic/vgic.c > @@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq) > return; > > list_del(&irq->lpi_list); > + xa_erase(&dist->lpi_xa, irq->intid); We can get here *after* grabbing the vgic_cpu->ap_list_lock (e.g., vgic_flush_pending_lpis()/vgic_put_irq()). And as according to vGIC's "Locking order", we should disable interrupts before taking the xa_lock in xa_erase() and we would otherwise see bad things like deadlock.. It's not a problem before patch #10, where we drop the lpi_list_lock and start taking the xa_lock with interrupts enabled. Consider switching to use xa_erase_irq() instead? > dist->lpi_list_count--; > > kfree(irq); Thanks, Zenghui
On Tue, 20 Feb 2024 16:30:24 +0000, Zenghui Yu <zenghui.yu@linux.dev> wrote: > > On 2024/2/17 02:41, Oliver Upton wrote: > > Using a linked-list for LPIs is less than ideal as it of course requires > > iterative searches to find a particular entry. An xarray is a better > > data structure for this use case, as it provides faster searches and can > > still handle a potentially sparse range of INTID allocations. > > > > Start by storing LPIs in an xarray, punting usage of the xarray to a > > subsequent change. > > > > Signed-off-by: Oliver Upton <oliver.upton@linux.dev> > > [..] > > > diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c > > index db2a95762b1b..c126014f8395 100644 > > --- a/arch/arm64/kvm/vgic/vgic.c > > +++ b/arch/arm64/kvm/vgic/vgic.c > > @@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq) > > return; > > list_del(&irq->lpi_list); > > + xa_erase(&dist->lpi_xa, irq->intid); > > We can get here *after* grabbing the vgic_cpu->ap_list_lock (e.g., > vgic_flush_pending_lpis()/vgic_put_irq()). And as according to vGIC's > "Locking order", we should disable interrupts before taking the xa_lock > in xa_erase() and we would otherwise see bad things like deadlock.. > > It's not a problem before patch #10, where we drop the lpi_list_lock and > start taking the xa_lock with interrupts enabled. Consider switching to > use xa_erase_irq() instead? But does it actually work? xa_erase_irq() uses spin_lock_irq(), followed by spin_unlock_irq(). So if we were already in interrupt context, we would end-up reenabling interrupts. At least, this should be the irqsave version. The question is whether we manipulate LPIs (in the get/put sense) on the back of an interrupt handler (like we do for the timer). It isn't obvious to me that it is the case, but I haven't spent much time staring at this code recently. Thanks, M. -- Without deviation from the norm, progress is not possible.
On Tue, Feb 20, 2024 at 05:24:50PM +0000, Marc Zyngier wrote: > On Tue, 20 Feb 2024 16:30:24 +0000, > Zenghui Yu <zenghui.yu@linux.dev> wrote: > > > > On 2024/2/17 02:41, Oliver Upton wrote: > > > Using a linked-list for LPIs is less than ideal as it of course requires > > > iterative searches to find a particular entry. An xarray is a better > > > data structure for this use case, as it provides faster searches and can > > > still handle a potentially sparse range of INTID allocations. > > > > > > Start by storing LPIs in an xarray, punting usage of the xarray to a > > > subsequent change. > > > > > > Signed-off-by: Oliver Upton <oliver.upton@linux.dev> > > > > [..] > > > > > diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c > > > index db2a95762b1b..c126014f8395 100644 > > > --- a/arch/arm64/kvm/vgic/vgic.c > > > +++ b/arch/arm64/kvm/vgic/vgic.c > > > @@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq) > > > return; > > > list_del(&irq->lpi_list); > > > + xa_erase(&dist->lpi_xa, irq->intid); > > > > We can get here *after* grabbing the vgic_cpu->ap_list_lock (e.g., > > vgic_flush_pending_lpis()/vgic_put_irq()). And as according to vGIC's > > "Locking order", we should disable interrupts before taking the xa_lock > > in xa_erase() and we would otherwise see bad things like deadlock.. > > > > It's not a problem before patch #10, where we drop the lpi_list_lock and > > start taking the xa_lock with interrupts enabled. Consider switching to > > use xa_erase_irq() instead? > > But does it actually work? xa_erase_irq() uses spin_lock_irq(), > followed by spin_unlock_irq(). So if we were already in interrupt > context, we would end-up reenabling interrupts. At least, this should > be the irqsave version. This is what I was planning to do, although I may kick it out to patch 10 to avoid churn. > The question is whether we manipulate LPIs (in the get/put sense) on > the back of an interrupt handler (like we do for the timer). It isn't > obvious to me that it is the case, but I haven't spent much time > staring at this code recently. I think we can get into here both from contexts w/ interrupts disabled or enabled. irqfd_wakeup() expects to be called w/ interrupts disabled. All the more reason to use irqsave() / irqrestore() flavors of all of this, and a reminder to go check all callsites that implicitly take the xa_lock. -- Thanks, Oliver
On Tue, 20 Feb 2024 17:43:03 +0000, Oliver Upton <oliver.upton@linux.dev> wrote: > > On Tue, Feb 20, 2024 at 05:24:50PM +0000, Marc Zyngier wrote: > > On Tue, 20 Feb 2024 16:30:24 +0000, > > Zenghui Yu <zenghui.yu@linux.dev> wrote: > > > > > > On 2024/2/17 02:41, Oliver Upton wrote: > > > > Using a linked-list for LPIs is less than ideal as it of course requires > > > > iterative searches to find a particular entry. An xarray is a better > > > > data structure for this use case, as it provides faster searches and can > > > > still handle a potentially sparse range of INTID allocations. > > > > > > > > Start by storing LPIs in an xarray, punting usage of the xarray to a > > > > subsequent change. > > > > > > > > Signed-off-by: Oliver Upton <oliver.upton@linux.dev> > > > > > > [..] > > > > > > > diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c > > > > index db2a95762b1b..c126014f8395 100644 > > > > --- a/arch/arm64/kvm/vgic/vgic.c > > > > +++ b/arch/arm64/kvm/vgic/vgic.c > > > > @@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq) > > > > return; > > > > list_del(&irq->lpi_list); > > > > + xa_erase(&dist->lpi_xa, irq->intid); > > > > > > We can get here *after* grabbing the vgic_cpu->ap_list_lock (e.g., > > > vgic_flush_pending_lpis()/vgic_put_irq()). And as according to vGIC's > > > "Locking order", we should disable interrupts before taking the xa_lock > > > in xa_erase() and we would otherwise see bad things like deadlock.. > > > > > > It's not a problem before patch #10, where we drop the lpi_list_lock and > > > start taking the xa_lock with interrupts enabled. Consider switching to > > > use xa_erase_irq() instead? > > > > But does it actually work? xa_erase_irq() uses spin_lock_irq(), > > followed by spin_unlock_irq(). So if we were already in interrupt > > context, we would end-up reenabling interrupts. At least, this should > > be the irqsave version. > > This is what I was planning to do, although I may kick it out to patch > 10 to avoid churn. > > > The question is whether we manipulate LPIs (in the get/put sense) on > > the back of an interrupt handler (like we do for the timer). It isn't > > obvious to me that it is the case, but I haven't spent much time > > staring at this code recently. > > I think we can get into here both from contexts w/ interrupts disabled > or enabled. irqfd_wakeup() expects to be called w/ interrupts disabled. > > All the more reason to use irqsave() / irqrestore() flavors of all of > this, and a reminder to go check all callsites that implicitly take the > xa_lock. Sounds good. Maybe you can also update the locking order "documentation" to include the xa_lock? I expect that it will ultimately replace lpi_list_lock. Thanks, M. -- Without deviation from the norm, progress is not possible.
On Tue, Feb 20, 2024 at 05:53:55PM +0000, Marc Zyngier wrote: > On Tue, 20 Feb 2024 17:43:03 +0000, Oliver Upton <oliver.upton@linux.dev> wrote: > > I think we can get into here both from contexts w/ interrupts disabled > > or enabled. irqfd_wakeup() expects to be called w/ interrupts disabled. > > > > All the more reason to use irqsave() / irqrestore() flavors of all of > > this, and a reminder to go check all callsites that implicitly take the > > xa_lock. > > Sounds good. Maybe you can also update the locking order > "documentation" to include the xa_lock? I expect that it will > ultimately replace lpi_list_lock. Yep, I got to the point of deleting the lpi_list_lock on the full series, which is where I update the documentation. I really didn't want people to know I'm adding yet another layer of locking in the interim... Anyways, I think there's sufficient feedback to justify a respin. I'll make sure the documentation is updated w/ the xa_lock for the stuff I'm trying to land in 6.9. -- Thanks, Oliver
Hi Zenghui, On Wed, Feb 21, 2024 at 12:30:24AM +0800, Zenghui Yu wrote: > On 2024/2/17 02:41, Oliver Upton wrote: > > Using a linked-list for LPIs is less than ideal as it of course requires > > iterative searches to find a particular entry. An xarray is a better > > data structure for this use case, as it provides faster searches and can > > still handle a potentially sparse range of INTID allocations. > > > > Start by storing LPIs in an xarray, punting usage of the xarray to a > > subsequent change. > > > > Signed-off-by: Oliver Upton <oliver.upton@linux.dev> > > [..] > > > diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c > > index db2a95762b1b..c126014f8395 100644 > > --- a/arch/arm64/kvm/vgic/vgic.c > > +++ b/arch/arm64/kvm/vgic/vgic.c > > @@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq) > > return; > > list_del(&irq->lpi_list); > > + xa_erase(&dist->lpi_xa, irq->intid); > > We can get here *after* grabbing the vgic_cpu->ap_list_lock (e.g., > vgic_flush_pending_lpis()/vgic_put_irq()). And as according to vGIC's > "Locking order", we should disable interrupts before taking the xa_lock > in xa_erase() and we would otherwise see bad things like deadlock.. Nice catch! Yeah, the general intention was to disable interrupts outside of the xa_lock, however: > It's not a problem before patch #10, where we drop the lpi_list_lock and > start taking the xa_lock with interrupts enabled. Consider switching to > use xa_erase_irq() instead? I don't think this change is safe until #10, as the implied xa_unlock_irq() would re-enable interrupts before the lpi_list_lock is dropped. Or do I have wires crossed? -- Thanks, Oliver
On 2024/2/21 1:15, Oliver Upton wrote: > Hi Zenghui, > > On Wed, Feb 21, 2024 at 12:30:24AM +0800, Zenghui Yu wrote: >> On 2024/2/17 02:41, Oliver Upton wrote: >>> Using a linked-list for LPIs is less than ideal as it of course requires >>> iterative searches to find a particular entry. An xarray is a better >>> data structure for this use case, as it provides faster searches and can >>> still handle a potentially sparse range of INTID allocations. >>> >>> Start by storing LPIs in an xarray, punting usage of the xarray to a >>> subsequent change. >>> >>> Signed-off-by: Oliver Upton <oliver.upton@linux.dev> >> >> [..] >> >>> diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c >>> index db2a95762b1b..c126014f8395 100644 >>> --- a/arch/arm64/kvm/vgic/vgic.c >>> +++ b/arch/arm64/kvm/vgic/vgic.c >>> @@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq) >>> return; >>> list_del(&irq->lpi_list); >>> + xa_erase(&dist->lpi_xa, irq->intid); >> >> We can get here *after* grabbing the vgic_cpu->ap_list_lock (e.g., >> vgic_flush_pending_lpis()/vgic_put_irq()). And as according to vGIC's >> "Locking order", we should disable interrupts before taking the xa_lock >> in xa_erase() and we would otherwise see bad things like deadlock.. > > Nice catch! > > Yeah, the general intention was to disable interrupts outside of the > xa_lock, however: > >> It's not a problem before patch #10, where we drop the lpi_list_lock and >> start taking the xa_lock with interrupts enabled. Consider switching to >> use xa_erase_irq() instead? > > I don't think this change is safe until #10, as the implied xa_unlock_irq() > would re-enable interrupts before the lpi_list_lock is dropped. Or do I > have wires crossed? No, you're right. My intention was to fix it in patch #10. And as you've both pointed out, using xa_erase_irq() can hardly be the correct fix. My mistake :-( . Thanks, Zenghui
On Wed, Feb 21, 2024 at 01:11:02PM +0800, Zenghui Yu wrote: > No, you're right. My intention was to fix it in patch #10. And as > you've both pointed out, using xa_erase_irq() can hardly be the correct > fix. My mistake :-( . Still -- thank you for pointing out the very obvious bug at the end of the series :) -- Thanks, Oliver
© 2016 - 2026 Red Hat, Inc.