Change find_virq() to return -EEXIST when a VIRQ is bound to a
different CPU than the one passed in. With that, remove the BUG_ON()
from bind_virq_to_irq() to propagate the error upwards.
Some VIRQs are per-cpu, but others are per-domain or global. Those must
be bound to CPU0 and can then migrate elsewhere. The lookup for
per-domain and global will probably fail when migrated off CPU 0,
especially when the current CPU is tracked. This now returns -EEXIST
instead of BUG_ON().
A second call to bind a per-domain or global VIRQ is not expected, but
make it non-fatal to avoid trying to look up the irq, since we don't
know which per_cpu(virq_to_irq) it will be in.
Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
---
V2:
New
---
drivers/xen/events/events_base.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 199afe59f357..a85bc43f4344 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1314,10 +1314,12 @@ int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
-static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
+static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn,
+ bool percpu)
{
struct evtchn_status status;
evtchn_port_t port;
+ bool exists = false;
int rc;
memset(&status, 0, sizeof(status));
@@ -1329,12 +1331,16 @@ static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
continue;
if (status.status != EVTCHNSTAT_virq)
continue;
- if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
+ if (status.u.virq != virq)
+ continue;
+ if (status.vcpu == xen_vcpu_nr(cpu)) {
*evtchn = port;
return 0;
+ } else if (!percpu) {
+ exists = true;
}
}
- return -ENOENT;
+ return exists ? -EEXIST : -ENOENT;
}
/**
@@ -1381,8 +1387,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
evtchn = bind_virq.port;
else {
if (ret == -EEXIST)
- ret = find_virq(virq, cpu, &evtchn);
- BUG_ON(ret < 0);
+ ret = find_virq(virq, cpu, &evtchn, percpu);
+ if (ret)
+ goto out;
}
ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq);
--
2.50.1
On 26.08.25 02:55, Jason Andryuk wrote:
> Change find_virq() to return -EEXIST when a VIRQ is bound to a
> different CPU than the one passed in. With that, remove the BUG_ON()
> from bind_virq_to_irq() to propogate the error upwards.
>
> Some VIRQs are per-cpu, but others are per-domain or global. Those must
> be bound to CPU0 and can then migrate elsewhere. The lookup for
> per-domain and global will probably fail when migrated off CPU 0,
> especially when the current CPU is tracked. This now returns -EEXIST
> instead of BUG_ON().
>
> A second call to bind a per-domain or global VIRQ is not expected, but
> make it non-fatal to avoid trying to look up the irq, since we don't
> know which per_cpu(virq_to_irq) it will be in.
>
> Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
> ---
> V2:
> New
> ---
> drivers/xen/events/events_base.c | 17 ++++++++++++-----
> 1 file changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
> index 199afe59f357..a85bc43f4344 100644
> --- a/drivers/xen/events/events_base.c
> +++ b/drivers/xen/events/events_base.c
> @@ -1314,10 +1314,12 @@ int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
> }
> EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
>
> -static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
> +static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn,
> + bool percpu)
> {
> struct evtchn_status status;
> evtchn_port_t port;
> + bool exists = false;
> int rc;
>
> memset(&status, 0, sizeof(status));
> @@ -1329,12 +1331,16 @@ static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
> continue;
> if (status.status != EVTCHNSTAT_virq)
> continue;
> - if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
> + if (status.u.virq != virq)
> + continue;
> + if (status.vcpu == xen_vcpu_nr(cpu)) {
> *evtchn = port;
> return 0;
> + } else if (!percpu) {
> + exists = true;
> }
> }
> - return -ENOENT;
> + return exists ? -EEXIST : -ENOENT;
> }
>
> /**
> @@ -1381,8 +1387,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
> evtchn = bind_virq.port;
> else {
> if (ret == -EEXIST)
> - ret = find_virq(virq, cpu, &evtchn);
> - BUG_ON(ret < 0);
> + ret = find_virq(virq, cpu, &evtchn, percpu);
> + if (ret)
> + goto out;

I think you are leaking info here. I guess a call of __unbind_from_irq() is
wanted like in the error case below (note that the case of no valid evtchn is
handled there just fine).

Juergen
On 2025-08-27 11:21, Jürgen Groß wrote:
> On 26.08.25 02:55, Jason Andryuk wrote:
>> Change find_virq() to return -EEXIST when a VIRQ is bound to a
>> different CPU than the one passed in. With that, remove the BUG_ON()
>> from bind_virq_to_irq() to propogate the error upwards.
>>
>> Some VIRQs are per-cpu, but others are per-domain or global. Those must
>> be bound to CPU0 and can then migrate elsewhere. The lookup for
>> per-domain and global will probably fail when migrated off CPU 0,
>> especially when the current CPU is tracked. This now returns -EEXIST
>> instead of BUG_ON().
>>
>> A second call to bind a per-domain or global VIRQ is not expected, but
>> make it non-fatal to avoid trying to look up the irq, since we don't
>> know which per_cpu(virq_to_irq) it will be in.
>>
>> Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
>
>> @@ -1381,8 +1387,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned
>> int cpu, bool percpu)
>> evtchn = bind_virq.port;
>> else {
>> if (ret == -EEXIST)
>> - ret = find_virq(virq, cpu, &evtchn);
>> - BUG_ON(ret < 0);
>> + ret = find_virq(virq, cpu, &evtchn, percpu);
>> + if (ret)
>> + goto out;
>
> I think you are leaking info here. I guess a call of __unbind_from_irq() is
> wanted like in the error case below (note that the case of no valid
> evtchn is
> handled there just fine).

Ok, thanks for catching that.

I'm going to add Cc: stable to the next version of this. While it doesn't
have a Fixes associated, we want this as a prerequisite for patch 3.

Regards,
Jason
© 2016 - 2025 Red Hat, Inc.