Change find_virq() to return -EEXIST when a VIRQ is bound to a
different CPU than the one passed in. With that, remove the BUG_ON()
from bind_virq_to_irq() to propagate the error upwards.
Some VIRQs are per-cpu, but others are per-domain or global. The latter
must be bound to CPU 0 and can then migrate elsewhere. The lookup for a
per-domain or global VIRQ will probably fail once it has migrated off
CPU 0, especially when the current CPU is tracked. That case now returns
-EEXIST instead of hitting the BUG_ON().
A second call to bind a per-domain or global VIRQ is not expected, but
make it non-fatal to avoid trying to look up the irq, since we don't
know which per_cpu(virq_to_irq) it will be in.
Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
---
V2:
New
---
drivers/xen/events/events_base.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 199afe59f357..a85bc43f4344 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1314,10 +1314,12 @@ int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
-static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
+static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn,
+ bool percpu)
{
struct evtchn_status status;
evtchn_port_t port;
+ bool exists = false;
int rc;
memset(&status, 0, sizeof(status));
@@ -1329,12 +1331,16 @@ static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
continue;
if (status.status != EVTCHNSTAT_virq)
continue;
- if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
+ if (status.u.virq != virq)
+ continue;
+ if (status.vcpu == xen_vcpu_nr(cpu)) {
*evtchn = port;
return 0;
+ } else if (!percpu) {
+ exists = true;
}
}
- return -ENOENT;
+ return exists ? -EEXIST : -ENOENT;
}
/**
@@ -1381,8 +1387,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
evtchn = bind_virq.port;
else {
if (ret == -EEXIST)
- ret = find_virq(virq, cpu, &evtchn);
- BUG_ON(ret < 0);
+ ret = find_virq(virq, cpu, &evtchn, percpu);
+ if (ret)
+ goto out;
}
ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq);
--
2.50.1
On 26.08.25 02:55, Jason Andryuk wrote:
> Change find_virq() to return -EEXIST when a VIRQ is bound to a
> different CPU than the one passed in. With that, remove the BUG_ON()
> from bind_virq_to_irq() to propagate the error upwards.
>
> Some VIRQs are per-cpu, but others are per-domain or global. Those must
> be bound to CPU0 and can then migrate elsewhere. The lookup for
> per-domain and global will probably fail when migrated off CPU 0,
> especially when the current CPU is tracked. This now returns -EEXIST
> instead of BUG_ON().
>
> A second call to bind a per-domain or global VIRQ is not expected, but
> make it non-fatal to avoid trying to look up the irq, since we don't
> know which per_cpu(virq_to_irq) it will be in.
>
> Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
> ---
> V2:
> New
> ---
> drivers/xen/events/events_base.c | 17 ++++++++++++-----
> 1 file changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
> index 199afe59f357..a85bc43f4344 100644
> --- a/drivers/xen/events/events_base.c
> +++ b/drivers/xen/events/events_base.c
> @@ -1314,10 +1314,12 @@ int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
> }
> EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
>
> -static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
> +static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn,
> + bool percpu)
> {
> struct evtchn_status status;
> evtchn_port_t port;
> + bool exists = false;
> int rc;
>
> memset(&status, 0, sizeof(status));
> @@ -1329,12 +1331,16 @@ static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
> continue;
> if (status.status != EVTCHNSTAT_virq)
> continue;
> - if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
> + if (status.u.virq != virq)
> + continue;
> + if (status.vcpu == xen_vcpu_nr(cpu)) {
> *evtchn = port;
> return 0;
> + } else if (!percpu) {
> + exists = true;
> }
> }
> - return -ENOENT;
> + return exists ? -EEXIST : -ENOENT;
> }
>
> /**
> @@ -1381,8 +1387,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
> evtchn = bind_virq.port;
> else {
> if (ret == -EEXIST)
> - ret = find_virq(virq, cpu, &evtchn);
> - BUG_ON(ret < 0);
> + ret = find_virq(virq, cpu, &evtchn, percpu);
> + if (ret)
> + goto out;
I think you are leaking "info" (the irq_info) here. I guess a call of
__unbind_from_irq() is wanted, as in the error case below (note that the
case of no valid evtchn is handled there just fine).
Juergen
On 2025-08-27 11:21, Jürgen Groß wrote:
> On 26.08.25 02:55, Jason Andryuk wrote:
>> Change find_virq() to return -EEXIST when a VIRQ is bound to a
>> different CPU than the one passed in. With that, remove the BUG_ON()
>> from bind_virq_to_irq() to propagate the error upwards.
>>
>> Some VIRQs are per-cpu, but others are per-domain or global. Those must
>> be bound to CPU0 and can then migrate elsewhere. The lookup for
>> per-domain and global will probably fail when migrated off CPU 0,
>> especially when the current CPU is tracked. This now returns -EEXIST
>> instead of BUG_ON().
>>
>> A second call to bind a per-domain or global VIRQ is not expected, but
>> make it non-fatal to avoid trying to look up the irq, since we don't
>> know which per_cpu(virq_to_irq) it will be in.
>>
>> Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
>
>> @@ -1381,8 +1387,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned
>> int cpu, bool percpu)
>> evtchn = bind_virq.port;
>> else {
>> if (ret == -EEXIST)
>> - ret = find_virq(virq, cpu, &evtchn);
>> - BUG_ON(ret < 0);
>> + ret = find_virq(virq, cpu, &evtchn, percpu);
>> + if (ret)
>> + goto out;
>
> I think you are leaking info here. I guess a call of __unbind_from_irq() is
> wanted like in the error case below (note that the case of no valid
> evtchn is
> handled there just fine).
Ok, thanks for catching that.
I'm going to add Cc: stable to the next version of this. While it
doesn't have an associated Fixes: tag, we want this as a prerequisite
for patch 3.
Regards,
Jason
© 2016 - 2026 Red Hat, Inc.