Add a bitmap with one bit per possible domid, indicating that the
respective domain has changed its state (created, deleted, dying,
crashed, shutdown).
Registering the VIRQ_DOM_EXC event will result in setting the bits for
all existing domains and resetting all other bits.
As the usage of this bitmap is tightly coupled with the VIRQ_DOM_EXC
event, it is meant to be used only by a single consumer in the system,
just like the VIRQ_DOM_EXC event.
Resetting a bit will be done in a future patch.
This information is needed for Xenstore to keep track of all domains.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- use DOMID_FIRST_RESERVED instead of DOMID_MASK + 1 (Jan Beulich)
- use const (Jan Beulich)
- move call of domain_reset_states() into evtchn_bind_virq() (Jan Beulich)
- dynamically allocate dom_state_changed bitmap (Jan Beulich)
V3:
- use xvzalloc_array() (Jan Beulich)
- don't rename existing label (Jan Beulich)
---
xen/common/domain.c | 60 ++++++++++++++++++++++++++++++++++++++
xen/common/event_channel.c | 14 +++++++++
xen/include/xen/sched.h | 3 ++
3 files changed, 77 insertions(+)
diff --git a/xen/common/domain.c b/xen/common/domain.c
index e33a0a5a21..dab2344ef6 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -34,6 +34,7 @@
#include <xen/xenoprof.h>
#include <xen/irq.h>
#include <xen/argo.h>
+#include <xen/xvmalloc.h>
#include <asm/p2m.h>
#include <asm/processor.h>
#include <public/sched.h>
@@ -138,6 +139,60 @@ bool __read_mostly vmtrace_available;
bool __read_mostly vpmu_is_available;
+static DEFINE_SPINLOCK(dom_state_changed_lock);
+static unsigned long *dom_state_changed;
+
+int domain_init_states(void)
+{
+ const struct domain *d;
+ int rc = -ENOMEM;
+
+ spin_lock(&dom_state_changed_lock);
+
+ if ( dom_state_changed )
+ bitmap_zero(dom_state_changed, DOMID_FIRST_RESERVED);
+ else
+ {
+ dom_state_changed = xvzalloc_array(unsigned long,
+ BITS_TO_LONGS(DOMID_FIRST_RESERVED));
+ if ( !dom_state_changed )
+ goto unlock;
+ }
+
+ rcu_read_lock(&domlist_read_lock);
+
+ for_each_domain ( d )
+ set_bit(d->domain_id, dom_state_changed);
+
+ rcu_read_unlock(&domlist_read_lock);
+
+ rc = 0;
+
+ unlock:
+ spin_unlock(&dom_state_changed_lock);
+
+ return rc;
+}
+
+void domain_deinit_states(void)
+{
+ spin_lock(&dom_state_changed_lock);
+
+ XVFREE(dom_state_changed);
+
+ spin_unlock(&dom_state_changed_lock);
+}
+
+static void domain_changed_state(const struct domain *d)
+{
+ spin_lock(&dom_state_changed_lock);
+
+ if ( dom_state_changed )
+ set_bit(d->domain_id, dom_state_changed);
+
+ spin_unlock(&dom_state_changed_lock);
+}
+
static void __domain_finalise_shutdown(struct domain *d)
{
struct vcpu *v;
@@ -152,6 +207,7 @@ static void __domain_finalise_shutdown(struct domain *d)
return;
d->is_shut_down = 1;
+ domain_changed_state(d);
if ( (d->shutdown_code == SHUTDOWN_suspend) && d->suspend_evtchn )
evtchn_send(d, d->suspend_evtchn);
else
@@ -839,6 +895,7 @@ struct domain *domain_create(domid_t domid,
*/
domlist_insert(d);
+ domain_changed_state(d);
memcpy(d->handle, config->handle, sizeof(d->handle));
return d;
@@ -1104,6 +1161,7 @@ int domain_kill(struct domain *d)
/* Mem event cleanup has to go here because the rings
* have to be put before we call put_domain. */
vm_event_cleanup(d);
+ domain_changed_state(d);
put_domain(d);
send_global_virq(VIRQ_DOM_EXC);
/* fallthrough */
@@ -1293,6 +1351,8 @@ static void cf_check complete_domain_destroy(struct rcu_head *head)
xfree(d->vcpu);
+ domain_changed_state(d);
+
_domain_destroy(d);
send_global_virq(VIRQ_DOM_EXC);
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 8db2ca4ba2..d18a756217 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -485,6 +485,13 @@ int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port)
if ( (v = domain_vcpu(d, vcpu)) == NULL )
return -ENOENT;
+ if ( virq == VIRQ_DOM_EXC )
+ {
+ rc = domain_init_states();
+ if ( rc )
+ goto deinit;
+ }
+
write_lock(&d->event_lock);
if ( read_atomic(&v->virq_to_evtchn[virq]) )
@@ -527,6 +534,10 @@ int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port)
out:
write_unlock(&d->event_lock);
+ deinit:
+ if ( rc )
+ domain_deinit_states();
+
return rc;
}
@@ -730,6 +741,9 @@ int evtchn_close(struct domain *d1, int port1, bool guest)
struct vcpu *v;
unsigned long flags;
+ if ( chn1->u.virq == VIRQ_DOM_EXC )
+ domain_deinit_states();
+
v = d1->vcpu[virq_is_global(chn1->u.virq) ? 0 : chn1->notify_vcpu_id];
write_lock_irqsave(&v->virq_lock, flags);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 711668e028..16684bbaf9 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -800,6 +800,9 @@ void domain_resume(struct domain *d);
int domain_soft_reset(struct domain *d, bool resuming);
+int domain_init_states(void);
+void domain_deinit_states(void);
+
int vcpu_start_shutdown_deferral(struct vcpu *v);
void vcpu_end_shutdown_deferral(struct vcpu *v);
--
2.43.0
On 13.12.2024 17:24, Juergen Gross wrote:
> @@ -138,6 +139,60 @@ bool __read_mostly vmtrace_available;
>
> bool __read_mostly vpmu_is_available;
>
> +static DEFINE_SPINLOCK(dom_state_changed_lock);
> +static unsigned long *dom_state_changed;
__read_mostly?
> +int domain_init_states(void)
> +{
> + const struct domain *d;
> + int rc = -ENOMEM;
> +
> + spin_lock(&dom_state_changed_lock);
> +
> + if ( dom_state_changed )
> + bitmap_zero(dom_state_changed, DOMID_FIRST_RESERVED);
> + else
> + {
> + dom_state_changed = xvzalloc_array(unsigned long,
> + BITS_TO_LONGS(DOMID_FIRST_RESERVED));
> + if ( !dom_state_changed )
> + goto unlock;
> + }
> +
> + rcu_read_lock(&domlist_read_lock);
> +
> + for_each_domain ( d )
> + set_bit(d->domain_id, dom_state_changed);
Use the cheaper __set_bit() here?
> +static void domain_changed_state(const struct domain *d)
> +{
> + spin_lock(&dom_state_changed_lock);
> +
> + if ( dom_state_changed )
> + set_bit(d->domain_id, dom_state_changed);
And perhaps even here, considering everything's under lock now?
> --- a/xen/common/event_channel.c
> +++ b/xen/common/event_channel.c
> @@ -485,6 +485,13 @@ int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port)
> if ( (v = domain_vcpu(d, vcpu)) == NULL )
> return -ENOENT;
>
> + if ( virq == VIRQ_DOM_EXC )
> + {
> + rc = domain_init_states();
> + if ( rc )
> + goto deinit;
> + }
This is tied to VIRQ_DOM_EXC. How come ...
> @@ -527,6 +534,10 @@ int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port)
> out:
> write_unlock(&d->event_lock);
>
> + deinit:
> + if ( rc )
> + domain_deinit_states();
> +
> return rc;
> }
... de-init happens upon any error, regardless of vIRQ? Even checking
the virq isn't sufficient, as we also need to gracefully deal with the
-EEXIST path.
Jan
On 16.12.24 11:21, Jan Beulich wrote:
> On 13.12.2024 17:24, Juergen Gross wrote:
>> @@ -138,6 +139,60 @@ bool __read_mostly vmtrace_available;
>>
>> bool __read_mostly vpmu_is_available;
>>
>> +static DEFINE_SPINLOCK(dom_state_changed_lock);
>> +static unsigned long *dom_state_changed;
>
> __read_mostly?
Yes.
>
>> +int domain_init_states(void)
>> +{
>> + const struct domain *d;
>> + int rc = -ENOMEM;
>> +
>> + spin_lock(&dom_state_changed_lock);
>> +
>> + if ( dom_state_changed )
>> + bitmap_zero(dom_state_changed, DOMID_FIRST_RESERVED);
>> + else
>> + {
>> + dom_state_changed = xvzalloc_array(unsigned long,
>> + BITS_TO_LONGS(DOMID_FIRST_RESERVED));
>> + if ( !dom_state_changed )
>> + goto unlock;
>> + }
>> +
>> + rcu_read_lock(&domlist_read_lock);
>> +
>> + for_each_domain ( d )
>> + set_bit(d->domain_id, dom_state_changed);
>
> Use the cheaper __set_bit() here?
Okay.
>
>> +static void domain_changed_state(const struct domain *d)
>> +{
>> + spin_lock(&dom_state_changed_lock);
>> +
>> + if ( dom_state_changed )
>> + set_bit(d->domain_id, dom_state_changed);
>
> And perhaps even here, considering everything's under lock now?
True.
>
>> --- a/xen/common/event_channel.c
>> +++ b/xen/common/event_channel.c
>> @@ -485,6 +485,13 @@ int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port)
>> if ( (v = domain_vcpu(d, vcpu)) == NULL )
>> return -ENOENT;
>>
>> + if ( virq == VIRQ_DOM_EXC )
>> + {
>> + rc = domain_init_states();
>> + if ( rc )
>> + goto deinit;
>> + }
>
> This is tied to VIRQ_DOM_EXC. How come ...
>
>> @@ -527,6 +534,10 @@ int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port)
>> out:
>> write_unlock(&d->event_lock);
>>
>> + deinit:
>> + if ( rc )
>> + domain_deinit_states();
>> +
>> return rc;
>> }
>
> ... de-init happens upon any error, regardless of vIRQ? Even checking
> the virq isn't sufficient, as we also need to gracefully deal with the
> -EEXIST path.
Good catch. Will change that.
Juergen
© 2016 - 2026 Red Hat, Inc.