Move sev_lock/unlock_vcpus_for_migration() to kvm_main.c and name the
new functions kvm_lock_all_vcpus()/kvm_unlock_all_vcpus() and
kvm_lock_all_vcpus_nested().

This code allows locking all vCPUs without triggering a lockdep warning
about reaching MAX_LOCK_DEPTH, by coercing lockdep into thinking that we
release all of the locks other than vCPU 0's lock immediately after we
take them.

No functional change intended.
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/svm/sev.c | 65 +++---------------------------------
include/linux/kvm_host.h | 6 ++++
virt/kvm/kvm_main.c | 71 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 81 insertions(+), 61 deletions(-)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 0bc708ee2788..7adc54b1f741 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1889,63 +1889,6 @@ enum sev_migration_role {
SEV_NR_MIGRATION_ROLES,
};
-static int sev_lock_vcpus_for_migration(struct kvm *kvm,
- enum sev_migration_role role)
-{
- struct kvm_vcpu *vcpu;
- unsigned long i, j;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (mutex_lock_killable_nested(&vcpu->mutex, role))
- goto out_unlock;
-
-#ifdef CONFIG_PROVE_LOCKING
- if (!i)
- /*
- * Reset the role to one that avoids colliding with
- * the role used for the first vcpu mutex.
- */
- role = SEV_NR_MIGRATION_ROLES;
- else
- mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
-#endif
- }
-
- return 0;
-
-out_unlock:
-
- kvm_for_each_vcpu(j, vcpu, kvm) {
- if (i == j)
- break;
-
-#ifdef CONFIG_PROVE_LOCKING
- if (j)
- mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_);
-#endif
-
- mutex_unlock(&vcpu->mutex);
- }
- return -EINTR;
-}
-
-static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
-{
- struct kvm_vcpu *vcpu;
- unsigned long i;
- bool first = true;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (first)
- first = false;
- else
- mutex_acquire(&vcpu->mutex.dep_map,
- SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_);
-
- mutex_unlock(&vcpu->mutex);
- }
-}
-
static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
{
struct kvm_sev_info *dst = to_kvm_sev_info(dst_kvm);
@@ -2083,10 +2026,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
charged = true;
}
- ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE);
+ ret = kvm_lock_all_vcpus_nested(kvm, false, SEV_MIGRATION_SOURCE);
if (ret)
goto out_dst_cgroup;
- ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET);
+ ret = kvm_lock_all_vcpus_nested(source_kvm, false, SEV_MIGRATION_TARGET);
if (ret)
goto out_dst_vcpu;
@@ -2100,9 +2043,9 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
ret = 0;
out_source_vcpu:
- sev_unlock_vcpus_for_migration(source_kvm);
+ kvm_unlock_all_vcpus(source_kvm);
out_dst_vcpu:
- sev_unlock_vcpus_for_migration(kvm);
+ kvm_unlock_all_vcpus(kvm);
out_dst_cgroup:
/* Operates on the source on success, on the destination on failure. */
if (charged)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1dedc421b3e3..30cf28bf5c80 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1015,6 +1015,12 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
void kvm_destroy_vcpus(struct kvm *kvm);
+int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role);
+void kvm_unlock_all_vcpus(struct kvm *kvm);
+
+#define kvm_lock_all_vcpus(kvm, trylock) \
+ kvm_lock_all_vcpus_nested(kvm, trylock, 0)
+
void vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 69782df3617f..71c0d8c35b4b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1368,6 +1368,77 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
return 0;
}
+
+/*
+ * Lock all VM vCPUs.
+ * Can be used nested (to lock the vCPUs of two VMs, for example).
+ */
+int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role)
+{
+ struct kvm_vcpu *vcpu;
+ unsigned long i, j;
+
+ lockdep_assert_held(&kvm->lock);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+
+ if (trylock && !mutex_trylock_nested(&vcpu->mutex, role))
+ goto out_unlock;
+ else if (!trylock && mutex_lock_killable_nested(&vcpu->mutex, role))
+ goto out_unlock;
+
+#ifdef CONFIG_PROVE_LOCKING
+ if (!i)
+ /*
+ * Reset the role to one that avoids colliding with
+ * the role used for the first vcpu mutex.
+ */
+ role = MAX_LOCK_DEPTH - 1;
+ else
+ mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
+#endif
+ }
+
+ return 0;
+
+out_unlock:
+
+ kvm_for_each_vcpu(j, vcpu, kvm) {
+ if (i == j)
+ break;
+
+#ifdef CONFIG_PROVE_LOCKING
+ if (j)
+ mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_);
+#endif
+
+ mutex_unlock(&vcpu->mutex);
+ }
+ return -EINTR;
+}
+EXPORT_SYMBOL_GPL(kvm_lock_all_vcpus_nested);
+
+void kvm_unlock_all_vcpus(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ unsigned long i;
+ bool first = true;
+
+ lockdep_assert_held(&kvm->lock);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (first)
+ first = false;
+ else
+ mutex_acquire(&vcpu->mutex.dep_map,
+ MAX_LOCK_DEPTH - 1, 0, _THIS_IP_);
+
+ mutex_unlock(&vcpu->mutex);
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_unlock_all_vcpus);
+
+
/*
* Allocation size is twice as large as the actual dirty bitmap size.
* See kvm_vm_ioctl_get_dirty_log() why this is needed.
--
2.26.3
On Tue, Apr 08, 2025 at 09:41:34PM -0400, Maxim Levitsky wrote:
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 69782df3617f..71c0d8c35b4b 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1368,6 +1368,77 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
> return 0;
> }
>
> +
> +/*
> + * Lock all VM vCPUs.
> + * Can be used nested (to lock vCPUS of two VMs for example)
> + */
> +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role)
> +{
> + struct kvm_vcpu *vcpu;
> + unsigned long i, j;
> +
> + lockdep_assert_held(&kvm->lock);
> +
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> +
> + if (trylock && !mutex_trylock_nested(&vcpu->mutex, role))
> + goto out_unlock;
> + else if (!trylock && mutex_lock_killable_nested(&vcpu->mutex, role))
> + goto out_unlock;
> +
> +#ifdef CONFIG_PROVE_LOCKING
> + if (!i)
> + /*
> + * Reset the role to one that avoids colliding with
> + * the role used for the first vcpu mutex.
> + */
> + role = MAX_LOCK_DEPTH - 1;
> + else
> + mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
> +#endif
> + }
This code is all sorts of terrible.
Per the lockdep_assert_held() above, you serialize all these locks by
holding that lock; this means you can be using the _nest_lock()
annotation.
Also, the original code didn't have this trylock nonsense, and the
Changelog doesn't mention this -- in fact the Changelog claims no
change, which is patently false.
Anyway, please write like:
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (mutex_lock_killable_nest_lock(&vcpu->mutex, &kvm->lock))
			goto unlock;
	}

	return 0;

unlock:

	kvm_for_each_vcpu(j, vcpu, kvm) {
		if (j == i)
			break;

		mutex_unlock(&vcpu->mutex);
	}
	return -EINTR;
And yes, you'll have to add mutex_lock_killable_nest_lock(), but that
should be trivial.
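For reference, a rough sketch of what such a wrapper could look like,
modeled on the existing mutex_lock_killable_nested() and
_mutex_lock_nest_lock() pair under CONFIG_DEBUG_LOCK_ALLOC; the exact
names and placement here are assumptions, not an existing API:

	/* include/linux/mutex.h -- sketch only */
	#define mutex_lock_killable_nest_lock(lock, nest_lock)			\
	(									\
		typecheck(struct lockdep_map *, &(nest_lock)->dep_map),	\
		_mutex_lock_killable_nest_lock(lock, &(nest_lock)->dep_map)	\
	)

	/* kernel/locking/mutex.c -- sketch only */
	int __sched
	_mutex_lock_killable_nest_lock(struct mutex *lock, struct lockdep_map *nest)
	{
		/* TASK_KILLABLE sleep, annotated against the nest lock for lockdep. */
		return __mutex_lock(lock, TASK_KILLABLE, 0, nest, _RET_IP_);
	}
	EXPORT_SYMBOL_GPL(_mutex_lock_killable_nest_lock);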
On 4/10/25 10:16, Peter Zijlstra wrote:
> On Tue, Apr 08, 2025 at 09:41:34PM -0400, Maxim Levitsky wrote:
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 69782df3617f..71c0d8c35b4b 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -1368,6 +1368,77 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
>> return 0;
>> }
>>
>> +
>> +/*
>> + * Lock all VM vCPUs.
>> + * Can be used nested (to lock vCPUS of two VMs for example)
>> + */
>> +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role)
>> +{
>> + struct kvm_vcpu *vcpu;
>> + unsigned long i, j;
>> +
>> + lockdep_assert_held(&kvm->lock);
>> +
>> + kvm_for_each_vcpu(i, vcpu, kvm) {
>> +
>> + if (trylock && !mutex_trylock_nested(&vcpu->mutex, role))
>> + goto out_unlock;
>> + else if (!trylock && mutex_lock_killable_nested(&vcpu->mutex, role))
>> + goto out_unlock;
>> +
>> +#ifdef CONFIG_PROVE_LOCKING
>> + if (!i)
>> + /*
>> + * Reset the role to one that avoids colliding with
>> + * the role used for the first vcpu mutex.
>> + */
>> + role = MAX_LOCK_DEPTH - 1;
>> + else
>> + mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
>> +#endif
>> + }
>
> This code is all sorts of terrible.
>
> Per the lockdep_assert_held() above, you serialize all these locks by
> holding that lock, this means you can be using the _nest_lock()
> annotation.
>
> Also, the original code didn't have this trylock nonsense, and the
> Changelog doesn't mention this -- in fact the Changelog claims no
> change, which is patently false.
>
> Anyway, please write like:
>
> kvm_for_each_vcpu(i, vcpu, kvm) {
> if (mutex_lock_killable_nest_lock(&vcpu->mutex, &kvm->lock))
> goto unlock;
> }
>
> return 0;
>
> unlock:
>
> kvm_for_each_vcpu(j, vcpu, kvm) {
> if (j == i)
> break;
>
> mutex_unlock(&vcpu->mutex);
> }
> return -EINTR;
>
> And yes, you'll have to add mutex_lock_killable_nest_lock(), but that
> should be trivial.
If I understand correctly, that would actually be
_mutex_lock_killable_nest_lock() plus a wrapper macro. But yes,
that is easy, so it sounds good.

For the ARM case, which is the actual buggy one (it was complaining
about too high a depth), it still needs mutex_trylock_nest_lock();
the nest_lock is needed to avoid bumping the depth on every
mutex_trylock().
It should be something like
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 2143d05116be..328f573cab6d 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -174,6 +174,12 @@ do { \
_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
} while (0)
+#define mutex_trylock_nest_lock(lock, nest_lock) \
+( \
+ typecheck(struct lockdep_map *, &(nest_lock)->dep_map), \
+ _mutex_trylock_nest_lock(lock, &(nest_lock)->dep_map) \
+)
+
#else
extern void mutex_lock(struct mutex *lock);
extern int __must_check mutex_lock_interruptible(struct mutex *lock);
@@ -185,6 +191,7 @@ extern void mutex_lock_io(struct mutex *lock);
# define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
# define mutex_lock_io_nested(lock, subclass) mutex_lock_io(lock)
+# define mutex_trylock_nest_lock(lock, nest_lock) mutex_trylock(lock)
#endif
/*
@@ -193,9 +200,14 @@ extern void mutex_lock_io(struct mutex *lock);
*
* Returns 1 if the mutex has been acquired successfully, and 0 on contention.
*/
-extern int mutex_trylock(struct mutex *lock);
+extern int _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
extern void mutex_unlock(struct mutex *lock);
+static inline int mutex_trylock(struct mutex *lock)
+{
+ return _mutex_trylock_nest_lock(lock, NULL);
+}
+
extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T))
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 555e2b3a665a..d5d1e79495fc 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -1063,8 +1063,10 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
#endif
/**
- * mutex_trylock - try to acquire the mutex, without waiting
+ * _mutex_trylock_nest_lock - try to acquire the mutex, without waiting
* @lock: the mutex to be acquired
+ * @nest_lock: if not NULL, a mutex that is always taken whenever multiple
+ * instances of @lock are
*
* Try to acquire the mutex atomically. Returns 1 if the mutex
* has been acquired successfully, and 0 on contention.
@@ -1076,7 +1078,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
* This function must not be used in interrupt context. The
* mutex must be released by the same task that acquired it.
*/
-int __sched mutex_trylock(struct mutex *lock)
+int __sched _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock)
{
bool locked;
@@ -1084,11 +1086,11 @@ int __sched mutex_trylock(struct mutex *lock)
locked = __mutex_trylock(lock);
if (locked)
- mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+ mutex_acquire_nest(&lock->dep_map, 0, 1, nest_lock, _RET_IP_);
return locked;
}
-EXPORT_SYMBOL(mutex_trylock);
+EXPORT_SYMBOL(_mutex_trylock_nest_lock);
#ifndef CONFIG_DEBUG_LOCK_ALLOC
int __sched
Does that seem sane?
Paolo
On Wed, Apr 16, 2025 at 07:48:00PM +0200, Paolo Bonzini wrote:
> On 4/10/25 10:16, Peter Zijlstra wrote:
> > On Tue, Apr 08, 2025 at 09:41:34PM -0400, Maxim Levitsky wrote:
> > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > index 69782df3617f..71c0d8c35b4b 100644
> > > --- a/virt/kvm/kvm_main.c
> > > +++ b/virt/kvm/kvm_main.c
> > > @@ -1368,6 +1368,77 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
> > > return 0;
> > > }
> > > +
> > > +/*
> > > + * Lock all VM vCPUs.
> > > + * Can be used nested (to lock vCPUS of two VMs for example)
> > > + */
> > > +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role)
> > > +{
> > > + struct kvm_vcpu *vcpu;
> > > + unsigned long i, j;
> > > +
> > > + lockdep_assert_held(&kvm->lock);
> > > +
> > > + kvm_for_each_vcpu(i, vcpu, kvm) {
> > > +
> > > + if (trylock && !mutex_trylock_nested(&vcpu->mutex, role))
> > > + goto out_unlock;
> > > + else if (!trylock && mutex_lock_killable_nested(&vcpu->mutex, role))
> > > + goto out_unlock;
> > > +
> > > +#ifdef CONFIG_PROVE_LOCKING
> > > + if (!i)
> > > + /*
> > > + * Reset the role to one that avoids colliding with
> > > + * the role used for the first vcpu mutex.
> > > + */
> > > + role = MAX_LOCK_DEPTH - 1;
> > > + else
> > > + mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
> > > +#endif
> > > + }
> >
> > This code is all sorts of terrible.
> >
> > Per the lockdep_assert_held() above, you serialize all these locks by
> > holding that lock, this means you can be using the _nest_lock()
> > annotation.
> >
> > Also, the original code didn't have this trylock nonsense, and the
> > Changelog doesn't mention this -- in fact the Changelog claims no
> > change, which is patently false.
> >
> > Anyway, please write like:
> >
> > kvm_for_each_vcpu(i, vcpu, kvm) {
> > if (mutex_lock_killable_nest_lock(&vcpu->mutex, &kvm->lock))
> > goto unlock;
> > }
> >
> > return 0;
> >
> > unlock:
> >
> > kvm_for_each_vcpu(j, vcpu, kvm) {
> > if (j == i)
> > break;
> >
> > mutex_unlock(&vcpu->mutex);
> > }
> > return -EINTR;
> >
> > And yes, you'll have to add mutex_lock_killable_nest_lock(), but that
> > should be trivial.
>
> If I understand correctly, that would be actually
> _mutex_lock_killable_nest_lock() plus a wrapper macro. But yes,
> that is easy so it sounds good.
>
> For the ARM case, which is the actual buggy one (it was complaining
> about too high a depth) it still needs mutex_trylock_nest_lock();
> the nest_lock is needed to avoid bumping the depth on every
> mutex_trylock().
Got a link to the ARM code in question? And I'm assuming you're talking
about task_struct::lockdep_depth? The nest lock annotation does not
in fact increment depth beyond one of each type. It does a refcount-like
thing.
On Wed, Apr 16, 2025 at 8:50 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > For the ARM case, which is the actual buggy one (it was complaining
> > about too high a depth) it still needs mutex_trylock_nest_lock();
> > the nest_lock is needed to avoid bumping the depth on every
> > mutex_trylock().
>
> Got a link to the ARM code in question ?
lock_all_vcpus() in arch/arm64/kvm/arm.c:

	lockdep_assert_held(&kvm->lock);

	kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
		if (!mutex_trylock(&tmp_vcpu->mutex)) {
			unlock_vcpus(kvm, c - 1);
			return false;
		}
	}
> And I'm assuming you're talking about task_struct::lockdep_depth ?
> The nest lock annotation does not in fact increment depth beyond
> one of each type. It does a refcount like thing.
Yes, exactly - mutex_trylock_nest_lock() is needed so that the
code above counts per-lock instead of using the per-task depth.
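For illustration, with a trylock variant along the lines sketched above,
the ARM loop could become something like this (just a sketch, not the
actual patch), so each trylock is annotated against kvm->lock rather than
consuming another per-task held-lock slot:

	kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
		if (!mutex_trylock_nest_lock(&tmp_vcpu->mutex, &kvm->lock)) {
			unlock_vcpus(kvm, c - 1);
			return false;
		}
	}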
Paolo
On Tue, Apr 08, 2025 at 09:41:34PM -0400, Maxim Levitsky wrote:
> Move sev_lock/unlock_vcpus_for_migration to kvm_main and call the
> new functions the kvm_lock_all_vcpus/kvm_unlock_all_vcpus
> and kvm_lock_all_vcpus_nested.
>
> This code allows to lock all vCPUs without triggering lockdep warning
> about reaching MAX_LOCK_DEPTH depth by coercing the lockdep into
> thinking that we release all the locks other than vcpu'0 lock
> immediately after we take them.
>
> No functional change intended.
>
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
> arch/x86/kvm/svm/sev.c | 65 +++---------------------------------
> include/linux/kvm_host.h | 6 ++++
> virt/kvm/kvm_main.c | 71 ++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 81 insertions(+), 61 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 0bc708ee2788..7adc54b1f741 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -1889,63 +1889,6 @@ enum sev_migration_role {
> SEV_NR_MIGRATION_ROLES,
> };
>
> -static int sev_lock_vcpus_for_migration(struct kvm *kvm,
> - enum sev_migration_role role)
> -{
> - struct kvm_vcpu *vcpu;
> - unsigned long i, j;
> -
> - kvm_for_each_vcpu(i, vcpu, kvm) {
> - if (mutex_lock_killable_nested(&vcpu->mutex, role))
> - goto out_unlock;
> -
> -#ifdef CONFIG_PROVE_LOCKING
> - if (!i)
> - /*
> - * Reset the role to one that avoids colliding with
> - * the role used for the first vcpu mutex.
> - */
> - role = SEV_NR_MIGRATION_ROLES;
> - else
> - mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
> -#endif
> - }
> -
> - return 0;
> -
> -out_unlock:
> -
> - kvm_for_each_vcpu(j, vcpu, kvm) {
> - if (i == j)
> - break;
> -
> -#ifdef CONFIG_PROVE_LOCKING
> - if (j)
> - mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_);
> -#endif
> -
> - mutex_unlock(&vcpu->mutex);
> - }
> - return -EINTR;
> -}
> -
> -static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
> -{
> - struct kvm_vcpu *vcpu;
> - unsigned long i;
> - bool first = true;
> -
> - kvm_for_each_vcpu(i, vcpu, kvm) {
> - if (first)
> - first = false;
> - else
> - mutex_acquire(&vcpu->mutex.dep_map,
> - SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_);
> -
> - mutex_unlock(&vcpu->mutex);
> - }
> -}
> -
> static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
> {
> struct kvm_sev_info *dst = to_kvm_sev_info(dst_kvm);
> @@ -2083,10 +2026,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
> charged = true;
> }
>
> - ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE);
> + ret = kvm_lock_all_vcpus_nested(kvm, false, SEV_MIGRATION_SOURCE);
> if (ret)
> goto out_dst_cgroup;
> - ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET);
> + ret = kvm_lock_all_vcpus_nested(source_kvm, false, SEV_MIGRATION_TARGET);
> if (ret)
> goto out_dst_vcpu;
>
> @@ -2100,9 +2043,9 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
> ret = 0;
>
> out_source_vcpu:
> - sev_unlock_vcpus_for_migration(source_kvm);
> + kvm_unlock_all_vcpus(source_kvm);
> out_dst_vcpu:
> - sev_unlock_vcpus_for_migration(kvm);
> + kvm_unlock_all_vcpus(kvm);
> out_dst_cgroup:
> /* Operates on the source on success, on the destination on failure. */
> if (charged)
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 1dedc421b3e3..30cf28bf5c80 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1015,6 +1015,12 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
>
> void kvm_destroy_vcpus(struct kvm *kvm);
>
> +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role);
> +void kvm_unlock_all_vcpus(struct kvm *kvm);
> +
> +#define kvm_lock_all_vcpus(kvm, trylock) \
> + kvm_lock_all_vcpus_nested(kvm, trylock, 0)
> +
Can you instead add lock / trylock variants of this?
kvm_trylock_all_vcpus(kvm) seems a bit more obvious in the calling code.
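For example, something along these lines, purely to sketch the suggested
interface (names taken from the suggestion above, not an existing API):

	int kvm_lock_all_vcpus(struct kvm *kvm);
	int kvm_trylock_all_vcpus(struct kvm *kvm);
	void kvm_unlock_all_vcpus(struct kvm *kvm);

so callers such as the SEV migration path and the ARM code pick the
variant they need without passing a trylock flag at every call site.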
Thanks,
Oliver
On 4/8/25 9:41 PM, Maxim Levitsky wrote:
> Move sev_lock/unlock_vcpus_for_migration to kvm_main and call the
> new functions the kvm_lock_all_vcpus/kvm_unlock_all_vcpus
> and kvm_lock_all_vcpus_nested.
>
> This code allows to lock all vCPUs without triggering lockdep warning
> about reaching MAX_LOCK_DEPTH depth by coercing the lockdep into
> thinking that we release all the locks other than vcpu'0 lock
> immediately after we take them.
>
> No functional change intended.
>
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
> arch/x86/kvm/svm/sev.c | 65 +++---------------------------------
> include/linux/kvm_host.h | 6 ++++
> virt/kvm/kvm_main.c | 71 ++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 81 insertions(+), 61 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 0bc708ee2788..7adc54b1f741 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -1889,63 +1889,6 @@ enum sev_migration_role {
> SEV_NR_MIGRATION_ROLES,
> };
>
> -static int sev_lock_vcpus_for_migration(struct kvm *kvm,
> - enum sev_migration_role role)
> -{
> - struct kvm_vcpu *vcpu;
> - unsigned long i, j;
> -
> - kvm_for_each_vcpu(i, vcpu, kvm) {
> - if (mutex_lock_killable_nested(&vcpu->mutex, role))
> - goto out_unlock;
> -
> -#ifdef CONFIG_PROVE_LOCKING
> - if (!i)
> - /*
> - * Reset the role to one that avoids colliding with
> - * the role used for the first vcpu mutex.
> - */
> - role = SEV_NR_MIGRATION_ROLES;
> - else
> - mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
> -#endif
> - }
> -
> - return 0;
> -
> -out_unlock:
> -
> - kvm_for_each_vcpu(j, vcpu, kvm) {
> - if (i == j)
> - break;
> -
> -#ifdef CONFIG_PROVE_LOCKING
> - if (j)
> - mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_);
> -#endif
> -
> - mutex_unlock(&vcpu->mutex);
> - }
> - return -EINTR;
> -}
> -
> -static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
> -{
> - struct kvm_vcpu *vcpu;
> - unsigned long i;
> - bool first = true;
> -
> - kvm_for_each_vcpu(i, vcpu, kvm) {
> - if (first)
> - first = false;
> - else
> - mutex_acquire(&vcpu->mutex.dep_map,
> - SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_);
> -
> - mutex_unlock(&vcpu->mutex);
> - }
> -}
> -
> static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
> {
> struct kvm_sev_info *dst = to_kvm_sev_info(dst_kvm);
> @@ -2083,10 +2026,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
> charged = true;
> }
>
> - ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE);
> + ret = kvm_lock_all_vcpus_nested(kvm, false, SEV_MIGRATION_SOURCE);
> if (ret)
> goto out_dst_cgroup;
> - ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET);
> + ret = kvm_lock_all_vcpus_nested(source_kvm, false, SEV_MIGRATION_TARGET);
> if (ret)
> goto out_dst_vcpu;
>
> @@ -2100,9 +2043,9 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
> ret = 0;
>
> out_source_vcpu:
> - sev_unlock_vcpus_for_migration(source_kvm);
> + kvm_unlock_all_vcpus(source_kvm);
> out_dst_vcpu:
> - sev_unlock_vcpus_for_migration(kvm);
> + kvm_unlock_all_vcpus(kvm);
> out_dst_cgroup:
> /* Operates on the source on success, on the destination on failure. */
> if (charged)
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 1dedc421b3e3..30cf28bf5c80 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1015,6 +1015,12 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
>
> void kvm_destroy_vcpus(struct kvm *kvm);
>
> +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role);
> +void kvm_unlock_all_vcpus(struct kvm *kvm);
> +
> +#define kvm_lock_all_vcpus(kvm, trylock) \
> + kvm_lock_all_vcpus_nested(kvm, trylock, 0)
> +
> void vcpu_load(struct kvm_vcpu *vcpu);
> void vcpu_put(struct kvm_vcpu *vcpu);
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 69782df3617f..71c0d8c35b4b 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1368,6 +1368,77 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
> return 0;
> }
>
> +
> +/*
> + * Lock all VM vCPUs.
> + * Can be used nested (to lock vCPUS of two VMs for example)
> + */
> +int kvm_lock_all_vcpus_nested(struct kvm *kvm, bool trylock, unsigned int role)
> +{
> + struct kvm_vcpu *vcpu;
> + unsigned long i, j;
> +
> + lockdep_assert_held(&kvm->lock);
> +
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> +
> + if (trylock && !mutex_trylock_nested(&vcpu->mutex, role))
> + goto out_unlock;
> + else if (!trylock && mutex_lock_killable_nested(&vcpu->mutex, role))
> + goto out_unlock;
> +
> +#ifdef CONFIG_PROVE_LOCKING
> + if (!i)
> + /*
> + * Reset the role to one that avoids colliding with
> + * the role used for the first vcpu mutex.
> + */
> + role = MAX_LOCK_DEPTH - 1;
> + else
> + mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
> +#endif
Lockdep supports up to 8 subclasses, but MAX_LOCK_DEPTH is 48. I believe
it is OK to add a mutex_trylock_nested(), but can you just use 0 and 1
for the subclasses?
Cheers,
Longman