[PATCH] KVM: use call_rcu instead of synchronize_srcu_expedited() for MMIO unregistration

lirongqing posted 1 patch 4 months ago
include/linux/kvm_host.h  |  1 +
virt/kvm/coalesced_mmio.c |  2 +-
virt/kvm/eventfd.c        |  2 +-
virt/kvm/kvm_main.c       | 13 ++++++++-----
4 files changed, 11 insertions(+), 7 deletions(-)
[PATCH] KVM: use call_rcu instead of synchronize_srcu_expedited() for MMIO unregistration
Posted by lirongqing 4 months ago
From: Li RongQing <lirongqing@baidu.com>

During VM reboot/shutdown, device MMIO unregistration may occur
frequently. The current use of synchronize_srcu_expedited() introduces
measurable latency in these operations. Replace it with call_srcu() to
defer cleanup asynchronously and speed up VM reboot/shutdown.

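Add a 'dev' field to struct kvm_io_bus to hold the device being
unregistered, so that the SRCU callback can destroy it together with
the old bus. Move the list_del() calls out of the device destructors
and into the unregistration paths, so that each device is unlinked
from its list before kvm_io_bus_unregister_dev() is called.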

Signed-off-by: Li RongQing <lirongqing@baidu.com>
---
 include/linux/kvm_host.h  |  1 +
 virt/kvm/coalesced_mmio.c |  2 +-
 virt/kvm/eventfd.c        |  2 +-
 virt/kvm/kvm_main.c       | 13 ++++++++-----
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 19b8c4b..38498d9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -208,6 +208,7 @@ struct kvm_io_bus {
 	int dev_count;
 	int ioeventfd_count;
 	struct rcu_head rcu;
+	struct kvm_io_device *dev;
 	struct kvm_io_range range[];
 };
 
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 375d628..0db6af2 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -82,7 +82,6 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this)
 {
 	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
 
-	list_del(&dev->list);
 
 	kfree(dev);
 }
@@ -169,6 +168,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
 	list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list) {
 		if (zone->pio == dev->zone.pio &&
 		    coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
+			list_del(&dev->list);
 			r = kvm_io_bus_unregister_dev(kvm,
 				zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev);
 			/*
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 6b1133a..8a2f0e0 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -750,7 +750,6 @@ static void
 ioeventfd_release(struct _ioeventfd *p)
 {
 	eventfd_ctx_put(p->eventfd);
-	list_del(&p->list);
 	kfree(p);
 }
 
@@ -949,6 +948,7 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
 		if (!p->wildcard && p->datamatch != args->datamatch)
 			continue;
 
+		list_del(&p->list);
 		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
 		bus = kvm_get_bus(kvm, bus_idx);
 		if (bus)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f2e77eb..3ddad34 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5955,10 +5955,12 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 }
 EXPORT_SYMBOL_GPL(kvm_io_bus_read);
 
-static void __free_bus(struct rcu_head *rcu)
+static void __free_bus_dev(struct rcu_head *rcu)
 {
 	struct kvm_io_bus *bus = container_of(rcu, struct kvm_io_bus, rcu);
 
+	if (bus->dev)
+		kvm_iodevice_destructor(bus->dev);
 	kfree(bus);
 }
 
@@ -6000,7 +6002,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 	memcpy(new_bus->range + i + 1, bus->range + i,
 		(bus->dev_count - i) * sizeof(struct kvm_io_range));
 	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
-	call_srcu(&kvm->srcu, &bus->rcu, __free_bus);
+	bus->dev = NULL;
+	call_srcu(&kvm->srcu, &bus->rcu, __free_bus_dev);
 
 	return 0;
 }
@@ -6036,20 +6039,20 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 	}
 
 	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
-	synchronize_srcu_expedited(&kvm->srcu);
 
 	/*
 	 * If NULL bus is installed, destroy the old bus, including all the
 	 * attached devices. Otherwise, destroy the caller's device only.
 	 */
 	if (!new_bus) {
+		synchronize_srcu_expedited(&kvm->srcu);
 		pr_err("kvm: failed to shrink bus, removing it completely\n");
 		kvm_io_bus_destroy(bus);
 		return -ENOMEM;
 	}
 
-	kvm_iodevice_destructor(dev);
-	kfree(bus);
+	bus->dev = dev;
+	call_srcu(&kvm->srcu, &bus->rcu, __free_bus_dev);
 	return 0;
 }
 
-- 
2.9.4
RE: [PATCH] KVM: use call_rcu instead of synchronize_srcu_expedited() for MMIO unregistration
Posted by Li,Rongqing 2 months, 2 weeks ago
> From: Li RongQing <lirongqing@baidu.com>
> 
> During VM reboot/shutdown, device MMIO unregistration maybe occurs
> frequently. The current use of synchronize_srcu_expedited() introduces
> measurable latency in these operations. Replace with call_rcu to defer
> cleanup asynchronously, speed up VM reboot/shutdown.
> 
> Add a 'dev' field to struct kvm_io_bus to hold the device being unregistered
> for the RCU callback. Adjust related code to ensure proper list management
> before unregistration.


Ping

Thanks

-Li


> 
> Signed-off-by: Li RongQing <lirongqing@baidu.com>
> ---
>  include/linux/kvm_host.h  |  1 +
>  virt/kvm/coalesced_mmio.c |  2 +-
>  virt/kvm/eventfd.c        |  2 +-
>  virt/kvm/kvm_main.c       | 13 ++++++++-----
>  4 files changed, 11 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index
> 19b8c4b..38498d9 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -208,6 +208,7 @@ struct kvm_io_bus {
>  	int dev_count;
>  	int ioeventfd_count;
>  	struct rcu_head rcu;
> +	struct kvm_io_device *dev;
>  	struct kvm_io_range range[];
>  };
> 
> diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index
> 375d628..0db6af2 100644
> --- a/virt/kvm/coalesced_mmio.c
> +++ b/virt/kvm/coalesced_mmio.c
> @@ -82,7 +82,6 @@ static void coalesced_mmio_destructor(struct
> kvm_io_device *this)  {
>  	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
> 
> -	list_del(&dev->list);
> 
>  	kfree(dev);
>  }
> @@ -169,6 +168,7 @@ int
> kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
>  	list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list) {
>  		if (zone->pio == dev->zone.pio &&
>  		    coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
> +			list_del(&dev->list);
>  			r = kvm_io_bus_unregister_dev(kvm,
>  				zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS,
> &dev->dev);
>  			/*
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 6b1133a..8a2f0e0
> 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -750,7 +750,6 @@ static void
>  ioeventfd_release(struct _ioeventfd *p)  {
>  	eventfd_ctx_put(p->eventfd);
> -	list_del(&p->list);
>  	kfree(p);
>  }
> 
> @@ -949,6 +948,7 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm,
> enum kvm_bus bus_idx,
>  		if (!p->wildcard && p->datamatch != args->datamatch)
>  			continue;
> 
> +		list_del(&p->list);
>  		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
>  		bus = kvm_get_bus(kvm, bus_idx);
>  		if (bus)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index
> f2e77eb..3ddad34 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -5955,10 +5955,12 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu,
> enum kvm_bus bus_idx, gpa_t addr,  }
> EXPORT_SYMBOL_GPL(kvm_io_bus_read);
> 
> -static void __free_bus(struct rcu_head *rcu)
> +static void __free_bus_dev(struct rcu_head *rcu)
>  {
>  	struct kvm_io_bus *bus = container_of(rcu, struct kvm_io_bus, rcu);
> 
> +	if (bus->dev)
> +		kvm_iodevice_destructor(bus->dev);
>  	kfree(bus);
>  }
> 
> @@ -6000,7 +6002,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm,
> enum kvm_bus bus_idx, gpa_t addr,
>  	memcpy(new_bus->range + i + 1, bus->range + i,
>  		(bus->dev_count - i) * sizeof(struct kvm_io_range));
>  	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
> -	call_srcu(&kvm->srcu, &bus->rcu, __free_bus);
> +	bus->dev = NULL;
> +	call_srcu(&kvm->srcu, &bus->rcu, __free_bus_dev);
> 
>  	return 0;
>  }
> @@ -6036,20 +6039,20 @@ int kvm_io_bus_unregister_dev(struct kvm
> *kvm, enum kvm_bus bus_idx,
>  	}
> 
>  	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
> -	synchronize_srcu_expedited(&kvm->srcu);
> 
>  	/*
>  	 * If NULL bus is installed, destroy the old bus, including all the
>  	 * attached devices. Otherwise, destroy the caller's device only.
>  	 */
>  	if (!new_bus) {
> +		synchronize_srcu_expedited(&kvm->srcu);
>  		pr_err("kvm: failed to shrink bus, removing it completely\n");
>  		kvm_io_bus_destroy(bus);
>  		return -ENOMEM;
>  	}
> 
> -	kvm_iodevice_destructor(dev);
> -	kfree(bus);
> +	bus->dev = dev;
> +	call_srcu(&kvm->srcu, &bus->rcu, __free_bus_dev);
>  	return 0;
>  }
> 
> --
> 2.9.4