This patch pre-heats the vhost IOTLB cache when passthrough mode is enabled.
Sometimes, even if user specified iommu_platform for vhost devices,
IOMMU might still be disabled. One case is passthrough mode in VT-d
implementation. We can detect this by observing iommu_list. If it's
empty, it means IOMMU translation is disabled, then we can actually
pre-heat the translation (it'll be static mapping then) by first
invalidating all IOTLB, then cache existing memory ranges into vhost
backend iotlb using 1:1 mapping.
Signed-off-by: Peter Xu <peterx@redhat.com>
---
hw/virtio/trace-events | 4 ++++
hw/virtio/vhost.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 53 insertions(+)
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 1f7a7c1..54dcbb3 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -24,3 +24,7 @@ virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s g
virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d"
virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d"
virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: %"PRIx64" num_pages: %d"
+
+# hw/virtio/vhost.c
+vhost_iommu_commit(void) ""
+vhost_iommu_static_preheat(void) ""
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 0001e60..1c92e62 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -27,6 +27,7 @@
#include "hw/virtio/virtio-access.h"
#include "migration/migration.h"
#include "sysemu/dma.h"
+#include "trace.h"
/* enabled until disconnected backend stabilizes */
#define _VHOST_DEBUG 1
@@ -730,6 +731,11 @@ static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
}
}
+static bool vhost_iommu_mr_enabled(struct vhost_dev *dev)
+{
+ return !QLIST_EMPTY(&dev->iommu_list);
+}
+
static void vhost_iommu_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -782,6 +788,48 @@ static void vhost_iommu_region_del(MemoryListener *listener,
}
}
+static void vhost_iommu_commit(MemoryListener *listener)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ iommu_listener);
+ struct vhost_memory_region *r;
+ int i;
+
+ trace_vhost_iommu_commit();
+
+ if (!vhost_iommu_mr_enabled(dev)) {
+ /*
+ * This means iommu_platform is enabled, however iommu memory
+ * region is disabled, e.g., when device passthrough is setup.
+ * Then, no translation is needed any more.
+ *
+ * Let's first invalidate the whole IOTLB, then pre-heat the
+ * static mapping by looping over vhost memory ranges.
+ */
+
+ if (dev->vhost_ops->vhost_invalidate_device_iotlb(dev, 0,
+ UINT64_MAX-1)) {
+ error_report("%s: flush existing IOTLB failed", __func__);
+ return;
+ }
+
+ for (i = 0; i < dev->mem->nregions; i++) {
+ r = &dev->mem->regions[i];
+ /* Vhost regions are writable RAM, so IOMMU_RW suits. */
+ if (dev->vhost_ops->vhost_update_device_iotlb(dev,
+ r->guest_phys_addr,
+ r->userspace_addr,
+ r->memory_size,
+ IOMMU_RW)) {
+ error_report("%s: pre-heat static mapping failed", __func__);
+ return;
+ }
+ }
+
+ trace_vhost_iommu_static_preheat();
+ }
+}
+
static void vhost_region_nop(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -1298,6 +1346,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
hdev->iommu_listener = (MemoryListener) {
.region_add = vhost_iommu_region_add,
.region_del = vhost_iommu_region_del,
+ .commit = vhost_iommu_commit,
};
if (hdev->migration_blocker == NULL) {
--
2.7.4
On 2017年05月10日 16:01, Peter Xu wrote:
> This patch pre-heat vhost iotlb cache when passthrough mode enabled.
>
> Sometimes, even if user specified iommu_platform for vhost devices,
> IOMMU might still be disabled. One case is passthrough mode in VT-d
> implementation. We can detect this by observing iommu_list. If it's
> empty, it means IOMMU translation is disabled, then we can actually
> pre-heat the translation (it'll be static mapping then) by first
> invalidating all IOTLB, then cache existing memory ranges into vhost
> backend iotlb using 1:1 mapping.
>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
> hw/virtio/trace-events | 4 ++++
> hw/virtio/vhost.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 53 insertions(+)
>
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index 1f7a7c1..54dcbb3 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -24,3 +24,7 @@ virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s g
> virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d"
> virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d"
> virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: %"PRIx64" num_pages: %d"
> +
> +# hw/virtio/vhost.c
> +vhost_iommu_commit(void) ""
> +vhost_iommu_static_preheat(void) ""
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 0001e60..1c92e62 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -27,6 +27,7 @@
> #include "hw/virtio/virtio-access.h"
> #include "migration/migration.h"
> #include "sysemu/dma.h"
> +#include "trace.h"
>
> /* enabled until disconnected backend stabilizes */
> #define _VHOST_DEBUG 1
> @@ -730,6 +731,11 @@ static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> }
> }
>
> +static bool vhost_iommu_mr_enabled(struct vhost_dev *dev)
> +{
> + return !QLIST_EMPTY(&dev->iommu_list);
> +}
> +
> static void vhost_iommu_region_add(MemoryListener *listener,
> MemoryRegionSection *section)
> {
> @@ -782,6 +788,48 @@ static void vhost_iommu_region_del(MemoryListener *listener,
> }
> }
>
> +static void vhost_iommu_commit(MemoryListener *listener)
> +{
> + struct vhost_dev *dev = container_of(listener, struct vhost_dev,
> + iommu_listener);
> + struct vhost_memory_region *r;
> + int i;
> +
> + trace_vhost_iommu_commit();
> +
> + if (!vhost_iommu_mr_enabled(dev)) {
> + /*
> + * This means iommu_platform is enabled, however iommu memory
> + * region is disabled, e.g., when device passthrough is setup.
> + * Then, no translation is needed any more.
> + *
> + * Let's first invalidate the whole IOTLB, then pre-heat the
> + * static mapping by looping over vhost memory ranges.
> + */
> +
> + if (dev->vhost_ops->vhost_invalidate_device_iotlb(dev, 0,
> + UINT64_MAX-1)) {
> + error_report("%s: flush existing IOTLB failed", __func__);
> + return;
> + }
> +
> + for (i = 0; i < dev->mem->nregions; i++) {
> + r = &dev->mem->regions[i];
> + /* Vhost regions are writable RAM, so IOMMU_RW suites. */
> + if (dev->vhost_ops->vhost_update_device_iotlb(dev,
> + r->guest_phys_addr,
> + r->userspace_addr,
> + r->memory_size,
> + IOMMU_RW)) {
> + error_report("%s: pre-heat static mapping failed", __func__);
> + return;
> + }
> + }
> +
> + trace_vhost_iommu_static_preheat();
> + }
> +}
Looks like vfio does the map in region_add(). If we can have different
types of memory regions (e.g., some under an IOMMU and others not),
do we need to switch to doing this in vhost_iommu_region_add()?
Thanks
> +
> static void vhost_region_nop(MemoryListener *listener,
> MemoryRegionSection *section)
> {
> @@ -1298,6 +1346,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
> hdev->iommu_listener = (MemoryListener) {
> .region_add = vhost_iommu_region_add,
> .region_del = vhost_iommu_region_del,
> + .commit = vhost_iommu_commit,
> };
>
> if (hdev->migration_blocker == NULL) {
On Thu, May 11, 2017 at 04:35:21PM +0800, Jason Wang wrote:
>
>
> On 2017年05月10日 16:01, Peter Xu wrote:
> >This patch pre-heat vhost iotlb cache when passthrough mode enabled.
> >
> >Sometimes, even if user specified iommu_platform for vhost devices,
> >IOMMU might still be disabled. One case is passthrough mode in VT-d
> >implementation. We can detect this by observing iommu_list. If it's
> >empty, it means IOMMU translation is disabled, then we can actually
> >pre-heat the translation (it'll be static mapping then) by first
> >invalidating all IOTLB, then cache existing memory ranges into vhost
> >backend iotlb using 1:1 mapping.
> >
> >Signed-off-by: Peter Xu <peterx@redhat.com>
> >---
> > hw/virtio/trace-events | 4 ++++
> > hw/virtio/vhost.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
> > 2 files changed, 53 insertions(+)
> >
> >diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> >index 1f7a7c1..54dcbb3 100644
> >--- a/hw/virtio/trace-events
> >+++ b/hw/virtio/trace-events
> >@@ -24,3 +24,7 @@ virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s g
> > virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d"
> > virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d"
> > virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: %"PRIx64" num_pages: %d"
> >+
> >+# hw/virtio/vhost.c
> >+vhost_iommu_commit(void) ""
> >+vhost_iommu_static_preheat(void) ""
> >diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> >index 0001e60..1c92e62 100644
> >--- a/hw/virtio/vhost.c
> >+++ b/hw/virtio/vhost.c
> >@@ -27,6 +27,7 @@
> > #include "hw/virtio/virtio-access.h"
> > #include "migration/migration.h"
> > #include "sysemu/dma.h"
> >+#include "trace.h"
> > /* enabled until disconnected backend stabilizes */
> > #define _VHOST_DEBUG 1
> >@@ -730,6 +731,11 @@ static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> > }
> > }
> >+static bool vhost_iommu_mr_enabled(struct vhost_dev *dev)
> >+{
> >+ return !QLIST_EMPTY(&dev->iommu_list);
> >+}
> >+
> > static void vhost_iommu_region_add(MemoryListener *listener,
> > MemoryRegionSection *section)
> > {
> >@@ -782,6 +788,48 @@ static void vhost_iommu_region_del(MemoryListener *listener,
> > }
> > }
> >+static void vhost_iommu_commit(MemoryListener *listener)
> >+{
> >+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
> >+ iommu_listener);
> >+ struct vhost_memory_region *r;
> >+ int i;
> >+
> >+ trace_vhost_iommu_commit();
> >+
> >+ if (!vhost_iommu_mr_enabled(dev)) {
> >+ /*
> >+ * This means iommu_platform is enabled, however iommu memory
> >+ * region is disabled, e.g., when device passthrough is setup.
> >+ * Then, no translation is needed any more.
> >+ *
> >+ * Let's first invalidate the whole IOTLB, then pre-heat the
> >+ * static mapping by looping over vhost memory ranges.
> >+ */
> >+
> >+ if (dev->vhost_ops->vhost_invalidate_device_iotlb(dev, 0,
> >+ UINT64_MAX-1)) {
> >+ error_report("%s: flush existing IOTLB failed", __func__);
> >+ return;
> >+ }
> >+
> >+ for (i = 0; i < dev->mem->nregions; i++) {
> >+ r = &dev->mem->regions[i];
> >+ /* Vhost regions are writable RAM, so IOMMU_RW suites. */
> >+ if (dev->vhost_ops->vhost_update_device_iotlb(dev,
> >+ r->guest_phys_addr,
> >+ r->userspace_addr,
> >+ r->memory_size,
> >+ IOMMU_RW)) {
> >+ error_report("%s: pre-heat static mapping failed", __func__);
> >+ return;
> >+ }
> >+ }
> >+
> >+ trace_vhost_iommu_static_preheat();
> >+ }
> >+}
>
> Looks like vfio does the map in region_add(), if we can have different types
> of memory regions (e.g some were under an IOMMU but others were not), do we
> need to switch to do this in vhost_iommu_region_add() ?
Currently this is just a pre-heat of the cache, and only when the IOMMU is
totally disabled (!vhost_iommu_mr_enabled(dev) means no IOMMU memory regions).
This patch won't be activated without this condition, so for the cases
(non-x86 platforms) where there are some IOMMU regions, it'll just be
automatically disabled. And I'm not really sure whether we
should cache non-IOMMU regions when there are some IOMMU regions... So
imho we can keep this until one day we really want to support some
non-x86 platforms for vhost-dmar, then we can work on top. Thanks,
--
Peter Xu
On 2017年05月11日 16:59, Peter Xu wrote:
> On Thu, May 11, 2017 at 04:35:21PM +0800, Jason Wang wrote:
>>
>> On 2017年05月10日 16:01, Peter Xu wrote:
>>> This patch pre-heat vhost iotlb cache when passthrough mode enabled.
>>>
>>> Sometimes, even if user specified iommu_platform for vhost devices,
>>> IOMMU might still be disabled. One case is passthrough mode in VT-d
>>> implementation. We can detect this by observing iommu_list. If it's
>>> empty, it means IOMMU translation is disabled, then we can actually
>>> pre-heat the translation (it'll be static mapping then) by first
>>> invalidating all IOTLB, then cache existing memory ranges into vhost
>>> backend iotlb using 1:1 mapping.
>>>
>>> Signed-off-by: Peter Xu <peterx@redhat.com>
>>> ---
>>> hw/virtio/trace-events | 4 ++++
>>> hw/virtio/vhost.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
>>> 2 files changed, 53 insertions(+)
>>>
>>> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
>>> index 1f7a7c1..54dcbb3 100644
>>> --- a/hw/virtio/trace-events
>>> +++ b/hw/virtio/trace-events
>>> @@ -24,3 +24,7 @@ virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s g
>>> virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d"
>>> virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d"
>>> virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: %"PRIx64" num_pages: %d"
>>> +
>>> +# hw/virtio/vhost.c
>>> +vhost_iommu_commit(void) ""
>>> +vhost_iommu_static_preheat(void) ""
>>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>>> index 0001e60..1c92e62 100644
>>> --- a/hw/virtio/vhost.c
>>> +++ b/hw/virtio/vhost.c
>>> @@ -27,6 +27,7 @@
>>> #include "hw/virtio/virtio-access.h"
>>> #include "migration/migration.h"
>>> #include "sysemu/dma.h"
>>> +#include "trace.h"
>>> /* enabled until disconnected backend stabilizes */
>>> #define _VHOST_DEBUG 1
>>> @@ -730,6 +731,11 @@ static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>>> }
>>> }
>>> +static bool vhost_iommu_mr_enabled(struct vhost_dev *dev)
>>> +{
>>> + return !QLIST_EMPTY(&dev->iommu_list);
>>> +}
>>> +
>>> static void vhost_iommu_region_add(MemoryListener *listener,
>>> MemoryRegionSection *section)
>>> {
>>> @@ -782,6 +788,48 @@ static void vhost_iommu_region_del(MemoryListener *listener,
>>> }
>>> }
>>> +static void vhost_iommu_commit(MemoryListener *listener)
>>> +{
>>> + struct vhost_dev *dev = container_of(listener, struct vhost_dev,
>>> + iommu_listener);
>>> + struct vhost_memory_region *r;
>>> + int i;
>>> +
>>> + trace_vhost_iommu_commit();
>>> +
>>> + if (!vhost_iommu_mr_enabled(dev)) {
>>> + /*
>>> + * This means iommu_platform is enabled, however iommu memory
>>> + * region is disabled, e.g., when device passthrough is setup.
>>> + * Then, no translation is needed any more.
>>> + *
>>> + * Let's first invalidate the whole IOTLB, then pre-heat the
>>> + * static mapping by looping over vhost memory ranges.
>>> + */
>>> +
>>> + if (dev->vhost_ops->vhost_invalidate_device_iotlb(dev, 0,
>>> + UINT64_MAX-1)) {
>>> + error_report("%s: flush existing IOTLB failed", __func__);
>>> + return;
>>> + }
>>> +
>>> + for (i = 0; i < dev->mem->nregions; i++) {
>>> + r = &dev->mem->regions[i];
>>> + /* Vhost regions are writable RAM, so IOMMU_RW suites. */
>>> + if (dev->vhost_ops->vhost_update_device_iotlb(dev,
>>> + r->guest_phys_addr,
>>> + r->userspace_addr,
>>> + r->memory_size,
>>> + IOMMU_RW)) {
>>> + error_report("%s: pre-heat static mapping failed", __func__);
>>> + return;
>>> + }
>>> + }
>>> +
>>> + trace_vhost_iommu_static_preheat();
>>> + }
>>> +}
>> Looks like vfio does the map in region_add(), if we can have different types
>> of memory regions (e.g some were under an IOMMU but others were not), do we
>> need to switch to do this in vhost_iommu_region_add() ?
> Currently this is only a pre-heat of cache only if IOMMU is totally
> disabled (!vhost_iommu_mr_enabled(dev) means no IOMMU memory regions).
> This patch won't be activated without this condition, so for the cases
> (non-x86 platforms) where there are some IOMMU regions, it'll be just
> automatically disabled. And, I don't really quite sure whether we
> should cache non-IOMMU regions when there are some IOMMU regions... So
> imho we can keep this until one day we really want to support some
> non-x86 platforms for vhost-dmar, then we can work on top. Thanks,
>
Right, so let's keep this as is and do optimization on top.
Thanks
© 2016 - 2026 Red Hat, Inc.