From nobody Thu May 2 02:23:38 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1517284669761851.5524216081463; Mon, 29 Jan 2018 19:57:49 -0800 (PST) Received: from localhost ([::1]:35196 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1egN3M-0008Ed-VK for importer@patchew.org; Mon, 29 Jan 2018 22:57:49 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:53379) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1egN1l-0007Jh-9t for qemu-devel@nongnu.org; Mon, 29 Jan 2018 22:56:10 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1egN1h-0002Bo-Cp for qemu-devel@nongnu.org; Mon, 29 Jan 2018 22:56:09 -0500 Received: from ozlabs.ru ([107.173.13.209]:56762) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1egN1h-0002BQ-45; Mon, 29 Jan 2018 22:56:05 -0500 Received: from vpl1.ozlabs.ibm.com (localhost [IPv6:::1]) by ozlabs.ru (Postfix) with ESMTP id C9EA23A60024; Mon, 29 Jan 2018 22:55:27 -0500 (EST) From: Alexey Kardashevskiy To: qemu-devel@nongnu.org Date: Tue, 30 Jan 2018 14:55:26 +1100 Message-Id: <20180130035527.47336-2-aik@ozlabs.ru> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20180130035527.47336-1-aik@ozlabs.ru> References: <20180130035527.47336-1-aik@ozlabs.ru> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 107.173.13.209 Subject: [Qemu-devel] [PATCH qemu v5 1/2] vfio/common: Add 'p2p' property to enable DMA mapping of MMIO regions 
X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Alexey Kardashevskiy , Auger Eric , Alex Williamson , qemu-ppc@nongnu.org, David Gibson Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" At the moment we map all RAM memory regions for possible DMA, including MMIO regions of passed-through devices, as this might be used for P2P PCI. However, if the DMA map fails for whatever reason, we fail and exit QEMU. Since P2P is not widely used and tested, it makes sense to exclude MMIO regions from DMA mapping by default and add a flag to enable these when needed. This adds a "p2p" option for the "vfio-pci" device and vfio_listener_skipped_section() checks for it so region_add/del() skip these by default. The MMIO region needs an initialized mr::owner, which is set anyway. This avoids DMA map when start and/or size of the area is not aligned to the minimal IOMMU page size as it is known to fail; the diagnostic message is printed in this case. This avoids exiting QEMU if QEMU tried DMA map and failed for some other reason than misalignment; this should allow experimenting when needed. This adds necessary checks to the vfio_listener_region_del() hook. 
Signed-off-by: Alexey Kardashevskiy --- include/hw/vfio/vfio-common.h | 1 + hw/vfio/common.c | 59 ++++++++++++++++++++++++++++++++++++++-= ---- hw/vfio/pci.c | 1 + 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index f3a2ac9..8c7ba75 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -121,6 +121,7 @@ typedef struct VFIODevice { bool reset_works; bool needs_reset; bool no_mmap; + bool p2p; VFIODeviceOps *ops; unsigned int num_irqs; unsigned int num_regions; diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 3d652c8..8aaed8d 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -305,7 +305,13 @@ static bool vfio_listener_skipped_section(MemoryRegion= Section *section) * are never accessed by the CPU and beyond the address width of * some IOMMU hardware. TODO: VFIO should tell us the IOMMU wi= dth. */ - section->offset_within_address_space & (1ULL << 63); + section->offset_within_address_space & (1ULL << 63) || + /* + * Allow mapping of MMIO only if the device has p2p=3Dtrue. + */ + (memory_region_is_ram_device(section->mr) && + (!section->mr->owner || + !object_property_get_bool(section->mr->owner, "p2p", NULL))); } =20 /* Called with rcu_read_lock held. 
*/ @@ -508,6 +514,18 @@ static void vfio_listener_region_add(MemoryListener *l= istener, } =20 /* Here we assume that memory_region_is_ram(section->mr)=3D=3Dtrue */ + if (memory_region_is_ram_device(section->mr)) { + hwaddr pgmask =3D (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; + + if ((section->offset_within_region & pgmask) || + (int128_getlo(section->size) & pgmask)) { + error_report("Region %"HWADDR_PRIx"..%"HWADDR_PRIx" is not ali= gned to %"HWADDR_PRIx" and cannot be mapped for DMA", + section->offset_within_region, + int128_getlo(section->size), + pgmask + 1); + return; + } + } =20 vaddr =3D memory_region_get_ram_ptr(section->mr) + section->offset_within_region + @@ -523,6 +541,9 @@ static void vfio_listener_region_add(MemoryListener *li= stener, error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " "0x%"HWADDR_PRIx", %p) =3D %d (%m)", container, iova, int128_get64(llsize), vaddr, ret); + if (memory_region_is_ram_device(section->mr)) { + return; + } goto fail; } =20 @@ -550,6 +571,7 @@ static void vfio_listener_region_del(MemoryListener *li= stener, hwaddr iova, end; Int128 llend, llsize; int ret; + bool try_unmap =3D true; =20 if (vfio_listener_skipped_section(section)) { trace_vfio_listener_region_del_skip( @@ -602,13 +624,36 @@ static void vfio_listener_region_del(MemoryListener *= listener, =20 trace_vfio_listener_region_del(iova, end); =20 - ret =3D vfio_dma_unmap(container, iova, int128_get64(llsize)); + if (memory_region_is_ram_device(section->mr)) { + hwaddr pgmask; + VFIOHostDMAWindow *hostwin; + bool hostwin_found; + + hostwin_found =3D false; + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (hostwin->min_iova <=3D iova && end <=3D hostwin->max_iova)= { + hostwin_found =3D true; + break; + } + } + assert(hostwin_found); + + pgmask =3D (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; + + try_unmap =3D !(section->offset_within_region & pgmask) && + !(int128_getlo(section->size) & pgmask); + } + + if (try_unmap) { + ret =3D 
vfio_dma_unmap(container, iova, int128_get64(llsize)); + if (ret) { + error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") =3D %d (%m)", + container, iova, int128_get64(llsize), ret); + } + } + memory_region_unref(section->mr); - if (ret) { - error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") =3D %d (%m)", - container, iova, int128_get64(llsize), ret); - } =20 if (container->iommu_type =3D=3D VFIO_SPAPR_TCE_v2_IOMMU) { vfio_spapr_remove_window(container, diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 2c71295..5b20620 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2999,6 +2999,7 @@ static Property vfio_pci_dev_properties[] =3D { DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice, nv_gpudirect_clique, qdev_prop_nv_gpudirect_clique, uint8_t), + DEFINE_PROP_BOOL("p2p", VFIOPCIDevice, vbasedev.p2p, false), /* * TODO - support passed fds... is this necessary? * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), --=20 2.11.0 From nobody Thu May 2 02:23:38 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1517284637792715.0861594964863; Mon, 29 Jan 2018 19:57:17 -0800 (PST) Received: from localhost ([::1]:35187 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1egN2m-0007mO-Nc for importer@patchew.org; Mon, 29 Jan 2018 22:57:12 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:53255) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1egN1G-0006yD-NY for 
qemu-devel@nongnu.org; Mon, 29 Jan 2018 22:55:40 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1egN1F-0001jt-8T for qemu-devel@nongnu.org; Mon, 29 Jan 2018 22:55:38 -0500 Received: from ozlabs.ru ([107.173.13.209]:56666) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1egN1E-0001jL-V2; Mon, 29 Jan 2018 22:55:37 -0500 Received: from vpl1.ozlabs.ibm.com (localhost [IPv6:::1]) by ozlabs.ru (Postfix) with ESMTP id A91CF3A60043; Mon, 29 Jan 2018 22:55:29 -0500 (EST) From: Alexey Kardashevskiy To: qemu-devel@nongnu.org Date: Tue, 30 Jan 2018 14:55:27 +1100 Message-Id: <20180130035527.47336-3-aik@ozlabs.ru> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20180130035527.47336-1-aik@ozlabs.ru> References: <20180130035527.47336-1-aik@ozlabs.ru> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 107.173.13.209 Subject: [Qemu-devel] [PATCH qemu v5 2/2] RFC: vfio-pci: Allow mmap of MSIX BAR X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Alexey Kardashevskiy , Auger Eric , Alex Williamson , qemu-ppc@nongnu.org, David Gibson Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" This makes use of a new VFIO_REGION_INFO_CAP_MSIX_MAPPABLE capability which indicates that a region with MSIX data can be mapped entirely, i.e. the VFIO PCI driver won't prevent the MSIX vectors area from being mapped. With this change, all BARs are mapped in a single chunk and MSIX vectors are emulated on top unless the machine requests not to by defining and enabling a new "vfio-no-msix-emulation" property. 
At the moment only sPAPR machine does so - it prohibits MSIX emulation and does not allow enabling it as it does not define the "set" callback for the new property; the new property also does not appear in "-machine pseries,help". This requires the kernel change - "vfio-pci: Allow mapping MSIX BAR" - for the new capability: https://www.spinics.net/lists/kvm/msg160282.html Signed-off-by: Alexey Kardashevskiy --- Changes: v5: * rebased on top of 'p2p' proposed patch v4: * silenced dma map errors if unaligned mapping is attempted - they are going to fail anyway v3: * vfio_listener_region_add() won't make qemu exit if failed on MMIO MR --- include/hw/vfio/vfio-common.h | 1 + linux-headers/linux/vfio.h | 5 +++++ hw/ppc/spapr.c | 7 +++++++ hw/vfio/common.c | 15 +++++++++++++++ hw/vfio/pci.c | 10 ++++++++++ 5 files changed, 38 insertions(+) diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 8c7ba75..3ef9f8e 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -172,6 +172,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int inde= x, struct vfio_region_info **info); int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type, uint32_t subtype, struct vfio_region_info **i= nfo); +bool vfio_is_cap_present(VFIODevice *vbasedev, uint16_t cap_type, int regi= on); #endif extern const MemoryListener vfio_prereg_listener; =20 diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 4312e96..b45182e 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -301,6 +301,11 @@ struct vfio_region_info_cap_type { #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) =20 +/* + * The MSIX mappable capability informs that MSIX data of a BAR can be mma= pped. 
+ */ +#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3 + /** * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, * struct vfio_irq_info) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 32a876b..6d333e2 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2830,6 +2830,11 @@ static void spapr_set_modern_hotplug_events(Object *= obj, bool value, spapr->use_hotplug_event_source =3D value; } =20 +static bool spapr_get_msix_emulation(Object *obj, Error **errp) +{ + return true; +} + static char *spapr_get_resize_hpt(Object *obj, Error **errp) { sPAPRMachineState *spapr =3D SPAPR_MACHINE(obj); @@ -2911,6 +2916,8 @@ static void spapr_instance_init(Object *obj) object_property_set_description(obj, "vsmt", "Virtual SMT: KVM behaves as if this w= ere" " the host's SMT mode", &error_abort); + object_property_add_bool(obj, "vfio-no-msix-emulation", + spapr_get_msix_emulation, NULL, NULL); } =20 static void spapr_machine_finalizefn(Object *obj) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 8aaed8d..18b98e8 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1431,6 +1431,21 @@ int vfio_get_dev_region_info(VFIODevice *vbasedev, u= int32_t type, return -ENODEV; } =20 +bool vfio_is_cap_present(VFIODevice *vbasedev, uint16_t cap_type, int regi= on) +{ + struct vfio_region_info *info =3D NULL; + bool ret =3D false; + + if (!vfio_get_region_info(vbasedev, region, &info)) { + if (vfio_get_region_info_cap(info, cap_type)) { + ret =3D true; + } + g_free(info); + } + + return ret; +} + /* * Interfaces for IBM EEH (Enhanced Error Handling) */ diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 5b20620..87a186f 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1289,6 +1289,11 @@ static void vfio_pci_fixup_msix_region(VFIOPCIDevice= *vdev) off_t start, end; VFIORegion *region =3D &vdev->bars[vdev->msix->table_bar].region; =20 + if (vfio_is_cap_present(&vdev->vbasedev, VFIO_REGION_INFO_CAP_MSIX_MAP= PABLE, + vdev->msix->table_bar)) { + return; + } + /* * We expect to find a 
single mmap covering the whole BAR, anything el= se * means it's either unsupported or already setup. @@ -1473,6 +1478,11 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int = pos, Error **errp) */ memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, false); =20 + if (object_property_get_bool(OBJECT(qdev_get_machine()), + "vfio-no-msix-emulation", NULL)) { + memory_region_set_enabled(&vdev->pdev.msix_table_mmio, false); + } + return 0; } =20 --=20 2.11.0