hw/xen/xen_pt.c | 53 ++++++++++++++++++++++++++++++++++++++++++++ include/hw/pci/pci.h | 4 ++++ 2 files changed, 57 insertions(+)
In PVH dom0, when passing through a device to a domU, the QEMU code path
xen_pt_realize->xc_physdev_map_pirq needs a gsi, but the current code
gets the gsi number from the file /sys/bus/pci/devices/<sbdf>/irq. That is
wrong, because irq is not equal to gsi; they are in different number spaces, so
the pirq mapping fails.
To solve the above problem, use the new Xen interface xc_pcidev_get_gsi to get
the gsi, and use xc_physdev_map_pirq_gsi to map the pirq when dom0 is PVH.
Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
---
Hi All,
This is v9 to support passthrough on Xen when dom0 is PVH.
v8->v9 changes:
* Moved the definition of PCI_SBDF from /hw/xen/xen_pt.c to /include/hw/pci/pci.h.
* Renamed xen_run_qemu_on_hvm to xen_pt_need_gsi.
* Renamed xen_map_pirq_for_gsi to xen_pt_map_pirq_for_gsi.
* Read /sys/hypervisor/guest_type to get the domain type instead of using xc_domain_getinfo_single.
Best regards,
Jiqian Chen
v7->v8 changes:
* Since xc_physdev_gsi_from_dev was renamed to xc_pcidev_get_gsi, changed it.
* Added xen_run_qemu_on_hvm to check whether QEMU runs on a PV dom0; if not, use xc_physdev_map_pirq_gsi to map the pirq.
* Used CONFIG_XEN_CTRL_INTERFACE_VERSION to wrap the new part for compatibility.
* Added "#define DOMID_RUN_QEMU 0" to represent the id of the domain that QEMU runs on.
v6->v7 changes:
* Because the way of obtaining the gsi was changed on the kernel and Xen side, changed to use
xc_physdev_gsi_from_dev, which requires passing in the sbdf instead of the irq.
v5->v6 changes:
* Because the way of obtaining the gsi was changed on the kernel and Xen side, changed to use
xc_physdev_gsi_from_irq instead of the gsi sysfs.
* Since the function changed, removed the Reviewed-by of Stefano.
v4->v5 changes:
* Added the Reviewed-by of Stefano.
v3->v4 changes:
* Added gsi to struct XenHostPCIDevice and used the gsi number read from the gsi sysfs
if it exists; if there is no gsi sysfs, still use irq.
v2->v3 changes:
* Due to changes in the implementation of the second patch on the kernel side (which adds
a new sysfs for gsi instead of a new syscall), read the gsi number from the gsi sysfs.
v1 and v2:
We can record the relation between gsi and irq, so that when userspace (QEMU) wants
to use a gsi, we can do a translation. The third kernel patch (xen/privcmd: Add new syscall
to get gsi from irq) records all the relations in acpi_register_gsi_xen_pvh() when dom0
initializes PCI devices, and provides a syscall for userspace to get the gsi from the irq. The
third Xen patch (tools: Add new function to get gsi from irq) adds a new function,
xc_physdev_gsi_from_irq(), to call the new syscall added on the kernel side.
Userspace can then use that function to get the gsi, and xc_physdev_map_pirq() will succeed.
Issues we encountered:
1. failed to map pirq for gsi
Problem: QEMU calls xc_physdev_map_pirq() to map a passthrough device's gsi to a pirq in
function xen_pt_realize(), but this fails.
Reason: According to the implementation of xc_physdev_map_pirq(), it needs a gsi instead of an irq,
but QEMU passes an irq to it and treats the irq as a gsi; the irq is read from the file
/sys/bus/pci/devices/xxxx:xx:xx.x/irq in function xen_host_pci_device_get(). But actually
the gsi number is not equal to the irq. They are in different number spaces.
---
hw/xen/xen_pt.c | 53 ++++++++++++++++++++++++++++++++++++++++++++
include/hw/pci/pci.h | 4 ++++
2 files changed, 57 insertions(+)
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 3635d1b39f79..5b10d501d566 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -766,6 +766,50 @@ static void xen_pt_destroy(PCIDevice *d) {
}
/* init */
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 42000
+static bool xen_pt_need_gsi(void)
+{
+ FILE *fp;
+ int len;
+ char type[10];
+ const char *guest_type = "/sys/hypervisor/guest_type";
+
+ fp = fopen(guest_type, "r");
+ if (fp == NULL) {
+ error_report("Cannot open %s: %s", guest_type, strerror(errno));
+ return false;
+ }
+ fgets(type, sizeof(type), fp);
+ fclose(fp);
+
+ len = strlen(type);
+ if (len) {
+ type[len - 1] = '\0';
+ if (!strcmp(type, "PVH")) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static int xen_pt_map_pirq_for_gsi(PCIDevice *d, int *pirq)
+{
+ int gsi;
+ XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
+
+ gsi = xc_pcidev_get_gsi(xen_xc,
+ PCI_SBDF(s->real_device.domain,
+ s->real_device.bus,
+ s->real_device.dev,
+ s->real_device.func));
+ if (gsi >= 0) {
+ return xc_physdev_map_pirq_gsi(xen_xc, xen_domid, gsi, pirq);
+ }
+
+ return gsi;
+}
+#endif
+
static void xen_pt_realize(PCIDevice *d, Error **errp)
{
ERRP_GUARD();
@@ -847,7 +891,16 @@ static void xen_pt_realize(PCIDevice *d, Error **errp)
goto out;
}
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 42000
+ if (xen_pt_need_gsi()) {
+ rc = xen_pt_map_pirq_for_gsi(d, &pirq);
+ } else {
+ rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
+ }
+#else
rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
+#endif
+
if (rc < 0) {
XEN_PT_ERR(d, "Mapping machine irq %u to pirq %i failed, (err: %d)\n",
machine_irq, pirq, errno);
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index eb26cac81098..07805aa8a5f3 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -23,6 +23,10 @@ extern bool pci_available;
#define PCI_SLOT_MAX 32
#define PCI_FUNC_MAX 8
+#define PCI_SBDF(seg, bus, dev, func) \
+ ((((uint32_t)(seg)) << 16) | \
+ (PCI_BUILD_BDF(bus, PCI_DEVFN(dev, func))))
+
/* Class, Vendor and Device IDs from Linux's pci_ids.h */
#include "hw/pci/pci_ids.h"
--
2.34.1
On 10/24/24 05:06, Jiqian Chen wrote:
> diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
> index 3635d1b39f79..5b10d501d566 100644
> --- a/hw/xen/xen_pt.c
> +++ b/hw/xen/xen_pt.c
> @@ -766,6 +766,50 @@ static void xen_pt_destroy(PCIDevice *d) {
> }
> /* init */
>
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 42000
> +static bool xen_pt_need_gsi(void)
> +{
> + FILE *fp;
> + int len;
> + char type[10];
A brief in-code comment to explain how you arrived at 10 would be
appreciated.
> + const char *guest_type = "/sys/hypervisor/guest_type";
> +
> + fp = fopen(guest_type, "r");
> + if (fp == NULL) {
> + error_report("Cannot open %s: %s", guest_type, strerror(errno));
> + return false;
> + }
> + fgets(type, sizeof(type), fp);
Please check the return value of fgets.
> + fclose(fp);
> +
> + len = strlen(type);
Before passing to strlen, is "type" always guaranteed to have a
terminating '\0' character?
> + if (len) {
> + type[len - 1] = '\0';
> + if (!strcmp(type, "PVH")) {
> + return true;
> + }
> + }
> + return false;
> +}
> +
> +static int xen_pt_map_pirq_for_gsi(PCIDevice *d, int *pirq)
> +{
> + int gsi;
> + XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
> +
> + gsi = xc_pcidev_get_gsi(xen_xc,
> + PCI_SBDF(s->real_device.domain,
> + s->real_device.bus,
> + s->real_device.dev,
> + s->real_device.func));
> + if (gsi >= 0) {
> + return xc_physdev_map_pirq_gsi(xen_xc, xen_domid, gsi, pirq);
> + }
> +
> + return gsi;
> +}
> +#endif
> +
> static void xen_pt_realize(PCIDevice *d, Error **errp)
> {
> ERRP_GUARD();
> @@ -847,7 +891,16 @@ static void xen_pt_realize(PCIDevice *d, Error **errp)
> goto out;
> }
>
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 42000
> + if (xen_pt_need_gsi()) {
> + rc = xen_pt_map_pirq_for_gsi(d, &pirq);
> + } else {
> + rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
> + }
> +#else
> rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
> +#endif
> +
> if (rc < 0) {
> XEN_PT_ERR(d, "Mapping machine irq %u to pirq %i failed, (err: %d)\n",
> machine_irq, pirq, errno);
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index eb26cac81098..07805aa8a5f3 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -23,6 +23,10 @@ extern bool pci_available;
> #define PCI_SLOT_MAX 32
> #define PCI_FUNC_MAX 8
>
> +#define PCI_SBDF(seg, bus, dev, func) \
> + ((((uint32_t)(seg)) << 16) | \
> + (PCI_BUILD_BDF(bus, PCI_DEVFN(dev, func))))
> +
> /* Class, Vendor and Device IDs from Linux's pci_ids.h */
> #include "hw/pci/pci_ids.h"
>
On 2024/11/1 21:09, Stewart Hildebrand wrote:
> On 10/24/24 05:06, Jiqian Chen wrote:
>> diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
>> index 3635d1b39f79..5b10d501d566 100644
>> --- a/hw/xen/xen_pt.c
>> +++ b/hw/xen/xen_pt.c
>> @@ -766,6 +766,50 @@ static void xen_pt_destroy(PCIDevice *d) {
>> }
>> /* init */
>>
>> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 42000
>> +static bool xen_pt_need_gsi(void)
>> +{
>> + FILE *fp;
>> + int len;
>> + char type[10];
>
> A brief in-code comment to explain how you arrived at 10 would be
> appreciated.
The maximum number of characters in the contents of "guest_type" is 4 ("PVH" plus a line break).
I set it to 10 to allow for longer type descriptions in the future.
Do you have another number to suggest?
>
>> + const char *guest_type = "/sys/hypervisor/guest_type";
>> +
>> + fp = fopen(guest_type, "r");
>> + if (fp == NULL) {
>> + error_report("Cannot open %s: %s", guest_type, strerror(errno));
>> + return false;
>> + }
>> + fgets(type, sizeof(type), fp);
>
> Please check the return value of fgets.
Will change in next version.
>
>> + fclose(fp);
>> +
>> + len = strlen(type);
>
> Before passing to strlen, is "type" always guaranteed to have a
> terminating '\0' character?
Yes, "fgets" guarantees that, and I will add a check for when "fgets" returns NULL in the next version.
>
>> + if (len) {
>> + type[len - 1] = '\0';
>> + if (!strcmp(type, "PVH")) {
>> + return true;
>> + }
>> + }
>> + return false;
>> +}
>> +
>> +static int xen_pt_map_pirq_for_gsi(PCIDevice *d, int *pirq)
>> +{
>> + int gsi;
>> + XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
>> +
>> + gsi = xc_pcidev_get_gsi(xen_xc,
>> + PCI_SBDF(s->real_device.domain,
>> + s->real_device.bus,
>> + s->real_device.dev,
>> + s->real_device.func));
>> + if (gsi >= 0) {
>> + return xc_physdev_map_pirq_gsi(xen_xc, xen_domid, gsi, pirq);
>> + }
>> +
>> + return gsi;
>> +}
>> +#endif
>> +
>> static void xen_pt_realize(PCIDevice *d, Error **errp)
>> {
>> ERRP_GUARD();
>> @@ -847,7 +891,16 @@ static void xen_pt_realize(PCIDevice *d, Error **errp)
>> goto out;
>> }
>>
>> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 42000
>> + if (xen_pt_need_gsi()) {
>> + rc = xen_pt_map_pirq_for_gsi(d, &pirq);
>> + } else {
>> + rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
>> + }
>> +#else
>> rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
>> +#endif
>> +
>> if (rc < 0) {
>> XEN_PT_ERR(d, "Mapping machine irq %u to pirq %i failed, (err: %d)\n",
>> machine_irq, pirq, errno);
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index eb26cac81098..07805aa8a5f3 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -23,6 +23,10 @@ extern bool pci_available;
>> #define PCI_SLOT_MAX 32
>> #define PCI_FUNC_MAX 8
>>
>> +#define PCI_SBDF(seg, bus, dev, func) \
>> + ((((uint32_t)(seg)) << 16) | \
>> + (PCI_BUILD_BDF(bus, PCI_DEVFN(dev, func))))
>> +
>> /* Class, Vendor and Device IDs from Linux's pci_ids.h */
>> #include "hw/pci/pci_ids.h"
>>
>
--
Best regards,
Jiqian Chen.
On 11/4/24 01:03, Chen, Jiqian wrote:
> On 2024/11/1 21:09, Stewart Hildebrand wrote:
>> On 10/24/24 05:06, Jiqian Chen wrote:
>>> diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
>>> index 3635d1b39f79..5b10d501d566 100644
>>> --- a/hw/xen/xen_pt.c
>>> +++ b/hw/xen/xen_pt.c
>>> @@ -766,6 +766,50 @@ static void xen_pt_destroy(PCIDevice *d) {
>>> }
>>> /* init */
>>>
>>> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 42000
>>> +static bool xen_pt_need_gsi(void)
>>> +{
>>> + FILE *fp;
>>> + int len;
>>> + char type[10];
>>
>> A brief in-code comment to explain how you arrived at 10 would be
>> appreciated.
> The max number of characters in the description of the "guest_type" is 4 ("PVH" plus line break).
> I set it to 10 to prevent longer description types in the future.
> Do you have another suggest number?
No, I think 10 is a good choice. I'm just looking for the rationale to
be documented.
Hi,
On 2024/10/24 17:06, Jiqian Chen wrote:
> In PVH dom0, when passthrough a device to domU, QEMU code
> xen_pt_realize->xc_physdev_map_pirq wants to use gsi, but in current codes
> the gsi number is got from file /sys/bus/pci/devices/<sbdf>/irq, that is
> wrong, because irq is not equal with gsi, they are in different spaces, so
> pirq mapping fails.
>
> To solve above problem, use new interface of Xen, xc_pcidev_get_gsi to get
> gsi and use xc_physdev_map_pirq_gsi to map pirq when dom0 is PVH.
>
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> Signed-off-by: Huang Rui <ray.huang@amd.com>
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> ---
> Hi All,
> This is v9 to support passthrough on Xen when dom0 is PVH.
> v8->v9 changes:
> * Moved the definition of PCI_SBDF from /hw/xen/xen_pt.c to /include/hw/pci/pci.h.
> * Renamed xen_run_qemu_on_hvm to xen_pt_need_gsi.
> * Renamed xen_map_pirq_for_gsi to xen_pt_map_pirq_for_gsi.
> * Through reading /sys/hypervisor/guest_type to get dom type instead of using xc_domain_getinfo_single.
>
> Best regards,
> Jiqian Chen
>
> v7->v8 changes:
> * Since xc_physdev_gsi_from_dev was renamed to xc_pcidev_get_gsi, changed it.
> * Added xen_run_qemu_on_hvm to check if Qemu run on PV dom0, if not use xc_physdev_map_pirq_gsi to map pirq.
> * Used CONFIG_XEN_CTRL_INTERFACE_VERSION to wrap the new part for compatibility.
> * Added "#define DOMID_RUN_QEMU 0" to represent the id of domain that Qemu run on.
>
> v6->v7 changes:
> * Because the function of obtaining gsi was changed on the kernel and Xen side. Changed to use
> xc_physdev_gsi_from_dev, that requires passing in sbdf instead of irq.
>
> v5->v6 changes:
> * Because the function of obtaining gsi was changed on the kernel and Xen side. Changed to use
> xc_physdev_gsi_from_irq, instead of gsi sysfs.
> * Since function changed, removed the Review-by of Stefano.
>
> v4->v5 changes:
> * Added Review-by Stefano.
>
> v3->v4 changes:
> * Added gsi into struct XenHostPCIDevice and used gsi number that read from gsi sysfs
> if it exists, if there is no gsi sysfs, still use irq.
>
> v2->v3 changes:
> * Due to changes in the implementation of the second patch on kernel side(that adds
> a new sysfs for gsi instead of a new syscall), so read gsi number from the sysfs of gsi.
>
> v1 and v2:
> We can record the relation between gsi and irq, then when userspace(qemu) want
> to use gsi, we can do a translation. The third patch of kernel(xen/privcmd: Add new syscall
> to get gsi from irq) records all the relations in acpi_register_gsi_xen_pvh() when dom0
> initialize pci devices, and provide a syscall for userspace to get the gsi from irq. The
> third patch of xen(tools: Add new function to get gsi from irq) add a new function
> xc_physdev_gsi_from_irq() to call the new syscall added on kernel side.
> And then userspace can use that function to get gsi. Then xc_physdev_map_pirq() will success.
>
> Issues we encountered:
> 1. failed to map pirq for gsi
> Problem: qemu will call xc_physdev_map_pirq() to map a passthrough device's gsi to pirq in
> function xen_pt_realize(). But failed.
>
> Reason: According to the implement of xc_physdev_map_pirq(), it needs gsi instead of irq,
> but qemu pass irq to it and treat irq as gsi, it is got from file
> /sys/bus/pci/devices/xxxx:xx:xx.x/irq in function xen_host_pci_device_get(). But actually
> the gsi number is not equal with irq. They are in different space.
> ---
> hw/xen/xen_pt.c | 53 ++++++++++++++++++++++++++++++++++++++++++++
> include/hw/pci/pci.h | 4 ++++
> 2 files changed, 57 insertions(+)
>
> diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
> index 3635d1b39f79..5b10d501d566 100644
> --- a/hw/xen/xen_pt.c
> +++ b/hw/xen/xen_pt.c
> @@ -766,6 +766,50 @@ static void xen_pt_destroy(PCIDevice *d) {
> }
> /* init */
>
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 42000
> +static bool xen_pt_need_gsi(void)
> +{
> + FILE *fp;
> + int len;
> + char type[10];
> + const char *guest_type = "/sys/hypervisor/guest_type";
> +
> + fp = fopen(guest_type, "r");
> + if (fp == NULL) {
> + error_report("Cannot open %s: %s", guest_type, strerror(errno));
> + return false;
> + }
> + fgets(type, sizeof(type), fp);
> + fclose(fp);
> +
> + len = strlen(type);
> + if (len) {
> + type[len - 1] = '\0';
> + if (!strcmp(type, "PVH")) {
> + return true;
> + }
> + }
> + return false;
> +}
> +
> +static int xen_pt_map_pirq_for_gsi(PCIDevice *d, int *pirq)
> +{
> + int gsi;
> + XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
> +
> + gsi = xc_pcidev_get_gsi(xen_xc,
> + PCI_SBDF(s->real_device.domain,
> + s->real_device.bus,
> + s->real_device.dev,
> + s->real_device.func));
> + if (gsi >= 0) {
> + return xc_physdev_map_pirq_gsi(xen_xc, xen_domid, gsi, pirq);
> + }
> +
> + return gsi;
> +}
> +#endif
> +
> static void xen_pt_realize(PCIDevice *d, Error **errp)
> {
> ERRP_GUARD();
> @@ -847,7 +891,16 @@ static void xen_pt_realize(PCIDevice *d, Error **errp)
> goto out;
> }
>
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 42000
> + if (xen_pt_need_gsi()) {
> + rc = xen_pt_map_pirq_for_gsi(d, &pirq);
> + } else {
> + rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
> + }
> +#else
> rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
> +#endif
> +
> if (rc < 0) {
> XEN_PT_ERR(d, "Mapping machine irq %u to pirq %i failed, (err: %d)\n",
> machine_irq, pirq, errno);
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index eb26cac81098..07805aa8a5f3 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -23,6 +23,10 @@ extern bool pci_available;
> #define PCI_SLOT_MAX 32
> #define PCI_FUNC_MAX 8
>
> +#define PCI_SBDF(seg, bus, dev, func) \
> + ((((uint32_t)(seg)) << 16) | \
> + (PCI_BUILD_BDF(bus, PCI_DEVFN(dev, func))))
> +
> /* Class, Vendor and Device IDs from Linux's pci_ids.h */
> #include "hw/pci/pci_ids.h"
>
Do you have any comments?
--
Best regards,
Jiqian Chen.
© 2016 - 2026 Red Hat, Inc.