drivers/iommu/amd/amd_iommu_types.h | 3 +++ drivers/iommu/amd/iommu.c | 40 +++++++++++++++++++++++++++++ include/uapi/linux/iommufd.h | 19 ++++++++++++++ 3 files changed, 62 insertions(+)
AMD IOMMU Extended Feature (EFR) and Extended Feature 2 (EFR2) registers
specify features supported by each IOMMU hardware instance.
The IOMMU driver checks each feature-specific bits before enabling
each feature at run time.
For IOMMUFD, the hypervisor determines which IOMMU features to support
in the guest, and communicates this information to user-space (e.g. QEMU)
via iommufd IOMMU_DEVICE_GET_HW_INFO ioctl.
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
drivers/iommu/amd/amd_iommu_types.h | 3 +++
drivers/iommu/amd/iommu.c | 40 +++++++++++++++++++++++++++++
include/uapi/linux/iommufd.h | 19 ++++++++++++++
3 files changed, 62 insertions(+)
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 5219d7ddfdaa..efdd0cbda1df 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -95,9 +95,12 @@
#define FEATURE_HE BIT_ULL(8)
#define FEATURE_PC BIT_ULL(9)
#define FEATURE_HATS GENMASK_ULL(11, 10)
+#define FEATURE_GATS_SHIFT 12
#define FEATURE_GATS GENMASK_ULL(13, 12)
+#define FEATURE_GLX_SHIFT 14
#define FEATURE_GLX GENMASK_ULL(15, 14)
#define FEATURE_GAM_VAPIC BIT_ULL(21)
+#define FEATURE_PASMAX_SHIFT 32
#define FEATURE_PASMAX GENMASK_ULL(36, 32)
#define FEATURE_GIOSUP BIT_ULL(48)
#define FEATURE_HASUP BIT_ULL(49)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index eb348c63a8d0..ebe1cb9b0807 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -3038,8 +3038,48 @@ static const struct iommu_dirty_ops amd_dirty_ops = {
.read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
};
+#define AMD_VIOMMU_EFR_GUEST_TRANSLATION_FLAGS \
+ (FEATURE_GT | FEATURE_GA | FEATURE_GIOSUP | \
+ FEATURE_PPR | FEATURE_EPHSUP)
+
+static void _build_efr_guest_translation(struct amd_iommu *iommu, u64 *efr, u64 *efr2)
+{
+ /*
+ * Build the EFR against the current hardware capabilities
+ *
+ * Also, not all IOMMU features are emulated by KVM.
+ * Therefore, only advertise what KVM can support
+ * or virtualzied by the hardware.
+ */
+ if (!efr)
+ return;
+
+ *efr |= (amd_iommu_efr & AMD_VIOMMU_EFR_GUEST_TRANSLATION_FLAGS);
+ *efr |= (FIELD_GET(FEATURE_GATS, amd_iommu_efr) << FEATURE_GATS_SHIFT);
+ *efr |= (FIELD_GET(FEATURE_GLX, amd_iommu_efr) << FEATURE_GLX_SHIFT);
+ *efr |= (FIELD_GET(FEATURE_PASMAX, amd_iommu_efr) << FEATURE_PASMAX_SHIFT);
+ pr_debug("%s: efr=%#llx\n", __func__, *efr);
+}
+
+static void *amd_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
+{
+ struct iommu_hw_info_amd *hwinfo;
+ struct amd_iommu *iommu = rlookup_amd_iommu(dev);
+
+ hwinfo = kzalloc(sizeof(*hwinfo), GFP_KERNEL);
+ if (!hwinfo)
+ return ERR_PTR(-ENOMEM);
+
+ *length = sizeof(*hwinfo);
+ *type = IOMMU_HW_INFO_TYPE_AMD;
+
+ _build_efr_guest_translation(iommu, &hwinfo->efr, &hwinfo->efr2);
+ return hwinfo;
+}
+
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
+ .hw_info = amd_iommu_hw_info,
.blocked_domain = &blocked_domain,
.release_domain = &release_domain,
.identity_domain = &identity_domain.domain,
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index c218c89e0e2e..0f7212f9e0ce 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -613,6 +613,24 @@ struct iommu_hw_info_tegra241_cmdqv {
__u8 __reserved;
};
+/**
+ * struct iommu_hw_info_amd - AMD IOMMU device info
+ *
+ * @efr : Value of AMD IOMMU Extended Feature Register (EFR)
+ * @efr2: Value of AMD IOMMU Extended Feature 2 Register (EFR2)
+ *
+ * Please See description of these registers in the following sections of
+ * the AMD I/O Virtualization Technology (IOMMU) Specification.
+ * (https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_IOMMU.pdf)
+ *
+ * - MMIO Offset 0030h IOMMU Extended Feature Register
+ * - MMIO Offset 01A0h IOMMU Extended Feature 2 Register
+ */
+struct iommu_hw_info_amd {
+ __aligned_u64 efr;
+ __aligned_u64 efr2;
+};
+
/**
* enum iommu_hw_info_type - IOMMU Hardware Info Types
* @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware
@@ -629,6 +647,7 @@ enum iommu_hw_info_type {
IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV = 3,
+ IOMMU_HW_INFO_TYPE_AMD = 4,
};
/**
--
2.34.1
On Wed, Aug 20, 2025 at 04:25:33AM +0000, Suravee Suthikulpanit wrote: > AMD IOMMU Extended Feature (EFR) and Extended Feature 2 (EFR2) registers > specify features supported by each IOMMU hardware instance. > The IOMMU driver checks each feature-specific bits before enabling > each feature at run time. > > For IOMMUFD, the hypervisor determines which IOMMU features to support > in the guest, and communicates this information to user-space (e.g. QEMU) > via iommufd IOMMU_DEVICE_GET_HW_INFO ioctl. > > Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> > --- > drivers/iommu/amd/amd_iommu_types.h | 3 +++ > drivers/iommu/amd/iommu.c | 40 +++++++++++++++++++++++++++++ > include/uapi/linux/iommufd.h | 19 ++++++++++++++ > 3 files changed, 62 insertions(+) Can you follow what ARM did and put the iommufd functions in a amd/iommufd.c file? I think that worked pretty good. Jason > diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h > index 5219d7ddfdaa..efdd0cbda1df 100644 > --- a/drivers/iommu/amd/amd_iommu_types.h > +++ b/drivers/iommu/amd/amd_iommu_types.h > @@ -95,9 +95,12 @@ > #define FEATURE_HE BIT_ULL(8) > #define FEATURE_PC BIT_ULL(9) > #define FEATURE_HATS GENMASK_ULL(11, 10) > +#define FEATURE_GATS_SHIFT 12 > #define FEATURE_GATS GENMASK_ULL(13, 12) > +#define FEATURE_GLX_SHIFT 14 > #define FEATURE_GLX GENMASK_ULL(15, 14) > #define FEATURE_GAM_VAPIC BIT_ULL(21) > +#define FEATURE_PASMAX_SHIFT 32 > #define FEATURE_PASMAX GENMASK_ULL(36, 32) Please no.. FIELD_PREP is how to get the shift, don't add constants. > diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c > index eb348c63a8d0..ebe1cb9b0807 100644 > --- a/drivers/iommu/amd/iommu.c > +++ b/drivers/iommu/amd/iommu.c > @@ -3038,8 +3038,48 @@ static const struct iommu_dirty_ops amd_dirty_ops = { > .read_and_clear_dirty = amd_iommu_read_and_clear_dirty, > }; > > +#define AMD_VIOMMU_EFR_GUEST_TRANSLATION_FLAGS \ > + (FEATURE_GT | FEATURE_GA | FEATURE_GIOSUP | \ > + FEATURE_PPR | FEATURE_EPHSUP) > + > +static void _build_efr_guest_translation(struct amd_iommu *iommu, u64 *efr, u64 *efr2) > +{ > + /* > + * Build the EFR against the current hardware capabilities > + * > + * Also, not all IOMMU features are emulated by KVM. > + * Therefore, only advertise what KVM can support > + * or virtualzied by the hardware. > + */ What does KVM have to do with iommu features on AMD architecture?? > + if (!efr) > + return; > + > + *efr |= (amd_iommu_efr & AMD_VIOMMU_EFR_GUEST_TRANSLATION_FLAGS); > + *efr |= (FIELD_GET(FEATURE_GATS, amd_iommu_efr) << FEATURE_GATS_SHIFT); > + *efr |= (FIELD_GET(FEATURE_GLX, amd_iommu_efr) << FEATURE_GLX_SHIFT); > + *efr |= (FIELD_GET(FEATURE_PASMAX, amd_iommu_efr) << FEATURE_PASMAX_SHIFT); > + pr_debug("%s: efr=%#llx\n", __func__, *efr); > +} I'm not sure all this masking is a good idea, how do you intend to handshake upgrades when more features are supported if masking is present? Nothing sets efr2? > +/** > + * struct iommu_hw_info_amd - AMD IOMMU device info > + * > + * @efr : Value of AMD IOMMU Extended Feature Register (EFR) > + * @efr2: Value of AMD IOMMU Extended Feature 2 Register (EFR2) > + * > + * Please See description of these registers in the following sections of > + * the AMD I/O Virtualization Technology (IOMMU) Specification. > + * (https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_IOMMU.pdf) > + * > + * - MMIO Offset 0030h IOMMU Extended Feature Register > + * - MMIO Offset 01A0h IOMMU Extended Feature 2 Register > + */ Need to document the masking and explain what the forwards/backwards compatible strategy is here. I think you should probably just pass the raw HW value through and require the VMM to figure out what bits it needs based on feature flags elsewhere. Jason
On 8/25/2025 8:44 AM, Jason Gunthorpe wrote: > On Wed, Aug 20, 2025 at 04:25:33AM +0000, Suravee Suthikulpanit wrote: >> AMD IOMMU Extended Feature (EFR) and Extended Feature 2 (EFR2) registers >> specify features supported by each IOMMU hardware instance. >> The IOMMU driver checks each feature-specific bits before enabling >> each feature at run time. >> >> For IOMMUFD, the hypervisor determines which IOMMU features to support >> in the guest, and communicates this information to user-space (e.g. QEMU) >> via iommufd IOMMU_DEVICE_GET_HW_INFO ioctl. >> >> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> >> --- >> drivers/iommu/amd/amd_iommu_types.h | 3 +++ >> drivers/iommu/amd/iommu.c | 40 +++++++++++++++++++++++++++++ >> include/uapi/linux/iommufd.h | 19 ++++++++++++++ >> 3 files changed, 62 insertions(+) > > Can you follow what ARM did and put the iommufd functions in a > amd/iommufd.c file? I think that worked pretty good. Sure, I can put it in the amd/iommufd.c. > >> diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h >> index 5219d7ddfdaa..efdd0cbda1df 100644 >> --- a/drivers/iommu/amd/amd_iommu_types.h >> +++ b/drivers/iommu/amd/amd_iommu_types.h >> @@ -95,9 +95,12 @@ >> #define FEATURE_HE BIT_ULL(8) >> #define FEATURE_PC BIT_ULL(9) >> #define FEATURE_HATS GENMASK_ULL(11, 10) >> +#define FEATURE_GATS_SHIFT 12 >> #define FEATURE_GATS GENMASK_ULL(13, 12) >> +#define FEATURE_GLX_SHIFT 14 >> #define FEATURE_GLX GENMASK_ULL(15, 14) >> #define FEATURE_GAM_VAPIC BIT_ULL(21) >> +#define FEATURE_PASMAX_SHIFT 32 >> #define FEATURE_PASMAX GENMASK_ULL(36, 32) > > Please no.. FIELD_PREP is how to get the shift, don't add constants. Good point. Ok >> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c >> index eb348c63a8d0..ebe1cb9b0807 100644 >> --- a/drivers/iommu/amd/iommu.c >> +++ b/drivers/iommu/amd/iommu.c >> @@ -3038,8 +3038,48 @@ static const struct iommu_dirty_ops amd_dirty_ops = { >> .read_and_clear_dirty = amd_iommu_read_and_clear_dirty, >> }; >> >> +#define AMD_VIOMMU_EFR_GUEST_TRANSLATION_FLAGS \ >> + (FEATURE_GT | FEATURE_GA | FEATURE_GIOSUP | \ >> + FEATURE_PPR | FEATURE_EPHSUP) >> + >> +static void _build_efr_guest_translation(struct amd_iommu *iommu, u64 *efr, u64 *efr2) >> +{ >> + /* >> + * Build the EFR against the current hardware capabilities >> + * >> + * Also, not all IOMMU features are emulated by KVM. >> + * Therefore, only advertise what KVM can support >> + * or virtualzied by the hardware. >> + */ > > What does KVM have to do with iommu features on AMD architecture?? Sorry, I meant to say hypervisor. >> + if (!efr) >> + return; >> + >> + *efr |= (amd_iommu_efr & AMD_VIOMMU_EFR_GUEST_TRANSLATION_FLAGS); >> + *efr |= (FIELD_GET(FEATURE_GATS, amd_iommu_efr) << FEATURE_GATS_SHIFT); >> + *efr |= (FIELD_GET(FEATURE_GLX, amd_iommu_efr) << FEATURE_GLX_SHIFT); >> + *efr |= (FIELD_GET(FEATURE_PASMAX, amd_iommu_efr) << FEATURE_PASMAX_SHIFT); >> + pr_debug("%s: efr=%#llx\n", __func__, *efr); >> +} > > I'm not sure all this masking is a good idea, how do you intend to > handshake upgrades when more features are supported if masking is > present? > > Nothing sets efr2? We are planning to extend this function to advertise additional flags in EFR/EFR2 once we add guest support for the corresponded features in the hypervisor in the future. >> +/** >> + * struct iommu_hw_info_amd - AMD IOMMU device info >> + * >> + * @efr : Value of AMD IOMMU Extended Feature Register (EFR) >> + * @efr2: Value of AMD IOMMU Extended Feature 2 Register (EFR2) >> + * >> + * Please See description of these registers in the following sections of >> + * the AMD I/O Virtualization Technology (IOMMU) Specification. >> + * (https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_IOMMU.pdf) >> + * >> + * - MMIO Offset 0030h IOMMU Extended Feature Register >> + * - MMIO Offset 01A0h IOMMU Extended Feature 2 Register >> + */ > > Need to document the masking and explain what the forwards/backwards > compatible strategy is here. Sure, I can add a note that certain EFR/EFR2 are masked based on the available support in the hypervisor. > I think you should probably just pass the raw HW value through and > require the VMM to figure out what bits it needs based on feature > flags elsewhere. The problem is some of the features are virtualized by hardware, which needs enabling from the Linux AMD IOMMU driver. We cannot just provide all flags since VMM would not know if the kernel has the support enabled. Some of these bits are also propagated to guest AMD IOMMU driver. If we don't mask them, the guest would detect and try to enable unsupported features, which would cause failure. Thanks, Suravee
On Tue, Aug 26, 2025 at 12:36:23PM -0500, Suthikulpanit, Suravee wrote: > > I think you should probably just pass the raw HW value through and > > require the VMM to figure out what bits it needs based on feature > > flags elsewhere. > > The problem is some of the features are virtualized by hardware, which needs > enabling from the Linux AMD IOMMU driver. We cannot just provide all flags > since VMM would not know if the kernel has the support enabled. The VMM is not supposed to forward these flags as-is! It is sort of some kind of maximum what the underlying HW can support. If you forward as-is then the VMM will forward broken flags it doesn't support when the kernel gets updated, that isn't OK. Each and every feature the VMM wants to show in the EFR has to figured out on its own if it can be supported based on other kernel features. The utility of the get_info return is for HW features that don't require any special kernel enablement. This is all the same as ARM which is working this way, I don't think there is a reason to deviate here, it just gets confusing and opens up paths for bugs. Pass the real values from HW, write a comment similar to ARM that says these are raw HW values and the VMM must generate its own EFR not copy blindly from here. Jason
On 8/26/2025 12:58 PM, Jason Gunthorpe wrote: > On Tue, Aug 26, 2025 at 12:36:23PM -0500, Suthikulpanit, Suravee wrote: >>> I think you should probably just pass the raw HW value through and >>> require the VMM to figure out what bits it needs based on feature >>> flags elsewhere. >> >> The problem is some of the features are virtualized by hardware, which needs >> enabling from the Linux AMD IOMMU driver. We cannot just provide all flags >> since VMM would not know if the kernel has the support enabled. > > The VMM is not supposed to forward these flags as-is! It is sort of > some kind of maximum what the underlying HW can support. > > If you forward as-is then the VMM will forward broken flags it doesn't > support when the kernel gets updated, that isn't OK. I got this part. That's why we mask out unsupported feature flags before returning the EFR/EFR2 to the VMM. > Each and every feature the VMM wants to show in the EFR has to figured > out on its own if it can be supported based on other kernel features. > > The utility of the get_info return is for HW features that don't > require any special kernel enablement. Not sure if I got this part. Are you referring to the struct vfio_iommu_type1_info and vfio_iommu_type1_get_info()? Suravee
On Tue, Aug 26, 2025 at 01:43:59PM -0500, Suthikulpanit, Suravee wrote: > > > On 8/26/2025 12:58 PM, Jason Gunthorpe wrote: > > On Tue, Aug 26, 2025 at 12:36:23PM -0500, Suthikulpanit, Suravee wrote: > > > > I think you should probably just pass the raw HW value through and > > > > require the VMM to figure out what bits it needs based on feature > > > > flags elsewhere. > > > > > > The problem is some of the features are virtualized by hardware, which needs > > > enabling from the Linux AMD IOMMU driver. We cannot just provide all flags > > > since VMM would not know if the kernel has the support enabled. > > > > The VMM is not supposed to forward these flags as-is! It is sort of > > some kind of maximum what the underlying HW can support. > > > > If you forward as-is then the VMM will forward broken flags it doesn't > > support when the kernel gets updated, that isn't OK. > > I got this part. That's why we mask out unsupported feature flags before > returning the EFR/EFR2 to the VMM. The kernel can't do anything on behalf of the VMM, it doesn't know what the VMM even supports emulating. The VMM alone is responsible to build the efr/efr2 values. The VMM may choose to copy only some bits from the kernel, but only if it knows it can support whatever it is copying. > > Each and every feature the VMM wants to show in the EFR has to figured > > out on its own if it can be supported based on other kernel features. > > > > The utility of the get_info return is for HW features that don't > > require any special kernel enablement. > > Not sure if I got this part. Are you referring to the struct > vfio_iommu_type1_info and vfio_iommu_type1_get_info()? Sorry hw_info. Jason
© 2016 - 2025 Red Hat, Inc.