From: Yi Liu <yi.l.liu@intel.com>
This adds stage-1 page table walking to support stage-1 only
translation in scalable modern mode.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_internal.h | 17 +++++
hw/i386/intel_iommu.c | 128 +++++++++++++++++++++++++++++++--
2 files changed, 141 insertions(+), 4 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 0e240d6d54..abfdbd5f65 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -534,6 +534,23 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-address-width */
#define VTD_SM_PASID_ENTRY_DID(val) ((val) & VTD_DOMAIN_ID_MASK)
+#define VTD_SM_PASID_ENTRY_FLPM 3ULL
+#define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL)
+
+/* Paging Structure common */
+#define VTD_FL_PT_PAGE_SIZE_MASK (1ULL << 7)
+/* Bits to decide the offset for each level */
+#define VTD_FL_LEVEL_BITS 9
+
+/* First Level Paging Structure */
+#define VTD_FL_PT_LEVEL 1
+#define VTD_FL_PT_ENTRY_NR 512
+
+/* Masks for First Level Paging Entry */
+#define VTD_FL_RW_MASK (1ULL << 1)
+#define VTD_FL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw))
+#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
+
/* Second Level Page Translation Pointer*/
#define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 544e8f0e40..cf29809bc1 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -50,6 +50,8 @@
/* pe operations */
#define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
#define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW))
+#define VTD_PE_GET_FLPT_LEVEL(pe) \
+ (4 + (((pe)->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM))
/*
* PCI bus number (or SID) is not reliable since the device is usaully
@@ -823,6 +825,11 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
return -VTD_FR_PASID_TABLE_ENTRY_INV;
}
+ if (pgtt == VTD_SM_PASID_ENTRY_FLT &&
+ VTD_PE_GET_FLPT_LEVEL(pe) != 4) {
+ return -VTD_FR_PASID_TABLE_ENTRY_INV;
+ }
+
return 0;
}
@@ -958,7 +965,11 @@ static uint32_t vtd_get_iova_level(IntelIOMMUState *s,
if (s->root_scalable) {
vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
- return VTD_PE_GET_LEVEL(&pe);
+ if (s->scalable_modern) {
+ return VTD_PE_GET_FLPT_LEVEL(&pe);
+ } else {
+ return VTD_PE_GET_LEVEL(&pe);
+ }
}
return vtd_ce_get_level(ce);
@@ -1045,7 +1056,11 @@ static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
if (s->root_scalable) {
vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
- return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
+ if (s->scalable_modern) {
+ return pe.val[2] & VTD_SM_PASID_ENTRY_FLPTPTR;
+ } else {
+ return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
+ }
}
return vtd_ce_get_slpt_base(ce);
@@ -1847,6 +1862,106 @@ out:
trace_vtd_pt_enable_fast_path(source_id, success);
}
+/* The shift of an addr for a certain level of paging structure */
+static inline uint32_t vtd_flpt_level_shift(uint32_t level)
+{
+ assert(level != 0);
+ return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_FL_LEVEL_BITS;
+}
+
+/*
+ * Given an iova and the level of paging structure, return the offset
+ * of current level.
+ */
+static inline uint32_t vtd_iova_fl_level_offset(uint64_t iova, uint32_t level)
+{
+ return (iova >> vtd_flpt_level_shift(level)) &
+ ((1ULL << VTD_FL_LEVEL_BITS) - 1);
+}
+
+/* Get the content of a flpte located in @base_addr[@index] */
+static uint64_t vtd_get_flpte(dma_addr_t base_addr, uint32_t index)
+{
+ uint64_t flpte;
+
+ assert(index < VTD_FL_PT_ENTRY_NR);
+
+ if (dma_memory_read(&address_space_memory,
+ base_addr + index * sizeof(flpte), &flpte,
+ sizeof(flpte), MEMTXATTRS_UNSPECIFIED)) {
+ flpte = (uint64_t)-1;
+ return flpte;
+ }
+ flpte = le64_to_cpu(flpte);
+ return flpte;
+}
+
+static inline bool vtd_flpte_present(uint64_t flpte)
+{
+ return !!(flpte & 0x1);
+}
+
+/* Whether the pte indicates the address of the page frame */
+static inline bool vtd_is_last_flpte(uint64_t flpte, uint32_t level)
+{
+ return level == VTD_FL_PT_LEVEL || (flpte & VTD_FL_PT_PAGE_SIZE_MASK);
+}
+
+static inline uint64_t vtd_get_flpte_addr(uint64_t flpte, uint8_t aw)
+{
+ return flpte & VTD_FL_PT_BASE_ADDR_MASK(aw);
+}
+
+/*
+ * Given the @iova, get relevant @flptep. @flpte_level will be the last level
+ * of the translation, can be used for deciding the size of large page.
+ */
+static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
+ uint64_t iova, bool is_write,
+ uint64_t *flptep, uint32_t *flpte_level,
+ bool *reads, bool *writes, uint8_t aw_bits,
+ uint32_t pasid)
+{
+ dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
+ uint32_t level = vtd_get_iova_level(s, ce, pasid);
+ uint32_t offset;
+ uint64_t flpte;
+
+ while (true) {
+ offset = vtd_iova_fl_level_offset(iova, level);
+ flpte = vtd_get_flpte(addr, offset);
+ if (flpte == (uint64_t)-1) {
+ if (level == vtd_get_iova_level(s, ce, pasid)) {
+ /* Invalid programming of context-entry */
+ return -VTD_FR_CONTEXT_ENTRY_INV;
+ } else {
+ return -VTD_FR_PAGING_ENTRY_INV;
+ }
+ }
+
+ if (!vtd_flpte_present(flpte)) {
+ *reads = false;
+ *writes = false;
+ return -VTD_FR_PAGING_ENTRY_INV;
+ }
+
+ *reads = true;
+ *writes = (*writes) && (flpte & VTD_FL_RW_MASK);
+ if (is_write && !(flpte & VTD_FL_RW_MASK)) {
+ return -VTD_FR_WRITE;
+ }
+
+ if (vtd_is_last_flpte(flpte, level)) {
+ *flptep = flpte;
+ *flpte_level = level;
+ return 0;
+ }
+
+ addr = vtd_get_flpte_addr(flpte, aw_bits);
+ level--;
+ }
+}
+
static void vtd_report_fault(IntelIOMMUState *s,
int err, bool is_fpd_set,
uint16_t source_id,
@@ -1995,8 +2110,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
}
}
- ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
- &reads, &writes, s->aw_bits, pasid);
+ if (s->scalable_modern) {
+ ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level,
+ &reads, &writes, s->aw_bits, pasid);
+ } else {
+ ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
+ &reads, &writes, s->aw_bits, pasid);
+ }
if (ret_fr) {
vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
--
2.34.1
Hi Zhenzhong,
I already sent you my comments about this patch earlier (question about
checking pgtt) but here is a style review
On 22/05/2024 08:23, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>
>
> From: Yi Liu <yi.l.liu@intel.com>
>
> This adds stage-1 page table walking to support stage-1 only
> transltion in scalable modern mode.
>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu_internal.h | 17 +++++
> hw/i386/intel_iommu.c | 128 +++++++++++++++++++++++++++++++--
> 2 files changed, 141 insertions(+), 4 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 0e240d6d54..abfdbd5f65 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -534,6 +534,23 @@ typedef struct VTDRootEntry VTDRootEntry;
> #define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-address-width */
> #define VTD_SM_PASID_ENTRY_DID(val) ((val) & VTD_DOMAIN_ID_MASK)
>
> +#define VTD_SM_PASID_ENTRY_FLPM 3ULL
> +#define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL)
> +
> +/* Paging Structure common */
> +#define VTD_FL_PT_PAGE_SIZE_MASK (1ULL << 7)
> +/* Bits to decide the offset for each level */
> +#define VTD_FL_LEVEL_BITS 9
> +
> +/* First Level Paging Structure */
> +#define VTD_FL_PT_LEVEL 1
> +#define VTD_FL_PT_ENTRY_NR 512
> +
> +/* Masks for First Level Paging Entry */
> +#define VTD_FL_RW_MASK (1ULL << 1)
> +#define VTD_FL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw))
> +#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
> +
> /* Second Level Page Translation Pointer*/
> #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 544e8f0e40..cf29809bc1 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -50,6 +50,8 @@
> /* pe operations */
> #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
> #define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW))
> +#define VTD_PE_GET_FLPT_LEVEL(pe) \
> + (4 + (((pe)->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM))
>
> /*
> * PCI bus number (or SID) is not reliable since the device is usaully
> @@ -823,6 +825,11 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> return -VTD_FR_PASID_TABLE_ENTRY_INV;
> }
>
> + if (pgtt == VTD_SM_PASID_ENTRY_FLT &&
> + VTD_PE_GET_FLPT_LEVEL(pe) != 4) {
Maybe you could add a function to check if the level is supported.
And it would also be nice to rename vtd_is_level_supported (used just
above these lines) to make it clear that it's only relevant for second
level translations and avoid mistakes
> + return -VTD_FR_PASID_TABLE_ENTRY_INV;
> + }
> +
> return 0;
> }
>
> @@ -958,7 +965,11 @@ static uint32_t vtd_get_iova_level(IntelIOMMUState *s,
>
> if (s->root_scalable) {
> vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
> - return VTD_PE_GET_LEVEL(&pe);
> + if (s->scalable_modern) {
> + return VTD_PE_GET_FLPT_LEVEL(&pe);
> + } else {
> + return VTD_PE_GET_LEVEL(&pe);
same, could be renamed
> + }
> }
>
> return vtd_ce_get_level(ce);
> @@ -1045,7 +1056,11 @@ static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
>
> if (s->root_scalable) {
> vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
> - return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
> + if (s->scalable_modern) {
> + return pe.val[2] & VTD_SM_PASID_ENTRY_FLPTPTR;
> + } else {
> + return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
> + }
> }
>
> return vtd_ce_get_slpt_base(ce);
> @@ -1847,6 +1862,106 @@ out:
> trace_vtd_pt_enable_fast_path(source_id, success);
> }
>
> +/* The shift of an addr for a certain level of paging structure */
> +static inline uint32_t vtd_flpt_level_shift(uint32_t level)
> +{
> + assert(level != 0);
> + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_FL_LEVEL_BITS;
> +}
> +
> +/*
> + * Given an iova and the level of paging structure, return the offset
> + * of current level.
> + */
> +static inline uint32_t vtd_iova_fl_level_offset(uint64_t iova, uint32_t level)
> +{
> + return (iova >> vtd_flpt_level_shift(level)) &
> + ((1ULL << VTD_FL_LEVEL_BITS) - 1);
> +}
> +
> +/* Get the content of a flpte located in @base_addr[@index] */
> +static uint64_t vtd_get_flpte(dma_addr_t base_addr, uint32_t index)
> +{
> + uint64_t flpte;
> +
> + assert(index < VTD_FL_PT_ENTRY_NR);
> +
> + if (dma_memory_read(&address_space_memory,
> + base_addr + index * sizeof(flpte), &flpte,
> + sizeof(flpte), MEMTXATTRS_UNSPECIFIED)) {
> + flpte = (uint64_t)-1;
> + return flpte;
> + }
> + flpte = le64_to_cpu(flpte);
> + return flpte;
> +}
> +
> +static inline bool vtd_flpte_present(uint64_t flpte)
> +{
> + return !!(flpte & 0x1);
Shouldn't we use a #define instead of hardcoding the mask?
> +}
> +
> +/* Whether the pte indicates the address of the page frame */
> +static inline bool vtd_is_last_flpte(uint64_t flpte, uint32_t level)
> +{
> + return level == VTD_FL_PT_LEVEL || (flpte & VTD_FL_PT_PAGE_SIZE_MASK);
> +}
> +
> +static inline uint64_t vtd_get_flpte_addr(uint64_t flpte, uint8_t aw)
> +{
> + return flpte & VTD_FL_PT_BASE_ADDR_MASK(aw);
> +}
> +
> +/*
> + * Given the @iova, get relevant @flptep. @flpte_level will be the last level
> + * of the translation, can be used for deciding the size of large page.
> + */
> +static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
> + uint64_t iova, bool is_write,
> + uint64_t *flptep, uint32_t *flpte_level,
> + bool *reads, bool *writes, uint8_t aw_bits,
> + uint32_t pasid)
> +{
> + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
> + uint32_t level = vtd_get_iova_level(s, ce, pasid);
> + uint32_t offset;
> + uint64_t flpte;
> +
> + while (true) {
> + offset = vtd_iova_fl_level_offset(iova, level);
> + flpte = vtd_get_flpte(addr, offset);
> + if (flpte == (uint64_t)-1) {
> + if (level == vtd_get_iova_level(s, ce, pasid)) {
> + /* Invalid programming of context-entry */
> + return -VTD_FR_CONTEXT_ENTRY_INV;
> + } else {
> + return -VTD_FR_PAGING_ENTRY_INV;
> + }
> + }
> +
> + if (!vtd_flpte_present(flpte)) {
> + *reads = false;
> + *writes = false;
> + return -VTD_FR_PAGING_ENTRY_INV;
> + }
> +
> + *reads = true;
> + *writes = (*writes) && (flpte & VTD_FL_RW_MASK);
> + if (is_write && !(flpte & VTD_FL_RW_MASK)) {
> + return -VTD_FR_WRITE;
> + }
> +
> + if (vtd_is_last_flpte(flpte, level)) {
> + *flptep = flpte;
> + *flpte_level = level;
> + return 0;
> + }
> +
> + addr = vtd_get_flpte_addr(flpte, aw_bits);
> + level--;
> + }
> +}
> +
> static void vtd_report_fault(IntelIOMMUState *s,
> int err, bool is_fpd_set,
> uint16_t source_id,
> @@ -1995,8 +2110,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> }
> }
>
> - ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
> - &reads, &writes, s->aw_bits, pasid);
> + if (s->scalable_modern) {
> + ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level,
> + &reads, &writes, s->aw_bits, pasid);
> + } else {
> + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
> + &reads, &writes, s->aw_bits, pasid);
> + }
>
> if (ret_fr) {
> vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
> --
> 2.34.1
>
#cmd
>-----Original Message-----
>From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com>
>Subject: Re: [PATCH rfcv2 06/17] intel_iommu: Implement stage-1
>translation
>
>Hi Zhenzhong,
>
>I already sent you my comments about this patch earlier (question about
>checking pgtt) but here is a style review
>
>On 22/05/2024 08:23, Zhenzhong Duan wrote:
>> Caution: External email. Do not open attachments or click links, unless this
>email comes from a known sender and you know the content is safe.
>>
>>
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> This adds stage-1 page table walking to support stage-1 only
>> transltion in scalable modern mode.
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> hw/i386/intel_iommu_internal.h | 17 +++++
>> hw/i386/intel_iommu.c | 128
>+++++++++++++++++++++++++++++++--
>> 2 files changed, 141 insertions(+), 4 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h
>b/hw/i386/intel_iommu_internal.h
>> index 0e240d6d54..abfdbd5f65 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -534,6 +534,23 @@ typedef struct VTDRootEntry VTDRootEntry;
>> #define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-
>address-width */
>> #define VTD_SM_PASID_ENTRY_DID(val) ((val) &
>VTD_DOMAIN_ID_MASK)
>>
>> +#define VTD_SM_PASID_ENTRY_FLPM 3ULL
>> +#define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL)
>> +
>> +/* Paging Structure common */
>> +#define VTD_FL_PT_PAGE_SIZE_MASK (1ULL << 7)
>> +/* Bits to decide the offset for each level */
>> +#define VTD_FL_LEVEL_BITS 9
>> +
>> +/* First Level Paging Structure */
>> +#define VTD_FL_PT_LEVEL 1
>> +#define VTD_FL_PT_ENTRY_NR 512
>> +
>> +/* Masks for First Level Paging Entry */
>> +#define VTD_FL_RW_MASK (1ULL << 1)
>> +#define VTD_FL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) &
>VTD_HAW_MASK(aw))
>> +#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing
>Disable */
>> +
>> /* Second Level Page Translation Pointer*/
>> #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL)
>>
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 544e8f0e40..cf29809bc1 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -50,6 +50,8 @@
>> /* pe operations */
>> #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] &
>VTD_SM_PASID_ENTRY_PGTT)
>> #define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) &
>VTD_SM_PASID_ENTRY_AW))
>> +#define VTD_PE_GET_FLPT_LEVEL(pe) \
>> + (4 + (((pe)->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM))
>>
>> /*
>> * PCI bus number (or SID) is not reliable since the device is usaully
>> @@ -823,6 +825,11 @@ static int
>vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>> return -VTD_FR_PASID_TABLE_ENTRY_INV;
>> }
>>
>> + if (pgtt == VTD_SM_PASID_ENTRY_FLT &&
>> + VTD_PE_GET_FLPT_LEVEL(pe) != 4) {
>Maybe you could add a function to check if the level is supported.
>And it would also be nice to rename vtd_is_level_supported (used just
>above these lines) to make it clear that it's only relevant for second
>level translations and avoid mistakes
Sure, will do.
Thanks
Zhenzhong
>> + return -VTD_FR_PASID_TABLE_ENTRY_INV;
>> + }
>> +
>> return 0;
>> }
>>
>> @@ -958,7 +965,11 @@ static uint32_t
>vtd_get_iova_level(IntelIOMMUState *s,
>>
>> if (s->root_scalable) {
>> vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
>> - return VTD_PE_GET_LEVEL(&pe);
>> + if (s->scalable_modern) {
>> + return VTD_PE_GET_FLPT_LEVEL(&pe);
>> + } else {
>> + return VTD_PE_GET_LEVEL(&pe);
>same, could be renamed
>> + }
>> }
>>
>> return vtd_ce_get_level(ce);
>> @@ -1045,7 +1056,11 @@ static dma_addr_t
>vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
>>
>> if (s->root_scalable) {
>> vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
>> - return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
>> + if (s->scalable_modern) {
>> + return pe.val[2] & VTD_SM_PASID_ENTRY_FLPTPTR;
>> + } else {
>> + return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
>> + }
>> }
>>
>> return vtd_ce_get_slpt_base(ce);
>> @@ -1847,6 +1862,106 @@ out:
>> trace_vtd_pt_enable_fast_path(source_id, success);
>> }
>>
>> +/* The shift of an addr for a certain level of paging structure */
>> +static inline uint32_t vtd_flpt_level_shift(uint32_t level)
>> +{
>> + assert(level != 0);
>> + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_FL_LEVEL_BITS;
>> +}
>> +
>> +/*
>> + * Given an iova and the level of paging structure, return the offset
>> + * of current level.
>> + */
>> +static inline uint32_t vtd_iova_fl_level_offset(uint64_t iova, uint32_t
>level)
>> +{
>> + return (iova >> vtd_flpt_level_shift(level)) &
>> + ((1ULL << VTD_FL_LEVEL_BITS) - 1);
>> +}
>> +
>> +/* Get the content of a flpte located in @base_addr[@index] */
>> +static uint64_t vtd_get_flpte(dma_addr_t base_addr, uint32_t index)
>> +{
>> + uint64_t flpte;
>> +
>> + assert(index < VTD_FL_PT_ENTRY_NR);
>> +
>> + if (dma_memory_read(&address_space_memory,
>> + base_addr + index * sizeof(flpte), &flpte,
>> + sizeof(flpte), MEMTXATTRS_UNSPECIFIED)) {
>> + flpte = (uint64_t)-1;
>> + return flpte;
>> + }
>> + flpte = le64_to_cpu(flpte);
>> + return flpte;
>> +}
>> +
>> +static inline bool vtd_flpte_present(uint64_t flpte)
>> +{
>> + return !!(flpte & 0x1);
>Shouldn't we use a #define instead of hardcoding the mask?
>> +}
>> +
>> +/* Whether the pte indicates the address of the page frame */
>> +static inline bool vtd_is_last_flpte(uint64_t flpte, uint32_t level)
>> +{
>> + return level == VTD_FL_PT_LEVEL || (flpte &
>VTD_FL_PT_PAGE_SIZE_MASK);
>> +}
>> +
>> +static inline uint64_t vtd_get_flpte_addr(uint64_t flpte, uint8_t aw)
>> +{
>> + return flpte & VTD_FL_PT_BASE_ADDR_MASK(aw);
>> +}
>> +
>> +/*
>> + * Given the @iova, get relevant @flptep. @flpte_level will be the last
>level
>> + * of the translation, can be used for deciding the size of large page.
>> + */
>> +static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
>> + uint64_t iova, bool is_write,
>> + uint64_t *flptep, uint32_t *flpte_level,
>> + bool *reads, bool *writes, uint8_t aw_bits,
>> + uint32_t pasid)
>> +{
>> + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
>> + uint32_t level = vtd_get_iova_level(s, ce, pasid);
>> + uint32_t offset;
>> + uint64_t flpte;
>> +
>> + while (true) {
>> + offset = vtd_iova_fl_level_offset(iova, level);
>> + flpte = vtd_get_flpte(addr, offset);
>> + if (flpte == (uint64_t)-1) {
>> + if (level == vtd_get_iova_level(s, ce, pasid)) {
>> + /* Invalid programming of context-entry */
>> + return -VTD_FR_CONTEXT_ENTRY_INV;
>> + } else {
>> + return -VTD_FR_PAGING_ENTRY_INV;
>> + }
>> + }
>> +
>> + if (!vtd_flpte_present(flpte)) {
>> + *reads = false;
>> + *writes = false;
>> + return -VTD_FR_PAGING_ENTRY_INV;
>> + }
>> +
>> + *reads = true;
>> + *writes = (*writes) && (flpte & VTD_FL_RW_MASK);
>> + if (is_write && !(flpte & VTD_FL_RW_MASK)) {
>> + return -VTD_FR_WRITE;
>> + }
>> +
>> + if (vtd_is_last_flpte(flpte, level)) {
>> + *flptep = flpte;
>> + *flpte_level = level;
>> + return 0;
>> + }
>> +
>> + addr = vtd_get_flpte_addr(flpte, aw_bits);
>> + level--;
>> + }
>> +}
>> +
>> static void vtd_report_fault(IntelIOMMUState *s,
>> int err, bool is_fpd_set,
>> uint16_t source_id,
>> @@ -1995,8 +2110,13 @@ static bool
>vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>> }
>> }
>>
>> - ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
>> - &reads, &writes, s->aw_bits, pasid);
>> + if (s->scalable_modern) {
>> + ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level,
>> + &reads, &writes, s->aw_bits, pasid);
>> + } else {
>> + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
>> + &reads, &writes, s->aw_bits, pasid);
>> + }
>>
>> if (ret_fr) {
>> vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
>> --
>> 2.34.1
>>
>#cmd
© 2016 - 2026 Red Hat, Inc.