In preparation for adding stage-2 support. Add Stage-2 PTW code.
Only Aarch64 fromat is supported as stage-1.
Max 48 bits IPA is supported.
Nesting stage-1 and stage-2 is not supported right now.
Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
hw/arm/smmu-common.c | 112 ++++++++++++++++++++++++++++++++---
hw/arm/smmu-internal.h | 37 ++++++++++++
include/hw/arm/smmu-common.h | 1 +
3 files changed, 143 insertions(+), 7 deletions(-)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 4fcbffa2f1..df0d1dc024 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -362,6 +362,99 @@ error:
return -EINVAL;
}
+/**
+ * smmu_ptw_64_s2 - VMSAv8-64 Walk of the page tables for a given IOVA
+ * for stage-2.
+ * @cfg: translation config
+ * @iova: iova to translate
+ * @perm: access type
+ * @tlbe: SMMUTLBEntry (out)
+ * @info: handle to an error info
+ *
+ * Return 0 on success, < 0 on error. In case of error, @info is filled
+ * and tlbe->perm is set to IOMMU_NONE.
+ * Upon success, @tlbe is filled with translated_addr and entry
+ * permission rights.
+ */
+
+static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
+ dma_addr_t iova, IOMMUAccessFlags perm,
+ SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
+{
+ const int stage = 2;
+ int granule_sz = cfg->s2cfg.granule_sz;
+ /* ARM ARM: Table D8-7. */
+ int inputsize = 64 - cfg->s2cfg.tsz;
+ int level = get_start_level(cfg->s2cfg.sl0, granule_sz);
+ int stride = granule_sz - 3;
+ int idx = pgd_idx(level, granule_sz, iova);
+ /*
+ * Get the ttb from concatenated structure.
+ * The offset is the idx * size of each ttb(number of ptes * (sizeof(pte))
+ */
+ uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, 48) + (1 << stride) *
+ idx * sizeof(uint64_t);
+ dma_addr_t indexmask = (1ULL << (inputsize - (stride * (4 - level)))) - 1;
+
+ baseaddr &= ~indexmask;
+
+ while (level < SMMU_MAX_LEVELS) {
+ uint64_t subpage_size = 1ULL << level_shift(level, granule_sz);
+ uint64_t mask = subpage_size - 1;
+ uint32_t offset = iova_level_offset(iova, inputsize, level, granule_sz);
+ uint64_t pte, gpa;
+ dma_addr_t pte_addr = baseaddr + offset * sizeof(pte);
+ uint8_t ap;
+
+ if (get_pte(baseaddr, offset, &pte, info)) {
+ goto error;
+ }
+ trace_smmu_ptw_level(level, iova, subpage_size,
+ baseaddr, offset, pte);
+ if (is_invalid_pte(pte) || is_reserved_pte(pte, level)) {
+ trace_smmu_ptw_invalid_pte(stage, level, baseaddr,
+ pte_addr, offset, pte);
+ break;
+ }
+
+ if (is_table_pte(pte, level)) {
+ baseaddr = get_table_pte_address(pte, granule_sz);
+ level++;
+ continue;
+ } else if (is_page_pte(pte, level)) {
+ gpa = get_page_pte_address(pte, granule_sz);
+ trace_smmu_ptw_page_pte(stage, level, iova,
+ baseaddr, pte_addr, pte, gpa);
+ } else {
+ uint64_t block_size;
+
+ gpa = get_block_pte_address(pte, level, granule_sz,
+ &block_size);
+ trace_smmu_ptw_block_pte(stage, level, baseaddr,
+ pte_addr, pte, iova, gpa,
+ block_size >> 20);
+ }
+ ap = PTE_AP(pte);
+ if (is_permission_fault_s2(ap, perm)) {
+ info->type = SMMU_PTW_ERR_PERMISSION;
+ goto error;
+ }
+
+ tlbe->entry.translated_addr = gpa;
+ tlbe->entry.iova = iova & ~mask;
+ tlbe->entry.addr_mask = mask;
+ tlbe->entry.perm = ap;
+ tlbe->level = level;
+ tlbe->granule = granule_sz;
+ return 0;
+ }
+ info->type = SMMU_PTW_ERR_TRANSLATION;
+
+error:
+ tlbe->entry.perm = IOMMU_NONE;
+ return -EINVAL;
+}
+
/**
* smmu_ptw - Walk the page tables for an IOVA, according to @cfg
*
@@ -376,15 +469,20 @@ error:
int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
{
- if (!cfg->aa64) {
- /*
- * This code path is not entered as we check this while decoding
- * the configuration data in the derived SMMU model.
- */
- g_assert_not_reached();
+ if (cfg->stage == 1) {
+ if (!cfg->aa64) {
+ /*
+ * This code path is not entered as we check this while decoding
+ * the configuration data in the derived SMMU model.
+ */
+ g_assert_not_reached();
+ }
+ return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
+ } else if (cfg->stage == 2) {
+ return smmu_ptw_64_s2(cfg, iova, perm, tlbe, info);
}
- return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
+ g_assert_not_reached();
}
/**
diff --git a/hw/arm/smmu-internal.h b/hw/arm/smmu-internal.h
index 2d75b31953..b02c05319f 100644
--- a/hw/arm/smmu-internal.h
+++ b/hw/arm/smmu-internal.h
@@ -73,6 +73,9 @@
#define is_permission_fault(ap, perm) \
(((perm) & IOMMU_WO) && ((ap) & 0x2))
+#define is_permission_fault_s2(ap, perm) \
+ (!((ap & perm) == perm))
+
#define PTE_AP_TO_PERM(ap) \
(IOMMU_ACCESS_FLAG(true, !((ap) & 0x2)))
@@ -96,6 +99,40 @@ uint64_t iova_level_offset(uint64_t iova, int inputsize,
MAKE_64BIT_MASK(0, gsz - 3);
}
+#define SMMU_MAX_S2_CONCAT 16
+
+/*
+ * Relies on correctness of gran and sl0 from caller.
+ * FEAT_LPA2 and FEAT_TTST are not implemented.
+ */
+static inline int get_start_level(int sl0 , int gran)
+{
+ /* ARM ARM: Table D8-12. */
+ if (gran == 12) {
+ return 2 - sl0;
+ }
+ /* ARM ARM: Table D8-22 and Table D8-31. */
+ return 3 - sl0;
+}
+
+/*
+ * Index in a concatenated first level stage-2 page table.
+ * ARM ARM: D8.2.2 Concatenated translation tables.
+ */
+static inline int pgd_idx(int start_level, int granule, dma_addr_t iova)
+{
+ uint64_t ret;
+ /*
+ * Get the number of bits handled by next levels, then any extra bits in
+ * the address should index the concatenated tables. This relation can
+ * deduced from tables in ARM ARM: D8.2.7-9
+ */
+ int shift = (SMMU_MAX_LEVELS - start_level) * (granule - 3) + granule;
+
+ ret = iova >> shift;
+ return ret;
+}
+
#define SMMU_IOTLB_ASID(key) ((key).asid)
typedef struct SMMUIOTLBPageInvInfo {
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index 45f74d0e93..1e666e8b6d 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -28,6 +28,7 @@
#define SMMU_PCI_DEVFN(sid) (sid & 0xFF)
#define SMMU_MAX_VA_BITS 48
+#define SMMU_MAX_LEVELS 4
/*
* Page table walk error types
--
2.39.1.519.gcb327c4b5f-goog
Hi Mostafa,
On 2/5/23 10:44, Mostafa Saleh wrote:
> In preparation for adding stage-2 support. Add Stage-2 PTW code.
> Only Aarch64 fromat is supported as stage-1.
format
> Max 48 bits IPA is supported.
>
> Nesting stage-1 and stage-2 is not supported right now.
>
> Signed-off-by: Mostafa Saleh <smostafa@google.com>
> ---
> hw/arm/smmu-common.c | 112 ++++++++++++++++++++++++++++++++---
> hw/arm/smmu-internal.h | 37 ++++++++++++
> include/hw/arm/smmu-common.h | 1 +
> 3 files changed, 143 insertions(+), 7 deletions(-)
>
> diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> index 4fcbffa2f1..df0d1dc024 100644
> --- a/hw/arm/smmu-common.c
> +++ b/hw/arm/smmu-common.c
> @@ -362,6 +362,99 @@ error:
> return -EINVAL;
> }
>
> +/**
> + * smmu_ptw_64_s2 - VMSAv8-64 Walk of the page tables for a given IOVA
> + * for stage-2.
> + * @cfg: translation config
> + * @iova: iova to translate
> + * @perm: access type
> + * @tlbe: SMMUTLBEntry (out)
> + * @info: handle to an error info
> + *
> + * Return 0 on success, < 0 on error. In case of error, @info is filled
> + * and tlbe->perm is set to IOMMU_NONE.
> + * Upon success, @tlbe is filled with translated_addr and entry
> + * permission rights.
> + */
> +
> +static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
> + dma_addr_t iova, IOMMUAccessFlags perm,
> + SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
> +{
> + const int stage = 2;
> + int granule_sz = cfg->s2cfg.granule_sz;
> + /* ARM ARM: Table D8-7. */
> + int inputsize = 64 - cfg->s2cfg.tsz;
> + int level = get_start_level(cfg->s2cfg.sl0, granule_sz);
> + int stride = granule_sz - 3;
> + int idx = pgd_idx(level, granule_sz, iova);
> + /*
> + * Get the ttb from concatenated structure.
> + * The offset is the idx * size of each ttb(number of ptes * (sizeof(pte))
> + */
> + uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, 48) + (1 << stride) *
> + idx * sizeof(uint64_t);
> + dma_addr_t indexmask = (1ULL << (inputsize - (stride * (4 - level)))) - 1;
> +
> + baseaddr &= ~indexmask;
> +
> + while (level < SMMU_MAX_LEVELS) {
> + uint64_t subpage_size = 1ULL << level_shift(level, granule_sz);
> + uint64_t mask = subpage_size - 1;
> + uint32_t offset = iova_level_offset(iova, inputsize, level, granule_sz);
> + uint64_t pte, gpa;
> + dma_addr_t pte_addr = baseaddr + offset * sizeof(pte);
> + uint8_t ap;
> +
> + if (get_pte(baseaddr, offset, &pte, info)) {
> + goto error;
> + }
> + trace_smmu_ptw_level(level, iova, subpage_size,
> + baseaddr, offset, pte);
I think the trace point names should be updated as well (and
differentiated between S1/S2)
> + if (is_invalid_pte(pte) || is_reserved_pte(pte, level)) {
> + trace_smmu_ptw_invalid_pte(stage, level, baseaddr,
> + pte_addr, offset, pte);
same for PTE's?
> + break;
> + }
> +
> + if (is_table_pte(pte, level)) {
> + baseaddr = get_table_pte_address(pte, granule_sz);
> + level++;
> + continue;
> + } else if (is_page_pte(pte, level)) {
> + gpa = get_page_pte_address(pte, granule_sz);
> + trace_smmu_ptw_page_pte(stage, level, iova,
> + baseaddr, pte_addr, pte, gpa);
> + } else {
> + uint64_t block_size;
> +
> + gpa = get_block_pte_address(pte, level, granule_sz,
> + &block_size);
> + trace_smmu_ptw_block_pte(stage, level, baseaddr,
> + pte_addr, pte, iova, gpa,
> + block_size >> 20);
> + }
> + ap = PTE_AP(pte);
> + if (is_permission_fault_s2(ap, perm)) {
> + info->type = SMMU_PTW_ERR_PERMISSION;
don't we have to differentiate S1 versus S2 faults?
> + goto error;
> + }
> +
> + tlbe->entry.translated_addr = gpa;
> + tlbe->entry.iova = iova & ~mask;
> + tlbe->entry.addr_mask = mask;
> + tlbe->entry.perm = ap;
> + tlbe->level = level;
> + tlbe->granule = granule_sz;
> + return 0;
> + }
> + info->type = SMMU_PTW_ERR_TRANSLATION;
> +
> +error:
> + tlbe->entry.perm = IOMMU_NONE;
> + return -EINVAL;
> +}
> +
> /**
> * smmu_ptw - Walk the page tables for an IOVA, according to @cfg
> *
> @@ -376,15 +469,20 @@ error:
> int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
> SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
> {
> - if (!cfg->aa64) {
> - /*
> - * This code path is not entered as we check this while decoding
> - * the configuration data in the derived SMMU model.
> - */
> - g_assert_not_reached();
if that's still true for S2, maybe keep that check here upfront?
> + if (cfg->stage == 1) {
> + if (!cfg->aa64) {
> + /*
> + * This code path is not entered as we check this while decoding
> + * the configuration data in the derived SMMU model.
> + */
> + g_assert_not_reached();
> + }
> + return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
> + } else if (cfg->stage == 2) {
> + return smmu_ptw_64_s2(cfg, iova, perm, tlbe, info);
> }
>
> - return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
> + g_assert_not_reached();
> }
>
> /**
> diff --git a/hw/arm/smmu-internal.h b/hw/arm/smmu-internal.h
> index 2d75b31953..b02c05319f 100644
> --- a/hw/arm/smmu-internal.h
> +++ b/hw/arm/smmu-internal.h
> @@ -73,6 +73,9 @@
> #define is_permission_fault(ap, perm) \
> (((perm) & IOMMU_WO) && ((ap) & 0x2))
>
> +#define is_permission_fault_s2(ap, perm) \
> + (!((ap & perm) == perm))
> +
> #define PTE_AP_TO_PERM(ap) \
> (IOMMU_ACCESS_FLAG(true, !((ap) & 0x2)))
>
> @@ -96,6 +99,40 @@ uint64_t iova_level_offset(uint64_t iova, int inputsize,
> MAKE_64BIT_MASK(0, gsz - 3);
> }
>
> +#define SMMU_MAX_S2_CONCAT 16
> +
> +/*
> + * Relies on correctness of gran and sl0 from caller.
> + * FEAT_LPA2 and FEAT_TTST are not implemented.
> + */
> +static inline int get_start_level(int sl0 , int gran)
> +{
> + /* ARM ARM: Table D8-12. */
> + if (gran == 12) {
> + return 2 - sl0;
> + }
> + /* ARM ARM: Table D8-22 and Table D8-31. */
> + return 3 - sl0;
> +}
> +
> +/*
> + * Index in a concatenated first level stage-2 page table.
> + * ARM ARM: D8.2.2 Concatenated translation tables.
> + */
> +static inline int pgd_idx(int start_level, int granule, dma_addr_t iova)
> +{
> + uint64_t ret;
> + /*
> + * Get the number of bits handled by next levels, then any extra bits in
> + * the address should index the concatenated tables. This relation can
> + * deduced from tables in ARM ARM: D8.2.7-9
> + */
> + int shift = (SMMU_MAX_LEVELS - start_level) * (granule - 3) + granule;
can't we factorize anything with the S1 PTW?
indexmask = (1ULL << (inputsize - (stride * (4 - level)))) - 1;
> +
> + ret = iova >> shift;
> + return ret;
> +}
> +
> #define SMMU_IOTLB_ASID(key) ((key).asid)
>
> typedef struct SMMUIOTLBPageInvInfo {
> diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
> index 45f74d0e93..1e666e8b6d 100644
> --- a/include/hw/arm/smmu-common.h
> +++ b/include/hw/arm/smmu-common.h
> @@ -28,6 +28,7 @@
> #define SMMU_PCI_DEVFN(sid) (sid & 0xFF)
>
> #define SMMU_MAX_VA_BITS 48
> +#define SMMU_MAX_LEVELS 4
can't this be reused as well with S1 PTW?
>
> /*
> * Page table walk error types
Eric
Hi Eric,
On Wed, Feb 15, 2023 at 05:52:39PM +0100, Eric Auger wrote:
> > In preparation for adding stage-2 support. Add Stage-2 PTW code.
> > Only Aarch64 fromat is supported as stage-1.
> format
I will update it.
> > + uint64_t subpage_size = 1ULL << level_shift(level, granule_sz);
> > + uint64_t mask = subpage_size - 1;
> > + uint32_t offset = iova_level_offset(iova, inputsize, level, granule_sz);
> > + uint64_t pte, gpa;
> > + dma_addr_t pte_addr = baseaddr + offset * sizeof(pte);
> > + uint8_t ap;
> > +
> > + if (get_pte(baseaddr, offset, &pte, info)) {
> > + goto error;
> > + }
> > + trace_smmu_ptw_level(level, iova, subpage_size,
> > + baseaddr, offset, pte);
> I think the trace point names should be updated as well (and
> differentiated between S1/S2)
I was thinking we could leave those with stage argument, and only
update trace_smmu_ptw_level to have stage argument as the others.
> > + if (is_permission_fault_s2(ap, perm)) {
> > + info->type = SMMU_PTW_ERR_PERMISSION;
> don't we have to differentiate S1 versus S2 faults?
Yes, I missed that, I see setting info->u.f_walk_eabt.s2 should be
enough, this will set the S2 field in the fault event.
> > int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
> > SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
> > {
> > - if (!cfg->aa64) {
> > - /*
> > - * This code path is not entered as we check this while decoding
> > - * the configuration data in the derived SMMU model.
> > - */
> > - g_assert_not_reached();
> if that's still true for S2, maybe keep that check here upfront?
Stage-2 is checked in STE parsing and throws BAD_STE if not aa64,
which I believe is not correct, however I think we can just call
g_assert_not_reached() during STE parsing, I don’t see added value for
saving this field in SMMUTransCfg if we don’t use it.
I am not sure why this check exists for stage-1 as it is hardcoded in
decode_cd anyway.
> > +{
> > + uint64_t ret;
> > + /*
> > + * Get the number of bits handled by next levels, then any extra bits in
> > + * the address should index the concatenated tables. This relation can
> > + * deduced from tables in ARM ARM: D8.2.7-9
> > + */
> > + int shift = (SMMU_MAX_LEVELS - start_level) * (granule - 3) + granule;
> can't we factorize anything with the S1 PTW?
> indexmask = (1ULL << (inputsize - (stride * (4 - level)))) - 1;
Yes, I think we can refactor some of these in common functions/macros, I
will do that in v2.
> > @@ -28,6 +28,7 @@
> > #define SMMU_PCI_DEVFN(sid) (sid & 0xFF)
> >
> > #define SMMU_MAX_VA_BITS 48
> > +#define SMMU_MAX_LEVELS 4
> can't this be reused as well with S1 PTW?
I believe yes, I will update it.
Thanks,
Mostafa
On 2/16/23 14:09, Mostafa Saleh wrote:
> Hi Eric,
>
> On Wed, Feb 15, 2023 at 05:52:39PM +0100, Eric Auger wrote:
>>> In preparation for adding stage-2 support. Add Stage-2 PTW code.
>>> Only Aarch64 fromat is supported as stage-1.
>> format
> I will update it.
>
>>> + uint64_t subpage_size = 1ULL << level_shift(level, granule_sz);
>>> + uint64_t mask = subpage_size - 1;
>>> + uint32_t offset = iova_level_offset(iova, inputsize, level, granule_sz);
>>> + uint64_t pte, gpa;
>>> + dma_addr_t pte_addr = baseaddr + offset * sizeof(pte);
>>> + uint8_t ap;
>>> +
>>> + if (get_pte(baseaddr, offset, &pte, info)) {
>>> + goto error;
>>> + }
>>> + trace_smmu_ptw_level(level, iova, subpage_size,
>>> + baseaddr, offset, pte);
>> I think the trace point names should be updated as well (and
>> differentiated between S1/S2)
> I was thinking we could leave those with stage argument, and only
> update trace_smmu_ptw_level to have stage argument as the others.
yes, as long as the stage is properly populated that's fine.
>
>>> + if (is_permission_fault_s2(ap, perm)) {
>>> + info->type = SMMU_PTW_ERR_PERMISSION;
>> don't we have to differentiate S1 versus S2 faults?
> Yes, I missed that, I see setting info->u.f_walk_eabt.s2 should be
> enough, this will set the S2 field in the fault event.
>
>>> int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
>>> SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
>>> {
>>> - if (!cfg->aa64) {
>>> - /*
>>> - * This code path is not entered as we check this while decoding
>>> - * the configuration data in the derived SMMU model.
>>> - */
>>> - g_assert_not_reached();
>> if that's still true for S2, maybe keep that check here upfront?
> Stage-2 is checked in STE parsing and throws BAD_STE if not aa64,
> which I believe is not correct, however I think we can just call
> g_assert_not_reached() during STE parsing, I don’t see added value for
> saving this field in SMMUTransCfg if we don’t use it.
I agree. I guess we provisioned for this field in the prospect of
completing the emulation but I don't think we care.
> I am not sure why this check exists for stage-1 as it is hardcoded in
> decode_cd anyway.
>
>>> +{
>>> + uint64_t ret;
>>> + /*
>>> + * Get the number of bits handled by next levels, then any extra bits in
>>> + * the address should index the concatenated tables. This relation can
>>> + * deduced from tables in ARM ARM: D8.2.7-9
>>> + */
>>> + int shift = (SMMU_MAX_LEVELS - start_level) * (granule - 3) + granule;
>> can't we factorize anything with the S1 PTW?
>> indexmask = (1ULL << (inputsize - (stride * (4 - level)))) - 1;
> Yes, I think we can refactor some of these in common functions/macros, I
> will do that in v2.
I guess that's a trade-off between being close enough to the spec and
maybe its pseudo-code and having both S1/S2 codes looking similar.
Eric
>
>
>>> @@ -28,6 +28,7 @@
>>> #define SMMU_PCI_DEVFN(sid) (sid & 0xFF)
>>>
>>> #define SMMU_MAX_VA_BITS 48
>>> +#define SMMU_MAX_LEVELS 4
>> can't this be reused as well with S1 PTW?
> I believe yes, I will update it.
>
> Thanks,
> Mostafa
>
© 2016 - 2026 Red Hat, Inc.