When pt_update() is called with arguments (..., INVALID_MFN, ..., 0 or 1),
it indicates that a mapping is being destroyed/modifyed.
In the case when modifying or destroying a mapping, it is necessary to
search until a leaf node is found, instead of searching for a page table
entry based on the precalculated `level` and `order`(look at pt_update()).
This is because when `mfn` == INVALID_MFN, the `mask` (in pt_mapping_level())
will take into account only `vfn`, which could accidentally return an
incorrect level, leading to the discovery of an incorrect page table entry.
For example, if `vfn` is page table level 1 aligned, but it was mapped as
page table level 0, then pt_mapping_level() will return `level` = 1, since
only `vfn` (which is page table level 1 aligned) is taken into account when
`mfn` == INVALID_MFN (look at pt_mapping_level()).
Fixes: c2f1ded524 ("xen/riscv: page table handling")
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in v5:
- Rename *entry to *ptep in pt_update_entry().
- Fix code style issue in the comment.
- Move NULL check of pointer to `table` inside unmap_table and then drop
it in pt_update_entry().
---
Changes in v4:
- Move defintion of local variable table inside `else` case as it is
used only there.
- Change unmap_table(table) to unmap_table(entry) for unifying both
cases when _pt_walk() is used and when pte is seached on the specified
level.
- Initialize local variable `entry` to avoid compilation error caused by
uninitialized variable.
---
Changes in v3:
- Drop ASSERT() for order as it isn't needed anymore.
- Drop PTE_LEAF_SEARCH and use instead level=CONFIG_PAGING_LEVELS;
refactor connected code correspondingly.
- Calculate order once.
- Drop initializer for local variable order.
- Drop BUG_ON(!pte_is_mapping(*entry)) for the case when leaf searching
happens as there is a similar check in pt_check_entry(). Look at
pt.c:41 and pt.c:75.
---
Changes in v2:
- Introduce PTE_LEAF_SEARCH to tell page table update operation to
walk down to wherever the leaf entry is.
- Use introduced PTE_LEAF_SEARCH to not searching pte_t entry twice.
- Update the commit message.
---
xen/arch/riscv/pt.c | 116 +++++++++++++++++++++++++++++---------------
1 file changed, 78 insertions(+), 38 deletions(-)
diff --git a/xen/arch/riscv/pt.c b/xen/arch/riscv/pt.c
index 9c1f8f6b55..518939b443 100644
--- a/xen/arch/riscv/pt.c
+++ b/xen/arch/riscv/pt.c
@@ -102,6 +102,9 @@ static pte_t *map_table(mfn_t mfn)
static void unmap_table(const pte_t *table)
{
+ if ( !table )
+ return;
+
/*
* During early boot, map_table() will not use map_domain_page()
* but the PMAP.
@@ -245,14 +248,21 @@ pte_t pt_walk(vaddr_t va, unsigned int *pte_level)
return pte;
}
-/* Update an entry at the level @target. */
+/*
+ * Update an entry at the level @target.
+ *
+ * If `target` == CONFIG_PAGING_LEVELS, the search will continue until
+ * a leaf node is found.
+ * Otherwise, the page table entry will be searched at the requested
+ * `target` level.
+ * For an example of why this might be needed, see the comment in
+ * pt_update() before pt_update_entry() is called.
+ */
static int pt_update_entry(mfn_t root, vaddr_t virt,
- mfn_t mfn, unsigned int target,
+ mfn_t mfn, unsigned int *target,
unsigned int flags)
{
int rc;
- unsigned int level = HYP_PT_ROOT_LEVEL;
- pte_t *table;
/*
* The intermediate page table shouldn't be allocated when MFN isn't
* valid and we are not populating page table.
@@ -263,43 +273,50 @@ static int pt_update_entry(mfn_t root, vaddr_t virt,
* combinations of (mfn, flags).
*/
bool alloc_tbl = !mfn_eq(mfn, INVALID_MFN) || (flags & PTE_POPULATE);
- pte_t pte, *entry;
-
- /* convenience aliases */
- DECLARE_OFFSETS(offsets, virt);
+ pte_t pte, *ptep = NULL;
- table = map_table(root);
- for ( ; level > target; level-- )
+ if ( *target == CONFIG_PAGING_LEVELS )
+ ptep = _pt_walk(virt, target);
+ else
{
- rc = pt_next_level(alloc_tbl, &table, offsets[level]);
- if ( rc == XEN_TABLE_MAP_NOMEM )
+ pte_t *table;
+ unsigned int level = HYP_PT_ROOT_LEVEL;
+ /* Convenience aliases */
+ DECLARE_OFFSETS(offsets, virt);
+
+ table = map_table(root);
+ for ( ; level > *target; level-- )
{
- rc = -ENOMEM;
- goto out;
+ rc = pt_next_level(alloc_tbl, &table, offsets[level]);
+ if ( rc == XEN_TABLE_MAP_NOMEM )
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ if ( rc == XEN_TABLE_MAP_NONE )
+ {
+ rc = 0;
+ goto out;
+ }
+
+ if ( rc != XEN_TABLE_NORMAL )
+ break;
}
- if ( rc == XEN_TABLE_MAP_NONE )
+ if ( level != *target )
{
- rc = 0;
+ dprintk(XENLOG_ERR,
+ "%s: Shattering superpage is not supported\n", __func__);
+ rc = -EOPNOTSUPP;
goto out;
}
- if ( rc != XEN_TABLE_NORMAL )
- break;
- }
-
- if ( level != target )
- {
- dprintk(XENLOG_ERR,
- "%s: Shattering superpage is not supported\n", __func__);
- rc = -EOPNOTSUPP;
- goto out;
+ ptep = table + offsets[level];
}
- entry = table + offsets[level];
-
rc = -EINVAL;
- if ( !pt_check_entry(*entry, mfn, flags) )
+ if ( !pt_check_entry(*ptep, mfn, flags) )
goto out;
/* We are removing the page */
@@ -316,7 +333,7 @@ static int pt_update_entry(mfn_t root, vaddr_t virt,
pte = pte_from_mfn(mfn, PTE_VALID);
else /* We are updating the permission => Copy the current pte. */
{
- pte = *entry;
+ pte = *ptep;
pte.pte &= ~PTE_ACCESS_MASK;
}
@@ -324,12 +341,12 @@ static int pt_update_entry(mfn_t root, vaddr_t virt,
pte.pte |= (flags & PTE_ACCESS_MASK) | PTE_ACCESSED | PTE_DIRTY;
}
- write_pte(entry, pte);
+ write_pte(ptep, pte);
rc = 0;
out:
- unmap_table(table);
+ unmap_table(ptep);
return rc;
}
@@ -422,17 +439,40 @@ static int pt_update(vaddr_t virt, mfn_t mfn,
while ( left )
{
- unsigned int order, level;
-
- level = pt_mapping_level(vfn, mfn, left, flags);
- order = XEN_PT_LEVEL_ORDER(level);
+ unsigned int order, level = CONFIG_PAGING_LEVELS;
- ASSERT(left >= BIT(order, UL));
+ /*
+ * In the case when modifying or destroying a mapping, it is necessary
+ * to search until a leaf node is found, instead of searching for
+ * a page table entry based on the precalculated `level` and `order`
+ * (look at pt_update()).
+ * This is because when `mfn` == INVALID_MFN, the `mask`(in
+ * pt_mapping_level()) will take into account only `vfn`, which could
+ * accidentally return an incorrect level, leading to the discovery of
+ * an incorrect page table entry.
+ *
+ * For example, if `vfn` is page table level 1 aligned, but it was
+ * mapped as page table level 0, then pt_mapping_level() will return
+ * `level` = 1, since only `vfn` (which is page table level 1 aligned)
+ * is taken into account when `mfn` == INVALID_MFN
+ * (look at pt_mapping_level()).
+ *
+ * To force searching until a leaf node is found is necessary to have
+ * `level` == CONFIG_PAGING_LEVELS which is a default value for
+ * `level`.
+ *
+ * For other cases (when a mapping is not being modified or destroyed),
+ * pt_mapping_level() should be used.
+ */
+ if ( !mfn_eq(mfn, INVALID_MFN) || (flags & PTE_POPULATE) )
+ level = pt_mapping_level(vfn, mfn, left, flags);
- rc = pt_update_entry(root, vfn << PAGE_SHIFT, mfn, level, flags);
+ rc = pt_update_entry(root, vfn << PAGE_SHIFT, mfn, &level, flags);
if ( rc )
break;
+ order = XEN_PT_LEVEL_ORDER(level);
+
vfn += 1UL << order;
if ( !mfn_eq(mfn, INVALID_MFN) )
mfn = mfn_add(mfn, 1UL << order);
--
2.48.1
On 20.02.2025 18:44, Oleksii Kurochko wrote: > When pt_update() is called with arguments (..., INVALID_MFN, ..., 0 or 1), > it indicates that a mapping is being destroyed/modifyed. > > In the case when modifying or destroying a mapping, it is necessary to > search until a leaf node is found, instead of searching for a page table > entry based on the precalculated `level` and `order`(look at pt_update()). > This is because when `mfn` == INVALID_MFN, the `mask` (in pt_mapping_level()) > will take into account only `vfn`, which could accidentally return an > incorrect level, leading to the discovery of an incorrect page table entry. > > For example, if `vfn` is page table level 1 aligned, but it was mapped as > page table level 0, then pt_mapping_level() will return `level` = 1, since > only `vfn` (which is page table level 1 aligned) is taken into account when > `mfn` == INVALID_MFN (look at pt_mapping_level()). > > Fixes: c2f1ded524 ("xen/riscv: page table handling") > Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> > --- > Changes in v5: > - Rename *entry to *ptep in pt_update_entry(). > - Fix code style issue in the comment. > - Move NULL check of pointer to `table` inside unmap_table and then drop > it in pt_update_entry(). Imo this last aspect wants mentioning in the description. > @@ -422,17 +439,40 @@ static int pt_update(vaddr_t virt, mfn_t mfn, > > while ( left ) > { > - unsigned int order, level; > - > - level = pt_mapping_level(vfn, mfn, left, flags); > - order = XEN_PT_LEVEL_ORDER(level); > + unsigned int order, level = CONFIG_PAGING_LEVELS; > > - ASSERT(left >= BIT(order, UL)); > + /* > + * In the case when modifying or destroying a mapping, it is necessary > + * to search until a leaf node is found, instead of searching for > + * a page table entry based on the precalculated `level` and `order` > + * (look at pt_update()). > + * This is because when `mfn` == INVALID_MFN, the `mask`(in > + * pt_mapping_level()) will take into account only `vfn`, which could > + * accidentally return an incorrect level, leading to the discovery of > + * an incorrect page table entry. > + * > + * For example, if `vfn` is page table level 1 aligned, but it was > + * mapped as page table level 0, then pt_mapping_level() will return > + * `level` = 1, since only `vfn` (which is page table level 1 aligned) > + * is taken into account when `mfn` == INVALID_MFN > + * (look at pt_mapping_level()). > + * > + * To force searching until a leaf node is found is necessary to have > + * `level` == CONFIG_PAGING_LEVELS which is a default value for > + * `level`. There looks to be an "it" missing before the 2nd "is". I'm also unconvinced the part starting with "which" is really necessary. Preferably with these small adjustments (I'd be happy to do so while committing) Reviewed-by: Jan Beulich <jbeulich@suse.com> Jan
On 2/25/25 8:59 AM, Jan Beulich wrote: > On 20.02.2025 18:44, Oleksii Kurochko wrote: >> When pt_update() is called with arguments (..., INVALID_MFN, ..., 0 or 1), >> it indicates that a mapping is being destroyed/modifyed. >> >> In the case when modifying or destroying a mapping, it is necessary to >> search until a leaf node is found, instead of searching for a page table >> entry based on the precalculated `level` and `order`(look at pt_update()). >> This is because when `mfn` == INVALID_MFN, the `mask` (in pt_mapping_level()) >> will take into account only `vfn`, which could accidentally return an >> incorrect level, leading to the discovery of an incorrect page table entry. >> >> For example, if `vfn` is page table level 1 aligned, but it was mapped as >> page table level 0, then pt_mapping_level() will return `level` = 1, since >> only `vfn` (which is page table level 1 aligned) is taken into account when >> `mfn` == INVALID_MFN (look at pt_mapping_level()). >> >> Fixes: c2f1ded524 ("xen/riscv: page table handling") >> Signed-off-by: Oleksii Kurochko<oleksii.kurochko@gmail.com> >> --- >> Changes in v5: >> - Rename *entry to *ptep in pt_update_entry(). >> - Fix code style issue in the comment. >> - Move NULL check of pointer to `table` inside unmap_table and then drop >> it in pt_update_entry(). > Imo this last aspect wants mentioning in the description. Agree, considering that it is a change in the code of previously introduced function, it makes to mention that. > >> @@ -422,17 +439,40 @@ static int pt_update(vaddr_t virt, mfn_t mfn, >> >> while ( left ) >> { >> - unsigned int order, level; >> - >> - level = pt_mapping_level(vfn, mfn, left, flags); >> - order = XEN_PT_LEVEL_ORDER(level); >> + unsigned int order, level = CONFIG_PAGING_LEVELS; >> >> - ASSERT(left >= BIT(order, UL)); >> + /* >> + * In the case when modifying or destroying a mapping, it is necessary >> + * to search until a leaf node is found, instead of searching for >> + * a page table entry based on the precalculated `level` and `order` >> + * (look at pt_update()). >> + * This is because when `mfn` == INVALID_MFN, the `mask`(in >> + * pt_mapping_level()) will take into account only `vfn`, which could >> + * accidentally return an incorrect level, leading to the discovery of >> + * an incorrect page table entry. >> + * >> + * For example, if `vfn` is page table level 1 aligned, but it was >> + * mapped as page table level 0, then pt_mapping_level() will return >> + * `level` = 1, since only `vfn` (which is page table level 1 aligned) >> + * is taken into account when `mfn` == INVALID_MFN >> + * (look at pt_mapping_level()). >> + * >> + * To force searching until a leaf node is found is necessary to have >> + * `level` == CONFIG_PAGING_LEVELS which is a default value for >> + * `level`. > There looks to be an "it" missing before the 2nd "is". I'm also unconvinced the > part starting with "which" is really necessary. Lets then just drop this part. I added only to explicitly show that it is the value which is used to define `level`. > > Preferably with these small adjustments (I'd be happy to do so while committing) > Reviewed-by: Jan Beulich<jbeulich@suse.com> Thanks! ~ Oleksii
© 2016 - 2025 Red Hat, Inc.