When pt_update() is called with arguments (..., INVALID_MFN, ..., 0 or 1),
it indicates that a mapping is being destroyed or modified.

When destroying or modifying a mapping, it is necessary to search until
a leaf node is found, instead of looking up a page table entry based on
the `level` and `order` precalculated in pt_update() (via
pt_mapping_level()). This is because when `mfn` == INVALID_MFN, the
`mask` in pt_mapping_level() takes into account only `vfn`, which can
yield an incorrect level and thereby lead to the wrong page table entry
being found.

For example, if `vfn` is page table level 1 aligned, but it was mapped
at page table level 0, then pt_mapping_level() will return `level` = 1,
since only `vfn` (which is page table level 1 aligned) is taken into
account when `mfn` == INVALID_MFN (see pt_mapping_level()).
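Roughly, the level selection works like this (a simplified sketch of
pt_mapping_level()'s logic, not the literal code in pt.c):

    mask = mfn_eq(mfn, INVALID_MFN) ? 0 : mfn_x(mfn);
    mask |= vfn;

    /*
     * With mfn == INVALID_MFN only vfn contributes to mask, so a
     * level-1-aligned vfn picks level 1 even if the range was really
     * mapped with level-0 (4K) entries.
     */
    for ( level = HYP_PT_ROOT_LEVEL; level != 0; level-- )
        if ( !(mask & (BIT(XEN_PT_LEVEL_ORDER(level), UL) - 1)) &&
             (nr >= BIT(XEN_PT_LEVEL_ORDER(level), UL)) )
            break;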
Fixes: c2f1ded524 ("xen/riscv: page table handling")
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in v2:
- Introduce PTE_LEAF_SEARCH to tell the page table update operation to
  walk down to wherever the leaf entry is.
- Use the introduced PTE_LEAF_SEARCH to avoid searching for the pte_t
  entry twice.
- Update the commit message.
---
xen/arch/riscv/include/asm/page.h | 16 ++++++
xen/arch/riscv/pt.c | 87 +++++++++++++++++++------------
2 files changed, 69 insertions(+), 34 deletions(-)
diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h
index b9076173f4..72d29376bc 100644
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -55,6 +55,22 @@
#define PTE_SMALL BIT(10, UL)
#define PTE_POPULATE BIT(11, UL)
+/*
+ * In the case when modifying or destroying a mapping, it is necessary to
+ * search until a leaf node is found, instead of searching for a page table
+ * entry based on the precalculated `level` and `order` (look at pt_update()).
+ * This is because when `mfn` == INVALID_MFN, the `mask`(in pt_mapping_level())
+ * will take into account only `vfn`, which could accidentally return an
+ * incorrect level, leading to the discovery of an incorrect page table entry.
+ *
+ * For example, if `vfn` is page table level 1 aligned, but it was mapped as
+ * page table level 0, then pt_mapping_level() will return `level` = 1,
+ * since only `vfn` (which is page table level 1 aligned) is taken into account
+ * when `mfn` == INVALID_MFN (look at pt_mapping_level()).
+ */
+
+#define PTE_LEAF_SEARCH BIT(12, UL)
+
#define PTE_ACCESS_MASK (PTE_READABLE | PTE_WRITABLE | PTE_EXECUTABLE)
/* Calculate the offsets into the pagetables for a given VA */
diff --git a/xen/arch/riscv/pt.c b/xen/arch/riscv/pt.c
index 2a5a191a70..9db41eac53 100644
--- a/xen/arch/riscv/pt.c
+++ b/xen/arch/riscv/pt.c
@@ -187,11 +187,10 @@ static int pt_next_level(bool alloc_tbl, pte_t **table, unsigned int offset)
/* Update an entry at the level @target. */
static int pt_update_entry(mfn_t root, vaddr_t virt,
- mfn_t mfn, unsigned int target,
+ mfn_t mfn, unsigned int *target,
unsigned int flags)
{
int rc;
- unsigned int level = HYP_PT_ROOT_LEVEL;
pte_t *table;
/*
* The intermediate page table shouldn't be allocated when MFN isn't
@@ -205,39 +204,48 @@ static int pt_update_entry(mfn_t root, vaddr_t virt,
bool alloc_tbl = !mfn_eq(mfn, INVALID_MFN) || (flags & PTE_POPULATE);
pte_t pte, *entry;
- /* convenience aliases */
- DECLARE_OFFSETS(offsets, virt);
-
- table = map_table(root);
- for ( ; level > target; level-- )
+ if ( flags & PTE_LEAF_SEARCH )
{
- rc = pt_next_level(alloc_tbl, &table, offsets[level]);
- if ( rc == XEN_TABLE_MAP_NOMEM )
+ entry = pt_walk(virt, target);
+ BUG_ON(!pte_is_mapping(*entry));
+ }
+ else
+ {
+ unsigned int level = HYP_PT_ROOT_LEVEL;
+ /* convenience aliases */
+ DECLARE_OFFSETS(offsets, virt);
+
+ table = map_table(root);
+ for ( ; level > *target; level-- )
{
- rc = -ENOMEM;
- goto out;
+ rc = pt_next_level(alloc_tbl, &table, offsets[level]);
+ if ( rc == XEN_TABLE_MAP_NOMEM )
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ if ( rc == XEN_TABLE_MAP_NONE )
+ {
+ rc = 0;
+ goto out;
+ }
+
+ if ( rc != XEN_TABLE_NORMAL )
+ break;
}
- if ( rc == XEN_TABLE_MAP_NONE )
+ if ( level != *target )
{
- rc = 0;
+ dprintk(XENLOG_ERR,
+ "%s: Shattering superpage is not supported\n", __func__);
+ rc = -EOPNOTSUPP;
goto out;
}
- if ( rc != XEN_TABLE_NORMAL )
- break;
+ entry = table + offsets[level];
}
- if ( level != target )
- {
- dprintk(XENLOG_ERR,
- "%s: Shattering superpage is not supported\n", __func__);
- rc = -EOPNOTSUPP;
- goto out;
- }
-
- entry = table + offsets[level];
-
rc = -EINVAL;
if ( !pt_check_entry(*entry, mfn, flags) )
goto out;
@@ -345,9 +353,6 @@ static int pt_mapping_level(unsigned long vfn, mfn_t mfn, unsigned long nr,
return level;
/*
- * Don't take into account the MFN when removing mapping (i.e
- * MFN_INVALID) to calculate the correct target order.
- *
* `vfn` and `mfn` must be both superpage aligned.
* They are or-ed together and then checked against the size of
* each level.
@@ -415,19 +420,33 @@ static int pt_update(vaddr_t virt, mfn_t mfn,
spin_lock(&pt_lock);
- while ( left )
+ /* look at the comment above the definition of PTE_LEAF_SEARCH */
+ if ( mfn_eq(mfn, INVALID_MFN) && !(flags & PTE_POPULATE) )
{
- unsigned int order, level;
+ flags |= PTE_LEAF_SEARCH;
+ }
- level = pt_mapping_level(vfn, mfn, left, flags);
- order = XEN_PT_LEVEL_ORDER(level);
+ while ( left )
+ {
+ unsigned int order = 0, level;
- ASSERT(left >= BIT(order, UL));
+ if ( !(flags & PTE_LEAF_SEARCH) )
+ {
+ level = pt_mapping_level(vfn, mfn, left, flags);
+ order = XEN_PT_LEVEL_ORDER(level);
+ ASSERT(left >= BIT(order, UL));
+ }
- rc = pt_update_entry(root, vfn << PAGE_SHIFT, mfn, level, flags);
+ rc = pt_update_entry(root, vfn << PAGE_SHIFT, mfn, &level, flags);
if ( rc )
break;
+ if ( flags & PTE_LEAF_SEARCH )
+ {
+ order = XEN_PT_LEVEL_ORDER(level);
+ ASSERT(left >= BIT(order, UL));
+ }
+
vfn += 1UL << order;
if ( !mfn_eq(mfn, INVALID_MFN) )
mfn = mfn_add(mfn, 1UL << order);
--
2.48.1
On 03.02.2025 14:12, Oleksii Kurochko wrote:
> --- a/xen/arch/riscv/include/asm/page.h
> +++ b/xen/arch/riscv/include/asm/page.h
> @@ -55,6 +55,22 @@
>  #define PTE_SMALL BIT(10, UL)
>  #define PTE_POPULATE BIT(11, UL)
> 
> +/*
> + * In the case when modifying or destroying a mapping, it is necessary to
> + * search until a leaf node is found, instead of searching for a page table
> + * entry based on the precalculated `level` and `order` (look at pt_update()).
> + * This is because when `mfn` == INVALID_MFN, the `mask`(in pt_mapping_level())
> + * will take into account only `vfn`, which could accidentally return an
> + * incorrect level, leading to the discovery of an incorrect page table entry.
> + *
> + * For example, if `vfn` is page table level 1 aligned, but it was mapped as
> + * page table level 0, then pt_mapping_level() will return `level` = 1,
> + * since only `vfn` (which is page table level 1 aligned) is taken into account
> + * when `mfn` == INVALID_MFN (look at pt_mapping_level()).
> + */
> +
> +#define PTE_LEAF_SEARCH BIT(12, UL)

Is it intended for callers outside of pt.c to make use of this? If not,
it better wouldn't be globally exposed.

Furthermore, this isn't a property of the PTE(s) to be created, so is
likely wrong to mix with PTE_* flags. (PTE_POPULATE is on the edge of
also falling in this category, btw.) Perhaps ...

> --- a/xen/arch/riscv/pt.c
> +++ b/xen/arch/riscv/pt.c
> @@ -187,11 +187,10 @@ static int pt_next_level(bool alloc_tbl, pte_t **table, unsigned int offset)
> 
>  /* Update an entry at the level @target. */
>  static int pt_update_entry(mfn_t root, vaddr_t virt,
> -                           mfn_t mfn, unsigned int target,
> +                           mfn_t mfn, unsigned int *target,

... you instead want to have callers of this function preset *target
to a special value (e.g. UINT_MAX or CONFIG_PAGING_LEVELS) indicating
the level is wanted as an output.

> @@ -205,39 +204,48 @@ static int pt_update_entry(mfn_t root, vaddr_t virt,
>      bool alloc_tbl = !mfn_eq(mfn, INVALID_MFN) || (flags & PTE_POPULATE);
>      pte_t pte, *entry;
> 
> -    /* convenience aliases */
> -    DECLARE_OFFSETS(offsets, virt);
> -
> -    table = map_table(root);
> -    for ( ; level > target; level-- )
> +    if ( flags & PTE_LEAF_SEARCH )
>      {
> -        rc = pt_next_level(alloc_tbl, &table, offsets[level]);
> -        if ( rc == XEN_TABLE_MAP_NOMEM )
> +        entry = pt_walk(virt, target);
> +        BUG_ON(!pte_is_mapping(*entry));

Is this really necessarily a bug? Can't one want to determine how deep
the (populated) page tables are for a given VA?

Hmm, here I can see why you have pt_walk() return a pointer. As per the
comment on the earlier patch, I don't think this is a good idea. You
may want to have

static pte_t *_pt_walk(...)
{
    ...
}

pte_t pt_walk(...)
{
    return *_pt_walk(...);
}

> @@ -345,9 +353,6 @@ static int pt_mapping_level(unsigned long vfn, mfn_t mfn, unsigned long nr,
>          return level;
> 
>      /*
> -     * Don't take into account the MFN when removing mapping (i.e
> -     * MFN_INVALID) to calculate the correct target order.
> -     *
>       * `vfn` and `mfn` must be both superpage aligned.
>       * They are or-ed together and then checked against the size of
>       * each level.

You drop part of the comment without altering the code being commented.
What's the deal?

> @@ -415,19 +420,33 @@ static int pt_update(vaddr_t virt, mfn_t mfn,
> 
>      spin_lock(&pt_lock);
> 
> -    while ( left )
> +    /* look at the comment above the definition of PTE_LEAF_SEARCH */
> +    if ( mfn_eq(mfn, INVALID_MFN) && !(flags & PTE_POPULATE) )
>      {
> -        unsigned int order, level;
> +        flags |= PTE_LEAF_SEARCH;
> +    }

For readability I think it would be better if the figure braces were
dropped.

> -        level = pt_mapping_level(vfn, mfn, left, flags);
> -        order = XEN_PT_LEVEL_ORDER(level);
> +    while ( left )
> +    {
> +        unsigned int order = 0, level;
> 
> -        ASSERT(left >= BIT(order, UL));
> +        if ( !(flags & PTE_LEAF_SEARCH) )
> +        {
> +            level = pt_mapping_level(vfn, mfn, left, flags);
> +            order = XEN_PT_LEVEL_ORDER(level);
> +            ASSERT(left >= BIT(order, UL));

Assignment to order and assertion are ...

> +        }
> 
> -        rc = pt_update_entry(root, vfn << PAGE_SHIFT, mfn, level, flags);
> +        rc = pt_update_entry(root, vfn << PAGE_SHIFT, mfn, &level, flags);
>          if ( rc )
>              break;
> 
> +        if ( flags & PTE_LEAF_SEARCH )
> +        {
> +            order = XEN_PT_LEVEL_ORDER(level);
> +            ASSERT(left >= BIT(order, UL));
> +        }

... repeated here, with neither left nor order being passed into
pt_update_entry(). Does this really need doing twice? (I have to
admit that I have trouble determining what the assertion is about.
For order alone it clearly could be done centrally after the call.)

Jan
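To make the first suggestion concrete, the pt_update() loop could then
look roughly like this (a sketch only; using CONFIG_PAGING_LEVELS as
the sentinel is one of the two options named above, and the central
placement of order/ASSERT follows the last remark):

    while ( left )
    {
        unsigned int order, level = CONFIG_PAGING_LEVELS;

        if ( !mfn_eq(mfn, INVALID_MFN) || (flags & PTE_POPULATE) )
            level = pt_mapping_level(vfn, mfn, left, flags);

        /*
         * level == CONFIG_PAGING_LEVELS asks pt_update_entry() to walk
         * down to the leaf and report back the level it found there.
         */
        rc = pt_update_entry(root, vfn << PAGE_SHIFT, mfn, &level, flags);
        if ( rc )
            break;

        /* level is valid on success, so order is computed just once. */
        order = XEN_PT_LEVEL_ORDER(level);
        ASSERT(left >= BIT(order, UL));

        vfn += 1UL << order;
        if ( !mfn_eq(mfn, INVALID_MFN) )
            mfn = mfn_add(mfn, 1UL << order);
    }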
On 2/4/25 3:57 PM, Jan Beulich wrote:
> On 03.02.2025 14:12, Oleksii Kurochko wrote:
>> --- a/xen/arch/riscv/include/asm/page.h
>> +++ b/xen/arch/riscv/include/asm/page.h
>> @@ -55,6 +55,22 @@
>>  #define PTE_SMALL BIT(10, UL)
>>  #define PTE_POPULATE BIT(11, UL)
>> 
>> +/*
>> + * In the case when modifying or destroying a mapping, it is necessary to
>> + * search until a leaf node is found, instead of searching for a page table
>> + * entry based on the precalculated `level` and `order` (look at pt_update()).
>> + * This is because when `mfn` == INVALID_MFN, the `mask`(in pt_mapping_level())
>> + * will take into account only `vfn`, which could accidentally return an
>> + * incorrect level, leading to the discovery of an incorrect page table entry.
>> + *
>> + * For example, if `vfn` is page table level 1 aligned, but it was mapped as
>> + * page table level 0, then pt_mapping_level() will return `level` = 1,
>> + * since only `vfn` (which is page table level 1 aligned) is taken into account
>> + * when `mfn` == INVALID_MFN (look at pt_mapping_level()).
>> + */
>> +
>> +#define PTE_LEAF_SEARCH BIT(12, UL)
> Is it intended for callers outside of pt.c to make use of this? If not,
> it better wouldn't be globally exposed.
>
> Furthermore, this isn't a property of the PTE(s) to be created, so is
> likely wrong to mix with PTE_* flags. (PTE_POPULATE is on the edge of
> also falling in this category, btw.) Perhaps ...
>
>> --- a/xen/arch/riscv/pt.c
>> +++ b/xen/arch/riscv/pt.c
>> @@ -187,11 +187,10 @@ static int pt_next_level(bool alloc_tbl, pte_t **table, unsigned int offset)
>> 
>>  /* Update an entry at the level @target. */
>>  static int pt_update_entry(mfn_t root, vaddr_t virt,
>> -                           mfn_t mfn, unsigned int target,
>> +                           mfn_t mfn, unsigned int *target,
> ... you instead want to have callers of this function preset *target
> to a special value (e.g. UINT_MAX or CONFIG_PAGING_LEVELS) indicating
> the level is wanted as an output.

I thought about doing it that way but decided that a separate PTE_*
flag looked clearer at the time. However, I didn't take into account
that it will be used only inside pt.c, so I agree that it should be
declared locally in pt.c and that a special value should be used for
that. I will update it accordingly.

>> @@ -205,39 +204,48 @@ static int pt_update_entry(mfn_t root, vaddr_t virt,
>>      bool alloc_tbl = !mfn_eq(mfn, INVALID_MFN) || (flags & PTE_POPULATE);
>>      pte_t pte, *entry;
>> 
>> -    /* convenience aliases */
>> -    DECLARE_OFFSETS(offsets, virt);
>> -
>> -    table = map_table(root);
>> -    for ( ; level > target; level-- )
>> +    if ( flags & PTE_LEAF_SEARCH )
>>      {
>> -        rc = pt_next_level(alloc_tbl, &table, offsets[level]);
>> -        if ( rc == XEN_TABLE_MAP_NOMEM )
>> +        entry = pt_walk(virt, target);
>> +        BUG_ON(!pte_is_mapping(*entry));
> Is this really necessarily a bug? Can't one want to determine how deep
> the (populated) page tables are for a given VA?

pt_walk() could be used in that way, but PTE_LEAF_SEARCH won't be used
for page table populating:

    if ( mfn_eq(mfn, INVALID_MFN) && !(flags & PTE_POPULATE) )
    {
        flags |= PTE_LEAF_SEARCH;
    }

so in the current version of the patch only a mapped VA <-> PA
translation is expected in this part of the code.

> Hmm, here I can see why you have pt_walk() return a pointer. As per the
> comment on the earlier patch, I don't think this is a good idea. You
> may want to have
>
> static pte_t *_pt_walk(...)
> {
>     ...
> }
>
> pte_t pt_walk(...)
> {
>     return *_pt_walk(...);
> }

It would be better to do it this way.
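Filling in the sketch, something along these lines (the walker body is
elided as in the example above, and the `pte_level` out-parameter is
assumed from this discussion rather than taken from committed code):

static pte_t *_pt_walk(vaddr_t va, unsigned int *pte_level)
{
    pte_t *entry;

    /*
     * Walk the live page tables down to the leaf entry for va, mapping
     * each intermediate table on the way, and store the level reached
     * in *pte_level.
     */
    ...

    return entry;
}

pte_t pt_walk(vaddr_t va, unsigned int *pte_level)
{
    /*
     * Return the PTE by value, so no pointer into a transiently mapped
     * page table escapes to external callers; pt.c-internal users such
     * as pt_update_entry() can call _pt_walk() directly when they need
     * a pointer they can modify.
     */
    return *_pt_walk(va, pte_level);
}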
>
>> @@ -345,9 +353,6 @@ static int pt_mapping_level(unsigned long vfn, mfn_t mfn, unsigned long nr,
>>          return level;
>> 
>>      /*
>> -     * Don't take into account the MFN when removing mapping (i.e
>> -     * MFN_INVALID) to calculate the correct target order.
>> -     *
>>       * `vfn` and `mfn` must be both superpage aligned.
>>       * They are or-ed together and then checked against the size of
>>       * each level.
> You drop part of the comment without altering the code being commented.
> What's the deal?

These changes are left over from the v1 version of this function;
basically, I reverted it incorrectly. Thanks for noticing that; I will
bring this comment back.

>
>> @@ -415,19 +420,33 @@ static int pt_update(vaddr_t virt, mfn_t mfn,
>> 
>>      spin_lock(&pt_lock);
>> 
>> -    while ( left )
>> +    /* look at the comment above the definition of PTE_LEAF_SEARCH */
>> +    if ( mfn_eq(mfn, INVALID_MFN) && !(flags & PTE_POPULATE) )
>>      {
>> -        unsigned int order, level;
>> +        flags |= PTE_LEAF_SEARCH;
>> +    }
> For readability I think it would be better if the figure braces were
> dropped.
>
>> -        level = pt_mapping_level(vfn, mfn, left, flags);
>> -        order = XEN_PT_LEVEL_ORDER(level);
>> +    while ( left )
>> +    {
>> +        unsigned int order = 0, level;
>> 
>> -        ASSERT(left >= BIT(order, UL));
>> +        if ( !(flags & PTE_LEAF_SEARCH) )
>> +        {
>> +            level = pt_mapping_level(vfn, mfn, left, flags);
>> +            order = XEN_PT_LEVEL_ORDER(level);
>> +            ASSERT(left >= BIT(order, UL));
> Assignment to order and assertion are ...
>
>> +        }
>> 
>> -        rc = pt_update_entry(root, vfn << PAGE_SHIFT, mfn, level, flags);
>> +        rc = pt_update_entry(root, vfn << PAGE_SHIFT, mfn, &level, flags);
>>          if ( rc )
>>              break;
>> 
>> +        if ( flags & PTE_LEAF_SEARCH )
>> +        {
>> +            order = XEN_PT_LEVEL_ORDER(level);
>> +            ASSERT(left >= BIT(order, UL));
>> +        }
> ... repeated here, with neither left nor order being passed into
> pt_update_entry(). Does this really need doing twice? (I have to
> admit that I have trouble determining what the assertion is about.
> For order alone it clearly could be done centrally after the call.)

Sure, it could be done just once. As for the ASSERT() itself, it was
added to be sure that pt_mapping_level() returns a proper `level`. I am
not really sure it is needed anymore.

~ Oleksii