Updates xen_pt_update_entry function from xen/arch/arm/mm.c to
automatically split superpages as needed.
---
xen/arch/arm/mm.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 78 insertions(+), 13 deletions(-)
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 6301752..91b9c2b 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -753,8 +753,78 @@ static int create_xen_table(lpae_t *entry)
 }
 
 #define XEN_TABLE_MAP_FAILED 0
-#define XEN_TABLE_SUPER_PAGE 1
-#define XEN_TABLE_NORMAL_PAGE 2
+#define XEN_TABLE_NORMAL_PAGE 1
+
+/* More or less taken from p2m_split_superpage, without the p2m stuff */
+static bool xen_split_superpage(lpae_t *entry, unsigned int level,
+                                unsigned int target, const unsigned int *offsets)
+{
+    struct page_info *page;
+    lpae_t pte, *table;
+    unsigned int i;
+    bool rv = true;
+
+    mfn_t mfn = lpae_get_mfn(*entry);
+    unsigned int next_level = level + 1;
+    unsigned int level_order = XEN_PT_LEVEL_ORDER(next_level);
+
+    ASSERT(level < target);
+    ASSERT(lpae_is_superpage(*entry, level));
+
+    page = alloc_domheap_page(NULL, 0);
+    if ( !page )
+        return false;
+
+    table = __map_domain_page(page);
+
+    /*
+     * We are either splitting a first level 1G page into 512 second level
+     * 2M pages, or a second level 2M page into 512 third level 4K pages.
+     */
+    for ( i = 0; i < XEN_PT_LPAE_ENTRIES; i++ )
+    {
+        lpae_t *new_entry = table + i;
+
+        /*
+         * Use the content of the superpage entry and override
+         * the necessary fields. So the correct permission are kept.
+         */
+        pte = *entry;
+        lpae_set_mfn(pte, mfn_add(mfn, i << level_order));
+
+        /*
+         * First and second level pages set walk.table = 0, but third
+         * level entries set walk.table = 1.
+         */
+        pte.walk.table = (next_level == 3);
+
+        write_pte(new_entry, pte);
+    }
+
+    /*
+     * Shatter superpage in the page to the level we want to make the
+     * changes.
+     * This is done outside the loop to avoid checking the offset to
+     * know whether the entry should be shattered for every entry.
+     */
+    if ( next_level != target )
+        rv = xen_split_superpage(table + offsets[next_level],
+                                 level + 1, target, offsets);
+
+    clean_dcache_va_range(table, PAGE_SIZE);
+    unmap_domain_page(table);
+
+    /*
+     * Generate the entry for this new table we created,
+     * and write it back in place of the superpage entry.
+     */
+    pte = mfn_to_xen_entry(page_to_mfn(page), MT_NORMAL);
+    pte.pt.table = 1;
+    write_pte(entry, pte);
+    clean_dcache(*entry);
+
+    return rv;
+}
 
 /*
  * Take the currently mapped table, find the corresponding entry,
@@ -767,16 +837,15 @@ static int create_xen_table(lpae_t *entry)
  * XEN_TABLE_MAP_FAILED: Either read_only was set and the entry
  * was empty, or allocating a new page failed.
  * XEN_TABLE_NORMAL_PAGE: next level mapped normally
- * XEN_TABLE_SUPER_PAGE: The next entry points to a superpage.
  */
 static int xen_pt_next_level(bool read_only, unsigned int level,
-                             lpae_t **table, unsigned int offset)
+                             lpae_t **table, const unsigned int *offsets)
 {
     lpae_t *entry;
     int ret;
     mfn_t mfn;
 
-    entry = *table + offset;
+    entry = *table + offsets[level];
 
     if ( !lpae_is_valid(*entry) )
     {
@@ -790,7 +859,8 @@ static int xen_pt_next_level(bool read_only, unsigned int level,
 
     /* The function xen_pt_next_level is never called at the 3rd level */
     if ( lpae_is_mapping(*entry, level) )
-        return XEN_TABLE_SUPER_PAGE;
+        /* Shatter the superpage before continuing */
+        xen_split_superpage(entry, level, level + 1, offsets);
 
     mfn = lpae_get_mfn(*entry);
 
@@ -915,7 +985,7 @@ static int xen_pt_update_entry(mfn_t root, unsigned long virt,
     table = xen_map_table(root);
     for ( level = HYP_PT_ROOT_LEVEL; level < target; level++ )
     {
-        rc = xen_pt_next_level(read_only, level, &table, offsets[level]);
+        rc = xen_pt_next_level(read_only, level, &table, offsets);
         if ( rc == XEN_TABLE_MAP_FAILED )
         {
             /*
@@ -941,12 +1011,7 @@ static int xen_pt_update_entry(mfn_t root, unsigned long virt,
         break;
     }
 
-    if ( level != target )
-    {
-        mm_printk("%s: Shattering superpage is not supported\n", __func__);
-        rc = -EOPNOTSUPP;
-        goto out;
-    }
+    BUG_ON( level != target );
 
     entry = table + offsets[level];
 
--
2.7.4
Hi,

On 13/12/2022 19:54, Smith, Jackson wrote:
> Updates xen_pt_update_entry function from xen/arch/arm/mm.c to
> automatically split superpages as needed.

Your signed-off-by is missing.

[snip]

> +    page = alloc_domheap_page(NULL, 0);

Page-table may be allocated from the boot allocator. So you want to use
create_xen_table().

> +    if ( !page )
> +        return false;
> +
> +    table = __map_domain_page(page);

You want to use xen_map_table().

[snip]

> +    clean_dcache_va_range(table, PAGE_SIZE);

Cleaning the cache is not necessary. This is done in the P2M case because
it is shared with the IOMMU, which may not support coherent access.

> +    unmap_domain_page(table);

This would be xen_unmap_table().

> +    /*
> +     * Generate the entry for this new table we created,
> +     * and write it back in place of the superpage entry.
> +     */

I am afraid this is not compliant with the Arm Arm. If you want to update
a valid entry (e.g. shattering a superpage), then you need to follow the
break-before-make sequence. This means that:

 1. Replace the valid entry with an invalid one
 2. Flush the TLBs
 3. Write the new entry

Those steps will make your code compliant, but it also means that a
virtual address will be temporarily invalid, so you could take a fault in
the middle of your split if your stack or the table was part of the
region. The same could happen for the other running CPUs, but this is
less problematic as they could spin on the page-table lock.

This is the main reason why we never implemented superpage shattering for
the hypervisor. So I would rather prefer if we can avoid shattering (I
have made some suggestions in the cover letter). If we really need to
shatter, then we should make sure it is only used in very limited use
cases by introducing a flag. The caller will then be responsible for
acknowledging that it doesn't modify a region that may be used by itself
or another CPU.

> +    pte = mfn_to_xen_entry(page_to_mfn(page), MT_NORMAL);
> +    pte.pt.table = 1;
> +    write_pte(entry, pte);
> +    clean_dcache(*entry);

Ditto about the cache cleaning.

Cheers,

--
Julien Grall
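To make the break-before-make sequence above concrete, here is a minimal
sketch of how the final write in xen_split_superpage() could be made
compliant. It reuses write_pte() from the patch; the helper name
xen_pt_write_bbm() and the use of flush_xen_tlb_range_va() for the TLB
flush are illustrative assumptions, not part of the series:

    /*
     * Sketch only: replace a live superpage entry with the new table entry
     * using break-before-make. 'virt' and 'size' describe the virtual
     * range covered by the entry being rewritten.
     */
    static void xen_pt_write_bbm(lpae_t *entry, lpae_t new_pte,
                                 vaddr_t virt, unsigned long size)
    {
        lpae_t invalid = { .bits = 0 };

        /* 1. Break: invalidate the entry so no new translations are cached. */
        write_pte(entry, invalid);

        /* 2. Flush the TLBs for the affected virtual range. */
        flush_xen_tlb_range_va(virt, size);

        /* 3. Make: install the entry pointing at the newly built table. */
        write_pte(entry, new_pte);
    }

As noted above, any access to 'virt' between steps 1 and 3 (including via
the stack or the page tables themselves) would fault, which is what makes
the sequence hard to use safely for Xen's own mappings.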
On Tue, Dec 13, 2022 at 09:15:49PM +0000, Julien Grall wrote:
> Hi,
>
> On 13/12/2022 19:54, Smith, Jackson wrote:
> > Updates xen_pt_update_entry function from xen/arch/arm/mm.c to
> > automatically split superpages as needed.

[snip]

> I am afraid this is not compliant with the Arm Arm. If you want to update
> a valid entry (e.g. shattering a superpage), then you need to follow the
> break-before-make sequence. This means that:
>
>  1. Replace the valid entry with an invalid one
>  2. Flush the TLBs
>  3. Write the new entry
>
> Those steps will make your code compliant, but it also means that a
> virtual address will be temporarily invalid, so you could take a fault in
> the middle of your split if your stack or the table was part of the
> region. The same could happen for the other running CPUs, but this is
> less problematic as they could spin on the page-table lock.

Could this be worked around by writing the critical section in
assembler? The assembler code would never access the stack and would
run with interrupts disabled. There could also be BUG() checks for
attempting to shatter a PTE that was needed to access the PTE in
question, though I suspect one can work around this with a temporary
PTE.

That said, shattering large pages requires allocating memory, which
might fail. What happens if the allocation does fail?

--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab
Hi Demi,

On 13/12/2022 22:17, Demi Marie Obenour wrote:
> On Tue, Dec 13, 2022 at 09:15:49PM +0000, Julien Grall wrote:

[snip]

> Could this be worked around by writing the critical section in
> assembler?

Everything is feasible. Is this worth it? I don't think so. There are
ways we can avoid the shattering in the first place by simply not
mapping all the RAM.

> The assembler code would never access the stack and would
> run with interrupts disabled. There could also be BUG() checks for
> attempting to shatter a PTE that was needed to access the PTE in
> question, though I suspect one can work around this with a temporary
> PTE. That said, shattering large pages requires allocating memory,
> which might fail. What happens if the allocation does fail?

If this is only done during boot, then I would argue you will want to
crash Xen.

Cheers,

--
Julien Grall
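If shattering really is confined to boot, the allocation-failure question
has the simple answer suggested above; a hedged sketch of the caller side
(the call site and error message are illustrative):

    /* Sketch only: during boot, treat a failed split as fatal. */
    if ( !xen_split_superpage(entry, level, level + 1, offsets) )
        panic("Out of memory while shattering a superpage at level %u\n",
              level);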
On Tue, Dec 13, 2022 at 11:07:55PM +0000, Julien Grall wrote:
> Hi Demi,
>
> On 13/12/2022 22:17, Demi Marie Obenour wrote:
> > On Tue, Dec 13, 2022 at 09:15:49PM +0000, Julien Grall wrote:

[snip]

> > Could this be worked around by writing the critical section in
> > assembler?
>
> Everything is feasible. Is this worth it? I don't think so. There are
> ways we can avoid the shattering in the first place by simply not
> mapping all the RAM.

Good point. I do wonder what would go wrong if one replaced one live
PTE with another that pointed to the same physical address. Is this
merely a case of “spec doesn’t allow it”, or does it actually break on
real hardware?

> > The assembler code would never access the stack and would
> > run with interrupts disabled. There could also be BUG() checks for
> > attempting to shatter a PTE that was needed to access the PTE in
> > question, though I suspect one can work around this with a temporary
> > PTE. That said, shattering large pages requires allocating memory,
> > which might fail. What happens if the allocation does fail?
>
> If this is only done during boot, then I would argue you will want to
> crash Xen.

Fair! Even NASA’s coding standards for spacecraft allow for dynamic
allocation during initialization. After all, initialization is typically
deterministic and easy to test.

--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab
Hi Demi,

On 14/12/2022 01:38, Demi Marie Obenour wrote:
> On Tue, Dec 13, 2022 at 11:07:55PM +0000, Julien Grall wrote:

[snip]

> Good point. I do wonder what would go wrong if one replaced one live
> PTE with another that pointed to the same physical address.

It depends what you are modifying in the PTE. If you only modify the
permissions, then that's fine. But anything else could result in a TLB
conflict, loss of coherency...

> Is this merely a case of “spec doesn’t allow it”, or does it actually
> break on real hardware?

I have seen issues on real HW if the ordering is not respected.

Recent versions of the Arm Arm introduced the possibility to skip the
sequence in certain conditions and if the HW supports it (reported via
the ID registers). I haven't yet seen such a processor.

Cheers,

--
Julien Grall
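To illustrate the distinction drawn above: a change that only touches the
permission bits leaves the output address and memory attributes alone, so
the entry can be rewritten in place, whereas the split in this patch
redirects the entry to a new table and therefore needs break-before-make.
A minimal sketch (the ro field is as in Xen's lpae_pt_t; 'virt' and 'size'
stand for the mapped range and are assumptions):

    /* Sketch only: permission-only update of a live Xen mapping. */
    lpae_t pte = *entry;

    pte.pt.ro = 1;          /* make the mapping read-only; MFN and attributes unchanged */
    write_pte(entry, pte);  /* in-place write: no break-before-make required */

    /* A TLB flush is still needed for the stricter permission to take effect. */
    flush_xen_tlb_range_va(virt, size);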