[PATCH v3 5/7] xen: add capability to remap non-RAM pages to different PFNs

Juergen Gross posted 7 patches 2 months, 2 weeks ago
[PATCH v3 5/7] xen: add capability to remap non-RAM pages to different PFNs
Posted by Juergen Gross 2 months, 2 weeks ago
When running as a Xen PV dom0 it can happen that the kernel is being
loaded to a guest physical address conflicting with the host memory
map.

In order to be able to resolve this conflict, add the capability to
remap non-RAM areas to different guest PFNs. A function to use this
remapping information for other purposes than doing the remap will be
added when needed.

As the number of conflicts should be rather low (currently only
machines with max. 1 conflict are known), save the remap data in a
small statically allocated array.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- split off from patch 5 of V1 of the series
- moved to p2m.c
V3:
- add const qualifier (Jan Beulich)
- change remap size type to size_t (Jan Beulich)
- add __ro_after_init qualifier (Jan Beulich)
- log frame numbers in hex (Jan Beulich)
- always issue message regarding number of remapped pages (Jan Beulich)
- add sanity check (Jan Beulich)
- fix remap loop (Jan Beulich)

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/p2m.c     | 65 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/xen/xen-ops.h |  3 ++
 2 files changed, 68 insertions(+)

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 7c735b730acd..5b2aeae6f9e4 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -80,6 +80,7 @@
 #include <asm/xen/hypervisor.h>
 #include <xen/balloon.h>
 #include <xen/grant_table.h>
+#include <xen/hvc-console.h>
 
 #include "xen-ops.h"
 
@@ -792,6 +793,70 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 	return ret;
 }
 
+/* Remapped non-RAM areas */
+#define NR_NONRAM_REMAP 4
+static struct nonram_remap {
+	phys_addr_t maddr;
+	phys_addr_t paddr;
+	size_t size;
+} xen_nonram_remap[NR_NONRAM_REMAP] __ro_after_init;
+static unsigned int nr_nonram_remap __ro_after_init;
+
+/*
+ * Do the real remapping of non-RAM regions as specified in the
+ * xen_nonram_remap[] array.
+ * In case of an error just crash the system.
+ */
+void __init xen_do_remap_nonram(void)
+{
+	unsigned int i;
+	unsigned int remapped = 0;
+	const struct nonram_remap *remap = xen_nonram_remap;
+	unsigned long pfn, mfn, end_pfn;
+
+	for (i = 0; i < nr_nonram_remap; i++) {
+		end_pfn = PFN_UP(remap->paddr + remap->size);
+		pfn = PFN_DOWN(remap->paddr);
+		mfn = PFN_DOWN(remap->maddr);
+		while (pfn < end_pfn) {
+			if (!set_phys_to_machine(pfn, mfn)) {
+				pr_err("Failed to set p2m mapping for pfn=%lx mfn=%lx\n",
+				       pfn, mfn);
+				BUG();
+			}
+
+			pfn++;
+			mfn++;
+			remapped++;
+		}
+
+		remap++;
+	}
+
+	pr_info("Remapped %u non-RAM page(s)\n", remapped);
+}
+
+/*
+ * Add a new non-RAM remap entry.
+ * In case of no free entry found, just crash the system.
+ */
+void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr,
+				 unsigned long size)
+{
+	BUG_ON((maddr & ~PAGE_MASK) != (paddr & ~PAGE_MASK));
+
+	if (nr_nonram_remap == NR_NONRAM_REMAP) {
+		xen_raw_console_write("Number of required E820 entry remapping actions exceed maximum value\n");
+		BUG();
+	}
+
+	xen_nonram_remap[nr_nonram_remap].maddr = maddr;
+	xen_nonram_remap[nr_nonram_remap].paddr = paddr;
+	xen_nonram_remap[nr_nonram_remap].size = size;
+
+	nr_nonram_remap++;
+}
+
 #ifdef CONFIG_XEN_DEBUG_FS
 #include <linux/debugfs.h>
 static int p2m_dump_show(struct seq_file *m, void *v)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9a27d1d653d3..e1b782e823e6 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -47,6 +47,9 @@ void xen_mm_unpin_all(void);
 #ifdef CONFIG_X86_64
 void __init xen_relocate_p2m(void);
 #endif
+void __init xen_do_remap_nonram(void);
+void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr,
+				 unsigned long size);
 
 void __init xen_chk_is_e820_usable(phys_addr_t start, phys_addr_t size,
 				   const char *component);
-- 
2.43.0
Re: [PATCH v3 5/7] xen: add capability to remap non-RAM pages to different PFNs
Posted by Jan Beulich 2 months, 2 weeks ago
On 10.09.2024 12:39, Juergen Gross wrote:
> When running as a Xen PV dom0 it can happen that the kernel is being
> loaded to a guest physical address conflicting with the host memory
> map.
> 
> In order to be able to resolve this conflict, add the capability to
> remap non-RAM areas to different guest PFNs. A function to use this
> remapping information for other purposes than doing the remap will be
> added when needed.
> 
> As the number of conflicts should be rather low (currently only
> machines with max. 1 conflict are known), save the remap data in a
> small statically allocated array.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>
with two cosmetic remarks:

> @@ -792,6 +793,70 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
>  	return ret;
>  }
>  
> +/* Remapped non-RAM areas */
> +#define NR_NONRAM_REMAP 4
> +static struct nonram_remap {
> +	phys_addr_t maddr;
> +	phys_addr_t paddr;
> +	size_t size;
> +} xen_nonram_remap[NR_NONRAM_REMAP] __ro_after_init;
> +static unsigned int nr_nonram_remap __ro_after_init;

I take it this is the canonical placement of section attributes in the
kernel? (In Xen I'd ask for the attributes to be moved ahead of the
identifiers being declared.)

> +/*
> + * Do the real remapping of non-RAM regions as specified in the
> + * xen_nonram_remap[] array.
> + * In case of an error just crash the system.
> + */
> +void __init xen_do_remap_nonram(void)
> +{
> +	unsigned int i;
> +	unsigned int remapped = 0;
> +	const struct nonram_remap *remap = xen_nonram_remap;
> +	unsigned long pfn, mfn, end_pfn;
> +
> +	for (i = 0; i < nr_nonram_remap; i++) {
> +		end_pfn = PFN_UP(remap->paddr + remap->size);
> +		pfn = PFN_DOWN(remap->paddr);
> +		mfn = PFN_DOWN(remap->maddr);
> +		while (pfn < end_pfn) {
> +			if (!set_phys_to_machine(pfn, mfn)) {
> +				pr_err("Failed to set p2m mapping for pfn=%lx mfn=%lx\n",
> +				       pfn, mfn);
> +				BUG();

Wouldn't panic() get you both in one operation? Or do you expect the call
trace / register state to be of immediate relevance for analysis?

Jan
Re: [PATCH v3 5/7] xen: add capability to remap non-RAM pages to different PFNs
Posted by Jürgen Groß 2 months, 2 weeks ago
On 10.09.24 14:26, Jan Beulich wrote:
> On 10.09.2024 12:39, Juergen Gross wrote:
>> When running as a Xen PV dom0 it can happen that the kernel is being
>> loaded to a guest physical address conflicting with the host memory
>> map.
>>
>> In order to be able to resolve this conflict, add the capability to
>> remap non-RAM areas to different guest PFNs. A function to use this
>> remapping information for other purposes than doing the remap will be
>> added when needed.
>>
>> As the number of conflicts should be rather low (currently only
>> machines with max. 1 conflict are known), save the remap data in a
>> small statically allocated array.
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
> 
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> with two cosmetic remarks:
> 
>> @@ -792,6 +793,70 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
>>   	return ret;
>>   }
>>   
>> +/* Remapped non-RAM areas */
>> +#define NR_NONRAM_REMAP 4
>> +static struct nonram_remap {
>> +	phys_addr_t maddr;
>> +	phys_addr_t paddr;
>> +	size_t size;
>> +} xen_nonram_remap[NR_NONRAM_REMAP] __ro_after_init;
>> +static unsigned int nr_nonram_remap __ro_after_init;
> 
> I take it this is the canonical placement of section attributes in the
> kernel? (In Xen I'd ask for the attributes to be moved ahead of the
> identifiers being declared.)

I didn't find an explicit mentioning in the coding style, but most
examples I've found place the section attribute after the name of the
variable.

> 
>> +/*
>> + * Do the real remapping of non-RAM regions as specified in the
>> + * xen_nonram_remap[] array.
>> + * In case of an error just crash the system.
>> + */
>> +void __init xen_do_remap_nonram(void)
>> +{
>> +	unsigned int i;
>> +	unsigned int remapped = 0;
>> +	const struct nonram_remap *remap = xen_nonram_remap;
>> +	unsigned long pfn, mfn, end_pfn;
>> +
>> +	for (i = 0; i < nr_nonram_remap; i++) {
>> +		end_pfn = PFN_UP(remap->paddr + remap->size);
>> +		pfn = PFN_DOWN(remap->paddr);
>> +		mfn = PFN_DOWN(remap->maddr);
>> +		while (pfn < end_pfn) {
>> +			if (!set_phys_to_machine(pfn, mfn)) {
>> +				pr_err("Failed to set p2m mapping for pfn=%lx mfn=%lx\n",
>> +				       pfn, mfn);
>> +				BUG();
> 
> Wouldn't panic() get you both in one operation? Or do you expect the call
> trace / register state to be of immediate relevance for analysis?

You are right, in this case panic() is a better option.


Juergen