[PATCH v2 3/3] xen/x86: move d->arch.physaddr_bitsize field handling to pv32

Grygorii Strashko posted 3 patches 2 weeks, 2 days ago
There is a newer version of this series
[PATCH v2 3/3] xen/x86: move d->arch.physaddr_bitsize field handling to pv32
Posted by Grygorii Strashko 2 weeks, 2 days ago
From: Grygorii Strashko <grygorii_strashko@epam.com>

The d->arch.physaddr_bitsize field is used only by PV32 code, so:

- move domain_set_alloc_bitsize() function into PV32 code
- move domain_clamp_alloc_bitsize() function into PV32 code
- move d->arch.physaddr_bitsize field under PV32 ifdef into
  struct pv_domain

Signed-off-by: Grygorii Strashko <grygorii_strashko@epam.com>
---
 xen/arch/x86/include/asm/domain.h |  8 +++++---
 xen/arch/x86/include/asm/mm.h     |  6 ++++--
 xen/arch/x86/pv/dom0_build.c      |  6 ++++--
 xen/arch/x86/pv/domain.c          | 23 +++++++++++++++++++++++
 xen/arch/x86/x86_64/mm.c          | 20 --------------------
 5 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index 5df8c7825333..6cdfdf8b5c26 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -276,6 +276,11 @@ struct pv_domain
 
     atomic_t nr_l4_pages;
 
+#ifdef CONFIG_PV32
+    /* Maximum physical-address bitwidth supported by this guest. */
+    unsigned int physaddr_bitsize;
+#endif
+
     /* Is a 32-bit PV guest? */
     bool is_32bit;
     /* XPTI active? */
@@ -316,9 +321,6 @@ struct arch_domain
     unsigned int hv_compat_vstart;
 #endif
 
-    /* Maximum physical-address bitwidth supported by this guest. */
-    unsigned int physaddr_bitsize;
-
     /* I/O-port admin-specified access capabilities. */
     struct rangeset *ioport_caps;
     uint32_t pci_cf8;
diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
index 17ca6666a34e..128115442ecc 100644
--- a/xen/arch/x86/include/asm/mm.h
+++ b/xen/arch/x86/include/asm/mm.h
@@ -619,9 +619,11 @@ void __iomem *ioremap_wc(paddr_t pa, size_t len);
 
 extern int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm);
 
-void domain_set_alloc_bitsize(struct domain *d);
-unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits);
+#ifdef CONFIG_PV32
+unsigned int domain_clamp_alloc_bitsize(const struct domain *d,
+                                        unsigned int bits);
 #define domain_clamp_alloc_bitsize(d, bits) domain_clamp_alloc_bitsize(d, bits)
+#endif
 
 unsigned long domain_get_maximum_gpfn(struct domain *d);
 
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index 21158ce1812e..e4f95d8f2fc8 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -626,8 +626,9 @@ static int __init dom0_construct(const struct boot_domain *bd)
         initrd_mfn = paddr_to_pfn(initrd->start);
         mfn = initrd_mfn;
         count = PFN_UP(initrd_len);
-        if ( d->arch.physaddr_bitsize &&
-             ((mfn + count - 1) >> (d->arch.physaddr_bitsize - PAGE_SHIFT)) )
+#ifdef CONFIG_PV32
+        if ( d->arch.pv.physaddr_bitsize &&
+             ((mfn + count - 1) >> (d->arch.pv.physaddr_bitsize - PAGE_SHIFT)) )
         {
             order = get_order_from_pages(count);
             page = alloc_domheap_pages(d, order, MEMF_no_scrub);
@@ -650,6 +651,7 @@ static int __init dom0_construct(const struct boot_domain *bd)
             initrd->start = pfn_to_paddr(initrd_mfn);
         }
         else
+#endif
         {
             while ( count-- )
                 if ( assign_pages(mfn_to_page(_mfn(mfn++)), 1, d, 0) )
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 9c4785c187dd..ad40a712ac5f 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -230,6 +230,29 @@ unsigned long pv_make_cr4(const struct vcpu *v)
 }
 
 #ifdef CONFIG_PV32
+unsigned int domain_clamp_alloc_bitsize(const struct domain *d,
+                                        unsigned int bits)
+{
+    if ( (d == NULL) || (d->arch.pv.physaddr_bitsize == 0) )
+        return bits;
+
+    return min(d->arch.pv.physaddr_bitsize, bits);
+}
+
+static void domain_set_alloc_bitsize(struct domain *d)
+{
+    if ( !is_pv_32bit_domain(d) ||
+         (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
+         d->arch.pv.physaddr_bitsize > 0 )
+        return;
+
+    d->arch.pv.physaddr_bitsize =
+        /* 2^n entries can be contained in guest's p2m mapping space */
+        fls(MACH2PHYS_COMPAT_NR_ENTRIES(d)) - 1
+        /* 2^n pages -> 2^(n+PAGE_SHIFT) bits */
+        + PAGE_SHIFT;
+}
+
 int switch_compat(struct domain *d)
 {
     struct vcpu *v;
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index d4e6a9c0a2e0..8eadab7933d0 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1119,26 +1119,6 @@ unmap:
     return ret;
 }
 
-void domain_set_alloc_bitsize(struct domain *d)
-{
-    if ( !is_pv_32bit_domain(d) ||
-         (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
-         d->arch.physaddr_bitsize > 0 )
-        return;
-    d->arch.physaddr_bitsize =
-        /* 2^n entries can be contained in guest's p2m mapping space */
-        fls(MACH2PHYS_COMPAT_NR_ENTRIES(d)) - 1
-        /* 2^n pages -> 2^(n+PAGE_SHIFT) bits */
-        + PAGE_SHIFT;
-}
-
-unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits)
-{
-    if ( (d == NULL) || (d->arch.physaddr_bitsize == 0) )
-        return bits;
-    return min(d->arch.physaddr_bitsize, bits);
-}
-
 static int transfer_pages_to_heap(struct mem_hotadd_info *info)
 {
     unsigned long i;
-- 
2.34.1
Re: [PATCH v2 3/3] xen/x86: move d->arch.physaddr_bitsize field handling to pv32
Posted by Andrew Cooper 2 weeks, 2 days ago
On 27/11/2025 10:12 pm, Grygorii Strashko wrote:
> diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
> index 17ca6666a34e..128115442ecc 100644
> --- a/xen/arch/x86/include/asm/mm.h
> +++ b/xen/arch/x86/include/asm/mm.h
> @@ -619,9 +619,11 @@ void __iomem *ioremap_wc(paddr_t pa, size_t len);
>  
>  extern int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm);
>  
> -void domain_set_alloc_bitsize(struct domain *d);
> -unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits);
> +#ifdef CONFIG_PV32
> +unsigned int domain_clamp_alloc_bitsize(const struct domain *d,
> +                                        unsigned int bits);

Do not convert this, or any other domains/vcpus you see, to const.  I
realise you have been given conflicting information on this point, but
the maintainers as a whole agreed to not const pointers to complex
objects in the general case because of the churn it creates, and the
repeated examples of MISRA violations people have inserted to work
around the fact it shouldn't have been a const pointer to start with.

That aside, I think clamp wants to be a static inline here.  (Except it
can't be, so it needs to be a macro).

It's currently a concrete function call to very simple piece of logic,
where the callers have options to eliminate it entirely in the d = NULL
case if they could only see in.

#define domain_clamp_alloc_bitsize(d, bits)                             \
    (((d) && (d)->arch.pv.physaddr_bitsize)                             \
     ? min_t(uint32_t, (d)->arch.pv.physaddr_bitsize, bits) : bits)


seems to work.  The min_t() is because all callers pass in bits as a
long constant, tripping the typecheck in min().

> +#endif
>  
>  unsigned long domain_get_maximum_gpfn(struct domain *d);
>  
> diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
> index 21158ce1812e..e4f95d8f2fc8 100644
> --- a/xen/arch/x86/pv/dom0_build.c
> +++ b/xen/arch/x86/pv/dom0_build.c
> @@ -626,8 +626,9 @@ static int __init dom0_construct(const struct boot_domain *bd)
>          initrd_mfn = paddr_to_pfn(initrd->start);
>          mfn = initrd_mfn;
>          count = PFN_UP(initrd_len);
> -        if ( d->arch.physaddr_bitsize &&
> -             ((mfn + count - 1) >> (d->arch.physaddr_bitsize - PAGE_SHIFT)) )

While you're editing this, blank line here please.

> +#ifdef CONFIG_PV32
> +        if ( d->arch.pv.physaddr_bitsize &&
> +             ((mfn + count - 1) >> (d->arch.pv.physaddr_bitsize - PAGE_SHIFT)) )
>          {
>              order = get_order_from_pages(count);
>              page = alloc_domheap_pages(d, order, MEMF_no_scrub);
> @@ -650,6 +651,7 @@ static int __init dom0_construct(const struct boot_domain *bd)
>              initrd->start = pfn_to_paddr(initrd_mfn);
>          }
>          else
> +#endif
>          {
>              while ( count-- )
>                  if ( assign_pages(mfn_to_page(_mfn(mfn++)), 1, d, 0) )
> diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
> index 9c4785c187dd..ad40a712ac5f 100644
> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -230,6 +230,29 @@ unsigned long pv_make_cr4(const struct vcpu *v)
>  }
>  
>  #ifdef CONFIG_PV32
> +unsigned int domain_clamp_alloc_bitsize(const struct domain *d,
> +                                        unsigned int bits)
> +{
> +    if ( (d == NULL) || (d->arch.pv.physaddr_bitsize == 0) )
> +        return bits;
> +
> +    return min(d->arch.pv.physaddr_bitsize, bits);
> +}
> +
> +static void domain_set_alloc_bitsize(struct domain *d)
> +{
> +    if ( !is_pv_32bit_domain(d) ||
> +         (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
> +         d->arch.pv.physaddr_bitsize > 0 )
> +        return;
> +
> +    d->arch.pv.physaddr_bitsize =
> +        /* 2^n entries can be contained in guest's p2m mapping space */
> +        fls(MACH2PHYS_COMPAT_NR_ENTRIES(d)) - 1
> +        /* 2^n pages -> 2^(n+PAGE_SHIFT) bits */
> +        + PAGE_SHIFT;
> +}

The single caller has just set d->arch.pv.is_32bit = true, but the
compiler can't eliminate the first condition because of the embedded
evaluate_nospec().  Nor the 3rd condition, because it can't reason about
the singleton nature of switch_compat().

Thus, it would be better inlined fully, as:

    if ( MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page )
        d->arch.pv.physaddr_bitsize =
            /* 2^n entries can be contained in guest's p2m mapping space */
            fls(MACH2PHYS_COMPAT_NR_ENTRIES(d)) - 1 + PAGE_SHIFT;


which is rather easier to follow.  Even the comment about PAGE_SHIFT is
more noise than help.

In an ideal world it ought to be in its own patch, and in principle I'm
happy to draft one with a fuller explanation if you'd prefer, but given
the repositioning of physaddr_bitsize into pv domain as wekk, it's
probably all better together in this single patch.

~Andrew

Re: [PATCH v2 3/3] xen/x86: move d->arch.physaddr_bitsize field handling to pv32
Posted by Grygorii Strashko 2 weeks, 1 day ago

On 28.11.25 01:09, Andrew Cooper wrote:
> On 27/11/2025 10:12 pm, Grygorii Strashko wrote:
>> diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
>> index 17ca6666a34e..128115442ecc 100644
>> --- a/xen/arch/x86/include/asm/mm.h
>> +++ b/xen/arch/x86/include/asm/mm.h
>> @@ -619,9 +619,11 @@ void __iomem *ioremap_wc(paddr_t pa, size_t len);
>>   
>>   extern int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm);
>>   
>> -void domain_set_alloc_bitsize(struct domain *d);
>> -unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits);
>> +#ifdef CONFIG_PV32
>> +unsigned int domain_clamp_alloc_bitsize(const struct domain *d,
>> +                                        unsigned int bits);
> 
> Do not convert this, or any other domains/vcpus you see, to const.  I
> realise you have been given conflicting information on this point, but
> the maintainers as a whole agreed to not const pointers to complex
> objects in the general case because of the churn it creates, and the
> repeated examples of MISRA violations people have inserted to work
> around the fact it shouldn't have been a const pointer to start with.

It's pure and simple RO function that's why I added const.

> 
> That aside, I think clamp wants to be a static inline here.  (Except it
> can't be, so it needs to be a macro).

Sorry, why "can't be" static inline?

> 
> It's currently a concrete function call to very simple piece of logic,
> where the callers have options to eliminate it entirely in the d = NULL
> case if they could only see in.
> 
> #define domain_clamp_alloc_bitsize(d, bits)                             \
>      (((d) && (d)->arch.pv.physaddr_bitsize)                             \
>       ? min_t(uint32_t, (d)->arch.pv.physaddr_bitsize, bits) : bits)
> 
> 
> seems to work.  The min_t() is because all callers pass in bits as a
> long constant, tripping the typecheck in min().
> 
>> +#endif
>>   
>>   unsigned long domain_get_maximum_gpfn(struct domain *d);
>>   
>> diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
>> index 21158ce1812e..e4f95d8f2fc8 100644
>> --- a/xen/arch/x86/pv/dom0_build.c
>> +++ b/xen/arch/x86/pv/dom0_build.c
>> @@ -626,8 +626,9 @@ static int __init dom0_construct(const struct boot_domain *bd)
>>           initrd_mfn = paddr_to_pfn(initrd->start);
>>           mfn = initrd_mfn;
>>           count = PFN_UP(initrd_len);
>> -        if ( d->arch.physaddr_bitsize &&
>> -             ((mfn + count - 1) >> (d->arch.physaddr_bitsize - PAGE_SHIFT)) )
> 
> While you're editing this, blank line here please.
> 
>> +#ifdef CONFIG_PV32
>> +        if ( d->arch.pv.physaddr_bitsize &&
>> +             ((mfn + count - 1) >> (d->arch.pv.physaddr_bitsize - PAGE_SHIFT)) )
>>           {
>>               order = get_order_from_pages(count);
>>               page = alloc_domheap_pages(d, order, MEMF_no_scrub);
>> @@ -650,6 +651,7 @@ static int __init dom0_construct(const struct boot_domain *bd)
>>               initrd->start = pfn_to_paddr(initrd_mfn);
>>           }
>>           else
>> +#endif
>>           {
>>               while ( count-- )
>>                   if ( assign_pages(mfn_to_page(_mfn(mfn++)), 1, d, 0) )
>> diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
>> index 9c4785c187dd..ad40a712ac5f 100644
>> --- a/xen/arch/x86/pv/domain.c
>> +++ b/xen/arch/x86/pv/domain.c
>> @@ -230,6 +230,29 @@ unsigned long pv_make_cr4(const struct vcpu *v)
>>   }
>>   
>>   #ifdef CONFIG_PV32
>> +unsigned int domain_clamp_alloc_bitsize(const struct domain *d,
>> +                                        unsigned int bits)
>> +{
>> +    if ( (d == NULL) || (d->arch.pv.physaddr_bitsize == 0) )
>> +        return bits;
>> +
>> +    return min(d->arch.pv.physaddr_bitsize, bits);
>> +}
>> +
>> +static void domain_set_alloc_bitsize(struct domain *d)
>> +{
>> +    if ( !is_pv_32bit_domain(d) ||
>> +         (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
>> +         d->arch.pv.physaddr_bitsize > 0 )
>> +        return;
>> +
>> +    d->arch.pv.physaddr_bitsize =
>> +        /* 2^n entries can be contained in guest's p2m mapping space */
>> +        fls(MACH2PHYS_COMPAT_NR_ENTRIES(d)) - 1
>> +        /* 2^n pages -> 2^(n+PAGE_SHIFT) bits */
>> +        + PAGE_SHIFT;
>> +}
> 
> The single caller has just set d->arch.pv.is_32bit = true, but the
> compiler can't eliminate the first condition because of the embedded
> evaluate_nospec().  Nor the 3rd condition, because it can't reason about
> the singleton nature of switch_compat().
> 
> Thus, it would be better inlined fully, as:
> 
>      if ( MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page )
>          d->arch.pv.physaddr_bitsize =
>              /* 2^n entries can be contained in guest's p2m mapping space */
>              fls(MACH2PHYS_COMPAT_NR_ENTRIES(d)) - 1 + PAGE_SHIFT;
> 
> 
> which is rather easier to follow.  Even the comment about PAGE_SHIFT is
> more noise than help.

To clarify my understanding - you wanna drop domain_set_alloc_bitsize and
inline code in switch_compat(), right?

> 
> In an ideal world it ought to be in its own patch, and in principle I'm
> happy to draft one with a fuller explanation if you'd prefer, but given
> the repositioning of physaddr_bitsize into pv domain as wekk, it's
> probably all better together in this single patch.
> 
> ~Andrew

-- 
Best regards,
-grygorii


Re: [PATCH v2 3/3] xen/x86: move d->arch.physaddr_bitsize field handling to pv32
Posted by Jan Beulich 2 weeks, 2 days ago
On 28.11.2025 00:09, Andrew Cooper wrote:
> On 27/11/2025 10:12 pm, Grygorii Strashko wrote:
>> diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
>> index 17ca6666a34e..128115442ecc 100644
>> --- a/xen/arch/x86/include/asm/mm.h
>> +++ b/xen/arch/x86/include/asm/mm.h
>> @@ -619,9 +619,11 @@ void __iomem *ioremap_wc(paddr_t pa, size_t len);
>>  
>>  extern int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm);
>>  
>> -void domain_set_alloc_bitsize(struct domain *d);
>> -unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits);
>> +#ifdef CONFIG_PV32
>> +unsigned int domain_clamp_alloc_bitsize(const struct domain *d,
>> +                                        unsigned int bits);
> 
> Do not convert this, or any other domains/vcpus you see, to const.  I
> realise you have been given conflicting information on this point, but
> the maintainers as a whole agreed to not const pointers to complex
> objects in the general case because of the churn it creates, and the
> repeated examples of MISRA violations people have inserted to work
> around the fact it shouldn't have been a const pointer to start with.

While moot here when indeed converted to a macro, as you suggest below,
I don't recall "maintainers as a whole agreed" on this. For one there
are predicate-like functions where I think even you agreed their
parameters can be pointer-to-const. The case here isn't very far from
predicate-like, in particular ...

> That aside, I think clamp wants to be a static inline here.  (Except it
> can't be, so it needs to be a macro).
> 
> It's currently a concrete function call to very simple piece of logic,
> where the callers have options to eliminate it entirely in the d = NULL
> case if they could only see in.
> 
> #define domain_clamp_alloc_bitsize(d, bits)                             \
>     (((d) && (d)->arch.pv.physaddr_bitsize)                             \
>      ? min_t(uint32_t, (d)->arch.pv.physaddr_bitsize, bits) : bits)

... this representation alone demonstrates that what d points to is only
ever read.

Yes, there are cases where the situation is more complex, and where I can
see how my pov may not be shared by others. Yet still - can you point me
to a written form of the supposed agreement among maintainers? Imo
something like this, which has been controversial for a long time, really
needs recording in ./CODING_STYLE or docs/process/coding-best-practices.pandoc.
And then preferably in a shape acceptable to everyone (i.e. no outright
"never").

Jan