From: Grygorii Strashko <grygorii_strashko@epam.com>
The d->arch.physaddr_bitsize field is used only by PV32 code, so:
- move domain_set_alloc_bitsize() function into PV32 code and
inline it into switch_compat()
- move domain_clamp_alloc_bitsize() function into PV32 code
and convert to macro
- move d->arch.physaddr_bitsize field under PV32 ifdef into
struct pv_domain
Signed-off-by: Grygorii Strashko <grygorii_strashko@epam.com>
---
changes in v3:
- domain_set_alloc_bitsize() inlined.
Note change of condition to "(MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page)"
- domain_clamp_alloc_bitsize() convert to macro
xen/arch/x86/include/asm/domain.h | 8 +++++---
xen/arch/x86/include/asm/mm.h | 9 ++++++---
xen/arch/x86/pv/dom0_build.c | 7 +++++--
xen/arch/x86/pv/domain.c | 6 +++++-
xen/arch/x86/x86_64/mm.c | 20 --------------------
5 files changed, 21 insertions(+), 29 deletions(-)
diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index 5df8c7825333..6cdfdf8b5c26 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -276,6 +276,11 @@ struct pv_domain
atomic_t nr_l4_pages;
+#ifdef CONFIG_PV32
+ /* Maximum physical-address bitwidth supported by this guest. */
+ unsigned int physaddr_bitsize;
+#endif
+
/* Is a 32-bit PV guest? */
bool is_32bit;
/* XPTI active? */
@@ -316,9 +321,6 @@ struct arch_domain
unsigned int hv_compat_vstart;
#endif
- /* Maximum physical-address bitwidth supported by this guest. */
- unsigned int physaddr_bitsize;
-
/* I/O-port admin-specified access capabilities. */
struct rangeset *ioport_caps;
uint32_t pci_cf8;
diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
index 17ca6666a34e..a308a98df2a4 100644
--- a/xen/arch/x86/include/asm/mm.h
+++ b/xen/arch/x86/include/asm/mm.h
@@ -619,9 +619,12 @@ void __iomem *ioremap_wc(paddr_t pa, size_t len);
extern int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm);
-void domain_set_alloc_bitsize(struct domain *d);
-unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits);
-#define domain_clamp_alloc_bitsize(d, bits) domain_clamp_alloc_bitsize(d, bits)
+#ifdef CONFIG_PV32
+#define domain_clamp_alloc_bitsize(d, bits) \
+ (((d) && (d)->arch.pv.physaddr_bitsize) \
+ ? min_t(uint32_t, (d)->arch.pv.physaddr_bitsize, (bits)) \
+ : (bits))
+#endif
unsigned long domain_get_maximum_gpfn(struct domain *d);
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index 21158ce1812e..94f7976e819f 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -626,8 +626,10 @@ static int __init dom0_construct(const struct boot_domain *bd)
initrd_mfn = paddr_to_pfn(initrd->start);
mfn = initrd_mfn;
count = PFN_UP(initrd_len);
- if ( d->arch.physaddr_bitsize &&
- ((mfn + count - 1) >> (d->arch.physaddr_bitsize - PAGE_SHIFT)) )
+
+#ifdef CONFIG_PV32
+ if ( d->arch.pv.physaddr_bitsize &&
+ ((mfn + count - 1) >> (d->arch.pv.physaddr_bitsize - PAGE_SHIFT)) )
{
order = get_order_from_pages(count);
page = alloc_domheap_pages(d, order, MEMF_no_scrub);
@@ -650,6 +652,7 @@ static int __init dom0_construct(const struct boot_domain *bd)
initrd->start = pfn_to_paddr(initrd_mfn);
}
else
+#endif
{
while ( count-- )
if ( assign_pages(mfn_to_page(_mfn(mfn++)), 1, d, 0) )
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 9c4785c187dd..d58e4e213e5c 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -254,7 +254,11 @@ int switch_compat(struct domain *d)
goto undo_and_fail;
}
- domain_set_alloc_bitsize(d);
+ if ( MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page )
+ d->arch.pv.physaddr_bitsize =
+ /* 2^n entries can be contained in guest's p2m mapping space */
+ fls(MACH2PHYS_COMPAT_NR_ENTRIES(d)) - 1 + PAGE_SHIFT;
+
recalculate_cpuid_policy(d);
d->arch.x87_fip_width = 4;
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index d4e6a9c0a2e0..8eadab7933d0 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1119,26 +1119,6 @@ unmap:
return ret;
}
-void domain_set_alloc_bitsize(struct domain *d)
-{
- if ( !is_pv_32bit_domain(d) ||
- (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
- d->arch.physaddr_bitsize > 0 )
- return;
- d->arch.physaddr_bitsize =
- /* 2^n entries can be contained in guest's p2m mapping space */
- fls(MACH2PHYS_COMPAT_NR_ENTRIES(d)) - 1
- /* 2^n pages -> 2^(n+PAGE_SHIFT) bits */
- + PAGE_SHIFT;
-}
-
-unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits)
-{
- if ( (d == NULL) || (d->arch.physaddr_bitsize == 0) )
- return bits;
- return min(d->arch.physaddr_bitsize, bits);
-}
-
static int transfer_pages_to_heap(struct mem_hotadd_info *info)
{
unsigned long i;
--
2.34.1
On 28.11.2025 16:22, Grygorii Strashko wrote:
> --- a/xen/arch/x86/include/asm/mm.h
> +++ b/xen/arch/x86/include/asm/mm.h
> @@ -619,9 +619,12 @@ void __iomem *ioremap_wc(paddr_t pa, size_t len);
>
> extern int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm);
>
> -void domain_set_alloc_bitsize(struct domain *d);
> -unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits);
> -#define domain_clamp_alloc_bitsize(d, bits) domain_clamp_alloc_bitsize(d, bits)
> +#ifdef CONFIG_PV32
> +#define domain_clamp_alloc_bitsize(d, bits) \
> + (((d) && (d)->arch.pv.physaddr_bitsize) \
> + ? min_t(uint32_t, (d)->arch.pv.physaddr_bitsize, (bits)) \
> + : (bits))
I'm not quite sure if converting to a macro was a good idea. But now that it's
done this way, so be it. Just that a couple of issues may want / need sorting:
- d is now evaluated up to 3 times,
- indentation is wrong,
- the use of uint32_t is against ./CODING_STYLE (I dislike the use of min_t()
anyway, but what do you do when a macro was asked for; of course we could
[easily] arrange for BITS_PER_LONG to be of type "unsigned int"),
- the use of "bits" in min_t() doesn't really need parentheses.
> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -254,7 +254,11 @@ int switch_compat(struct domain *d)
> goto undo_and_fail;
> }
>
> - domain_set_alloc_bitsize(d);
> + if ( MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page )
You mention the change in condition in the revlog (but not in the description),
and I'm having trouble to follow why ...
> --- a/xen/arch/x86/x86_64/mm.c
> +++ b/xen/arch/x86/x86_64/mm.c
> @@ -1119,26 +1119,6 @@ unmap:
> return ret;
> }
>
> -void domain_set_alloc_bitsize(struct domain *d)
> -{
> - if ( !is_pv_32bit_domain(d) ||
> - (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
> - d->arch.physaddr_bitsize > 0 )
... this 3rd part is going away.
Jan
On 08.12.25 14:44, Jan Beulich wrote:
> On 28.11.2025 16:22, Grygorii Strashko wrote:
>> --- a/xen/arch/x86/include/asm/mm.h
>> +++ b/xen/arch/x86/include/asm/mm.h
>> @@ -619,9 +619,12 @@ void __iomem *ioremap_wc(paddr_t pa, size_t len);
>>
>> extern int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm);
>>
>> -void domain_set_alloc_bitsize(struct domain *d);
>> -unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits);
>> -#define domain_clamp_alloc_bitsize(d, bits) domain_clamp_alloc_bitsize(d, bits)
>> +#ifdef CONFIG_PV32
>> +#define domain_clamp_alloc_bitsize(d, bits) \
>> + (((d) && (d)->arch.pv.physaddr_bitsize) \
>> + ? min_t(uint32_t, (d)->arch.pv.physaddr_bitsize, (bits)) \
>> + : (bits))
>
> I'm not quite sure if converting to a macro was a good idea. But now that it's
> done this way, so be it. Just that a couple of issues may want / need sorting:
> - d is now evaluated up to 3 times,
> - indentation is wrong,
> - the use of uint32_t is against ./CODING_STYLE (I dislike the use of min_t()
> anyway, but what do you do when a macro was asked for; of course we could
> [easily] arrange for BITS_PER_LONG to be of type "unsigned int"),
> - the use of "bits" in min_t() doesn't really need parentheses.
>
I'll update it.
>> --- a/xen/arch/x86/pv/domain.c
>> +++ b/xen/arch/x86/pv/domain.c
>> @@ -254,7 +254,11 @@ int switch_compat(struct domain *d)
>> goto undo_and_fail;
>> }
>>
>> - domain_set_alloc_bitsize(d);
>> + if ( MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page )
>
> You mention the change in condition in the revlog (but not in the description),
The updated chunk was based on snippet from Andrew [1], which
used incorrect condition - I've changed it and noted in change log
[1] https://patchwork.kernel.org/comment/26680551/
> and I'm having trouble to follow why ...
>
>> --- a/xen/arch/x86/x86_64/mm.c
>> +++ b/xen/arch/x86/x86_64/mm.c
>> @@ -1119,26 +1119,6 @@ unmap:
>> return ret;
>> }
>>
>> -void domain_set_alloc_bitsize(struct domain *d)
>> -{
The domain_set_alloc_bitsize() inlined in switch_compat() and x86 PV domain
always created as 64bit domain.
At the beginning of switch_compat() there is:
( is_pv_32bit_domain(d) )
return 0;
[2]
above ensures that switch_compat() can be actually called only once (at least it can reach
point [2] only once, because there is no way to reset PV domain state 32bit->64bit
this is original condition which says:
>> - if ( !is_pv_32bit_domain(d) ||
do nothing if !is_pv_32bit_domain(d)
- for inlined code is_pv_32bit_domain(d) == true, so is_pv_32bit_domain(d) can be ignored
>> - (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
do nothing if (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page)
- inlinded code should proceed if this condition is opposite
(MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page)
>> - d->arch.physaddr_bitsize > 0 )
do nothing if d->arch.physaddr_bitsize > 0 (already set)
- inlined code will be executed only once, so (d->arch.physaddr_bitsize ==/!= 0)
can be ignored
>
> ... this 3rd part is going away.
Another observation: MACH2PHYS_COMPAT_NR_ENTRIES(d) is looks like a const, as
(d)->arch.hv_compat_vstart is always 0.
grep -rw hv_compat_vstart ./*
./arch/x86/include/asm/config.h:#define HYPERVISOR_COMPAT_VIRT_START(d) ((d)->arch.hv_compat_vstart)
./arch/x86/include/asm/domain.h: unsigned int hv_compat_vstart;
--
Best regards,
-grygorii
On 08.12.2025 20:21, Grygorii Strashko wrote:
> On 08.12.25 14:44, Jan Beulich wrote:
>> On 28.11.2025 16:22, Grygorii Strashko wrote:
>>> --- a/xen/arch/x86/pv/domain.c
>>> +++ b/xen/arch/x86/pv/domain.c
>>> @@ -254,7 +254,11 @@ int switch_compat(struct domain *d)
>>> goto undo_and_fail;
>>> }
>>>
>>> - domain_set_alloc_bitsize(d);
>>> + if ( MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page )
>>
>> You mention the change in condition in the revlog (but not in the description),
>
> The updated chunk was based on snippet from Andrew [1], which
> used incorrect condition - I've changed it and noted in change log
>
> [1] https://patchwork.kernel.org/comment/26680551/
>
>> and I'm having trouble to follow why ...
>>
>>> --- a/xen/arch/x86/x86_64/mm.c
>>> +++ b/xen/arch/x86/x86_64/mm.c
>>> @@ -1119,26 +1119,6 @@ unmap:
>>> return ret;
>>> }
>>>
>>> -void domain_set_alloc_bitsize(struct domain *d)
>>> -{
>
> The domain_set_alloc_bitsize() inlined in switch_compat() and x86 PV domain
> always created as 64bit domain.
>
> At the beginning of switch_compat() there is:
>
> ( is_pv_32bit_domain(d) )
> return 0;
> [2]
> above ensures that switch_compat() can be actually called only once (at least it can reach
> point [2] only once, because there is no way to reset PV domain state 32bit->64bit
>
> this is original condition which says:
>>> - if ( !is_pv_32bit_domain(d) ||
>
> do nothing if !is_pv_32bit_domain(d)
> - for inlined code is_pv_32bit_domain(d) == true, so is_pv_32bit_domain(d) can be ignored
>
>>> - (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
>
> do nothing if (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page)
> - inlinded code should proceed if this condition is opposite
> (MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page)
>
>>> - d->arch.physaddr_bitsize > 0 )
>
> do nothing if d->arch.physaddr_bitsize > 0 (already set)
> - inlined code will be executed only once, so (d->arch.physaddr_bitsize ==/!= 0)
> can be ignored
This is the crucial point: It being executed only once isn't spelled out
anywhere in the description, and it's not entirely obvious why that would
be. Yes, nowadays it is. Originally (iirc) one could switch the guest back
to 64-bit mode, then again to 32-bit.
>> ... this 3rd part is going away.
>
> Another observation: MACH2PHYS_COMPAT_NR_ENTRIES(d) is looks like a const, as
> (d)->arch.hv_compat_vstart is always 0.
>
> grep -rw hv_compat_vstart ./*
> ./arch/x86/include/asm/config.h:#define HYPERVISOR_COMPAT_VIRT_START(d) ((d)->arch.hv_compat_vstart)
> ./arch/x86/include/asm/domain.h: unsigned int hv_compat_vstart;
This observation isn't directly related here, is it?
Jan
On 09.12.25 10:59, Jan Beulich wrote:
> On 08.12.2025 20:21, Grygorii Strashko wrote:
>> On 08.12.25 14:44, Jan Beulich wrote:
>>> On 28.11.2025 16:22, Grygorii Strashko wrote:
>>>> --- a/xen/arch/x86/pv/domain.c
>>>> +++ b/xen/arch/x86/pv/domain.c
>>>> @@ -254,7 +254,11 @@ int switch_compat(struct domain *d)
>>>> goto undo_and_fail;
>>>> }
>>>>
>>>> - domain_set_alloc_bitsize(d);
>>>> + if ( MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page )
>>>
>>> You mention the change in condition in the revlog (but not in the description),
>>
>> The updated chunk was based on snippet from Andrew [1], which
>> used incorrect condition - I've changed it and noted in change log
>>
>> [1] https://patchwork.kernel.org/comment/26680551/
>>
>>> and I'm having trouble to follow why ...
>>>
>>>> --- a/xen/arch/x86/x86_64/mm.c
>>>> +++ b/xen/arch/x86/x86_64/mm.c
>>>> @@ -1119,26 +1119,6 @@ unmap:
>>>> return ret;
>>>> }
>>>>
>>>> -void domain_set_alloc_bitsize(struct domain *d)
>>>> -{
>>
>> The domain_set_alloc_bitsize() inlined in switch_compat() and x86 PV domain
>> always created as 64bit domain.
>>
>> At the beginning of switch_compat() there is:
>>
>> ( is_pv_32bit_domain(d) )
>> return 0;
>> [2]
>> above ensures that switch_compat() can be actually called only once (at least it can reach
>> point [2] only once, because there is no way to reset PV domain state 32bit->64bit
>>
>> this is original condition which says:
>>>> - if ( !is_pv_32bit_domain(d) ||
>>
>> do nothing if !is_pv_32bit_domain(d)
>> - for inlined code is_pv_32bit_domain(d) == true, so is_pv_32bit_domain(d) can be ignored
>>
>>>> - (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
>>
>> do nothing if (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page)
>> - inlinded code should proceed if this condition is opposite
>> (MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page)
>>
>>>> - d->arch.physaddr_bitsize > 0 )
>>
>> do nothing if d->arch.physaddr_bitsize > 0 (already set)
>> - inlined code will be executed only once, so (d->arch.physaddr_bitsize ==/!= 0)
>> can be ignored
>
> This is the crucial point: It being executed only once isn't spelled out
> anywhere in the description, and it's not entirely obvious why that would
> be. Yes, nowadays it is. Originally (iirc) one could switch the guest back
> to 64-bit mode, then again to 32-bit.
I'll update description.
Or can add it back as !d->arch.physaddr_bitsize to be safe and avoid
confusions?
>
>>> ... this 3rd part is going away.
>>
>> Another observation: MACH2PHYS_COMPAT_NR_ENTRIES(d) is looks like a const, as
>> (d)->arch.hv_compat_vstart is always 0.
>>
>> grep -rw hv_compat_vstart ./*
>> ./arch/x86/include/asm/config.h:#define HYPERVISOR_COMPAT_VIRT_START(d) ((d)->arch.hv_compat_vstart)
>> ./arch/x86/include/asm/domain.h: unsigned int hv_compat_vstart;
>
> This observation isn't directly related here, is it?
Yep. Just found it while investigated.
--
Best regards,
-grygorii
On 09/12/2025 12:24 pm, Grygorii Strashko wrote:
>
>
> On 09.12.25 10:59, Jan Beulich wrote:
>> On 08.12.2025 20:21, Grygorii Strashko wrote:
>>> On 08.12.25 14:44, Jan Beulich wrote:
>>>> On 28.11.2025 16:22, Grygorii Strashko wrote:
>>>>> --- a/xen/arch/x86/pv/domain.c
>>>>> +++ b/xen/arch/x86/pv/domain.c
>>>>> @@ -254,7 +254,11 @@ int switch_compat(struct domain *d)
>>>>> goto undo_and_fail;
>>>>> }
>>>>> - domain_set_alloc_bitsize(d);
>>>>> + if ( MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page )
>>>>
>>>> You mention the change in condition in the revlog (but not in the
>>>> description),
>>>
>>> The updated chunk was based on snippet from Andrew [1], which
>>> used incorrect condition - I've changed it and noted in change log
>>>
>>> [1] https://patchwork.kernel.org/comment/26680551/
>>>
>>>> and I'm having trouble to follow why ...
>>>>
>>>>> --- a/xen/arch/x86/x86_64/mm.c
>>>>> +++ b/xen/arch/x86/x86_64/mm.c
>>>>> @@ -1119,26 +1119,6 @@ unmap:
>>>>> return ret;
>>>>> }
>>>>> -void domain_set_alloc_bitsize(struct domain *d)
>>>>> -{
>>>
>>> The domain_set_alloc_bitsize() inlined in switch_compat() and x86
>>> PV domain
>>> always created as 64bit domain.
>>>
>>> At the beginning of switch_compat() there is:
>>>
>>> ( is_pv_32bit_domain(d) )
>>> return 0;
>>> [2]
>>> above ensures that switch_compat() can be actually called only once
>>> (at least it can reach
>>> point [2] only once, because there is no way to reset PV domain
>>> state 32bit->64bit
>>>
>>> this is original condition which says:
>>>>> - if ( !is_pv_32bit_domain(d) ||
>>>
>>> do nothing if !is_pv_32bit_domain(d)
>>> - for inlined code is_pv_32bit_domain(d) == true, so
>>> is_pv_32bit_domain(d) can be ignored
>>>
>>>>> - (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
>>>
>>> do nothing if (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page)
>>> - inlinded code should proceed if this condition is opposite
>>> (MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page)
>>>
>>>>> - d->arch.physaddr_bitsize > 0 )
>>>
>>> do nothing if d->arch.physaddr_bitsize > 0 (already set)
>>> - inlined code will be executed only once, so
>>> (d->arch.physaddr_bitsize ==/!= 0)
>>> can be ignored
>>
>> This is the crucial point: It being executed only once isn't spelled out
>> anywhere in the description, and it's not entirely obvious why that
>> would
>> be. Yes, nowadays it is. Originally (iirc) one could switch the guest
>> back
>> to 64-bit mode, then again to 32-bit.
I changed it in 02e78311cdc6
>
> I'll update description.
>
> Or can add it back as !d->arch.physaddr_bitsize to be safe and avoid
> confusions?
Please update the description. The function really is singleshot now.
I'd expect MC/DC coverage to complain at leaving the conditional in
place, not that this particular codepath is going to be on the top of
the list for coverage.
>
>>
>>>> ... this 3rd part is going away.
>>>
>>> Another observation: MACH2PHYS_COMPAT_NR_ENTRIES(d) is looks like a
>>> const, as
>>> (d)->arch.hv_compat_vstart is always 0.
>>>
>>> grep -rw hv_compat_vstart ./*
>>> ./arch/x86/include/asm/config.h:#define
>>> HYPERVISOR_COMPAT_VIRT_START(d) ((d)->arch.hv_compat_vstart)
>>> ./arch/x86/include/asm/domain.h: unsigned int hv_compat_vstart;
>>
>> This observation isn't directly related here, is it?
>
> Yep. Just found it while investigated.
>
Don't be deceived by that. pv's dom0_construct() has
HYPERVISOR_COMPAT_VIRT_START(d) =
max_t(unsigned int, m2p_compat_vstart, value);
which hides the write. I've been meaning to fix this for ages.
In fact, I could also submit the inlining of domain_set_alloc_bitsize()
too with relevant history if you'd prefer.
~Andrew
On 09.12.25 14:44, Andrew Cooper wrote:
> On 09/12/2025 12:24 pm, Grygorii Strashko wrote:
>>
>>
>> On 09.12.25 10:59, Jan Beulich wrote:
>>> On 08.12.2025 20:21, Grygorii Strashko wrote:
>>>> On 08.12.25 14:44, Jan Beulich wrote:
>>>>> On 28.11.2025 16:22, Grygorii Strashko wrote:
>>>>>> --- a/xen/arch/x86/pv/domain.c
>>>>>> +++ b/xen/arch/x86/pv/domain.c
>>>>>> @@ -254,7 +254,11 @@ int switch_compat(struct domain *d)
>>>>>> goto undo_and_fail;
>>>>>> }
>>>>>> - domain_set_alloc_bitsize(d);
>>>>>> + if ( MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page )
>>>>>
>>>>> You mention the change in condition in the revlog (but not in the
>>>>> description),
>>>>
>>>> The updated chunk was based on snippet from Andrew [1], which
>>>> used incorrect condition - I've changed it and noted in change log
>>>>
>>>> [1] https://patchwork.kernel.org/comment/26680551/
>>>>
>>>>> and I'm having trouble to follow why ...
>>>>>
>>>>>> --- a/xen/arch/x86/x86_64/mm.c
>>>>>> +++ b/xen/arch/x86/x86_64/mm.c
>>>>>> @@ -1119,26 +1119,6 @@ unmap:
>>>>>> return ret;
>>>>>> }
>>>>>> -void domain_set_alloc_bitsize(struct domain *d)
>>>>>> -{
>>>>
>>>> The domain_set_alloc_bitsize() inlined in switch_compat() and x86
>>>> PV domain
>>>> always created as 64bit domain.
>>>>
>>>> At the beginning of switch_compat() there is:
>>>>
>>>> ( is_pv_32bit_domain(d) )
>>>> return 0;
>>>> [2]
>>>> above ensures that switch_compat() can be actually called only once
>>>> (at least it can reach
>>>> point [2] only once, because there is no way to reset PV domain
>>>> state 32bit->64bit
>>>>
>>>> this is original condition which says:
>>>>>> - if ( !is_pv_32bit_domain(d) ||
>>>>
>>>> do nothing if !is_pv_32bit_domain(d)
>>>> - for inlined code is_pv_32bit_domain(d) == true, so
>>>> is_pv_32bit_domain(d) can be ignored
>>>>
>>>>>> - (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
>>>>
>>>> do nothing if (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page)
>>>> - inlinded code should proceed if this condition is opposite
>>>> (MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page)
>>>>
>>>>>> - d->arch.physaddr_bitsize > 0 )
>>>>
>>>> do nothing if d->arch.physaddr_bitsize > 0 (already set)
>>>> - inlined code will be executed only once, so
>>>> (d->arch.physaddr_bitsize ==/!= 0)
>>>> can be ignored
>>>
>>> This is the crucial point: It being executed only once isn't spelled out
>>> anywhere in the description, and it's not entirely obvious why that
>>> would
>>> be. Yes, nowadays it is. Originally (iirc) one could switch the guest
>>> back
>>> to 64-bit mode, then again to 32-bit.
>
> I changed it in 02e78311cdc6
>
>>
>> I'll update description.
>>
>> Or can add it back as !d->arch.physaddr_bitsize to be safe and avoid
>> confusions?
>
> Please update the description. The function really is singleshot now.
>
> I'd expect MC/DC coverage to complain at leaving the conditional in
> place, not that this particular codepath is going to be on the top of
> the list for coverage.
>
>>
>>>
>>>>> ... this 3rd part is going away.
>>>>
>>>> Another observation: MACH2PHYS_COMPAT_NR_ENTRIES(d) is looks like a
>>>> const, as
>>>> (d)->arch.hv_compat_vstart is always 0.
>>>>
>>>> grep -rw hv_compat_vstart ./*
>>>> ./arch/x86/include/asm/config.h:#define
>>>> HYPERVISOR_COMPAT_VIRT_START(d) ((d)->arch.hv_compat_vstart)
>>>> ./arch/x86/include/asm/domain.h: unsigned int hv_compat_vstart;
>>>
>>> This observation isn't directly related here, is it?
>>
>> Yep. Just found it while investigated.
>>
>
> Don't be deceived by that. pv's dom0_construct() has
>
> HYPERVISOR_COMPAT_VIRT_START(d) =
> max_t(unsigned int, m2p_compat_vstart, value);
>
> which hides the write. I've been meaning to fix this for ages.
>
> In fact, I could also submit the inlining of domain_set_alloc_bitsize()
> too with relevant history if you'd prefer.
Sorry, didn't get it - you wanna take ownership of the whole patch
or part of it?
In general, I'm ok.
In the later case I'll need to handle only domain_clamp_alloc_bitsize(), right?
--
Best regards,
-grygorii
On 09.12.2025 13:44, Andrew Cooper wrote:
> On 09/12/2025 12:24 pm, Grygorii Strashko wrote:
>> On 09.12.25 10:59, Jan Beulich wrote:
>>> On 08.12.2025 20:21, Grygorii Strashko wrote:
>>>> On 08.12.25 14:44, Jan Beulich wrote:
>>>>> On 28.11.2025 16:22, Grygorii Strashko wrote:
>>>>>> --- a/xen/arch/x86/pv/domain.c
>>>>>> +++ b/xen/arch/x86/pv/domain.c
>>>>>> @@ -254,7 +254,11 @@ int switch_compat(struct domain *d)
>>>>>> goto undo_and_fail;
>>>>>> }
>>>>>> - domain_set_alloc_bitsize(d);
>>>>>> + if ( MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page )
>>>>>
>>>>> You mention the change in condition in the revlog (but not in the
>>>>> description),
>>>>
>>>> The updated chunk was based on snippet from Andrew [1], which
>>>> used incorrect condition - I've changed it and noted in change log
>>>>
>>>> [1] https://patchwork.kernel.org/comment/26680551/
>>>>
>>>>> and I'm having trouble to follow why ...
>>>>>
>>>>>> --- a/xen/arch/x86/x86_64/mm.c
>>>>>> +++ b/xen/arch/x86/x86_64/mm.c
>>>>>> @@ -1119,26 +1119,6 @@ unmap:
>>>>>> return ret;
>>>>>> }
>>>>>> -void domain_set_alloc_bitsize(struct domain *d)
>>>>>> -{
>>>>
>>>> The domain_set_alloc_bitsize() inlined in switch_compat() and x86
>>>> PV domain
>>>> always created as 64bit domain.
>>>>
>>>> At the beginning of switch_compat() there is:
>>>>
>>>> ( is_pv_32bit_domain(d) )
>>>> return 0;
>>>> [2]
>>>> above ensures that switch_compat() can be actually called only once
>>>> (at least it can reach
>>>> point [2] only once, because there is no way to reset PV domain
>>>> state 32bit->64bit
>>>>
>>>> this is original condition which says:
>>>>>> - if ( !is_pv_32bit_domain(d) ||
>>>>
>>>> do nothing if !is_pv_32bit_domain(d)
>>>> - for inlined code is_pv_32bit_domain(d) == true, so
>>>> is_pv_32bit_domain(d) can be ignored
>>>>
>>>>>> - (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
>>>>
>>>> do nothing if (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page)
>>>> - inlinded code should proceed if this condition is opposite
>>>> (MACH2PHYS_COMPAT_NR_ENTRIES(d) < max_page)
>>>>
>>>>>> - d->arch.physaddr_bitsize > 0 )
>>>>
>>>> do nothing if d->arch.physaddr_bitsize > 0 (already set)
>>>> - inlined code will be executed only once, so
>>>> (d->arch.physaddr_bitsize ==/!= 0)
>>>> can be ignored
>>>
>>> This is the crucial point: It being executed only once isn't spelled out
>>> anywhere in the description, and it's not entirely obvious why that
>>> would
>>> be. Yes, nowadays it is. Originally (iirc) one could switch the guest
>>> back
>>> to 64-bit mode, then again to 32-bit.
>
> I changed it in 02e78311cdc6
>
>>
>> I'll update description.
>>
>> Or can add it back as !d->arch.physaddr_bitsize to be safe and avoid
>> confusions?
>
> Please update the description. The function really is singleshot now.
+1
Jan
© 2016 - 2025 Red Hat, Inc.