[PATCH v2] xen/x86: guest_access: optimize raw_x_guest() for PV and HVM combinations
Posted by Grygorii Strashko 1 month, 1 week ago
From: Grygorii Strashko <grygorii_strashko@epam.com>

Xen uses the following pattern for the raw_x_guest() functions:

#define raw_copy_to_guest(dst, src, len)       \
    (is_hvm_vcpu(current) ?                     \
     copy_to_user_hvm((dst), (src), (len)) :    \
     copy_to_guest_pv(dst, src, len))

This pattern works as follows, depending on CONFIG_PV/CONFIG_HVM:
- PV=y and HVM=y
  The proper guest access function is selected at run time, depending on the
  domain type.
- PV=y and HVM=n
  Only PV domains are possible. is_hvm_domain/vcpu() constifies to "false",
  so the compiler optimizes the code and skips the HVM specific part.
- PV=n and HVM=y
  Only HVM domains are possible, but is_hvm_domain/vcpu() is not constified,
  so no PV specific code is optimized out by the compiler (see the sketch
  below).
- PV=n and HVM=n
  No guests should be possible. The code still follows the PV path.
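
The asymmetry follows from the shape of is_hvm_domain/vcpu(); a simplified
sketch, paraphrased from xen/include/xen/sched.h (not the literal source):

    /*
     * Simplified, paraphrased sketch of is_hvm_domain().  With HVM=n the
     * IS_ENABLED() check constifies the result to false, so the compiler
     * can drop the HVM branch.  With PV=n (HVM=y) nothing constifies and
     * the check remains a run-time test of d->options.
     */
    static always_inline bool is_hvm_domain(const struct domain *d)
    {
        return IS_ENABLED(CONFIG_HVM) &&
               evaluate_nospec(d->options & XEN_DOMCTL_CDF_hvm);
    }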

Rework the raw_x_guest() code to use static inline functions which account
for the possible PV/HVM configurations above, with the main intention of
optimizing the code for the (PV=n and HVM=y) case, where each helper then
collapses to a direct call to the HVM variant.

For the (PV=n and HVM=n) case return the "len" value, indicating a failure
(no guests should be possible in this case, which means no access to guest
memory should ever happen).
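
As illustration only, under the existing convention that a non-zero return
value is the number of bytes not copied, a hypothetical caller treats this
case the same way as a faulting access:

    /*
     * Hypothetical caller, for illustration: a non-zero return is the
     * number of bytes that could NOT be copied, i.e. a failure.
     */
    if ( raw_copy_to_guest(dst, &val, sizeof(val)) )
        return -EFAULT;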

Finally build arch/x86/usercopy.c only for PV=y.

The measured (bloat-o-meter) improvement for the (PV=n and HVM=y) case is:
  add/remove: 2/9 grow/shrink: 2/90 up/down: 1678/-32560 (-30882)
  Total: Before=1937092, After=1906210, chg -1.59%

Signed-off-by: Grygorii Strashko <grygorii_strashko@epam.com>
[teddy.astie@vates.tech: Suggested to use static inline functions vs macro combinations]
Suggested-by: Teddy Astie <teddy.astie@vates.tech>
---
changes in v2:
- use static inline functions instead of macro combinations

v1: https://patchwork.kernel.org/project/xen-devel/patch/20251031212058.1338332-1-grygorii_strashko@epam.com/

 xen/arch/x86/Makefile                   |  2 +-
 xen/arch/x86/include/asm/guest_access.h | 78 ++++++++++++++++++-------
 2 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 407571c510e1..27f131ffeb61 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -71,7 +71,7 @@ obj-y += time.o
 obj-y += traps-setup.o
 obj-y += traps.o
 obj-$(CONFIG_INTEL) += tsx.o
-obj-y += usercopy.o
+obj-$(CONFIG_PV) += usercopy.o
 obj-y += x86_emulate.o
 obj-$(CONFIG_TBOOT) += tboot.o
 obj-y += hpet.o
diff --git a/xen/arch/x86/include/asm/guest_access.h b/xen/arch/x86/include/asm/guest_access.h
index 69716c8b41bb..576eac9722e6 100644
--- a/xen/arch/x86/include/asm/guest_access.h
+++ b/xen/arch/x86/include/asm/guest_access.h
@@ -13,26 +13,64 @@
 #include <asm/hvm/guest_access.h>
 
 /* Raw access functions: no type checking. */
-#define raw_copy_to_guest(dst, src, len)        \
-    (is_hvm_vcpu(current) ?                     \
-     copy_to_user_hvm((dst), (src), (len)) :    \
-     copy_to_guest_pv(dst, src, len))
-#define raw_copy_from_guest(dst, src, len)      \
-    (is_hvm_vcpu(current) ?                     \
-     copy_from_user_hvm((dst), (src), (len)) :  \
-     copy_from_guest_pv(dst, src, len))
-#define raw_clear_guest(dst,  len)              \
-    (is_hvm_vcpu(current) ?                     \
-     clear_user_hvm((dst), (len)) :             \
-     clear_guest_pv(dst, len))
-#define __raw_copy_to_guest(dst, src, len)      \
-    (is_hvm_vcpu(current) ?                     \
-     copy_to_user_hvm((dst), (src), (len)) :    \
-     __copy_to_guest_pv(dst, src, len))
-#define __raw_copy_from_guest(dst, src, len)    \
-    (is_hvm_vcpu(current) ?                     \
-     copy_from_user_hvm((dst), (src), (len)) :  \
-     __copy_from_guest_pv(dst, src, len))
+static inline unsigned int raw_copy_to_guest(void *to, const void *src,
+                                             unsigned int len)
+{
+    if ( IS_ENABLED(CONFIG_HVM) &&
+         (!IS_ENABLED(CONFIG_PV) || is_hvm_vcpu(current)) )
+        return copy_to_user_hvm(to, src, len);
+    else if ( IS_ENABLED(CONFIG_PV) )
+        return copy_to_guest_pv(to, src, len);
+    else
+        return len;
+}
+
+static inline unsigned int raw_copy_from_guest(void *dst, const void *src,
+                                               unsigned int len)
+{
+    if ( IS_ENABLED(CONFIG_HVM) &&
+         (!IS_ENABLED(CONFIG_PV) || is_hvm_vcpu(current)) )
+        return copy_from_user_hvm(dst, src, len);
+    else if ( IS_ENABLED(CONFIG_PV) )
+        return copy_from_guest_pv(dst, src, len);
+    else
+        return len;
+}
+
+static inline unsigned int raw_clear_guest(void *dst, unsigned int len)
+{
+    if ( IS_ENABLED(CONFIG_HVM) &&
+         (!IS_ENABLED(CONFIG_PV) || is_hvm_vcpu(current)) )
+        return clear_user_hvm(dst, len);
+    else if ( IS_ENABLED(CONFIG_PV) )
+        return clear_guest_pv(dst, len);
+    else
+        return len;
+}
+
+static inline unsigned int __raw_copy_to_guest(void *dst, const void *src,
+                                               unsigned int len)
+{
+    if ( IS_ENABLED(CONFIG_HVM) &&
+         (!IS_ENABLED(CONFIG_PV) || is_hvm_vcpu(current)) )
+        return copy_to_user_hvm(dst, src, len);
+    else if ( IS_ENABLED(CONFIG_PV) )
+        return __copy_to_guest_pv(dst, src, len);
+    else
+        return len;
+}
+
+static inline unsigned int __raw_copy_from_guest(void *dst, const void *src,
+                                                 unsigned int len)
+{
+    if ( IS_ENABLED(CONFIG_HVM) &&
+         (!IS_ENABLED(CONFIG_PV) || is_hvm_vcpu(current)) )
+        return copy_from_user_hvm(dst, src, len);
+    else if ( IS_ENABLED(CONFIG_PV) )
+        return __copy_from_guest_pv(dst, src, len);
+    else
+        return len;
+}
 
 /*
  * Pre-validate a guest handle.
-- 
2.34.1
Re: [PATCH v2] xen/x86: guest_access: optimize raw_x_guest() for PV and HVM combinations
Posted by Jan Beulich 1 month, 1 week ago
On 06.11.2025 23:26, Grygorii Strashko wrote:
> [...]

Just one formal request for now: Please send patches To: the list, with individuals
on Cc: as necessary.

Jan
Re: [PATCH v2] xen/x86: guest_access: optimize raw_x_guest() for PV and HVM combinations
Posted by Grygorii Strashko 1 month, 1 week ago

On 07.11.25 08:52, Jan Beulich wrote:
> On 06.11.2025 23:26, Grygorii Strashko wrote:
>> [...]
> 
> Just one formal request for now: Please send patches To: the list, with individuals
> on Cc: as necessary.

Sure, sorry.

-- 
Best regards,
-grygorii
Re: [PATCH v2] xen/x86: guest_access: optimize raw_x_guest() for PV and HVM combinations
Posted by Jason Andryuk 1 month, 1 week ago

On 2025-11-06 17:26, Grygorii Strashko wrote:
> [...]
> Signed-off-by: Grygorii Strashko <grygorii_strashko@epam.com>
> [teddy.astie@vates.tech: Suggested to use static inline functions vs macro combinations]
> Suggested-by: Teddy Astie <teddy.astie@vates.tech>

I think Teddy's goes before your SoB.

> ---

> diff --git a/xen/arch/x86/include/asm/guest_access.h b/xen/arch/x86/include/asm/guest_access.h
> index 69716c8b41bb..576eac9722e6 100644
> --- a/xen/arch/x86/include/asm/guest_access.h
> +++ b/xen/arch/x86/include/asm/guest_access.h
> @@ -13,26 +13,64 @@
>   #include <asm/hvm/guest_access.h>
>   
> [...]
> +static inline unsigned int raw_copy_to_guest(void *to, const void *src,

Maybe s/to/dst/ to keep this consistent with the rest?

> +                                             unsigned int len)
> +{
> +    if ( IS_ENABLED(CONFIG_HVM) &&
> +         (!IS_ENABLED(CONFIG_PV) || is_hvm_vcpu(current)) )

Since this is repeated, maybe put into a helper like 
use_hvm_access(current)?
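
A minimal sketch of such a helper, using the name suggested above
(hypothetical, not part of the posted patch):

    /*
     * Hypothetical helper per the suggestion above: decide whether the
     * HVM access path applies to the given vCPU.
     */
    static inline bool use_hvm_access(const struct vcpu *v)
    {
        return IS_ENABLED(CONFIG_HVM) &&
               (!IS_ENABLED(CONFIG_PV) || is_hvm_vcpu(v));
    }

Each raw_x_guest() condition would then collapse to
"if ( use_hvm_access(current) )".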

Thanks,
Jason