xen/arm64: Remove vreg_emulate_sysreg32

[PATCH v2] xen/arm64: Remove vreg_emulate_sysreg32

Posted by Michal Orzel 4 years, 6 months ago

According to ARMv8A architecture, AArch64 registers
are 64bit wide even though in many cases the upper
32bit is reserved. Therefore there is no need for
function vreg_emulate_sysreg32 on arm64. This means
that we can have just one function vreg_emulate_sysreg
using new function pointer:
typedef bool (*vreg_reg_fn_t)(struct cpu_user_regs *regs,
                              register_t *r, bool read);

Modify vreg_emulate_cp32 to use the new function pointer
as well.

This change allows 64bit registers to be used properly in AArch64
state. In the case of AArch32, the upper 32 bits of AArch64
registers are inaccessible and are ignored (D1.20.1 ARM DDI 0487A.j).

Signed-off-by: Michal Orzel <michal.orzel@arm.com>
---
 xen/arch/arm/arm64/vsysreg.c    |  2 +-
 xen/arch/arm/vcpreg.c           | 16 ++++++++++----
 xen/arch/arm/vgic-v3.c          |  2 +-
 xen/arch/arm/vtimer.c           | 11 +++++-----
 xen/include/asm-arm/processor.h |  4 ++--
 xen/include/asm-arm/vreg.h      | 38 ++++++---------------------------
 6 files changed, 29 insertions(+), 44 deletions(-)

diff --git a/xen/arch/arm/arm64/vsysreg.c b/xen/arch/arm/arm64/vsysreg.c
index caf17174b8..73fa2ca9ae 100644
--- a/xen/arch/arm/arm64/vsysreg.c
+++ b/xen/arch/arm/arm64/vsysreg.c
@@ -64,7 +64,7 @@ TVM_REG(CONTEXTIDR_EL1)
     {                                                                   \
         bool res;                                                       \
                                                                         \
-        res = vreg_emulate_sysreg64(regs, hsr, vreg_emulate_##reg);     \
+        res = vreg_emulate_sysreg(regs, hsr, vreg_emulate_##reg);       \
         ASSERT(res);                                                    \
         break;                                                          \
     }
diff --git a/xen/arch/arm/vcpreg.c b/xen/arch/arm/vcpreg.c
index e3ce56d875..be1ec08159 100644
--- a/xen/arch/arm/vcpreg.c
+++ b/xen/arch/arm/vcpreg.c
@@ -57,9 +57,17 @@
 #define WRITE_SYSREG_SZ(sz, val, sysreg...)  WRITE_SYSREG##sz(val, sysreg)
 #endif
 
+/*
+ * type32_t is defined as register_t due to the vreg_emulate_cp32 and
+ * vreg_emulate_sysreg taking function pointer with register_t type used for
+ * passing register's value.
+ */
+typedef register_t type32_t;
+typedef uint64_t type64_t ;
+
 /* The name is passed from the upper macro to workaround macro expansion. */
 #define TVM_REG(sz, func, reg...)                                           \
-static bool func(struct cpu_user_regs *regs, uint##sz##_t *r, bool read)    \
+static bool func(struct cpu_user_regs *regs, type##sz##_t *r, bool read)    \
 {                                                                           \
     struct vcpu *v = current;                                               \
     bool cache_enabled = vcpu_has_cache_enabled(v);                         \
@@ -83,7 +91,7 @@ static bool func(struct cpu_user_regs *regs, uint##sz##_t *r, bool read)    \
 
 #else /* CONFIG_ARM_64 */
 #define TVM_REG32_COMBINED(lowreg, hireg, xreg)                             \
-static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, uint32_t *r,    \
+static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, register_t *r,  \
                                 bool read, bool hi)                         \
 {                                                                           \
     struct vcpu *v = current;                                               \
@@ -108,13 +116,13 @@ static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, uint32_t *r,    \
     return true;                                                            \
 }                                                                           \
                                                                             \
-static bool vreg_emulate_##lowreg(struct cpu_user_regs *regs, uint32_t *r,  \
+static bool vreg_emulate_##lowreg(struct cpu_user_regs *regs, register_t *r,\
                                   bool read)                                \
 {                                                                           \
     return vreg_emulate_##xreg(regs, r, read, false);                       \
 }                                                                           \
                                                                             \
-static bool vreg_emulate_##hireg(struct cpu_user_regs *regs, uint32_t *r,   \
+static bool vreg_emulate_##hireg(struct cpu_user_regs *regs, register_t *r, \
                                  bool read)                                 \
 {                                                                           \
     return vreg_emulate_##xreg(regs, r, read, true);                        \
diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
index 613f37abab..cb5a70c42e 100644
--- a/xen/arch/arm/vgic-v3.c
+++ b/xen/arch/arm/vgic-v3.c
@@ -1531,7 +1531,7 @@ static bool vgic_v3_emulate_sysreg(struct cpu_user_regs *regs, union hsr hsr)
     switch ( hsr.bits & HSR_SYSREG_REGS_MASK )
     {
     case HSR_SYSREG_ICC_SGI1R_EL1:
-        return vreg_emulate_sysreg64(regs, hsr, vgic_v3_emulate_sgi1r);
+        return vreg_emulate_sysreg(regs, hsr, vgic_v3_emulate_sgi1r);
 
     default:
         return false;
diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
index 167fc6127a..0196951af4 100644
--- a/xen/arch/arm/vtimer.c
+++ b/xen/arch/arm/vtimer.c
@@ -162,7 +162,8 @@ void virt_timer_restore(struct vcpu *v)
     WRITE_SYSREG(v->arch.virt_timer.ctl, CNTV_CTL_EL0);
 }
 
-static bool vtimer_cntp_ctl(struct cpu_user_regs *regs, uint32_t *r, bool read)
+static bool vtimer_cntp_ctl(struct cpu_user_regs *regs, register_t *r,
+                            bool read)
 {
     struct vcpu *v = current;
     s_time_t expires;
@@ -197,7 +198,7 @@ static bool vtimer_cntp_ctl(struct cpu_user_regs *regs, uint32_t *r, bool read)
     return true;
 }
 
-static bool vtimer_cntp_tval(struct cpu_user_regs *regs, uint32_t *r,
+static bool vtimer_cntp_tval(struct cpu_user_regs *regs, register_t *r,
                              bool read)
 {
     struct vcpu *v = current;
@@ -316,11 +317,11 @@ static bool vtimer_emulate_sysreg(struct cpu_user_regs *regs, union hsr hsr)
     switch ( hsr.bits & HSR_SYSREG_REGS_MASK )
     {
     case HSR_SYSREG_CNTP_CTL_EL0:
-        return vreg_emulate_sysreg32(regs, hsr, vtimer_cntp_ctl);
+        return vreg_emulate_sysreg(regs, hsr, vtimer_cntp_ctl);
     case HSR_SYSREG_CNTP_TVAL_EL0:
-        return vreg_emulate_sysreg32(regs, hsr, vtimer_cntp_tval);
+        return vreg_emulate_sysreg(regs, hsr, vtimer_cntp_tval);
     case HSR_SYSREG_CNTP_CVAL_EL0:
-        return vreg_emulate_sysreg64(regs, hsr, vtimer_cntp_cval);
+        return vreg_emulate_sysreg(regs, hsr, vtimer_cntp_cval);
 
     default:
         return false;
diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index 2577e9a244..2058b69447 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -484,9 +484,9 @@ extern register_t __cpu_logical_map[];
 #define CNTKCTL_EL1_EL0PTEN  (1u<<9) /* Expose phys timer registers to EL0 */
 
 /* Timer control registers */
-#define CNTx_CTL_ENABLE   (1u<<0)  /* Enable timer */
+#define CNTx_CTL_ENABLE   (1ul<<0)  /* Enable timer */
 #define CNTx_CTL_MASK     (1ul<<1)  /* Mask IRQ */
-#define CNTx_CTL_PENDING  (1u<<2)  /* IRQ pending */
+#define CNTx_CTL_PENDING  (1ul<<2)  /* IRQ pending */
 
 /* Timer frequency mask */
 #define CNTFRQ_MASK       GENMASK(31, 0)
diff --git a/xen/include/asm-arm/vreg.h b/xen/include/asm-arm/vreg.h
index 1253753833..fa2f4cdb17 100644
--- a/xen/include/asm-arm/vreg.h
+++ b/xen/include/asm-arm/vreg.h
@@ -4,13 +4,13 @@
 #ifndef __ASM_ARM_VREG__
 #define __ASM_ARM_VREG__
 
-typedef bool (*vreg_reg32_fn_t)(struct cpu_user_regs *regs, uint32_t *r,
-                                   bool read);
 typedef bool (*vreg_reg64_fn_t)(struct cpu_user_regs *regs, uint64_t *r,
                                    bool read);
+typedef bool (*vreg_reg_fn_t)(struct cpu_user_regs *regs, register_t *r,
+                                   bool read);
 
 static inline bool vreg_emulate_cp32(struct cpu_user_regs *regs, union hsr hsr,
-                                     vreg_reg32_fn_t fn)
+                                     vreg_reg_fn_t fn)
 {
     struct hsr_cp32 cp32 = hsr.cp32;
     /*
@@ -18,7 +18,7 @@ static inline bool vreg_emulate_cp32(struct cpu_user_regs *regs, union hsr hsr,
      * implementation error in the emulation (such as not correctly
      * setting r).
      */
-    uint32_t r = 0;
+    register_t r = 0;
     bool ret;
 
     if ( !cp32.read )
@@ -64,11 +64,11 @@ static inline bool vreg_emulate_cp64(struct cpu_user_regs *regs, union hsr hsr,
 }
 
 #ifdef CONFIG_ARM_64
-static inline bool vreg_emulate_sysreg32(struct cpu_user_regs *regs, union hsr hsr,
-                                         vreg_reg32_fn_t fn)
+static inline bool vreg_emulate_sysreg(struct cpu_user_regs *regs, union hsr hsr,
+                                         vreg_reg_fn_t fn)
 {
     struct hsr_sysreg sysreg = hsr.sysreg;
-    uint32_t r = 0;
+    register_t r = 0;
     bool ret;
 
     if ( !sysreg.read )
@@ -81,30 +81,6 @@ static inline bool vreg_emulate_sysreg32(struct cpu_user_regs *regs, union hsr h
 
     return ret;
 }
-
-static inline bool vreg_emulate_sysreg64(struct cpu_user_regs *regs, union hsr hsr,
-                                         vreg_reg64_fn_t fn)
-{
-    struct hsr_sysreg sysreg = hsr.sysreg;
-    /*
-     * Initialize to zero to avoid leaking data if there is an
-     * implementation error in the emulation (such as not correctly
-     * setting x).
-     */
-    uint64_t x = 0;
-    bool ret;
-
-    if ( !sysreg.read )
-        x = get_user_reg(regs, sysreg.reg);
-
-    ret = fn(regs, &x, sysreg.read);
-
-    if ( ret && sysreg.read )
-        set_user_reg(regs, sysreg.reg, x);
-
-    return ret;
-}
-
 #endif
 
 #define VREG_REG_MASK(size) ((~0UL) >> (BITS_PER_LONG - ((1 << (size)) * 8)))
-- 
2.29.0

Re: [PATCH v2] xen/arm64: Remove vreg_emulate_sysreg32

Posted by Julien Grall 4 years, 5 months ago

Hi Michal,

On 29/07/2021 11:42, Michal Orzel wrote:
> According to ARMv8A architecture, AArch64 registers
> are 64bit wide even though in many cases the upper
> 32bit is reserved. Therefore there is no need for
> function vreg_emulate_sysreg32 on arm64. This means
> that we can have just one function vreg_emulate_sysreg
> using new function pointer:
> typedef bool (*vreg_reg_fn_t)(struct cpu_user_regs *regs,
>                                register_t *r, bool read);
> 
> Modify vreg_emulate_cp32 to use the new function pointer
> as well.
> 
> This change allows to properly use 64bit registers in AArch64
> state and in case of AArch32 the upper 32 bits of AArch64
> registers are inaccessible and are ignored(D1.20.1 ARM DDI 0487A.j).
> 
> Signed-off-by: Michal Orzel <michal.orzel@arm.com>
> ---
>   xen/arch/arm/arm64/vsysreg.c    |  2 +-
>   xen/arch/arm/vcpreg.c           | 16 ++++++++++----
>   xen/arch/arm/vgic-v3.c          |  2 +-
>   xen/arch/arm/vtimer.c           | 11 +++++-----
>   xen/include/asm-arm/processor.h |  4 ++--
>   xen/include/asm-arm/vreg.h      | 38 ++++++---------------------------
>   6 files changed, 29 insertions(+), 44 deletions(-)
> 
> diff --git a/xen/arch/arm/arm64/vsysreg.c b/xen/arch/arm/arm64/vsysreg.c
> index caf17174b8..73fa2ca9ae 100644
> --- a/xen/arch/arm/arm64/vsysreg.c
> +++ b/xen/arch/arm/arm64/vsysreg.c
> @@ -64,7 +64,7 @@ TVM_REG(CONTEXTIDR_EL1)
>       {                                                                   \
>           bool res;                                                       \
>                                                                           \
> -        res = vreg_emulate_sysreg64(regs, hsr, vreg_emulate_##reg);     \
> +        res = vreg_emulate_sysreg(regs, hsr, vreg_emulate_##reg);       \
>           ASSERT(res);                                                    \
>           break;                                                          \
>       }
> diff --git a/xen/arch/arm/vcpreg.c b/xen/arch/arm/vcpreg.c
> index e3ce56d875..be1ec08159 100644
> --- a/xen/arch/arm/vcpreg.c
> +++ b/xen/arch/arm/vcpreg.c
> @@ -57,9 +57,17 @@
>   #define WRITE_SYSREG_SZ(sz, val, sysreg...)  WRITE_SYSREG##sz(val, sysreg)
>   #endif
>   
> +/*
> + * type32_t is defined as register_t due to the vreg_emulate_cp32 and
> + * vreg_emulate_sysreg taking function pointer with register_t type used for
> + * passing register's value.
> + */
> +typedef register_t type32_t;
> +typedef uint64_t type64_t ;

NIT: spurious space before ;.

> +
>   /* The name is passed from the upper macro to workaround macro expansion. */
>   #define TVM_REG(sz, func, reg...)                                           \
> -static bool func(struct cpu_user_regs *regs, uint##sz##_t *r, bool read)    \
> +static bool func(struct cpu_user_regs *regs, type##sz##_t *r, bool read)    \
>   {                                                                           \
>       struct vcpu *v = current;                                               \
>       bool cache_enabled = vcpu_has_cache_enabled(v);                         \
> @@ -83,7 +91,7 @@ static bool func(struct cpu_user_regs *regs, uint##sz##_t *r, bool read)    \
>   
>   #else /* CONFIG_ARM_64 */
>   #define TVM_REG32_COMBINED(lowreg, hireg, xreg)                             \
> -static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, uint32_t *r,    \
> +static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, register_t *r,  \
>                                   bool read, bool hi)                         \
>   {                                                                           \
>       struct vcpu *v = current;                                               \
> @@ -108,13 +116,13 @@ static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, uint32_t *r,    \
>       return true;                                                            \
>   }                                                                           \
>                                                                               \
> -static bool vreg_emulate_##lowreg(struct cpu_user_regs *regs, uint32_t *r,  \
> +static bool vreg_emulate_##lowreg(struct cpu_user_regs *regs, register_t *r,\
>                                     bool read)                                \
>   {                                                                           \
>       return vreg_emulate_##xreg(regs, r, read, false);                       \
>   }                                                                           \
>                                                                               \
> -static bool vreg_emulate_##hireg(struct cpu_user_regs *regs, uint32_t *r,   \
> +static bool vreg_emulate_##hireg(struct cpu_user_regs *regs, register_t *r, \
>                                    bool read)                                 \
>   {                                                                           \
>       return vreg_emulate_##xreg(regs, r, read, true);                        \
> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
> index 613f37abab..cb5a70c42e 100644
> --- a/xen/arch/arm/vgic-v3.c
> +++ b/xen/arch/arm/vgic-v3.c
> @@ -1531,7 +1531,7 @@ static bool vgic_v3_emulate_sysreg(struct cpu_user_regs *regs, union hsr hsr)
>       switch ( hsr.bits & HSR_SYSREG_REGS_MASK )
>       {
>       case HSR_SYSREG_ICC_SGI1R_EL1:
> -        return vreg_emulate_sysreg64(regs, hsr, vgic_v3_emulate_sgi1r);
> +        return vreg_emulate_sysreg(regs, hsr, vgic_v3_emulate_sgi1r);
>   
>       default:
>           return false;
> diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
> index 167fc6127a..0196951af4 100644
> --- a/xen/arch/arm/vtimer.c
> +++ b/xen/arch/arm/vtimer.c
> @@ -162,7 +162,8 @@ void virt_timer_restore(struct vcpu *v)
>       WRITE_SYSREG(v->arch.virt_timer.ctl, CNTV_CTL_EL0);
>   }
>   
> -static bool vtimer_cntp_ctl(struct cpu_user_regs *regs, uint32_t *r, bool read)
> +static bool vtimer_cntp_ctl(struct cpu_user_regs *regs, register_t *r,
> +                            bool read)
>   {
>       struct vcpu *v = current;
>       s_time_t expires;
> @@ -197,7 +198,7 @@ static bool vtimer_cntp_ctl(struct cpu_user_regs *regs, uint32_t *r, bool read)
>       return true;
>   }
>   
> -static bool vtimer_cntp_tval(struct cpu_user_regs *regs, uint32_t *r,
> +static bool vtimer_cntp_tval(struct cpu_user_regs *regs, register_t *r,
>                                bool read)
>   {
>       struct vcpu *v = current;
> @@ -316,11 +317,11 @@ static bool vtimer_emulate_sysreg(struct cpu_user_regs *regs, union hsr hsr)
>       switch ( hsr.bits & HSR_SYSREG_REGS_MASK )
>       {
>       case HSR_SYSREG_CNTP_CTL_EL0:
> -        return vreg_emulate_sysreg32(regs, hsr, vtimer_cntp_ctl);
> +        return vreg_emulate_sysreg(regs, hsr, vtimer_cntp_ctl);
>       case HSR_SYSREG_CNTP_TVAL_EL0:
> -        return vreg_emulate_sysreg32(regs, hsr, vtimer_cntp_tval);
> +        return vreg_emulate_sysreg(regs, hsr, vtimer_cntp_tval);
>       case HSR_SYSREG_CNTP_CVAL_EL0:
> -        return vreg_emulate_sysreg64(regs, hsr, vtimer_cntp_cval);
> +        return vreg_emulate_sysreg(regs, hsr, vtimer_cntp_cval);
>   
>       default:
>           return false;
> diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
> index 2577e9a244..2058b69447 100644
> --- a/xen/include/asm-arm/processor.h
> +++ b/xen/include/asm-arm/processor.h
> @@ -484,9 +484,9 @@ extern register_t __cpu_logical_map[];
>   #define CNTKCTL_EL1_EL0PTEN  (1u<<9) /* Expose phys timer registers to EL0 */
>   
>   /* Timer control registers */
> -#define CNTx_CTL_ENABLE   (1u<<0)  /* Enable timer */
> +#define CNTx_CTL_ENABLE   (1ul<<0)  /* Enable timer */
>   #define CNTx_CTL_MASK     (1ul<<1)  /* Mask IRQ */
> -#define CNTx_CTL_PENDING  (1u<<2)  /* IRQ pending */
> +#define CNTx_CTL_PENDING  (1ul<<2)  /* IRQ pending */
I would suggest mentioning in the commit message why this is necessary.
AFAICT, it is not strictly necessary because you left ctl defined as a
uint32_t. So I am guessing you only keep it for hardening purposes?

If so how about adding:

"Take the opportunity to switch CNTx_CTL_* to use UL to avoid any 
surprise with the negation of any bits (as used in vtimer_cntp_ctl)".

The rest of the patch looks fine. So I would be happy to deal with the 
fixes on commit:

Reviewed-by: Julien Grall <jgrall@amazon.com>

Cheers,

-- 
Julien Grall

Re: [PATCH v2] xen/arm64: Remove vreg_emulate_sysreg32

Posted by Michal Orzel 4 years, 5 months ago

Hi Julien,

On 06.09.2021 11:07, Julien Grall wrote:
> Hi Michal,
> 
> On 29/07/2021 11:42, Michal Orzel wrote:
>> According to ARMv8A architecture, AArch64 registers
>> are 64bit wide even though in many cases the upper
>> 32bit is reserved. Therefore there is no need for
>> function vreg_emulate_sysreg32 on arm64. This means
>> that we can have just one function vreg_emulate_sysreg
>> using new function pointer:
>> typedef bool (*vreg_reg_fn_t)(struct cpu_user_regs *regs,
>>                                register_t *r, bool read);
>>
>> Modify vreg_emulate_cp32 to use the new function pointer
>> as well.
>>
>> This change allows to properly use 64bit registers in AArch64
>> state and in case of AArch32 the upper 32 bits of AArch64
>> registers are inaccessible and are ignored(D1.20.1 ARM DDI 0487A.j).
>>
>> Signed-off-by: Michal Orzel <michal.orzel@arm.com>
>> ---
>>   xen/arch/arm/arm64/vsysreg.c    |  2 +-
>>   xen/arch/arm/vcpreg.c           | 16 ++++++++++----
>>   xen/arch/arm/vgic-v3.c          |  2 +-
>>   xen/arch/arm/vtimer.c           | 11 +++++-----
>>   xen/include/asm-arm/processor.h |  4 ++--
>>   xen/include/asm-arm/vreg.h      | 38 ++++++---------------------------
>>   6 files changed, 29 insertions(+), 44 deletions(-)
>>
>> diff --git a/xen/arch/arm/arm64/vsysreg.c b/xen/arch/arm/arm64/vsysreg.c
>> index caf17174b8..73fa2ca9ae 100644
>> --- a/xen/arch/arm/arm64/vsysreg.c
>> +++ b/xen/arch/arm/arm64/vsysreg.c
>> @@ -64,7 +64,7 @@ TVM_REG(CONTEXTIDR_EL1)
>>       {                                                                   \
>>           bool res;                                                       \
>>                                                                           \
>> -        res = vreg_emulate_sysreg64(regs, hsr, vreg_emulate_##reg);     \
>> +        res = vreg_emulate_sysreg(regs, hsr, vreg_emulate_##reg);       \
>>           ASSERT(res);                                                    \
>>           break;                                                          \
>>       }
>> diff --git a/xen/arch/arm/vcpreg.c b/xen/arch/arm/vcpreg.c
>> index e3ce56d875..be1ec08159 100644
>> --- a/xen/arch/arm/vcpreg.c
>> +++ b/xen/arch/arm/vcpreg.c
>> @@ -57,9 +57,17 @@
>>   #define WRITE_SYSREG_SZ(sz, val, sysreg...)  WRITE_SYSREG##sz(val, sysreg)
>>   #endif
>>   +/*
>> + * type32_t is defined as register_t due to the vreg_emulate_cp32 and
>> + * vreg_emulate_sysreg taking function pointer with register_t type used for
>> + * passing register's value.
>> + */
>> +typedef register_t type32_t;
>> +typedef uint64_t type64_t ;
> 
> NIT: spurious space before ;.
> 
>> +
>>   /* The name is passed from the upper macro to workaround macro expansion. */
>>   #define TVM_REG(sz, func, reg...)                                           \
>> -static bool func(struct cpu_user_regs *regs, uint##sz##_t *r, bool read)    \
>> +static bool func(struct cpu_user_regs *regs, type##sz##_t *r, bool read)    \
>>   {                                                                           \
>>       struct vcpu *v = current;                                               \
>>       bool cache_enabled = vcpu_has_cache_enabled(v);                         \
>> @@ -83,7 +91,7 @@ static bool func(struct cpu_user_regs *regs, uint##sz##_t *r, bool read)    \
>>     #else /* CONFIG_ARM_64 */
>>   #define TVM_REG32_COMBINED(lowreg, hireg, xreg)                             \
>> -static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, uint32_t *r,    \
>> +static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, register_t *r,  \
>>                                   bool read, bool hi)                         \
>>   {                                                                           \
>>       struct vcpu *v = current;                                               \
>> @@ -108,13 +116,13 @@ static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, uint32_t *r,    \
>>       return true;                                                            \
>>   }                                                                           \
>>                                                                               \
>> -static bool vreg_emulate_##lowreg(struct cpu_user_regs *regs, uint32_t *r,  \
>> +static bool vreg_emulate_##lowreg(struct cpu_user_regs *regs, register_t *r,\
>>                                     bool read)                                \
>>   {                                                                           \
>>       return vreg_emulate_##xreg(regs, r, read, false);                       \
>>   }                                                                           \
>>                                                                               \
>> -static bool vreg_emulate_##hireg(struct cpu_user_regs *regs, uint32_t *r,   \
>> +static bool vreg_emulate_##hireg(struct cpu_user_regs *regs, register_t *r, \
>>                                    bool read)                                 \
>>   {                                                                           \
>>       return vreg_emulate_##xreg(regs, r, read, true);                        \
>> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
>> index 613f37abab..cb5a70c42e 100644
>> --- a/xen/arch/arm/vgic-v3.c
>> +++ b/xen/arch/arm/vgic-v3.c
>> @@ -1531,7 +1531,7 @@ static bool vgic_v3_emulate_sysreg(struct cpu_user_regs *regs, union hsr hsr)
>>       switch ( hsr.bits & HSR_SYSREG_REGS_MASK )
>>       {
>>       case HSR_SYSREG_ICC_SGI1R_EL1:
>> -        return vreg_emulate_sysreg64(regs, hsr, vgic_v3_emulate_sgi1r);
>> +        return vreg_emulate_sysreg(regs, hsr, vgic_v3_emulate_sgi1r);
>>         default:
>>           return false;
>> diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
>> index 167fc6127a..0196951af4 100644
>> --- a/xen/arch/arm/vtimer.c
>> +++ b/xen/arch/arm/vtimer.c
>> @@ -162,7 +162,8 @@ void virt_timer_restore(struct vcpu *v)
>>       WRITE_SYSREG(v->arch.virt_timer.ctl, CNTV_CTL_EL0);
>>   }
>>   -static bool vtimer_cntp_ctl(struct cpu_user_regs *regs, uint32_t *r, bool read)
>> +static bool vtimer_cntp_ctl(struct cpu_user_regs *regs, register_t *r,
>> +                            bool read)
>>   {
>>       struct vcpu *v = current;
>>       s_time_t expires;
>> @@ -197,7 +198,7 @@ static bool vtimer_cntp_ctl(struct cpu_user_regs *regs, uint32_t *r, bool read)
>>       return true;
>>   }
>>   -static bool vtimer_cntp_tval(struct cpu_user_regs *regs, uint32_t *r,
>> +static bool vtimer_cntp_tval(struct cpu_user_regs *regs, register_t *r,
>>                                bool read)
>>   {
>>       struct vcpu *v = current;
>> @@ -316,11 +317,11 @@ static bool vtimer_emulate_sysreg(struct cpu_user_regs *regs, union hsr hsr)
>>       switch ( hsr.bits & HSR_SYSREG_REGS_MASK )
>>       {
>>       case HSR_SYSREG_CNTP_CTL_EL0:
>> -        return vreg_emulate_sysreg32(regs, hsr, vtimer_cntp_ctl);
>> +        return vreg_emulate_sysreg(regs, hsr, vtimer_cntp_ctl);
>>       case HSR_SYSREG_CNTP_TVAL_EL0:
>> -        return vreg_emulate_sysreg32(regs, hsr, vtimer_cntp_tval);
>> +        return vreg_emulate_sysreg(regs, hsr, vtimer_cntp_tval);
>>       case HSR_SYSREG_CNTP_CVAL_EL0:
>> -        return vreg_emulate_sysreg64(regs, hsr, vtimer_cntp_cval);
>> +        return vreg_emulate_sysreg(regs, hsr, vtimer_cntp_cval);
>>         default:
>>           return false;
>> diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
>> index 2577e9a244..2058b69447 100644
>> --- a/xen/include/asm-arm/processor.h
>> +++ b/xen/include/asm-arm/processor.h
>> @@ -484,9 +484,9 @@ extern register_t __cpu_logical_map[];
>>   #define CNTKCTL_EL1_EL0PTEN  (1u<<9) /* Expose phys timer registers to EL0 */
>>     /* Timer control registers */
>> -#define CNTx_CTL_ENABLE   (1u<<0)  /* Enable timer */
>> +#define CNTx_CTL_ENABLE   (1ul<<0)  /* Enable timer */
>>   #define CNTx_CTL_MASK     (1ul<<1)  /* Mask IRQ */
>> -#define CNTx_CTL_PENDING  (1u<<2)  /* IRQ pending */
>> +#define CNTx_CTL_PENDING  (1ul<<2)  /* IRQ pending */
> I would suggest to mention in the commit message why this is necessary. AFAICT, it is not strictly necessary because you left ctl defined as a uin32_t. So I am guessing you only keep it for hardening purpose?
> 
> If so how about adding:
> 
> "Take the opportunity to switch CNTx_CTL_* to use UL to avoid any surprise with the negation of any bits (as used in vtimer_cntp_ctl)".
> 
> The rest of the patch looks fine. So I would be happy to deal with the fixes on commit:
Please do. Thanks.
> 
> Reviewed-by: Julien Grall <jgrall@amazon.com>
> 
> Cheers,
> 
Cheers,
Michal

Re: [PATCH v2] xen/arm64: Remove vreg_emulate_sysreg32

Posted by Julien Grall 4 years, 5 months ago


On 06/09/2021 10:09, Michal Orzel wrote:
> On 06.09.2021 11:07, Julien Grall wrote:
>> The rest of the patch looks fine. So I would be happy to deal with the fixes on commit:
> Please do. Thanks.

Pushed. I have also re-wrapped the commit message to 72 characters per line.

Cheers,

-- 
Julien Grall

Re: [PATCH v2] xen/arm64: Remove vreg_emulate_sysreg32

Posted by Julien Grall 4 years, 6 months ago

Hi Michal,

On 29/07/2021 11:42, Michal Orzel wrote:
> According to ARMv8A architecture, AArch64 registers
> are 64bit wide even though in many cases the upper
> 32bit is reserved. Therefore there is no need for
> function vreg_emulate_sysreg32 on arm64. This means
> that we can have just one function vreg_emulate_sysreg
> using new function pointer:
> typedef bool (*vreg_reg_fn_t)(struct cpu_user_regs *regs,
>                                register_t *r, bool read);
> 
> Modify vreg_emulate_cp32 to use the new function pointer
> as well.
> 
> This change allows to properly use 64bit registers in AArch64
> state and in case of AArch32 the upper 32 bits of AArch64
> registers are inaccessible and are ignored(D1.20.1 ARM DDI 0487A.j).

What you wrote only says that the bits are ignored. It doesn't say 
whether the bits will be 0.

They probably are, but as I wrote yesterday, I couldn't confirm it.

> 
> Signed-off-by: Michal Orzel <michal.orzel@arm.com>

Please provide a change log.

Cheers,

-- 
Julien Grall

Re: [PATCH v2] xen/arm64: Remove vreg_emulate_sysreg32

Posted by Michal Orzel 4 years, 6 months ago

Hi Julien,

On 29.07.2021 13:20, Julien Grall wrote:
> Hi Michal,
> 
> On 29/07/2021 11:42, Michal Orzel wrote:
>> According to ARMv8A architecture, AArch64 registers
>> are 64bit wide even though in many cases the upper
>> 32bit is reserved. Therefore there is no need for
>> function vreg_emulate_sysreg32 on arm64. This means
>> that we can have just one function vreg_emulate_sysreg
>> using new function pointer:
>> typedef bool (*vreg_reg_fn_t)(struct cpu_user_regs *regs,
>>                                register_t *r, bool read);
>>
>> Modify vreg_emulate_cp32 to use the new function pointer
>> as well.
>>
>> This change allows to properly use 64bit registers in AArch64
>> state and in case of AArch32 the upper 32 bits of AArch64
>> registers are inaccessible and are ignored(D1.20.1 ARM DDI 0487A.j).
> 
> What you wrote only says that the bits are ignored. It doesn't say whether the bits will be 0.
> 
> They are probably, but as I wrote yesterday, I couldn't confirm it.
> 
Should I then remove this part of the commit message, or write the following instead?
"We can assume that those bits will be 0 but the architecture
reference manual does not clarify this."
>>
>> Signed-off-by: Michal Orzel <michal.orzel@arm.com>
> 
> Please provide a change log.
> 
Ok.
> Cheers,
> 
Cheers,

Re: [PATCH v2] xen/arm64: Remove vreg_emulate_sysreg32

Posted by Julien Grall 4 years, 6 months ago

On 29/07/2021 12:47, Michal Orzel wrote:
> Hi Julien,

Hi Michal,

> On 29.07.2021 13:20, Julien Grall wrote:
>> Hi Michal,
>>
>> On 29/07/2021 11:42, Michal Orzel wrote:
>>> According to ARMv8A architecture, AArch64 registers
>>> are 64bit wide even though in many cases the upper
>>> 32bit is reserved. Therefore there is no need for
>>> function vreg_emulate_sysreg32 on arm64. This means
>>> that we can have just one function vreg_emulate_sysreg
>>> using new function pointer:
>>> typedef bool (*vreg_reg_fn_t)(struct cpu_user_regs *regs,
>>>                                 register_t *r, bool read);
>>>
>>> Modify vreg_emulate_cp32 to use the new function pointer
>>> as well.
>>>
>>> This change allows to properly use 64bit registers in AArch64
>>> state and in case of AArch32 the upper 32 bits of AArch64
>>> registers are inaccessible and are ignored(D1.20.1 ARM DDI 0487A.j).
>>
>> What you wrote only says that the bits are ignored. It doesn't say whether the bits will be 0.
>>
>> They are probably, but as I wrote yesterday, I couldn't confirm it.
>>
> Should I then remove this part of the commit or write below?:
> "We can assume that those bits will be 0 but the architecture
> reference manual does not clarify this."

There was some back and forth on security@xen.org about this. I will
summarize the discussion here, as we considered this to be just a bug.

I wasn't looking at the correct section in the Arm Arm. There is a 
paragraph clearly describing the expected behavior in a different 
section (thanks Ash for the pointer!). Per section D1.19.2 in DDI 0487F.c:

"
If the general-purpose register was accessible from AArch32 state

The upper 32 bits either become zero, or hold the value that the same 
architectural register held before any AArch32 execution. The choice 
between these two options is IMPLEMENTATION DEFINED, and might vary 
dynamically within an implementation. Correspondingly, software must
regard the value as being a CONSTRAINED UNPREDICTABLE choice between 
these two values.

This behavior applies regardless of whether any execution occurred at 
the Exception level that was using AArch32. That is, this behavior 
applies even if AArch32 state was entered by an exception
return from AArch64 state, and another exception was immediately taken 
to AArch64 state without any instruction execution in AArch32 state.
"

So we can't assume the top 32-bits are zeroed unless the hypervisor 
ensured they were. Today, we don't have that guarantee in Xen.

This needs to be fixed. The two approaches we discussed are:
    1) Update set_user_reg() to zero the top 32 bits. We have a couple of
places that use the xN fields directly, so we would need to switch them
to use set_user_reg().
    2) Only save/restore the bottom 32 bits when entering/leaving the
hypervisor.

At the moment, my preference goes towards the latter because we don't 
risk introducing new places where set_user_reg() is not used.

I have quickly hacked the entry path. This would look like:

diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
index fc3811ad0ad5..65e24c88b059 100644
--- a/xen/arch/arm/arm64/entry.S
+++ b/xen/arch/arm/arm64/entry.S
@@ -111,6 +111,11 @@
   */
          .macro  entry, hyp, compat, save_x0_x1=1
          sub     sp, sp, #(UREGS_SPSR_el1 - UREGS_LR) /* CPSR, PC, SP, LR */
+        .if \compat == 1 /* AArch32 mode */
+        /* Clobber the top 32-bit of the registers */
+        mov    w0, w0
+        mov    w1, w1
+        .endif
          push    x28, x29
          push    x26, x27
          push    x24, x25

I haven't looked at whether this can be optimized or whether the exit 
path would be easier to modify.

Anyway, this is not a new bug so I would be fine to get this patch 
merged first. Although, I think this wants to be fixed for xen 4.16 
(CCing Ian to track it).

I will try to find some time in the next couple of weeks to fix it and 
have another review of this patch.

Cheers,

-- 
Julien Grall

Re: [PATCH v2] xen/arm64: Remove vreg_emulate_sysreg32

Posted by Michal Orzel 4 years, 5 months ago

Hi Julien,

On 06.08.2021 13:12, Julien Grall wrote:
> 
> 
> On 29/07/2021 12:47, Michal Orzel wrote:
>> Hi Julien,
> 
> Hi Michal,
> 
>> On 29.07.2021 13:20, Julien Grall wrote:
>>> Hi Michal,
>>>
>>> On 29/07/2021 11:42, Michal Orzel wrote:
>>>> According to ARMv8A architecture, AArch64 registers
>>>> are 64bit wide even though in many cases the upper
>>>> 32bit is reserved. Therefore there is no need for
>>>> function vreg_emulate_sysreg32 on arm64. This means
>>>> that we can have just one function vreg_emulate_sysreg
>>>> using new function pointer:
>>>> typedef bool (*vreg_reg_fn_t)(struct cpu_user_regs *regs,
>>>>                                 register_t *r, bool read);
>>>>
>>>> Modify vreg_emulate_cp32 to use the new function pointer
>>>> as well.
>>>>
>>>> This change allows to properly use 64bit registers in AArch64
>>>> state and in case of AArch32 the upper 32 bits of AArch64
>>>> registers are inaccessible and are ignored(D1.20.1 ARM DDI 0487A.j).
>>>
>>> What you wrote only says that the bits are ignored. It doesn't say whether the bits will be 0.
>>>
>>> They are probably, but as I wrote yesterday, I couldn't confirm it.
>>>
>> Should I then remove this part of the commit or write below?:
>> "We can assume that those bits will be 0 but the architecture
>> reference manual does not clarify this."
> 
> There was some back and forth on security@xen.org about this. I will summarizing the discussion here as we considered this was a just a bug.
> 
> I wasn't looking at the correct section in the Arm Arm. There is a paragraph clearly describing the expected behavior in a different section (thanks Ash for the pointer!). Per section D1.19.2 in DDI 0487F.c:
> 
> "
> If the general-purpose register was accessible from AArch32 state
> 
> The upper 32 bits either become zero, or hold the value that the same architectural register held before any AArch32 execution. The choice between these two options is IMPLEMENTATION DEFINED, and might vary dynamically within an implementation. Correspondingly, software must
> regard the value as being a CONSTRAINED UNPREDICTABLE choice between these two values.
> 
> This behavior applies regardless of whether any execution occurred at the Exception level that was using AArch32. That is, this behavior applies even if AArch32 state was entered by an exception
> return from AArch64 state, and another exception was immediately taken to AArch64 state without any instruction execution in AArch32 state.
> "
> 
> So we can't assume the top 32-bits are zeroed unless the hypervisor ensured they were. Today, we don't have that guarantee in Xen.
> 
> This needs to be fixed. The two approachs we discussed are:
>    1) Update set_user_reg() to zero the top 32-bit. We have a couple of places using directly the fields xN. So we would need to switch them to use set_user_reg()
>    2) Only saving/restoring the bottom 32-bit when entering/leaving the hypervisor.
> 
> At the moment, my preference goes towards the latter because we don't risk to introduce new place where set_user_reg() is not used.
> 
> I have quickly hack the entry path. This would look like:
> 
> diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
> index fc3811ad0ad5..65e24c88b059 100644
> --- a/xen/arch/arm/arm64/entry.S
> +++ b/xen/arch/arm/arm64/entry.S
> @@ -111,6 +111,11 @@
>   */
>          .macro  entry, hyp, compat, save_x0_x1=1
>          sub     sp, sp, #(UREGS_SPSR_el1 - UREGS_LR) /* CPSR, PC, SP, LR */
> +        .if \compat == 1 /* AArch32 mode */
> +        /* Clobber the top 32-bit of the registers */
> +        mov    w0, w0
> +        mov    w1, w1
> +        .endif
>          push    x28, x29
>          push    x26, x27
>          push    x24, x25
> 
> I haven't looked whether this can be optimized or the exit path would be easier to modify.
> 
> Anyway, this is not a new bug so I would be fine to get this patch merged first. Although, I think this wants to be fixed for xen 4.16 (CCing Ian to track it).
> 
> I will try to find sometimes in the next couple of weeks to fix it and have another review of this patch.
> 

As the 4.16 release is getting closer I wanted to ask whether you need help with creating a pre-work patch so that this patch can be merged.
I believe this patch wants to be merged for 4.16 as the other sysreg related patches are merged already, so I'm offering my help.

> Cheers,
> 

Cheers,
Michal

Re: [PATCH v2] xen/arm64: Remove vreg_emulate_sysreg32

Posted by Julien Grall 4 years, 5 months ago

On 01/09/2021 10:38, Michal Orzel wrote:
> Hi Julien,

Hi Michal,

> On 06.08.2021 13:12, Julien Grall wrote:
>>
>>
>> On 29/07/2021 12:47, Michal Orzel wrote:
>>> Hi Julien,
>>
>> Hi Michal,
>>
>>> On 29.07.2021 13:20, Julien Grall wrote:
>>>> Hi Michal,
>>>>
>>>> On 29/07/2021 11:42, Michal Orzel wrote:
>>>>> According to ARMv8A architecture, AArch64 registers
>>>>> are 64bit wide even though in many cases the upper
>>>>> 32bit is reserved. Therefore there is no need for
>>>>> function vreg_emulate_sysreg32 on arm64. This means
>>>>> that we can have just one function vreg_emulate_sysreg
>>>>> using new function pointer:
>>>>> typedef bool (*vreg_reg_fn_t)(struct cpu_user_regs *regs,
>>>>>                                  register_t *r, bool read);
>>>>>
>>>>> Modify vreg_emulate_cp32 to use the new function pointer
>>>>> as well.
>>>>>
>>>>> This change allows to properly use 64bit registers in AArch64
>>>>> state and in case of AArch32 the upper 32 bits of AArch64
>>>>> registers are inaccessible and are ignored(D1.20.1 ARM DDI 0487A.j).
>>>>
>>>> What you wrote only says that the bits are ignored. It doesn't say whether the bits will be 0.
>>>>
>>>> They are probably, but as I wrote yesterday, I couldn't confirm it.
>>>>
>>> Should I then remove this part of the commit or write below?:
>>> "We can assume that those bits will be 0 but the architecture
>>> reference manual does not clarify this."
>>
>> There was some back and forth on security@xen.org about this. I will summarizing the discussion here as we considered this was a just a bug.
>>
>> I wasn't looking at the correct section in the Arm Arm. There is a paragraph clearly describing the expected behavior in a different section (thanks Ash for the pointer!). Per section D1.19.2 in DDI 0487F.c:
>>
>> "
>> If the general-purpose register was accessible from AArch32 state
>>
>> The upper 32 bits either become zero, or hold the value that the same architectural register held before any AArch32 execution. The choice between these two options is IMPLEMENTATION DEFINED, and might vary dynamically within an implementation. Correspondingly, software must
>> regard the value as being a CONSTRAINED UNPREDICTABLE choice between these two values.
>>
>> This behavior applies regardless of whether any execution occurred at the Exception level that was using AArch32. That is, this behavior applies even if AArch32 state was entered by an exception
>> return from AArch64 state, and another exception was immediately taken to AArch64 state without any instruction execution in AArch32 state.
>> "
>>
>> So we can't assume the top 32-bits are zeroed unless the hypervisor ensured they were. Today, we don't have that guarantee in Xen.
>>
>> This needs to be fixed. The two approachs we discussed are:
>>     1) Update set_user_reg() to zero the top 32-bit. We have a couple of places using directly the fields xN. So we would need to switch them to use set_user_reg()
>>     2) Only saving/restoring the bottom 32-bit when entering/leaving the hypervisor.
>>
>> At the moment, my preference goes towards the latter because we don't risk to introduce new place where set_user_reg() is not used.
>>
>> I have quickly hack the entry path. This would look like:
>>
>> diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
>> index fc3811ad0ad5..65e24c88b059 100644
>> --- a/xen/arch/arm/arm64/entry.S
>> +++ b/xen/arch/arm/arm64/entry.S
>> @@ -111,6 +111,11 @@
>>    */
>>           .macro  entry, hyp, compat, save_x0_x1=1
>>           sub     sp, sp, #(UREGS_SPSR_el1 - UREGS_LR) /* CPSR, PC, SP, LR */
>> +        .if \compat == 1 /* AArch32 mode */
>> +        /* Clobber the top 32-bit of the registers */
>> +        mov    w0, w0
>> +        mov    w1, w1
>> +        .endif
>>           push    x28, x29
>>           push    x26, x27
>>           push    x24, x25
>>
>> I haven't looked whether this can be optimized or the exit path would be easier to modify.
>>
>> Anyway, this is not a new bug so I would be fine to get this patch merged first. Although, I think this wants to be fixed for xen 4.16 (CCing Ian to track it).
>>
>> I will try to find sometimes in the next couple of weeks to fix it and have another review of this patch.
>>
> 
> As the 4.16 release is getting closer I wanted to ask whether you need help with creating a pre-work patch so that this patch can be merged.

Sorry I haven't looked at the bug yet. I don't think the bug I reported 
necessarily needs to go first. This is not a new bug and AFAICT your 
patch is not going to make it worse than the current state.

Your patch needs to have an updated commit message summarizing what we 
discussed and a second review. No need to resend a patch for the updated 
commit message, this can be discussed here and updated on commit (I am 
assuming there are no other comments).

> I believe this patch wants to be merged for 4.16 as the other sysreg related patches are merged already, so I'm offering a help.

If you have spare time that would be great. There are a few bits I 
wanted to look at other than the entry path:
   - The DOMCTL that updates the context: I believe we may need to zero 
the top bits
   - hypercall continuation (see hypercall_create_continuation)
   - multicall

For 4.16, the entry (or exit) is probably going to be a good step. We 
can look at the rest for 4.17.

Cheers,

-- 
Julien Grall

Re: [PATCH v2] xen/arm64: Remove vreg_emulate_sysreg32

Posted by Michal Orzel 4 years, 5 months ago


On 02.09.2021 14:50, Julien Grall wrote:
> On 01/09/2021 10:38, Michal Orzel wrote:
>> Hi Julien,
> 
> Hi Michal,
> 
>> On 06.08.2021 13:12, Julien Grall wrote:
>>>
>>>
>>> On 29/07/2021 12:47, Michal Orzel wrote:
>>>> Hi Julien,
>>>
>>> Hi Michal,
>>>
>>>> On 29.07.2021 13:20, Julien Grall wrote:
>>>>> Hi Michal,
>>>>>
>>>>> On 29/07/2021 11:42, Michal Orzel wrote:
>>>>>> According to ARMv8A architecture, AArch64 registers
>>>>>> are 64bit wide even though in many cases the upper
>>>>>> 32bit is reserved. Therefore there is no need for
>>>>>> function vreg_emulate_sysreg32 on arm64. This means
>>>>>> that we can have just one function vreg_emulate_sysreg
>>>>>> using new function pointer:
>>>>>> typedef bool (*vreg_reg_fn_t)(struct cpu_user_regs *regs,
>>>>>>                                  register_t *r, bool read);
>>>>>>
>>>>>> Modify vreg_emulate_cp32 to use the new function pointer
>>>>>> as well.
>>>>>>
>>>>>> This change allows to properly use 64bit registers in AArch64
>>>>>> state and in case of AArch32 the upper 32 bits of AArch64
>>>>>> registers are inaccessible and are ignored(D1.20.1 ARM DDI 0487A.j).
>>>>>
>>>>> What you wrote only says that the bits are ignored. It doesn't say whether the bits will be 0.
>>>>>
>>>>> They are probably, but as I wrote yesterday, I couldn't confirm it.
>>>>>
>>>> Should I then remove this part of the commit or write below?:
>>>> "We can assume that those bits will be 0 but the architecture
>>>> reference manual does not clarify this."
>>>
>>> There was some back and forth on security@xen.org about this. I will summarizing the discussion here as we considered this was a just a bug.
>>>
>>> I wasn't looking at the correct section in the Arm Arm. There is a paragraph clearly describing the expected behavior in a different section (thanks Ash for the pointer!). Per section D1.19.2 in DDI 0487F.c:
>>>
>>> "
>>> If the general-purpose register was accessible from AArch32 state
>>>
>>> The upper 32 bits either become zero, or hold the value that the same architectural register held before any AArch32 execution. The choice between these two options is IMPLEMENTATION DEFINED, and might vary dynamically within an implementation. Correspondingly, software must
>>> regard the value as being a CONSTRAINED UNPREDICTABLE choice between these two values.
>>>
>>> This behavior applies regardless of whether any execution occurred at the Exception level that was using AArch32. That is, this behavior applies even if AArch32 state was entered by an exception
>>> return from AArch64 state, and another exception was immediately taken to AArch64 state without any instruction execution in AArch32 state.
>>> "
>>>
>>> So we can't assume the top 32-bits are zeroed unless the hypervisor ensured they were. Today, we don't have that guarantee in Xen.
>>>
>>> This needs to be fixed. The two approachs we discussed are:
>>>     1) Update set_user_reg() to zero the top 32-bit. We have a couple of places using directly the fields xN. So we would need to switch them to use set_user_reg()
>>>     2) Only saving/restoring the bottom 32-bit when entering/leaving the hypervisor.
>>>
>>> At the moment, my preference goes towards the latter because we don't risk to introduce new place where set_user_reg() is not used.
>>>
>>> I have quickly hack the entry path. This would look like:
>>>
>>> diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
>>> index fc3811ad0ad5..65e24c88b059 100644
>>> --- a/xen/arch/arm/arm64/entry.S
>>> +++ b/xen/arch/arm/arm64/entry.S
>>> @@ -111,6 +111,11 @@
>>>    */
>>>           .macro  entry, hyp, compat, save_x0_x1=1
>>>           sub     sp, sp, #(UREGS_SPSR_el1 - UREGS_LR) /* CPSR, PC, SP, LR */
>>> +        .if \compat == 1 /* AArch32 mode */
>>> +        /* Clobber the top 32-bit of the registers */
>>> +        mov    w0, w0
>>> +        mov    w1, w1
>>> +        .endif
>>>           push    x28, x29
>>>           push    x26, x27
>>>           push    x24, x25
>>>
>>> I haven't looked whether this can be optimized or the exit path would be easier to modify.
>>>
>>> Anyway, this is not a new bug so I would be fine to get this patch merged first. Although, I think this wants to be fixed for xen 4.16 (CCing Ian to track it).
>>>
>>> I will try to find sometimes in the next couple of weeks to fix it and have another review of this patch.
>>>
>>
>> As the 4.16 release is getting closer I wanted to ask whether you need help with creating a pre-work patch so that this patch can be merged.
> 
> Sorry I haven't looked at the bug yet. I don't think the bug I reported necessarily needs to go first. This is not a new bug and AFAICT your patch is not going to make it worse than the current state.
> 
> Your patch needs to have an updated commit message summarizing what we discussed and a second review. No need to resend a patch for the updated commit message, this can be discussed here and updated on commit (I assuming there is no other comments).
> 
If the following message is ok for you, please update the commit msg after acking this change:
"
According to ARMv8A architecture, AArch64 registers
are 64bit wide even though in many cases the upper
32bit is reserved. Therefore there is no need for
function vreg_emulate_sysreg32 on arm64. This means
that we can have just one function vreg_emulate_sysreg
using new function pointer:
typedef bool (*vreg_reg_fn_t)(struct cpu_user_regs *regs,
                              register_t *r, bool read);

Modify vreg_emulate_cp32 to use the new function pointer
as well.

This change allows to properly use 64bit registers in AArch64
state. In case of AArch32 the documantation (D1.20.2, DDI 0487A.j)
states that "the upper 32 bits either become zero, or hold the value
that the same architectural register held before any AArch32 execution."
As the choice between them is IMPLEMENTATION DEFINED we cannot assume they
are zeroed. Xen should ensure that but currently it does not. This is not
a new bug and must be fixed as agreed during a discussion over this patch.
"
>> I believe this patch wants to be merged for 4.16 as the other sysreg related patches are merged already, so I'm offering a help.
> 
> If you have spare time that would be great. There are a few bits I wanted to look other than the entry place:
>   - The DOMCTL that update the context: I believe we may need to zero to top bits
>   - hypercall continuation (see hypercall_create_continuation)
>   - multicall
> 
I'll take a look at it.
> For 4.16, the entry (or exit) is probably going to be a good step. We can look at the rest for 4.17.
> 
> Cheers,
> 
Cheers,

Re: [PATCH v2] xen/arm64: Remove vreg_emulate_sysreg32

Posted by Julien Grall 4 years, 5 months ago

Hi Michal,

On 03/09/2021 10:16, Michal Orzel wrote:
> 
> 
> On 02.09.2021 14:50, Julien Grall wrote:
>> On 01/09/2021 10:38, Michal Orzel wrote:
>>> Hi Julien,
>>
>> Hi Michal,
>>
>>> On 06.08.2021 13:12, Julien Grall wrote:
>>>>
>>>>
>>>> On 29/07/2021 12:47, Michal Orzel wrote:
>>>>> Hi Julien,
>>>>
>>>> Hi Michal,
>>>>
>>>>> On 29.07.2021 13:20, Julien Grall wrote:
>>>>>> Hi Michal,
>>>>>>
>>>>>> On 29/07/2021 11:42, Michal Orzel wrote:
>>>>>>> According to ARMv8A architecture, AArch64 registers
>>>>>>> are 64bit wide even though in many cases the upper
>>>>>>> 32bit is reserved. Therefore there is no need for
>>>>>>> function vreg_emulate_sysreg32 on arm64. This means
>>>>>>> that we can have just one function vreg_emulate_sysreg
>>>>>>> using new function pointer:
>>>>>>> typedef bool (*vreg_reg_fn_t)(struct cpu_user_regs *regs,
>>>>>>>                                   register_t *r, bool read);
>>>>>>>
>>>>>>> Modify vreg_emulate_cp32 to use the new function pointer
>>>>>>> as well.
>>>>>>>
>>>>>>> This change allows to properly use 64bit registers in AArch64
>>>>>>> state and in case of AArch32 the upper 32 bits of AArch64
>>>>>>> registers are inaccessible and are ignored(D1.20.1 ARM DDI 0487A.j).
>>>>>>
>>>>>> What you wrote only says that the bits are ignored. It doesn't say whether the bits will be 0.
>>>>>>
>>>>>> They are probably, but as I wrote yesterday, I couldn't confirm it.
>>>>>>
>>>>> Should I then remove this part of the commit or write below?:
>>>>> "We can assume that those bits will be 0 but the architecture
>>>>> reference manual does not clarify this."
>>>>
>>>> There was some back and forth on security@xen.org about this. I will summarizing the discussion here as we considered this was a just a bug.
>>>>
>>>> I wasn't looking at the correct section in the Arm Arm. There is a paragraph clearly describing the expected behavior in a different section (thanks Ash for the pointer!). Per section D1.19.2 in DDI 0487F.c:
>>>>
>>>> "
>>>> If the general-purpose register was accessible from AArch32 state
>>>>
>>>> The upper 32 bits either become zero, or hold the value that the same architectural register held before any AArch32 execution. The choice between these two options is IMPLEMENTATION DEFINED, and might vary dynamically within an implementation. Correspondingly, software must
>>>> regard the value as being a CONSTRAINED UNPREDICTABLE choice between these two values.
>>>>
>>>> This behavior applies regardless of whether any execution occurred at the Exception level that was using AArch32. That is, this behavior applies even if AArch32 state was entered by an exception
>>>> return from AArch64 state, and another exception was immediately taken to AArch64 state without any instruction execution in AArch32 state.
>>>> "
>>>>
>>>> So we can't assume the top 32-bits are zeroed unless the hypervisor ensured they were. Today, we don't have that guarantee in Xen.
>>>>
>>>> This needs to be fixed. The two approachs we discussed are:
>>>>      1) Update set_user_reg() to zero the top 32-bit. We have a couple of places using directly the fields xN. So we would need to switch them to use set_user_reg()
>>>>      2) Only saving/restoring the bottom 32-bit when entering/leaving the hypervisor.
>>>>
>>>> At the moment, my preference goes towards the latter because we don't risk to introduce new place where set_user_reg() is not used.
>>>>
>>>> I have quickly hack the entry path. This would look like:
>>>>
>>>> diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
>>>> index fc3811ad0ad5..65e24c88b059 100644
>>>> --- a/xen/arch/arm/arm64/entry.S
>>>> +++ b/xen/arch/arm/arm64/entry.S
>>>> @@ -111,6 +111,11 @@
>>>>     */
>>>>            .macro  entry, hyp, compat, save_x0_x1=1
>>>>            sub     sp, sp, #(UREGS_SPSR_el1 - UREGS_LR) /* CPSR, PC, SP, LR */
>>>> +        .if \compat == 1 /* AArch32 mode */
>>>> +        /* Clobber the top 32-bit of the registers */
>>>> +        mov    w0, w0
>>>> +        mov    w1, w1
>>>> +        .endif
>>>>            push    x28, x29
>>>>            push    x26, x27
>>>>            push    x24, x25
>>>>
>>>> I haven't looked whether this can be optimized or the exit path would be easier to modify.
>>>>
>>>> Anyway, this is not a new bug so I would be fine to get this patch merged first. Although, I think this wants to be fixed for xen 4.16 (CCing Ian to track it).
>>>>
>>>> I will try to find sometimes in the next couple of weeks to fix it and have another review of this patch.
>>>>
>>>
>>> As the 4.16 release is getting closer I wanted to ask whether you need help with creating a pre-work patch so that this patch can be merged.
>>
>> Sorry I haven't looked at the bug yet. I don't think the bug I reported necessarily needs to go first. This is not a new bug and AFAICT your patch is not going to make it worse than the current state.
>>
>> Your patch needs to have an updated commit message summarizing what we discussed and a second review. No need to resend a patch for the updated commit message, this can be discussed here and updated on commit (I assuming there is no other comments).
>>
> If the following message is ok for you, please update the commit msg after acking this change:
> "
> According to ARMv8A architecture, AArch64 registers
> are 64bit wide even though in many cases the upper
> 32bit is reserved. Therefore there is no need for
> function vreg_emulate_sysreg32 on arm64. This means
> that we can have just one function vreg_emulate_sysreg
> using new function pointer:
> typedef bool (*vreg_reg_fn_t)(struct cpu_user_regs *regs,
>                                register_t *r, bool read);
> 
> Modify vreg_emulate_cp32 to use the new function pointer
> as well.
> 
> This change allows to properly use 64bit registers in AArch64
> state. In case of AArch32 the documantation (D1.20.2, DDI 0487A.j)

s/documantation/documentation/

> states that "the upper 32 bits either become zero, or hold the value
> that the same architectural register held before any AArch32 execution."
> As the choice between them is IMPLEMENTATION DEFINED we cannot assume they
> are zeroed. Xen should ensure that but currently it does not. This is not
> a new bug and must be fixed as agreed during a discussion over this patch.
> "

This looks good to me. I will fold it on commit.

Cheers,

-- 
Julien Grall