[PATCH 02/10] x86: split populating of struct vcpu_time_info into a separate function

Jan Beulich posted 10 patches 3 years, 3 months ago
There is a newer version of this series
[PATCH 02/10] x86: split populating of struct vcpu_time_info into a separate function
Posted by Jan Beulich 3 years, 3 months ago
This is to facilitate subsequent re-use of this code.

While doing so add const in a number of places, extending to
gtime_to_gtsc() and then for symmetry also its inverse function.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
I was on the edge of also folding the various is_hvm_domain() into a
function scope boolean, but then wasn't really certain that this
wouldn't open up undue speculation opportunities.

--- a/xen/arch/x86/include/asm/time.h
+++ b/xen/arch/x86/include/asm/time.h
@@ -52,8 +52,8 @@ uint64_t cf_check acpi_pm_tick_to_ns(uin
 uint64_t tsc_ticks2ns(uint64_t ticks);
 
 uint64_t pv_soft_rdtsc(const struct vcpu *v, const struct cpu_user_regs *regs);
-u64 gtime_to_gtsc(struct domain *d, u64 time);
-u64 gtsc_to_gtime(struct domain *d, u64 tsc);
+uint64_t gtime_to_gtsc(const struct domain *d, uint64_t time);
+uint64_t gtsc_to_gtime(const struct domain *d, uint64_t tsc);
 
 int tsc_set_info(struct domain *d, uint32_t tsc_mode, uint64_t elapsed_nsec,
                  uint32_t gtsc_khz, uint32_t incarnation);
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1373,18 +1373,14 @@ uint64_t tsc_ticks2ns(uint64_t ticks)
     return scale_delta(ticks, &t->tsc_scale);
 }
 
-static void __update_vcpu_system_time(struct vcpu *v, int force)
+static void collect_time_info(const struct vcpu *v,
+                              struct vcpu_time_info *u)
 {
-    const struct cpu_time *t;
-    struct vcpu_time_info *u, _u = {};
-    struct domain *d = v->domain;
+    const struct cpu_time *t = &this_cpu(cpu_time);
+    const struct domain *d = v->domain;
     s_time_t tsc_stamp;
 
-    if ( v->vcpu_info == NULL )
-        return;
-
-    t = &this_cpu(cpu_time);
-    u = &vcpu_info(v, time);
+    memset(u, 0, sizeof(*u));
 
     if ( d->arch.vtsc )
     {
@@ -1392,7 +1388,7 @@ static void __update_vcpu_system_time(st
 
         if ( is_hvm_domain(d) )
         {
-            struct pl_time *pl = v->domain->arch.hvm.pl_time;
+            const struct pl_time *pl = d->arch.hvm.pl_time;
 
             stime += pl->stime_offset + v->arch.hvm.stime_offset;
             if ( stime >= 0 )
@@ -1403,27 +1399,27 @@ static void __update_vcpu_system_time(st
         else
             tsc_stamp = gtime_to_gtsc(d, stime);
 
-        _u.tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac;
-        _u.tsc_shift         = d->arch.vtsc_to_ns.shift;
+        u->tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac;
+        u->tsc_shift         = d->arch.vtsc_to_ns.shift;
     }
     else
     {
         if ( is_hvm_domain(d) && hvm_tsc_scaling_supported )
         {
             tsc_stamp            = hvm_scale_tsc(d, t->stamp.local_tsc);
-            _u.tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac;
-            _u.tsc_shift         = d->arch.vtsc_to_ns.shift;
+            u->tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac;
+            u->tsc_shift         = d->arch.vtsc_to_ns.shift;
         }
         else
         {
             tsc_stamp            = t->stamp.local_tsc;
-            _u.tsc_to_system_mul = t->tsc_scale.mul_frac;
-            _u.tsc_shift         = t->tsc_scale.shift;
+            u->tsc_to_system_mul = t->tsc_scale.mul_frac;
+            u->tsc_shift         = t->tsc_scale.shift;
         }
     }
 
-    _u.tsc_timestamp = tsc_stamp;
-    _u.system_time   = t->stamp.local_stime;
+    u->tsc_timestamp = tsc_stamp;
+    u->system_time   = t->stamp.local_stime;
 
     /*
      * It's expected that domains cope with this bit changing on every
@@ -1431,10 +1427,21 @@ static void __update_vcpu_system_time(st
      * or if it further requires monotonicity checks with other vcpus.
      */
     if ( clocksource_is_tsc() )
-        _u.flags |= XEN_PVCLOCK_TSC_STABLE_BIT;
+        u->flags |= XEN_PVCLOCK_TSC_STABLE_BIT;
 
     if ( is_hvm_domain(d) )
-        _u.tsc_timestamp += v->arch.hvm.cache_tsc_offset;
+        u->tsc_timestamp += v->arch.hvm.cache_tsc_offset;
+}
+
+static void __update_vcpu_system_time(struct vcpu *v, int force)
+{
+    struct vcpu_time_info *u = &vcpu_info(v, time), _u;
+    const struct domain *d = v->domain;
+
+    if ( v->vcpu_info == NULL )
+        return;
+
+    collect_time_info(v, &_u);
 
     /* Don't bother unless timestamp record has changed or we are forced. */
     _u.version = u->version; /* make versions match for memcmp test */
@@ -2476,7 +2483,7 @@ static int __init cf_check tsc_parse(con
 }
 custom_param("tsc", tsc_parse);
 
-u64 gtime_to_gtsc(struct domain *d, u64 time)
+uint64_t gtime_to_gtsc(const struct domain *d, uint64_t time)
 {
     if ( !is_hvm_domain(d) )
     {
@@ -2488,7 +2495,7 @@ u64 gtime_to_gtsc(struct domain *d, u64
     return scale_delta(time, &d->arch.ns_to_vtsc);
 }
 
-u64 gtsc_to_gtime(struct domain *d, u64 tsc)
+uint64_t gtsc_to_gtime(const struct domain *d, uint64_t tsc)
 {
     u64 time = scale_delta(tsc, &d->arch.vtsc_to_ns);
Re: [PATCH 02/10] x86: split populating of struct vcpu_time_info into a separate function
Posted by Andrew Cooper 3 years ago
On 19/10/2022 8:39 am, Jan Beulich wrote:
> This is to facilitate subsequent re-use of this code.
>
> While doing so add const in a number of places, extending to
> gtime_to_gtsc() and then for symmetry also its inverse function.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper@citrix.com>

> ---
> I was on the edge of also folding the various is_hvm_domain() into a
> function scope boolean, but then wasn't really certain that this
> wouldn't open up undue speculation opportunities.

I can't see anything interesting under here speculation-wise.
Commentary inline.

>
> --- a/xen/arch/x86/include/asm/time.h
> +++ b/xen/arch/x86/include/asm/time.h
> @@ -52,8 +52,8 @@ uint64_t cf_check acpi_pm_tick_to_ns(uin
>  uint64_t tsc_ticks2ns(uint64_t ticks);
>  
>  uint64_t pv_soft_rdtsc(const struct vcpu *v, const struct cpu_user_regs *regs);
> -u64 gtime_to_gtsc(struct domain *d, u64 time);
> -u64 gtsc_to_gtime(struct domain *d, u64 tsc);
> +uint64_t gtime_to_gtsc(const struct domain *d, uint64_t time);
> +uint64_t gtsc_to_gtime(const struct domain *d, uint64_t tsc);
>  
>  int tsc_set_info(struct domain *d, uint32_t tsc_mode, uint64_t elapsed_nsec,
>                   uint32_t gtsc_khz, uint32_t incarnation);
> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -1373,18 +1373,14 @@ uint64_t tsc_ticks2ns(uint64_t ticks)
>      return scale_delta(ticks, &t->tsc_scale);
>  }
>  
> -static void __update_vcpu_system_time(struct vcpu *v, int force)
> +static void collect_time_info(const struct vcpu *v,
> +                              struct vcpu_time_info *u)
>  {
> -    const struct cpu_time *t;
> -    struct vcpu_time_info *u, _u = {};
> -    struct domain *d = v->domain;
> +    const struct cpu_time *t = &this_cpu(cpu_time);
> +    const struct domain *d = v->domain;
>      s_time_t tsc_stamp;
>  
> -    if ( v->vcpu_info == NULL )
> -        return;
> -
> -    t = &this_cpu(cpu_time);
> -    u = &vcpu_info(v, time);
> +    memset(u, 0, sizeof(*u));
>  
>      if ( d->arch.vtsc )
>      {
> @@ -1392,7 +1388,7 @@ static void __update_vcpu_system_time(st
>  
>          if ( is_hvm_domain(d) )
>          {
> -            struct pl_time *pl = v->domain->arch.hvm.pl_time;
> +            const struct pl_time *pl = d->arch.hvm.pl_time;

A PV guest could in principle use...

>  
>              stime += pl->stime_offset + v->arch.hvm.stime_offset;

... this pl->stime_offset as the second dereference of whatever happens
to sit under d->arch.hvm.pl_time in the pv union.

In the current build of Xen I have to hand, that's
d->arch.pv.mapcache.{epoch,tlbflush_timestamp}, the combination of which
doesn't seem like it can be steered into being a legal pointer into Xen.

>              if ( stime >= 0 )
> @@ -1403,27 +1399,27 @@ static void __update_vcpu_system_time(st
>          else
>              tsc_stamp = gtime_to_gtsc(d, stime);
>  
> -        _u.tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac;
> -        _u.tsc_shift         = d->arch.vtsc_to_ns.shift;
> +        u->tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac;
> +        u->tsc_shift         = d->arch.vtsc_to_ns.shift;
>      }
>      else
>      {
>          if ( is_hvm_domain(d) && hvm_tsc_scaling_supported )

On the other hand, this isn't safe.  There's no protection of the &&
calculation, but...

>          {
>              tsc_stamp            = hvm_scale_tsc(d, t->stamp.local_tsc);

... this path is the only path subject to speculative type confusion,
and all it does is read d->arch.hvm.tsc_scaling_ratio, so is
appropriately protected in this instance.

Also, all an attacker could do is encode the scaling ratio alongside
t->stamp.local_tsc (unpredictable) in the calculation for the duration
of the speculative window, with no way I can see to dereference the result.


> -            _u.tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac;
> -            _u.tsc_shift         = d->arch.vtsc_to_ns.shift;
> +            u->tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac;
> +            u->tsc_shift         = d->arch.vtsc_to_ns.shift;
>          }
>          else
>          {
>              tsc_stamp            = t->stamp.local_tsc;
> -            _u.tsc_to_system_mul = t->tsc_scale.mul_frac;
> -            _u.tsc_shift         = t->tsc_scale.shift;
> +            u->tsc_to_system_mul = t->tsc_scale.mul_frac;
> +            u->tsc_shift         = t->tsc_scale.shift;
>          }
>      }
>  
> -    _u.tsc_timestamp = tsc_stamp;
> -    _u.system_time   = t->stamp.local_stime;
> +    u->tsc_timestamp = tsc_stamp;
> +    u->system_time   = t->stamp.local_stime;
>  
>      /*
>       * It's expected that domains cope with this bit changing on every
> @@ -1431,10 +1427,21 @@ static void __update_vcpu_system_time(st
>       * or if it further requires monotonicity checks with other vcpus.
>       */
>      if ( clocksource_is_tsc() )
> -        _u.flags |= XEN_PVCLOCK_TSC_STABLE_BIT;
> +        u->flags |= XEN_PVCLOCK_TSC_STABLE_BIT;
>  
>      if ( is_hvm_domain(d) )
> -        _u.tsc_timestamp += v->arch.hvm.cache_tsc_offset;
> +        u->tsc_timestamp += v->arch.hvm.cache_tsc_offset;

This path is subject to type confusion on v->arch.{pv,hvm}, with a PV
guest able to encode the value of v->arch.pv.ctrlreg[5] into the
timestamp.  But again, no way to dereference the result.


I really don't think there's enough flexibility here for even a
perfectly-timed attacker to abuse.

~Andrew
Re: [PATCH 02/10] x86: split populating of struct vcpu_time_info into a separate function
Posted by Jan Beulich 3 years ago
On 17.01.2023 21:19, Andrew Cooper wrote:
> On 19/10/2022 8:39 am, Jan Beulich wrote:
>> This is to facilitate subsequent re-use of this code.
>>
>> While doing so add const in a number of places, extending to
>> gtime_to_gtsc() and then for symmetry also its inverse function.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> Acked-by: Andrew Cooper <andrew.cooper@citrix.com>

Thanks.

>> ---
>> I was on the edge of also folding the various is_hvm_domain() into a
>> function scope boolean, but then wasn't really certain that this
>> wouldn't open up undue speculation opportunities.
> 
> I can't see anything interesting under here speculation-wise.
> Commentary inline.

My interpretation of those comments is that the suggested conversion
would be okay-ish (as in not making things worse), but since you didn't
provide an explicit answer I thought I'd better ask for confirmation
before possibly making a patch to that effect.

Jan

>> --- a/xen/arch/x86/include/asm/time.h
>> +++ b/xen/arch/x86/include/asm/time.h
>> @@ -52,8 +52,8 @@ uint64_t cf_check acpi_pm_tick_to_ns(uin
>>  uint64_t tsc_ticks2ns(uint64_t ticks);
>>  
>>  uint64_t pv_soft_rdtsc(const struct vcpu *v, const struct cpu_user_regs *regs);
>> -u64 gtime_to_gtsc(struct domain *d, u64 time);
>> -u64 gtsc_to_gtime(struct domain *d, u64 tsc);
>> +uint64_t gtime_to_gtsc(const struct domain *d, uint64_t time);
>> +uint64_t gtsc_to_gtime(const struct domain *d, uint64_t tsc);
>>  
>>  int tsc_set_info(struct domain *d, uint32_t tsc_mode, uint64_t elapsed_nsec,
>>                   uint32_t gtsc_khz, uint32_t incarnation);
>> --- a/xen/arch/x86/time.c
>> +++ b/xen/arch/x86/time.c
>> @@ -1373,18 +1373,14 @@ uint64_t tsc_ticks2ns(uint64_t ticks)
>>      return scale_delta(ticks, &t->tsc_scale);
>>  }
>>  
>> -static void __update_vcpu_system_time(struct vcpu *v, int force)
>> +static void collect_time_info(const struct vcpu *v,
>> +                              struct vcpu_time_info *u)
>>  {
>> -    const struct cpu_time *t;
>> -    struct vcpu_time_info *u, _u = {};
>> -    struct domain *d = v->domain;
>> +    const struct cpu_time *t = &this_cpu(cpu_time);
>> +    const struct domain *d = v->domain;
>>      s_time_t tsc_stamp;
>>  
>> -    if ( v->vcpu_info == NULL )
>> -        return;
>> -
>> -    t = &this_cpu(cpu_time);
>> -    u = &vcpu_info(v, time);
>> +    memset(u, 0, sizeof(*u));
>>  
>>      if ( d->arch.vtsc )
>>      {
>> @@ -1392,7 +1388,7 @@ static void __update_vcpu_system_time(st
>>  
>>          if ( is_hvm_domain(d) )
>>          {
>> -            struct pl_time *pl = v->domain->arch.hvm.pl_time;
>> +            const struct pl_time *pl = d->arch.hvm.pl_time;
> 
> A PV guest could in principle use...
> 
>>  
>>              stime += pl->stime_offset + v->arch.hvm.stime_offset;
> 
> ... this pl->stime_offset as the second dereference of whatever happens
> to sit under d->arch.hvm.pl_time in the pv union.
> 
> In the current build of Xen I have to hand, that's
> d->arch.pv.mapcache.{epoch,tlbflush_timestamp}, the combination of which
> doesn't seem like it can be steered into being a legal pointer into Xen.
> 
>>              if ( stime >= 0 )
>> @@ -1403,27 +1399,27 @@ static void __update_vcpu_system_time(st
>>          else
>>              tsc_stamp = gtime_to_gtsc(d, stime);
>>  
>> -        _u.tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac;
>> -        _u.tsc_shift         = d->arch.vtsc_to_ns.shift;
>> +        u->tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac;
>> +        u->tsc_shift         = d->arch.vtsc_to_ns.shift;
>>      }
>>      else
>>      {
>>          if ( is_hvm_domain(d) && hvm_tsc_scaling_supported )
> 
> On the other hand, this isn't safe.  There's no protection of the &&
> calculation, but...
> 
>>          {
>>              tsc_stamp            = hvm_scale_tsc(d, t->stamp.local_tsc);
> 
> ... this path is the only path subject to speculative type confusion,
> and all it does is read d->arch.hvm.tsc_scaling_ratio, so is
> appropriately protected in this instance.
> 
> Also, all an attacker could do is encode the scaling ratio alongside
> t->stamp.local_tsc (unpredictable) in the calculation for the duration
> of the speculative window, with no way I can see to dereference the result.
> 
> 
>> -            _u.tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac;
>> -            _u.tsc_shift         = d->arch.vtsc_to_ns.shift;
>> +            u->tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac;
>> +            u->tsc_shift         = d->arch.vtsc_to_ns.shift;
>>          }
>>          else
>>          {
>>              tsc_stamp            = t->stamp.local_tsc;
>> -            _u.tsc_to_system_mul = t->tsc_scale.mul_frac;
>> -            _u.tsc_shift         = t->tsc_scale.shift;
>> +            u->tsc_to_system_mul = t->tsc_scale.mul_frac;
>> +            u->tsc_shift         = t->tsc_scale.shift;
>>          }
>>      }
>>  
>> -    _u.tsc_timestamp = tsc_stamp;
>> -    _u.system_time   = t->stamp.local_stime;
>> +    u->tsc_timestamp = tsc_stamp;
>> +    u->system_time   = t->stamp.local_stime;
>>  
>>      /*
>>       * It's expected that domains cope with this bit changing on every
>> @@ -1431,10 +1427,21 @@ static void __update_vcpu_system_time(st
>>       * or if it further requires monotonicity checks with other vcpus.
>>       */
>>      if ( clocksource_is_tsc() )
>> -        _u.flags |= XEN_PVCLOCK_TSC_STABLE_BIT;
>> +        u->flags |= XEN_PVCLOCK_TSC_STABLE_BIT;
>>  
>>      if ( is_hvm_domain(d) )
>> -        _u.tsc_timestamp += v->arch.hvm.cache_tsc_offset;
>> +        u->tsc_timestamp += v->arch.hvm.cache_tsc_offset;
> 
> This path is subject to type confusion on v->arch.{pv,hvm}, with a PV
> guest able to encode the value of v->arch.pv.ctrlreg[5] into the
> timestamp.  But again, no way to dereference the result.
> 
> 
> I really don't think there's enough flexibility here for even a
> perfectly-timed attacker to abuse.
> 
> ~Andrew