[PATCH] amd-iommu: Fix Guest CR3 Table following c/s 3a7947b6901

Andrew Cooper posted 1 patch 1 week, 1 day ago
Failed in applying to current master (apply log)
xen/drivers/passthrough/amd/iommu.h       |  2 --
xen/drivers/passthrough/amd/iommu_guest.c | 36 +++++++++++++++++++++++++++----
xen/drivers/passthrough/amd/iommu_map.c   | 21 ------------------
3 files changed, 32 insertions(+), 27 deletions(-)

[PATCH] amd-iommu: Fix Guest CR3 Table following c/s 3a7947b6901

Posted by Andrew Cooper 1 week, 1 day ago
"amd-iommu: use a bitfield for DTE" renamed iommu_dte_set_guest_cr3()'s gcr3
parameter to gcr3_mfn but ended up with an off-by-PAGE_SIZE error when
extracting bits from the address.

First of all, get_guest_cr3_from_dte() and iommu_dte_set_guest_cr3()
are (almost) getters and setters for the same field, so should live together.

Rename them to dte_{get,set}_gcr3_table() to specifically avoid 'guest_cr3' in
the name.  This field actually points to a table in memory containing an array
of guest CR3 values.  As these functions are used for different logical
indirections, they shouldn't use gfn/mfn terminology for their parameters.
Switch them to use straight uint64_t full addresses.

Fixes: 3a7947b6901 ("amd-iommu: use a bitfield for DTE")
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Paul Durrant <paul.durrant@citrix.com>

Rebase over several years' worth of changes.

This code is unreachable, so completely untestable, but I think the end result
is better than it was previously.
---
 xen/drivers/passthrough/amd/iommu.h       |  2 --
 xen/drivers/passthrough/amd/iommu_guest.c | 36 +++++++++++++++++++++++++++----
 xen/drivers/passthrough/amd/iommu_map.c   | 21 ------------------
 3 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h
index 28a44ceb85..ad089cb095 100644
--- a/xen/drivers/passthrough/amd/iommu.h
+++ b/xen/drivers/passthrough/amd/iommu.h
@@ -246,8 +246,6 @@ void amd_iommu_set_root_page_table(struct amd_iommu_dte *dte,
 				   uint8_t paging_mode, bool valid);
 void iommu_dte_add_device_entry(struct amd_iommu_dte *dte,
                                 const struct ivrs_mappings *ivrs_dev);
-void iommu_dte_set_guest_cr3(struct amd_iommu_dte *dte, uint16_t dom_id,
-                             uint64_t gcr3_mfn, bool gv, uint8_t glx);
 
 /* send cmd to iommu */
 void amd_iommu_flush_all_pages(struct domain *d);
diff --git a/xen/drivers/passthrough/amd/iommu_guest.c b/xen/drivers/passthrough/amd/iommu_guest.c
index 2a3def9a5d..00c5ccd7b5 100644
--- a/xen/drivers/passthrough/amd/iommu_guest.c
+++ b/xen/drivers/passthrough/amd/iommu_guest.c
@@ -68,11 +68,39 @@ static void guest_iommu_disable(struct guest_iommu *iommu)
     iommu->enabled = 0;
 }
 
-static uint64_t get_guest_cr3_from_dte(struct amd_iommu_dte *dte)
+/*
+ * The Guest CR3 Table is a table written by the guest kernel, pointing at
+ * gCR3 values for PASID transactions to use.  The Device Table Entry points
+ * at a system physical address.
+ *
+ * However, these helpers deliberately use untyped parameters without
+ * reference to gfn/mfn because they are used both for programming the real
+ * IOMMU, and interpreting a guests programming of its vIOMMU.
+ */
+static uint64_t dte_get_gcr3_table(const struct amd_iommu_dte *dte)
 {
     return (((uint64_t)dte->gcr3_trp_51_31 << 31) |
             (dte->gcr3_trp_30_15 << 15) |
-            (dte->gcr3_trp_14_12 << 12)) >> PAGE_SHIFT;
+            (dte->gcr3_trp_14_12 << 12));
+}
+
+static void dte_set_gcr3_table(struct amd_iommu_dte *dte, uint16_t dom_id,
+                               uint64_t addr, bool gv, uint8_t glx)
+{
+#define GCR3_MASK(hi, lo) (((1ul << ((hi) + 1)) - 1) & ~((1ul << (lo)) - 1))
+
+    /* I bit must be set when gcr3 is enabled */
+    dte->i = true;
+
+    dte->gcr3_trp_14_12 = MASK_EXTR(addr, GCR3_MASK(14, 12));
+    dte->gcr3_trp_30_15 = MASK_EXTR(addr, GCR3_MASK(30, 15));
+    dte->gcr3_trp_51_31 = MASK_EXTR(addr, GCR3_MASK(51, 31));
+
+    dte->domain_id = dom_id;
+    dte->glx = glx;
+    dte->gv = gv;
+
+#undef GCR3_MASK
 }
 
 static unsigned int host_domid(struct domain *d, uint64_t g_domid)
@@ -389,7 +417,7 @@ static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd)
     gdte = &dte_base[gbdf % (PAGE_SIZE / sizeof(struct amd_iommu_dte))];
 
     gdom_id = gdte->domain_id;
-    gcr3_gfn = get_guest_cr3_from_dte(gdte);
+    gcr3_gfn = dte_get_gcr3_table(gdte) >> PAGE_SHIFT;
     glx = gdte->glx;
     gv = gdte->gv;
 
@@ -419,7 +447,7 @@ static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd)
     mdte = &dte_base[req_id];
 
     spin_lock_irqsave(&iommu->lock, flags);
-    iommu_dte_set_guest_cr3(mdte, hdom_id, gcr3_mfn, gv, glx);
+    dte_set_gcr3_table(mdte, hdom_id, gcr3_mfn << PAGE_SHIFT, gv, glx);
 
     amd_iommu_flush_device(iommu, req_id);
     spin_unlock_irqrestore(&iommu->lock, flags);
diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c
index f773ab33fd..d3a8b1aec7 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -173,27 +173,6 @@ void __init iommu_dte_add_device_entry(struct amd_iommu_dte *dte,
     };
 }
 
-void iommu_dte_set_guest_cr3(struct amd_iommu_dte *dte, uint16_t dom_id,
-                             uint64_t gcr3_mfn, bool gv, uint8_t glx)
-{
-#define GCR3_MASK(hi, lo) (((1ul << ((hi) + 1)) - 1) & ~((1ul << (lo)) - 1))
-#define GCR3_SHIFT(lo) ((lo) - PAGE_SHIFT)
-
-    /* I bit must be set when gcr3 is enabled */
-    dte->i = true;
-
-    dte->gcr3_trp_14_12 = (gcr3_mfn & GCR3_MASK(14, 12)) >> GCR3_SHIFT(12);
-    dte->gcr3_trp_30_15 = (gcr3_mfn & GCR3_MASK(30, 15)) >> GCR3_SHIFT(15);
-    dte->gcr3_trp_51_31 = (gcr3_mfn & GCR3_MASK(51, 31)) >> GCR3_SHIFT(31);
-
-    dte->domain_id = dom_id;
-    dte->glx = glx;
-    dte->gv = gv;
-
-#undef GCR3_SHIFT
-#undef GCR3_MASK
-}
-
 /* Walk io page tables and build level page tables if necessary
  * {Re, un}mapping super page frames causes re-allocation of io
  * page tables.
-- 
2.11.0


Re: [PATCH] amd-iommu: Fix Guest CR3 Table following c/s 3a7947b6901

Posted by Jan Beulich 1 week, 1 day ago
On 20.11.2020 15:19, Andrew Cooper wrote:
> "amd-iommu: use a bitfield for DTE" renamed iommu_dte_set_guest_cr3()'s gcr3
> parameter to gcr3_mfn but ended up with an off-by-PAGE_SIZE error when
> extracting bits from the address.
> 
> First of all, get_guest_cr3_from_dte() and iommu_dte_set_guest_cr3()
> are (almost) getters and setters for the same field, so should live together.
> 
> Rename them to dte_{get,set}_gcr3_table() to specifically avoid 'guest_cr3' in
> the name.  This field actually points to a table in memory containing an array
> of guest CR3 values.  As these functions are used for different logical
> indirections, they shouldn't use gfn/mfn terminology for their parameters.
> Switch them to use straight uint64_t full addresses.

All of this still looks to belong to "First of all ..." - did you
mean to have more in here, but forgot to actually put it in?

> --- a/xen/drivers/passthrough/amd/iommu_guest.c
> +++ b/xen/drivers/passthrough/amd/iommu_guest.c
> @@ -68,11 +68,39 @@ static void guest_iommu_disable(struct guest_iommu *iommu)
>      iommu->enabled = 0;
>  }
>  
> -static uint64_t get_guest_cr3_from_dte(struct amd_iommu_dte *dte)
> +/*
> + * The Guest CR3 Table is a table written by the guest kernel, pointing at
> + * gCR3 values for PASID transactions to use.  The Device Table Entry points
> + * at a system physical address.
> + *
> + * However, these helpers deliberately use untyped parameters without
> + * reference to gfn/mfn because they are used both for programming the real
> + * IOMMU, and interpreting a guests programming of its vIOMMU.
> + */
> +static uint64_t dte_get_gcr3_table(const struct amd_iommu_dte *dte)
>  {
>      return (((uint64_t)dte->gcr3_trp_51_31 << 31) |
>              (dte->gcr3_trp_30_15 << 15) |
> -            (dte->gcr3_trp_14_12 << 12)) >> PAGE_SHIFT;
> +            (dte->gcr3_trp_14_12 << 12));
> +}
> +
> +static void dte_set_gcr3_table(struct amd_iommu_dte *dte, uint16_t dom_id,
> +                               uint64_t addr, bool gv, uint8_t glx)
> +{
> +#define GCR3_MASK(hi, lo) (((1ul << ((hi) + 1)) - 1) & ~((1ul << (lo)) - 1))
> +
> +    /* I bit must be set when gcr3 is enabled */
> +    dte->i = true;
> +
> +    dte->gcr3_trp_14_12 = MASK_EXTR(addr, GCR3_MASK(14, 12));
> +    dte->gcr3_trp_30_15 = MASK_EXTR(addr, GCR3_MASK(30, 15));
> +    dte->gcr3_trp_51_31 = MASK_EXTR(addr, GCR3_MASK(51, 31));
> +
> +    dte->domain_id = dom_id;
> +    dte->glx = glx;
> +    dte->gv = gv;
> +
> +#undef GCR3_MASK
>  }

I realize the question is somewhat unrelated, but aren't we updating
a live DTE here? If so, are there no ordering requirements between the
writes? Might be worth putting in barrier(s) right on this occasion.

Jan

Re: [PATCH] amd-iommu: Fix Guest CR3 Table following c/s 3a7947b6901

Posted by Andrew Cooper 1 week, 1 day ago
On 20/11/2020 14:32, Jan Beulich wrote:
> On 20.11.2020 15:19, Andrew Cooper wrote:
>> "amd-iommu: use a bitfield for DTE" renamed iommu_dte_set_guest_cr3()'s gcr3
>> parameter to gcr3_mfn but ended up with an off-by-PAGE_SIZE error when
>> extracting bits from the address.
>>
>> First of all, get_guest_cr3_from_dte() and iommu_dte_set_guest_cr3()
>> are (almost) getters and setters for the same field, so should live together.
>>
>> Rename them to dte_{get,set}_gcr3_table() to specifically avoid 'guest_cr3' in
>> the name.  This field actually points to a table in memory containing an array
>> of guest CR3 values.  As these functions are used for different logical
>> indirections, they shouldn't use gfn/mfn terminology for their parameters.
>> Switch them to use straight uint64_t full addresses.
> All of this still looks to belong to "First of all ..." - did you
> mean to have more in here, but forgot to actually put it in?

No - I deleted the bit which has caused this to be blocked on minutiae
for nearly 2 years.

>
>> --- a/xen/drivers/passthrough/amd/iommu_guest.c
>> +++ b/xen/drivers/passthrough/amd/iommu_guest.c
>> @@ -68,11 +68,39 @@ static void guest_iommu_disable(struct guest_iommu *iommu)
>>      iommu->enabled = 0;
>>  }
>>  
>> -static uint64_t get_guest_cr3_from_dte(struct amd_iommu_dte *dte)
>> +/*
>> + * The Guest CR3 Table is a table written by the guest kernel, pointing at
>> + * gCR3 values for PASID transactions to use.  The Device Table Entry points
>> + * at a system physical address.
>> + *
>> + * However, these helpers deliberately use untyped parameters without
>> + * reference to gfn/mfn because they are used both for programming the real
>> + * IOMMU, and interpreting a guests programming of its vIOMMU.
>> + */
>> +static uint64_t dte_get_gcr3_table(const struct amd_iommu_dte *dte)
>>  {
>>      return (((uint64_t)dte->gcr3_trp_51_31 << 31) |
>>              (dte->gcr3_trp_30_15 << 15) |
>> -            (dte->gcr3_trp_14_12 << 12)) >> PAGE_SHIFT;
>> +            (dte->gcr3_trp_14_12 << 12));
>> +}
>> +
>> +static void dte_set_gcr3_table(struct amd_iommu_dte *dte, uint16_t dom_id,
>> +                               uint64_t addr, bool gv, uint8_t glx)
>> +{
>> +#define GCR3_MASK(hi, lo) (((1ul << ((hi) + 1)) - 1) & ~((1ul << (lo)) - 1))
>> +
>> +    /* I bit must be set when gcr3 is enabled */
>> +    dte->i = true;
>> +
>> +    dte->gcr3_trp_14_12 = MASK_EXTR(addr, GCR3_MASK(14, 12));
>> +    dte->gcr3_trp_30_15 = MASK_EXTR(addr, GCR3_MASK(30, 15));
>> +    dte->gcr3_trp_51_31 = MASK_EXTR(addr, GCR3_MASK(51, 31));
>> +
>> +    dte->domain_id = dom_id;
>> +    dte->glx = glx;
>> +    dte->gv = gv;
>> +
>> +#undef GCR3_MASK
>>  }
> I realize the question is somewhat unrelated, but aren't we updating
> a live DTE here? If so, are there no ordering requirements between the
> writes? Might be worth putting in barrier(s) right on this occasion.

I don't know.  Honestly, it's not relevant either as this is code motion.

This entire file is full of security holes.  None of it is fit for use
in its current form.

~Andrew

Re: [PATCH] amd-iommu: Fix Guest CR3 Table following c/s 3a7947b6901

Posted by Jan Beulich 1 week, 1 day ago
On 20.11.2020 15:37, Andrew Cooper wrote:
> On 20/11/2020 14:32, Jan Beulich wrote:
>> On 20.11.2020 15:19, Andrew Cooper wrote:
>>> --- a/xen/drivers/passthrough/amd/iommu_guest.c
>>> +++ b/xen/drivers/passthrough/amd/iommu_guest.c
>>> @@ -68,11 +68,39 @@ static void guest_iommu_disable(struct guest_iommu *iommu)
>>>      iommu->enabled = 0;
>>>  }
>>>  
>>> -static uint64_t get_guest_cr3_from_dte(struct amd_iommu_dte *dte)
>>> +/*
>>> + * The Guest CR3 Table is a table written by the guest kernel, pointing at
>>> + * gCR3 values for PASID transactions to use.  The Device Table Entry points
>>> + * at a system physical address.
>>> + *
>>> + * However, these helpers deliberately use untyped parameters without
>>> + * reference to gfn/mfn because they are used both for programming the real
>>> + * IOMMU, and interpreting a guests programming of its vIOMMU.
>>> + */
>>> +static uint64_t dte_get_gcr3_table(const struct amd_iommu_dte *dte)
>>>  {
>>>      return (((uint64_t)dte->gcr3_trp_51_31 << 31) |
>>>              (dte->gcr3_trp_30_15 << 15) |
>>> -            (dte->gcr3_trp_14_12 << 12)) >> PAGE_SHIFT;
>>> +            (dte->gcr3_trp_14_12 << 12));
>>> +}
>>> +
>>> +static void dte_set_gcr3_table(struct amd_iommu_dte *dte, uint16_t dom_id,
>>> +                               uint64_t addr, bool gv, uint8_t glx)
>>> +{
>>> +#define GCR3_MASK(hi, lo) (((1ul << ((hi) + 1)) - 1) & ~((1ul << (lo)) - 1))
>>> +
>>> +    /* I bit must be set when gcr3 is enabled */
>>> +    dte->i = true;
>>> +
>>> +    dte->gcr3_trp_14_12 = MASK_EXTR(addr, GCR3_MASK(14, 12));
>>> +    dte->gcr3_trp_30_15 = MASK_EXTR(addr, GCR3_MASK(30, 15));
>>> +    dte->gcr3_trp_51_31 = MASK_EXTR(addr, GCR3_MASK(51, 31));
>>> +
>>> +    dte->domain_id = dom_id;
>>> +    dte->glx = glx;
>>> +    dte->gv = gv;
>>> +
>>> +#undef GCR3_MASK
>>>  }
>> I realize the question is somewhat unrelated, but aren't we updating
>> a live DTE here? If so, are there no ordering requirements between the
>> writes? Might be worth putting in barrier(s) right on this occasion.
> 
> I don't know.  Honestly, its not relevant either as this is code motion.

Well, okay:
Acked-by: Jan Beulich <jbeulich@suse.com>

> This entire file is full of security holes.  None of it is fit for use
> in its current form.

We're all aware of this, I think.

Jan