amd_iommu_set_root_page_table chooses between updating atomically
and non-atomically depending on whether the DTE is active or not.
This choice existed mostly because cx16 couldn't be assumed to always be
available until [1]. Thus we don't need to treat the non-atomic path in a
special way anymore.
By slightly rearranging the atomic path, we can make it cover all cases,
which improves code generation at the expense of always performing
cmpxchg16b.
Also remove the unused raw64 field of ldte, and the now-stale comment, as
the function behaves in the usual way and can't return >0.
[1] 2636fcdc15c7 "x86/iommu: check for CMPXCHG16B when enabling IOMMU"
Signed-off-by: Teddy Astie <teddy.astie@vates.tech>
---
xen/drivers/passthrough/amd/iommu_map.c | 78 ++++++++-----------------
1 file changed, 25 insertions(+), 53 deletions(-)
diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c
index 320a2dc64c..e3165d93aa 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -154,69 +154,41 @@ static void set_iommu_ptes_present(unsigned long pt_mfn,
unmap_domain_page(table);
}
-/*
- * This function returns
- * - -errno for errors,
- * - 0 for a successful update, atomic when necessary
- * - 1 for a successful but non-atomic update, which may need to be warned
- * about by the caller.
- */
int amd_iommu_set_root_page_table(struct amd_iommu_dte *dte,
uint64_t root_ptr, uint16_t domain_id,
uint8_t paging_mode, unsigned int flags)
{
bool valid = flags & SET_ROOT_VALID;
- if ( dte->v && dte->tv )
- {
- union {
- struct amd_iommu_dte dte;
- uint64_t raw64[4];
- __uint128_t raw128[2];
- } ldte = { .dte = *dte };
- __uint128_t res, old = ldte.raw128[0];
- int ret = 0;
-
- ldte.dte.domain_id = domain_id;
- ldte.dte.pt_root = paddr_to_pfn(root_ptr);
- ldte.dte.iw = true;
- ldte.dte.ir = true;
- ldte.dte.paging_mode = paging_mode;
- ldte.dte.v = valid;
-
- res = cmpxchg16b(dte, &old, &ldte.raw128[0]);
-
- /*
- * Hardware does not update the DTE behind our backs, so the
- * return value should match "old".
- */
- if ( res != old )
- {
- printk(XENLOG_ERR
- "Dom%d: unexpected DTE %016lx_%016lx (expected %016lx_%016lx)\n",
- domain_id,
- (uint64_t)(res >> 64), (uint64_t)res,
- (uint64_t)(old >> 64), (uint64_t)old);
- ret = -EILSEQ;
- }
+ union {
+ struct amd_iommu_dte dte;
+ __uint128_t raw128[2];
+ } ldte = { .dte = *dte };
+ __uint128_t res, old = ldte.raw128[0];
- return ret;
- }
+ ldte.dte.domain_id = domain_id;
+ ldte.dte.pt_root = paddr_to_pfn(root_ptr);
+ ldte.dte.iw = true;
+ ldte.dte.ir = true;
+ ldte.dte.paging_mode = paging_mode;
+ ldte.dte.tv = true;
+ ldte.dte.v = valid;
+
+ res = cmpxchg16b(dte, &old, &ldte.raw128[0]);
- if ( valid || dte->v )
+ /*
+ * Hardware does not update the DTE behind our backs, so the
+ * return value should match "old".
+ */
+ if ( res != old )
{
- dte->tv = false;
- dte->v = true;
- smp_wmb();
+ printk(XENLOG_ERR
+ "Dom%d: unexpected DTE %016lx_%016lx (expected %016lx_%016lx)\n",
+ domain_id,
+ (uint64_t)(res >> 64), (uint64_t)res,
+ (uint64_t)(old >> 64), (uint64_t)old);
+ return -EILSEQ;
}
- dte->domain_id = domain_id;
- dte->pt_root = paddr_to_pfn(root_ptr);
- dte->iw = true;
- dte->ir = true;
- dte->paging_mode = paging_mode;
- smp_wmb();
- dte->tv = true;
- dte->v = valid;
return 0;
}
--
2.51.2
--
Teddy Astie | Vates XCP-ng Developer
XCP-ng & Xen Orchestra - Vates solutions
web: https://vates.tech
On 12.11.2025 16:37, Teddy Astie wrote:
> amd_iommu_set_root_page_table chooses between updating atomically
> and non-atomically depending on whether the DTE is active or not.
[...]
> + printk(XENLOG_ERR
> + "Dom%d: unexpected DTE %016lx_%016lx (expected %016lx_%016lx)\n",
> + domain_id,
> + (uint64_t)(res >> 64), (uint64_t)res,
> + (uint64_t)(old >> 64), (uint64_t)old);
Indentation is now off by 1 here.
> + return -EILSEQ;
The downside of this is that all updates can now take this path. Yes, it shouldn't
be possible for this path to be taken, but it's a (minor) concern nevertheless. At the
very least such a downside wants, imo, mentioning in the description, even if for
nothing else than to make clear that it was a deliberate choice.
Jan
On 13.11.2025 12:38, Jan Beulich wrote:
> On 12.11.2025 16:37, Teddy Astie wrote:
>> amd_iommu_set_root_page_table chooses between updating atomically
>> and non-atomically depending on whether the DTE is active or not.
[...]
>> + return -EILSEQ;
>
> The downside of this is that all updates can now take this path. Yes, it shouldn't
> be possible for this path to be taken, but it's a (minor) concern nevertheless. At the
> very least such a downside wants, imo, mentioning in the description, even if for
> nothing else than to make clear that it was a deliberate choice.
>
I only expect to see this path in case of a bug (recoverable here),
which is only detected through the res != old check. But if something
similar occurred on the old non-atomic path, nothing good would happen,
as such a check cannot be implemented properly there.

Perhaps we want to emphasize this:
"The race check is now always made instead of only being made for the
atomic path. This specific path should be triggered under normal
circumstances, and is very likely a bug."

And wrap the res != old check in unlikely() to stress this aspect.
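Concretely, something along these lines (just a sketch of the same hunk
from this patch, with unlikely() added and the printk indentation fixed):

    /*
     * Hardware does not update the DTE behind our backs, so the
     * return value should match "old".
     */
    if ( unlikely(res != old) )
    {
        printk(XENLOG_ERR
               "Dom%d: unexpected DTE %016lx_%016lx (expected %016lx_%016lx)\n",
               domain_id,
               (uint64_t)(res >> 64), (uint64_t)res,
               (uint64_t)(old >> 64), (uint64_t)old);
        return -EILSEQ;
    }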
> Jan
>
On 17.11.2025 14:49, Teddy Astie wrote:
> On 13.11.2025 12:38, Jan Beulich wrote:
>> On 12.11.2025 16:37, Teddy Astie wrote:
[...]
>
> I only expect to see this path in case of a bug (recoverable here),
> which is only detected through the res != old check. But if something
> similar occurred on the old non-atomic path, nothing good would happen,
> as such a check cannot be implemented properly there.
>
> Perhaps we want to emphasize this:
> "The race check is now always made instead of only being made for the
> atomic path. This specific path should be triggered under normal
> circumstances, and is very likely a bug."
s/should/shouldn't/ I suppose?
Jan
> And wrap the res != old check in unlikely() to stress this aspect.
On 13.11.2025 12:38, Jan Beulich wrote:
> On 12.11.2025 16:37, Teddy Astie wrote:
[...]
Apparently it doesn't build (debian-bookworm-gcc-arm32-randconfig
caught it).

ARM does provide MAX_VIRT_CPUS and GUEST_MAX_VCPUS, which are 128 or
lower, but that doesn't map (or not properly) onto what we have on x86
(MAX_VIRT_CPUS=8192 is PV-specific, and GUEST_MAX_VCPUS doesn't exist).

I am not sure what to do; it looks like many things are redundant here.
> Jan
>
On 17.11.2025 14:05, Teddy Astie wrote:
> On 13.11.2025 12:38, Jan Beulich wrote:
>> On 12.11.2025 16:37, Teddy Astie wrote:
[...]
Oops, sent the wrong message. Please ignore.