When updating context table entries, we do take care to set the present
bit as the last step, i.e. in the following order:

	context_clear_entry(context);
	<set other bits>
	context_set_present(context);

However, we don't actually ensure this order, i.e. don't prevent the
compiler from reordering it. And since context entries may be updated at
runtime when translation is already enabled, this may potentially allow
a time window when a device can already do DMA while the translation is
not properly set up yet (e.g. the context entry may point to an
arbitrary page table).

To easily fix this, change context_set_*() and context_clear_*() helpers
to use READ_ONCE/WRITE_ONCE, to ensure that the ordering between updates
of individual bits in context entries matches the order of calling those
helpers, just like we already do for PASID table entries.

Link: https://lore.kernel.org/all/aTG7gc7I5wExai3S@google.com/
Signed-off-by: Dmytro Maluka <dmaluka@chromium.org>
---
 drivers/iommu/intel/iommu.h | 37 +++++++++++++++++++++----------------
 drivers/iommu/intel/pasid.c |  3 ++-
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 25c5e22096d4..7f8f004fa756 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -869,7 +869,7 @@ static inline bool dma_pte_superpage(struct dma_pte *pte)
 
 static inline bool context_present(struct context_entry *context)
 {
-	return (context->lo & 1);
+	return READ_ONCE(context->lo) & 1;
 }
 
 #define LEVEL_STRIDE		(9)
@@ -897,46 +897,51 @@ static inline int pfn_level_offset(u64 pfn, int level)
 	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
 }
 
+static inline void context_set_bits(u64 *ptr, u64 mask, u64 bits)
+{
+	u64 old;
+
+	old = READ_ONCE(*ptr);
+	WRITE_ONCE(*ptr, (old & ~mask) | bits);
+}
+
 static inline void context_set_present(struct context_entry *context)
 {
-	context->lo |= 1;
+	context_set_bits(&context->lo, 1 << 0, 1);
 }
 
 static inline void context_set_fault_enable(struct context_entry *context)
 {
-	context->lo &= (((u64)-1) << 2) | 1;
+	context_set_bits(&context->lo, 1 << 1, 0);
 }
 
 static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
 {
-	context->lo &= (((u64)-1) << 4) | 3;
-	context->lo |= (value & 3) << 2;
+	context_set_bits(&context->lo, GENMASK_ULL(3, 2), value << 2);
 }
 
 static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
 {
-	context->lo &= ~VTD_PAGE_MASK;
-	context->lo |= value & VTD_PAGE_MASK;
+	context_set_bits(&context->lo, VTD_PAGE_MASK, value);
 }
 
 static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
 {
-	context->hi |= value & 7;
+	context_set_bits(&context->hi, GENMASK_ULL(2, 0), value);
 }
 
 static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
 {
-	context->hi |= (value & ((1 << 16) - 1)) << 8;
+	context_set_bits(&context->hi, GENMASK_ULL(23, 8), value << 8);
 }
 
 static inline void context_set_pasid(struct context_entry *context)
 {
-	context->lo |= CONTEXT_PASIDE;
+	context_set_bits(&context->lo, CONTEXT_PASIDE, CONTEXT_PASIDE);
 }
 
 static inline int context_domain_id(struct context_entry *c)
@@ -946,8 +951,8 @@ static inline int context_domain_id(struct context_entry *c)
 
 static inline void context_clear_entry(struct context_entry *context)
 {
-	context->lo = 0;
-	context->hi = 0;
+	WRITE_ONCE(context->lo, 0);
+	WRITE_ONCE(context->hi, 0);
 }
 
 #ifdef CONFIG_INTEL_IOMMU
@@ -980,7 +985,7 @@ clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
 static inline void
 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
 {
-	context->hi |= pasid & ((1 << 20) - 1);
+	context_set_bits(&context->hi, GENMASK_ULL(19, 0), pasid);
 }
 
 /*
@@ -989,7 +994,7 @@ context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
  */
 static inline void context_set_sm_dte(struct context_entry *context)
 {
-	context->lo |= BIT_ULL(2);
+	context_set_bits(&context->lo, BIT_ULL(2), BIT_ULL(2));
 }
 
 /*
@@ -998,7 +1003,7 @@ static inline void context_set_sm_dte(struct context_entry *context)
 */
 static inline void context_set_sm_pre(struct context_entry *context)
 {
-	context->lo |= BIT_ULL(4);
+	context_set_bits(&context->lo, BIT_ULL(4), BIT_ULL(4));
 }
 
 /*
@@ -1007,7 +1012,7 @@ static inline void context_set_sm_pre(struct context_entry *context)
 */
 static inline void context_clear_sm_pre(struct context_entry *context)
 {
-	context->lo &= ~BIT_ULL(4);
+	context_set_bits(&context->lo, BIT_ULL(4), 0);
 }
 
 /* Returns a number of VTD pages, but aligned to MM page size */
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 77b9b147ab50..7e2b75bcecd4 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -984,7 +984,8 @@ static int context_entry_set_pasid_table(struct context_entry *context,
 	context_clear_entry(context);
 
 	pds = context_get_sm_pds(table);
-	context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
+	WRITE_ONCE(context->lo,
+		   (u64)virt_to_phys(table->table) | context_pdts(pds));
 	context_set_sm_rid2pasid(context, IOMMU_NO_PASID);
 
 	if (info->ats_supported)
--
2.47.3
On 12/21/25 09:43, Dmytro Maluka wrote:
> When updating context table entries, we do take care to set the present
> bit as the last step, i.e. in the following order:
>
> 	context_clear_entry(context);
> 	<set other bits>
> 	context_set_present(context);
>
> However, we don't actually ensure this order, i.e. don't prevent the
> compiler from reordering it. And since context entries may be updated at
> runtime when translation is already enabled, this may potentially allow
> a time window when a device can already do DMA while the translation is
> not properly set up yet (e.g. the context entry may point to an
> arbitrary page table).
>
> To easily fix this, change context_set_*() and context_clear_*() helpers
> to use READ_ONCE/WRITE_ONCE, to ensure that the ordering between updates
> of individual bits in context entries matches the order of calling those
> helpers, just like we already do for PASID table entries.
>
> Link: https://lore.kernel.org/all/aTG7gc7I5wExai3S@google.com/
> Signed-off-by: Dmytro Maluka <dmaluka@chromium.org>
> ---
>  drivers/iommu/intel/iommu.h | 37 +++++++++++++++++++++----------------
>  drivers/iommu/intel/pasid.c |  3 ++-
>  2 files changed, 23 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
> index 25c5e22096d4..7f8f004fa756 100644
> --- a/drivers/iommu/intel/iommu.h
> +++ b/drivers/iommu/intel/iommu.h
> @@ -869,7 +869,7 @@ static inline bool dma_pte_superpage(struct dma_pte *pte)
>
>  static inline bool context_present(struct context_entry *context)
>  {
> -	return (context->lo & 1);
> +	return READ_ONCE(context->lo) & 1;
>  }
>
>  #define LEVEL_STRIDE		(9)
> @@ -897,46 +897,51 @@ static inline int pfn_level_offset(u64 pfn, int level)
>  	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
>  }
>
> +static inline void context_set_bits(u64 *ptr, u64 mask, u64 bits)
> +{
> +	u64 old;
> +
> +	old = READ_ONCE(*ptr);
> +	WRITE_ONCE(*ptr, (old & ~mask) | bits);
> +}
Add a line to ensure that the input "bits" cannot overflow the assigned
"mask".
static inline void context_set_bits(u64 *ptr, u64 mask, u64 bits)
{
	u64 val;

	val = READ_ONCE(*ptr);
	val &= ~mask;
	val |= (bits & mask);
	WRITE_ONCE(*ptr, val);
}
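
For example (with a made-up out-of-range value, just for illustration):
with the unmasked version, a buggy call like

	context_set_domain_id(context, 0x10001);

would pass bits = 0x10001 << 8 = 0x1000100 and silently set bit 24 of
context->hi, outside the GENMASK_ULL(23, 8) field, while with
"bits & mask" the stray bit is simply dropped and the write stays
confined to the domain id field.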
>
>  static inline void context_set_present(struct context_entry *context)
>  {
> -	context->lo |= 1;
> +	context_set_bits(&context->lo, 1 << 0, 1);
>  }
How about adding a smp_wmb() before setting the present bit? Maybe it's
unnecessary on the x86 architecture, but at least it's harmless and more
readable. Or not?
static inline void context_set_present(struct context_entry *context)
{
	smp_wmb();
	context_set_bits(&context->lo, 1ULL << 0, 1ULL);
}
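
(And on x86 smp_wmb() compiles down to barrier(), i.e. a pure compiler
barrier, so at least its runtime cost would be zero.)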
Thanks,
baolu
On Sun, Dec 21, 2025 at 05:04:27PM +0800, Baolu Lu wrote:
> On 12/21/25 09:43, Dmytro Maluka wrote:
> > +static inline void context_set_bits(u64 *ptr, u64 mask, u64 bits)
> > +{
> > +	u64 old;
> > +
> > +	old = READ_ONCE(*ptr);
> > +	WRITE_ONCE(*ptr, (old & ~mask) | bits);
> > +}
>
> Add a line to ensure that the input "bits" cannot overflow the assigned
> "mask".
>
> static inline void context_set_bits(u64 *ptr, u64 mask, u64 bits)
> {
> 	u64 val;
>
> 	val = READ_ONCE(*ptr);
> 	val &= ~mask;
> 	val |= (bits & mask);
> 	WRITE_ONCE(*ptr, val);
> }
Makes sense. And then worth doing the same in pasid_set_bits() as well?
Actually we can use the same helper for both context and pasid entries
(rename it e.g. to entry_set_bits()).
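
For concreteness, an untested sketch of such a shared helper (your
masking plus the rename; where exactly it should live so that both the
context and PASID paths can use it is a separate question):

	/*
	 * Update fields of a context or PASID table entry.
	 * READ_ONCE()/WRITE_ONCE() keep the compiler from tearing or
	 * reordering the update; masking "bits" keeps a buggy caller
	 * confined to its field.
	 */
	static inline void entry_set_bits(u64 *ptr, u64 mask, u64 bits)
	{
		u64 val;

		val = READ_ONCE(*ptr);
		val &= ~mask;
		val |= bits & mask;
		WRITE_ONCE(*ptr, val);
	}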
> >  static inline void context_set_present(struct context_entry *context)
> >  {
> > -	context->lo |= 1;
> > +	context_set_bits(&context->lo, 1 << 0, 1);
> >  }
>
> How about adding a smp_wmb() before setting the present bit? Maybe it's
> unnecessary on the x86 architecture, but at least it's harmless and more
> readable. Or not?
>
> static inline void context_set_present(struct context_entry *context)
> {
> 	smp_wmb();
> 	context_set_bits(&context->lo, 1ULL << 0, 1ULL);
> }
Maybe... And if so, then in pasid_set_present() as well?
Actually this would make a slight behavioral difference even on x86:
right now this patch only provides ordering of context entry updates
between each other, while smp_wmb() would add a full compiler barrier
here.
So this barrier may be redundant as long as we always use these
context_*() helpers (and thus always use WRITE_ONCE) for any updates of
context entries. On the other hand, it might make it more robust if we
still occasionally do that without WRITE_ONCE, for example in
context_entry_set_pasid_table() which I also changed to use WRITE_ONCE
in this patch.
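
(To make the hazard concrete: the problematic case is a plain store to
one half of the entry next to a WRITE_ONCE() to the other half, e.g.
hypothetically:

	context->hi |= value & 7;	/* plain store, no WRITE_ONCE() */
	context_set_present(context);	/* WRITE_ONCE() on context->lo */

Since only one of the two stores is volatile and they target different
words, the compiler may legally sink the plain store below the
present-bit update; a smp_wmb() in context_set_present() would rule
that out.)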
On 12/21/25 21:11, Dmytro Maluka wrote:
> On Sun, Dec 21, 2025 at 05:04:27PM +0800, Baolu Lu wrote:
>> On 12/21/25 09:43, Dmytro Maluka wrote:
>>> +static inline void context_set_bits(u64 *ptr, u64 mask, u64 bits)
>>> +{
>>> +	u64 old;
>>> +
>>> +	old = READ_ONCE(*ptr);
>>> +	WRITE_ONCE(*ptr, (old & ~mask) | bits);
>>> +}
>>
>> Add a line to ensure that the input "bits" cannot overflow the assigned
>> "mask".
>>
>> static inline void context_set_bits(u64 *ptr, u64 mask, u64 bits)
>> {
>> 	u64 val;
>>
>> 	val = READ_ONCE(*ptr);
>> 	val &= ~mask;
>> 	val |= (bits & mask);
>> 	WRITE_ONCE(*ptr, val);
>> }
>
> Makes sense. And then worth doing the same in pasid_set_bits() as well?
>
> Actually we can use the same helper for both context and pasid entries
> (rename it e.g. to entry_set_bits()).
Yeah, that sounds better.
>
>>>  static inline void context_set_present(struct context_entry *context)
>>>  {
>>> -	context->lo |= 1;
>>> +	context_set_bits(&context->lo, 1 << 0, 1);
>>>  }
>>
>> How about adding a smp_wmb() before setting the present bit? Maybe it's
>> unnecessary on the x86 architecture, but at least it's harmless and more
>> readable. Or not?
>>
>> static inline void context_set_present(struct context_entry *context)
>> {
>> 	smp_wmb();
>> 	context_set_bits(&context->lo, 1ULL << 0, 1ULL);
>> }
>
> Maybe... And if so, then in pasid_set_present() as well?
>
> Actually this would make a slight behavioral difference even on x86:
> right now this patch only provides ordering of context entry updates
> between each other, while smp_wmb() would add a full compiler barrier
> here.
>
> So this barrier may be redundant as long as we always use these
> context_*() helpers (and thus always use WRITE_ONCE) for any updates of
> context entries. On the other hand, it might make it more robust if we
> still occasionally do that without WRITE_ONCE, for example in
> context_entry_set_pasid_table() which I also changed to use WRITE_ONCE
> in this patch.
Fair point. It is somewhat redundant. However, I would suggest adding a
comment there. For example:
static inline void context_set_present(struct context_entry *context)
{
	/*
	 * smp_wmb() is unnecessary here because x86 hardware naturally
	 * keeps writes in order and all context entry modifications
	 * use WRITE_ONCE().
	 */
	context_set_bits(&context->lo, 1ULL << 0, 1ULL);
}
So that people are still aware of this when porting this driver to
platforms with a weak memory model.
Thanks,
baolu
On Tue, Dec 23, 2025 at 02:10:45PM +0800, Baolu Lu wrote:
> On 12/21/25 21:11, Dmytro Maluka wrote:
> > On Sun, Dec 21, 2025 at 05:04:27PM +0800, Baolu Lu wrote:
> > > On 12/21/25 09:43, Dmytro Maluka wrote:
> > > >  static inline void context_set_present(struct context_entry *context)
> > > >  {
> > > > -	context->lo |= 1;
> > > > +	context_set_bits(&context->lo, 1 << 0, 1);
> > > >  }
> > >
> > > How about adding a smp_wmb() before setting the present bit? Maybe it's
> > > unnecessary on the x86 architecture, but at least it's harmless and more
> > > readable. Or not?
> > >
> > > static inline void context_set_present(struct context_entry *context)
> > > {
> > > 	smp_wmb();
> > > 	context_set_bits(&context->lo, 1ULL << 0, 1ULL);
> > > }
> >
> > Maybe... And if so, then in pasid_set_present() as well?
> >
> > Actually this would make a slight behavioral difference even on x86:
> > right now this patch only provides ordering of context entry updates
> > between each other, while smp_wmb() would add a full compiler barrier
> > here.
> >
> > So this barrier may be redundant as long as we always use these
> > context_*() helpers (and thus always use WRITE_ONCE) for any updates of
> > context entries. On the other hand, it might make it more robust if we
> > still occasionally do that without WRITE_ONCE, for example in
> > context_entry_set_pasid_table() which I also changed to use WRITE_ONCE
> > in this patch.
>
> Fair point. It is somewhat redundant. However, I would suggest adding a
> comment there. For example:
>
> static inline void context_set_present(struct context_entry *context)
> {
> 	/*
> 	 * smp_wmb() is unnecessary here because x86 hardware naturally
> 	 * keeps writes in order and all context entry modifications
> 	 * use WRITE_ONCE().
> 	 */
> 	context_set_bits(&context->lo, 1ULL << 0, 1ULL);
> }
>
> So that people are still aware of this when porting this driver to
> platforms with a weak memory model.
What I actually had in mind was to add smp_wmb() here anyway. It might
be useful as an extra safety measure in case we still forget to use
WRITE_ONCE in some cases, and its performance cost would be virtually
non-existent, I guess. Done in v2.
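
I.e. something along these lines (a sketch of the idea, not the actual
v2 hunk):

	static inline void context_set_present(struct context_entry *context)
	{
		/*
		 * Ensure all previous updates of the entry are ordered
		 * before setting the present bit, even if some of them
		 * were done without WRITE_ONCE().
		 */
		smp_wmb();
		context_set_bits(&context->lo, 1 << 0, 1);
	}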