For vendor-specific code to support superpages we need to be able to
deal with a superpage mapping replacing an intermediate page table (or
hierarchy thereof). Consequently an iommu_alloc_pgtable() counterpart is
needed to free individual page tables while a domain is still alive.
Since the freeing needs to be deferred until after a suitable IOTLB
flush has been performed, released page tables get queued for processing
by a tasklet.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
I was considering whether to use a softirq-tasklet instead. This would
have the benefit of avoiding extra scheduling operations, but come with
the risk of the freeing happening prematurely because of a
process_pending_softirqs() somewhere.
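Purely as a sketch of that alternative (not what this patch does): the
change would amount to no more than using the softirq variant of the
init call in cpu_callback(), i.e.

    softirq_tasklet_init(tasklet, free_queued_pgtables, list);

with everything else left untouched.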
---
v6: Extend comment on the use of process_pending_softirqs().
v5: Fix CPU_UP_PREPARE for BIGMEM. Schedule tasklet in CPU_DOWN_FAILED
when list is not empty. Skip all processing in CPU_DEAD when list is
empty.
v4: Change type of iommu_queue_free_pgtable()'s 1st parameter. Re-base.
v3: Call process_pending_softirqs() from free_queued_pgtables().

--- a/xen/arch/x86/include/asm/iommu.h
+++ b/xen/arch/x86/include/asm/iommu.h
@@ -147,6 +147,7 @@ void iommu_free_domid(domid_t domid, uns
int __must_check iommu_free_pgtables(struct domain *d);
struct domain_iommu;
struct page_info *__must_check iommu_alloc_pgtable(struct domain_iommu *hd);
+void iommu_queue_free_pgtable(struct domain_iommu *hd, struct page_info *pg);

#endif /* !__ARCH_X86_IOMMU_H__ */
/*
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -12,6 +12,7 @@
* this program; If not, see <http://www.gnu.org/licenses/>.
*/

+#include <xen/cpu.h>
#include <xen/sched.h>
#include <xen/iocap.h>
#include <xen/iommu.h>
@@ -551,6 +552,103 @@ struct page_info *iommu_alloc_pgtable(st
    return pg;
}

+/*
+ * Intermediate page tables which get replaced by large pages may only be
+ * freed after a suitable IOTLB flush. Hence such pages get queued on a
+ * per-CPU list, with a per-CPU tasklet processing the list on the assumption
+ * that the necessary IOTLB flush will have occurred by the time tasklets get
+ * to run. (List and tasklet being per-CPU has the benefit of accesses not
+ * requiring any locking.)
+ */
+static DEFINE_PER_CPU(struct page_list_head, free_pgt_list);
+static DEFINE_PER_CPU(struct tasklet, free_pgt_tasklet);
+
+static void free_queued_pgtables(void *arg)
+{
+    struct page_list_head *list = arg;
+    struct page_info *pg;
+    unsigned int done = 0;
+
+    while ( (pg = page_list_remove_head(list)) )
+    {
+        free_domheap_page(pg);
+
+        /*
+         * Just to be on the safe side, check for processing softirqs every
+         * once in a while. Generally it is expected that parties queuing
+         * pages for freeing will find a need for preemption before too many
+         * pages can be queued. Granularity of checking is somewhat arbitrary.
+         */
+        if ( !(++done & 0x1ff) )
+            process_pending_softirqs();
+    }
+}
+
+void iommu_queue_free_pgtable(struct domain_iommu *hd, struct page_info *pg)
+{
+    unsigned int cpu = smp_processor_id();
+
+    spin_lock(&hd->arch.pgtables.lock);
+    page_list_del(pg, &hd->arch.pgtables.list);
+    spin_unlock(&hd->arch.pgtables.lock);
+
+    page_list_add_tail(pg, &per_cpu(free_pgt_list, cpu));
+
+    tasklet_schedule(&per_cpu(free_pgt_tasklet, cpu));
+}
+
+static int cf_check cpu_callback(
+    struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+    unsigned int cpu = (unsigned long)hcpu;
+    struct page_list_head *list = &per_cpu(free_pgt_list, cpu);
+    struct tasklet *tasklet = &per_cpu(free_pgt_tasklet, cpu);
+
+    switch ( action )
+    {
+    case CPU_DOWN_PREPARE:
+        tasklet_kill(tasklet);
+        break;
+
+    case CPU_DEAD:
+        if ( !page_list_empty(list) )
+        {
+            page_list_splice(list, &this_cpu(free_pgt_list));
+            INIT_PAGE_LIST_HEAD(list);
+            tasklet_schedule(&this_cpu(free_pgt_tasklet));
+        }
+        break;
+
+    case CPU_UP_PREPARE:
+        INIT_PAGE_LIST_HEAD(list);
+        fallthrough;
+    case CPU_DOWN_FAILED:
+        tasklet_init(tasklet, free_queued_pgtables, list);
+        if ( !page_list_empty(list) )
+            tasklet_schedule(tasklet);
+        break;
+    }
+
+    return NOTIFY_DONE;
+}
+
+static struct notifier_block cpu_nfb = {
+    .notifier_call = cpu_callback,
+};
+
+static int __init cf_check bsp_init(void)
+{
+    if ( iommu_enabled )
+    {
+        cpu_callback(&cpu_nfb, CPU_UP_PREPARE,
+                     (void *)(unsigned long)smp_processor_id());
+        register_cpu_notifier(&cpu_nfb);
+    }
+
+    return 0;
+}
+presmp_initcall(bsp_init);
+
bool arch_iommu_use_permitted(const struct domain *d)
{
    /*
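For illustration (not part of the patch): a vendor driver replacing an
intermediate page table by a superpage mapping would then use the new
helper along these lines, where the PDE handling below is a hypothetical
stand-in for the vendor specific details:

    static void replace_by_superpage(struct domain_iommu *hd,
                                     uint64_t *pde, uint64_t new_pde)
    {
        /*
         * Intermediate table so far referenced by this PDE; how the address
         * is extracted is vendor specific (hypothetical here).
         */
        struct page_info *pg = maddr_to_page(*pde & PADDR_MASK & PAGE_MASK);

        /* Install the replacement (superpage) entry. */
        write_atomic(pde, new_pde);

        /*
         * The replaced table may still be in use by in-flight IOTLB walks;
         * don't free it synchronously, but queue it to be freed only after
         * the necessary flush will have happened.
         */
        iommu_queue_free_pgtable(hd, pg);
    }

The IOTLB flush itself then occurs on the normal (un)map path, ahead of
the tasklet getting to run.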
On Thu, Jun 09, 2022 at 12:16:38PM +0200, Jan Beulich wrote:
> [...]
> +static void free_queued_pgtables(void *arg)

I think this is missing a cf_check attribute?

> +{
> +    struct page_list_head *list = arg;
> +    struct page_info *pg;
> +    unsigned int done = 0;

Might be worth adding an:

ASSERT(list == &this_cpu(free_pgt_list));

To make sure tasklets are never executed on the wrong CPU.
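I.e., together with the cf_check addition above, the function would
presumably end up looking like (long comment omitted here for brevity):

    static void cf_check free_queued_pgtables(void *arg)
    {
        struct page_list_head *list = arg;
        struct page_info *pg;
        unsigned int done = 0;

        /* Tasklet must be running on the CPU whose list it processes. */
        ASSERT(list == &this_cpu(free_pgt_list));

        while ( (pg = page_list_remove_head(list)) )
        {
            free_domheap_page(pg);

            if ( !(++done & 0x1ff) )
                process_pending_softirqs();
        }
    }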
Apart from that:

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks, Roger.
On 28.06.2022 14:39, Roger Pau Monné wrote:
> On Thu, Jun 09, 2022 at 12:16:38PM +0200, Jan Beulich wrote:
>> [...]
>> +static void free_queued_pgtables(void *arg)
>
> I think this is missing a cf_check attribute?

Oh, indeed - thanks for spotting. We're still lacking compiler detection
of such issues.
>> +{
>> +    struct page_list_head *list = arg;
>> +    struct page_info *pg;
>> +    unsigned int done = 0;
>
> Might be worth adding an:
>
> ASSERT(list == &this_cpu(free_pgt_list));
>
> To make sure tasklets are never executed on the wrong CPU.

Sure, added.
> Apart from that:
>
> Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks.

Jan