When a page table ends up with no present entries left, it can be
replaced by a non-present entry at the next higher level, and the page
table itself can then be scheduled for freeing. Since doing so may in
turn leave the next higher level's table without present entries, the
process can repeat all the way up the hierarchy.

Note that, while its return value isn't consumed everywhere yet,
update_contig_markers() needs to be called right away at all sites
where entries get updated, not just the one where entries get cleared.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: New.
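
For reference, a minimal sketch of the contract assumed of
update_contig_markers() (the helper is provided elsewhere in this
series; the enumerator and macro names below are taken from the diff,
while the exact parameter list is inferred from the call sites):

  /* Kind of entry a just-updated PTE slot now holds. */
  enum PTE_kind {
      PTE_kind_null,   /* the slot was cleared */
      PTE_kind_leaf,   /* the slot maps a page */
      PTE_kind_table,  /* the slot references a next-level table */
  };

  /*
   * Maintain the contiguity markers, kept in the PTE bits selected by
   * CONTIG_MASK, after slot @idx of the table at @pt changed to @kind.
   * The return value, consumed only for PTE_kind_null in this patch,
   * signals that the table no longer holds any present entries and
   * hence can be freed once its reference one level up is gone.
   */
  bool update_contig_markers(uint64_t *pt, unsigned int idx,
                             unsigned int level, enum PTE_kind kind);
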
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -21,6 +21,10 @@
#include "iommu.h"
+#define CONTIG_MASK IOMMU_PTE_CONTIG_MASK
+#include <asm/contig-marker.h>
+
/* Given pfn and page table level, return pde index */
static unsigned int pfn_to_pde_idx(unsigned long pfn, unsigned int level)
{
@@ -33,16 +37,21 @@ static unsigned int pfn_to_pde_idx(unsig
static union amd_iommu_pte clear_iommu_pte_present(unsigned long l1_mfn,
unsigned long dfn,
- unsigned int level)
+ unsigned int level,
+ bool *free)
{
union amd_iommu_pte *table, *pte, old;
+ unsigned int idx = pfn_to_pde_idx(dfn, level);
table = map_domain_page(_mfn(l1_mfn));
- pte = &table[pfn_to_pde_idx(dfn, level)];
+ pte = &table[idx];
old = *pte;
write_atomic(&pte->raw, 0);
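+ /* A "true" result signals that the table has no present entries left. */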
+ *free = update_contig_markers(&table->raw, idx, level, PTE_kind_null);
+
unmap_domain_page(table);
return old;
@@ -85,7 +94,12 @@ static union amd_iommu_pte set_iommu_pte
if ( !old.pr || old.next_level ||
old.mfn != next_mfn ||
old.iw != iw || old.ir != ir )
+ {
set_iommu_pde_present(pde, next_mfn, 0, iw, ir);
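+ /* Also update the contiguity markers, even if the result is unused yet. */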
+ update_contig_markers(&table->raw, pfn_to_pde_idx(dfn, level), level,
+ PTE_kind_leaf);
+ }
else
old.pr = false; /* signal "no change" to the caller */
@@ -259,6 +273,10 @@ static int iommu_pde_from_dfn(struct dom
smp_wmb();
set_iommu_pde_present(pde, next_table_mfn, next_level, true,
true);
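+ /* Record that this slot now references a page table. */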
+ update_contig_markers(&next_table_vaddr->raw,
+ pfn_to_pde_idx(dfn, level),
+ level, PTE_kind_table);
*flush_flags |= IOMMU_FLUSHF_modified;
}
@@ -284,6 +302,9 @@ static int iommu_pde_from_dfn(struct dom
next_table_mfn = mfn_x(page_to_mfn(table));
set_iommu_pde_present(pde, next_table_mfn, next_level, true,
true);
+ update_contig_markers(&next_table_vaddr->raw,
+ pfn_to_pde_idx(dfn, level),
+ level, PTE_kind_table);
}
else /* should never reach here */
{
@@ -410,8 +431,26 @@ int amd_iommu_unmap_page(struct domain *
if ( pt_mfn )
{
+ bool free;
+ unsigned int pt_lvl = level;
+
/* Mark PTE as 'page not present'. */
- old = clear_iommu_pte_present(pt_mfn, dfn_x(dfn), level);
+ old = clear_iommu_pte_present(pt_mfn, dfn_x(dfn), level, &free);
+
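+ /* Free now-empty page tables, unhooking each from the level above. */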
+ while ( unlikely(free) && ++pt_lvl < hd->arch.amd.paging_mode )
+ {
+ struct page_info *pg = mfn_to_page(_mfn(pt_mfn));
+
+ if ( iommu_pde_from_dfn(d, dfn_x(dfn), pt_lvl, &pt_mfn,
+ flush_flags, false) )
+ BUG();
+ BUG_ON(!pt_mfn);
+
+ clear_iommu_pte_present(pt_mfn, dfn_x(dfn), pt_lvl, &free);
+ *flush_flags |= IOMMU_FLUSHF_all;
+ iommu_queue_free_pgtable(d, pg);
+ }
}
spin_unlock(&hd->arch.mapping_lock);