[PATCH v2 18/18] VT-d: free all-empty page tables

Jan Beulich posted 18 patches 3 years, 2 months ago
There is a newer version of this series
[PATCH v2 18/18] VT-d: free all-empty page tables
Posted by Jan Beulich 3 years, 2 months ago
When a page table ends up with no present entries left, it can be
replaced by a non-present entry at the next higher level. The page table
itself can then be scheduled for freeing.

Note that update_contig_markers() needs to be called right away in all
places where entries get updated, not just the one where entries get
cleared, even though its return value isn't used in those places yet.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: New.

--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -42,6 +42,9 @@
 #include "vtd.h"
 #include "../ats.h"
 
+#define CONTIG_MASK DMA_PTE_CONTIG_MASK
+#include <asm/contig-marker.h>
+
 /* dom_io is used as a sentinel for quarantined devices */
 #define QUARANTINE_SKIP(d) ((d) == dom_io && !dom_iommu(d)->arch.vtd.pgd_maddr)
 
@@ -368,6 +371,9 @@ static uint64_t addr_to_dma_page_maddr(s
 
             write_atomic(&pte->val, new_pte.val);
             iommu_sync_cache(pte, sizeof(struct dma_pte));
+            update_contig_markers(&parent->val,
+                                  address_level_offset(addr, level),
+                                  level, PTE_kind_table);
         }
 
         if ( --level == target )
@@ -773,7 +779,7 @@ static int dma_pte_clear_one(struct doma
     struct domain_iommu *hd = dom_iommu(domain);
     struct dma_pte *page = NULL, *pte = NULL, old;
     u64 pg_maddr;
-    unsigned int level = (order / LEVEL_STRIDE) + 1;
+    unsigned int level = (order / LEVEL_STRIDE) + 1, pt_lvl = level;
 
     spin_lock(&hd->arch.mapping_lock);
     /* get target level pte */
@@ -796,9 +802,31 @@ static int dma_pte_clear_one(struct doma
 
     old = *pte;
     dma_clear_pte(*pte);
+    iommu_sync_cache(pte, sizeof(*pte));
+
+    while ( update_contig_markers(&page->val,
+                                  address_level_offset(addr, pt_lvl),
+                                  pt_lvl, PTE_kind_null) &&
+            ++pt_lvl < agaw_to_level(hd->arch.vtd.agaw) )
+    {
+        struct page_info *pg = maddr_to_page(pg_maddr);
+
+        unmap_vtd_domain_page(page);
+
+        pg_maddr = addr_to_dma_page_maddr(domain, addr, pt_lvl, flush_flags,
+                                          false);
+        BUG_ON(pg_maddr < PAGE_SIZE);
+
+        page = map_vtd_domain_page(pg_maddr);
+        pte = &page[address_level_offset(addr, pt_lvl)];
+        dma_clear_pte(*pte);
+        iommu_sync_cache(pte, sizeof(*pte));
+
+        *flush_flags |= IOMMU_FLUSHF_all;
+        iommu_queue_free_pgtable(domain, pg);
+    }
 
     spin_unlock(&hd->arch.mapping_lock);
-    iommu_sync_cache(pte, sizeof(struct dma_pte));
 
     unmap_vtd_domain_page(page);
 
@@ -1952,8 +1980,11 @@ static int __must_check intel_iommu_map_
     }
 
     *pte = new;
-
     iommu_sync_cache(pte, sizeof(struct dma_pte));
+    update_contig_markers(&page->val,
+                          address_level_offset(dfn_to_daddr(dfn), level),
+                          level, PTE_kind_leaf);
+
     spin_unlock(&hd->arch.mapping_lock);
     unmap_vtd_domain_page(page);