Page tables are used for two purposes after allocation: They either start
out all empty, or they get filled to replace a superpage. Subsequently, in
order to replace all-empty or fully contiguous page tables, contiguous
sub-regions need to be recorded within individual page tables. Install the
initial set of markers immediately after allocation, and make sure to
retain these markers when further populating a page table in preparation
for it to replace a superpage.
The markers are simply 4-bit fields holding the order value of
contiguous entries. To demonstrate this, if a page table had just 16
entries, this would be the initial (fully contiguous) set of markers:
index 0 1 2 3 4 5 6 7 8 9 A B C D E F
marker 4 0 1 0 2 0 1 0 3 0 1 0 2 0 1 0
"Contiguous" here means not only present entries with successively
increasing MFNs, each one suitably aligned for its slot, but also a
respective number of all non-present entries.
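
For reference, a minimal standalone sketch (not part of the patch) that
reproduces the marker pattern above; the 16-entry table size and the
__builtin_ctz()-based helper are illustrative only, the patch itself uses
find_first_set_bit() on full 512-entry tables:

#include <stdio.h>

/*
 * Order marker for slot i of an n-entry table: slot 0 carries the order
 * of the whole table, every other slot the number of trailing zero bits
 * of its index (i.e. the order of the contiguous run it could head).
 */
static unsigned int initial_marker(unsigned int i, unsigned int n)
{
    return i ? __builtin_ctz(i) : __builtin_ctz(n);
}

int main(void)
{
    for ( unsigned int i = 0; i < 16; ++i )
        printf("%X:%u ", i, initial_marker(i, 16));
    printf("\n"); /* prints 0:4 1:0 2:1 3:0 4:2 ... E:1 F:0 */
    return 0;
}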
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
An alternative to the ASSERT()s added to set_iommu_ptes_present() would
be to make the function less general-purpose; it's used in a single
place only after all (i.e. it might as well be folded into its only
caller).
---
v2: New.
--- a/xen/drivers/passthrough/amd/iommu-defs.h
+++ b/xen/drivers/passthrough/amd/iommu-defs.h
@@ -445,6 +445,8 @@ union amd_iommu_x2apic_control {
#define IOMMU_PAGE_TABLE_U32_PER_ENTRY (IOMMU_PAGE_TABLE_ENTRY_SIZE / 4)
#define IOMMU_PAGE_TABLE_ALIGNMENT 4096
+#define IOMMU_PTE_CONTIG_MASK 0x1e /* The ign0 field below. */
+
union amd_iommu_pte {
uint64_t raw;
struct {
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -116,7 +116,19 @@ static void set_iommu_ptes_present(unsig
while ( nr_ptes-- )
{
- set_iommu_pde_present(pde, next_mfn, 0, iw, ir);
+ ASSERT(!pde->next_level);
+ ASSERT(!pde->u);
+
+ if ( pde > table )
+ ASSERT(pde->ign0 == find_first_set_bit(pde - table));
+ else
+ ASSERT(pde->ign0 == PAGE_SHIFT - 3);
+
+ pde->iw = iw;
+ pde->ir = ir;
+ pde->fc = true; /* See set_iommu_pde_present(). */
+ pde->mfn = next_mfn;
+ pde->pr = true;
++pde;
next_mfn += page_sz;
@@ -232,7 +244,7 @@ static int iommu_pde_from_dfn(struct dom
mfn = next_table_mfn;
/* allocate lower level page table */
- table = iommu_alloc_pgtable(d);
+ table = iommu_alloc_pgtable(d, IOMMU_PTE_CONTIG_MASK);
if ( table == NULL )
{
AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
@@ -262,7 +274,7 @@ static int iommu_pde_from_dfn(struct dom
if ( next_table_mfn == 0 )
{
- table = iommu_alloc_pgtable(d);
+ table = iommu_alloc_pgtable(d, IOMMU_PTE_CONTIG_MASK);
if ( table == NULL )
{
AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
@@ -648,7 +660,7 @@ int __init amd_iommu_quarantine_init(str
spin_lock(&hd->arch.mapping_lock);
- hd->arch.amd.root_table = iommu_alloc_pgtable(d);
+ hd->arch.amd.root_table = iommu_alloc_pgtable(d, 0);
if ( !hd->arch.amd.root_table )
goto out;
@@ -663,7 +675,7 @@ int __init amd_iommu_quarantine_init(str
* page table pages, and the resulting allocations are always
* zeroed.
*/
- pg = iommu_alloc_pgtable(d);
+ pg = iommu_alloc_pgtable(d, 0);
if ( !pg )
break;
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -238,7 +238,7 @@ int amd_iommu_alloc_root(struct domain *
if ( unlikely(!hd->arch.amd.root_table) )
{
- hd->arch.amd.root_table = iommu_alloc_pgtable(d);
+ hd->arch.amd.root_table = iommu_alloc_pgtable(d, 0);
if ( !hd->arch.amd.root_table )
return -ENOMEM;
}
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -297,7 +297,7 @@ static uint64_t addr_to_dma_page_maddr(s
goto out;
pte_maddr = level;
- if ( !(pg = iommu_alloc_pgtable(domain)) )
+ if ( !(pg = iommu_alloc_pgtable(domain, 0)) )
goto out;
hd->arch.vtd.pgd_maddr = page_to_maddr(pg);
@@ -339,7 +339,7 @@ static uint64_t addr_to_dma_page_maddr(s
}
pte_maddr = level - 1;
- pg = iommu_alloc_pgtable(domain);
+ pg = iommu_alloc_pgtable(domain, DMA_PTE_CONTIG_MASK);
if ( !pg )
break;
@@ -351,12 +351,13 @@ static uint64_t addr_to_dma_page_maddr(s
struct dma_pte *split = map_vtd_domain_page(pte_maddr);
unsigned long inc = 1UL << level_to_offset_bits(level - 1);
- split[0].val = pte->val;
+ split[0].val |= pte->val & ~DMA_PTE_CONTIG_MASK;
if ( inc == PAGE_SIZE )
split[0].val &= ~DMA_PTE_SP;
for ( offset = 1; offset < PTE_NUM; ++offset )
- split[offset].val = split[offset - 1].val + inc;
+ split[offset].val |=
+ (split[offset - 1].val & ~DMA_PTE_CONTIG_MASK) + inc;
iommu_sync_cache(split, PAGE_SIZE);
unmap_vtd_domain_page(split);
@@ -1943,7 +1944,7 @@ static int __must_check intel_iommu_map_
if ( iommu_snoop )
dma_set_pte_snp(new);
- if ( old.val == new.val )
+ if ( !((old.val ^ new.val) & ~DMA_PTE_CONTIG_MASK) )
{
spin_unlock(&hd->arch.mapping_lock);
unmap_vtd_domain_page(page);
@@ -2798,7 +2799,7 @@ static int __init intel_iommu_quarantine
goto out;
}
- pg = iommu_alloc_pgtable(d);
+ pg = iommu_alloc_pgtable(d, 0);
rc = -ENOMEM;
if ( !pg )
@@ -2817,7 +2818,7 @@ static int __init intel_iommu_quarantine
* page table pages, and the resulting allocations are always
* zeroed.
*/
- pg = iommu_alloc_pgtable(d);
+ pg = iommu_alloc_pgtable(d, 0);
if ( !pg )
goto out;
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -265,6 +265,7 @@ struct dma_pte {
#define DMA_PTE_PROT (DMA_PTE_READ | DMA_PTE_WRITE)
#define DMA_PTE_SP (1 << 7)
#define DMA_PTE_SNP (1 << 11)
+#define DMA_PTE_CONTIG_MASK (0xfull << PADDR_BITS)
#define dma_clear_pte(p) do {(p).val = 0;} while(0)
#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while(0)
#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while(0)
@@ -278,7 +279,7 @@ struct dma_pte {
#define dma_pte_write(p) (dma_pte_prot(p) & DMA_PTE_WRITE)
#define dma_pte_addr(p) ((p).val & PADDR_MASK & PAGE_MASK_4K)
#define dma_set_pte_addr(p, addr) do {\
- (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+ (p).val |= ((addr) & PADDR_MASK & PAGE_MASK_4K); } while (0)
#define dma_pte_present(p) (((p).val & DMA_PTE_PROT) != 0)
#define dma_pte_superpage(p) (((p).val & DMA_PTE_SP) != 0)
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -433,12 +433,12 @@ int iommu_free_pgtables(struct domain *d
return 0;
}
-struct page_info *iommu_alloc_pgtable(struct domain *d)
+struct page_info *iommu_alloc_pgtable(struct domain *d, uint64_t contig_mask)
{
struct domain_iommu *hd = dom_iommu(d);
unsigned int memflags = 0;
struct page_info *pg;
- void *p;
+ uint64_t *p;
#ifdef CONFIG_NUMA
if ( hd->node != NUMA_NO_NODE )
@@ -450,7 +450,28 @@ struct page_info *iommu_alloc_pgtable(st
return NULL;
p = __map_domain_page(pg);
- clear_page(p);
+
+ if ( contig_mask )
+ {
+ unsigned int i, shift = find_first_set_bit(contig_mask);
+
+ ASSERT(((PAGE_SHIFT - 3) & (contig_mask >> shift)) == PAGE_SHIFT - 3);
+
+ p[0] = (PAGE_SHIFT - 3ull) << shift;
+ p[1] = 0;
+ p[2] = 1ull << shift;
+ p[3] = 0;
+
+ for ( i = 4; i < PAGE_SIZE / 8; i += 4 )
+ {
+ p[i + 0] = (find_first_set_bit(i) + 0ull) << shift;
+ p[i + 1] = 0;
+ p[i + 2] = 1ull << shift;
+ p[i + 3] = 0;
+ }
+ }
+ else
+ clear_page(p);
if ( hd->platform_ops->sync_cache )
iommu_vcall(hd->platform_ops, sync_cache, p, PAGE_SIZE);
--- a/xen/include/asm-x86/iommu.h
+++ b/xen/include/asm-x86/iommu.h
@@ -142,7 +142,8 @@ int pi_update_irte(const struct pi_desc
})
int __must_check iommu_free_pgtables(struct domain *d);
-struct page_info *__must_check iommu_alloc_pgtable(struct domain *d);
+struct page_info *__must_check iommu_alloc_pgtable(struct domain *d,
+ uint64_t contig_mask);
void iommu_queue_free_pgtable(struct domain *d, struct page_info *pg);
#endif /* !__ARCH_X86_IOMMU_H__ */