:p
atchew
Login
This patch series introduces the functions necessary to build and manage RISC-V guest page tables and MMIO/RAM mappings. This patch series is based on the patch [1]: https://lore.kernel.org/xen-devel/da9273c20dc7ac1c131322e38a8cef361dfd86a9.1746530883.git.oleksii.kurochko@gmail.com/T/#u Oleksii Kurochko (6): xen/riscv: add inclusion of xen/bitops.h to asm/cmpxchg.h xen/riscv: introduce things necessary for p2m initialization xen/riscv: construct the P2M pages pool for guests xen/riscv: define pt_t and pt_walk_t structures xen/riscv: add new p2m types and helper macros for type classification xen/riscv: implement p2m mapping functionality xen/arch/riscv/Makefile | 1 + xen/arch/riscv/include/asm/cmpxchg.h | 1 + xen/arch/riscv/include/asm/domain.h | 16 + xen/arch/riscv/include/asm/mm.h | 36 +- xen/arch/riscv/include/asm/p2m.h | 121 ++- xen/arch/riscv/include/asm/page.h | 65 +- xen/arch/riscv/p2m.c | 1015 ++++++++++++++++++++++++++ 7 files changed, 1243 insertions(+), 12 deletions(-) create mode 100644 xen/arch/riscv/p2m.c -- 2.49.0
Add inclusion of xen/bitops.h to asm/cmpxchg.h to avoid compilation issues connected to GENMASK() which is used inside asm/cmpxchg.h. Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> --- This patch should go first; otherwise one of the further patches of this patch series could face a compilation issue. --- xen/arch/riscv/include/asm/cmpxchg.h | 1 + 1 file changed, 1 insertion(+) diff --git a/xen/arch/riscv/include/asm/cmpxchg.h b/xen/arch/riscv/include/asm/cmpxchg.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/cmpxchg.h +++ b/xen/arch/riscv/include/asm/cmpxchg.h @@ -XXX,XX +XXX,XX @@ #ifndef ASM__RISCV__CMPXCHG_H #define ASM__RISCV__CMPXCHG_H +#include <xen/bitops.h> #include <xen/compiler.h> #include <xen/lib.h> -- 2.49.0
Introduce the following things: - p2m_domain structure which describes per p2m-table state. - Update arch_domain structure with the structure mentioned above. - p2m_get_hostp2m() to retrieve the domain's p2m structure. - Introduce p2m_write_lock() and p2m_is_write_locked(). - p2m_init() to initialize p2m: - allocate p2m table by using p2m_alloc_table(). - initialize the lock primitive necessary to protect updates to the p2m. - Introduce the following functions to implement p2m_alloc_table(): - p2m_allocate_root() to allocate the p2m root table by using the other introduced helpers p2m_get_clean_page() and clear_and_clean_page(). - introduce p2m_force_tlb_flush_sync() to flush TLBs after p2m table allocation before it is used. (It isn't necessary at the current stage of development, but it will be useful later: once a VMID is marked unused, a new domain can reuse that VMID for its own. If the TLB is not flushed, entries can contain wrong translations.) - Implement maddr_to_page() and page_to_maddr(). Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> --- xen/arch/riscv/Makefile | 1 + xen/arch/riscv/include/asm/domain.h | 6 + xen/arch/riscv/include/asm/mm.h | 4 + xen/arch/riscv/include/asm/p2m.h | 76 +++++++++++++ xen/arch/riscv/p2m.c | 168 ++++++++++++++++++++++++++++ 5 files changed, 255 insertions(+) create mode 100644 xen/arch/riscv/p2m.c diff --git a/xen/arch/riscv/Makefile b/xen/arch/riscv/Makefile index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/Makefile +++ b/xen/arch/riscv/Makefile @@ -XXX,XX +XXX,XX @@ obj-y += entry.o obj-y += intc.o obj-y += mm.o obj-y += pt.o +obj-y += p2m.o obj-$(CONFIG_RISCV_64) += riscv64/ obj-y += sbi.o obj-y += setup.o diff --git a/xen/arch/riscv/include/asm/domain.h b/xen/arch/riscv/include/asm/domain.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/domain.h +++ b/xen/arch/riscv/include/asm/domain.h @@ -XXX,XX +XXX,XX @@ #include <xen/xmalloc.h> #include <public/hvm/params.h> +#include <asm/p2m.h> + struct hvm_domain { uint64_t 
params[HVM_NR_PARAMS]; @@ -XXX,XX +XXX,XX @@ struct arch_vcpu_io { struct arch_vcpu { }; + struct arch_domain { struct hvm_domain hvm; + + struct p2m_domain p2m; + }; #include <xen/sched.h> diff --git a/xen/arch/riscv/include/asm/mm.h b/xen/arch/riscv/include/asm/mm.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/mm.h +++ b/xen/arch/riscv/include/asm/mm.h @@ -XXX,XX +XXX,XX @@ extern struct page_info *frametable_virt_start; #define mfn_to_page(mfn) (frametable_virt_start + mfn_x(mfn)) #define page_to_mfn(pg) _mfn((pg) - frametable_virt_start) +/* Convert between machine addresses and page-info structures. */ +#define maddr_to_page(ma) mfn_to_page(maddr_to_mfn(ma)) +#define page_to_maddr(pg) (mfn_to_maddr(page_to_mfn(pg))) + static inline void *page_to_virt(const struct page_info *pg) { return mfn_to_virt(mfn_x(page_to_mfn(pg))); diff --git a/xen/arch/riscv/include/asm/p2m.h b/xen/arch/riscv/include/asm/p2m.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/p2m.h +++ b/xen/arch/riscv/include/asm/p2m.h @@ -XXX,XX +XXX,XX @@ #define ASM__RISCV__P2M_H #include <xen/errno.h> +#include <xen/mem_access.h> +#include <xen/mm.h> +#include <xen/radix-tree.h> +#include <xen/rwlock.h> +#include <xen/types.h> #include <asm/page-bits.h> #define paddr_bits PADDR_BITS +/* Get host p2m table */ +#define p2m_get_hostp2m(d) (&(d)->arch.p2m) + +/* Per-p2m-table state */ +struct p2m_domain { + /* + * Lock that protects updates to the p2m. + */ + rwlock_t lock; + + /* Page containing root p2m table */ + struct page_info *root; + + /* Pages used to construct the p2m */ + struct page_list_head pages; + + /* Address Translation Table for the p2m */ + paddr_t hgatp; + + /* + * P2M updates may required TLBs to be flushed (invalidated). + * + * Flushes may be deferred by setting 'need_flush' and then flushing + * when the p2m write lock is released. + * + * If an immediate flush is required (e.g, if a super page is + * shattered), call p2m_tlb_flush_sync(). 
+ */ + bool need_flush; + + /* Indicate if it is required to clean the cache when writing an entry */ + bool clean_pte; + + struct radix_tree_root p2m_type; + + /* + * Default P2M access type for each page in the the domain: new pages, + * swapped in pages, cleared pages, and pages that are ambiguously + * retyped get this access type. See definition of p2m_access_t. + */ + p2m_access_t default_access; + + /* Highest guest frame that's ever been mapped in the p2m */ + gfn_t max_mapped_gfn; + + /* + * Lowest mapped gfn in the p2m. When releasing mapped gfn's in a + * preemptible manner this is update to track recall where to + * resume the search. Apart from during teardown this can only + * decrease. + */ + gfn_t lowest_mapped_gfn; + + /* Back pointer to domain */ + struct domain *domain; +}; + /* * List of possible type for each page in the p2m entry. * The number of available bit per page in the pte for this purpose is 2 bits. @@ -XXX,XX +XXX,XX @@ static inline void p2m_altp2m_check(struct vcpu *v, uint16_t idx) /* Not supported on RISCV. */ } +int p2m_init(struct domain *d); + +static inline void p2m_write_lock(struct p2m_domain *p2m) +{ + write_lock(&p2m->lock); +} + +void p2m_write_unlock(struct p2m_domain *p2m); + +static inline int p2m_is_write_locked(struct p2m_domain *p2m) +{ + return rw_is_write_locked(&p2m->lock); +} + #endif /* ASM__RISCV__P2M_H */ /* diff --git a/xen/arch/riscv/p2m.c b/xen/arch/riscv/p2m.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/xen/arch/riscv/p2m.c @@ -XXX,XX +XXX,XX @@ +#include <xen/domain_page.h> +#include <xen/iommu.h> +#include <xen/lib.h> +#include <xen/mm.h> +#include <xen/pfn.h> +#include <xen/rwlock.h> +#include <xen/sched.h> +#include <xen/spinlock.h> + +#include <asm/page.h> +#include <asm/p2m.h> + +/* + * Force a synchronous P2M TLB flush. + * + * Must be called with the p2m lock held. + * + * TODO: add support of flushing TLB connected to VMID. 
+ */ +static void p2m_force_tlb_flush_sync(struct p2m_domain *p2m) +{ + ASSERT(p2m_is_write_locked(p2m)); + + /* + * TODO: shouldn't be this flush done for each physical CPU? + * If yes, then SBI call sbi_remote_hfence_gvma() could + * be used for that. + */ +#if defined(__riscv_hh) || defined(__riscv_h) + asm volatile ( "hfence.gvma" ::: "memory" ); +#else + asm volatile ( ".insn r 0x73, 0x0, 0x31, x0, x0, x0" ::: "memory" ); +#endif + + p2m->need_flush = false; +} + +static void p2m_tlb_flush_sync(struct p2m_domain *p2m) +{ + if ( p2m->need_flush ) + p2m_force_tlb_flush_sync(p2m); +} + +/* Unlock the flush and do a P2M TLB flush if necessary */ +void p2m_write_unlock(struct p2m_domain *p2m) +{ + /* + * The final flush is done with the P2M write lock taken to avoid + * someone else modifying the P2M wbefore the TLB invalidation has + * completed. + */ + p2m_tlb_flush_sync(p2m); + + write_unlock(&p2m->lock); +} + +static void clear_and_clean_page(struct page_info *page) +{ + void *p = __map_domain_page(page); + + clear_page(p); + unmap_domain_page(p); +} + +static struct page_info *p2m_get_clean_page(struct domain *d) +{ + struct page_info *page; + + /* + * As mentioned in the Priviliged Architecture Spec (version 20240411) + * As explained in Section 18.5.1, for the paged virtual-memory schemes + * (Sv32x4, Sv39x4, Sv48x4, and Sv57x4), the root page table is 16 KiB + * and must be aligned to a 16-KiB boundary. 
+ */ + page = alloc_domheap_pages(NULL, 2, 0); + if ( page == NULL ) + return NULL; + + clear_and_clean_page(page); + + return page; +} + +static struct page_info *p2m_allocate_root(struct domain *d) +{ + return p2m_get_clean_page(d); +} + +static unsigned long hgatp_from_page_info(struct page_info *page_info) +{ + unsigned long ppn; + unsigned long hgatp_mode; + + ppn = PFN_DOWN(page_to_maddr(page_info)) & HGATP_PPN; + + /* ASID (VMID) not supported yet */ + +#if RV_STAGE1_MODE == SATP_MODE_SV39 + hgatp_mode = HGATP_MODE_SV39X4; +#elif RV_STAGE1_MODE == SATP_MODE_SV48 + hgatp_mode = HGATP_MODE_SV48X4; +#else + #error "add HGATP_MODE" +#endif + + return ppn | (hgatp_mode << HGATP_MODE_SHIFT); +} + +static int p2m_alloc_table(struct domain *d) +{ + struct p2m_domain *p2m = p2m_get_hostp2m(d); + + p2m->root = p2m_allocate_root(d); + if ( !p2m->root ) + return -ENOMEM; + + p2m->hgatp = hgatp_from_page_info(p2m->root); + + /* + * Make sure that all TLBs corresponding to the new VMID are flushed + * before using it. + */ + p2m_write_lock(p2m); + p2m_force_tlb_flush_sync(p2m); + p2m_write_unlock(p2m); + + return 0; +} + +int p2m_init(struct domain *d) +{ + struct p2m_domain *p2m = p2m_get_hostp2m(d); + int rc; + + rwlock_init(&p2m->lock); + INIT_PAGE_LIST_HEAD(&p2m->pages); + + p2m->max_mapped_gfn = _gfn(0); + p2m->lowest_mapped_gfn = _gfn(ULONG_MAX); + + p2m->default_access = p2m_access_rwx; + + radix_tree_init(&p2m->p2m_type); + +#ifdef CONFIG_HAS_PASSTHROUGH + /* + * Some IOMMUs don't support coherent PT walk. When the p2m is + * shared with the CPU, Xen has to make sure that the PT changes have + * reached the memory + */ + p2m->clean_pte = is_iommu_enabled(d) && + !iommu_has_feature(d, IOMMU_FEAT_COHERENT_WALK); +#else + p2m->clean_pte = true; +#endif + + /* + * "Trivial" initialisation is now complete. Set the backpointer so + * p2m_teardown() and friends know to do something. 
+ */ + p2m->domain = d; + + rc = p2m_alloc_table(d); + if ( rc ) + return rc; + + return 0; +} -- 2.49.0
Implement p2m_set_allocation() to construct p2m pages pool for guests based on required number of pages. This is implemented by: - Adding a `struct paging_domain` which contains a freelist, a counter variable and a spinlock to `struct arch_domain` to indicate the free p2m pages and the number of p2m total pages in the p2m pages pool. - Adding a helper `p2m_set_allocation` to set the p2m pages pool size. This helper should be called before allocating memory for a guest and is called from domain_p2m_set_allocation(), the latter is a part of common dom0less code. Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> --- xen/arch/riscv/include/asm/domain.h | 10 +++++ xen/arch/riscv/p2m.c | 67 +++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/xen/arch/riscv/include/asm/domain.h b/xen/arch/riscv/include/asm/domain.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/domain.h +++ b/xen/arch/riscv/include/asm/domain.h @@ -XXX,XX +XXX,XX @@ #ifndef ASM__RISCV__DOMAIN_H #define ASM__RISCV__DOMAIN_H +#include <xen/mm.h> +#include <xen/spinlock.h> #include <xen/xmalloc.h> #include <public/hvm/params.h> @@ -XXX,XX +XXX,XX @@ struct arch_vcpu_io { struct arch_vcpu { }; +struct paging_domain { + spinlock_t lock; + /* Free P2M pages from the pre-allocated P2M pool */ + struct page_list_head p2m_freelist; + /* Number of pages from the pre-allocated P2M pool */ + unsigned long p2m_total_pages; +}; struct arch_domain { struct hvm_domain hvm; struct p2m_domain p2m; + struct paging_domain paging; }; #include <xen/sched.h> diff --git a/xen/arch/riscv/p2m.c b/xen/arch/riscv/p2m.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/p2m.c +++ b/xen/arch/riscv/p2m.c @@ -XXX,XX +XXX,XX @@ #include <xen/domain_page.h> +/* + * Because of general_preempt_check() from xen/sched.h which uses + * local_events_need_delivery() but latter is declared in <asm/event.h>. + * Thereby it is needed to icnlude <xen/event.h> here before xen/sched.h. 
+ * + * Shouldn't be xen/event.h be included in <xen/sched.h>? + */ +#include <xen/event.h> #include <xen/iommu.h> #include <xen/lib.h> #include <xen/mm.h> @@ -XXX,XX +XXX,XX @@ int p2m_init(struct domain *d) int rc; rwlock_init(&p2m->lock); + spin_lock_init(&d->arch.paging.lock); INIT_PAGE_LIST_HEAD(&p2m->pages); + INIT_PAGE_LIST_HEAD(&d->arch.paging.p2m_freelist); p2m->max_mapped_gfn = _gfn(0); p2m->lowest_mapped_gfn = _gfn(ULONG_MAX); @@ -XXX,XX +XXX,XX @@ int p2m_init(struct domain *d) return 0; } + +/* + * Set the pool of pages to the required number of pages. + * Returns 0 for success, non-zero for failure. + * Call with d->arch.paging.lock held. + */ +int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted) +{ + struct page_info *pg; + + ASSERT(spin_is_locked(&d->arch.paging.lock)); + + for ( ; ; ) + { + if ( d->arch.paging.p2m_total_pages < pages ) + { + /* Need to allocate more memory from domheap */ + pg = alloc_domheap_page(d, MEMF_no_owner); + if ( pg == NULL ) + { + printk(XENLOG_ERR "Failed to allocate P2M pages.\n"); + return -ENOMEM; + } + ACCESS_ONCE(d->arch.paging.p2m_total_pages) = + d->arch.paging.p2m_total_pages + 1; + page_list_add_tail(pg, &d->arch.paging.p2m_freelist); + } + else if ( d->arch.paging.p2m_total_pages > pages ) + { + /* Need to return memory to domheap */ + pg = page_list_remove_head(&d->arch.paging.p2m_freelist); + if( pg ) + { + ACCESS_ONCE(d->arch.paging.p2m_total_pages) = + d->arch.paging.p2m_total_pages - 1; + free_domheap_page(pg); + } + else + { + printk(XENLOG_ERR + "Failed to free P2M pages, P2M freelist is empty.\n"); + return -ENOMEM; + } + } + else + break; + + /* Check to see if we need to yield and try again */ + if ( preempted && general_preempt_check() ) + { + *preempted = true; + return -ERESTART; + } + } + + return 0; +} -- 2.49.0
Refactor pte_t to be a union which holds the page table entry plus the pt_t and pt_walk_t structures to simplify p2m functions. Also, introduce some helpers which use pt_walk_t. Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> --- xen/arch/riscv/include/asm/page.h | 54 ++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/page.h +++ b/xen/arch/riscv/include/asm/page.h @@ -XXX,XX +XXX,XX @@ #endif -/* Page Table entry */ typedef struct { + unsigned long v:1; + unsigned long r:1; + unsigned long w:1; + unsigned long x:1; + unsigned long u:1; + unsigned long g:1; + unsigned long a:1; + unsigned long d:1; + unsigned long rsw:2; +#if RV_STAGE1_MODE == SATP_MODE_SV39 + unsigned long ppn0:9; + unsigned long ppn1:9; + unsigned long ppn2:26; + unsigned long rsw2:7; + unsigned long pbmt:2; + unsigned long n:1; +#elif RV_STAGE1_MODE == SATP_MODE_SV48 + unsigned long ppn0:9; + unsigned long ppn1:9; + unsigned long ppn2:9; + unsigned long ppn3:17; + unsigned long rsw2:7; + unsigned long pbmt:2; + unsigned long n:1; +#else +#error "Add proper bits for SATP_MODE" +#endif +} pt_t; + +typedef struct { + unsigned long rsw:10; +#if RV_STAGE1_MODE == SATP_MODE_SV39 || RV_STAGE1_MODE == SATP_MODE_SV48 + unsigned long ppn: 44; +#else +#error "Add proper bits for SATP_MODE" +#endif + unsigned long rsw2:10; +} pt_walk_t; + +/* Page Table entry */ +typedef union { #ifdef CONFIG_RISCV_64 uint64_t pte; #else uint32_t pte; #endif + pt_t bits; + pt_walk_t walk; } pte_t; +static inline void pte_set_mfn(pte_t *pte, mfn_t mfn) +{ + pte->walk.ppn = mfn_x(mfn); +} + +static inline mfn_t pte_get_mfn(pte_t pte) +{ + return _mfn(pte.walk.ppn); +} + static inline pte_t paddr_to_pte(paddr_t paddr, unsigned int permissions) { -- 2.49.0
- Extended p2m_type_t with additional types: p2m_ram_ro, p2m_mmio_direct_dev, p2m_map_foreign_{rw,ro}, p2m_grant_map_{rw,ro}. - Added macros to classify memory types: P2M_RAM_TYPES, P2M_GRANT_TYPES, P2M_FOREIGN_TYPES. - Introduced helper predicates: p2m_is_ram(), p2m_is_foreign(), p2m_is_any_ram(). Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> --- xen/arch/riscv/include/asm/p2m.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/xen/arch/riscv/include/asm/p2m.h b/xen/arch/riscv/include/asm/p2m.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/p2m.h +++ b/xen/arch/riscv/include/asm/p2m.h @@ -XXX,XX +XXX,XX @@ struct p2m_domain { typedef enum { p2m_invalid = 0, /* Nothing mapped here */ p2m_ram_rw, /* Normal read/write domain RAM */ + p2m_ram_ro, /* Read-only; writes are silently dropped */ + p2m_mmio_direct_dev,/* Read/write mapping of genuine Device MMIO area */ + p2m_map_foreign_rw, /* Read/write RAM pages from foreign domain */ + p2m_map_foreign_ro, /* Read-only RAM pages from foreign domain */ + p2m_grant_map_rw, /* Read/write grant mapping */ + p2m_grant_map_ro, /* Read-only grant mapping */ } p2m_type_t; +/* We use bitmaps and mask to handle groups of types */ +#define p2m_to_mask(t_) BIT(t_, UL) + +/* RAM types, which map to real machine frames */ +#define P2M_RAM_TYPES (p2m_to_mask(p2m_ram_rw) | \ + p2m_to_mask(p2m_ram_ro)) + +/* Grant mapping types, which map to a real frame in another VM */ +#define P2M_GRANT_TYPES (p2m_to_mask(p2m_grant_map_rw) | \ + p2m_to_mask(p2m_grant_map_ro)) + +/* Foreign mappings types */ +#define P2M_FOREIGN_TYPES (p2m_to_mask(p2m_map_foreign_rw) | \ + p2m_to_mask(p2m_map_foreign_ro)) + +/* Useful predicates */ +#define p2m_is_ram(t_) (p2m_to_mask(t_) & P2M_RAM_TYPES) +#define p2m_is_foreign(t_) (p2m_to_mask(t_) & P2M_FOREIGN_TYPES) +#define p2m_is_any_ram(t_) (p2m_to_mask(t_) & \ + (P2M_RAM_TYPES | P2M_GRANT_TYPES | \ + P2M_FOREIGN_TYPES)) + #include <xen/p2m-common.h> 
static inline int get_page_and_type(struct page_info *page, -- 2.49.0
These utilities are needed for building and managing RISC-V guest page tables and MMIO mappings by using functions map_regions_p2mt() and guest_physmap_add_entry(). To implement p2m mapping functionality the following is introduced: - Define P2M root level/order and entry count. - Introduce a radix tree for p2m types, as there aren't enough free bits in the PTE, and the helpers (p2m_type_radix_{get,set}()) to deal with it. - Introduce p2m_is_*() helpers, as pte_is_*() helpers are checking the valid bit set in the PTE but we have to check p2m_type instead (look at the comment above p2m_is_valid() for some details). - Introduce helper to set p2m's pte permission: p2m_set_permission(). - Introduce helper to create p2m entry based on mfn, p2m_type_t and p2m_access_t. - Introduce helper to generate table entry with correct attributes: page_to_p2m_table(). - Introduce p2m page allocation function: p2m_alloc_page(). - Introduce functions to write/remove p2m's entries: p2m_{write,remove}_pte(). - Introduce function to allocate p2m table: p2m_create_table(). - Introduce functions used to free p2m entry. - Introduce function for table walking: p2m_next_level(). - Introduce function to insert an entry in the p2m (p2m_set_entry()). - Introduce superpage splitting: p2m_split_superpage(). - Introduce page table type defines (PGT_{none,writable_page}, etc). 
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> --- xen/arch/riscv/include/asm/mm.h | 32 +- xen/arch/riscv/include/asm/p2m.h | 17 +- xen/arch/riscv/include/asm/page.h | 11 + xen/arch/riscv/p2m.c | 780 ++++++++++++++++++++++++++++++ 4 files changed, 829 insertions(+), 11 deletions(-) diff --git a/xen/arch/riscv/include/asm/mm.h b/xen/arch/riscv/include/asm/mm.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/mm.h +++ b/xen/arch/riscv/include/asm/mm.h @@ -XXX,XX +XXX,XX @@ #include <xen/sections.h> #include <xen/types.h> +#include <asm/cmpxchg.h> #include <asm/page-bits.h> extern vaddr_t directmap_virt_start; @@ -XXX,XX +XXX,XX @@ static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr) #define PGT_writable_page PG_mask(1, 1) /* has writable mappings? */ #define PGT_type_mask PG_mask(1, 1) /* Bits 31 or 63. */ -/* Count of uses of this frame as its current type. */ -#define PGT_count_width PG_shift(2) -#define PGT_count_mask ((1UL << PGT_count_width) - 1) + /* 9-bit count of uses of this frame as its current type. */ +#define PGT_count_mask PG_mask(0x3FF, 10) + +/* + * Sv32 has 22-bit GFN. Sv{39, 48, 57} have 44-bit GFN. + * Thereby we can use for `type_info` 10 bits for all modes, having the same + * amount of bits for `type_info` for all MMU modes let us avoid introducing + * an extra #ifdef to that header: + * if we go with maximum possible bits for count on each configuration + * we would need to have a set of PGT_count_* and PGT_gfn_*). + */ +#define PGT_gfn_width PG_shift(10) +#define PGT_gfn_mask (BIT(PGT_gfn_width, UL) - 1) + +#define PGT_INVALID_XENHEAP_GFN _gfn(PGT_gfn_mask) /* * Page needs to be scrubbed. 
Since this bit can only be set on a page that is @@ -XXX,XX +XXX,XX @@ static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr) #define PFN_ORDER(pg) ((pg)->v.free.order) +static inline void page_set_xenheap_gfn(struct page_info *p, gfn_t gfn) +{ + gfn_t gfn_ = gfn_eq(gfn, INVALID_GFN) ? PGT_INVALID_XENHEAP_GFN : gfn; + unsigned long x, nx, y = p->u.inuse.type_info; + + ASSERT(is_xen_heap_page(p)); + + do { + x = y; + nx = (x & ~PGT_gfn_mask) | gfn_x(gfn_); + } while ( (y = cmpxchg(&p->u.inuse.type_info, x, nx)) != x ); +} + extern unsigned char cpu0_boot_stack[]; void setup_initial_pagetables(void); diff --git a/xen/arch/riscv/include/asm/p2m.h b/xen/arch/riscv/include/asm/p2m.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/p2m.h +++ b/xen/arch/riscv/include/asm/p2m.h @@ -XXX,XX +XXX,XX @@ #include <xen/rwlock.h> #include <xen/types.h> +#include <asm/page.h> #include <asm/page-bits.h> +#define P2M_ROOT_LEVEL HYP_PT_ROOT_LEVEL +#define P2M_ROOT_ORDER XEN_PT_LEVEL_ORDER(P2M_ROOT_LEVEL) +#define P2M_ROOT_PAGES (1U << P2M_ROOT_ORDER) + #define paddr_bits PADDR_BITS /* Get host p2m table */ @@ -XXX,XX +XXX,XX @@ static inline int guest_physmap_mark_populate_on_demand(struct domain *d, return -EOPNOTSUPP; } -static inline int guest_physmap_add_entry(struct domain *d, - gfn_t gfn, mfn_t mfn, - unsigned long page_order, - p2m_type_t t) -{ - BUG_ON("unimplemented"); - return -EINVAL; -} +int guest_physmap_add_entry(struct domain *d, + gfn_t gfn, mfn_t mfn, + unsigned long page_order, + p2m_type_t t); /* Untyped version for RAM only, for compatibility */ static inline int __must_check diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/page.h +++ b/xen/arch/riscv/include/asm/page.h @@ -XXX,XX +XXX,XX @@ #define XEN_PT_LEVEL_SIZE(lvl) (_AT(paddr_t, 1) << XEN_PT_LEVEL_SHIFT(lvl)) #define XEN_PT_LEVEL_MAP_MASK(lvl) (~(XEN_PT_LEVEL_SIZE(lvl) - 1)) 
#define XEN_PT_LEVEL_MASK(lvl) (VPN_MASK << XEN_PT_LEVEL_SHIFT(lvl)) +#define XEN_PT_ENTRIES (_AT(unsigned int, 1) << PAGETABLE_ORDER) /* * PTE format: @@ -XXX,XX +XXX,XX @@ #define PTE_PMBT_NOCACHE BIT(61, UL) #define PTE_PMBT_IO BIT(62, UL) +enum pmbt_type_t { + pbmt_pma, + pbmt_nc, + pbmt_io, + pbmt_rsvd, + pbmt_max, +}; + #define PTE_ACCESS_MASK (PTE_READABLE | PTE_WRITABLE | PTE_EXECUTABLE) #define PTE_PBMT_MASK (PTE_PMBT_NOCACHE | PTE_PMBT_IO) +#define P2M_CLEAR_PERM(p2m_pte) ((p2m_pte).pte & ~PTE_ACCESS_MASK) + /* Calculate the offsets into the pagetables for a given VA */ #define pt_linear_offset(lvl, va) ((va) >> XEN_PT_LEVEL_SHIFT(lvl)) diff --git a/xen/arch/riscv/p2m.c b/xen/arch/riscv/p2m.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/p2m.c +++ b/xen/arch/riscv/p2m.c @@ -XXX,XX +XXX,XX @@ static int p2m_alloc_table(struct domain *d) return 0; } +static p2m_type_t p2m_type_radix_get(struct p2m_domain *p2m, pte_t pte) +{ + void *ptr; + + ptr = radix_tree_lookup(&p2m->p2m_type, pte.pte); + + if ( !ptr ) + return p2m_invalid; + + return radix_tree_ptr_to_int(ptr); +} + +static int p2m_type_radix_set(struct p2m_domain *p2m, pte_t pte, p2m_type_t t) +{ + int rc; + + rc = radix_tree_insert(&p2m->p2m_type, pte.pte, + radix_tree_int_to_ptr(t)); + if ( rc == -EEXIST ) + { + /* If a setting already exists, change it to the new one */ + radix_tree_replace_slot( + radix_tree_lookup_slot( + &p2m->p2m_type, pte.pte), + radix_tree_int_to_ptr(t)); + rc = 0; + } + + return rc; +} + int p2m_init(struct domain *d) { struct p2m_domain *p2m = p2m_get_hostp2m(d); @@ -XXX,XX +XXX,XX @@ int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted) return 0; } + +/* + * Find and map the root page table. The caller is responsible for + * unmapping the table. + * + * The function will return NULL if the offset of the root table is + * invalid. 
+ */ +static pte_t *p2m_get_root_pointer(struct p2m_domain *p2m, gfn_t gfn) +{ + unsigned long root_table; + + root_table = gfn_x(gfn) >> XEN_PT_LEVEL_ORDER(P2M_ROOT_LEVEL); + if ( root_table >= P2M_ROOT_PAGES ) + return NULL; + + return __map_domain_page(p2m->root + root_table); +} + +/* + * In the case of the P2M, the valid bit is used for other purpose. Use + * the type to check whether an entry is valid. + */ +static inline bool p2m_is_valid(struct p2m_domain *p2m, pte_t pte) +{ + return p2m_type_radix_get(p2m, pte) != p2m_invalid; +} + +/* + * pte_is_* helpers are checking the valid bit set in the + * PTE but we have to check p2m_type instead (look at the comment above + * p2m_is_valid()) + * Provide our own overlay to check the valid bit. + */ +static inline bool p2m_is_mapping(struct p2m_domain *p2m, pte_t pte) +{ + return p2m_is_valid(p2m, pte) && (pte.pte & PTE_ACCESS_MASK); +} + +static inline bool p2m_is_superpage(struct p2m_domain *p2m, pte_t pte, + unsigned int level) +{ + return p2m_is_valid(p2m, pte) && (pte.pte & PTE_ACCESS_MASK) && + (level > 0); +} + +static void p2m_set_permission(pte_t *e, p2m_type_t t, p2m_access_t a) +{ + /* First apply type permissions */ + switch ( t ) + { + case p2m_ram_rw: + e->bits.r = 1; + e->bits.w = 1; + e->bits.x = 1; + + break; + + case p2m_mmio_direct_dev: + e->bits.r = 1; + e->bits.w = 1; + e->bits.x = 0; + break; + + case p2m_invalid: + e->bits.r = 0; + e->bits.w = 0; + e->bits.x = 0; + break; + + default: + BUG(); + break; + } + + /* Then restrict with access permissions */ + switch ( a ) + { + case p2m_access_rwx: + break; + case p2m_access_wx: + e->bits.r = 0; + break; + case p2m_access_rw: + e->bits.x = 0; + break; + case p2m_access_w: + e->bits.r = 0; + e->bits.x = 0; + break; + case p2m_access_rx: + case p2m_access_rx2rw: + e->bits.w = 0; + break; + case p2m_access_x: + e->bits.r = 0; + e->bits.w = 0; + break; + case p2m_access_r: + e->bits.w = 0; + e->bits.x = 0; + break; + case p2m_access_n: + case 
p2m_access_n2rwx: + e->bits.r = 0; + e->bits.w = 0; + e->bits.x = 0; + break; + default: + BUG(); + break; + } +} + +static pte_t p2m_entry_from_mfn(struct p2m_domain *p2m, mfn_t mfn, p2m_type_t t, p2m_access_t a) +{ + pte_t e = (pte_t) { + .bits.v = 1, + }; + + switch ( t ) + { + case p2m_mmio_direct_dev: + e.bits.pbmt = pbmt_io; + break; + + default: + e.bits.pbmt = pbmt_pma; + break; + } + + p2m_set_permission(&e, t, a); + + ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK)); + + pte_set_mfn(&e, mfn); + + BUG_ON(p2m_type_radix_set(p2m, e, t)); + + return e; +} + +/* Generate table entry with correct attributes. */ +static pte_t page_to_p2m_table(struct p2m_domain *p2m, struct page_info *page) +{ + /* + * Since this function generates a table entry, according to "Encoding + * of PTE R/W/X fields," the entry's r, w, and x fields must be set to 0 + * to point to the next level of the page table. + * Therefore, to ensure that an entry is a page table entry, + * `p2m_access_n2rwx` is passed to `mfn_to_p2m_entry()` as the access value, + * which overrides whatever was passed as `p2m_type_t` and guarantees that + * the entry is a page table entry by setting r = w = x = 0. + */ + return p2m_entry_from_mfn(p2m, page_to_mfn(page), p2m_ram_rw, p2m_access_n2rwx); +} + +static struct page_info *p2m_alloc_page(struct domain *d) +{ + struct page_info *pg; + + /* + * For hardware domain, there should be no limit in the number of pages that + * can be allocated, so that the kernel may take advantage of the extended + * regions. Hence, allocate p2m pages for hardware domains from heap. 
+ */ + if ( is_hardware_domain(d) ) + { + pg = alloc_domheap_page(d, MEMF_no_owner); + if ( pg == NULL ) + printk(XENLOG_G_ERR "Failed to allocate P2M pages for hwdom.\n"); + } + else + { + spin_lock(&d->arch.paging.lock); + pg = page_list_remove_head(&d->arch.paging.p2m_freelist); + spin_unlock(&d->arch.paging.lock); + } + + return pg; +} + +static inline void p2m_write_pte(pte_t *p, pte_t pte, bool clean_pte) +{ + write_pte(p, pte); + if ( clean_pte ) + clean_dcache_va_range(p, sizeof(*p)); +} + +static inline void p2m_remove_pte(pte_t *p, bool clean_pte) +{ + pte_t pte; + + memset(&pte, 0x00, sizeof(pte)); + p2m_write_pte(p, pte, clean_pte); +} + +/* Allocate a new page table page and hook it in via the given entry. */ +static int p2m_create_table(struct p2m_domain *p2m, pte_t *entry) +{ + struct page_info *page; + pte_t *p; + + ASSERT(!p2m_is_valid(p2m, *entry)); + + page = p2m_alloc_page(p2m->domain); + if ( page == NULL ) + return -ENOMEM; + + page_list_add(page, &p2m->pages); + + p = __map_domain_page(page); + clear_page(p); + + unmap_domain_page(p); + + p2m_write_pte(entry, page_to_p2m_table(p2m, page), p2m->clean_pte); + + return 0; +} + +#define GUEST_TABLE_MAP_NONE 0 +#define GUEST_TABLE_MAP_NOMEM 1 +#define GUEST_TABLE_SUPER_PAGE 2 +#define GUEST_TABLE_NORMAL 3 + +/* + * Take the currently mapped table, find the corresponding GFN entry, + * and map the next table, if available. The previous table will be + * unmapped if the next level was mapped (e.g GUEST_TABLE_NORMAL + * returned). + * + * `alloc_tbl` parameter indicates whether intermediate tables should + * be allocated when not present. + * + * Return values: + * GUEST_TABLE_MAP_NONE: a table allocation isn't permitted. + * GUEST_TABLE_MAP_NOMEM: allocating a new page failed. + * GUEST_TABLE_SUPER_PAGE: The next entry points to a superpage. + * GUEST_TABLE_NORMAL: next level or leaf mapped normally. 
+ */ +static int p2m_next_level(struct p2m_domain *p2m, bool alloc_tbl, + unsigned int level, pte_t **table, + unsigned int offset) +{ + pte_t *entry; + int ret; + mfn_t mfn; + + entry = *table + offset; + + if ( !p2m_is_valid(p2m, *entry) ) + { + if ( !alloc_tbl ) + return GUEST_TABLE_MAP_NONE; + + ret = p2m_create_table(p2m, entry); + if ( ret ) + return GUEST_TABLE_MAP_NOMEM; + } + + /* The function p2m_next_level() is never called at the last level */ + ASSERT(level != 0); + if ( p2m_is_mapping(p2m, *entry) ) + return GUEST_TABLE_SUPER_PAGE; + + mfn = mfn_from_pte(*entry); + + unmap_domain_page(*table); + *table = map_domain_page(mfn); + + return GUEST_TABLE_NORMAL; +} + +static bool p2m_split_superpage(struct p2m_domain *p2m, pte_t *entry, + unsigned int level, unsigned int target, + const unsigned int *offsets) +{ + struct page_info *page; + unsigned int i; + pte_t pte, *table; + bool rv = true; + + /* Convenience aliases */ + mfn_t mfn = pte_get_mfn(*entry); + unsigned int next_level = level - 1; + unsigned int level_order = XEN_PT_LEVEL_ORDER(next_level); + + /* + * This should only be called with target != level and the entry is + * a superpage. + */ + ASSERT(level > target); + ASSERT(p2m_is_superpage(p2m, *entry, level)); + + page = p2m_alloc_page(p2m->domain); + if ( !page ) + return false; + + page_list_add(page, &p2m->pages); + table = __map_domain_page(page); + + /* + * We are either splitting a first level 1G page into 512 second level + * 2M pages, or a second level 2M page into 512 third level 4K pages. + */ + for ( i = 0; i < XEN_PT_ENTRIES; i++ ) + { + pte_t *new_entry = table + i; + + /* + * Use the content of the superpage entry and override + * the necessary fields. So the correct permission are kept. + */ + pte = *entry; + pte_set_mfn(&pte, mfn_add(mfn, i << level_order)); + + write_pte(new_entry, pte); + } + + /* + * Shatter superpage in the page to the level we want to make the + * changes. 
+ * This is done outside the loop to avoid checking the offset to + * know whether the entry should be shattered for every entry. + */ + if ( next_level != target ) + rv = p2m_split_superpage(p2m, table + offsets[next_level], + level - 1, target, offsets); + + /* TODO: why it is necessary to have clean here? Not somewhere in the caller */ + if ( p2m->clean_pte ) + clean_dcache_va_range(table, PAGE_SIZE); + + unmap_domain_page(table); + + /* + * Even if we failed, we should install the newly allocated PTE + * entry. The caller will be in charge to free the sub-tree. + */ + p2m_write_pte(entry, page_to_p2m_table(p2m, page), p2m->clean_pte); + + return rv; +} + +static void p2m_put_foreign_page(struct page_info *pg) +{ + /* + * It's safe to do the put_page here because page_alloc will + * flush the TLBs if the page is reallocated before the end of + * this loop. + */ + put_page(pg); +} + +/* Put any references on the single 4K page referenced by mfn. */ +static void p2m_put_4k_page(mfn_t mfn, p2m_type_t type) +{ + /* TODO: Handle other p2m types */ + if ( p2m_is_foreign(type) ) + { + ASSERT(mfn_valid(mfn)); + p2m_put_foreign_page(mfn_to_page(mfn)); + } + /* Detect the xenheap page and mark the stored GFN as invalid. */ + else if ( p2m_is_ram(type) && is_xen_heap_mfn(mfn) ) + page_set_xenheap_gfn(mfn_to_page(mfn), INVALID_GFN); +} + +/* Put any references on the superpage referenced by mfn. */ +static void p2m_put_2m_superpage(mfn_t mfn, p2m_type_t type) +{ + struct page_info *pg; + unsigned int i; + + /* + * TODO: Handle other p2m types, but be aware that any changes to handle + * different types should require an update on the relinquish code to handle + * preemption. + */ + if ( !p2m_is_foreign(type) ) + return; + + ASSERT(mfn_valid(mfn)); + + pg = mfn_to_page(mfn); + + for ( i = 0; i < XEN_PT_ENTRIES; i++, pg++ ) + p2m_put_foreign_page(pg); +} + +/* Put any references on the page referenced by pte. 
*/ +static void p2m_put_page(struct p2m_domain *p2m, const pte_t pte, + unsigned int level) +{ + mfn_t mfn = pte_get_mfn(pte); + p2m_type_t p2m_type = p2m_type_radix_get(p2m, pte); + + ASSERT(p2m_is_valid(p2m, pte)); + + /* + * TODO: Currently we don't handle level 2 super-page, Xen is not + * preemptible and therefore some work is needed to handle such + * superpages, for which at some point Xen might end up freeing memory + * and therefore for such a big mapping it could end up in a very long + * operation. + */ + if ( level == 1 ) + return p2m_put_2m_superpage(mfn, p2m_type); + else if ( level == 0 ) + return p2m_put_4k_page(mfn, p2m_type); +} + +static void p2m_free_page(struct domain *d, struct page_info *pg) +{ + if ( is_hardware_domain(d) ) + free_domheap_page(pg); + else + { + spin_lock(&d->arch.paging.lock); + page_list_add_tail(pg, &d->arch.paging.p2m_freelist); + spin_unlock(&d->arch.paging.lock); + } +} + +/* Free pte sub-tree behind an entry */ +static void p2m_free_entry(struct p2m_domain *p2m, + pte_t entry, unsigned int level) +{ + unsigned int i; + pte_t *table; + mfn_t mfn; + struct page_info *pg; + + /* Nothing to do if the entry is invalid. */ + if ( !p2m_is_valid(p2m, entry) ) + return; + + if ( p2m_is_superpage(p2m, entry, level) || (level == 0) ) + { +#ifdef CONFIG_IOREQ_SERVER + /* + * If this gets called then either the entry was replaced by an entry + * with a different base (valid case) or the shattering of a superpage + * has failed (error case). + * So, at worst, the spurious mapcache invalidation might be sent. 
+ */ + if ( p2m_is_ram( p2m_type_radix_get(p2m, entry)) && + domain_has_ioreq_server(p2m->domain) ) + ioreq_request_mapcache_invalidate(p2m->domain); +#endif + + p2m_put_page(p2m, entry, level); + + return; + } + + table = map_domain_page(pte_get_mfn(entry)); + for ( i = 0; i < XEN_PT_ENTRIES; i++ ) + p2m_free_entry(p2m, *(table + i), level - 1); + + unmap_domain_page(table); + + /* + * Make sure all the references in the TLB have been removed before + * freing the intermediate page table. + * XXX: Should we defer the free of the page table to avoid the + * flush? + */ + p2m_tlb_flush_sync(p2m); + + mfn = pte_get_mfn(entry); + ASSERT(mfn_valid(mfn)); + + pg = mfn_to_page(mfn); + + page_list_del(pg, &p2m->pages); + p2m_free_page(p2m->domain, pg); +} + +/* + * Insert an entry in the p2m. This should be called with a mapping + * equal to a page/superpage. + */ +static int __p2m_set_entry(struct p2m_domain *p2m, + gfn_t sgfn, + unsigned int page_order, + mfn_t smfn, + p2m_type_t t, + p2m_access_t a) +{ + unsigned int level; + unsigned int target = page_order / PAGETABLE_ORDER; + pte_t *entry, *table, orig_pte; + int rc; + /* A mapping is removed if the MFN is invalid. */ + bool removing_mapping = mfn_eq(smfn, INVALID_MFN); + DECLARE_OFFSETS(offsets, gfn_to_gaddr(sgfn)); + + ASSERT(p2m_is_write_locked(p2m)); + + /* + * Check if the level target is valid: we only support + * 4K - 2M - 1G mapping. + */ + ASSERT(target <= 2); + + table = p2m_get_root_pointer(p2m, sgfn); + if ( !table ) + return -EINVAL; + + for ( level = P2M_ROOT_LEVEL; level > target; level-- ) + { + /* + * Don't try to allocate intermediate page table if the mapping + * is about to be removed. 
+ */ + rc = p2m_next_level(p2m, !removing_mapping, + level, &table, offsets[level]); + if ( (rc == GUEST_TABLE_MAP_NONE) || (rc == GUEST_TABLE_MAP_NOMEM) ) + { + /* + * We are here because p2m_next_level has failed to map + * the intermediate page table (e.g. the table does not exist + * and the p2m tree is read-only). It is a valid case + * when removing a mapping as it may not exist in the + * page table. In this case, just ignore it. + */ + rc = removing_mapping ? 0 : -ENOENT; + goto out; + } + else if ( rc != GUEST_TABLE_NORMAL ) + break; + } + + entry = table + offsets[level]; + + /* + * If we are here with level > target, we must be at a leaf node, + * and we need to break up the superpage. + */ + if ( level > target ) + { + /* We need to split the original page. */ + pte_t split_pte = *entry; + + ASSERT(p2m_is_superpage(p2m, *entry, level)); + + if ( !p2m_split_superpage(p2m, &split_pte, level, target, offsets) ) + { + /* Free the allocated sub-tree */ + p2m_free_entry(p2m, split_pte, level); + + rc = -ENOMEM; + goto out; + } + + /* Follow the break-before-make sequence to update the entry. */ + p2m_remove_pte(entry, p2m->clean_pte); + p2m_force_tlb_flush_sync(p2m); + + p2m_write_pte(entry, split_pte, p2m->clean_pte); + + /* + * Then move to the level we want to make real changes. Levels are + * numbered from the root (highest) down to 0, so walk downwards, + * mirroring the first loop of this function. + */ + for ( ; level > target; level-- ) + { + rc = p2m_next_level(p2m, true, level, &table, offsets[level]); + + /* + * The entry should be found and either be a table + * or a superpage if level 0 is not targeted + */ + ASSERT(rc == GUEST_TABLE_NORMAL || + (rc == GUEST_TABLE_SUPER_PAGE && target > 0)); + } + + entry = table + offsets[level]; + } + + /* + * We should always be there with the correct level because + * all the intermediate tables have been installed if necessary. + */ + ASSERT(level == target); + + orig_pte = *entry; + + /* + * The access type should always be p2m_access_rwx when the mapping + * is removed.
+ */ + ASSERT(!mfn_eq(INVALID_MFN, smfn) || (a == p2m_access_rwx)); + + /* + * Always remove the entry in order to follow the break-before-make + * sequence when updating the translation table. + */ + if ( pte_is_valid(orig_pte) || removing_mapping ) + p2m_remove_pte(entry, p2m->clean_pte); + + if ( removing_mapping ) + /* Flush can be deferred if the entry is removed */ + p2m->need_flush |= !!pte_is_valid(orig_pte); + else + { + pte_t pte = p2m_entry_from_mfn(p2m, smfn, t, a); + + /* + * It is necessary to flush the TLB before writing the new entry + * to keep coherency when the previous entry was valid. + * + * Although, it could be defered when only the permissions are + * changed (e.g in case of memaccess). + */ + if ( pte_is_valid(orig_pte) ) + { + if ( P2M_CLEAR_PERM(pte) != P2M_CLEAR_PERM(orig_pte) ) + p2m_force_tlb_flush_sync(p2m); + else + p2m->need_flush = true; + } + + p2m_write_pte(entry, pte, p2m->clean_pte); + + p2m->max_mapped_gfn = gfn_max(p2m->max_mapped_gfn, + gfn_add(sgfn, (1UL << page_order) - 1)); + p2m->lowest_mapped_gfn = gfn_min(p2m->lowest_mapped_gfn, sgfn); + } + +#ifdef CONFIG_HAS_PASSTHROUGH + if ( is_iommu_enabled(p2m->domain) && + (pte_is_valid(orig_pte) || pte_is_valid(*entry)) ) + { + unsigned int flush_flags = 0; + + if ( pte_is_valid(orig_pte) ) + flush_flags |= IOMMU_FLUSHF_modified; + if ( pte_is_valid(*entry) ) + flush_flags |= IOMMU_FLUSHF_added; + + rc = iommu_iotlb_flush(p2m->domain, _dfn(gfn_x(sgfn)), + 1UL << page_order, flush_flags); + } + else +#endif + rc = 0; + + /* + * Free the entry only if the original pte was valid and the base + * is different (to avoid freeing when permission is changed). 
+ */ + if ( p2m_is_valid(p2m, orig_pte) && + !mfn_eq(pte_get_mfn(*entry), pte_get_mfn(orig_pte)) ) + p2m_free_entry(p2m, orig_pte, level); + +out: + unmap_domain_page(table); + + return rc; +} + +int p2m_set_entry(struct p2m_domain *p2m, + gfn_t sgfn, + unsigned long nr, + mfn_t smfn, + p2m_type_t t, + p2m_access_t a) +{ + int rc = 0; + + /* + * Any reference taken by the P2M mappings (e.g. foreign mapping) will + * be dropped in relinquish_p2m_mapping(). As the P2M will still + * be accessible after, we need to prevent mapping to be added when the + * domain is dying. + */ + if ( unlikely(p2m->domain->is_dying) ) + return -ENOMEM; + + while ( nr ) + { + unsigned long mask; + unsigned long order = 0; + /* 1gb, 2mb, 4k mappings are supported */ + unsigned int i = ( P2M_ROOT_LEVEL > 2 ) ? 2 : P2M_ROOT_LEVEL; + + /* + * Don't take into account the MFN when removing mapping (i.e + * MFN_INVALID) to calculate the correct target order. + * + * XXX: Support superpage mappings if nr is not aligned to a + * superpage size. + */ + mask = !mfn_eq(smfn, INVALID_MFN) ? 
mfn_x(smfn) : 0; + mask |= gfn_x(sgfn) | nr; + + for ( ; i != 0; i-- ) + { + if ( !(mask & (BIT(XEN_PT_LEVEL_ORDER(i), UL) - 1)) ) + { + order = XEN_PT_LEVEL_ORDER(i); + break; + } + } + + rc = __p2m_set_entry(p2m, sgfn, order, smfn, t, a); + if ( rc ) + break; + + sgfn = gfn_add(sgfn, (1 << order)); + if ( !mfn_eq(smfn, INVALID_MFN) ) + smfn = mfn_add(smfn, (1 << order)); + + nr -= (1 << order); + } + + return rc; +} + +static int p2m_insert_mapping(struct domain *d, gfn_t start_gfn, + unsigned long nr, mfn_t mfn, p2m_type_t t) +{ + struct p2m_domain *p2m = p2m_get_hostp2m(d); + int rc; + + p2m_write_lock(p2m); + rc = p2m_set_entry(p2m, start_gfn, nr, mfn, t, p2m->default_access); + p2m_write_unlock(p2m); + + return rc; +} + +int map_regions_p2mt(struct domain *d, + gfn_t gfn, + unsigned long nr, + mfn_t mfn, + p2m_type_t p2mt) +{ + return p2m_insert_mapping(d, gfn, nr, mfn, p2mt); +} + +int guest_physmap_add_entry(struct domain *d, + gfn_t gfn, + mfn_t mfn, + unsigned long page_order, + p2m_type_t t) +{ + return p2m_insert_mapping(d, gfn, (1 << page_order), mfn, t); +} -- 2.49.0
This patch series introduces the functions necessary to build and manage RISC-V guest page tables and MMIO/RAM mappings. CI tests: https://gitlab.com/xen-project/people/olkur/xen/-/pipelines/2247120521 --- Changes in V9: - Addressed comments for v8. --- Changes in V8: - All patches (except the last three) are merged to staging. - Addressed comments for v7. --- Changes in V7: - Merged to staging: - xen/riscv: avoid redundant HGATP*_MODE_SHIFT and HGATP*_VMID_SHIFT - Introduce new patch: - xen/riscv: update p2m_set_entry to free unused metadata page (could be merged with previous one: xen/riscv: introduce metadata table to store P2M type ) - Addressed comments for v6. --- Changes in V6: - Addressed comments for v5. --- Changes in V5: - Addressed comments for v4. --- Changes in V4: - Merged to staging: - xen/riscv: introduce sbi_remote_hfence_gvma() - xen/riscv: introduce sbi_remote_hfence_gvma_vmid() - Drop "xen/riscv: introduce page_{get,set}_xenheap_gfn()" as grant tables aren't going to be introduced for the moment. Also, drop other parts connected to grant tables support. - All other changes are patch specific. --- Changes in V3: - Introduce metadata table to store P2M types. - Use x86's way to allocate VMID. - Abstract Arm-specific p2m type name for device MMIO mappings. - For all other updates, please look at the specific patch. --- Changes in V2: - Merged to staging: - [PATCH v1 1/6] xen/riscv: add inclusion of xen/bitops.h to asm/cmpxchg.h - New patches: - xen/riscv: implement sbi_remote_hfence_gvma{_vmid}().
- Split patch "xen/riscv: implement p2m mapping functionality" into smaller patches: - xen/riscv: introduce page_set_xenheap_gfn() - xen/riscv: implement guest_physmap_add_entry() for mapping GFNs to MFNs - xen/riscv: implement p2m_set_entry() and __p2m_set_entry() - xen/riscv: Implement p2m_free_entry() and related helpers - xen/riscv: Implement superpage splitting for p2m mappings - xen/riscv: implement p2m_next_level() - xen/riscv: Implement p2m_entry_from_mfn() and support PBMT configuration - Move root p2m table allocation to separate patch: xen/riscv: add root page table allocation - Drop dependency of this patch series on the patch with an introduction of SvPBMT as it was merged. - Patch "[PATCH v1 4/6] xen/riscv: define pt_t and pt_walk_t structures" was renamed to xen/riscv: introduce pte_{set,get}_mfn() as after dropping of bitfields for PTE structure, this patch introduces only pte_{set,get}_mfn(). - Rename "xen/riscv: define pt_t and pt_walk_t structures" to "xen/riscv: introduce pte_{set,get}_mfn()" as pt_t and pt_walk_t were dropped. - Introduce guest domain's VMID allocation and management. - Add patches necessary to implement p2m lookup: - xen/riscv: implement mfn_valid() and page reference, ownership handling helpers - xen/riscv: add support of page lookup by GFN - Re-sort patch series. - All other changes are patch-specific. Please check them. --- Oleksii Kurochko (3): xen/riscv: add support of page lookup by GFN xen/riscv: introduce metadata table to store P2M type xen/riscv: update p2m_set_entry() to free unused metadata pages xen/arch/riscv/include/asm/flushtlb.h | 2 +- xen/arch/riscv/include/asm/mm.h | 21 ++ xen/arch/riscv/include/asm/p2m.h | 21 ++ xen/arch/riscv/mm.c | 13 + xen/arch/riscv/p2m.c | 435 ++++++++++++++++++++++++-- 5 files changed, 462 insertions(+), 30 deletions(-) -- 2.52.0
Introduce helper functions for safely querying the P2M (physical-to-machine) mapping: - add p2m_read_lock(), p2m_read_unlock(), and p2m_is_locked() for managing P2M lock state. - Implement p2m_get_entry() to retrieve mapping details for a given GFN, including MFN, page order, and validity. - Introduce p2m_get_page_from_gfn() to convert a GFN into a page_info pointer, acquiring a reference to the page if valid. - Introduce get_page(). Implementations are based on Arm's functions with some minor modifications: - p2m_get_entry(): - Reverse traversal of page tables, as RISC-V uses the opposite level numbering compared to Arm. - Removed the return of p2m_access_t from p2m_get_entry() since mem_access_settings is not introduced for RISC-V. - Updated BUILD_BUG_ON() to check using the level 0 mask, which corresponds to Arm's THIRD_MASK. - Replaced open-coded bit shifts with the BIT() macro. Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> --- Changes in V9: - Update check_outside_boundary() to return (P2M_MAX_ROOT_LEVEL + 1) in the case if gfn is inside range. --- Changes in V8: - Drop the local variable masked_gfn inside check_outside_boundary() and fold the is_lower conditionals into the for loop. - Initialize the local variable level in p2m_get_entry() to the root level and drop the explicit assignment when root page table wasn't found, as it now defaults to the root level. - Introduce gfn_limit_bits and use it to calculate the maximum GFN for the MMU second stage, and return the appropriate page_order when the GFN exceeds this limit. --- Changes in V7: - Refactor check_outside_boundary(). - Reword the comment above p2m_get_entry(). - As at the moment p2m_get_entry() doesn't pass `t` as NULL we could drop "if ( t )" checks inside it to not have dead code now. - Add the check inside p2m_get_entry() that requested gfn is correct. - Add "if ( t )" check inside p2m_get_page_from_gfn() as it is going to be some callers with t = NULL. 
--- Changes in V6: - Move if-condition with initialization up in p2m_get_page_from_gfn(). - Pass p2mt to the call of p2m_get_entry() inside p2m_get_page_from_gfn() to avoid an issue when 't' is passed NULL. With p2mt passed to p2m_get_entry() we will receive a proper type and so the rest of the function will be able to continue using a proper type. - In check_outside_boundary() in the case when is_lower == true fill the bottom bits of masked_gfn with all 1s. - Update code of check_outside_boundary() to return proper level in the case when `level` is equal to 0. - Add ASSERT(p2m) in check_outside_boundary() to be sure that p2m isn't NULL as P2M_LEVEL_MASK() depends on p2m value. --- Changes in V5: - Use P2M_DECLARE_OFFSETS(), introduced in earlier patches, instead of DECLARE_OFFSETS(). - Drop blank line before check_outside_boundary(). - Use more readable version of if statements inside check_outside_boundary(). - Accumulate mask in check_outside_boundary() instead of re-writing it for each page table level to have correct gfns for comparison. - Set argument `t` of p2m_get_entry() to p2m_invalid by default. - Drop checking of (rc == P2M_TABLE_MAP_NOMEM ) when p2m_next_level(...,false,...) is called. - Add ASSERT(mfn & (BIT(P2M_LEVEL_ORDER(level), UL) - 1)); in p2m_get_entry() to be sure that received `mfn` has cleared lowest bits. - Drop `valid` argument from p2m_get_entry(), it is not needed anymore. - Drop p2m_lookup(), use p2m_get_entry() explicitly inside p2m_get_page_from_gfn(). - Update the commit message. --- Changes in V4: - Update prototype of p2m_is_locked() to return bool and accept pointer-to-const. - Correct the comment above p2m_get_entry(). - Drop the check "BUILD_BUG_ON(XEN_PT_LEVEL_MAP_MASK(0) != PAGE_MASK);" inside p2m_get_entry() as it is stale and it was needed to be sure that 4k page(s) are used on L3 (in Arm terms) which is true for RISC-V. (if no special extensions are used).
It was another reason for Arm to have it (and I copied it to RISC-V), but it isn't true for RISC-V. (some details could be found in response to the patch). - Style fixes. - Add explanatory comment what the loop inside "gfn is higher than the highest p2m mapping" does. Move this loop to separate function check_outside_boundary() to cover both boundaries (lowest_mapped_gfn and max_mapped_gfn). - There is no need to allocate a page table as it is expected that p2m_get_entry() normally would be called after a corresponding p2m_set_entry() was called. So change 'true' to 'false' in a page table walking loop inside p2m_get_entry(). - Correct handling of p2m_is_foreign case inside p2m_get_page_from_gfn(). - Introduce and use P2M_LEVEL_MASK instead of XEN_PT_LEVEL_MASK as the latter doesn't take into account the two extra bits for the root table in case of P2M. - Drop stale item from "change in v3" - Add is_p2m_foreign() macro and connected stuff. - Add p2m_read_(un)lock(). --- Changes in V3: - Change struct domain *d argument of p2m_get_page_from_gfn() to struct p2m_domain. - Update the comment above p2m_get_entry(). - s/_t/p2mt for local variable in p2m_get_entry(). - Drop local variable addr in p2m_get_entry() and use gfn_to_gaddr(gfn) to define offsets array. - Code style fixes. - Update a check of rc code from p2m_next_level() in p2m_get_entry() and drop "else" case. - Do not call p2m_get_type() if p2m_get_entry()'s t argument is NULL. - Use struct p2m_domain instead of struct domain for p2m_lookup() and p2m_get_page_from_gfn(). - Move definition of get_page() from "xen/riscv: implement mfn_valid() and page reference, ownership handling helpers" --- Changes in V2: - New patch.
--- xen/arch/riscv/include/asm/p2m.h | 21 ++++ xen/arch/riscv/mm.c | 13 +++ xen/arch/riscv/p2m.c | 185 +++++++++++++++++++++++++++++++ 3 files changed, 219 insertions(+) diff --git a/xen/arch/riscv/include/asm/p2m.h b/xen/arch/riscv/include/asm/p2m.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/p2m.h +++ b/xen/arch/riscv/include/asm/p2m.h @@ -XXX,XX +XXX,XX @@ #define P2M_GFN_LEVEL_SHIFT(lvl) (P2M_LEVEL_ORDER(lvl) + PAGE_SHIFT) +#define P2M_LEVEL_MASK(p2m, lvl) \ + (P2M_TABLE_OFFSET(p2m, lvl) << P2M_GFN_LEVEL_SHIFT(lvl)) + #define paddr_bits PADDR_BITS /* Get host p2m table */ @@ -XXX,XX +XXX,XX @@ static inline bool p2m_is_write_locked(struct p2m_domain *p2m) unsigned long construct_hgatp(const struct p2m_domain *p2m, uint16_t vmid); +static inline void p2m_read_lock(struct p2m_domain *p2m) +{ + read_lock(&p2m->lock); +} + +static inline void p2m_read_unlock(struct p2m_domain *p2m) +{ + read_unlock(&p2m->lock); +} + +static inline bool p2m_is_locked(const struct p2m_domain *p2m) +{ + return rw_is_locked(&p2m->lock); +} + +struct page_info *p2m_get_page_from_gfn(struct p2m_domain *p2m, gfn_t gfn, + p2m_type_t *t); + #endif /* ASM__RISCV__P2M_H */ /* diff --git a/xen/arch/riscv/mm.c b/xen/arch/riscv/mm.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/mm.c +++ b/xen/arch/riscv/mm.c @@ -XXX,XX +XXX,XX @@ struct domain *page_get_owner_and_reference(struct page_info *page) return owner; } + +bool get_page(struct page_info *page, const struct domain *domain) +{ + const struct domain *owner = page_get_owner_and_reference(page); + + if ( likely(owner == domain) ) + return true; + + if ( owner != NULL ) + put_page(page); + + return false; +} diff --git a/xen/arch/riscv/p2m.c b/xen/arch/riscv/p2m.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/p2m.c +++ b/xen/arch/riscv/p2m.c @@ -XXX,XX +XXX,XX @@ int map_regions_p2mt(struct domain *d, return rc; } + +/* + * p2m_get_entry() should always return the correct order value, even if an + * entry is not 
present (i.e. the GFN is outside the range): + * [p2m->lowest_mapped_gfn, p2m->max_mapped_gfn] (1) + * + * This ensures that callers of p2m_get_entry() can determine what range of + * address space would be altered by a corresponding p2m_set_entry(). + * Also, it would help to avoid costly page walks for GFNs outside range (1). + * + * Therefore, this function returns true for GFNs outside range (1), and in + * that case the corresponding level is returned via the level_out argument. + * Otherwise, it returns false and p2m_get_entry() performs a page walk to + * find the proper entry. + */ +static bool check_outside_boundary(const struct p2m_domain *p2m, gfn_t gfn, + gfn_t boundary, bool is_lower, + unsigned int *level_out) +{ + unsigned int level = P2M_MAX_ROOT_LEVEL + 1; + bool ret = false; + + ASSERT(p2m); + + if ( is_lower ? gfn_x(gfn) < gfn_x(boundary) + : gfn_x(gfn) > gfn_x(boundary) ) + { + for ( level = P2M_ROOT_LEVEL(p2m) ; level; level-- ) + { + unsigned long mask = BIT(P2M_GFN_LEVEL_SHIFT(level), UL) - 1; + + if ( is_lower ? (gfn_x(gfn) | mask) < gfn_x(boundary) + : (gfn_x(gfn) & ~mask) > gfn_x(boundary) ) + break; + } + + ret = true; + } + + if ( level_out ) + *level_out = level; + + return ret; +} + +/* + * Get the details of a given gfn. + * + * If the entry is present, the associated MFN, the p2m type of the mapping, + * and the page order of the mapping in the page table (i.e., it could be a + * superpage) will be returned. + * + * If the entry is not present, INVALID_MFN will be returned, page_order will + * be set according to the order of the invalid range, and the type will be + * p2m_invalid. 
+ */ +static mfn_t p2m_get_entry(struct p2m_domain *p2m, gfn_t gfn, + p2m_type_t *t, + unsigned int *page_order) +{ + unsigned int level = P2M_ROOT_LEVEL(p2m); + unsigned int gfn_limit_bits = + P2M_LEVEL_ORDER(level + 1) + P2M_ROOT_EXTRA_BITS(p2m, level); + pte_t entry, *table; + int rc; + mfn_t mfn = INVALID_MFN; + + P2M_BUILD_LEVEL_OFFSETS(p2m, offsets, gfn_to_gaddr(gfn)); + + ASSERT(p2m_is_locked(p2m)); + + *t = p2m_invalid; + + if ( gfn_x(gfn) > (BIT(gfn_limit_bits, UL) - 1) ) + { + if ( page_order ) + *page_order = gfn_limit_bits; + + return mfn; + } + + if ( check_outside_boundary(p2m, gfn, p2m->lowest_mapped_gfn, true, + &level) ) + goto out; + + if ( check_outside_boundary(p2m, gfn, p2m->max_mapped_gfn, false, &level) ) + goto out; + + table = p2m_get_root_pointer(p2m, gfn); + + /* + * The table should always be non-NULL because the gfn is below + * p2m->max_mapped_gfn and the root table pages are always present. + */ + if ( !table ) + { + ASSERT_UNREACHABLE(); + goto out; + } + + for ( level = P2M_ROOT_LEVEL(p2m); level; level-- ) + { + rc = p2m_next_level(p2m, false, level, &table, offsets[level]); + if ( rc == P2M_TABLE_MAP_NONE ) + goto out_unmap; + + if ( rc != P2M_TABLE_NORMAL ) + break; + } + + entry = table[offsets[level]]; + + if ( pte_is_valid(entry) ) + { + *t = p2m_get_type(entry); + + mfn = pte_get_mfn(entry); + + ASSERT(!(mfn_x(mfn) & (BIT(P2M_LEVEL_ORDER(level), UL) - 1))); + + /* + * The entry may point to a superpage. Find the MFN associated + * to the GFN. 
+ */ + mfn = mfn_add(mfn, + gfn_x(gfn) & (BIT(P2M_LEVEL_ORDER(level), UL) - 1)); + } + + out_unmap: + unmap_domain_page(table); + + out: + if ( page_order ) + *page_order = P2M_LEVEL_ORDER(level); + + return mfn; +} + +struct page_info *p2m_get_page_from_gfn(struct p2m_domain *p2m, gfn_t gfn, + p2m_type_t *t) +{ + struct page_info *page; + p2m_type_t p2mt; + mfn_t mfn; + + p2m_read_lock(p2m); + mfn = p2m_get_entry(p2m, gfn, &p2mt, NULL); + + if ( t ) + *t = p2mt; + + if ( !mfn_valid(mfn) ) + { + p2m_read_unlock(p2m); + return NULL; + } + + page = mfn_to_page(mfn); + + /* + * get_page won't work on foreign mapping because the page doesn't + * belong to the current domain. + */ + if ( unlikely(p2m_is_foreign(p2mt)) ) + { + const struct domain *fdom = page_get_owner_and_reference(page); + + p2m_read_unlock(p2m); + + if ( fdom ) + { + if ( likely(fdom != p2m->domain) ) + return page; + + ASSERT_UNREACHABLE(); + put_page(page); + } + + return NULL; + } + + p2m_read_unlock(p2m); + + return get_page(page, p2m->domain) ? page : NULL; +} -- 2.52.0
RISC-V's PTE has only two available bits that can be used to store the P2M type. This is insufficient to represent all the current RISC-V P2M types. Therefore, some P2M types must be stored outside the PTE bits. To address this, a metadata table is introduced to store P2M types that cannot fit in the PTE itself. Not all P2M types are stored in the metadata table—only those that require it. The metadata table is linked to the intermediate page table via the `struct page_info`'s v.md.metadata field of the corresponding intermediate page. Such pages are allocated with MEMF_no_owner, which allows us to use the v field for the purpose of storing the metadata table. To simplify the allocation and linking of intermediate and metadata page tables, `p2m_{alloc,free}_table()` functions are implemented. These changes impact `p2m_split_superpage()`, since when a superpage is split, it is necessary to update the metadata table of the new intermediate page table — if the entry being split has its P2M type set to `p2m_ext_storage` in its `P2M_TYPES` bits. In addition to updating the metadata of the new intermediate page table, the corresponding entry in the metadata for the original superpage is invalidated. Also, update p2m_{get,set}_type to work with P2M types which don't fit into PTE bits. Suggested-by: Jan Beulich <jbeulich@suse.com> Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> --- Changes in V9: - Fold ASSERT(ctx->p2m) to the previous one ASSERT() in p2m_set_type(). --- Changes in V8: - Update the comment above p2m_set_type(). - Drop BUG_ON(ctx->level ...) and "if ( ctx->level <= P2M_MAX_SUPPORTED_LEVEL_MAPPING )" as p2m_set_type() doesn't care about ctx->level and it is expected that passed `pte` is valid, and so ctx->level is expected to be valid too. - Rename p2m_pte_ctx argument to ctx for p2m_pte_from_mfn() and p2m_free_subtree(). - Initialize local variable p2m_pte_ctx inside p2m_split_superpage() with an initializer. 
Drop an assignment of p2m_pte_ctx->level when the old PTE's type is obtained. - Use initializer for tmp_ctx and drop an assignment of tmp_ctx.p2m inside p2m_set_type(). - Drop brackets around p2m_free_subtree() call inside p2m_set_entry(). --- Changes in V7: - Put p2m_domain * inside struct p2m_pte_ctx and update the APIs of p2m_set_type(), p2m_pte_from_mfn(). Also, move ASSERT(p2m) closer to p2m_alloc_page(ctx->p2m) inside p2m_set_type(). Update all callers of p2m_set_type() and p2m_pte_from_mfn(). - Update the comment above BUILD_BUG_ON(p2m_invalid): drop unnecessary sentences and make it shorter than 80 chars. - Drop the comment and BUILD_BUG_ON() in p2m_get_type() as it is enough to have it in p2m_set_type(). - Update the comment above p2m_set_type() about p2m argument which was dropped. - Make ctx argument of p2m_set_type() const to be able to re-use p2m_pte_ctx across multiple iterations without fully reinitializing. - Declare "struct p2m_pte_ctx tmp_ctx;" as function scope variable and rework p2m_set_entry() correspondingly. --- Changes in V6: - Introduce new type md_t to use it instead of pte_t to store metadata types outside PTE bits. - Integrate introduced struct md_t. - Drop local variable "struct domain *d" inside p2m_set_type(). - Drop __func__ printing and use %pv. - Code style fixes - Drop unnecessary check inside if-condition in p2m_pte_from_mfn() as we have ASSERT(p2m) inside p2m_set_type() anyway. - Restore the comment inside page_to_p2m_table() as it was deleted accidentally. - move the initialization of p2m_pte_ctx.pt_page and p2m_pte_ctx.level ahead of the loop - Add BUILD_BUG_ON(p2m_invalid) before the call of p2m_alloc_page() in p2m_set_type() and in p2m_get_type() before " if ( type == p2m_ext_storage )". - Set to NULL tbl_pg->v.md.pg in p2m_free_table(). - Make argument 't' of p2m_set_type() non-const as we are going to change it. - Add some explanatory comments.
- Update ASSERT at the start of p2m_set_type() to verify that passed ctx->index is less than 512 and drop calculation of an index of root page as it is guaranteed by calc_offset() and get_root_pointer() that we will already get proper page and proper index inside this page. --- Changes in V5: - Rename metadata member of struct md inside struct page_info to pg. - Stray blank in the declaration of p2m_alloc_table(). - Use "<" instead of "<=" in ASSERT() in p2m_set_type(). - Move the check that ctx is provided to an earlier point in p2m_set_type(). - Set `md_pg` after ASSERT() in p2m_set_type(). - Add BUG_ON() instead of ASSERT_UNREACHABLE() in p2m_set_type(). - Drop a check that metadata isn't NULL before unmap_domain_page() is being called. - Make const `md` variable in p2m_get_type(). - unmap correct domain's page in p2m_get_type: use `md` instead of ctx->pt_page->v.md.pg. - Add description of how p2m and p2m_pte_ctx are expected to be used in p2m_pte_from_mfn() and drop a comment from page_to_p2m_table(). - Drop the stale part of the comment above p2m_alloc_table(). - Drop ASSERT(tbl_pg->v.md.pg) from p2m_free_table() as tbl_pg->v.md.pg is created conditionally now. - Drop an introduction of p2m_alloc_table(), update p2m_alloc_page() correspondingly and use it instead. - Add missing blank in definition of level member for tmp_ctx variable in p2m_free_subtree(). Also, add the comma at the end. - Initialize old_type once before for-loop in p2m_split_superpage() as old type will be used for all newly created PTEs. - Properly initialize p2m_pte_ctx.level with next_level instead of level when p2m_set_type() is going to be called for new PTEs. - Fix indentation. - Move ASSERT(p2m) on top of p2m_set_type() to be sure that NULL isn't passed for p2m argument of p2m_set_type(). - s/virt_to_page(table)/mfn_to_page(domain_page_map_to_mfn(table)) to receive correct page for a table which is mapped by domain_page_map().
- Add "return;" after domain_crash() in p2m_set_type() to avoid potential NULL pointer dereference of md_pg. --- Changes in V4: - Add Suggested-by: Jan Beulich <jbeulich@suse.com>. - Update the comment above declaration of md structure inside struct page_info to: "Page is used as an intermediate P2M page table". - Allocate metadata table on demand to save some memory. (1) - Rework p2m_set_type(): - Add allocation of metadata page only if needed. - Move a check what kind of type we are handling inside p2m_set_type(). - Move mapping of metadata page inside p2m_get_type() as it is needed only if the PTE's type is equal to p2m_ext_storage. - Add some description to p2m_get_type() function. - Drop blank after return type of p2m_alloc_table(). - Drop allocation of metadata page inside p2m_alloc_table because of (1). - Fix p2m_free_table() to free metadata page only if it was allocated. --- Changes in V3: - Add is_p2m_foreign() macro and connected stuff. - Change struct domain *d argument of p2m_get_page_from_gfn() to struct p2m_domain. - Update the comment above p2m_get_entry(). - s/_t/p2mt for local variable in p2m_get_entry(). - Drop local variable addr in p2m_get_entry() and use gfn_to_gaddr(gfn) to define offsets array. - Code style fixes. - Update a check of rc code from p2m_next_level() in p2m_get_entry() and drop "else" case. - Do not call p2m_get_type() if p2m_get_entry()'s t argument is NULL. - Use struct p2m_domain instead of struct domain for p2m_lookup() and p2m_get_page_from_gfn(). - Move definition of get_page() from "xen/riscv: implement mfn_valid() and page reference, ownership handling helpers" --- Changes in V2: - New patch.
--- xen/arch/riscv/include/asm/mm.h | 9 ++ xen/arch/riscv/p2m.c | 234 ++++++++++++++++++++++++++++---- 2 files changed, 213 insertions(+), 30 deletions(-) diff --git a/xen/arch/riscv/include/asm/mm.h b/xen/arch/riscv/include/asm/mm.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/mm.h +++ b/xen/arch/riscv/include/asm/mm.h @@ -XXX,XX +XXX,XX @@ struct page_info /* Order-size of the free chunk this page is the head of. */ unsigned int order; } free; + + /* Page is used as an intermediate P2M page table */ + struct { + /* + * Pointer to a page which store metadata for an intermediate page + * table. + */ + struct page_info *pg; + } md; } v; union { diff --git a/xen/arch/riscv/p2m.c b/xen/arch/riscv/p2m.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/p2m.c +++ b/xen/arch/riscv/p2m.c @@ -XXX,XX +XXX,XX @@ */ #define P2M_MAX_SUPPORTED_LEVEL_MAPPING _AC(2, U) +struct md_t { + /* + * Describes a type stored outside PTE bits. + * Look at the comment above definition of enum p2m_type_t. + */ + p2m_type_t type : 4; +}; + +/* + * P2M PTE context is used only when a PTE's P2M type is p2m_ext_storage. + * In this case, the P2M type is stored separately in the metadata page. + */ +struct p2m_pte_ctx { + struct p2m_domain *p2m; + struct page_info *pt_page; /* Page table page containing the PTE. */ + unsigned int index; /* Index of the PTE within that page. */ + unsigned int level; /* Paging level at which the PTE resides. */ +}; + static struct gstage_mode_desc __ro_after_init max_gstage_mode = { .mode = HGATP_MODE_OFF, .paging_levels = 0, @@ -XXX,XX +XXX,XX @@ unsigned char get_max_supported_mode(void) return max_gstage_mode.mode; } +/* + * If anything is changed here, it may also require updates to + * p2m_{get,set}_type(). 
+ */ static inline unsigned int calc_offset(const struct p2m_domain *p2m, const unsigned int lvl, const paddr_t gpa) @@ -XXX,XX +XXX,XX @@ static inline unsigned int calc_offset(const struct p2m_domain *p2m, * The caller is responsible for unmapping the page after use. * * Returns NULL if the calculated offset into the root table is invalid. + * + * If anything is changed here, it may also require updates to + * p2m_{get,set}_type(). */ static pte_t *p2m_get_root_pointer(struct p2m_domain *p2m, gfn_t gfn) { @@ -XXX,XX +XXX,XX @@ static struct page_info *p2m_alloc_page(struct p2m_domain *p2m) return pg; } -static int p2m_set_type(pte_t *pte, p2m_type_t t) +/* + * `pte` – PTE entry for which the type `t` will be stored. + * + * If `t` >= p2m_first_external, a valid `ctx` must be provided. + */ +static void p2m_set_type(pte_t *pte, p2m_type_t t, + const struct p2m_pte_ctx *ctx) { - int rc = 0; + struct page_info **md_pg; + struct md_t *metadata = NULL; - if ( t > p2m_first_external ) - panic("unimplemeted\n"); - else - pte->pte |= MASK_INSR(t, P2M_TYPE_PTE_BITS_MASK); + /* + * It is sufficient to compare ctx->index with PAGETABLE_ENTRIES because, + * even for the p2m root page table (which is a 16 KB page allocated as + * four 4 KB pages), calc_offset() guarantees that the page-table index + * will always fall within the range [0, 511]. + */ + ASSERT(ctx && ctx->index < PAGETABLE_ENTRIES && ctx->p2m); - return rc; + /* + * At the moment, p2m_get_root_pointer() returns one of four possible p2m + * root pages, so there is no need to search for the correct ->pt_page + * here. + * Non-root page tables are 4 KB pages, so simply using ->pt_page is + * sufficient. + */ + md_pg = &ctx->pt_page->v.md.pg; + + if ( !*md_pg && (t >= p2m_first_external) ) + { + /* + * Since p2m_alloc_page() initializes an allocated page with + * zeros, p2m_invalid is expected to have the value 0 as well. 
+ */ + BUILD_BUG_ON(p2m_invalid); + + *md_pg = p2m_alloc_page(ctx->p2m); + if ( !*md_pg ) + { + printk("%pd: can't allocate metadata page\n", + ctx->p2m->domain); + domain_crash(ctx->p2m->domain); + + return; + } + } + + if ( *md_pg ) + metadata = __map_domain_page(*md_pg); + + if ( t >= p2m_first_external ) + { + metadata[ctx->index].type = t; + + t = p2m_ext_storage; + } + else if ( metadata ) + metadata[ctx->index].type = p2m_invalid; + + pte->pte |= MASK_INSR(t, P2M_TYPE_PTE_BITS_MASK); + + unmap_domain_page(metadata); } -static p2m_type_t p2m_get_type(const pte_t pte) +/* + * `pte` -> PTE entry that stores the PTE's type. + * + * If the PTE's type is `p2m_ext_storage`, `ctx` should be provided; + * otherwise it could be NULL. + */ +static p2m_type_t p2m_get_type(const pte_t pte, const struct p2m_pte_ctx *ctx) { p2m_type_t type = MASK_EXTR(pte.pte, P2M_TYPE_PTE_BITS_MASK); if ( type == p2m_ext_storage ) - panic("unimplemented\n"); + { + const struct md_t *md = __map_domain_page(ctx->pt_page->v.md.pg); + + type = md[ctx->index].type; + + /* + * Since p2m_set_type() guarantees that the type will be greater than + * p2m_first_external, just check that we received a valid type here. + */ + ASSERT(type > p2m_first_external); + + unmap_domain_page(md); + } return type; } @@ -XXX,XX +XXX,XX @@ static void p2m_set_permission(pte_t *e, p2m_type_t t) } } -static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t, bool is_table) +/* + * If p2m_pte_from_mfn() is called with ctx = NULL, + * it means the function is working with a page table for which the `t` + * should not be applicable. Otherwise, the function is handling a leaf PTE + * for which `t` is applicable. 
+ */ +static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t, + struct p2m_pte_ctx *ctx) { pte_t e = (pte_t) { PTE_VALID }; @@ -XXX,XX +XXX,XX @@ static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t, bool is_table) ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK) || mfn_eq(mfn, INVALID_MFN)); - if ( !is_table ) + if ( ctx ) { switch ( t ) { @@ -XXX,XX +XXX,XX @@ static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t, bool is_table) } p2m_set_permission(&e, t); - p2m_set_type(&e, t); + p2m_set_type(&e, t, ctx); } else /* @@ -XXX,XX +XXX,XX @@ static pte_t page_to_p2m_table(const struct page_info *page) * set to true and p2m_type_t shouldn't be applied for PTEs which * describe an intermediate table. */ - return p2m_pte_from_mfn(page_to_mfn(page), p2m_invalid, true); + return p2m_pte_from_mfn(page_to_mfn(page), p2m_invalid, NULL); +} + +static void p2m_free_page(struct p2m_domain *p2m, struct page_info *pg); + +/* + * Free page table's page and metadata page linked to page table's page. + */ +static void p2m_free_table(struct p2m_domain *p2m, struct page_info *tbl_pg) +{ + if ( tbl_pg->v.md.pg ) + { + p2m_free_page(p2m, tbl_pg->v.md.pg); + tbl_pg->v.md.pg = NULL; + } + p2m_free_page(p2m, tbl_pg); } /* Allocate a new page table page and hook it in via the given entry. */ @@ -XXX,XX +XXX,XX @@ static void p2m_free_page(struct p2m_domain *p2m, struct page_info *pg) /* Free pte sub-tree behind an entry */ static void p2m_free_subtree(struct p2m_domain *p2m, - pte_t entry, unsigned int level) + pte_t entry, + const struct p2m_pte_ctx *ctx) { unsigned int i; pte_t *table; mfn_t mfn; struct page_info *pg; + unsigned int level = ctx->level; /* * Check if the level is valid: only 4K - 2M - 1G mappings are supported. 
@@ -XXX,XX +XXX,XX @@ static void p2m_free_subtree(struct p2m_domain *p2m, if ( pte_is_mapping(entry) ) { - p2m_type_t p2mt = p2m_get_type(entry); + p2m_type_t p2mt = p2m_get_type(entry, ctx); #ifdef CONFIG_IOREQ_SERVER /* @@ -XXX,XX +XXX,XX @@ static void p2m_free_subtree(struct p2m_domain *p2m, return; } - table = map_domain_page(pte_get_mfn(entry)); + mfn = pte_get_mfn(entry); + ASSERT(mfn_valid(mfn)); + table = map_domain_page(mfn); + pg = mfn_to_page(mfn); for ( i = 0; i < P2M_PAGETABLE_ENTRIES(p2m, level); i++ ) - p2m_free_subtree(p2m, table[i], level - 1); + { + struct p2m_pte_ctx tmp_ctx = { + .pt_page = pg, + .index = i, + .level = level - 1, + .p2m = p2m, + }; + + p2m_free_subtree(p2m, table[i], &tmp_ctx); + } unmap_domain_page(table); @@ -XXX,XX +XXX,XX @@ static void p2m_free_subtree(struct p2m_domain *p2m, */ p2m_tlb_flush_sync(p2m); - mfn = pte_get_mfn(entry); - ASSERT(mfn_valid(mfn)); - - pg = mfn_to_page(mfn); - - p2m_free_page(p2m, pg); + p2m_free_table(p2m, pg); } static bool p2m_split_superpage(struct p2m_domain *p2m, pte_t *entry, unsigned int level, unsigned int target, - const unsigned int *offsets) + const unsigned int *offsets, + struct page_info *tbl_pg) { struct page_info *page; unsigned long i; @@ -XXX,XX +XXX,XX @@ static bool p2m_split_superpage(struct p2m_domain *p2m, pte_t *entry, unsigned int next_level = level - 1; unsigned int level_order = P2M_LEVEL_ORDER(next_level); + struct p2m_pte_ctx p2m_pte_ctx = { + .p2m = p2m, + .level = level, + }; + + /* Init with p2m_invalid just to make compiler happy. */ + p2m_type_t old_type = p2m_invalid; + /* * This should only be called with target != level and the entry is * a superpage. 
@@ -XXX,XX +XXX,XX @@ static bool p2m_split_superpage(struct p2m_domain *p2m, pte_t *entry, table = __map_domain_page(page); + if ( MASK_EXTR(entry->pte, P2M_TYPE_PTE_BITS_MASK) == p2m_ext_storage ) + { + p2m_pte_ctx.pt_page = tbl_pg; + p2m_pte_ctx.index = offsets[level]; + + old_type = p2m_get_type(*entry, &p2m_pte_ctx); + } + + p2m_pte_ctx.pt_page = page; + p2m_pte_ctx.level = next_level; + for ( i = 0; i < P2M_PAGETABLE_ENTRIES(p2m, next_level); i++ ) { pte_t *new_entry = table + i; @@ -XXX,XX +XXX,XX @@ static bool p2m_split_superpage(struct p2m_domain *p2m, pte_t *entry, pte = *entry; pte_set_mfn(&pte, mfn_add(mfn, i << level_order)); + if ( MASK_EXTR(pte.pte, P2M_TYPE_PTE_BITS_MASK) == p2m_ext_storage ) + { + p2m_pte_ctx.index = i; + + p2m_set_type(&pte, old_type, &p2m_pte_ctx); + } + write_pte(new_entry, pte); } @@ -XXX,XX +XXX,XX @@ static bool p2m_split_superpage(struct p2m_domain *p2m, pte_t *entry, */ if ( next_level != target ) rv = p2m_split_superpage(p2m, table + offsets[next_level], - next_level, target, offsets); + next_level, target, offsets, page); if ( p2m->clean_dcache ) clean_dcache_va_range(table, PAGE_SIZE); @@ -XXX,XX +XXX,XX @@ static int p2m_set_entry(struct p2m_domain *p2m, * are still allowed. */ bool removing_mapping = mfn_eq(mfn, INVALID_MFN); + struct p2m_pte_ctx tmp_ctx = { + .p2m = p2m, + }; P2M_BUILD_LEVEL_OFFSETS(p2m, offsets, gfn_to_gaddr(gfn)); ASSERT(p2m_is_write_locked(p2m)); @@ -XXX,XX +XXX,XX @@ static int p2m_set_entry(struct p2m_domain *p2m, { /* We need to split the original page. 
*/ pte_t split_pte = *entry; + struct page_info *tbl_pg = mfn_to_page(domain_page_map_to_mfn(table)); ASSERT(pte_is_superpage(*entry, level)); - if ( !p2m_split_superpage(p2m, &split_pte, level, target, offsets) ) + if ( !p2m_split_superpage(p2m, &split_pte, level, target, offsets, + tbl_pg) ) { + tmp_ctx.pt_page = tbl_pg; + tmp_ctx.index = offsets[level]; + tmp_ctx.level = level; + /* Free the allocated sub-tree */ - p2m_free_subtree(p2m, split_pte, level); + p2m_free_subtree(p2m, split_pte, &tmp_ctx); rc = -ENOMEM; goto out; @@ -XXX,XX +XXX,XX @@ static int p2m_set_entry(struct p2m_domain *p2m, entry = table + offsets[level]; } + tmp_ctx.pt_page = mfn_to_page(domain_page_map_to_mfn(table)); + tmp_ctx.index = offsets[level]; + tmp_ctx.level = level; + /* * We should always be there with the correct level because all the * intermediate tables have been installed if necessary. @@ -XXX,XX +XXX,XX @@ static int p2m_set_entry(struct p2m_domain *p2m, p2m_clean_pte(entry, p2m->clean_dcache); else { - pte_t pte = p2m_pte_from_mfn(mfn, t, false); + pte_t pte = p2m_pte_from_mfn(mfn, t, &tmp_ctx); p2m_write_pte(entry, pte, p2m->clean_dcache); @@ -XXX,XX +XXX,XX @@ static int p2m_set_entry(struct p2m_domain *p2m, if ( pte_is_valid(orig_pte) && (!pte_is_valid(*entry) || !mfn_eq(pte_get_mfn(*entry), pte_get_mfn(orig_pte))) ) - p2m_free_subtree(p2m, orig_pte, level); + p2m_free_subtree(p2m, orig_pte, &tmp_ctx); out: unmap_domain_page(table); @@ -XXX,XX +XXX,XX @@ static mfn_t p2m_get_entry(struct p2m_domain *p2m, gfn_t gfn, if ( pte_is_valid(entry) ) { - *t = p2m_get_type(entry); + struct p2m_pte_ctx p2m_pte_ctx = { + .pt_page = mfn_to_page(domain_page_map_to_mfn(table)), + .index = offsets[level], + .level = level, + .p2m = p2m, + }; + + *t = p2m_get_type(entry, &p2m_pte_ctx); mfn = pte_get_mfn(entry); -- 2.52.0
Introduce tracking of metadata page entries usage and if all of them are p2m_invalid then free them. Intermediate P2M page tables are allocated with MEMF_no_owner, so we are free to repurpose struct page_info fields for them. Since page_info.u.* is not used for such pages, introduce a used_entries counter in struct page_info to track how many metadata entries are in use for a given intermediate P2M page table. The counter is updated in p2m_set_type() when metadata entries transition between p2m_invalid and a valid external type. When the last metadata entry is cleared (used_entries == 0), the associated metadata page is freed and returned to the P2M pool. Refactor metadata page freeing into a new helper, p2m_free_metadata_page(), as the same logic is needed both when tearing down a P2M table and when all metadata entries become p2m_invalid in p2m_set_type(). As part of this refactoring, move the declaration of p2m_free_page() earlier to satisfy the new helper. Additionally, implement page_set_tlbflush_timestamp() for RISC-V instead of BUGing, as it is invoked when returning memory to the domheap. Suggested-by: Jan Beulich <jbeulich@suse.com> Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> Acked-by: Jan Beulich <jbeulich@suse.com> --- Changes in v5: - Nothing changed. Only rebase. --- Changes in v4: - Move implementation of alloc_domain_struct() and free_domain_struct() ahead of alloc_vcpu_struct(). --- Changes in v3: - Move alloc_domain_struct() and free_domain_struct() to not have forward declaration. - Add Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>. --- Changes in v2: - New patch. 
--- xen/arch/riscv/include/asm/flushtlb.h | 2 +- xen/arch/riscv/include/asm/mm.h | 12 ++++++++++ xen/arch/riscv/p2m.c | 32 +++++++++++++++++++++------ 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/xen/arch/riscv/include/asm/flushtlb.h b/xen/arch/riscv/include/asm/flushtlb.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/flushtlb.h +++ b/xen/arch/riscv/include/asm/flushtlb.h @@ -XXX,XX +XXX,XX @@ static inline void tlbflush_filter(cpumask_t *mask, uint32_t page_timestamp) {} static inline void page_set_tlbflush_timestamp(struct page_info *page) { - BUG_ON("unimplemented"); + page->tlbflush_timestamp = tlbflush_current_time(); } static inline void arch_flush_tlb_mask(const cpumask_t *mask) diff --git a/xen/arch/riscv/include/asm/mm.h b/xen/arch/riscv/include/asm/mm.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/include/asm/mm.h +++ b/xen/arch/riscv/include/asm/mm.h @@ -XXX,XX +XXX,XX @@ struct page_info unsigned long type_info; } inuse; + /* Page is used as an intermediate P2M page table: count_info == 0 */ + struct { + /* + * Tracks the number of used entries in the metadata page table. + * + * If used_entries == 0, then `page_info.v.md.pg` can be freed and + * returned to the P2M pool. + */ + unsigned long used_entries; + } md; + + /* Page is on a free list: ((count_info & PGC_count_mask) == 0). 
*/ union { struct { diff --git a/xen/arch/riscv/p2m.c b/xen/arch/riscv/p2m.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/riscv/p2m.c +++ b/xen/arch/riscv/p2m.c @@ -XXX,XX +XXX,XX @@ static struct gstage_mode_desc __ro_after_init max_gstage_mode = { .name = "Bare", }; +static void p2m_free_page(struct p2m_domain *p2m, struct page_info *pg); + +static inline void p2m_free_metadata_page(struct p2m_domain *p2m, + struct page_info **md_pg) +{ + if ( *md_pg ) + { + p2m_free_page(p2m, *md_pg); + *md_pg = NULL; + } +} + unsigned char get_max_supported_mode(void) { return max_gstage_mode.mode; @@ -XXX,XX +XXX,XX @@ static void p2m_set_type(pte_t *pte, p2m_type_t t, if ( t >= p2m_first_external ) { + if ( metadata[ctx->index].type == p2m_invalid ) + ctx->pt_page->u.md.used_entries++; + metadata[ctx->index].type = t; t = p2m_ext_storage; } else if ( metadata ) + { + if ( metadata[ctx->index].type != p2m_invalid ) + ctx->pt_page->u.md.used_entries--; + metadata[ctx->index].type = p2m_invalid; + } pte->pte |= MASK_INSR(t, P2M_TYPE_PTE_BITS_MASK); unmap_domain_page(metadata); + + if ( *md_pg && !ctx->pt_page->u.md.used_entries ) + p2m_free_metadata_page(ctx->p2m, md_pg); } /* @@ -XXX,XX +XXX,XX @@ static pte_t page_to_p2m_table(const struct page_info *page) return p2m_pte_from_mfn(page_to_mfn(page), p2m_invalid, NULL); } -static void p2m_free_page(struct p2m_domain *p2m, struct page_info *pg); - /* * Free page table's page and metadata page linked to page table's page. */ static void p2m_free_table(struct p2m_domain *p2m, struct page_info *tbl_pg) { - if ( tbl_pg->v.md.pg ) - { - p2m_free_page(p2m, tbl_pg->v.md.pg); - tbl_pg->v.md.pg = NULL; - } + p2m_free_metadata_page(p2m, &tbl_pg->v.md.pg); + p2m_free_page(p2m, tbl_pg); } -- 2.52.0