Add page table management functions to be used for KVM guest (gmap)
page tables.

Add the boilerplate and the functions for the allocation and
deallocation of DAT tables.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
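Notes: the MMU cache is meant to be filled while sleeping is still
allowed, and then consumed in contexts that must not sleep; the
GFP_ATOMIC fallbacks in the inline allocators only cover the case of
a pool running dry. A minimal sketch of the intended usage (the
spinlock is illustrative only and not part of this patch):

	struct kvm_s390_mmu_cache *mc;
	struct crst_table *table;

	mc = kvm_s390_new_mmu_cache();  /* sleeps, pre-fills all pools */
	if (!mc)
		return -ENOMEM;
	spin_lock(&gmap_lock);          /* no sleeping from here on */
	table = kvm_s390_mmu_cache_alloc_crst(mc);
	crst_table_init((unsigned long *)table, _REGION3_ENTRY_EMPTY);
	spin_unlock(&gmap_lock);
	kvm_s390_free_mmu_cache(mc);    /* frees whatever was not used */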
arch/s390/kvm/Makefile | 1 +
arch/s390/kvm/dat.c | 119 +++++++++++++++++++++++++++++++++++++++++
arch/s390/kvm/dat.h | 80 ++++++++++++++++++++++++++++
arch/s390/mm/page-states.c | 1 +
4 files changed, 201 insertions(+)
create mode 100644 arch/s390/kvm/dat.c

diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 9a723c48b05a..84315d2f75fb 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -9,6 +9,7 @@ ccflags-y := -Ivirt/kvm -Iarch/s390/kvm

kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o
+kvm-y += dat.o
kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o

obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c
new file mode 100644
index 000000000000..c324a27f379f
--- /dev/null
+++ b/arch/s390/kvm/dat.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2020, 2024
+ * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * David Hildenbrand <david@redhat.com>
+ * Janosch Frank <frankja@linux.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pagewalk.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/swapops.h>
+#include <linux/ksm.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/pgtable.h>
+#include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
+#include <linux/pgalloc.h>
+
+#include <asm/page-states.h>
+#include <asm/tlb.h>
+#include "dat.h"
+
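+/*
+ * Fill each pool of the cache up to its maximum size. Can sleep;
+ * returns -ENOMEM if one of the allocations fails.
+ */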
+int kvm_s390_mmu_cache_topup(struct kvm_s390_mmu_cache *mc)
+{
+ void *o;
+
+ for ( ; mc->n_crsts < KVM_S390_MMU_CACHE_N_CRSTS; mc->n_crsts++) {
+ o = (void *)__get_free_pages(GFP_KERNEL_ACCOUNT | __GFP_COMP, CRST_ALLOC_ORDER);
+ if (!o)
+ return -ENOMEM;
+ mc->crsts[mc->n_crsts] = o;
+ }
+ for ( ; mc->n_pts < KVM_S390_MMU_CACHE_N_PTS; mc->n_pts++) {
+ o = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
+ if (!o)
+ return -ENOMEM;
+ mc->pts[mc->n_pts] = o;
+ }
+ for ( ; mc->n_rmaps < KVM_S390_MMU_CACHE_N_RMAPS; mc->n_rmaps++) {
+ o = kzalloc(sizeof(*mc->rmaps[0]), GFP_KERNEL_ACCOUNT);
+ if (!o)
+ return -ENOMEM;
+ mc->rmaps[mc->n_rmaps] = o;
+ }
+ return 0;
+}
+
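+/*
+ * The _noinit allocators mark the pages as being used for DAT tables,
+ * but leave the initialization of the table entries to the caller.
+ */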
+static inline struct page_table *dat_alloc_pt_noinit(struct kvm_s390_mmu_cache *mc)
+{
+ struct page_table *res;
+
+ res = kvm_s390_mmu_cache_alloc_pt(mc);
+ if (res)
+ __arch_set_page_dat(res, 1);
+ return res;
+}
+
+static inline struct crst_table *dat_alloc_crst_noinit(struct kvm_s390_mmu_cache *mc)
+{
+ struct crst_table *res;
+
+ res = kvm_s390_mmu_cache_alloc_crst(mc);
+ if (res)
+ __arch_set_page_dat(res, 1UL << CRST_ALLOC_ORDER);
+ return res;
+}
+
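+/*
+ * Allocate a CRST table with a sleepable allocation and initialize
+ * all its entries to @init.
+ */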
+struct crst_table *dat_alloc_crst_sleepable(unsigned long init)
+{
+ struct page *page;
+ void *virt;
+
+ page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_COMP, CRST_ALLOC_ORDER);
+ if (!page)
+ return NULL;
+ virt = page_to_virt(page);
+ __arch_set_page_dat(virt, 1UL << CRST_ALLOC_ORDER);
+ crst_table_init(virt, init);
+ return virt;
+}
+
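+/*
+ * Recursively free a hierarchy of DAT tables; page tables are only
+ * freed if the caller owns them.
+ */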
+void dat_free_level(struct crst_table *table, bool owns_ptes)
+{
+ unsigned int i;
+
+ for (i = 0; i < _CRST_ENTRIES; i++) {
+ if (table->crstes[i].h.fc || table->crstes[i].h.i)
+ continue;
+ if (!is_pmd(table->crstes[i]))
+ dat_free_level(dereference_crste(table->crstes[i]), owns_ptes);
+ else if (owns_ptes)
+ dat_free_pt(dereference_pmd(table->crstes[i].pmd));
+ }
+ dat_free_crst(table);
+}
diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h
index 4d2b7a7bf898..486b7dfc5df2 100644
--- a/arch/s390/kvm/dat.h
+++ b/arch/s390/kvm/dat.h
@@ -418,6 +418,48 @@ struct vsie_rmap {

static_assert(sizeof(struct vsie_rmap) == 2 * sizeof(long));

+#define KVM_S390_MMU_CACHE_N_CRSTS 6
+#define KVM_S390_MMU_CACHE_N_PTS 2
+#define KVM_S390_MMU_CACHE_N_RMAPS 16
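+/* A small reserve of DAT tables and rmaps, refilled while sleeping is allowed. */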
+struct kvm_s390_mmu_cache {
+ void *crsts[KVM_S390_MMU_CACHE_N_CRSTS];
+ void *pts[KVM_S390_MMU_CACHE_N_PTS];
+ void *rmaps[KVM_S390_MMU_CACHE_N_RMAPS];
+ short int n_crsts;
+ short int n_pts;
+ short int n_rmaps;
+};
+
+void dat_free_level(struct crst_table *table, bool owns_ptes);
+struct crst_table *dat_alloc_crst_sleepable(unsigned long init);
+
+int kvm_s390_mmu_cache_topup(struct kvm_s390_mmu_cache *mc);
+
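+/* Fallback for empty pools; the caller might hold spinlocks, so do not sleep or warn. */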
+#define GFP_KVM_S390_MMU_CACHE (GFP_ATOMIC | __GFP_ACCOUNT | __GFP_NOWARN)
+
+static inline struct page_table *kvm_s390_mmu_cache_alloc_pt(struct kvm_s390_mmu_cache *mc)
+{
+ if (mc->n_pts)
+ return mc->pts[--mc->n_pts];
+ return (void *)__get_free_page(GFP_KVM_S390_MMU_CACHE);
+}
+
+static inline struct crst_table *kvm_s390_mmu_cache_alloc_crst(struct kvm_s390_mmu_cache *mc)
+{
+ if (mc->n_crsts)
+ return mc->crsts[--mc->n_crsts];
+ return (void *)__get_free_pages(GFP_KVM_S390_MMU_CACHE | __GFP_COMP, CRST_ALLOC_ORDER);
+}
+
+static inline struct vsie_rmap *kvm_s390_mmu_cache_alloc_rmap(struct kvm_s390_mmu_cache *mc)
+{
+ if (mc->n_rmaps)
+ return mc->rmaps[--mc->n_rmaps];
+ return kzalloc(sizeof(struct vsie_rmap), GFP_KVM_S390_MMU_CACHE);
+}
+
static inline struct crst_table *crste_table_start(union crste *crstep)
{
return (struct crst_table *)ALIGN_DOWN((unsigned long)crstep, _CRST_TABLE_SIZE);
@@ -717,4 +759,42 @@ static inline void pgste_set_unlock(union pte *ptep, union pgste pgste)
WRITE_ONCE(*pgste_of(ptep), pgste);
}

+static inline void dat_free_pt(struct page_table *pt)
+{
+ free_page((unsigned long)pt);
+}
+
+static inline void _dat_free_crst(struct crst_table *table)
+{
+ free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+}
+
+#define dat_free_crst(x) _dat_free_crst(_CRSTP(x))
+
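+/* Free everything still left in the cache, and then the cache itself. */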
+static inline void kvm_s390_free_mmu_cache(struct kvm_s390_mmu_cache *mc)
+{
+ if (!mc)
+ return;
+ while (mc->n_pts)
+ dat_free_pt(mc->pts[--mc->n_pts]);
+ while (mc->n_crsts)
+ _dat_free_crst(mc->crsts[--mc->n_crsts]);
+ while (mc->n_rmaps)
+ kfree(mc->rmaps[--mc->n_rmaps]);
+ kfree(mc);
+}
+
+DEFINE_FREE(kvm_s390_mmu_cache, struct kvm_s390_mmu_cache *, if (_T) kvm_s390_free_mmu_cache(_T))
+
+static inline struct kvm_s390_mmu_cache *kvm_s390_new_mmu_cache(void)
+{
+ struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL;
+
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL_ACCOUNT);
+ if (mc && !kvm_s390_mmu_cache_topup(mc))
+ return_ptr(mc);
+ return NULL;
+}
+
#endif /* __KVM_S390_DAT_H */
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 01f9b39e65f5..5bee173db72e 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -13,6 +13,7 @@
#include <asm/page.h>

int __bootdata_preserved(cmma_flag);
+EXPORT_SYMBOL(cmma_flag);

void arch_free_page(struct page *page, int order)
{
--
2.51.1