While arch/*/mm/ptdump handles the kernel pagetable dumping code,
introduce KVM/ptdump which shows the guest stage-2 pagetables. The
separation is necessary because most of the definitions from the
stage-2 pagetable reside in the KVM path and we will be invoking
functionality specific to KVM.
When a guest is created, register a new file entry under the guest
debugfs dir which allows userspace to show the contents of the guest
stage-2 pagetables when accessed.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/kvm/Kconfig | 14 ++++++
arch/arm64/kvm/Makefile | 1 +
arch/arm64/kvm/arm.c | 2 +
arch/arm64/kvm/kvm_ptdump.h | 20 ++++++++
arch/arm64/kvm/ptdump.c | 91 +++++++++++++++++++++++++++++++++++++
5 files changed, 128 insertions(+)
create mode 100644 arch/arm64/kvm/kvm_ptdump.h
create mode 100644 arch/arm64/kvm/ptdump.c
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 8304eb342be9..fcc41e58ede6 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -66,4 +66,18 @@ config PROTECTED_NVHE_STACKTRACE
If unsure, or not using protected nVHE (pKVM), say N.
+config PTDUMP_STAGE2_DEBUGFS
+ bool "Present the stage-2 pagetables to debugfs"
+ depends on KVM
+ select PTDUMP_CORE
+ default n
+ help
+ Say Y here if you want to show the stage-2 kernel pagetables layout
+ in a debugfs file. This information is only useful for kernel
+ developers who are working in architecture specific areas of the
+ kernel. It is probably not a good idea to enable this feature in a
+ production kernel.
+
+ If in doubt, say N.
+
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 86a629aaf0a1..e4233b323a73 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
+kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
always-y := hyp_constants.h hyp-constants.s
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9bef7638342e..60fed2146763 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -45,6 +45,7 @@
#include <kvm/arm_hypercalls.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>
+#include <kvm_ptdump.h>
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
@@ -228,6 +229,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
void kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
kvm_sys_regs_create_debugfs(kvm);
+ kvm_ptdump_guest_register(kvm);
}
static void kvm_destroy_mpidr_data(struct kvm *kvm)
diff --git a/arch/arm64/kvm/kvm_ptdump.h b/arch/arm64/kvm/kvm_ptdump.h
new file mode 100644
index 000000000000..0a62b0e2908c
--- /dev/null
+++ b/arch/arm64/kvm/kvm_ptdump.h
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) Google, 2024
+ * Author: Sebastian Ene <sebastianene@google.com>
+ */
+
+#ifndef __KVM_PTDUMP_H
+#define __KVM_PTDUMP_H
+
+#include <linux/kvm_host.h>
+#include <asm/ptdump.h>
+
+
+#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
+void kvm_ptdump_guest_register(struct kvm *kvm);
+#else
+static inline void kvm_ptdump_guest_register(struct kvm *kvm) {}
+#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
+
+#endif /* __KVM_PTDUMP_H */
diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
new file mode 100644
index 000000000000..52483d56be2e
--- /dev/null
+++ b/arch/arm64/kvm/ptdump.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Debug helper used to dump the stage-2 pagetables of the system and their
+ * associated permissions.
+ *
+ * Copyright (C) Google, 2024
+ * Author: Sebastian Ene <sebastianene@google.com>
+ */
+#include <linux/debugfs.h>
+#include <linux/kvm_host.h>
+#include <linux/seq_file.h>
+
+#include <asm/kvm_pgtable.h>
+#include <kvm_ptdump.h>
+
+
+static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
+ enum kvm_pgtable_walk_flags visit)
+{
+ struct ptdump_pg_state *st = ctx->arg;
+ struct ptdump_state *pt_st = &st->ptdump;
+
+ note_page(pt_st, ctx->addr, ctx->level, ctx->old);
+ return 0;
+}
+
+static int kvm_ptdump_show_common(struct seq_file *m,
+ struct kvm_pgtable *pgtable,
+ struct ptdump_pg_state *parser_state)
+{
+ struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) {
+ .cb = kvm_ptdump_visitor,
+ .arg = parser_state,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ };
+
+ parser_state->level = -1;
+ parser_state->start_address = 0;
+
+ return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
+}
+
+static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
+{
+ struct kvm *kvm = m->private;
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+ struct ptdump_pg_state parser_state = {0};
+ int ret;
+
+ write_lock(&kvm->mmu_lock);
+ ret = kvm_ptdump_show_common(m, mmu->pgt, &parser_state);
+ write_unlock(&kvm->mmu_lock);
+
+ return ret;
+}
+
+static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
+{
+ struct kvm *kvm = m->i_private;
+ int ret;
+
+ if (!kvm_get_kvm_safe(kvm))
+ return -ENOENT;
+
+ ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
+ if (ret < 0)
+ kvm_put_kvm(kvm);
+
+ return ret;
+}
+
+static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
+{
+ struct kvm *kvm = m->i_private;
+
+ kvm_put_kvm(kvm);
+ return single_release(m, file);
+}
+
+static const struct file_operations kvm_ptdump_guest_fops = {
+ .open = kvm_ptdump_guest_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = kvm_ptdump_guest_close,
+};
+
+void kvm_ptdump_guest_register(struct kvm *kvm)
+{
+ debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry,
+ kvm, &kvm_ptdump_guest_fops);
+}
--
2.46.0.184.g6999bdac58-goog
On Fri, 16 Aug 2024 13:39:04 +0100,
Sebastian Ene <sebastianene@google.com> wrote:
>
> While arch/*/mem/ptdump handles the kernel pagetable dumping code,
> introduce KVM/ptdump which shows the guest stage-2 pagetables. The
> separation is necessary because most of the definitions from the
> stage-2 pagetable reside in the KVM path and we will be invoking
> functionality **specific** to KVM.
Drop the ** emphasis.
>
> When a guest is created, register a new file entry under the guest
> debugfs dir which allows userspace to show the contents of the guest
> stage-2 pagetables when accessed.
>
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> ---
> arch/arm64/kvm/Kconfig | 14 ++++++
> arch/arm64/kvm/Makefile | 1 +
> arch/arm64/kvm/arm.c | 2 +
> arch/arm64/kvm/kvm_ptdump.h | 20 ++++++++
> arch/arm64/kvm/ptdump.c | 91 +++++++++++++++++++++++++++++++++++++
> 5 files changed, 128 insertions(+)
> create mode 100644 arch/arm64/kvm/kvm_ptdump.h
> create mode 100644 arch/arm64/kvm/ptdump.c
>
> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> index 8304eb342be9..fcc41e58ede6 100644
> --- a/arch/arm64/kvm/Kconfig
> +++ b/arch/arm64/kvm/Kconfig
> @@ -66,4 +66,18 @@ config PROTECTED_NVHE_STACKTRACE
>
> If unsure, or not using protected nVHE (pKVM), say N.
>
> +config PTDUMP_STAGE2_DEBUGFS
> + bool "Present the stage-2 pagetables to debugfs"
> + depends on KVM
> + select PTDUMP_CORE
This looks wrong. Looking at PTDUMP_DEBUGFS, it has the following
constraints:
depends on DEBUG_KERNEL
depends on DEBUG_FS
depends on GENERIC_PTDUMP
select PTDUMP_CORE
I don't see why the Stage-2 version should have anything different.
> + default n
> + help
> + Say Y here if you want to show the stage-2 kernel pagetables
> + layout in a debugfs file. This information is only useful for kernel developers
> + who are working in architecture specific areas of the kernel.
> + It is probably not a good idea to enable this feature in a production
> + kernel.
nit: try to keep the formatting within 80 columns.
More importantly, I find it very strange to expose the configuration
option so early in the series, while the support code isn't there yet.
You can perfectly introduce code that is conditional on a config
option and only add it at the end.
> +
> + If in doubt, say N.
> +
> endif # VIRTUALIZATION
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index 86a629aaf0a1..e4233b323a73 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
>
> kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
> kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
> +kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
>
> always-y := hyp_constants.h hyp-constants.s
>
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 9bef7638342e..60fed2146763 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -45,6 +45,7 @@
> #include <kvm/arm_hypercalls.h>
> #include <kvm/arm_pmu.h>
> #include <kvm/arm_psci.h>
> +#include <kvm_ptdump.h>
>
> static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
>
> @@ -228,6 +229,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
> void kvm_arch_create_vm_debugfs(struct kvm *kvm)
> {
> kvm_sys_regs_create_debugfs(kvm);
> + kvm_ptdump_guest_register(kvm);
Consider using a name that is homogeneous with what we already have
(kvm_s2_ptdump_create_debugfs?).
> }
>
> static void kvm_destroy_mpidr_data(struct kvm *kvm)
> diff --git a/arch/arm64/kvm/kvm_ptdump.h b/arch/arm64/kvm/kvm_ptdump.h
> new file mode 100644
> index 000000000000..0a62b0e2908c
> --- /dev/null
> +++ b/arch/arm64/kvm/kvm_ptdump.h
> @@ -0,0 +1,20 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) Google, 2024
> + * Author: Sebastian Ene <sebastianene@google.com>
> + */
> +
> +#ifndef __KVM_PTDUMP_H
> +#define __KVM_PTDUMP_H
> +
> +#include <linux/kvm_host.h>
> +#include <asm/ptdump.h>
> +
> +
> +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
> +void kvm_ptdump_guest_register(struct kvm *kvm);
> +#else
> +static inline void kvm_ptdump_guest_register(struct kvm *kvm) {}
> +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
> +
> +#endif /* __KVM_PTDUMP_H */
Please don't add new include files that contain so little stuff. These
things may as well be added either to asm/kvm_host.h or asm/ptdump.h.
> diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> new file mode 100644
> index 000000000000..52483d56be2e
> --- /dev/null
> +++ b/arch/arm64/kvm/ptdump.c
> @@ -0,0 +1,91 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Debug helper used to dump the stage-2 pagetables of the system and their
> + * associated permissions.
> + *
> + * Copyright (C) Google, 2024
> + * Author: Sebastian Ene <sebastianene@google.com>
> + */
> +#include <linux/debugfs.h>
> +#include <linux/kvm_host.h>
> +#include <linux/seq_file.h>
> +
> +#include <asm/kvm_pgtable.h>
> +#include <kvm_ptdump.h>
> +
> +
> +static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
> + enum kvm_pgtable_walk_flags visit)
> +{
> + struct ptdump_pg_state *st = ctx->arg;
> + struct ptdump_state *pt_st = &st->ptdump;
> +
> + note_page(pt_st, ctx->addr, ctx->level, ctx->old);
> + return 0;
> +}
> +
> +static int kvm_ptdump_show_common(struct seq_file *m,
What does "common" mean here? You have exactly *one* caller, so why
isn't that inlined in kvm_ptdump_guest_show()?
> + struct kvm_pgtable *pgtable,
> + struct ptdump_pg_state *parser_state)
> +{
> + struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) {
> + .cb = kvm_ptdump_visitor,
> + .arg = parser_state,
> + .flags = KVM_PGTABLE_WALK_LEAF,
> + };
> +
> + parser_state->level = -1;
> + parser_state->start_address = 0;
> +
> + return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
> +}
> +
> +static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
> +{
> + struct kvm *kvm = m->private;
> + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> + struct ptdump_pg_state parser_state = {0};
nit: the common idiom is "parser_state = {}".
> + int ret;
> +
> + write_lock(&kvm->mmu_lock);
> + ret = kvm_ptdump_show_common(m, mmu->pgt, &parser_state);
> + write_unlock(&kvm->mmu_lock);
> +
> + return ret;
> +}
> +
> +static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
> +{
> + struct kvm *kvm = m->i_private;
> + int ret;
> +
> + if (!kvm_get_kvm_safe(kvm))
> + return -ENOENT;
> +
> + ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
> + if (ret < 0)
> + kvm_put_kvm(kvm);
> +
> + return ret;
> +}
> +
> +static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
> +{
> + struct kvm *kvm = m->i_private;
> +
> + kvm_put_kvm(kvm);
> + return single_release(m, file);
> +}
> +
> +static const struct file_operations kvm_ptdump_guest_fops = {
> + .open = kvm_ptdump_guest_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = kvm_ptdump_guest_close,
> +};
> +
> +void kvm_ptdump_guest_register(struct kvm *kvm)
> +{
> + debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry,
> + kvm, &kvm_ptdump_guest_fops);
> +}
> --
> 2.46.0.184.g6999bdac58-goog
Overall, I have a hard time understanding what this does. It walks the
page tables, but doesn't do anything useful.
I have the feeling that this patch would be better squashed together
with patch #5, since it significantly reworks what patch #4 does.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
On Tue, Aug 20, 2024 at 03:06:45PM +0100, Marc Zyngier wrote:
> On Fri, 16 Aug 2024 13:39:04 +0100,
> Sebastian Ene <sebastianene@google.com> wrote:
> >
> > While arch/*/mem/ptdump handles the kernel pagetable dumping code,
> > introduce KVM/ptdump which shows the guest stage-2 pagetables. The
> > separation is necessary because most of the definitions from the
> > stage-2 pagetable reside in the KVM path and we will be invoking
> > functionality **specific** to KVM.
>
> Drop the ** emphasis.
>
> >
> > When a guest is created, register a new file entry under the guest
> > debugfs dir which allows userspace to show the contents of the guest
> > stage-2 pagetables when accessed.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > ---
> > arch/arm64/kvm/Kconfig | 14 ++++++
> > arch/arm64/kvm/Makefile | 1 +
> > arch/arm64/kvm/arm.c | 2 +
> > arch/arm64/kvm/kvm_ptdump.h | 20 ++++++++
> > arch/arm64/kvm/ptdump.c | 91 +++++++++++++++++++++++++++++++++++++
> > 5 files changed, 128 insertions(+)
> > create mode 100644 arch/arm64/kvm/kvm_ptdump.h
> > create mode 100644 arch/arm64/kvm/ptdump.c
> >
> > diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> > index 8304eb342be9..fcc41e58ede6 100644
> > --- a/arch/arm64/kvm/Kconfig
> > +++ b/arch/arm64/kvm/Kconfig
> > @@ -66,4 +66,18 @@ config PROTECTED_NVHE_STACKTRACE
> >
> > If unsure, or not using protected nVHE (pKVM), say N.
> >
> > +config PTDUMP_STAGE2_DEBUGFS
> > + bool "Present the stage-2 pagetables to debugfs"
> > + depends on KVM
> > + select PTDUMP_CORE
>
> This looks wrong. Looking at PTDUMP_DEBUGFS, it has the following
> constraints:
>
> depends on DEBUG_KERNEL
> depends on DEBUG_FS
> depends on GENERIC_PTDUMP
> select PTDUMP_CORE
>
> I don't see why the Stage-2 version should have anything different.
>
> > + default n
> > + help
> > + Say Y here if you want to show the stage-2 kernel pagetables
> > + layout in a debugfs file. This information is only useful for kernel developers
> > + who are working in architecture specific areas of the kernel.
> > + It is probably not a good idea to enable this feature in a production
> > + kernel.
>
> nit: try to keep the formatting within 80 columns.
>
> More importantly, I find it very strange to expose the configuration
> option so early in the series, while the support code isn't there yet.
> You can perfectly introduce code that is conditional on a config
> option and only add it at the end.
>
> > +
> > + If in doubt, say N.
> > +
> > endif # VIRTUALIZATION
> > diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> > index 86a629aaf0a1..e4233b323a73 100644
> > --- a/arch/arm64/kvm/Makefile
> > +++ b/arch/arm64/kvm/Makefile
> > @@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
> >
> > kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
> > kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
> > +kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
> >
> > always-y := hyp_constants.h hyp-constants.s
> >
> > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> > index 9bef7638342e..60fed2146763 100644
> > --- a/arch/arm64/kvm/arm.c
> > +++ b/arch/arm64/kvm/arm.c
> > @@ -45,6 +45,7 @@
> > #include <kvm/arm_hypercalls.h>
> > #include <kvm/arm_pmu.h>
> > #include <kvm/arm_psci.h>
> > +#include <kvm_ptdump.h>
> >
> > static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
> >
> > @@ -228,6 +229,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
> > void kvm_arch_create_vm_debugfs(struct kvm *kvm)
> > {
> > kvm_sys_regs_create_debugfs(kvm);
> > + kvm_ptdump_guest_register(kvm);
>
> Consider using a name that is homogeneous with what we already have
> (kvm_s2_ptdump_create_debugfs?).
>
> > }
> >
> > static void kvm_destroy_mpidr_data(struct kvm *kvm)
> > diff --git a/arch/arm64/kvm/kvm_ptdump.h b/arch/arm64/kvm/kvm_ptdump.h
> > new file mode 100644
> > index 000000000000..0a62b0e2908c
> > --- /dev/null
> > +++ b/arch/arm64/kvm/kvm_ptdump.h
> > @@ -0,0 +1,20 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) Google, 2024
> > + * Author: Sebastian Ene <sebastianene@google.com>
> > + */
> > +
> > +#ifndef __KVM_PTDUMP_H
> > +#define __KVM_PTDUMP_H
> > +
> > +#include <linux/kvm_host.h>
> > +#include <asm/ptdump.h>
> > +
> > +
> > +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
> > +void kvm_ptdump_guest_register(struct kvm *kvm);
> > +#else
> > +static inline void kvm_ptdump_guest_register(struct kvm *kvm) {}
> > +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
> > +
> > +#endif /* __KVM_PTDUMP_H */
>
> Please don't add new include files that contain so little stuff. These
> things may as well be added either to asm/kvm_host.h or asm/ptdump.h.
>
I will drop this in this case.
> > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > new file mode 100644
> > index 000000000000..52483d56be2e
> > --- /dev/null
> > +++ b/arch/arm64/kvm/ptdump.c
> > @@ -0,0 +1,91 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Debug helper used to dump the stage-2 pagetables of the system and their
> > + * associated permissions.
> > + *
> > + * Copyright (C) Google, 2024
> > + * Author: Sebastian Ene <sebastianene@google.com>
> > + */
> > +#include <linux/debugfs.h>
> > +#include <linux/kvm_host.h>
> > +#include <linux/seq_file.h>
> > +
> > +#include <asm/kvm_pgtable.h>
> > +#include <kvm_ptdump.h>
> > +
> > +
> > +static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
> > + enum kvm_pgtable_walk_flags visit)
> > +{
> > + struct ptdump_pg_state *st = ctx->arg;
> > + struct ptdump_state *pt_st = &st->ptdump;
> > +
> > + note_page(pt_st, ctx->addr, ctx->level, ctx->old);
> > + return 0;
> > +}
> > +
> > +static int kvm_ptdump_show_common(struct seq_file *m,
>
> What does "common" mean here? You have exactly *one* caller, so why
> isn't that inlined in kvm_ptdump_guest_show()?
>
Right, this first part of the series adds support for non-protected
VMs, for which there is only one caller. The later part, which I haven't
sent yet, adds support for protected mode for both the host and the
guest. I will keep it in kvm_ptdump_guest_show() for now.
> > + struct kvm_pgtable *pgtable,
> > + struct ptdump_pg_state *parser_state)
> > +{
> > + struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) {
> > + .cb = kvm_ptdump_visitor,
> > + .arg = parser_state,
> > + .flags = KVM_PGTABLE_WALK_LEAF,
> > + };
> > +
> > + parser_state->level = -1;
> > + parser_state->start_address = 0;
> > +
> > + return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
> > +}
> > +
> > +static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
> > +{
> > + struct kvm *kvm = m->private;
> > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> > + struct ptdump_pg_state parser_state = {0};
>
> nit: the common idiom is "parser_state = {}".
>
> > + int ret;
> > +
> > + write_lock(&kvm->mmu_lock);
> > + ret = kvm_ptdump_show_common(m, mmu->pgt, &parser_state);
> > + write_unlock(&kvm->mmu_lock);
> > +
> > + return ret;
> > +}
> > +
> > +static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
> > +{
> > + struct kvm *kvm = m->i_private;
> > + int ret;
> > +
> > + if (!kvm_get_kvm_safe(kvm))
> > + return -ENOENT;
> > +
> > + ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
> > + if (ret < 0)
> > + kvm_put_kvm(kvm);
> > +
> > + return ret;
> > +}
> > +
> > +static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
> > +{
> > + struct kvm *kvm = m->i_private;
> > +
> > + kvm_put_kvm(kvm);
> > + return single_release(m, file);
> > +}
> > +
> > +static const struct file_operations kvm_ptdump_guest_fops = {
> > + .open = kvm_ptdump_guest_open,
> > + .read = seq_read,
> > + .llseek = seq_lseek,
> > + .release = kvm_ptdump_guest_close,
> > +};
> > +
> > +void kvm_ptdump_guest_register(struct kvm *kvm)
> > +{
> > + debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry,
> > + kvm, &kvm_ptdump_guest_fops);
> > +}
> > --
> > 2.46.0.184.g6999bdac58-goog
>
> Overall, I have a hard time understanding what this does. It walks the
> page tables, but doesn't do anything useful.
>
> I have the feeling that this patch would be better squashed together
> with patch #5, since it significantly reworks what patch #4 does.
>
> Thanks,
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
>
> To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@android.com.
>
On Tue, Aug 20, 2024 at 03:06:45PM +0100, Marc Zyngier wrote:
Hello Marc,
> On Fri, 16 Aug 2024 13:39:04 +0100,
> Sebastian Ene <sebastianene@google.com> wrote:
> >
> > While arch/*/mem/ptdump handles the kernel pagetable dumping code,
> > introduce KVM/ptdump which shows the guest stage-2 pagetables. The
> > separation is necessary because most of the definitions from the
> > stage-2 pagetable reside in the KVM path and we will be invoking
> > functionality **specific** to KVM.
>
> Drop the ** emphasis.
I will drop this.
>
> >
> > When a guest is created, register a new file entry under the guest
> > debugfs dir which allows userspace to show the contents of the guest
> > stage-2 pagetables when accessed.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > ---
> > arch/arm64/kvm/Kconfig | 14 ++++++
> > arch/arm64/kvm/Makefile | 1 +
> > arch/arm64/kvm/arm.c | 2 +
> > arch/arm64/kvm/kvm_ptdump.h | 20 ++++++++
> > arch/arm64/kvm/ptdump.c | 91 +++++++++++++++++++++++++++++++++++++
> > 5 files changed, 128 insertions(+)
> > create mode 100644 arch/arm64/kvm/kvm_ptdump.h
> > create mode 100644 arch/arm64/kvm/ptdump.c
> >
> > diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> > index 8304eb342be9..fcc41e58ede6 100644
> > --- a/arch/arm64/kvm/Kconfig
> > +++ b/arch/arm64/kvm/Kconfig
> > @@ -66,4 +66,18 @@ config PROTECTED_NVHE_STACKTRACE
> >
> > If unsure, or not using protected nVHE (pKVM), say N.
> >
> > +config PTDUMP_STAGE2_DEBUGFS
> > + bool "Present the stage-2 pagetables to debugfs"
> > + depends on KVM
> > + select PTDUMP_CORE
>
> This looks wrong. Looking at PTDUMP_DEBUGFS, it has the following
> constraints:
>
> depends on DEBUG_KERNEL
> depends on DEBUG_FS
> depends on GENERIC_PTDUMP
> select PTDUMP_CORE
>
> I don't see why the Stage-2 version should have anything different.
>
> > + default n
> > + help
> > + Say Y here if you want to show the stage-2 kernel pagetables
> > + layout in a debugfs file. This information is only useful for kernel developers
> > + who are working in architecture specific areas of the kernel.
> > + It is probably not a good idea to enable this feature in a production
> > + kernel.
>
> nit: try to keep the formatting within 80 columns.
Let me fix the formatting and add the other depends on rules.
>
> More importantly, I find it very strange to expose the configuration
> option so early in the series, while the support code isn't there yet.
> You can perfectly introduce code that is conditional on a config
> option and only add it at the end.
I will move the configuration option into a separate patch at the end
of the series.
>
> > +
> > + If in doubt, say N.
> > +
> > endif # VIRTUALIZATION
> > diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> > index 86a629aaf0a1..e4233b323a73 100644
> > --- a/arch/arm64/kvm/Makefile
> > +++ b/arch/arm64/kvm/Makefile
> > @@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
> >
> > kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
> > kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
> > +kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
> >
> > always-y := hyp_constants.h hyp-constants.s
> >
> > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> > index 9bef7638342e..60fed2146763 100644
> > --- a/arch/arm64/kvm/arm.c
> > +++ b/arch/arm64/kvm/arm.c
> > @@ -45,6 +45,7 @@
> > #include <kvm/arm_hypercalls.h>
> > #include <kvm/arm_pmu.h>
> > #include <kvm/arm_psci.h>
> > +#include <kvm_ptdump.h>
> >
> > static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
> >
> > @@ -228,6 +229,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
> > void kvm_arch_create_vm_debugfs(struct kvm *kvm)
> > {
> > kvm_sys_regs_create_debugfs(kvm);
> > + kvm_ptdump_guest_register(kvm);
>
> Consider using a name that is homogeneous with what we already have
> (kvm_s2_ptdump_create_debugfs?).
>
> > }
> >
> > static void kvm_destroy_mpidr_data(struct kvm *kvm)
> > diff --git a/arch/arm64/kvm/kvm_ptdump.h b/arch/arm64/kvm/kvm_ptdump.h
> > new file mode 100644
> > index 000000000000..0a62b0e2908c
> > --- /dev/null
> > +++ b/arch/arm64/kvm/kvm_ptdump.h
> > @@ -0,0 +1,20 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) Google, 2024
> > + * Author: Sebastian Ene <sebastianene@google.com>
> > + */
> > +
> > +#ifndef __KVM_PTDUMP_H
> > +#define __KVM_PTDUMP_H
> > +
> > +#include <linux/kvm_host.h>
> > +#include <asm/ptdump.h>
> > +
> > +
> > +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
> > +void kvm_ptdump_guest_register(struct kvm *kvm);
> > +#else
> > +static inline void kvm_ptdump_guest_register(struct kvm *kvm) {}
> > +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
> > +
> > +#endif /* __KVM_PTDUMP_H */
>
> Please don't add new include files that contain so little stuff. These
> things may as well be added either to asm/kvm_host.h or asm/ptdump.h.
>
> > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > new file mode 100644
> > index 000000000000..52483d56be2e
> > --- /dev/null
> > +++ b/arch/arm64/kvm/ptdump.c
> > @@ -0,0 +1,91 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Debug helper used to dump the stage-2 pagetables of the system and their
> > + * associated permissions.
> > + *
> > + * Copyright (C) Google, 2024
> > + * Author: Sebastian Ene <sebastianene@google.com>
> > + */
> > +#include <linux/debugfs.h>
> > +#include <linux/kvm_host.h>
> > +#include <linux/seq_file.h>
> > +
> > +#include <asm/kvm_pgtable.h>
> > +#include <kvm_ptdump.h>
> > +
> > +
> > +static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
> > + enum kvm_pgtable_walk_flags visit)
> > +{
> > + struct ptdump_pg_state *st = ctx->arg;
> > + struct ptdump_state *pt_st = &st->ptdump;
> > +
> > + note_page(pt_st, ctx->addr, ctx->level, ctx->old);
> > + return 0;
> > +}
> > +
> > +static int kvm_ptdump_show_common(struct seq_file *m,
>
> What does "common" mean here? You have exactly *one* caller, so why
> isn't that inlined in kvm_ptdump_guest_show()?
>
> > + struct kvm_pgtable *pgtable,
> > + struct ptdump_pg_state *parser_state)
> > +{
> > + struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) {
> > + .cb = kvm_ptdump_visitor,
> > + .arg = parser_state,
> > + .flags = KVM_PGTABLE_WALK_LEAF,
> > + };
> > +
> > + parser_state->level = -1;
> > + parser_state->start_address = 0;
> > +
> > + return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
> > +}
> > +
> > +static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
> > +{
> > + struct kvm *kvm = m->private;
> > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> > + struct ptdump_pg_state parser_state = {0};
>
> nit: the common idiom is "parser_state = {}".
>
> > + int ret;
> > +
> > + write_lock(&kvm->mmu_lock);
> > + ret = kvm_ptdump_show_common(m, mmu->pgt, &parser_state);
> > + write_unlock(&kvm->mmu_lock);
> > +
> > + return ret;
> > +}
> > +
> > +static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
> > +{
> > + struct kvm *kvm = m->i_private;
> > + int ret;
> > +
> > + if (!kvm_get_kvm_safe(kvm))
> > + return -ENOENT;
> > +
> > + ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
> > + if (ret < 0)
> > + kvm_put_kvm(kvm);
> > +
> > + return ret;
> > +}
> > +
> > +static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
> > +{
> > + struct kvm *kvm = m->i_private;
> > +
> > + kvm_put_kvm(kvm);
> > + return single_release(m, file);
> > +}
> > +
> > +static const struct file_operations kvm_ptdump_guest_fops = {
> > + .open = kvm_ptdump_guest_open,
> > + .read = seq_read,
> > + .llseek = seq_lseek,
> > + .release = kvm_ptdump_guest_close,
> > +};
> > +
> > +void kvm_ptdump_guest_register(struct kvm *kvm)
> > +{
> > + debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry,
> > + kvm, &kvm_ptdump_guest_fops);
> > +}
> > --
> > 2.46.0.184.g6999bdac58-goog
>
> Overall, I have a hard time understanding what this does. It walks the
> page tables, but doesn't do anything useful.
>
> I have the feeling that this patch would be better squashed together
> with patch #5, since it significantly reworks what patch #4 does.
>
Right, I think there are a few patches that should be squashed to
prevent this "empty" functionality.
Thanks for the feedback,
Seb
> Thanks,
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
>
> To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@android.com.
>
© 2016 - 2026 Red Hat, Inc.