Define a set of attributes used by the ptdump parser to display the
properties of a guest memory region covered by a pagetable descriptor.
Build a description of the pagetable levels and initialize the parser
with this configuration.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/kvm/ptdump.c | 143 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 137 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
index 36dc7662729f..cc1d4fdddc6e 100644
--- a/arch/arm64/kvm/ptdump.c
+++ b/arch/arm64/kvm/ptdump.c
@@ -14,6 +14,61 @@
#include <kvm_ptdump.h>
+#define MARKERS_LEN (2)
+#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
+
+struct kvm_ptdump_guest_state {
+ struct kvm *kvm;
+ struct pg_state parser_state;
+ struct addr_marker ipa_marker[MARKERS_LEN];
+ struct pg_level level[KVM_PGTABLE_MAX_LEVELS];
+ struct ptdump_range range[MARKERS_LEN];
+};
+
+static const struct prot_bits stage2_pte_bits[] = {
+ {
+ .mask = PTE_VALID,
+ .val = PTE_VALID,
+ .set = " ",
+ .clear = "F",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
+ .val = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
+ .set = "XN",
+ .clear = " ",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
+ .set = "R",
+ .clear = " ",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
+ .set = "W",
+ .clear = " ",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
+ .set = "AF",
+ .clear = " ",
+ }, {
+ .mask = PTE_NG,
+ .val = PTE_NG,
+ .set = "FnXS",
+ .clear = " ",
+ }, {
+ .mask = PTE_CONT | PTE_VALID,
+ .val = PTE_CONT | PTE_VALID,
+ .set = "CON",
+ .clear = " ",
+ }, {
+ .mask = PTE_TABLE_BIT,
+ .val = PTE_TABLE_BIT,
+ .set = " ",
+ .clear = "BLK",
+ },
+};
+
static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
@@ -40,15 +95,79 @@ static int kvm_ptdump_show_common(struct seq_file *m,
return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
}
+static int kvm_ptdump_build_levels(struct pg_level *level, u32 start_lvl)
+{
+ static const char * const level_names[] = {"PGD", "PUD", "PMD", "PTE"};
+ u32 i = 0;
+ u64 mask = 0;
+
+ if (start_lvl > 2) {
+ pr_err("invalid start_lvl %u\n", start_lvl);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++)
+ mask |= stage2_pte_bits[i].mask;
+
+ for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) {
+ strscpy(level[i].name, level_names[i], sizeof(level[i].name));
+
+ level[i].num = ARRAY_SIZE(stage2_pte_bits);
+ level[i].bits = stage2_pte_bits;
+ level[i].mask = mask;
+ }
+
+ if (start_lvl > 0)
+ strscpy(level[start_lvl].name, level_names[0], sizeof(level_names[0]));
+
+ return 0;
+}
+
+static struct kvm_ptdump_guest_state
+*kvm_ptdump_parser_init(struct kvm *kvm)
+{
+ struct kvm_ptdump_guest_state *st;
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+ struct kvm_pgtable *pgtable = mmu->pgt;
+ int ret;
+
+ st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT);
+ if (!st)
+ return NULL;
+
+ ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level);
+ if (ret)
+ goto free_with_state;
+
+ st->ipa_marker[0].name = "Guest IPA";
+ st->ipa_marker[1].start_address = BIT(pgtable->ia_bits);
+ st->range[0].end = BIT(pgtable->ia_bits);
+
+ st->kvm = kvm;
+ st->parser_state = (struct pg_state) {
+ .marker = &st->ipa_marker[0],
+ .level = -1,
+ .pg_level = &st->level[0],
+ .ptdump.range = &st->range[0],
+ };
+
+ return st;
+free_with_state:
+ kfree(st);
+ return NULL;
+}
+
static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
{
- struct kvm *kvm = m->private;
+ struct kvm_ptdump_guest_state *st = m->private;
+ struct kvm *kvm = st->kvm;
struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
- struct pg_state parser_state = {0};
int ret;
+ st->parser_state.seq = m;
+
write_lock(&kvm->mmu_lock);
- ret = kvm_ptdump_show_common(m, mmu->pgt, &parser_state);
+ ret = kvm_ptdump_show_common(m, mmu->pgt, &st->parser_state);
write_unlock(&kvm->mmu_lock);
return ret;
@@ -57,22 +176,34 @@ static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
{
struct kvm *kvm = m->i_private;
+ struct kvm_ptdump_guest_state *st;
int ret;
if (!kvm_get_kvm_safe(kvm))
return -ENOENT;
- ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
- if (ret < 0)
- kvm_put_kvm(kvm);
+ st = kvm_ptdump_parser_init(kvm);
+ if (!st) {
+ ret = -ENOMEM;
+ goto free_with_kvm_ref;
+ }
+
+ ret = single_open(file, kvm_ptdump_guest_show, st);
+ if (!ret)
+ return 0;
+ kfree(st);
+free_with_kvm_ref:
+ kvm_put_kvm(kvm);
return ret;
}
static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
{
struct kvm *kvm = m->i_private;
+ void *st = ((struct seq_file *)file->private_data)->private;
+ kfree(st);
kvm_put_kvm(kvm);
return single_release(m, file);
}
--
2.45.2.741.gdbec12cfda-goog
On Fri, Jun 21, 2024 at 12:32:29PM +0000, 'Sebastian Ene' via kernel-team wrote:
> Define a set of attributes used by the ptdump parser to display the
> properties of a guest memory region covered by a pagetable descriptor.
> Build a description of the pagetable levels and initialize the parser
> with this configuration.
>
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> ---
> arch/arm64/kvm/ptdump.c | 143 ++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 137 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> index 36dc7662729f..cc1d4fdddc6e 100644
> --- a/arch/arm64/kvm/ptdump.c
> +++ b/arch/arm64/kvm/ptdump.c
> @@ -14,6 +14,61 @@
> #include <kvm_ptdump.h>
>
>
> +#define MARKERS_LEN (2)
> +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
> +
> +struct kvm_ptdump_guest_state {
> + struct kvm *kvm;
> + struct pg_state parser_state;
> + struct addr_marker ipa_marker[MARKERS_LEN];
> + struct pg_level level[KVM_PGTABLE_MAX_LEVELS];
> + struct ptdump_range range[MARKERS_LEN];
> +};
> +
> +static const struct prot_bits stage2_pte_bits[] = {
> + {
> + .mask = PTE_VALID,
> + .val = PTE_VALID,
> + .set = " ",
> + .clear = "F",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> + .set = "XN",
> + .clear = " ",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> + .set = "R",
> + .clear = " ",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> + .set = "W",
> + .clear = " ",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> + .set = "AF",
> + .clear = " ",
> + }, {
> + .mask = PTE_NG,
> + .val = PTE_NG,
> + .set = "FnXS",
> + .clear = " ",
> + }, {
> + .mask = PTE_CONT | PTE_VALID,
> + .val = PTE_CONT | PTE_VALID,
> + .set = "CON",
> + .clear = " ",
> + }, {
> + .mask = PTE_TABLE_BIT,
>
> + .val = PTE_TABLE_BIT,
> + .set = " ",
> + .clear = "BLK",
> + },
When doing a kvm_pgtable_stage2_set_owner(), the walker will init a leaf which
has both the table-bit and the valid-bit unset. I believe this would lead to
spurious BLK annotations here.
The following should fix this problem:
.mask = PTE_TABLE_BIT | PTE_VALID,
.val = PTE_VALID,
.set = "BLK",
.clear = " ",
> +};
> +
> static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
> enum kvm_pgtable_walk_flags visit)
> {
> @@ -40,15 +95,79 @@ static int kvm_ptdump_show_common(struct seq_file *m,
> return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
> }
>
[...]
On Mon, Jul 01, 2024 at 09:42:43AM +0100, Vincent Donnefort wrote:
> On Fri, Jun 21, 2024 at 12:32:29PM +0000, 'Sebastian Ene' via kernel-team wrote:
> > Define a set of attributes used by the ptdump parser to display the
> > properties of a guest memory region covered by a pagetable descriptor.
> > Build a description of the pagetable levels and initialize the parser
> > with this configuration.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > ---
> > arch/arm64/kvm/ptdump.c | 143 ++++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 137 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > index 36dc7662729f..cc1d4fdddc6e 100644
> > --- a/arch/arm64/kvm/ptdump.c
> > +++ b/arch/arm64/kvm/ptdump.c
> > @@ -14,6 +14,61 @@
> > #include <kvm_ptdump.h>
> >
> >
> > +#define MARKERS_LEN (2)
> > +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
> > +
> > +struct kvm_ptdump_guest_state {
> > + struct kvm *kvm;
> > + struct pg_state parser_state;
> > + struct addr_marker ipa_marker[MARKERS_LEN];
> > + struct pg_level level[KVM_PGTABLE_MAX_LEVELS];
> > + struct ptdump_range range[MARKERS_LEN];
> > +};
> > +
> > +static const struct prot_bits stage2_pte_bits[] = {
> > + {
> > + .mask = PTE_VALID,
> > + .val = PTE_VALID,
> > + .set = " ",
> > + .clear = "F",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > + .set = "XN",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > + .set = "R",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> > + .set = "W",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> > + .set = "AF",
> > + .clear = " ",
> > + }, {
> > + .mask = PTE_NG,
> > + .val = PTE_NG,
> > + .set = "FnXS",
> > + .clear = " ",
> > + }, {
> > + .mask = PTE_CONT | PTE_VALID,
> > + .val = PTE_CONT | PTE_VALID,
> > + .set = "CON",
> > + .clear = " ",
> > + }, {
> > + .mask = PTE_TABLE_BIT,
> >
> > + .val = PTE_TABLE_BIT,
> > + .set = " ",
> > + .clear = "BLK",
> > + },
Hello Vincent,
>
> When doing a kvm_pgtable_stage2_set_owner(), the walker will init a leaf which
> has both the table-bit and the valid-bit unset. I believe this would lead to
> spurious BLK annotations here.
>
> The following should fix this problem:
>
> .mask = PTE_TABLE_BIT | PTE_VALID,
> .val = PTE_VALID,
> .set = "BLK",
> .clear = " ",
>
Let me try this, thanks for the suggestion!
> > +};
> > +
> > static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
> > enum kvm_pgtable_walk_flags visit)
> > {
> > @@ -40,15 +95,79 @@ static int kvm_ptdump_show_common(struct seq_file *m,
> > return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
> > }
> >
>
> [...]
Seb
On Mon, Jul 01, 2024 at 02:18:39PM +0000, Sebastian Ene wrote:
> On Mon, Jul 01, 2024 at 09:42:43AM +0100, Vincent Donnefort wrote:
> > On Fri, Jun 21, 2024 at 12:32:29PM +0000, 'Sebastian Ene' via kernel-team wrote:
> > > Define a set of attributes used by the ptdump parser to display the
> > > properties of a guest memory region covered by a pagetable descriptor.
> > > Build a description of the pagetable levels and initialize the parser
> > > with this configuration.
> > >
> > > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > > ---
> > > arch/arm64/kvm/ptdump.c | 143 ++++++++++++++++++++++++++++++++++++++--
> > > 1 file changed, 137 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > > index 36dc7662729f..cc1d4fdddc6e 100644
> > > --- a/arch/arm64/kvm/ptdump.c
> > > +++ b/arch/arm64/kvm/ptdump.c
> > > @@ -14,6 +14,61 @@
> > > #include <kvm_ptdump.h>
> > >
> > >
> > > +#define MARKERS_LEN (2)
> > > +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
> > > +
> > > +struct kvm_ptdump_guest_state {
> > > + struct kvm *kvm;
> > > + struct pg_state parser_state;
> > > + struct addr_marker ipa_marker[MARKERS_LEN];
> > > + struct pg_level level[KVM_PGTABLE_MAX_LEVELS];
> > > + struct ptdump_range range[MARKERS_LEN];
> > > +};
> > > +
> > > +static const struct prot_bits stage2_pte_bits[] = {
> > > + {
> > > + .mask = PTE_VALID,
> > > + .val = PTE_VALID,
> > > + .set = " ",
> > > + .clear = "F",
> > > + }, {
> > > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > > + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
KVM_PTE_LEAF_ATTR_HI_S2_XN is actually a mask covering
KVM_PTE_LEAF_ATTR_HI_S2_XN_XN, KVM_PTE_LEAF_ATTR_HI_S2_XN_PXN and
KVM_PTE_LEAF_ATTR_HI_S2_XN_UXN.
I believe what we should do here is something like:
.val = FIELD_PREP_CONST(KVM_PTE_LEAF_ATTR_HI_S2_XN,
KVM_PTE_LEAF_ATTR_HI_S2_XN_XN) | PTE_VALID
> > > + .set = "XN",
> > > + .clear = " ",
> > > + }, {
> > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > > + .set = "R",
> > > + .clear = " ",
> > > + }, {
[...]
On Tue, Jul 16, 2024 at 10:59:41AM +0100, Vincent Donnefort wrote:
> On Mon, Jul 01, 2024 at 02:18:39PM +0000, Sebastian Ene wrote:
> > On Mon, Jul 01, 2024 at 09:42:43AM +0100, Vincent Donnefort wrote:
> > > On Fri, Jun 21, 2024 at 12:32:29PM +0000, 'Sebastian Ene' via kernel-team wrote:
> > > > Define a set of attributes used by the ptdump parser to display the
> > > > properties of a guest memory region covered by a pagetable descriptor.
> > > > Build a description of the pagetable levels and initialize the parser
> > > > with this configuration.
> > > >
> > > > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > > > ---
> > > > arch/arm64/kvm/ptdump.c | 143 ++++++++++++++++++++++++++++++++++++++--
> > > > 1 file changed, 137 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > > > index 36dc7662729f..cc1d4fdddc6e 100644
> > > > --- a/arch/arm64/kvm/ptdump.c
> > > > +++ b/arch/arm64/kvm/ptdump.c
> > > > @@ -14,6 +14,61 @@
> > > > #include <kvm_ptdump.h>
> > > >
> > > >
> > > > +#define MARKERS_LEN (2)
> > > > +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
> > > > +
> > > > +struct kvm_ptdump_guest_state {
> > > > + struct kvm *kvm;
> > > > + struct pg_state parser_state;
> > > > + struct addr_marker ipa_marker[MARKERS_LEN];
> > > > + struct pg_level level[KVM_PGTABLE_MAX_LEVELS];
> > > > + struct ptdump_range range[MARKERS_LEN];
> > > > +};
> > > > +
> > > > +static const struct prot_bits stage2_pte_bits[] = {
> > > > + {
> > > > + .mask = PTE_VALID,
> > > > + .val = PTE_VALID,
> > > > + .set = " ",
> > > > + .clear = "F",
> > > > + }, {
> > > > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > > > + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
>
> KVM_PTE_LEAF_ATTR_HI_S2_XN is actually a mask covering
It is not a mask here (upstream); it is only a mask in the ACK kernel.
>
> KVM_PTE_LEAF_ATTR_HI_S2_XN_XN, KVM_PTE_LEAF_ATTR_HI_S2_XN_PXN and
> KVM_PTE_LEAF_ATTR_HI_S2_XN_UXN.
>
> I believe here what we should do is something like?
>
> .val = FIELD_PREP_CONST(KVM_PTE_LEAF_ATTR_HI_S2_XN,
> KVM_PTE_LEAF_ATTR_HI_S2_XN_XN) | PTE_VALID
>
> > > > + .set = "XN",
> > > > + .clear = " ",
> > > > + }, {
> > > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > > > + .set = "R",
> > > > + .clear = " ",
> > > > + }, {
>
> [...]
On Fri, Jul 19, 2024 at 02:09:19PM +0000, Sebastian Ene wrote:
> On Tue, Jul 16, 2024 at 10:59:41AM +0100, Vincent Donnefort wrote:
> > On Mon, Jul 01, 2024 at 02:18:39PM +0000, Sebastian Ene wrote:
> > > On Mon, Jul 01, 2024 at 09:42:43AM +0100, Vincent Donnefort wrote:
> > > > On Fri, Jun 21, 2024 at 12:32:29PM +0000, 'Sebastian Ene' via kernel-team wrote:
> > > > > Define a set of attributes used by the ptdump parser to display the
> > > > > properties of a guest memory region covered by a pagetable descriptor.
> > > > > Build a description of the pagetable levels and initialize the parser
> > > > > with this configuration.
> > > > >
> > > > > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > > > > ---
> > > > > arch/arm64/kvm/ptdump.c | 143 ++++++++++++++++++++++++++++++++++++++--
> > > > > 1 file changed, 137 insertions(+), 6 deletions(-)
> > > > >
> > > > > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > > > > index 36dc7662729f..cc1d4fdddc6e 100644
> > > > > --- a/arch/arm64/kvm/ptdump.c
> > > > > +++ b/arch/arm64/kvm/ptdump.c
> > > > > @@ -14,6 +14,61 @@
> > > > > #include <kvm_ptdump.h>
> > > > >
> > > > >
> > > > > +#define MARKERS_LEN (2)
> > > > > +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
> > > > > +
> > > > > +struct kvm_ptdump_guest_state {
> > > > > + struct kvm *kvm;
> > > > > + struct pg_state parser_state;
> > > > > + struct addr_marker ipa_marker[MARKERS_LEN];
> > > > > + struct pg_level level[KVM_PGTABLE_MAX_LEVELS];
> > > > > + struct ptdump_range range[MARKERS_LEN];
> > > > > +};
> > > > > +
> > > > > +static const struct prot_bits stage2_pte_bits[] = {
> > > > > + {
> > > > > + .mask = PTE_VALID,
> > > > > + .val = PTE_VALID,
> > > > > + .set = " ",
> > > > > + .clear = "F",
> > > > > + }, {
> > > > > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > > > > + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> >
> > KVM_PTE_LEAF_ATTR_HI_S2_XN is actually a mask covering
>
> It is not a mask covering here but in the ACK kernel.
You're right, I should have double-checked upstream.
That said, how about we move this *after* R/W and just use "X" for .clear,
so that the output reads something like R W X?
>
> >
> > KVM_PTE_LEAF_ATTR_HI_S2_XN_XN, KVM_PTE_LEAF_ATTR_HI_S2_XN_PXN and
> > KVM_PTE_LEAF_ATTR_HI_S2_XN_UXN.
> >
> > I believe here what we should do is something like?
> >
> > .val = FIELD_PREP_CONST(KVM_PTE_LEAF_ATTR_HI_S2_XN,
> > KVM_PTE_LEAF_ATTR_HI_S2_XN_XN) | PTE_VALID
> >
> > > > > + .set = "XN",
> > > > > + .clear = " ",
> > > > > + }, {
> > > > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > > > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > > > > + .set = "R",
> > > > > + .clear = " ",
> > > > > + }, {
> >
> > [...]
On Fri, Jul 19, 2024 at 03:36:04PM +0100, 'Vincent Donnefort' via kernel-team wrote:
> On Fri, Jul 19, 2024 at 02:09:19PM +0000, Sebastian Ene wrote:
> > On Tue, Jul 16, 2024 at 10:59:41AM +0100, Vincent Donnefort wrote:
> > > On Mon, Jul 01, 2024 at 02:18:39PM +0000, Sebastian Ene wrote:
> > > > On Mon, Jul 01, 2024 at 09:42:43AM +0100, Vincent Donnefort wrote:
> > > > > On Fri, Jun 21, 2024 at 12:32:29PM +0000, 'Sebastian Ene' via kernel-team wrote:
> > > > > > Define a set of attributes used by the ptdump parser to display the
> > > > > > properties of a guest memory region covered by a pagetable descriptor.
> > > > > > Build a description of the pagetable levels and initialize the parser
> > > > > > with this configuration.
> > > > > >
> > > > > > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > > > > > ---
> > > > > > arch/arm64/kvm/ptdump.c | 143 ++++++++++++++++++++++++++++++++++++++--
> > > > > > 1 file changed, 137 insertions(+), 6 deletions(-)
> > > > > >
> > > > > > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > > > > > index 36dc7662729f..cc1d4fdddc6e 100644
> > > > > > --- a/arch/arm64/kvm/ptdump.c
> > > > > > +++ b/arch/arm64/kvm/ptdump.c
> > > > > > @@ -14,6 +14,61 @@
> > > > > > #include <kvm_ptdump.h>
> > > > > >
> > > > > >
> > > > > > +#define MARKERS_LEN (2)
> > > > > > +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
> > > > > > +
> > > > > > +struct kvm_ptdump_guest_state {
> > > > > > + struct kvm *kvm;
> > > > > > + struct pg_state parser_state;
> > > > > > + struct addr_marker ipa_marker[MARKERS_LEN];
> > > > > > + struct pg_level level[KVM_PGTABLE_MAX_LEVELS];
> > > > > > + struct ptdump_range range[MARKERS_LEN];
> > > > > > +};
> > > > > > +
> > > > > > +static const struct prot_bits stage2_pte_bits[] = {
> > > > > > + {
> > > > > > + .mask = PTE_VALID,
> > > > > > + .val = PTE_VALID,
> > > > > > + .set = " ",
> > > > > > + .clear = "F",
> > > > > > + }, {
> > > > > > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > > > > > + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > >
> > > KVM_PTE_LEAF_ATTR_HI_S2_XN is actually a mask covering
> >
> > It is not a mask covering here but in the ACK kernel.
>
> You're right, I should have double-checked upstream.
>
> That said, how about we move this __after__ RW ? and just use "X" on .clear
> so we can have something R W X ?
>
Yes, that's a good idea to improve the output format; let me move it.
> >
> > >
> > > KVM_PTE_LEAF_ATTR_HI_S2_XN_XN, KVM_PTE_LEAF_ATTR_HI_S2_XN_PXN and
> > > KVM_PTE_LEAF_ATTR_HI_S2_XN_UXN.
> > >
> > > I believe here what we should do is something like?
> > >
> > > .val = FIELD_PREP_CONST(KVM_PTE_LEAF_ATTR_HI_S2_XN,
> > > KVM_PTE_LEAF_ATTR_HI_S2_XN_XN) | PTE_VALID
> > >
> > > > > > + .set = "XN",
> > > > > > + .clear = " ",
> > > > > > + }, {
> > > > > > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > > > > > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > > > > > + .set = "R",
> > > > > > + .clear = " ",
> > > > > > + }, {
> > >
> > > [...]
>
> To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@android.com.
>
Hi Seb,
On Fri, Jun 21, 2024 at 12:32:29PM +0000, Sebastian Ene wrote:
> Define a set of attributes used by the ptdump parser to display the
> properties of a guest memory region covered by a pagetable descriptor.
> Build a description of the pagetable levels and initialize the parser
> with this configuration.
>
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
This patch should come *before* patch 4; there is no point in exposing the
debugfs file if we aren't ready to handle it yet.
> ---
> arch/arm64/kvm/ptdump.c | 143 ++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 137 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> index 36dc7662729f..cc1d4fdddc6e 100644
> --- a/arch/arm64/kvm/ptdump.c
> +++ b/arch/arm64/kvm/ptdump.c
> @@ -14,6 +14,61 @@
> #include <kvm_ptdump.h>
>
>
> +#define MARKERS_LEN (2)
> +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
> +
> +struct kvm_ptdump_guest_state {
> + struct kvm *kvm;
> + struct pg_state parser_state;
> + struct addr_marker ipa_marker[MARKERS_LEN];
> + struct pg_level level[KVM_PGTABLE_MAX_LEVELS];
> + struct ptdump_range range[MARKERS_LEN];
> +};
> +
> +static const struct prot_bits stage2_pte_bits[] = {
> + {
> + .mask = PTE_VALID,
> + .val = PTE_VALID,
> + .set = " ",
> + .clear = "F",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> + .set = "XN",
> + .clear = " ",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> + .set = "R",
> + .clear = " ",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> + .set = "W",
> + .clear = " ",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> + .set = "AF",
> + .clear = " ",
<snip>
> + }, {
> + .mask = PTE_NG,
> + .val = PTE_NG,
> + .set = "FnXS",
> + .clear = " ",
> + }, {
> + .mask = PTE_CONT | PTE_VALID,
> + .val = PTE_CONT | PTE_VALID,
> + .set = "CON",
> + .clear = " ",
> + }, {
</snip>
Neither of these bits is used at stage-2; why have descriptors for
them?
> +static int kvm_ptdump_build_levels(struct pg_level *level, u32 start_lvl)
> +{
> + static const char * const level_names[] = {"PGD", "PUD", "PMD", "PTE"};
> + u32 i = 0;
> + u64 mask = 0;
> +
> + if (start_lvl > 2) {
> + pr_err("invalid start_lvl %u\n", start_lvl);
> + return -EINVAL;
> + }
if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL))
return -EINVAL;
> + for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++)
> + mask |= stage2_pte_bits[i].mask;
> +
> + for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) {
> + strscpy(level[i].name, level_names[i], sizeof(level[i].name));
> +
> + level[i].num = ARRAY_SIZE(stage2_pte_bits);
> + level[i].bits = stage2_pte_bits;
> + level[i].mask = mask;
> + }
> +
> + if (start_lvl > 0)
> + strscpy(level[start_lvl].name, level_names[0], sizeof(level_names[0]));
This should pass the size of @dst, not the source. This becomes slightly
more self-documenting if you use a literal for "PGD" here too.
strscpy(level[start_lvl].name, "PGD", sizeof(level[start_lvl].name));
> + return 0;
> +}
> +
> +static struct kvm_ptdump_guest_state
> +*kvm_ptdump_parser_init(struct kvm *kvm)
> +{
> + struct kvm_ptdump_guest_state *st;
> + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> + struct kvm_pgtable *pgtable = mmu->pgt;
> + int ret;
> +
> + st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT);
> + if (!st)
> + return NULL;
> +
> + ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level);
> + if (ret)
> + goto free_with_state;
I don't see any value in the use of goto here, as there isn't any sort
of cascading initialization / cleanup. This also presents an opportunity
to get an error back out to the caller.
if (ret) {
kfree(st);
return ERR_PTR(ret);
}
> @@ -57,22 +176,34 @@ static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
> static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
> {
> struct kvm *kvm = m->i_private;
> + struct kvm_ptdump_guest_state *st;
> int ret;
>
> if (!kvm_get_kvm_safe(kvm))
> return -ENOENT;
>
> - ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
> - if (ret < 0)
> - kvm_put_kvm(kvm);
> + st = kvm_ptdump_parser_init(kvm);
> + if (!st) {
> + ret = -ENOMEM;
> + goto free_with_kvm_ref;
> + }
(with the earlier suggestion)
st = kvm_ptdump_parser_init(kvm);
if (IS_ERR(st)) {
ret = PTR_ERR(st);
goto free_with_kvm_ref;
}
Otherwise genuine KVM bugs (-EINVAL) are getting lumped into ENOMEM.
--
Thanks,
Oliver
On Fri, Jun 28, 2024 at 09:18:16PM +0000, Oliver Upton wrote:
> Hi Seb,
>
> On Fri, Jun 21, 2024 at 12:32:29PM +0000, Sebastian Ene wrote:
> > Define a set of attributes used by the ptdump parser to display the
> > properties of a guest memory region covered by a pagetable descriptor.
> > Build a description of the pagetable levels and initialize the parser
> > with this configuration.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
>
> This patch should come *before* patch 4, no point in exposing the
> debugfs file if we aren't ready to handle it yet.
>
This is true, but this patch doesn't make sense without patch 4: the
functions I add here are only invoked from the debugfs calls, so on their
own they would never be called.
IMO we could squash the two patches (4 and 5), but the result would be a bit
harder to follow. Let me know what you think, thanks.
Seb
> > ---
> > arch/arm64/kvm/ptdump.c | 143 ++++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 137 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > index 36dc7662729f..cc1d4fdddc6e 100644
> > --- a/arch/arm64/kvm/ptdump.c
> > +++ b/arch/arm64/kvm/ptdump.c
> > @@ -14,6 +14,61 @@
> > #include <kvm_ptdump.h>
> >
> >
> > +#define MARKERS_LEN (2)
> > +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
> > +
> > +struct kvm_ptdump_guest_state {
> > + struct kvm *kvm;
> > + struct pg_state parser_state;
> > + struct addr_marker ipa_marker[MARKERS_LEN];
> > + struct pg_level level[KVM_PGTABLE_MAX_LEVELS];
> > + struct ptdump_range range[MARKERS_LEN];
> > +};
> > +
> > +static const struct prot_bits stage2_pte_bits[] = {
> > + {
> > + .mask = PTE_VALID,
> > + .val = PTE_VALID,
> > + .set = " ",
> > + .clear = "F",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > + .set = "XN",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > + .set = "R",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> > + .set = "W",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> > + .set = "AF",
> > + .clear = " ",
>
> <snip>
>
> > + }, {
> > + .mask = PTE_NG,
> > + .val = PTE_NG,
> > + .set = "FnXS",
> > + .clear = " ",
> > + }, {
> > + .mask = PTE_CONT | PTE_VALID,
> > + .val = PTE_CONT | PTE_VALID,
> > + .set = "CON",
> > + .clear = " ",
> > + }, {
>
> </snip>
>
> Neither of these bits are used at stage-2, why have descriptors for
> them?
>
> > +static int kvm_ptdump_build_levels(struct pg_level *level, u32 start_lvl)
> > +{
> > + static const char * const level_names[] = {"PGD", "PUD", "PMD", "PTE"};
> > + u32 i = 0;
> > + u64 mask = 0;
> > +
> > + if (start_lvl > 2) {
> > + pr_err("invalid start_lvl %u\n", start_lvl);
> > + return -EINVAL;
> > + }
>
> if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL))
> return -EINVAL;
>
> > + for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++)
> > + mask |= stage2_pte_bits[i].mask;
> > +
> > + for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) {
> > + strscpy(level[i].name, level_names[i], sizeof(level[i].name));
> > +
> > + level[i].num = ARRAY_SIZE(stage2_pte_bits);
> > + level[i].bits = stage2_pte_bits;
> > + level[i].mask = mask;
> > + }
> > +
> > + if (start_lvl > 0)
> > + strscpy(level[start_lvl].name, level_names[0], sizeof(level_names[0]));
>
> This should pass the size of @dst, not the source. This becomes slightly
> more self-documenting if you use a literal for "PGD" here too.
>
> strscpy(level[start_lvl].name, "PGD", sizeof(level[start_lvl].name));
>
> > + return 0;
> > +}
> > +
> > +static struct kvm_ptdump_guest_state
> > +*kvm_ptdump_parser_init(struct kvm *kvm)
> > +{
> > + struct kvm_ptdump_guest_state *st;
> > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> > + struct kvm_pgtable *pgtable = mmu->pgt;
> > + int ret;
> > +
> > + st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT);
> > + if (!st)
> > + return NULL;
> > +
> > + ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level);
> > + if (ret)
> > + goto free_with_state;
>
> I don't see any value in the use of goto here, as there isn't any sort
> of cascading initialization / cleanup. This also presents an opportunity
> to get an error back out to the caller.
>
> if (ret) {
> kfree(st);
> return ERR_PTR(ret);
> }
>
> > @@ -57,22 +176,34 @@ static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
> > static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
> > {
> > struct kvm *kvm = m->i_private;
> > + struct kvm_ptdump_guest_state *st;
> > int ret;
> >
> > if (!kvm_get_kvm_safe(kvm))
> > return -ENOENT;
> >
> > - ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
> > - if (ret < 0)
> > - kvm_put_kvm(kvm);
> > + st = kvm_ptdump_parser_init(kvm);
> > + if (!st) {
> > + ret = -ENOMEM;
> > + goto free_with_kvm_ref;
> > + }
>
> (with the earlier suggestion)
>
> st = kvm_ptdump_parser_init(kvm);
> if (IS_ERR(st)) {
> ret = PTR_ERR(st);
> goto free_with_kvm_ref;
> }
>
> Otherwise genuine KVM bugs (-EINVAL) are getting lumped into ENOMEM.
>
> --
> Thanks,
> Oliver
On Fri, Jun 28, 2024 at 09:18:16PM +0000, Oliver Upton wrote:
> Hi Seb,
>
> On Fri, Jun 21, 2024 at 12:32:29PM +0000, Sebastian Ene wrote:
> > Define a set of attributes used by the ptdump parser to display the
> > properties of a guest memory region covered by a pagetable descriptor.
> > Build a description of the pagetable levels and initialize the parser
> > with this configuration.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
>
> This patch should come *before* patch 4, no point in exposing the
> debugfs file if we aren't ready to handle it yet.
>
Gotcha, let me try to reorder them.
> > ---
> > arch/arm64/kvm/ptdump.c | 143 ++++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 137 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > index 36dc7662729f..cc1d4fdddc6e 100644
> > --- a/arch/arm64/kvm/ptdump.c
> > +++ b/arch/arm64/kvm/ptdump.c
> > @@ -14,6 +14,61 @@
> > #include <kvm_ptdump.h>
> >
> >
> > +#define MARKERS_LEN (2)
> > +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
> > +
> > +struct kvm_ptdump_guest_state {
> > + struct kvm *kvm;
> > + struct pg_state parser_state;
> > + struct addr_marker ipa_marker[MARKERS_LEN];
> > + struct pg_level level[KVM_PGTABLE_MAX_LEVELS];
> > + struct ptdump_range range[MARKERS_LEN];
> > +};
> > +
> > +static const struct prot_bits stage2_pte_bits[] = {
> > + {
> > + .mask = PTE_VALID,
> > + .val = PTE_VALID,
> > + .set = " ",
> > + .clear = "F",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > + .set = "XN",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > + .set = "R",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> > + .set = "W",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> > + .set = "AF",
> > + .clear = " ",
>
> <snip>
>
> > + }, {
> > + .mask = PTE_NG,
> > + .val = PTE_NG,
> > + .set = "FnXS",
> > + .clear = " ",
> > + }, {
> > + .mask = PTE_CONT | PTE_VALID,
> > + .val = PTE_CONT | PTE_VALID,
> > + .set = "CON",
> > + .clear = " ",
> > + }, {
>
> </snip>
>
> Neither of these bits are used at stage-2, why have descriptors for
> them?
>
At the moment, we don't make use of the contiguous bit at stage-2 upstream
(but we do use it in some experimental patches). I can remove these
descriptors; no hard feelings about them.
> > +static int kvm_ptdump_build_levels(struct pg_level *level, u32 start_lvl)
> > +{
> > + static const char * const level_names[] = {"PGD", "PUD", "PMD", "PTE"};
> > + u32 i = 0;
> > + u64 mask = 0;
> > +
> > + if (start_lvl > 2) {
> > + pr_err("invalid start_lvl %u\n", start_lvl);
> > + return -EINVAL;
> > + }
>
> if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL))
> return -EINVAL;
>
I will include this validation, thanks!
> > + for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++)
> > + mask |= stage2_pte_bits[i].mask;
> > +
> > + for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) {
> > + strscpy(level[i].name, level_names[i], sizeof(level[i].name));
> > +
> > + level[i].num = ARRAY_SIZE(stage2_pte_bits);
> > + level[i].bits = stage2_pte_bits;
> > + level[i].mask = mask;
> > + }
> > +
> > + if (start_lvl > 0)
> > + strscpy(level[start_lvl].name, level_names[0], sizeof(level_names[0]));
>
> This should pass the size of @dst, not the source. This becomes slightly
> more self-documenting if you use a literal for "PGD" here too.
>
> strscpy(level[start_lvl].name, "PGD", sizeof(level[start_lvl].name));
>
Will use this. Thanks for the suggestion!
> > + return 0;
> > +}
> > +
> > +static struct kvm_ptdump_guest_state
> > +*kvm_ptdump_parser_init(struct kvm *kvm)
> > +{
> > + struct kvm_ptdump_guest_state *st;
> > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> > + struct kvm_pgtable *pgtable = mmu->pgt;
> > + int ret;
> > +
> > + st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT);
> > + if (!st)
> > + return NULL;
> > +
> > + ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level);
> > + if (ret)
> > + goto free_with_state;
>
> I don't see any value in the use of goto here, as there isn't any sort
> of cascading initialization / cleanup. This also presents an opportunity
> to get an error back out to the caller.
>
> if (ret) {
> kfree(st);
> return ERR_PTR(ret);
> }
>
Let me remove that goto statement.
> > @@ -57,22 +176,34 @@ static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
> > static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
> > {
> > struct kvm *kvm = m->i_private;
> > + struct kvm_ptdump_guest_state *st;
> > int ret;
> >
> > if (!kvm_get_kvm_safe(kvm))
> > return -ENOENT;
> >
> > - ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
> > - if (ret < 0)
> > - kvm_put_kvm(kvm);
> > + st = kvm_ptdump_parser_init(kvm);
> > + if (!st) {
> > + ret = -ENOMEM;
> > + goto free_with_kvm_ref;
> > + }
>
> (with the earlier suggestion)
>
> st = kvm_ptdump_parser_init(kvm);
> if (IS_ERR(st)) {
> ret = PTR_ERR(st);
> goto free_with_kvm_ref;
> }
>
> Otherwise genuine KVM bugs (-EINVAL) are getting lumped into ENOMEM.
>
> --
> Thanks,
> Oliver
Thanks,
Sebastian
On Mon, Jul 01, 2024 at 02:17:53PM +0000, Sebastian Ene wrote:
> > <snip>
> >
> > > + }, {
> > > + .mask = PTE_NG,
> > > + .val = PTE_NG,
> > > + .set = "FnXS",
> > > + .clear = " ",
> > > + }, {
> > > + .mask = PTE_CONT | PTE_VALID,
> > > + .val = PTE_CONT | PTE_VALID,
> > > + .set = "CON",
> > > + .clear = " ",
> > > + }, {
> >
> > </snip>
> >
> > Neither of these bits are used at stage-2, why have descriptors for
> > them?
> >
>
> Atm, we don't make use of the contiguous bit in stage-2 in upstream (but
> we have it in some experimental patches). I can remove this, no hard
> feelings for them.
Yes, please drop them. I'll nag whoever adds contpte support for stage-2
to add them back :)
--
Thanks,
Oliver
© 2016 - 2025 Red Hat, Inc.