[v1] hvf: Enable 1G page support

[PATCH] hvf: Enable 1G page support

Posted by Alexander Graf 2 years, 9 months ago

Hvf on x86 only supported 2MiB large pages, but never bothered to strip
out the 1GiB page size capability from -cpu host. With QEMU 8.0.0 this
became a problem because OVMF started to use 1GiB pages by default.

Let's just unconditionally add 1GiB page walk support to the walker.

With this fix applied, I can successfully run OVMF again.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1603
Signed-off-by: Alexander Graf <agraf@csgraf.de>
Reported-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
Reported-by: Philippe Mathieu-Daudé <philmd@linaro.org>

---

On my test VM, Linux dies later on with issues in interrupt delivery. But
those are unrelated to this patch; I confirmed that I get the same behavior
with 1GiB page support disabled.
---
 target/i386/hvf/x86_mmu.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/target/i386/hvf/x86_mmu.c b/target/i386/hvf/x86_mmu.c
index 96d117567e..1d860651c6 100644
--- a/target/i386/hvf/x86_mmu.c
+++ b/target/i386/hvf/x86_mmu.c
@@ -38,6 +38,7 @@
 #define LEGACY_PTE_PAGE_MASK        (0xffffffffllu << 12)
 #define PAE_PTE_PAGE_MASK           ((-1llu << 12) & ((1llu << 52) - 1))
 #define PAE_PTE_LARGE_PAGE_MASK     ((-1llu << (21)) & ((1llu << 52) - 1))
+#define PAE_PTE_SUPER_PAGE_MASK     ((-1llu << (30)) & ((1llu << 52) - 1))
 
 struct gpt_translation {
     target_ulong  gva;
@@ -96,7 +97,7 @@ static bool get_pt_entry(struct CPUState *cpu, struct gpt_translation *pt,
 
 /* test page table entry */
 static bool test_pt_entry(struct CPUState *cpu, struct gpt_translation *pt,
-                          int level, bool *is_large, bool pae)
+                          int level, int *largeness, bool pae)
 {
     uint64_t pte = pt->pte[level];
 
@@ -118,9 +119,9 @@ static bool test_pt_entry(struct CPUState *cpu, struct gpt_translation *pt,
         goto exit;
     }
 
-    if (1 == level && pte_large_page(pte)) {
+    if (level && pte_large_page(pte)) {
         pt->err_code |= MMU_PAGE_PT;
-        *is_large = true;
+        *largeness = level;
     }
     if (!level) {
         pt->err_code |= MMU_PAGE_PT;
@@ -152,9 +153,18 @@ static inline uint64_t pse_pte_to_page(uint64_t pte)
     return ((pte & 0x1fe000) << 19) | (pte & 0xffc00000);
 }
 
-static inline uint64_t large_page_gpa(struct gpt_translation *pt, bool pae)
+static inline uint64_t large_page_gpa(struct gpt_translation *pt, bool pae,
+                                      int largeness)
 {
-    VM_PANIC_ON(!pte_large_page(pt->pte[1]))
+    VM_PANIC_ON(!pte_large_page(pt->pte[largeness]))
+
+    /* 1Gib large page  */
+    if (pae && largeness == 2) {
+        return (pt->pte[2] & PAE_PTE_SUPER_PAGE_MASK) | (pt->gva & 0x3fffffff);
+    }
+
+    VM_PANIC_ON(largeness != 1)
+
     /* 2Mb large page  */
     if (pae) {
         return (pt->pte[1] & PAE_PTE_LARGE_PAGE_MASK) | (pt->gva & 0x1fffff);
@@ -170,7 +180,7 @@ static bool walk_gpt(struct CPUState *cpu, target_ulong addr, int err_code,
                      struct gpt_translation *pt, bool pae)
 {
     int top_level, level;
-    bool is_large = false;
+    int largeness = 0;
     target_ulong cr3 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR3);
     uint64_t page_mask = pae ? PAE_PTE_PAGE_MASK : LEGACY_PTE_PAGE_MASK;
     
@@ -186,19 +196,19 @@ static bool walk_gpt(struct CPUState *cpu, target_ulong addr, int err_code,
     for (level = top_level; level > 0; level--) {
         get_pt_entry(cpu, pt, level, pae);
 
-        if (!test_pt_entry(cpu, pt, level - 1, &is_large, pae)) {
+        if (!test_pt_entry(cpu, pt, level - 1, &largeness, pae)) {
             return false;
         }
 
-        if (is_large) {
+        if (largeness) {
             break;
         }
     }
 
-    if (!is_large) {
+    if (!largeness) {
         pt->gpa = (pt->pte[0] & page_mask) | (pt->gva & 0xfff);
     } else {
-        pt->gpa = large_page_gpa(pt, pae);
+        pt->gpa = large_page_gpa(pt, pae, largeness);
     }
 
     return true;
-- 
2.39.2 (Apple Git-143)

Re: [PATCH] hvf: Enable 1G page support

Posted by Michael Tokarev 1 year, 2 months ago

21.04.2023 01:52, Alexander Graf wrote:
> Hvf on x86 only supported 2MiB large pages, but never bothered to strip
> out the 1GiB page size capability from -cpu host. With QEMU 8.0.0 this
> became a problem because OVMF started to use 1GiB pages by default.
> 
> Let's just unconditionally add 1GiB page walk support to the walker.
> 
> With this fix applied, I can successfully run OVMF again.
> 
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1603
> Signed-off-by: Alexander Graf <agraf@csgraf.de>
> Reported-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
> Reported-by: Philippe Mathieu-Daudé <philmd@linaro.org>

Hi!

Is this change not relevant or not needed anymore?
It's been more than 1.5 years already.

(it probably needs to be tweaked for the current state of things,
but I'm surprised there's been nothing at all for such a long time)

/mjt

> On my test VM, Linux dies later on with issues in interrupt delivery. But
> those are unrelated to this patch; I confirmed that I get the same behavior
> with 1GiB page support disabled.
> ---
>   target/i386/hvf/x86_mmu.c | 30 ++++++++++++++++++++----------
>   1 file changed, 20 insertions(+), 10 deletions(-)
> 
> diff --git a/target/i386/hvf/x86_mmu.c b/target/i386/hvf/x86_mmu.c
> index 96d117567e..1d860651c6 100644
> --- a/target/i386/hvf/x86_mmu.c
> +++ b/target/i386/hvf/x86_mmu.c
> @@ -38,6 +38,7 @@
>   #define LEGACY_PTE_PAGE_MASK        (0xffffffffllu << 12)
>   #define PAE_PTE_PAGE_MASK           ((-1llu << 12) & ((1llu << 52) - 1))
>   #define PAE_PTE_LARGE_PAGE_MASK     ((-1llu << (21)) & ((1llu << 52) - 1))
> +#define PAE_PTE_SUPER_PAGE_MASK     ((-1llu << (30)) & ((1llu << 52) - 1))
>   
>   struct gpt_translation {
>       target_ulong  gva;
> @@ -96,7 +97,7 @@ static bool get_pt_entry(struct CPUState *cpu, struct gpt_translation *pt,
>   
>   /* test page table entry */
>   static bool test_pt_entry(struct CPUState *cpu, struct gpt_translation *pt,
> -                          int level, bool *is_large, bool pae)
> +                          int level, int *largeness, bool pae)
>   {
>       uint64_t pte = pt->pte[level];
>   
> @@ -118,9 +119,9 @@ static bool test_pt_entry(struct CPUState *cpu, struct gpt_translation *pt,
>           goto exit;
>       }
>   
> -    if (1 == level && pte_large_page(pte)) {
> +    if (level && pte_large_page(pte)) {
>           pt->err_code |= MMU_PAGE_PT;
> -        *is_large = true;
> +        *largeness = level;
>       }
>       if (!level) {
>           pt->err_code |= MMU_PAGE_PT;
> @@ -152,9 +153,18 @@ static inline uint64_t pse_pte_to_page(uint64_t pte)
>       return ((pte & 0x1fe000) << 19) | (pte & 0xffc00000);
>   }
>   
> -static inline uint64_t large_page_gpa(struct gpt_translation *pt, bool pae)
> +static inline uint64_t large_page_gpa(struct gpt_translation *pt, bool pae,
> +                                      int largeness)
>   {
> -    VM_PANIC_ON(!pte_large_page(pt->pte[1]))
> +    VM_PANIC_ON(!pte_large_page(pt->pte[largeness]))
> +
> +    /* 1Gib large page  */
> +    if (pae && largeness == 2) {
> +        return (pt->pte[2] & PAE_PTE_SUPER_PAGE_MASK) | (pt->gva & 0x3fffffff);
> +    }
> +
> +    VM_PANIC_ON(largeness != 1)
> +
>       /* 2Mb large page  */
>       if (pae) {
>           return (pt->pte[1] & PAE_PTE_LARGE_PAGE_MASK) | (pt->gva & 0x1fffff);
> @@ -170,7 +180,7 @@ static bool walk_gpt(struct CPUState *cpu, target_ulong addr, int err_code,
>                        struct gpt_translation *pt, bool pae)
>   {
>       int top_level, level;
> -    bool is_large = false;
> +    int largeness = 0;
>       target_ulong cr3 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR3);
>       uint64_t page_mask = pae ? PAE_PTE_PAGE_MASK : LEGACY_PTE_PAGE_MASK;
>       
> @@ -186,19 +196,19 @@ static bool walk_gpt(struct CPUState *cpu, target_ulong addr, int err_code,
>       for (level = top_level; level > 0; level--) {
>           get_pt_entry(cpu, pt, level, pae);
>   
> -        if (!test_pt_entry(cpu, pt, level - 1, &is_large, pae)) {
> +        if (!test_pt_entry(cpu, pt, level - 1, &largeness, pae)) {
>               return false;
>           }
>   
> -        if (is_large) {
> +        if (largeness) {
>               break;
>           }
>       }
>   
> -    if (!is_large) {
> +    if (!largeness) {
>           pt->gpa = (pt->pte[0] & page_mask) | (pt->gva & 0xfff);
>       } else {
> -        pt->gpa = large_page_gpa(pt, pae);
> +        pt->gpa = large_page_gpa(pt, pae, largeness);
>       }
>   
>       return true;

Re: [PATCH] hvf: Enable 1G page support

Posted by Alexander Graf 2 years, 3 months ago

On 21.04.23 00:52, Alexander Graf wrote:
> Hvf on x86 only supported 2MiB large pages, but never bothered to strip
> out the 1GiB page size capability from -cpu host. With QEMU 8.0.0 this
> became a problem because OVMF started to use 1GiB pages by default.
>
> Let's just unconditionally add 1GiB page walk support to the walker.
>
> With this fix applied, I can successfully run OVMF again.
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1603
> Signed-off-by: Alexander Graf <agraf@csgraf.de>
> Reported-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
> Reported-by: Philippe Mathieu-Daudé <philmd@linaro.org>


Ping. Anyone willing to pick this up? :)


Alex

Re: [PATCH] hvf: Enable 1G page support

Posted by Phil Dennis-Jordan 2 years, 3 months ago

Didn't spot this one the first time around. :-)

The code looks fine to me, and in my test, it does indeed fix the
immediate crash.

There's still something making OVMF from QEMU 8.1 very, very unhappy
even with this patch: I'm getting memory allocation errors from it
and can't get anywhere near booting an OS. However, I get the same
result in TCG mode, or if I disable CPUID_EXT2_PDPE1GB in hvf without
this patch, so that's not a regression caused by this change. I found
no other problems with a quick spot check using an older OVMF and a
few other VMs.

Reviewed-by: Phil Dennis-Jordan <phil@philjordan.eu>

On Tue, 17 Oct 2023 at 22:16, Alexander Graf <agraf@csgraf.de> wrote:
>
>
> On 21.04.23 00:52, Alexander Graf wrote:
> > Hvf on x86 only supported 2MiB large pages, but never bothered to strip
> > out the 1GiB page size capability from -cpu host. With QEMU 8.0.0 this
> > became a problem because OVMF started to use 1GiB pages by default.
> >
> > Let's just unconditionally add 1GiB page walk support to the walker.
> >
> > With this fix applied, I can successfully run OVMF again.
> >
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1603
> > Signed-off-by: Alexander Graf <agraf@csgraf.de>
> > Reported-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
> > Reported-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>
>
> Ping. Anyone willing to pick this up? :)
>
>
> Alex
>
>
>