[Qemu-devel] [PATCH v1] s390x/kvm: Configure page size after memory has actually been initialized

David Hildenbrand posted 1 patch 5 years ago
Test docker-mingw@fedora passed
Test docker-clang@ubuntu passed
Test checkpatch passed
Test asan passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20190327135944.12848-1-david@redhat.com
Maintainers: Christian Borntraeger <borntraeger@de.ibm.com>, Richard Henderson <rth@twiddle.net>, David Hildenbrand <david@redhat.com>, Cornelia Huck <cohuck@redhat.com>, Halil Pasic <pasic@linux.ibm.com>
hw/s390x/s390-virtio-ccw.c |  9 +++++++++
target/s390x/cpu.c         |  7 +++++++
target/s390x/cpu.h         |  1 +
target/s390x/kvm-stub.c    |  4 ++++
target/s390x/kvm.c         | 35 ++++++++++++++---------------------
target/s390x/kvm_s390x.h   |  1 +
6 files changed, 36 insertions(+), 21 deletions(-)
[Qemu-devel] [PATCH v1] s390x/kvm: Configure page size after memory has actually been initialized
Posted by David Hildenbrand 5 years ago
Right now we configure the pagesize quite early, when initializing KVM.
This is long before system memory is actually allocated via
memory_region_allocate_system_memory(), and therefore memory backends
marked as mapped.

Instead, let's configure the maximum page size after initializing
memory in s390_memory_init(). cap_hpage_1m is still properly
configured before creating any CPUs, and therefore before configuring
the CPU model and eventually enabling CMMA.

We might later want to replace qemu_getrampagesize() by another
detection mechanism, e.g. only looking at mapped, initial memory.
We don't support any memory devices yet, and if so, we can always reject
devices with a page size bigger than the initial page size when
hotplugging. qemu_getrampagesize() should work for now, especially when
converting it to only look at mapped backends.

Signed-off-by: David Hildenbrand <david@redhat.com>
---

I only did a quick sanity test - I don't have a system with huge page
support around to test. I did a compile for both KVM and TCG.

As Conny is on vacation and Christian is also not around, I guess it is
okay that David G. will pick this up for now, to eventually make
qemu_getrampagesize() only check mapped backends.

 hw/s390x/s390-virtio-ccw.c |  9 +++++++++
 target/s390x/cpu.c         |  7 +++++++
 target/s390x/cpu.h         |  1 +
 target/s390x/kvm-stub.c    |  4 ++++
 target/s390x/kvm.c         | 35 ++++++++++++++---------------------
 target/s390x/kvm_s390x.h   |  1 +
 6 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index d11069b860..b968bccc87 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -15,6 +15,7 @@
 #include "cpu.h"
 #include "hw/boards.h"
 #include "exec/address-spaces.h"
+#include "exec/ram_addr.h"
 #include "hw/s390x/s390-virtio-hcall.h"
 #include "hw/s390x/sclp.h"
 #include "hw/s390x/s390_flic.h"
@@ -163,6 +164,7 @@ static void s390_memory_init(ram_addr_t mem_size)
     MemoryRegion *sysmem = get_system_memory();
     ram_addr_t chunk, offset = 0;
     unsigned int number = 0;
+    Error *local_err = NULL;
     gchar *name;
 
     /* allocate RAM for core */
@@ -182,6 +184,12 @@ static void s390_memory_init(ram_addr_t mem_size)
     }
     g_free(name);
 
+    /* Configure the maximum page size */
+    s390_set_max_pagesize(qemu_getrampagesize(), &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        exit(EXIT_FAILURE);
+    }
     /* Initialize storage key device */
     s390_skeys_init();
     /* Initialize storage attributes device */
@@ -253,6 +261,7 @@ static void ccw_init(MachineState *machine)
     DeviceState *dev;
 
     s390_sclp_init();
+    /* init memory + setup max page size. Required for the CPU model */
     s390_memory_init(machine->ram_size);
 
     /* init CPUs (incl. CPU model) early so s390_has_feature() works */
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 698dd9cb82..b58ef0a8ef 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -399,6 +399,13 @@ int s390_set_memory_limit(uint64_t new_limit, uint64_t *hw_limit)
     return 0;
 }
 
+void s390_set_max_pagesize(uint64_t pagesize, Error **errp)
+{
+    if (kvm_enabled()) {
+        kvm_s390_set_max_pagesize(pagesize, errp);
+    }
+}
+
 void s390_cmma_reset(void)
 {
     if (kvm_enabled()) {
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index cb6d77053a..c14be2b5ba 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -734,6 +734,7 @@ static inline void s390_do_cpu_load_normal(CPUState *cs, run_on_cpu_data arg)
 /* cpu.c */
 void s390_crypto_reset(void);
 int s390_set_memory_limit(uint64_t new_limit, uint64_t *hw_limit);
+void s390_set_max_pagesize(uint64_t pagesize, Error **errp);
 void s390_cmma_reset(void);
 void s390_enable_css_support(S390CPU *cpu);
 int s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch_id,
diff --git a/target/s390x/kvm-stub.c b/target/s390x/kvm-stub.c
index bf7795e47a..22b4514ca6 100644
--- a/target/s390x/kvm-stub.c
+++ b/target/s390x/kvm-stub.c
@@ -93,6 +93,10 @@ int kvm_s390_set_mem_limit(uint64_t new_limit, uint64_t *hw_limit)
     return 0;
 }
 
+void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp)
+{
+}
+
 void kvm_s390_crypto_reset(void)
 {
 }
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 19530fb94e..bee73dc1a4 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -283,44 +283,37 @@ void kvm_s390_crypto_reset(void)
     }
 }
 
-static int kvm_s390_configure_mempath_backing(KVMState *s)
+void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp)
 {
-    size_t path_psize = qemu_getrampagesize();
-
-    if (path_psize == 4 * KiB) {
-        return 0;
+    if (pagesize == 4 * KiB) {
+        return;
     }
 
     if (!hpage_1m_allowed()) {
-        error_report("This QEMU machine does not support huge page "
-                     "mappings");
-        return -EINVAL;
+        error_setg(errp, "This QEMU machine does not support huge page "
+                   "mappings");
+        return;
     }
 
-    if (path_psize != 1 * MiB) {
-        error_report("Memory backing with 2G pages was specified, "
-                     "but KVM does not support this memory backing");
-        return -EINVAL;
+    if (pagesize != 1 * MiB) {
+        error_setg(errp, "Memory backing with 2G pages was specified, "
+                   "but KVM does not support this memory backing");
+        return;
     }
 
-    if (kvm_vm_enable_cap(s, KVM_CAP_S390_HPAGE_1M, 0)) {
-        error_report("Memory backing with 1M pages was specified, "
-                     "but KVM does not support this memory backing");
-        return -EINVAL;
+    if (kvm_vm_enable_cap(kvm_state, KVM_CAP_S390_HPAGE_1M, 0)) {
+        error_setg(errp, "Memory backing with 1M pages was specified, "
+                   "but KVM does not support this memory backing");
+        return;
     }
 
     cap_hpage_1m = 1;
-    return 0;
 }
 
 int kvm_arch_init(MachineState *ms, KVMState *s)
 {
     MachineClass *mc = MACHINE_GET_CLASS(ms);
 
-    if (kvm_s390_configure_mempath_backing(s)) {
-        return -EINVAL;
-    }
-
     mc->default_cpu_type = S390_CPU_TYPE_NAME("host");
     cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS);
     cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF);
diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h
index 6e52287da3..caf985955b 100644
--- a/target/s390x/kvm_s390x.h
+++ b/target/s390x/kvm_s390x.h
@@ -36,6 +36,7 @@ int kvm_s390_cmma_active(void);
 void kvm_s390_cmma_reset(void);
 void kvm_s390_reset_vcpu(S390CPU *cpu);
 int kvm_s390_set_mem_limit(uint64_t new_limit, uint64_t *hw_limit);
+void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp);
 void kvm_s390_crypto_reset(void);
 void kvm_s390_restart_interrupt(S390CPU *cpu);
 void kvm_s390_stop_interrupt(S390CPU *cpu);
-- 
2.17.2


Re: [Qemu-devel] [PATCH v1] s390x/kvm: Configure page size after memory has actually been initialized
Posted by Igor Mammedov 5 years ago
On Wed, 27 Mar 2019 14:59:44 +0100
David Hildenbrand <david@redhat.com> wrote:

> Right now we configure the pagesize quite early, when initializing KVM.
> This is long before system memory is actually allocated via
> memory_region_allocate_system_memory(), and therefore memory backends
> marked as mapped.
> 
> Instead, let's configure the maximum page size after initializing
> memory in s390_memory_init(). cap_hpage_1m is still properly
> configured before creating any CPUs, and therefore before configuring
> the CPU model and eventually enabling CMMA.
> 
> We might later want to replace qemu_getrampagesize() by another
> detection mechanism, e.g. only looking at mapped, initial memory.
> We don't support any memory devices yet, and if so, we can always reject
> devices with a page size bigger than the initial page size when
> hotplugging. qemu_getrampagesize() should work for now, especially when
> converting it to only look at mapped backends.
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>

Acked-by: Igor Mammedov <imammedo@redhat.com>

> ---
> 
> I only did a quick sanity test - I don't have a system with huge page
> support around to test. I did a compile for both KVM and TCG.
> 
> As Conny is on vacation and Christian is also not around, I guess it is
> okay that David G. will pick this up for now, to eventually make
> qemu_getrampagesize() only check mapped backends.
> 
>  hw/s390x/s390-virtio-ccw.c |  9 +++++++++
>  target/s390x/cpu.c         |  7 +++++++
>  target/s390x/cpu.h         |  1 +
>  target/s390x/kvm-stub.c    |  4 ++++
>  target/s390x/kvm.c         | 35 ++++++++++++++---------------------
>  target/s390x/kvm_s390x.h   |  1 +
>  6 files changed, 36 insertions(+), 21 deletions(-)
> 
> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> index d11069b860..b968bccc87 100644
> --- a/hw/s390x/s390-virtio-ccw.c
> +++ b/hw/s390x/s390-virtio-ccw.c
> @@ -15,6 +15,7 @@
>  #include "cpu.h"
>  #include "hw/boards.h"
>  #include "exec/address-spaces.h"
> +#include "exec/ram_addr.h"
>  #include "hw/s390x/s390-virtio-hcall.h"
>  #include "hw/s390x/sclp.h"
>  #include "hw/s390x/s390_flic.h"
> @@ -163,6 +164,7 @@ static void s390_memory_init(ram_addr_t mem_size)
>      MemoryRegion *sysmem = get_system_memory();
>      ram_addr_t chunk, offset = 0;
>      unsigned int number = 0;
> +    Error *local_err = NULL;
>      gchar *name;
>  
>      /* allocate RAM for core */
> @@ -182,6 +184,12 @@ static void s390_memory_init(ram_addr_t mem_size)
>      }
>      g_free(name);
>  
> +    /* Configure the maximum page size */
> +    s390_set_max_pagesize(qemu_getrampagesize(), &local_err);
> +    if (local_err) {
> +        error_report_err(local_err);
> +        exit(EXIT_FAILURE);
> +    }
>      /* Initialize storage key device */
>      s390_skeys_init();
>      /* Initialize storage attributes device */
> @@ -253,6 +261,7 @@ static void ccw_init(MachineState *machine)
>      DeviceState *dev;
>  
>      s390_sclp_init();
> +    /* init memory + setup max page size. Required for the CPU model */
>      s390_memory_init(machine->ram_size);
>  
>      /* init CPUs (incl. CPU model) early so s390_has_feature() works */
> diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
> index 698dd9cb82..b58ef0a8ef 100644
> --- a/target/s390x/cpu.c
> +++ b/target/s390x/cpu.c
> @@ -399,6 +399,13 @@ int s390_set_memory_limit(uint64_t new_limit, uint64_t *hw_limit)
>      return 0;
>  }
>  
> +void s390_set_max_pagesize(uint64_t pagesize, Error **errp)
> +{
> +    if (kvm_enabled()) {
> +        kvm_s390_set_max_pagesize(pagesize, errp);
> +    }
> +}
> +
>  void s390_cmma_reset(void)
>  {
>      if (kvm_enabled()) {
> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
> index cb6d77053a..c14be2b5ba 100644
> --- a/target/s390x/cpu.h
> +++ b/target/s390x/cpu.h
> @@ -734,6 +734,7 @@ static inline void s390_do_cpu_load_normal(CPUState *cs, run_on_cpu_data arg)
>  /* cpu.c */
>  void s390_crypto_reset(void);
>  int s390_set_memory_limit(uint64_t new_limit, uint64_t *hw_limit);
> +void s390_set_max_pagesize(uint64_t pagesize, Error **errp);
>  void s390_cmma_reset(void);
>  void s390_enable_css_support(S390CPU *cpu);
>  int s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch_id,
> diff --git a/target/s390x/kvm-stub.c b/target/s390x/kvm-stub.c
> index bf7795e47a..22b4514ca6 100644
> --- a/target/s390x/kvm-stub.c
> +++ b/target/s390x/kvm-stub.c
> @@ -93,6 +93,10 @@ int kvm_s390_set_mem_limit(uint64_t new_limit, uint64_t *hw_limit)
>      return 0;
>  }
>  
> +void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp)
> +{
> +}
> +
>  void kvm_s390_crypto_reset(void)
>  {
>  }
> diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
> index 19530fb94e..bee73dc1a4 100644
> --- a/target/s390x/kvm.c
> +++ b/target/s390x/kvm.c
> @@ -283,44 +283,37 @@ void kvm_s390_crypto_reset(void)
>      }
>  }
>  
> -static int kvm_s390_configure_mempath_backing(KVMState *s)
> +void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp)
>  {
> -    size_t path_psize = qemu_getrampagesize();
> -
> -    if (path_psize == 4 * KiB) {
> -        return 0;
> +    if (pagesize == 4 * KiB) {
> +        return;
>      }
>  
>      if (!hpage_1m_allowed()) {
> -        error_report("This QEMU machine does not support huge page "
> -                     "mappings");
> -        return -EINVAL;
> +        error_setg(errp, "This QEMU machine does not support huge page "
> +                   "mappings");
> +        return;
>      }
>  
> -    if (path_psize != 1 * MiB) {
> -        error_report("Memory backing with 2G pages was specified, "
> -                     "but KVM does not support this memory backing");
> -        return -EINVAL;
> +    if (pagesize != 1 * MiB) {
> +        error_setg(errp, "Memory backing with 2G pages was specified, "
> +                   "but KVM does not support this memory backing");
> +        return;
>      }
>  
> -    if (kvm_vm_enable_cap(s, KVM_CAP_S390_HPAGE_1M, 0)) {
> -        error_report("Memory backing with 1M pages was specified, "
> -                     "but KVM does not support this memory backing");
> -        return -EINVAL;
> +    if (kvm_vm_enable_cap(kvm_state, KVM_CAP_S390_HPAGE_1M, 0)) {
> +        error_setg(errp, "Memory backing with 1M pages was specified, "
> +                   "but KVM does not support this memory backing");
> +        return;
>      }
>  
>      cap_hpage_1m = 1;
> -    return 0;
>  }
>  
>  int kvm_arch_init(MachineState *ms, KVMState *s)
>  {
>      MachineClass *mc = MACHINE_GET_CLASS(ms);
>  
> -    if (kvm_s390_configure_mempath_backing(s)) {
> -        return -EINVAL;
> -    }
> -
>      mc->default_cpu_type = S390_CPU_TYPE_NAME("host");
>      cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS);
>      cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF);
> diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h
> index 6e52287da3..caf985955b 100644
> --- a/target/s390x/kvm_s390x.h
> +++ b/target/s390x/kvm_s390x.h
> @@ -36,6 +36,7 @@ int kvm_s390_cmma_active(void);
>  void kvm_s390_cmma_reset(void);
>  void kvm_s390_reset_vcpu(S390CPU *cpu);
>  int kvm_s390_set_mem_limit(uint64_t new_limit, uint64_t *hw_limit);
> +void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp);
>  void kvm_s390_crypto_reset(void);
>  void kvm_s390_restart_interrupt(S390CPU *cpu);
>  void kvm_s390_stop_interrupt(S390CPU *cpu);


Re: [Qemu-devel] [PATCH v1] s390x/kvm: Configure page size after memory has actually been initialized
Posted by David Hildenbrand 5 years ago
On 27.03.19 17:45, Igor Mammedov wrote:
> On Wed, 27 Mar 2019 14:59:44 +0100
> David Hildenbrand <david@redhat.com> wrote:
> 
>> Right now we configure the pagesize quite early, when initializing KVM.
>> This is long before system memory is actually allocated via
>> memory_region_allocate_system_memory(), and therefore memory backends
>> marked as mapped.
>>
>> Instead, let's configure the maximum page size after initializing
>> memory in s390_memory_init(). cap_hpage_1m is still properly
>> configured before creating any CPUs, and therefore before configuring
>> the CPU model and eventually enabling CMMA.
>>
>> We might later want to replace qemu_getrampagesize() by another
>> detection mechanism, e.g. only looking at mapped, initial memory.
>> We don't support any memory devices yet, and if so, we can always reject
>> devices with a page size bigger than the initial page size when
>> hotplugging. qemu_getrampagesize() should work for now, especially when
>> converting it to only look at mapped backends.
>>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
> 
> Acked-by: Igor Mammedov <imammedo@redhat.com>

BTW, do we want

qemu_getmaxrampagesize()
qemu_getminrampagesize()

or similar. qemu_getrampagesize() in its current form is really far from
beautiful.

-- 

Thanks,

David / dhildenb

Re: [Qemu-devel] [PATCH v1] s390x/kvm: Configure page size after memory has actually been initialized
Posted by David Gibson 5 years ago
On Wed, Mar 27, 2019 at 09:06:53PM +0100, David Hildenbrand wrote:
> On 27.03.19 17:45, Igor Mammedov wrote:
> > On Wed, 27 Mar 2019 14:59:44 +0100
> > David Hildenbrand <david@redhat.com> wrote:
> > 
> >> Right now we configure the pagesize quite early, when initializing KVM.
> >> This is long before system memory is actually allocated via
> >> memory_region_allocate_system_memory(), and therefore memory backends
> >> marked as mapped.
> >>
> >> Instead, let's configure the maximum page size after initializing
> >> memory in s390_memory_init(). cap_hpage_1m is still properly
> >> configured before creating any CPUs, and therefore before configuring
> >> the CPU model and eventually enabling CMMA.
> >>
> >> We might later want to replace qemu_getrampagesize() by another
> >> detection mechanism, e.g. only looking at mapped, initial memory.
> >> We don't support any memory devices yet, and if so, we can always reject
> >> devices with a page size bigger than the initial page size when
> >> hotplugging. qemu_getrampagesize() should work for now, especially when
> >> converting it to only look at mapped backends.
> >>
> >> Signed-off-by: David Hildenbrand <david@redhat.com>
> > 
> > Acked-by: Igor Mammedov <imammedo@redhat.com>
> 
> BTW, do we want
> 
> qemu_getmaxrampagesize()
> qemu_getminrampagesize()

That could work.

> or similar. qemu_getrampagesize() in its current form is really far from
> beautiful.

Yeah, and AFAICT the way it's used here remains incorrect.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
Re: [Qemu-devel] [PATCH v1] s390x/kvm: Configure page size after memory has actually been initialized
Posted by David Hildenbrand 5 years ago
On 28.03.19 01:24, David Gibson wrote:
> On Wed, Mar 27, 2019 at 09:06:53PM +0100, David Hildenbrand wrote:
>> On 27.03.19 17:45, Igor Mammedov wrote:
>>> On Wed, 27 Mar 2019 14:59:44 +0100
>>> David Hildenbrand <david@redhat.com> wrote:
>>>
>>>> Right now we configure the pagesize quite early, when initializing KVM.
>>>> This is long before system memory is actually allocated via
>>>> memory_region_allocate_system_memory(), and therefore memory backends
>>>> marked as mapped.
>>>>
>>>> Instead, let's configure the maximum page size after initializing
>>>> memory in s390_memory_init(). cap_hpage_1m is still properly
>>>> configured before creating any CPUs, and therefore before configuring
>>>> the CPU model and eventually enabling CMMA.
>>>>
>>>> We might later want to replace qemu_getrampagesize() by another
>>>> detection mechanism, e.g. only looking at mapped, initial memory.
>>>> We don't support any memory devices yet, and if so, we can always reject
>>>> devices with a page size bigger than the initial page size when
>>>> hotplugging. qemu_getrampagesize() should work for now, especially when
>>>> converting it to only look at mapped backends.
>>>>
>>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>>
>>> Acked-by: Igor Mammedov <imammedo@redhat.com>
>>
>> BTW, do we want
>>
>> qemu_getmaxrampagesize()
>> qemu_getminrampagesize()
> 
> That could work.
> 
>> or similar. qemu_getrampagesize() in its current form is really far from
>> beautiful.
> 
> Yeah, and AFAICT the way it's used here remains incorrect.
> 

Soooooo,

this is all a big mess :)


1. We have to decide on the page size before initializing the CPU model
(due to CMMA) and before creating any CPUs -> We have to do it
early/before machine init.

2. Memory devices (if ever supported) are created + realized (+ backends
mapped) after machine init.

3. Memory backends are created delayed, so even after creating devices.

All we can do for s390x is

a) Use the page size of initial memory when configuring the page size in
KVM. (AFAICS what would be done right now)

b) When cold/hotplugging memory devices, reject devices with a page size
bigger than the page size of initial memory. Not an issue now. In the
future it might be "unfortunate" but at least we can print a nice error
from the machine hotplug handler "page size not supported in this
configuration".

I double checked, "-numa" is not supported in s390x. So "-numa
node,mempath=..." does not apply. However this might change and we can
easily forget about this. Also, getting rid of -mem-path might be one
issue. So the right think to do would be

a) this patch
b) introduce and use qemu_getmaxrampagesize()

Then, we would properly detect the maximum page size *for initial
memory*. Memory devices, we will have to worry about in the future, but
should be easy to handle (remember and check against maximum configured
 page size).

Thoughts?

-- 

Thanks,

David / dhildenb

Re: [Qemu-devel] [PATCH v1] s390x/kvm: Configure page size after memory has actually been initialized
Posted by Igor Mammedov 5 years ago
On Thu, 28 Mar 2019 11:18:02 +0100
David Hildenbrand <david@redhat.com> wrote:

> On 28.03.19 01:24, David Gibson wrote:
> > On Wed, Mar 27, 2019 at 09:06:53PM +0100, David Hildenbrand wrote:  
> >> On 27.03.19 17:45, Igor Mammedov wrote:  
> >>> On Wed, 27 Mar 2019 14:59:44 +0100
> >>> David Hildenbrand <david@redhat.com> wrote:
> >>>  
> >>>> Right now we configure the pagesize quite early, when initializing KVM.
> >>>> This is long before system memory is actually allocated via
> >>>> memory_region_allocate_system_memory(), and therefore memory backends
> >>>> marked as mapped.
> >>>>
> >>>> Instead, let's configure the maximum page size after initializing
> >>>> memory in s390_memory_init(). cap_hpage_1m is still properly
> >>>> configured before creating any CPUs, and therefore before configuring
> >>>> the CPU model and eventually enabling CMMA.
> >>>>
> >>>> We might later want to replace qemu_getrampagesize() by another
> >>>> detection mechanism, e.g. only looking at mapped, initial memory.
> >>>> We don't support any memory devices yet, and if so, we can always reject
> >>>> devices with a page size bigger than the initial page size when
> >>>> hotplugging. qemu_getrampagesize() should work for now, especially when
> >>>> converting it to only look at mapped backends.
> >>>>
> >>>> Signed-off-by: David Hildenbrand <david@redhat.com>  
> >>>
> >>> Acked-by: Igor Mammedov <imammedo@redhat.com>  
> >>
> >> BTW, do we want
> >>
> >> qemu_getmaxrampagesize()
> >> qemu_getminrampagesize()  
> > 
> > That could work.
> >   
> >> or similar. qemu_getrampagesize() in its current form is really far from
> >> beautiful.  
> > 
> > Yeah, and AFAICT the way it's used here remains incorrect.
> >   
> 
> Soooooo,
> 
> this is all a big mess :)
> 
> 
> 1. We have to decide on the page size before initializing the CPU model
> (due to CMMA) and before creating any CPUs -> We have to do it
> early/before machine init.
> 
> 2. Memory devices (if ever supported) are created + realized (+ backends
> mapped) after machine init.
> 
> 3. Memory backends are created delayed, so even after creating devices.
> 
> All we can do for s390x is
> 
> a) Use the page size of initial memory when configuring the page size in
> KVM. (AFAICS what would be done right now)
> 
> b) When cold/hotplugging memory devices, reject devices with a page size
> bigger than the page size of initial memory. Not an issue now. In the
> future it might be "unfortunate" but at least we can print a nice error
> from the machine hotplug handler "page size not supported in this
> configuration".
> 
> I double checked, "-numa" is not supported in s390x. So "-numa
> node,mempath=..." does not apply. However this might change and we can
> easily forget about this. Also, getting rid of -mem-path might be one
> issue. So the right think to do would be
> 
> a) this patch
> b) introduce and use qemu_getmaxrampagesize()
> 
> Then, we would properly detect the maximum page size *for initial
> memory*. Memory devices, we will have to worry about in the future, but
> should be easy to handle (remember and check against maximum configured
>  page size).

Problem David tries to solve is that 
 1: user hotplugged backend with wrong page size
 2: hotplug of associated device 'pc-dimm' cleanly fails with error
 3: guest does reboot and QEMU crashes with fatal error

Issue is that qemu_getmaxrampagesize() iterates over backends which
might be used for guest RAM or theoretically might be used for
something else.

using 'mapped' attribute is a hack that currently works around issue [3]
but it's prone to being incorrect depending on call order.

maybe we should prevent [1] in the first place (but making backends
depend on machine/accel doesn't look good either)?

> Thoughts?
> 


Re: [Qemu-devel] [PATCH v1] s390x/kvm: Configure page size after memory has actually been initialized
Posted by David Gibson 5 years ago
On Thu, Mar 28, 2019 at 12:39:24PM +0100, Igor Mammedov wrote:
> On Thu, 28 Mar 2019 11:18:02 +0100
> David Hildenbrand <david@redhat.com> wrote:
> 
> > On 28.03.19 01:24, David Gibson wrote:
> > > On Wed, Mar 27, 2019 at 09:06:53PM +0100, David Hildenbrand wrote:  
> > >> On 27.03.19 17:45, Igor Mammedov wrote:  
> > >>> On Wed, 27 Mar 2019 14:59:44 +0100
> > >>> David Hildenbrand <david@redhat.com> wrote:
> > >>>  
> > >>>> Right now we configure the pagesize quite early, when initializing KVM.
> > >>>> This is long before system memory is actually allocated via
> > >>>> memory_region_allocate_system_memory(), and therefore memory backends
> > >>>> marked as mapped.
> > >>>>
> > >>>> Instead, let's configure the maximum page size after initializing
> > >>>> memory in s390_memory_init(). cap_hpage_1m is still properly
> > >>>> configured before creating any CPUs, and therefore before configuring
> > >>>> the CPU model and eventually enabling CMMA.
> > >>>>
> > >>>> We might later want to replace qemu_getrampagesize() by another
> > >>>> detection mechanism, e.g. only looking at mapped, initial memory.
> > >>>> We don't support any memory devices yet, and if so, we can always reject
> > >>>> devices with a page size bigger than the initial page size when
> > >>>> hotplugging. qemu_getrampagesize() should work for now, especially when
> > >>>> converting it to only look at mapped backends.
> > >>>>
> > >>>> Signed-off-by: David Hildenbrand <david@redhat.com>  
> > >>>
> > >>> Acked-by: Igor Mammedov <imammedo@redhat.com>  
> > >>
> > >> BTW, do we want
> > >>
> > >> qemu_getmaxrampagesize()
> > >> qemu_getminrampagesize()  
> > > 
> > > That could work.
> > >   
> > >> or similar. qemu_getrampagesize() in its current form is really far from
> > >> beautiful.  
> > > 
> > > Yeah, and AFAICT the way it's used here remains incorrect.
> > >   
> > 
> > Soooooo,
> > 
> > this is all a big mess :)
> > 
> > 
> > 1. We have to decide on the page size before initializing the CPU model
> > (due to CMMA) and before creating any CPUs -> We have to do it
> > early/before machine init.
> > 
> > 2. Memory devices (if ever supported) are created + realized (+ backends
> > mapped) after machine init.
> > 
> > 3. Memory backends are created delayed, so even after creating devices.
> > 
> > All we can do for s390x is
> > 
> > a) Use the page size of initial memory when configuring the page size in
> > KVM. (AFAICS what would be done right now)
> > 
> > b) When cold/hotplugging memory devices, reject devices with a page size
> > bigger than the page size of initial memory. Not an issue now. In the
> > future it might be "unfortunate" but at least we can print a nice error
> > from the machine hotplug handler "page size not supported in this
> > configuration".
> > 
> > I double checked, "-numa" is not supported in s390x. So "-numa
> > node,mempath=..." does not apply. However this might change and we can
> > easily forget about this. Also, getting rid of -mem-path might be one
> > issue. So the right think to do would be
> > 
> > a) this patch
> > b) introduce and use qemu_getmaxrampagesize()
> > 
> > Then, we would properly detect the maximum page size *for initial
> > memory*. Memory devices, we will have to worry about in the future, but
> > should be easy to handle (remember and check against maximum configured
> >  page size).
> 
> Problem David tries to solve is that 

Uh, which David?

>  1: user hotplugged backend with wrong page size
>  2: hotplug of associated device 'pc-dimm' cleanly fails with error
>  3: guest does reboot and QEMU crashes with fatal error
> 
> Issue is that qemu_getmaxrampagesize() iterates over backends which
> might be used for guest RAM or theoretically might be used for
> something else.
> 
> using 'mapped' attribute is a hack that currently works around issue [3]
> but it's prone to being incorrect depending on call order.

I'm not sure what you mean by that.  For the purposes we need on ppc
checking the RAM that's actually present in the guest really is what
we need.

> maybe we should prevent [1] in the first place (but making backends
> depend on machine/accel doesn't look good either)?

Yeah, I wondered about this.  There's no way to do it right now, and a
hook would be kinda ugly.  Not sure if it's more or less ugly than the
alternatives.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
Re: [Qemu-devel] [PATCH v1] s390x/kvm: Configure page size after memory has actually been initialized
Posted by David Hildenbrand 5 years ago
>> Problem David tries to solve is that 
> 
> Uh, which David?

I assume he was referring to you :)

> 
>>  1: user hotplugged backend with wrong page size
>>  2: hotplug of associated device 'pc-dimm' cleanly fails with error
>>  3: guest does reboot and QEMU crashes with fatal error
>>
>> Issue is that qemu_getmaxrampagesize() iterates over backends which
>> might be used for guest RAM or theoretically might be used for
>> something else.
>>
>> using 'mapped' attribute is a hack that currently works around issue [3]
>> but it's prone to being incorrect depending on call order.
> 
> I'm not sure what you mean by that.  For the purposes we need on ppc
> checking the RAM that's actually present in the guest really is what
> we need.

I guess one issue is that memory backends might actually be used for
something else than RAM. As of now, you can't differentiate between RAM
and *something else* when looking at a mapped memory backend.

E.g. internal memory devices could solve that in the future. Iterating
over them, looking at the memory region, detecting the page size might
be easy and you can be sure it is RAM.

Then, we would have to change/simplify the implementation of
qemu_getrampagesize() and friends.

> 
>> maybe we should prevent [1] in the first place (but making backends
>> depend on machine/accel doesn't look good either)?
> 
> Yeah, I wondered about this.  There's no way to do it right now, and a
> hook would be kinda ugly.  Not sure if it's more or less ugly than the
> alternatives.
> 

-- 

Thanks,

David / dhildenb

Re: [Qemu-devel] [PATCH v1] s390x/kvm: Configure page size after memory has actually been initialized
Posted by David Gibson 5 years ago
On Thu, Mar 28, 2019 at 11:18:02AM +0100, David Hildenbrand wrote:
> On 28.03.19 01:24, David Gibson wrote:
> > On Wed, Mar 27, 2019 at 09:06:53PM +0100, David Hildenbrand wrote:
> >> On 27.03.19 17:45, Igor Mammedov wrote:
> >>> On Wed, 27 Mar 2019 14:59:44 +0100
> >>> David Hildenbrand <david@redhat.com> wrote:
> >>>
> >>>> Right now we configure the pagesize quite early, when initializing KVM.
> >>>> This is long before system memory is actually allocated via
> >>>> memory_region_allocate_system_memory(), and therefore memory backends
> >>>> marked as mapped.
> >>>>
> >>>> Instead, let's configure the maximum page size after initializing
> >>>> memory in s390_memory_init(). cap_hpage_1m is still properly
> >>>> configured before creating any CPUs, and therefore before configuring
> >>>> the CPU model and eventually enabling CMMA.
> >>>>
> >>>> We might later want to replace qemu_getrampagesize() by another
> >>>> detection mechanism, e.g. only looking at mapped, initial memory.
> >>>> We don't support any memory devices yet, and if so, we can always reject
> >>>> devices with a page size bigger than the initial page size when
> >>>> hotplugging. qemu_getrampagesize() should work for now, especially when
> >>>> converting it to only look at mapped backends.
> >>>>
> >>>> Signed-off-by: David Hildenbrand <david@redhat.com>
> >>>
> >>> Acked-by: Igor Mammedov <imammedo@redhat.com>
> >>
> >> BTW, do we want
> >>
> >> qemu_getmaxrampagesize()
> >> qemu_getminrampagesize()
> > 
> > That could work.
> > 
> >> or similar. qemu_getrampagesize() in its current form is really far from
> >> beautiful.
> > 
> > Yeah, and AFAICT the way it's used here remains incorrect.
> > 
> 
> Soooooo,
> 
> this is all a big mess :)
> 
> 
> 1. We have to decide on the page size before initializing the CPU model
> (due to CMMA) and before creating any CPUs -> We have to do it
> early/before machine init.
> 
> 2. Memory devices (if ever supported) are created + realized (+ backends
> mapped) after machine init.
> 
> 3. Memory backends are created delayed, so even after creating devices.
> 
> All we can do for s390x is
> 
> a) Use the page size of initial memory when configuring the page size in
> KVM. (AFAICS what would be done right now)

Is the page size used here visible to the guest?

> b) When cold/hotplugging memory devices, reject devices with a page size
> bigger than the page size of initial memory. Not an issue now. In the
> future it might be "unfortunate" but at least we can print a nice error
> from the machine hotplug handler "page size not supported in this
> configuration".

Right, ppc does something similar, though for different reasons.

> I double checked, "-numa" is not supported in s390x. So "-numa
> node,mempath=..." does not apply. However this might change and we can
> easily forget about this. Also, getting rid of -mem-path might be one
> issue. So the right think to do would be
> 
> a) this patch
> b) introduce and use qemu_getmaxrampagesize()
> 
> Then, we would properly detect the maximum page size *for initial
> memory*. Memory devices, we will have to worry about in the future, but
> should be easy to handle (remember and check against maximum configured
>  page size).
> 
> Thoughts?

Sounds reasonable.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson