[v4] s390: stop abusing memory_region_allocate_system_memory()

[Qemu-devel] [PATCH for-4.2 v4 1/2] kvm: s390: split too big memory section on several memslots

Posted by Igor Mammedov 6 years, 6 months ago

Max memslot size supported by kvm on s390 is 8Tb,
move logic of splitting RAM in chunks upto 8T to KVM code.

This way it will hide KVM specific restrictions in KVM code
and won't affect baord level design decisions. Which would allow
us to avoid misusing memory_region_allocate_system_memory() API
and eventually use a single hostmem backend for guest RAM.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
v4:
  * fix compilation issue
          (Christian Borntraeger <borntraeger@de.ibm.com>)
  * advance HVA along with GPA in kvm_set_phys_mem()
          (Christian Borntraeger <borntraeger@de.ibm.com>)

patch prepares only KVM side for switching to single RAM memory region
another patch will take care of  dropping manual RAM partitioning in
s390 code.
---
 include/sysemu/kvm_int.h   |  1 +
 accel/kvm/kvm-all.c        | 80 +++++++++++++++++++++++---------------
 hw/s390x/s390-virtio-ccw.c |  9 -----
 target/s390x/kvm.c         | 12 ++++++
 4 files changed, 62 insertions(+), 40 deletions(-)

diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index 31df465fdc..7f7520bce2 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -41,4 +41,5 @@ typedef struct KVMMemoryListener {
 void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
                                   AddressSpace *as, int as_id);
 
+void kvm_set_max_memslot_size(hwaddr max_slot_size);
 #endif
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f450f25295..d87f855ea4 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -138,6 +138,7 @@ bool kvm_direct_msi_allowed;
 bool kvm_ioeventfd_any_length_allowed;
 bool kvm_msi_use_devid;
 static bool kvm_immediate_exit;
+static hwaddr kvm_max_slot_size = ~0;
 
 static const KVMCapabilityInfo kvm_required_capabilites[] = {
     KVM_CAP_INFO(USER_MEMORY),
@@ -951,6 +952,14 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
     return NULL;
 }
 
+void kvm_set_max_memslot_size(hwaddr max_slot_size)
+{
+    g_assert(
+        ROUND_UP(max_slot_size, qemu_real_host_page_size) == max_slot_size
+    );
+    kvm_max_slot_size = max_slot_size;
+}
+
 static void kvm_set_phys_mem(KVMMemoryListener *kml,
                              MemoryRegionSection *section, bool add)
 {
@@ -958,7 +967,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
     int err;
     MemoryRegion *mr = section->mr;
     bool writeable = !mr->readonly && !mr->rom_device;
-    hwaddr start_addr, size;
+    hwaddr start_addr, size, slot_size;
     void *ram;
 
     if (!memory_region_is_ram(mr)) {
@@ -983,41 +992,50 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
     kvm_slots_lock(kml);
 
     if (!add) {
-        mem = kvm_lookup_matching_slot(kml, start_addr, size);
-        if (!mem) {
-            goto out;
-        }
-        if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
-            kvm_physical_sync_dirty_bitmap(kml, section);
-        }
+        do {
+            slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
+            mem = kvm_lookup_matching_slot(kml, start_addr, slot_size);
+            if (!mem) {
+                goto out;
+            }
+            if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+                kvm_physical_sync_dirty_bitmap(kml, section);
+            }
 
-        /* unregister the slot */
-        g_free(mem->dirty_bmap);
-        mem->dirty_bmap = NULL;
-        mem->memory_size = 0;
-        mem->flags = 0;
-        err = kvm_set_user_memory_region(kml, mem, false);
-        if (err) {
-            fprintf(stderr, "%s: error unregistering slot: %s\n",
-                    __func__, strerror(-err));
-            abort();
-        }
+            /* unregister the slot */
+            g_free(mem->dirty_bmap);
+            mem->dirty_bmap = NULL;
+            mem->memory_size = 0;
+            mem->flags = 0;
+            err = kvm_set_user_memory_region(kml, mem, false);
+            if (err) {
+                fprintf(stderr, "%s: error unregistering slot: %s\n",
+                        __func__, strerror(-err));
+                abort();
+            }
+            start_addr += slot_size;
+        } while ((size -= slot_size));
         goto out;
     }
 
     /* register the new slot */
-    mem = kvm_alloc_slot(kml);
-    mem->memory_size = size;
-    mem->start_addr = start_addr;
-    mem->ram = ram;
-    mem->flags = kvm_mem_flags(mr);
-
-    err = kvm_set_user_memory_region(kml, mem, true);
-    if (err) {
-        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
-                strerror(-err));
-        abort();
-    }
+    do {
+        slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
+        mem = kvm_alloc_slot(kml);
+        mem->memory_size = slot_size;
+        mem->start_addr = start_addr;
+        mem->ram = ram;
+        mem->flags = kvm_mem_flags(mr);
+
+        err = kvm_set_user_memory_region(kml, mem, true);
+        if (err) {
+            fprintf(stderr, "%s: error registering slot: %s\n", __func__,
+                    strerror(-err));
+            abort();
+        }
+        start_addr += slot_size;
+        ram += slot_size;
+    } while ((size -= slot_size));
 
 out:
     kvm_slots_unlock(kml);
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 5b6a9a4e55..0c03ffb7c7 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -151,15 +151,6 @@ static void virtio_ccw_register_hcalls(void)
                                    virtio_ccw_hcall_early_printk);
 }
 
-/*
- * KVM does only support memory slots up to KVM_MEM_MAX_NR_PAGES pages
- * as the dirty bitmap must be managed by bitops that take an int as
- * position indicator. If we have a guest beyond that we will split off
- * new subregions. The split must happen on a segment boundary (1MB).
- */
-#define KVM_MEM_MAX_NR_PAGES ((1ULL << 31) - 1)
-#define SEG_MSK (~0xfffffULL)
-#define KVM_SLOT_MAX_BYTES ((KVM_MEM_MAX_NR_PAGES * TARGET_PAGE_SIZE) & SEG_MSK)
 static void s390_memory_init(ram_addr_t mem_size)
 {
     MemoryRegion *sysmem = get_system_memory();
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 6e814c230b..6b1428a760 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -28,6 +28,7 @@
 #include "cpu.h"
 #include "internal.h"
 #include "kvm_s390x.h"
+#include "sysemu/kvm_int.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/timer.h"
@@ -122,6 +123,16 @@
 #define VCPU_IRQ_BUF_SIZE(max_cpus) (sizeof(struct kvm_s390_irq) * \
                                      (max_cpus + NR_LOCAL_IRQS))
 
+/*
+ * KVM does only support memory slots up to KVM_MEM_MAX_NR_PAGES pages
+ * as the dirty bitmap must be managed by bitops that take an int as
+ * position indicator. If we have a guest beyond that we will split off
+ * new subregions. The split must happen on a segment boundary (1MB).
+ */
+#define KVM_MEM_MAX_NR_PAGES ((1ULL << 31) - 1)
+#define SEG_MSK (~0xfffffULL)
+#define KVM_SLOT_MAX_BYTES ((KVM_MEM_MAX_NR_PAGES * TARGET_PAGE_SIZE) & SEG_MSK)
+
 static CPUWatchpoint hw_watchpoint;
 /*
  * We don't use a list because this structure is also used to transmit the
@@ -347,6 +358,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
      */
     /* kvm_vm_enable_cap(s, KVM_CAP_S390_AIS, 0); */
 
+    kvm_set_max_memslot_size(KVM_SLOT_MAX_BYTES);
     return 0;
 }
 
-- 
2.18.1

Re: [Qemu-devel] [qemu-s390x] [PATCH for-4.2 v4 1/2] kvm: s390: split too big memory section on several memslots

Posted by David Hildenbrand 6 years, 6 months ago

On 06.08.19 11:48, Igor Mammedov wrote:
> Max memslot size supported by kvm on s390 is 8Tb,
> move logic of splitting RAM in chunks upto 8T to KVM code.
> 
> This way it will hide KVM specific restrictions in KVM code
> and won't affect baord level design decisions. Which would allow
> us to avoid misusing memory_region_allocate_system_memory() API
> and eventually use a single hostmem backend for guest RAM.
> 
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> ---
> v4:
>   * fix compilation issue
>           (Christian Borntraeger <borntraeger@de.ibm.com>)
>   * advance HVA along with GPA in kvm_set_phys_mem()
>           (Christian Borntraeger <borntraeger@de.ibm.com>)
> 
> patch prepares only KVM side for switching to single RAM memory region
> another patch will take care of  dropping manual RAM partitioning in
> s390 code.
> ---
>  include/sysemu/kvm_int.h   |  1 +
>  accel/kvm/kvm-all.c        | 80 +++++++++++++++++++++++---------------
>  hw/s390x/s390-virtio-ccw.c |  9 -----
>  target/s390x/kvm.c         | 12 ++++++
>  4 files changed, 62 insertions(+), 40 deletions(-)
> 
> diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
> index 31df465fdc..7f7520bce2 100644
> --- a/include/sysemu/kvm_int.h
> +++ b/include/sysemu/kvm_int.h
> @@ -41,4 +41,5 @@ typedef struct KVMMemoryListener {
>  void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
>                                    AddressSpace *as, int as_id);
>  
> +void kvm_set_max_memslot_size(hwaddr max_slot_size);
>  #endif
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index f450f25295..d87f855ea4 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -138,6 +138,7 @@ bool kvm_direct_msi_allowed;
>  bool kvm_ioeventfd_any_length_allowed;
>  bool kvm_msi_use_devid;
>  static bool kvm_immediate_exit;
> +static hwaddr kvm_max_slot_size = ~0;
>  
>  static const KVMCapabilityInfo kvm_required_capabilites[] = {
>      KVM_CAP_INFO(USER_MEMORY),
> @@ -951,6 +952,14 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
>      return NULL;
>  }
>  
> +void kvm_set_max_memslot_size(hwaddr max_slot_size)
> +{
> +    g_assert(
> +        ROUND_UP(max_slot_size, qemu_real_host_page_size) == max_slot_size
> +    );
> +    kvm_max_slot_size = max_slot_size;
> +}
> +
>  static void kvm_set_phys_mem(KVMMemoryListener *kml,
>                               MemoryRegionSection *section, bool add)
>  {
> @@ -958,7 +967,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
>      int err;
>      MemoryRegion *mr = section->mr;
>      bool writeable = !mr->readonly && !mr->rom_device;
> -    hwaddr start_addr, size;
> +    hwaddr start_addr, size, slot_size;
>      void *ram;
>  
>      if (!memory_region_is_ram(mr)) {
> @@ -983,41 +992,50 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
>      kvm_slots_lock(kml);
>  
>      if (!add) {
> -        mem = kvm_lookup_matching_slot(kml, start_addr, size);
> -        if (!mem) {
> -            goto out;
> -        }
> -        if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
> -            kvm_physical_sync_dirty_bitmap(kml, section);
> -        }
> +        do {
> +            slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
> +            mem = kvm_lookup_matching_slot(kml, start_addr, slot_size);
> +            if (!mem) {
> +                goto out;

I wonder if this can trigger for the first, but not the second slot (or
the other way around). In that case you would want to continue the loop
(incrementing counters). But most probably there would something be
wrong in the caller if that would happen.

> +            }
> +            if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
> +                kvm_physical_sync_dirty_bitmap(kml, section);
> +            }
>  
> -        /* unregister the slot */
> -        g_free(mem->dirty_bmap);
> -        mem->dirty_bmap = NULL;
> -        mem->memory_size = 0;
> -        mem->flags = 0;
> -        err = kvm_set_user_memory_region(kml, mem, false);
> -        if (err) {
> -            fprintf(stderr, "%s: error unregistering slot: %s\n",
> -                    __func__, strerror(-err));
> -            abort();
> -        }
> +            /* unregister the slot */
> +            g_free(mem->dirty_bmap);
> +            mem->dirty_bmap = NULL;
> +            mem->memory_size = 0;
> +            mem->flags = 0;
> +            err = kvm_set_user_memory_region(kml, mem, false);
> +            if (err) {
> +                fprintf(stderr, "%s: error unregistering slot: %s\n",
> +                        __func__, strerror(-err));
> +                abort();
> +            }
> +            start_addr += slot_size;
> +        } while ((size -= slot_size));

NIT: I think you can drop parentheses - but I would really prefer to not
perform computations in the condition.

>          goto out;
>      }
>  
>      /* register the new slot */
> -    mem = kvm_alloc_slot(kml);
> -    mem->memory_size = size;
> -    mem->start_addr = start_addr;
> -    mem->ram = ram;
> -    mem->flags = kvm_mem_flags(mr);
> -
> -    err = kvm_set_user_memory_region(kml, mem, true);
> -    if (err) {
> -        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
> -                strerror(-err));
> -        abort();
> -    }
> +    do {
> +        slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
> +        mem = kvm_alloc_slot(kml);
> +        mem->memory_size = slot_size;
> +        mem->start_addr = start_addr;
> +        mem->ram = ram;
> +        mem->flags = kvm_mem_flags(mr);
> +
> +        err = kvm_set_user_memory_region(kml, mem, true);
> +        if (err) {
> +            fprintf(stderr, "%s: error registering slot: %s\n", __func__,
> +                    strerror(-err));
> +            abort();
> +        }
> +        start_addr += slot_size;
> +        ram += slot_size;
> +    } while ((size -= slot_size));

dito

One note:

KVMState stores the number of slots in "nr_slots". We export that via
kvm_get_max_memslots().

E.g., spapr uses that to compare it against "machine->ram_slots". Later
(esp. for s390x), kvm_get_max_memslots() can no longer be compared to
ram_slots directly. Could be that a ram slot would map to multiple KVM
memory slots. There would be no easy way to detect if KVM is able to
deal with "machine->ram_slots" as defined by the user, until the sizes
of the slots are known.

-- 

Thanks,

David / dhildenb

Re: [Qemu-devel] [qemu-s390x] [PATCH for-4.2 v4 1/2] kvm: s390: split too big memory section on several memslots

Posted by Igor Mammedov 6 years, 6 months ago

On Wed, 7 Aug 2019 09:54:27 +0200
David Hildenbrand <david@redhat.com> wrote:

> On 06.08.19 11:48, Igor Mammedov wrote:
> > Max memslot size supported by kvm on s390 is 8Tb,
> > move logic of splitting RAM in chunks upto 8T to KVM code.
> > 
> > This way it will hide KVM specific restrictions in KVM code
> > and won't affect baord level design decisions. Which would allow
> > us to avoid misusing memory_region_allocate_system_memory() API
> > and eventually use a single hostmem backend for guest RAM.
> > 
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > ---
> > v4:
> >   * fix compilation issue
> >           (Christian Borntraeger <borntraeger@de.ibm.com>)
> >   * advance HVA along with GPA in kvm_set_phys_mem()
> >           (Christian Borntraeger <borntraeger@de.ibm.com>)
> > 
> > patch prepares only KVM side for switching to single RAM memory region
> > another patch will take care of  dropping manual RAM partitioning in
> > s390 code.
> > ---
> >  include/sysemu/kvm_int.h   |  1 +
> >  accel/kvm/kvm-all.c        | 80 +++++++++++++++++++++++---------------
> >  hw/s390x/s390-virtio-ccw.c |  9 -----
> >  target/s390x/kvm.c         | 12 ++++++
> >  4 files changed, 62 insertions(+), 40 deletions(-)
> > 
> > diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
> > index 31df465fdc..7f7520bce2 100644
> > --- a/include/sysemu/kvm_int.h
> > +++ b/include/sysemu/kvm_int.h
> > @@ -41,4 +41,5 @@ typedef struct KVMMemoryListener {
> >  void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
> >                                    AddressSpace *as, int as_id);
> >  
> > +void kvm_set_max_memslot_size(hwaddr max_slot_size);
> >  #endif
> > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> > index f450f25295..d87f855ea4 100644
> > --- a/accel/kvm/kvm-all.c
> > +++ b/accel/kvm/kvm-all.c
> > @@ -138,6 +138,7 @@ bool kvm_direct_msi_allowed;
> >  bool kvm_ioeventfd_any_length_allowed;
> >  bool kvm_msi_use_devid;
> >  static bool kvm_immediate_exit;
> > +static hwaddr kvm_max_slot_size = ~0;
> >  
> >  static const KVMCapabilityInfo kvm_required_capabilites[] = {
> >      KVM_CAP_INFO(USER_MEMORY),
> > @@ -951,6 +952,14 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
> >      return NULL;
> >  }
> >  
> > +void kvm_set_max_memslot_size(hwaddr max_slot_size)
> > +{
> > +    g_assert(
> > +        ROUND_UP(max_slot_size, qemu_real_host_page_size) == max_slot_size
> > +    );
> > +    kvm_max_slot_size = max_slot_size;
> > +}
> > +
> >  static void kvm_set_phys_mem(KVMMemoryListener *kml,
> >                               MemoryRegionSection *section, bool add)
> >  {
> > @@ -958,7 +967,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
> >      int err;
> >      MemoryRegion *mr = section->mr;
> >      bool writeable = !mr->readonly && !mr->rom_device;
> > -    hwaddr start_addr, size;
> > +    hwaddr start_addr, size, slot_size;
> >      void *ram;
> >  
> >      if (!memory_region_is_ram(mr)) {
> > @@ -983,41 +992,50 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
> >      kvm_slots_lock(kml);
> >  
> >      if (!add) {
> > -        mem = kvm_lookup_matching_slot(kml, start_addr, size);
> > -        if (!mem) {
> > -            goto out;
> > -        }
> > -        if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
> > -            kvm_physical_sync_dirty_bitmap(kml, section);
> > -        }
> > +        do {
> > +            slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
> > +            mem = kvm_lookup_matching_slot(kml, start_addr, slot_size);
> > +            if (!mem) {
> > +                goto out;  
> 
> I wonder if this can trigger for the first, but not the second slot (or
> the other way around). In that case you would want to continue the loop
> (incrementing counters). But most probably there would something be
> wrong in the caller if that would happen.

I couldn't come up with scenario where it would be possible
(unless flatview rendering is broken)

> 
> > +            }
> > +            if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
> > +                kvm_physical_sync_dirty_bitmap(kml, section);
> > +            }
> >  
> > -        /* unregister the slot */
> > -        g_free(mem->dirty_bmap);
> > -        mem->dirty_bmap = NULL;
> > -        mem->memory_size = 0;
> > -        mem->flags = 0;
> > -        err = kvm_set_user_memory_region(kml, mem, false);
> > -        if (err) {
> > -            fprintf(stderr, "%s: error unregistering slot: %s\n",
> > -                    __func__, strerror(-err));
> > -            abort();
> > -        }
> > +            /* unregister the slot */
> > +            g_free(mem->dirty_bmap);
> > +            mem->dirty_bmap = NULL;
> > +            mem->memory_size = 0;
> > +            mem->flags = 0;
> > +            err = kvm_set_user_memory_region(kml, mem, false);
> > +            if (err) {
> > +                fprintf(stderr, "%s: error unregistering slot: %s\n",
> > +                        __func__, strerror(-err));
> > +                abort();
> > +            }
> > +            start_addr += slot_size;
> > +        } while ((size -= slot_size));  
> 
> NIT: I think you can drop parentheses - but I would really prefer to not
> perform computations in the condition.
sure, I'll move computation within the loop

> >          goto out;
> >      }
> >  
> >      /* register the new slot */
> > -    mem = kvm_alloc_slot(kml);
> > -    mem->memory_size = size;
> > -    mem->start_addr = start_addr;
> > -    mem->ram = ram;
> > -    mem->flags = kvm_mem_flags(mr);
> > -
> > -    err = kvm_set_user_memory_region(kml, mem, true);
> > -    if (err) {
> > -        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
> > -                strerror(-err));
> > -        abort();
> > -    }
> > +    do {
> > +        slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
> > +        mem = kvm_alloc_slot(kml);
> > +        mem->memory_size = slot_size;
> > +        mem->start_addr = start_addr;
> > +        mem->ram = ram;
> > +        mem->flags = kvm_mem_flags(mr);
> > +
> > +        err = kvm_set_user_memory_region(kml, mem, true);
> > +        if (err) {
> > +            fprintf(stderr, "%s: error registering slot: %s\n", __func__,
> > +                    strerror(-err));
> > +            abort();
> > +        }
> > +        start_addr += slot_size;
> > +        ram += slot_size;
> > +    } while ((size -= slot_size));  
> 
> dito
> 
> One note:
> 
> KVMState stores the number of slots in "nr_slots". We export that via
> kvm_get_max_memslots().
>
> E.g., spapr uses that to compare it against "machine->ram_slots".
this patch shouldn't affect spapr/arm or x86 machines as they do not have
limits on memslot size.

> Later (esp. for s390x), kvm_get_max_memslots() can no longer be compared to
> ram_slots directly. Could be that a ram slot would map to multiple KVM
> memory slots. There would be no easy way to detect if KVM is able to
> deal with "machine->ram_slots" as defined by the user, until the sizes
> of the slots are known.

If I recall correctly about kvm_foo_slots() APIs,
assumption 1 memory region == 1 memslot didn't/doesn't hold true
in all cases (ex: x86) and it's only best effort to let us compare
the number of apples to oranges on a tree and works for current
usecases.

From hotplug point of view kvm_has_free_slot() would be more important,
to allow for graceful abort. If s390 would ever need to hotplug
RAM MemoryRegion, anyway APIs should be changed to account for
1:N split when actual dependency arises.

Re: [Qemu-devel] [qemu-s390x] [PATCH for-4.2 v4 1/2] kvm: s390: split too big memory section on several memslots

Posted by David Hildenbrand 6 years, 6 months ago

On 07.08.19 11:55, Igor Mammedov wrote:
> On Wed, 7 Aug 2019 09:54:27 +0200
> David Hildenbrand <david@redhat.com> wrote:
> 
>> On 06.08.19 11:48, Igor Mammedov wrote:
>>> Max memslot size supported by kvm on s390 is 8Tb,
>>> move logic of splitting RAM in chunks upto 8T to KVM code.
>>>
>>> This way it will hide KVM specific restrictions in KVM code
>>> and won't affect baord level design decisions. Which would allow
>>> us to avoid misusing memory_region_allocate_system_memory() API
>>> and eventually use a single hostmem backend for guest RAM.
>>>
>>> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
>>> ---
>>> v4:
>>>   * fix compilation issue
>>>           (Christian Borntraeger <borntraeger@de.ibm.com>)
>>>   * advance HVA along with GPA in kvm_set_phys_mem()
>>>           (Christian Borntraeger <borntraeger@de.ibm.com>)
>>>
>>> patch prepares only KVM side for switching to single RAM memory region
>>> another patch will take care of  dropping manual RAM partitioning in
>>> s390 code.
>>> ---
>>>  include/sysemu/kvm_int.h   |  1 +
>>>  accel/kvm/kvm-all.c        | 80 +++++++++++++++++++++++---------------
>>>  hw/s390x/s390-virtio-ccw.c |  9 -----
>>>  target/s390x/kvm.c         | 12 ++++++
>>>  4 files changed, 62 insertions(+), 40 deletions(-)
>>>
>>> diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
>>> index 31df465fdc..7f7520bce2 100644
>>> --- a/include/sysemu/kvm_int.h
>>> +++ b/include/sysemu/kvm_int.h
>>> @@ -41,4 +41,5 @@ typedef struct KVMMemoryListener {
>>>  void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
>>>                                    AddressSpace *as, int as_id);
>>>  
>>> +void kvm_set_max_memslot_size(hwaddr max_slot_size);
>>>  #endif
>>> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
>>> index f450f25295..d87f855ea4 100644
>>> --- a/accel/kvm/kvm-all.c
>>> +++ b/accel/kvm/kvm-all.c
>>> @@ -138,6 +138,7 @@ bool kvm_direct_msi_allowed;
>>>  bool kvm_ioeventfd_any_length_allowed;
>>>  bool kvm_msi_use_devid;
>>>  static bool kvm_immediate_exit;
>>> +static hwaddr kvm_max_slot_size = ~0;
>>>  
>>>  static const KVMCapabilityInfo kvm_required_capabilites[] = {
>>>      KVM_CAP_INFO(USER_MEMORY),
>>> @@ -951,6 +952,14 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
>>>      return NULL;
>>>  }
>>>  
>>> +void kvm_set_max_memslot_size(hwaddr max_slot_size)
>>> +{
>>> +    g_assert(
>>> +        ROUND_UP(max_slot_size, qemu_real_host_page_size) == max_slot_size
>>> +    );
>>> +    kvm_max_slot_size = max_slot_size;
>>> +}
>>> +
>>>  static void kvm_set_phys_mem(KVMMemoryListener *kml,
>>>                               MemoryRegionSection *section, bool add)
>>>  {
>>> @@ -958,7 +967,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
>>>      int err;
>>>      MemoryRegion *mr = section->mr;
>>>      bool writeable = !mr->readonly && !mr->rom_device;
>>> -    hwaddr start_addr, size;
>>> +    hwaddr start_addr, size, slot_size;
>>>      void *ram;
>>>  
>>>      if (!memory_region_is_ram(mr)) {
>>> @@ -983,41 +992,50 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
>>>      kvm_slots_lock(kml);
>>>  
>>>      if (!add) {
>>> -        mem = kvm_lookup_matching_slot(kml, start_addr, size);
>>> -        if (!mem) {
>>> -            goto out;
>>> -        }
>>> -        if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
>>> -            kvm_physical_sync_dirty_bitmap(kml, section);
>>> -        }
>>> +        do {
>>> +            slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
>>> +            mem = kvm_lookup_matching_slot(kml, start_addr, slot_size);
>>> +            if (!mem) {
>>> +                goto out;  
>>
>> I wonder if this can trigger for the first, but not the second slot (or
>> the other way around). In that case you would want to continue the loop
>> (incrementing counters). But most probably there would something be
>> wrong in the caller if that would happen.
> 
> I couldn't come up with scenario where it would be possible
> (unless flatview rendering is broken)
> 
>>
>>> +            }
>>> +            if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
>>> +                kvm_physical_sync_dirty_bitmap(kml, section);
>>> +            }
>>>  
>>> -        /* unregister the slot */
>>> -        g_free(mem->dirty_bmap);
>>> -        mem->dirty_bmap = NULL;
>>> -        mem->memory_size = 0;
>>> -        mem->flags = 0;
>>> -        err = kvm_set_user_memory_region(kml, mem, false);
>>> -        if (err) {
>>> -            fprintf(stderr, "%s: error unregistering slot: %s\n",
>>> -                    __func__, strerror(-err));
>>> -            abort();
>>> -        }
>>> +            /* unregister the slot */
>>> +            g_free(mem->dirty_bmap);
>>> +            mem->dirty_bmap = NULL;
>>> +            mem->memory_size = 0;
>>> +            mem->flags = 0;
>>> +            err = kvm_set_user_memory_region(kml, mem, false);
>>> +            if (err) {
>>> +                fprintf(stderr, "%s: error unregistering slot: %s\n",
>>> +                        __func__, strerror(-err));
>>> +                abort();
>>> +            }
>>> +            start_addr += slot_size;
>>> +        } while ((size -= slot_size));  
>>
>> NIT: I think you can drop parentheses - but I would really prefer to not
>> perform computations in the condition.
> sure, I'll move computation within the loop
> 
>>>          goto out;
>>>      }
>>>  
>>>      /* register the new slot */
>>> -    mem = kvm_alloc_slot(kml);
>>> -    mem->memory_size = size;
>>> -    mem->start_addr = start_addr;
>>> -    mem->ram = ram;
>>> -    mem->flags = kvm_mem_flags(mr);
>>> -
>>> -    err = kvm_set_user_memory_region(kml, mem, true);
>>> -    if (err) {
>>> -        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
>>> -                strerror(-err));
>>> -        abort();
>>> -    }
>>> +    do {
>>> +        slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
>>> +        mem = kvm_alloc_slot(kml);
>>> +        mem->memory_size = slot_size;
>>> +        mem->start_addr = start_addr;
>>> +        mem->ram = ram;
>>> +        mem->flags = kvm_mem_flags(mr);
>>> +
>>> +        err = kvm_set_user_memory_region(kml, mem, true);
>>> +        if (err) {
>>> +            fprintf(stderr, "%s: error registering slot: %s\n", __func__,
>>> +                    strerror(-err));
>>> +            abort();
>>> +        }
>>> +        start_addr += slot_size;
>>> +        ram += slot_size;
>>> +    } while ((size -= slot_size));  
>>
>> dito
>>
>> One note:
>>
>> KVMState stores the number of slots in "nr_slots". We export that via
>> kvm_get_max_memslots().
>>
>> E.g., spapr uses that to compare it against "machine->ram_slots".
> this patch shouldn't affect spapr/arm or x86 machines as they do not have
> limits on memslot size.
> 

Yes, just an example how the existing API could be used.

>> Later (esp. for s390x), kvm_get_max_memslots() can no longer be compared to
>> ram_slots directly. Could be that a ram slot would map to multiple KVM
>> memory slots. There would be no easy way to detect if KVM is able to
>> deal with "machine->ram_slots" as defined by the user, until the sizes
>> of the slots are known.
> 
> If I recall correctly about kvm_foo_slots() APIs,
> assumption 1 memory region == 1 memslot didn't/doesn't hold true
> in all cases (ex: x86) and it's only best effort to let us compare
> the number of apples to oranges on a tree and works for current
> usecases.

Yes, x86 needs two kvm slots due to SMM if I recall correctly.

> 
> From hotplug point of view kvm_has_free_slot() would be more important,
> to allow for graceful abort. If s390 would ever need to hotplug
> RAM MemoryRegion, anyway APIs should be changed to account for
> 1:N split when actual dependency arises.

Exactly, we should handle it gracefully then. (and hotplugging >4TB
DIMMs is quite unlikely, so we can ignore that for now, just wanted to
note it)

-- 

Thanks,

David / dhildenb

[Qemu-devel] [PATCH for-4.2 v5 1/2] kvm: s390: split too big memory section on several memslots

Posted by Igor Mammedov 6 years, 6 months ago

Max memslot size supported by kvm on s390 is 8Tb,
move logic of splitting RAM in chunks upto 8T to KVM code.

This way it will hide KVM specific restrictions in KVM code
and won't affect baord level design decisions. Which would allow
us to avoid misusing memory_region_allocate_system_memory() API
and eventually use a single hostmem backend for guest RAM.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
v5:
  * move computation 'size -= slot_size' inside of loop body
          (David Hildenbrand <david@redhat.com>)
v4:
  * fix compilation issue
          (Christian Borntraeger <borntraeger@de.ibm.com>)
  * advance HVA along with GPA in kvm_set_phys_mem()
          (Christian Borntraeger <borntraeger@de.ibm.com>)

patch prepares only KVM side for switching to single RAM memory region
another patch will take care of  dropping manual RAM partitioning in
s390 code.
---
 include/sysemu/kvm_int.h   |  1 +
 accel/kvm/kvm-all.c        | 82 ++++++++++++++++++++++++--------------
 hw/s390x/s390-virtio-ccw.c |  9 -----
 target/s390x/kvm.c         | 12 ++++++
 4 files changed, 64 insertions(+), 40 deletions(-)

diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index 31df465fdc..7f7520bce2 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -41,4 +41,5 @@ typedef struct KVMMemoryListener {
 void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
                                   AddressSpace *as, int as_id);
 
+void kvm_set_max_memslot_size(hwaddr max_slot_size);
 #endif
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f450f25295..8153556335 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -138,6 +138,7 @@ bool kvm_direct_msi_allowed;
 bool kvm_ioeventfd_any_length_allowed;
 bool kvm_msi_use_devid;
 static bool kvm_immediate_exit;
+static hwaddr kvm_max_slot_size = ~0;
 
 static const KVMCapabilityInfo kvm_required_capabilites[] = {
     KVM_CAP_INFO(USER_MEMORY),
@@ -951,6 +952,14 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
     return NULL;
 }
 
+void kvm_set_max_memslot_size(hwaddr max_slot_size)
+{
+    g_assert(
+        ROUND_UP(max_slot_size, qemu_real_host_page_size) == max_slot_size
+    );
+    kvm_max_slot_size = max_slot_size;
+}
+
 static void kvm_set_phys_mem(KVMMemoryListener *kml,
                              MemoryRegionSection *section, bool add)
 {
@@ -958,7 +967,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
     int err;
     MemoryRegion *mr = section->mr;
     bool writeable = !mr->readonly && !mr->rom_device;
-    hwaddr start_addr, size;
+    hwaddr start_addr, size, slot_size;
     void *ram;
 
     if (!memory_region_is_ram(mr)) {
@@ -983,41 +992,52 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
     kvm_slots_lock(kml);
 
     if (!add) {
-        mem = kvm_lookup_matching_slot(kml, start_addr, size);
-        if (!mem) {
-            goto out;
-        }
-        if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
-            kvm_physical_sync_dirty_bitmap(kml, section);
-        }
+        do {
+            slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
+            mem = kvm_lookup_matching_slot(kml, start_addr, slot_size);
+            if (!mem) {
+                goto out;
+            }
+            if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+                kvm_physical_sync_dirty_bitmap(kml, section);
+            }
 
-        /* unregister the slot */
-        g_free(mem->dirty_bmap);
-        mem->dirty_bmap = NULL;
-        mem->memory_size = 0;
-        mem->flags = 0;
-        err = kvm_set_user_memory_region(kml, mem, false);
-        if (err) {
-            fprintf(stderr, "%s: error unregistering slot: %s\n",
-                    __func__, strerror(-err));
-            abort();
-        }
+            /* unregister the slot */
+            g_free(mem->dirty_bmap);
+            mem->dirty_bmap = NULL;
+            mem->memory_size = 0;
+            mem->flags = 0;
+            err = kvm_set_user_memory_region(kml, mem, false);
+            if (err) {
+                fprintf(stderr, "%s: error unregistering slot: %s\n",
+                        __func__, strerror(-err));
+                abort();
+            }
+            start_addr += slot_size;
+            size -= slot_size;
+        } while (size);
         goto out;
     }
 
     /* register the new slot */
-    mem = kvm_alloc_slot(kml);
-    mem->memory_size = size;
-    mem->start_addr = start_addr;
-    mem->ram = ram;
-    mem->flags = kvm_mem_flags(mr);
-
-    err = kvm_set_user_memory_region(kml, mem, true);
-    if (err) {
-        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
-                strerror(-err));
-        abort();
-    }
+    do {
+        slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
+        mem = kvm_alloc_slot(kml);
+        mem->memory_size = slot_size;
+        mem->start_addr = start_addr;
+        mem->ram = ram;
+        mem->flags = kvm_mem_flags(mr);
+
+        err = kvm_set_user_memory_region(kml, mem, true);
+        if (err) {
+            fprintf(stderr, "%s: error registering slot: %s\n", __func__,
+                    strerror(-err));
+            abort();
+        }
+        start_addr += slot_size;
+        ram += slot_size;
+        size -= slot_size;
+    } while (size);
 
 out:
     kvm_slots_unlock(kml);
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 5b6a9a4e55..0c03ffb7c7 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -151,15 +151,6 @@ static void virtio_ccw_register_hcalls(void)
                                    virtio_ccw_hcall_early_printk);
 }
 
-/*
- * KVM does only support memory slots up to KVM_MEM_MAX_NR_PAGES pages
- * as the dirty bitmap must be managed by bitops that take an int as
- * position indicator. If we have a guest beyond that we will split off
- * new subregions. The split must happen on a segment boundary (1MB).
- */
-#define KVM_MEM_MAX_NR_PAGES ((1ULL << 31) - 1)
-#define SEG_MSK (~0xfffffULL)
-#define KVM_SLOT_MAX_BYTES ((KVM_MEM_MAX_NR_PAGES * TARGET_PAGE_SIZE) & SEG_MSK)
 static void s390_memory_init(ram_addr_t mem_size)
 {
     MemoryRegion *sysmem = get_system_memory();
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 6e814c230b..6b1428a760 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -28,6 +28,7 @@
 #include "cpu.h"
 #include "internal.h"
 #include "kvm_s390x.h"
+#include "sysemu/kvm_int.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/timer.h"
@@ -122,6 +123,16 @@
 #define VCPU_IRQ_BUF_SIZE(max_cpus) (sizeof(struct kvm_s390_irq) * \
                                      (max_cpus + NR_LOCAL_IRQS))
 
+/*
+ * KVM does only support memory slots up to KVM_MEM_MAX_NR_PAGES pages
+ * as the dirty bitmap must be managed by bitops that take an int as
+ * position indicator. If we have a guest beyond that we will split off
+ * new subregions. The split must happen on a segment boundary (1MB).
+ */
+#define KVM_MEM_MAX_NR_PAGES ((1ULL << 31) - 1)
+#define SEG_MSK (~0xfffffULL)
+#define KVM_SLOT_MAX_BYTES ((KVM_MEM_MAX_NR_PAGES * TARGET_PAGE_SIZE) & SEG_MSK)
+
 static CPUWatchpoint hw_watchpoint;
 /*
  * We don't use a list because this structure is also used to transmit the
@@ -347,6 +358,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
      */
     /* kvm_vm_enable_cap(s, KVM_CAP_S390_AIS, 0); */
 
+    kvm_set_max_memslot_size(KVM_SLOT_MAX_BYTES);
     return 0;
 }
 
-- 
2.18.1

Re: [Qemu-devel] [PATCH for-4.2 v5 1/2] kvm: s390: split too big memory section on several memslots

Posted by Cornelia Huck 6 years, 5 months ago

On Wed,  7 Aug 2019 11:32:41 -0400
Igor Mammedov <imammedo@redhat.com> wrote:

> Max memslot size supported by kvm on s390 is 8Tb,
> move logic of splitting RAM in chunks upto 8T to KVM code.
> 
> This way it will hide KVM specific restrictions in KVM code
> and won't affect baord level design decisions. Which would allow
> us to avoid misusing memory_region_allocate_system_memory() API
> and eventually use a single hostmem backend for guest RAM.
> 
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> ---
> v5:
>   * move computation 'size -= slot_size' inside of loop body
>           (David Hildenbrand <david@redhat.com>)
> v4:
>   * fix compilation issue
>           (Christian Borntraeger <borntraeger@de.ibm.com>)
>   * advance HVA along with GPA in kvm_set_phys_mem()
>           (Christian Borntraeger <borntraeger@de.ibm.com>)
> 
> patch prepares only KVM side for switching to single RAM memory region
> another patch will take care of  dropping manual RAM partitioning in
> s390 code.

I may have lost track a bit -- what is the status of this patch (and
the series)?

> ---
>  include/sysemu/kvm_int.h   |  1 +
>  accel/kvm/kvm-all.c        | 82 ++++++++++++++++++++++++--------------
>  hw/s390x/s390-virtio-ccw.c |  9 -----
>  target/s390x/kvm.c         | 12 ++++++
>  4 files changed, 64 insertions(+), 40 deletions(-)
> 
> diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
> index 31df465fdc..7f7520bce2 100644
> --- a/include/sysemu/kvm_int.h
> +++ b/include/sysemu/kvm_int.h
> @@ -41,4 +41,5 @@ typedef struct KVMMemoryListener {
>  void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
>                                    AddressSpace *as, int as_id);
>  
> +void kvm_set_max_memslot_size(hwaddr max_slot_size);
>  #endif
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index f450f25295..8153556335 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -138,6 +138,7 @@ bool kvm_direct_msi_allowed;
>  bool kvm_ioeventfd_any_length_allowed;
>  bool kvm_msi_use_devid;
>  static bool kvm_immediate_exit;
> +static hwaddr kvm_max_slot_size = ~0;
>  
>  static const KVMCapabilityInfo kvm_required_capabilites[] = {
>      KVM_CAP_INFO(USER_MEMORY),
> @@ -951,6 +952,14 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
>      return NULL;
>  }
>  
> +void kvm_set_max_memslot_size(hwaddr max_slot_size)
> +{
> +    g_assert(
> +        ROUND_UP(max_slot_size, qemu_real_host_page_size) == max_slot_size
> +    );
> +    kvm_max_slot_size = max_slot_size;
> +}
> +
>  static void kvm_set_phys_mem(KVMMemoryListener *kml,
>                               MemoryRegionSection *section, bool add)
>  {
> @@ -958,7 +967,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
>      int err;
>      MemoryRegion *mr = section->mr;
>      bool writeable = !mr->readonly && !mr->rom_device;
> -    hwaddr start_addr, size;
> +    hwaddr start_addr, size, slot_size;
>      void *ram;
>  
>      if (!memory_region_is_ram(mr)) {
> @@ -983,41 +992,52 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
>      kvm_slots_lock(kml);
>  
>      if (!add) {
> -        mem = kvm_lookup_matching_slot(kml, start_addr, size);
> -        if (!mem) {
> -            goto out;
> -        }
> -        if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
> -            kvm_physical_sync_dirty_bitmap(kml, section);
> -        }
> +        do {
> +            slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
> +            mem = kvm_lookup_matching_slot(kml, start_addr, slot_size);
> +            if (!mem) {
> +                goto out;
> +            }
> +            if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
> +                kvm_physical_sync_dirty_bitmap(kml, section);
> +            }
>  
> -        /* unregister the slot */
> -        g_free(mem->dirty_bmap);
> -        mem->dirty_bmap = NULL;
> -        mem->memory_size = 0;
> -        mem->flags = 0;
> -        err = kvm_set_user_memory_region(kml, mem, false);
> -        if (err) {
> -            fprintf(stderr, "%s: error unregistering slot: %s\n",
> -                    __func__, strerror(-err));
> -            abort();
> -        }
> +            /* unregister the slot */
> +            g_free(mem->dirty_bmap);
> +            mem->dirty_bmap = NULL;
> +            mem->memory_size = 0;
> +            mem->flags = 0;
> +            err = kvm_set_user_memory_region(kml, mem, false);
> +            if (err) {
> +                fprintf(stderr, "%s: error unregistering slot: %s\n",
> +                        __func__, strerror(-err));
> +                abort();
> +            }
> +            start_addr += slot_size;
> +            size -= slot_size;
> +        } while (size);
>          goto out;
>      }
>  
>      /* register the new slot */
> -    mem = kvm_alloc_slot(kml);
> -    mem->memory_size = size;
> -    mem->start_addr = start_addr;
> -    mem->ram = ram;
> -    mem->flags = kvm_mem_flags(mr);
> -
> -    err = kvm_set_user_memory_region(kml, mem, true);
> -    if (err) {
> -        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
> -                strerror(-err));
> -        abort();
> -    }
> +    do {
> +        slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
> +        mem = kvm_alloc_slot(kml);
> +        mem->memory_size = slot_size;
> +        mem->start_addr = start_addr;
> +        mem->ram = ram;
> +        mem->flags = kvm_mem_flags(mr);
> +
> +        err = kvm_set_user_memory_region(kml, mem, true);
> +        if (err) {
> +            fprintf(stderr, "%s: error registering slot: %s\n", __func__,
> +                    strerror(-err));
> +            abort();
> +        }
> +        start_addr += slot_size;
> +        ram += slot_size;
> +        size -= slot_size;
> +    } while (size);
>  
>  out:
>      kvm_slots_unlock(kml);
> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> index 5b6a9a4e55..0c03ffb7c7 100644
> --- a/hw/s390x/s390-virtio-ccw.c
> +++ b/hw/s390x/s390-virtio-ccw.c
> @@ -151,15 +151,6 @@ static void virtio_ccw_register_hcalls(void)
>                                     virtio_ccw_hcall_early_printk);
>  }
>  
> -/*
> - * KVM does only support memory slots up to KVM_MEM_MAX_NR_PAGES pages
> - * as the dirty bitmap must be managed by bitops that take an int as
> - * position indicator. If we have a guest beyond that we will split off
> - * new subregions. The split must happen on a segment boundary (1MB).
> - */
> -#define KVM_MEM_MAX_NR_PAGES ((1ULL << 31) - 1)
> -#define SEG_MSK (~0xfffffULL)
> -#define KVM_SLOT_MAX_BYTES ((KVM_MEM_MAX_NR_PAGES * TARGET_PAGE_SIZE) & SEG_MSK)
>  static void s390_memory_init(ram_addr_t mem_size)
>  {
>      MemoryRegion *sysmem = get_system_memory();
> diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
> index 6e814c230b..6b1428a760 100644
> --- a/target/s390x/kvm.c
> +++ b/target/s390x/kvm.c
> @@ -28,6 +28,7 @@
>  #include "cpu.h"
>  #include "internal.h"
>  #include "kvm_s390x.h"
> +#include "sysemu/kvm_int.h"
>  #include "qapi/error.h"
>  #include "qemu/error-report.h"
>  #include "qemu/timer.h"
> @@ -122,6 +123,16 @@
>  #define VCPU_IRQ_BUF_SIZE(max_cpus) (sizeof(struct kvm_s390_irq) * \
>                                       (max_cpus + NR_LOCAL_IRQS))
>  
> +/*
> + * KVM does only support memory slots up to KVM_MEM_MAX_NR_PAGES pages
> + * as the dirty bitmap must be managed by bitops that take an int as
> + * position indicator. If we have a guest beyond that we will split off
> + * new subregions. The split must happen on a segment boundary (1MB).
> + */
> +#define KVM_MEM_MAX_NR_PAGES ((1ULL << 31) - 1)
> +#define SEG_MSK (~0xfffffULL)
> +#define KVM_SLOT_MAX_BYTES ((KVM_MEM_MAX_NR_PAGES * TARGET_PAGE_SIZE) & SEG_MSK)
> +
>  static CPUWatchpoint hw_watchpoint;
>  /*
>   * We don't use a list because this structure is also used to transmit the
> @@ -347,6 +358,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>       */
>      /* kvm_vm_enable_cap(s, KVM_CAP_S390_AIS, 0); */
>  
> +    kvm_set_max_memslot_size(KVM_SLOT_MAX_BYTES);
>      return 0;
>  }
>

Re: [Qemu-devel] [PATCH for-4.2 v5 1/2] kvm: s390: split too big memory section on several memslots

Posted by Igor Mammedov 6 years, 5 months ago

On Tue, 20 Aug 2019 18:07:27 +0200
Cornelia Huck <cohuck@redhat.com> wrote:

> On Wed,  7 Aug 2019 11:32:41 -0400
> Igor Mammedov <imammedo@redhat.com> wrote:
> 
> > Max memslot size supported by kvm on s390 is 8Tb,
> > move logic of splitting RAM in chunks upto 8T to KVM code.
> > 
> > This way it will hide KVM specific restrictions in KVM code
> > and won't affect baord level design decisions. Which would allow
> > us to avoid misusing memory_region_allocate_system_memory() API
> > and eventually use a single hostmem backend for guest RAM.
> > 
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > ---
> > v5:
> >   * move computation 'size -= slot_size' inside of loop body
> >           (David Hildenbrand <david@redhat.com>)
> > v4:
> >   * fix compilation issue
> >           (Christian Borntraeger <borntraeger@de.ibm.com>)
> >   * advance HVA along with GPA in kvm_set_phys_mem()
> >           (Christian Borntraeger <borntraeger@de.ibm.com>)
> > 
> > patch prepares only KVM side for switching to single RAM memory region
> > another patch will take care of  dropping manual RAM partitioning in
> > s390 code.  
> 
> I may have lost track a bit -- what is the status of this patch (and
> the series)?

Christian,

could you test it on a host that have sufficient amount of RAM?

PS:
I've only simulated test on smaller host with KVM enabled
(by reducing KVM_SLOT_MAX_BYTES size) to verify that KVM code splits
RAM on smaller chunks.

PS2:
Also considering we decided to drop migration concerns there
is no need to check migration on a large enough host anymore.


> 
> > ---
> >  include/sysemu/kvm_int.h   |  1 +
> >  accel/kvm/kvm-all.c        | 82 ++++++++++++++++++++++++--------------
> >  hw/s390x/s390-virtio-ccw.c |  9 -----
> >  target/s390x/kvm.c         | 12 ++++++
> >  4 files changed, 64 insertions(+), 40 deletions(-)
> > 
> > diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
> > index 31df465fdc..7f7520bce2 100644
> > --- a/include/sysemu/kvm_int.h
> > +++ b/include/sysemu/kvm_int.h
> > @@ -41,4 +41,5 @@ typedef struct KVMMemoryListener {
> >  void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
> >                                    AddressSpace *as, int as_id);
> >  
> > +void kvm_set_max_memslot_size(hwaddr max_slot_size);
> >  #endif
> > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> > index f450f25295..8153556335 100644
> > --- a/accel/kvm/kvm-all.c
> > +++ b/accel/kvm/kvm-all.c
> > @@ -138,6 +138,7 @@ bool kvm_direct_msi_allowed;
> >  bool kvm_ioeventfd_any_length_allowed;
> >  bool kvm_msi_use_devid;
> >  static bool kvm_immediate_exit;
> > +static hwaddr kvm_max_slot_size = ~0;
> >  
> >  static const KVMCapabilityInfo kvm_required_capabilites[] = {
> >      KVM_CAP_INFO(USER_MEMORY),
> > @@ -951,6 +952,14 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
> >      return NULL;
> >  }
> >  
> > +void kvm_set_max_memslot_size(hwaddr max_slot_size)
> > +{
> > +    g_assert(
> > +        ROUND_UP(max_slot_size, qemu_real_host_page_size) == max_slot_size
> > +    );
> > +    kvm_max_slot_size = max_slot_size;
> > +}
> > +
> >  static void kvm_set_phys_mem(KVMMemoryListener *kml,
> >                               MemoryRegionSection *section, bool add)
> >  {
> > @@ -958,7 +967,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
> >      int err;
> >      MemoryRegion *mr = section->mr;
> >      bool writeable = !mr->readonly && !mr->rom_device;
> > -    hwaddr start_addr, size;
> > +    hwaddr start_addr, size, slot_size;
> >      void *ram;
> >  
> >      if (!memory_region_is_ram(mr)) {
> > @@ -983,41 +992,52 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
> >      kvm_slots_lock(kml);
> >  
> >      if (!add) {
> > -        mem = kvm_lookup_matching_slot(kml, start_addr, size);
> > -        if (!mem) {
> > -            goto out;
> > -        }
> > -        if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
> > -            kvm_physical_sync_dirty_bitmap(kml, section);
> > -        }
> > +        do {
> > +            slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
> > +            mem = kvm_lookup_matching_slot(kml, start_addr, slot_size);
> > +            if (!mem) {
> > +                goto out;
> > +            }
> > +            if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
> > +                kvm_physical_sync_dirty_bitmap(kml, section);
> > +            }
> >  
> > -        /* unregister the slot */
> > -        g_free(mem->dirty_bmap);
> > -        mem->dirty_bmap = NULL;
> > -        mem->memory_size = 0;
> > -        mem->flags = 0;
> > -        err = kvm_set_user_memory_region(kml, mem, false);
> > -        if (err) {
> > -            fprintf(stderr, "%s: error unregistering slot: %s\n",
> > -                    __func__, strerror(-err));
> > -            abort();
> > -        }
> > +            /* unregister the slot */
> > +            g_free(mem->dirty_bmap);
> > +            mem->dirty_bmap = NULL;
> > +            mem->memory_size = 0;
> > +            mem->flags = 0;
> > +            err = kvm_set_user_memory_region(kml, mem, false);
> > +            if (err) {
> > +                fprintf(stderr, "%s: error unregistering slot: %s\n",
> > +                        __func__, strerror(-err));
> > +                abort();
> > +            }
> > +            start_addr += slot_size;
> > +            size -= slot_size;
> > +        } while (size);
> >          goto out;
> >      }
> >  
> >      /* register the new slot */
> > -    mem = kvm_alloc_slot(kml);
> > -    mem->memory_size = size;
> > -    mem->start_addr = start_addr;
> > -    mem->ram = ram;
> > -    mem->flags = kvm_mem_flags(mr);
> > -
> > -    err = kvm_set_user_memory_region(kml, mem, true);
> > -    if (err) {
> > -        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
> > -                strerror(-err));
> > -        abort();
> > -    }
> > +    do {
> > +        slot_size = kvm_max_slot_size < size ? kvm_max_slot_size : size;
> > +        mem = kvm_alloc_slot(kml);
> > +        mem->memory_size = slot_size;
> > +        mem->start_addr = start_addr;
> > +        mem->ram = ram;
> > +        mem->flags = kvm_mem_flags(mr);
> > +
> > +        err = kvm_set_user_memory_region(kml, mem, true);
> > +        if (err) {
> > +            fprintf(stderr, "%s: error registering slot: %s\n", __func__,
> > +                    strerror(-err));
> > +            abort();
> > +        }
> > +        start_addr += slot_size;
> > +        ram += slot_size;
> > +        size -= slot_size;
> > +    } while (size);
> >  
> >  out:
> >      kvm_slots_unlock(kml);
> > diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> > index 5b6a9a4e55..0c03ffb7c7 100644
> > --- a/hw/s390x/s390-virtio-ccw.c
> > +++ b/hw/s390x/s390-virtio-ccw.c
> > @@ -151,15 +151,6 @@ static void virtio_ccw_register_hcalls(void)
> >                                     virtio_ccw_hcall_early_printk);
> >  }
> >  
> > -/*
> > - * KVM does only support memory slots up to KVM_MEM_MAX_NR_PAGES pages
> > - * as the dirty bitmap must be managed by bitops that take an int as
> > - * position indicator. If we have a guest beyond that we will split off
> > - * new subregions. The split must happen on a segment boundary (1MB).
> > - */
> > -#define KVM_MEM_MAX_NR_PAGES ((1ULL << 31) - 1)
> > -#define SEG_MSK (~0xfffffULL)
> > -#define KVM_SLOT_MAX_BYTES ((KVM_MEM_MAX_NR_PAGES * TARGET_PAGE_SIZE) & SEG_MSK)
> >  static void s390_memory_init(ram_addr_t mem_size)
> >  {
> >      MemoryRegion *sysmem = get_system_memory();
> > diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
> > index 6e814c230b..6b1428a760 100644
> > --- a/target/s390x/kvm.c
> > +++ b/target/s390x/kvm.c
> > @@ -28,6 +28,7 @@
> >  #include "cpu.h"
> >  #include "internal.h"
> >  #include "kvm_s390x.h"
> > +#include "sysemu/kvm_int.h"
> >  #include "qapi/error.h"
> >  #include "qemu/error-report.h"
> >  #include "qemu/timer.h"
> > @@ -122,6 +123,16 @@
> >  #define VCPU_IRQ_BUF_SIZE(max_cpus) (sizeof(struct kvm_s390_irq) * \
> >                                       (max_cpus + NR_LOCAL_IRQS))
> >  
> > +/*
> > + * KVM does only support memory slots up to KVM_MEM_MAX_NR_PAGES pages
> > + * as the dirty bitmap must be managed by bitops that take an int as
> > + * position indicator. If we have a guest beyond that we will split off
> > + * new subregions. The split must happen on a segment boundary (1MB).
> > + */
> > +#define KVM_MEM_MAX_NR_PAGES ((1ULL << 31) - 1)
> > +#define SEG_MSK (~0xfffffULL)
> > +#define KVM_SLOT_MAX_BYTES ((KVM_MEM_MAX_NR_PAGES * TARGET_PAGE_SIZE) & SEG_MSK)
> > +
> >  static CPUWatchpoint hw_watchpoint;
> >  /*
> >   * We don't use a list because this structure is also used to transmit the
> > @@ -347,6 +358,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
> >       */
> >      /* kvm_vm_enable_cap(s, KVM_CAP_S390_AIS, 0); */
> >  
> > +    kvm_set_max_memslot_size(KVM_SLOT_MAX_BYTES);
> >      return 0;
> >  }
> >    
>

Re: [Qemu-devel] [PATCH for-4.2 v5 1/2] kvm: s390: split too big memory section on several memslots

Posted by Christian Borntraeger 6 years, 5 months ago


On 27.08.19 14:56, Igor Mammedov wrote:
> On Tue, 20 Aug 2019 18:07:27 +0200
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
>> On Wed,  7 Aug 2019 11:32:41 -0400
>> Igor Mammedov <imammedo@redhat.com> wrote:
>>
>>> Max memslot size supported by kvm on s390 is 8Tb,
>>> move logic of splitting RAM in chunks upto 8T to KVM code.
>>>
>>> This way it will hide KVM specific restrictions in KVM code
>>> and won't affect baord level design decisions. Which would allow
>>> us to avoid misusing memory_region_allocate_system_memory() API
>>> and eventually use a single hostmem backend for guest RAM.
>>>
>>> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
>>> ---
>>> v5:
>>>   * move computation 'size -= slot_size' inside of loop body
>>>           (David Hildenbrand <david@redhat.com>)
>>> v4:
>>>   * fix compilation issue
>>>           (Christian Borntraeger <borntraeger@de.ibm.com>)
>>>   * advance HVA along with GPA in kvm_set_phys_mem()
>>>           (Christian Borntraeger <borntraeger@de.ibm.com>)
>>>
>>> patch prepares only KVM side for switching to single RAM memory region
>>> another patch will take care of  dropping manual RAM partitioning in
>>> s390 code.  
>>
>> I may have lost track a bit -- what is the status of this patch (and
>> the series)?
> 
> Christian,
> 
> could you test it on a host that have sufficient amount of RAM?


This version looks good. I was able to start a 9TB guest.
[pid 215723] ioctl(10, KVM_SET_USER_MEMORY_REGION, {slot=0, flags=0, guest_phys_addr=0, memory_size=8796091973632, userspace_addr=0x3ffee700000}) = 0
[pid 215723] ioctl(10, KVM_SET_USER_MEMORY_REGION, {slot=1, flags=0, guest_phys_addr=0x7fffff00000, memory_size=1099512676352, userspace_addr=0xbffee600000}) = 0

The only question is if we want to fix the weird alignment (0x7fffff00000) when
we already add a migration barrier for uber-large guests.
Maybe we could split at 4TB to avoid future problem with larger page sizes?

Re: [Qemu-devel] [PATCH for-4.2 v5 1/2] kvm: s390: split too big memory section on several memslots

Posted by Igor Mammedov 6 years, 5 months ago

On Thu, 29 Aug 2019 08:47:49 +0200
Christian Borntraeger <borntraeger@de.ibm.com> wrote:

> On 27.08.19 14:56, Igor Mammedov wrote:
> > On Tue, 20 Aug 2019 18:07:27 +0200
> > Cornelia Huck <cohuck@redhat.com> wrote:
> >   
> >> On Wed,  7 Aug 2019 11:32:41 -0400
> >> Igor Mammedov <imammedo@redhat.com> wrote:
> >>  
> >>> Max memslot size supported by kvm on s390 is 8Tb,
> >>> move logic of splitting RAM in chunks upto 8T to KVM code.
> >>>
> >>> This way it will hide KVM specific restrictions in KVM code
> >>> and won't affect baord level design decisions. Which would allow
> >>> us to avoid misusing memory_region_allocate_system_memory() API
> >>> and eventually use a single hostmem backend for guest RAM.
> >>>
> >>> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> >>> ---
> >>> v5:
> >>>   * move computation 'size -= slot_size' inside of loop body
> >>>           (David Hildenbrand <david@redhat.com>)
> >>> v4:
> >>>   * fix compilation issue
> >>>           (Christian Borntraeger <borntraeger@de.ibm.com>)
> >>>   * advance HVA along with GPA in kvm_set_phys_mem()
> >>>           (Christian Borntraeger <borntraeger@de.ibm.com>)
> >>>
> >>> patch prepares only KVM side for switching to single RAM memory region
> >>> another patch will take care of  dropping manual RAM partitioning in
> >>> s390 code.    
> >>
> >> I may have lost track a bit -- what is the status of this patch (and
> >> the series)?  
> > 
> > Christian,
> > 
> > could you test it on a host that have sufficient amount of RAM?  
> 
> 
> This version looks good. I was able to start a 9TB guest.
> [pid 215723] ioctl(10, KVM_SET_USER_MEMORY_REGION, {slot=0, flags=0, guest_phys_addr=0, memory_size=8796091973632, userspace_addr=0x3ffee700000}) = 0
> [pid 215723] ioctl(10, KVM_SET_USER_MEMORY_REGION, {slot=1, flags=0, guest_phys_addr=0x7fffff00000, memory_size=1099512676352, userspace_addr=0xbffee600000}) = 0
>
> The only question is if we want to fix the weird alignment (0x7fffff00000) when
> we already add a migration barrier for uber-large guests.
> Maybe we could split at 4TB to avoid future problem with larger page sizes?
That probably should be a separate patch on top.