[Qemu-devel] [PATCH v4] rcu: reduce more than 7MB heap memory by malloc_trim()

Yang Zhong posted 1 patch 6 years, 4 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/1513775806-19779-1-git-send-email-yang.zhong@intel.com
Test checkpatch passed
Test docker passed
Test ppc passed
Test s390x passed
configure  | 35 +++++++++++++++++++++++++++++++++++
util/rcu.c |  6 ++++++
2 files changed, 41 insertions(+)
[Qemu-devel] [PATCH v4] rcu: reduce more than 7MB heap memory by malloc_trim()
Posted by Yang Zhong 6 years, 4 months ago
Since there are some issues in memory alloc/free machenism
in glibc for little chunk memory, if Qemu frequently
alloc/free little chunk memory, the glibc doesn't alloc
little chunk memory from free list of glibc and still
allocate from OS, which make the heap size bigger and bigger.

This patch introduce malloc_trim(), which will free heap
memory when there is no rcu call during rcu thread loop.
malloc_trim() can be enabled/disabled by --enable-malloc-trim/
--disable-malloc-trim in the Qemu configure command. The
default malloc_trim() is enabled for libc.

Below are test results from smaps file.
(1)without patch
55f0783e1000-55f07992a000 rw-p 00000000 00:00 0  [heap]
Size:              21796 kB
Rss:               14260 kB
Pss:               14260 kB

(2)with patch
55cc5fadf000-55cc61008000 rw-p 00000000 00:00 0  [heap]
Size:              21668 kB
Rss:                6940 kB
Pss:                6940 kB

Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
 configure  | 35 +++++++++++++++++++++++++++++++++++
 util/rcu.c |  6 ++++++
 2 files changed, 41 insertions(+)

diff --git a/configure b/configure
index 9c8aa5a..afdb1ef 100755
--- a/configure
+++ b/configure
@@ -426,6 +426,7 @@ vxhs=""
 supported_cpu="no"
 supported_os="no"
 bogus_os="no"
+malloc_trim=""
 
 # parse CC options first
 for opt do
@@ -1047,6 +1048,10 @@ for opt do
   ;;
   --enable-tcg) tcg="yes"
   ;;
+  --disable-malloc-trim) malloc_trim="no"
+  ;;
+  --enable-malloc-trim) malloc_trim="yes"
+  ;;
   --disable-spice) spice="no"
   ;;
   --enable-spice) spice="yes"
@@ -1466,6 +1471,7 @@ Advanced options (experts only):
                            Default:trace-<pid>
   --disable-slirp          disable SLIRP userspace network connectivity
   --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI)
+  --enable-malloc-trim     enable libc malloc_trim() for memory optimization
   --oss-lib                path to OSS library
   --cpu=CPU                Build for host CPU [$cpu]
   --with-coroutine=BACKEND coroutine backend. Supported options:
@@ -3860,6 +3866,30 @@ if test "$tcmalloc" = "yes" && test "$jemalloc" = "yes" ; then
     exit 1
 fi
 
+# Even if malloc_trim() is available, these non-libc memory allocators
+# do not support it.
+if test "$tcmalloc" = "yes" || test "$jemalloc" = "yes" ; then
+    if test "$malloc_trim" = "yes" ; then
+        echo "Disabling malloc_trim with non-libc memory allocator"
+    fi
+    malloc_trim="no"
+fi
+
+#######################################
+# malloc_trim
+
+if test "$malloc_trim" != "no" ; then
+    cat > $TMPC << EOF
+#include <malloc.h>
+int main(void) { malloc_trim(0); return 0; }
+EOF
+    if compile_prog "" "" ; then
+        malloc_trim="yes"
+    else
+        malloc_trim="no"
+    fi
+fi
+
 ##########################################
 # tcmalloc probe
 
@@ -5505,6 +5535,7 @@ if test "$tcg" = "yes" ; then
     echo "TCG debug enabled $debug_tcg"
     echo "TCG interpreter   $tcg_interpreter"
 fi
+echo "malloc trim support $malloc_trim"
 echo "RDMA support      $rdma"
 echo "fdt support       $fdt"
 echo "preadv support    $preadv"
@@ -6015,6 +6046,10 @@ if test "$opengl" = "yes" ; then
   fi
 fi
 
+if test "$malloc_trim" = "yes" ; then
+  echo "CONFIG_MALLOC_TRIM=y" >> $config_host_mak
+fi
+
 if test "$avx2_opt" = "yes" ; then
   echo "CONFIG_AVX2_OPT=y" >> $config_host_mak
 fi
diff --git a/util/rcu.c b/util/rcu.c
index ca5a63e..f4d09c8 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -32,6 +32,9 @@
 #include "qemu/atomic.h"
 #include "qemu/thread.h"
 #include "qemu/main-loop.h"
+#if defined(CONFIG_MALLOC_TRIM)
+#include <malloc.h>
+#endif
 
 /*
  * Global grace period counter.  Bit 0 is always one in rcu_gp_ctr.
@@ -246,6 +249,9 @@ static void *call_rcu_thread(void *opaque)
                 qemu_event_reset(&rcu_call_ready_event);
                 n = atomic_read(&rcu_call_count);
                 if (n == 0) {
+#if defined(CONFIG_MALLOC_TRIM)
+                    malloc_trim(4 * 1024 * 1024);
+#endif
                     qemu_event_wait(&rcu_call_ready_event);
                 }
             }
-- 
1.9.1


Re: [Qemu-devel] [PATCH v4] rcu: reduce more than 7MB heap memory by malloc_trim()
Posted by Stefan Hajnoczi 6 years, 4 months ago
On Wed, Dec 20, 2017 at 09:16:46PM +0800, Yang Zhong wrote:
> Since there are some issues in memory alloc/free machenism
> in glibc for little chunk memory, if Qemu frequently
> alloc/free little chunk memory, the glibc doesn't alloc
> little chunk memory from free list of glibc and still
> allocate from OS, which make the heap size bigger and bigger.
> 
> This patch introduce malloc_trim(), which will free heap
> memory when there is no rcu call during rcu thread loop.
> malloc_trim() can be enabled/disabled by --enable-malloc-trim/
> --disable-malloc-trim in the Qemu configure command. The
> default malloc_trim() is enabled for libc.
> 
> Below are test results from smaps file.
> (1)without patch
> 55f0783e1000-55f07992a000 rw-p 00000000 00:00 0  [heap]
> Size:              21796 kB
> Rss:               14260 kB
> Pss:               14260 kB
> 
> (2)with patch
> 55cc5fadf000-55cc61008000 rw-p 00000000 00:00 0  [heap]
> Size:              21668 kB
> Rss:                6940 kB
> Pss:                6940 kB
> 
> Signed-off-by: Yang Zhong <yang.zhong@intel.com>
> ---
>  configure  | 35 +++++++++++++++++++++++++++++++++++
>  util/rcu.c |  6 ++++++
>  2 files changed, 41 insertions(+)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>