From nobody Tue Apr 30 06:24:56 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zoho.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1492076644021622.5383148362066; Thu, 13 Apr 2017 02:44:04 -0700 (PDT) Received: from localhost ([::1]:48290 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cybIJ-00036T-0f for importer@patchew.org; Thu, 13 Apr 2017 05:44:03 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:40092) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cybEW-0008UK-Bv for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:09 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1cybES-0008IU-Te for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:08 -0400 Received: from mga02.intel.com ([134.134.136.20]:11319) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1cybES-0008Hg-M8 for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:04 -0400 Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga101.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 13 Apr 2017 02:40:03 -0700 Received: from devel-ww.sh.intel.com ([10.239.48.105]) by orsmga004.jf.intel.com with ESMTP; 13 Apr 2017 02:39:58 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.37,194,1488873600"; d="scan'208";a="76894362" From: Wei Wang To: virtio-dev@lists.oasis-open.org, linux-kernel@vger.kernel.org, qemu-devel@nongnu.org, virtualization@lists.linux-foundation.org, kvm@vger.kernel.org, linux-mm@kvack.org, mst@redhat.com, 
david@redhat.com, dave.hansen@intel.com, cornelia.huck@de.ibm.com, akpm@linux-foundation.org, mgorman@techsingularity.net, aarcange@redhat.com, amit.shah@redhat.com, pbonzini@redhat.com, wei.w.wang@intel.com, liliang.opensource@gmail.com Date: Thu, 13 Apr 2017 17:35:04 +0800 Message-Id: <1492076108-117229-2-git-send-email-wei.w.wang@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1492076108-117229-1-git-send-email-wei.w.wang@intel.com> References: <1492076108-117229-1-git-send-email-wei.w.wang@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.20 Subject: [Qemu-devel] [PATCH v9 1/5] virtio-balloon: deflate via a page list X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" From: Liang Li This patch saves the deflated pages to a list, instead of the PFN array. Accordingly, the balloon_pfn_to_page() function is removed. Signed-off-by: Liang Li Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Wei Wang --- drivers/virtio/virtio_balloon.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloo= n.c index 181793f..f59cb4f 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -103,12 +103,6 @@ static u32 page_to_balloon_pfn(struct page *page) return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE; } =20 -static struct page *balloon_pfn_to_page(u32 pfn) -{ - BUG_ON(pfn % VIRTIO_BALLOON_PAGES_PER_PAGE); - return pfn_to_page(pfn / VIRTIO_BALLOON_PAGES_PER_PAGE); -} - static void balloon_ack(struct virtqueue *vq) { struct virtio_balloon *vb =3D vq->vdev->priv; @@ -181,18 +175,16 @@ static unsigned fill_balloon(struct virtio_balloon *v= b, size_t num) return num_allocated_pages; } =20 -static void release_pages_balloon(struct virtio_balloon *vb) +static void release_pages_balloon(struct virtio_balloon *vb, + struct list_head *pages) { - unsigned int i; - struct page *page; + struct page *page, *next; =20 - /* Find pfns pointing at start of each page, get pages and free them. */ - for (i =3D 0; i < vb->num_pfns; i +=3D VIRTIO_BALLOON_PAGES_PER_PAGE) { - page =3D balloon_pfn_to_page(virtio32_to_cpu(vb->vdev, - vb->pfns[i])); + list_for_each_entry_safe(page, next, pages, lru) { if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) adjust_managed_page_count(page, 1); + list_del(&page->lru); put_page(page); /* balloon reference */ } } @@ -202,6 +194,7 @@ static unsigned leak_balloon(struct virtio_balloon *vb,= size_t num) unsigned num_freed_pages; struct page *page; struct balloon_dev_info *vb_dev_info =3D &vb->vb_dev_info; + LIST_HEAD(pages); =20 /* We can only do one array worth at a time. 
*/ num =3D min(num, ARRAY_SIZE(vb->pfns)); @@ -215,6 +208,7 @@ static unsigned leak_balloon(struct virtio_balloon *vb,= size_t num) if (!page) break; set_page_pfns(vb, vb->pfns + vb->num_pfns, page); + list_add(&page->lru, &pages); vb->num_pages -=3D VIRTIO_BALLOON_PAGES_PER_PAGE; } =20 @@ -226,7 +220,7 @@ static unsigned leak_balloon(struct virtio_balloon *vb,= size_t num) */ if (vb->num_pfns !=3D 0) tell_host(vb, vb->deflate_vq); - release_pages_balloon(vb); + release_pages_balloon(vb, &pages); mutex_unlock(&vb->balloon_lock); return num_freed_pages; } --=20 2.7.4 From nobody Tue Apr 30 06:24:56 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zoho.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1492076515576683.919624148003; Thu, 13 Apr 2017 02:41:55 -0700 (PDT) Received: from localhost ([::1]:48279 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cybGD-0001AJ-NK for importer@patchew.org; Thu, 13 Apr 2017 05:41:53 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:40115) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cybEZ-00006V-4X for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:13 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1cybEW-0008NX-TW for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:11 -0400 Received: from mga02.intel.com ([134.134.136.20]:11319) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1cybEW-0008Hg-H0 for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:08 
-0400 Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga101.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 13 Apr 2017 02:40:08 -0700 Received: from devel-ww.sh.intel.com ([10.239.48.105]) by orsmga004.jf.intel.com with ESMTP; 13 Apr 2017 02:40:03 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.37,194,1488873600"; d="scan'208";a="76894422" From: Wei Wang To: virtio-dev@lists.oasis-open.org, linux-kernel@vger.kernel.org, qemu-devel@nongnu.org, virtualization@lists.linux-foundation.org, kvm@vger.kernel.org, linux-mm@kvack.org, mst@redhat.com, david@redhat.com, dave.hansen@intel.com, cornelia.huck@de.ibm.com, akpm@linux-foundation.org, mgorman@techsingularity.net, aarcange@redhat.com, amit.shah@redhat.com, pbonzini@redhat.com, wei.w.wang@intel.com, liliang.opensource@gmail.com Date: Thu, 13 Apr 2017 17:35:05 +0800 Message-Id: <1492076108-117229-3-git-send-email-wei.w.wang@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1492076108-117229-1-git-send-email-wei.w.wang@intel.com> References: <1492076108-117229-1-git-send-email-wei.w.wang@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.20 Subject: [Qemu-devel] [PATCH v9 2/5] virtio-balloon: VIRTIO_BALLOON_F_BALLOON_CHUNKS X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Add a new feature, VIRTIO_BALLOON_F_BALLOON_CHUNKS, which enables the transfer of the ballooned (i.e. inflated/deflated) pages in chunks to the host. The implementation of the previous virtio-balloon is not very efficient, because the ballooned pages are transferred to the host one by one. 
Here is the breakdown of the percentage of time spent on each step of the balloon inflating process (inflating 7GB of an 8GB idle guest). 1) allocating pages (6.5%) 2) sending PFNs to host (68.3%) 3) address translation (6.1%) 4) madvise (19%) It takes about 4126ms for the inflating process to complete. The above profiling shows that the bottlenecks are step 2) and step 4). This patch optimizes step 2) by transferring pages to the host in chunks. A chunk consists of guest physically contiguous pages, and it is offered to the host via a base PFN (i.e. the start PFN of those physically contiguous pages) and the size (i.e. the total number of the pages). A chunk is formatted as below: Suggested-by: Michael S. Tsirkin -------------------------------------------------------- | Base (52 bit) | Rsvd (12 bit) | -------------------------------------------------------- -------------------------------------------------------- | Size (52 bit) | Rsvd (12 bit) | -------------------------------------------------------- By doing so, step 4) can also be optimized by doing address translation and madvise() in chunks rather than page by page. With this new feature, the above ballooning process takes ~590ms, resulting in an improvement of ~85%. TODO: optimize step 1) by allocating/freeing a chunk of pages instead of a single page each time. Signed-off-by: Wei Wang Signed-off-by: Liang Li Suggested-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_balloon.c | 384 ++++++++++++++++++++++++++++++++= +--- include/uapi/linux/virtio_balloon.h | 13 ++ 2 files changed, 374 insertions(+), 23 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloo= n.c index f59cb4f..5e2e7cc 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -42,6 +42,10 @@ #define OOM_VBALLOON_DEFAULT_PAGES 256 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 =20 +#define PAGE_BMAP_SIZE (8 * PAGE_SIZE) +#define PFNS_PER_PAGE_BMAP (PAGE_BMAP_SIZE * BITS_PER_BYTE) +#define PAGE_BMAP_COUNT_MAX 32 + static int oom_pages =3D OOM_VBALLOON_DEFAULT_PAGES; module_param(oom_pages, int, S_IRUSR | S_IWUSR); MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); @@ -50,6 +54,10 @@ MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); static struct vfsmount *balloon_mnt; #endif =20 +/* Types of pages to chunk */ +#define PAGE_CHUNK_TYPE_BALLOON 0 + +#define MAX_PAGE_CHUNKS 4096 struct virtio_balloon { struct virtio_device *vdev; struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; @@ -78,6 +86,32 @@ struct virtio_balloon { /* Synchronize access/update to this struct virtio_balloon elements */ struct mutex balloon_lock; =20 + /* + * Buffer for PAGE_CHUNK_TYPE_BALLOON: + * virtio_balloon_page_chunk_hdr + + * virtio_balloon_page_chunk * MAX_PAGE_CHUNKS + */ + struct virtio_balloon_page_chunk_hdr *balloon_page_chunk_hdr; + struct virtio_balloon_page_chunk *balloon_page_chunk; + + /* Bitmap used to record pages */ + unsigned long *page_bmap[PAGE_BMAP_COUNT_MAX]; + /* Number of the allocated page_bmap */ + unsigned int page_bmaps; + + /* + * The allocated page_bmap size may be smaller than the pfn range of + * the ballooned pages. In this case, we need to use the page_bmap + * multiple times to cover the entire pfn range. It's like using a + * short ruler several times to finish measuring a long object. 
+ * The start location of the ruler in the next measurement is the end + * location of the ruler in the previous measurement. + * + * pfn_max & pfn_min: forms the pfn range of the ballooned pages + * pfn_start & pfn_stop: records the start and stop pfn in each cover + */ + unsigned long pfn_min, pfn_max, pfn_start, pfn_stop; + /* The array of pfns we tell the Host about. */ unsigned int num_pfns; __virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX]; @@ -110,20 +144,201 @@ static void balloon_ack(struct virtqueue *vq) wake_up(&vb->acked); } =20 -static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) +static inline void init_page_bmap_range(struct virtio_balloon *vb) +{ + vb->pfn_min =3D ULONG_MAX; + vb->pfn_max =3D 0; +} + +static inline void update_page_bmap_range(struct virtio_balloon *vb, + struct page *page) +{ + unsigned long balloon_pfn =3D page_to_balloon_pfn(page); + + vb->pfn_min =3D min(balloon_pfn, vb->pfn_min); + vb->pfn_max =3D max(balloon_pfn, vb->pfn_max); +} + +/* The page_bmap size is extended by adding more number of page_bmap */ +static void extend_page_bmap_size(struct virtio_balloon *vb, + unsigned long pfns) +{ + int i, bmaps; + unsigned long bmap_len; + + bmap_len =3D ALIGN(pfns, BITS_PER_LONG) / BITS_PER_BYTE; + bmap_len =3D ALIGN(bmap_len, PAGE_BMAP_SIZE); + bmaps =3D min((int)(bmap_len / PAGE_BMAP_SIZE), + PAGE_BMAP_COUNT_MAX); + + for (i =3D 1; i < bmaps; i++) { + vb->page_bmap[i] =3D kmalloc(PAGE_BMAP_SIZE, GFP_KERNEL); + if (vb->page_bmap[i]) + vb->page_bmaps++; + else + break; + } +} + +static void free_extended_page_bmap(struct virtio_balloon *vb) +{ + int i, bmaps =3D vb->page_bmaps; + + for (i =3D 1; i < bmaps; i++) { + kfree(vb->page_bmap[i]); + vb->page_bmap[i] =3D NULL; + vb->page_bmaps--; + } +} + +static void free_page_bmap(struct virtio_balloon *vb) +{ + int i; + + for (i =3D 0; i < vb->page_bmaps; i++) + kfree(vb->page_bmap[i]); +} + +static void clear_page_bmap(struct virtio_balloon *vb) +{ + int i; + + for (i =3D 
0; i < vb->page_bmaps; i++) + memset(vb->page_bmap[i], 0, PAGE_BMAP_SIZE); +} + +static void send_page_chunks(struct virtio_balloon *vb, struct virtqueue *= vq, + int type, bool busy_wait) { struct scatterlist sg; + struct virtio_balloon_page_chunk_hdr *hdr; + void *buf; unsigned int len; =20 - sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); + switch (type) { + case PAGE_CHUNK_TYPE_BALLOON: + hdr =3D vb->balloon_page_chunk_hdr; + len =3D 0; + break; + default: + dev_warn(&vb->vdev->dev, "%s: chunk %d of unknown pages\n", + __func__, type); + return; + } =20 - /* We should always be able to add one buffer to an empty queue. */ - virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); - virtqueue_kick(vq); + buf =3D (void *)hdr - len; + len +=3D sizeof(struct virtio_balloon_page_chunk_hdr); + len +=3D hdr->chunks * sizeof(struct virtio_balloon_page_chunk); + sg_init_table(&sg, 1); + sg_set_buf(&sg, buf, len); + if (!virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL)) { + virtqueue_kick(vq); + if (busy_wait) + while (!virtqueue_get_buf(vq, &len) && + !virtqueue_is_broken(vq)) + cpu_relax(); + else + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + hdr->chunks =3D 0; + } +} + +static void add_one_chunk(struct virtio_balloon *vb, struct virtqueue *vq, + int type, u64 base, u64 size) +{ + struct virtio_balloon_page_chunk_hdr *hdr; + struct virtio_balloon_page_chunk *chunk; + + switch (type) { + case PAGE_CHUNK_TYPE_BALLOON: + hdr =3D vb->balloon_page_chunk_hdr; + chunk =3D vb->balloon_page_chunk; + break; + default: + dev_warn(&vb->vdev->dev, "%s: chunk %d of unknown pages\n", + __func__, type); + return; + } + chunk =3D chunk + hdr->chunks; + chunk->base =3D cpu_to_le64(base << VIRTIO_BALLOON_CHUNK_BASE_SHIFT); + chunk->size =3D cpu_to_le64(size << VIRTIO_BALLOON_CHUNK_SIZE_SHIFT); + hdr->chunks++; + if (hdr->chunks =3D=3D MAX_PAGE_CHUNKS) + send_page_chunks(vb, vq, type, false); +} + +static void chunking_pages_from_bmap(struct virtio_balloon *vb, + struct 
virtqueue *vq, + unsigned long pfn_start, + unsigned long *bmap, + unsigned long len) +{ + unsigned long pos =3D 0, end =3D len * BITS_PER_BYTE; + + while (pos < end) { + unsigned long one =3D find_next_bit(bmap, end, pos); + + if (one < end) { + unsigned long chunk_size, zero; + + zero =3D find_next_zero_bit(bmap, end, one + 1); + if (zero >=3D end) + chunk_size =3D end - one; + else + chunk_size =3D zero - one; + + if (chunk_size) + add_one_chunk(vb, vq, PAGE_CHUNK_TYPE_BALLOON, + pfn_start + one, chunk_size); + pos =3D one + chunk_size; + } else + break; + } +} + +static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) +{ + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_BALLOON_CHUNKS)) { + int pfns, page_bmaps, i; + unsigned long pfn_start, pfns_len; + + pfn_start =3D vb->pfn_start; + pfns =3D vb->pfn_stop - pfn_start + 1; + pfns =3D roundup(roundup(pfns, BITS_PER_LONG), + PFNS_PER_PAGE_BMAP); + page_bmaps =3D pfns / PFNS_PER_PAGE_BMAP; + pfns_len =3D pfns / BITS_PER_BYTE; + + for (i =3D 0; i < page_bmaps; i++) { + unsigned int bmap_len =3D PAGE_BMAP_SIZE; + + /* The last one takes the leftover only */ + if (i + 1 =3D=3D page_bmaps) + bmap_len =3D pfns_len - PAGE_BMAP_SIZE * i; + + chunking_pages_from_bmap(vb, vq, pfn_start + + i * PFNS_PER_PAGE_BMAP, + vb->page_bmap[i], bmap_len); + } + if (vb->balloon_page_chunk_hdr->chunks > 0) + send_page_chunks(vb, vq, PAGE_CHUNK_TYPE_BALLOON, + false); + } else { + struct scatterlist sg; + unsigned int len; =20 - /* When host has read buffer, this completes via balloon_ack */ - wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); =20 + /* + * We should always be able to add one buffer to an empty + * queue. 
+ */ + virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); + virtqueue_kick(vq); + + /* When host has read buffer, this completes via balloon_ack */ + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + } } =20 static void set_page_pfns(struct virtio_balloon *vb, @@ -131,20 +346,73 @@ static void set_page_pfns(struct virtio_balloon *vb, { unsigned int i; =20 - /* Set balloon pfns pointing at this page. - * Note that the first pfn points at start of the page. */ + /* + * Set balloon pfns pointing at this page. + * Note that the first pfn points at start of the page. + */ for (i =3D 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++) pfns[i] =3D cpu_to_virtio32(vb->vdev, page_to_balloon_pfn(page) + i); } =20 +static void set_page_bmap(struct virtio_balloon *vb, + struct list_head *pages, struct virtqueue *vq) +{ + unsigned long pfn_start, pfn_stop; + struct page *page; + bool found; + + vb->pfn_min =3D rounddown(vb->pfn_min, BITS_PER_LONG); + vb->pfn_max =3D roundup(vb->pfn_max, BITS_PER_LONG); + + extend_page_bmap_size(vb, vb->pfn_max - vb->pfn_min + 1); + pfn_start =3D vb->pfn_min; + + while (pfn_start < vb->pfn_max) { + pfn_stop =3D pfn_start + PFNS_PER_PAGE_BMAP * vb->page_bmaps; + pfn_stop =3D pfn_stop < vb->pfn_max ? 
pfn_stop : vb->pfn_max; + + vb->pfn_start =3D pfn_start; + clear_page_bmap(vb); + found =3D false; + + list_for_each_entry(page, pages, lru) { + unsigned long bmap_idx, bmap_pos, balloon_pfn; + + balloon_pfn =3D page_to_balloon_pfn(page); + if (balloon_pfn < pfn_start || balloon_pfn > pfn_stop) + continue; + bmap_idx =3D (balloon_pfn - pfn_start) / + PFNS_PER_PAGE_BMAP; + bmap_pos =3D (balloon_pfn - pfn_start) % + PFNS_PER_PAGE_BMAP; + set_bit(bmap_pos, vb->page_bmap[bmap_idx]); + + found =3D true; + } + if (found) { + vb->pfn_stop =3D pfn_stop; + tell_host(vb, vq); + } + pfn_start =3D pfn_stop; + } + free_extended_page_bmap(vb); +} + static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) { struct balloon_dev_info *vb_dev_info =3D &vb->vb_dev_info; unsigned num_allocated_pages; + bool chunking =3D virtio_has_feature(vb->vdev, + VIRTIO_BALLOON_F_BALLOON_CHUNKS); =20 /* We can only do one array worth at a time. */ - num =3D min(num, ARRAY_SIZE(vb->pfns)); + if (chunking) { + init_page_bmap_range(vb); + } else { + /* We can only do one array worth at a time. */ + num =3D min(num, ARRAY_SIZE(vb->pfns)); + } =20 mutex_lock(&vb->balloon_lock); for (vb->num_pfns =3D 0; vb->num_pfns < num; @@ -159,7 +427,10 @@ static unsigned fill_balloon(struct virtio_balloon *vb= , size_t num) msleep(200); break; } - set_page_pfns(vb, vb->pfns + vb->num_pfns, page); + if (chunking) + update_page_bmap_range(vb, page); + else + set_page_pfns(vb, vb->pfns + vb->num_pfns, page); vb->num_pages +=3D VIRTIO_BALLOON_PAGES_PER_PAGE; if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) @@ -168,8 +439,13 @@ static unsigned fill_balloon(struct virtio_balloon *vb= , size_t num) =20 num_allocated_pages =3D vb->num_pfns; /* Did we get any? 
*/ - if (vb->num_pfns !=3D 0) - tell_host(vb, vb->inflate_vq); + if (vb->num_pfns !=3D 0) { + if (chunking) + set_page_bmap(vb, &vb_dev_info->pages, + vb->inflate_vq); + else + tell_host(vb, vb->inflate_vq); + } mutex_unlock(&vb->balloon_lock); =20 return num_allocated_pages; @@ -195,6 +471,13 @@ static unsigned leak_balloon(struct virtio_balloon *vb= , size_t num) struct page *page; struct balloon_dev_info *vb_dev_info =3D &vb->vb_dev_info; LIST_HEAD(pages); + bool chunking =3D virtio_has_feature(vb->vdev, + VIRTIO_BALLOON_F_BALLOON_CHUNKS); + if (chunking) + init_page_bmap_range(vb); + else + /* We can only do one array worth at a time. */ + num =3D min(num, ARRAY_SIZE(vb->pfns)); =20 /* We can only do one array worth at a time. */ num =3D min(num, ARRAY_SIZE(vb->pfns)); @@ -208,6 +491,10 @@ static unsigned leak_balloon(struct virtio_balloon *vb= , size_t num) if (!page) break; set_page_pfns(vb, vb->pfns + vb->num_pfns, page); + if (chunking) + update_page_bmap_range(vb, page); + else + set_page_pfns(vb, vb->pfns + vb->num_pfns, page); list_add(&page->lru, &pages); vb->num_pages -=3D VIRTIO_BALLOON_PAGES_PER_PAGE; } @@ -218,8 +505,12 @@ static unsigned leak_balloon(struct virtio_balloon *vb= , size_t num) * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); * is true, we *have* to do it in this order */ - if (vb->num_pfns !=3D 0) - tell_host(vb, vb->deflate_vq); + if (vb->num_pfns !=3D 0) { + if (chunking) + set_page_bmap(vb, &pages, vb->deflate_vq); + else + tell_host(vb, vb->deflate_vq); + } release_pages_balloon(vb, &pages); mutex_unlock(&vb->balloon_lock); return num_freed_pages; @@ -431,6 +722,13 @@ static int init_vqs(struct virtio_balloon *vb) } =20 #ifdef CONFIG_BALLOON_COMPACTION + +static void tell_host_one_page(struct virtio_balloon *vb, + struct virtqueue *vq, struct page *page) +{ + add_one_chunk(vb, vq, PAGE_CHUNK_TYPE_BALLOON, page_to_pfn(page), 1); +} + /* * virtballoon_migratepage - perform the balloon page migration on behalf = of * a 
compation thread. (called under page lock) @@ -454,6 +752,8 @@ static int virtballoon_migratepage(struct balloon_dev_i= nfo *vb_dev_info, { struct virtio_balloon *vb =3D container_of(vb_dev_info, struct virtio_balloon, vb_dev_info); + bool chunking =3D virtio_has_feature(vb->vdev, + VIRTIO_BALLOON_F_BALLOON_CHUNKS); unsigned long flags; =20 /* @@ -475,16 +775,22 @@ static int virtballoon_migratepage(struct balloon_dev= _info *vb_dev_info, vb_dev_info->isolated_pages--; __count_vm_event(BALLOON_MIGRATE); spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); - vb->num_pfns =3D VIRTIO_BALLOON_PAGES_PER_PAGE; - set_page_pfns(vb, vb->pfns, newpage); - tell_host(vb, vb->inflate_vq); - + if (chunking) { + tell_host_one_page(vb, vb->inflate_vq, newpage); + } else { + vb->num_pfns =3D VIRTIO_BALLOON_PAGES_PER_PAGE; + set_page_pfns(vb, vb->pfns, newpage); + tell_host(vb, vb->inflate_vq); + } /* balloon's page migration 2nd step -- deflate "page" */ balloon_page_delete(page); - vb->num_pfns =3D VIRTIO_BALLOON_PAGES_PER_PAGE; - set_page_pfns(vb, vb->pfns, page); - tell_host(vb, vb->deflate_vq); - + if (chunking) { + tell_host_one_page(vb, vb->deflate_vq, page); + } else { + vb->num_pfns =3D VIRTIO_BALLOON_PAGES_PER_PAGE; + set_page_pfns(vb, vb->pfns, page); + tell_host(vb, vb->deflate_vq); + } mutex_unlock(&vb->balloon_lock); =20 put_page(page); /* balloon reference */ @@ -511,6 +817,32 @@ static struct file_system_type balloon_fs =3D { =20 #endif /* CONFIG_BALLOON_COMPACTION */ =20 +static void balloon_page_chunk_init(struct virtio_balloon *vb) +{ + void *buf; + + /* + * By default, we allocate page_bmap[0] only. More page_bmap will be + * allocated on demand. 
+ */ + vb->page_bmap[0] =3D kmalloc(PAGE_BMAP_SIZE, GFP_KERNEL); + buf =3D kmalloc(sizeof(struct virtio_balloon_page_chunk_hdr) + + sizeof(struct virtio_balloon_page_chunk) * + MAX_PAGE_CHUNKS, GFP_KERNEL); + if (!vb->page_bmap[0] || !buf) { + __virtio_clear_bit(vb->vdev, VIRTIO_BALLOON_F_BALLOON_CHUNKS); + kfree(vb->page_bmap[0]); + kfree(vb->balloon_page_chunk_hdr); + dev_warn(&vb->vdev->dev, "%s: failed\n", __func__); + } else { + vb->page_bmaps =3D 1; + vb->balloon_page_chunk_hdr =3D buf; + vb->balloon_page_chunk_hdr->chunks =3D 0; + vb->balloon_page_chunk =3D buf + + sizeof(struct virtio_balloon_page_chunk_hdr); + } +} + static int virtballoon_probe(struct virtio_device *vdev) { struct virtio_balloon *vb; @@ -533,6 +865,10 @@ static int virtballoon_probe(struct virtio_device *vde= v) spin_lock_init(&vb->stop_update_lock); vb->stop_update =3D false; vb->num_pages =3D 0; + + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_BALLOON_CHUNKS)) + balloon_page_chunk_init(vb); + mutex_init(&vb->balloon_lock); init_waitqueue_head(&vb->acked); vb->vdev =3D vdev; @@ -609,6 +945,7 @@ static void virtballoon_remove(struct virtio_device *vd= ev) cancel_work_sync(&vb->update_balloon_stats_work); =20 remove_common(vb); + free_page_bmap(vb); if (vb->vb_dev_info.inode) iput(vb->vb_dev_info.inode); kfree(vb); @@ -649,6 +986,7 @@ static unsigned int features[] =3D { VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, + VIRTIO_BALLOON_F_BALLOON_CHUNKS, }; =20 static struct virtio_driver virtio_balloon_driver =3D { diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virti= o_balloon.h index 343d7dd..be317b7 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -34,6 +34,7 @@ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages = */ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ 
+#define VIRTIO_BALLOON_F_BALLOON_CHUNKS 3 /* Inflate/Deflate pages in chun= ks */ =20 /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 @@ -82,4 +83,16 @@ struct virtio_balloon_stat { __virtio64 val; } __attribute__((packed)); =20 +struct virtio_balloon_page_chunk_hdr { + /* Number of chunks in the payload */ + __le32 chunks; +}; + +#define VIRTIO_BALLOON_CHUNK_BASE_SHIFT 12 +#define VIRTIO_BALLOON_CHUNK_SIZE_SHIFT 12 +struct virtio_balloon_page_chunk { + __le64 base; + __le64 size; +}; + #endif /* _LINUX_VIRTIO_BALLOON_H */ --=20 2.7.4 From nobody Tue Apr 30 06:24:56 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zoho.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 149207677296962.04714997267217; Thu, 13 Apr 2017 02:46:12 -0700 (PDT) Received: from localhost ([::1]:48311 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cybKN-0004xp-Jg for importer@patchew.org; Thu, 13 Apr 2017 05:46:11 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:40290) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cybF2-0000Sq-Ks for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:42 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1cybEy-0000De-HP for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:40 -0400 Received: from mga02.intel.com ([134.134.136.20]:22809) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1cybEy-0000DT-7u for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:36 
-0400 Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga101.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 13 Apr 2017 02:40:11 -0700 Received: from devel-ww.sh.intel.com ([10.239.48.105]) by orsmga004.jf.intel.com with ESMTP; 13 Apr 2017 02:40:08 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.37,194,1488873600"; d="scan'208";a="76894460" From: Wei Wang To: virtio-dev@lists.oasis-open.org, linux-kernel@vger.kernel.org, qemu-devel@nongnu.org, virtualization@lists.linux-foundation.org, kvm@vger.kernel.org, linux-mm@kvack.org, mst@redhat.com, david@redhat.com, dave.hansen@intel.com, cornelia.huck@de.ibm.com, akpm@linux-foundation.org, mgorman@techsingularity.net, aarcange@redhat.com, amit.shah@redhat.com, pbonzini@redhat.com, wei.w.wang@intel.com, liliang.opensource@gmail.com Date: Thu, 13 Apr 2017 17:35:06 +0800 Message-Id: <1492076108-117229-4-git-send-email-wei.w.wang@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1492076108-117229-1-git-send-email-wei.w.wang@intel.com> References: <1492076108-117229-1-git-send-email-wei.w.wang@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.20 Subject: [Qemu-devel] [PATCH v9 3/5] mm: function to offer a page block on the free list X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Add a function to find a page block on the free list specified by the caller. Pages from the page block may be used immediately after the function returns. The caller is responsible for detecting or preventing the use of such pages. 
Signed-off-by: Wei Wang Signed-off-by: Liang Li --- include/linux/mm.h | 3 ++ mm/page_alloc.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++= ++++ 2 files changed, 90 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index b84615b..096705e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1764,6 +1764,9 @@ extern void free_area_init(unsigned long * zones_size= ); extern void free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); extern void free_initmem(void); +extern int inquire_unused_page_block(struct zone *zone, unsigned int order, + unsigned int migratetype, + struct page **page); =20 /* * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f3e0c69..fa8203f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4498,6 +4498,93 @@ void show_free_areas(unsigned int filter) show_swap_cache_info(); } =20 +/** + * Heuristically get a page block in the system that is unused. + * It is possible that pages from the page block are used immediately after + * inquire_unused_page_block() returns. It is the caller's responsibility + * to either detect or prevent the use of such pages. + * + * The free list to check: zone->free_area[order].free_list[migratetype]. + * + * If the caller supplied page block (i.e. **page) is on the free list, of= fer + * the next page block on the list to the caller. Otherwise, offer the fir= st + * page block on the list. + * + * Return 0 when a page block is found on the caller specified free list. 
+ */ +int inquire_unused_page_block(struct zone *zone, unsigned int order, + unsigned int migratetype, struct page **page) +{ + struct zone *this_zone; + struct list_head *this_list; + int ret =3D 0; + unsigned long flags; + + /* Sanity check */ + if (zone =3D=3D NULL || page =3D=3D NULL || order >=3D MAX_ORDER || + migratetype >=3D MIGRATE_TYPES) + return -EINVAL; + + /* Zone validity check */ + for_each_populated_zone(this_zone) { + if (zone =3D=3D this_zone) + break; + } + + /* Got a non-existent zone from the caller? */ + if (zone !=3D this_zone) + return -EINVAL; + + spin_lock_irqsave(&this_zone->lock, flags); + + this_list =3D &zone->free_area[order].free_list[migratetype]; + if (list_empty(this_list)) { + *page =3D NULL; + ret =3D 1; + goto out; + } + + /* The caller is asking for the first free page block on the list */ + if ((*page) =3D=3D NULL) { + *page =3D list_first_entry(this_list, struct page, lru); + ret =3D 0; + goto out; + } + + /** + * The page block passed from the caller is not on this free list + * anymore (e.g. a 1MB free page block has been split). In this case, + * offer the first page block on the free list that the caller is + * asking for. + */ + if (PageBuddy(*page) && order !=3D page_order(*page)) { + *page =3D list_first_entry(this_list, struct page, lru); + ret =3D 0; + goto out; + } + + /** + * The page block passed from the caller has been the last page block + * on the list. + */ + if ((*page)->lru.next =3D=3D this_list) { + *page =3D NULL; + ret =3D 1; + goto out; + } + + /** + * Finally, fall into the regular case: the page block passed from the + * caller is still on the free list. Offer the next one. 
+ */ + *page =3D list_next_entry((*page), lru); + ret =3D 0; +out: + spin_unlock_irqrestore(&this_zone->lock, flags); + return ret; +} +EXPORT_SYMBOL(inquire_unused_page_block); + static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref) { zoneref->zone =3D zone; --=20 2.7.4 From nobody Tue Apr 30 06:24:56 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zoho.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1492076540572283.27243298383564; Thu, 13 Apr 2017 02:42:20 -0700 (PDT) Received: from localhost ([::1]:48280 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cybGd-0001cg-HJ for importer@patchew.org; Thu, 13 Apr 2017 05:42:19 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:40292) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cybF2-0000Sv-Nz for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:41 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1cybEz-0000Dk-5H for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:40 -0400 Received: from mga02.intel.com ([134.134.136.20]:22809) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1cybEy-0000DT-TC for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:37 -0400 Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga101.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 13 Apr 2017 02:40:14 -0700 Received: from devel-ww.sh.intel.com ([10.239.48.105]) by orsmga004.jf.intel.com with ESMTP; 13 Apr 2017 02:40:11 -0700 
X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.37,194,1488873600"; d="scan'208";a="76894488" From: Wei Wang To: virtio-dev@lists.oasis-open.org, linux-kernel@vger.kernel.org, qemu-devel@nongnu.org, virtualization@lists.linux-foundation.org, kvm@vger.kernel.org, linux-mm@kvack.org, mst@redhat.com, david@redhat.com, dave.hansen@intel.com, cornelia.huck@de.ibm.com, akpm@linux-foundation.org, mgorman@techsingularity.net, aarcange@redhat.com, amit.shah@redhat.com, pbonzini@redhat.com, wei.w.wang@intel.com, liliang.opensource@gmail.com Date: Thu, 13 Apr 2017 17:35:07 +0800 Message-Id: <1492076108-117229-5-git-send-email-wei.w.wang@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1492076108-117229-1-git-send-email-wei.w.wang@intel.com> References: <1492076108-117229-1-git-send-email-wei.w.wang@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.20 Subject: [Qemu-devel] [PATCH v9 4/5] mm: export symbol of next_zone and first_online_pgdat X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" This patch enables for_each_zone()/for_each_populated_zone() to be invoked by a kernel module. 
Signed-off-by: Wei Wang --- mm/mmzone.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/mmzone.c b/mm/mmzone.c index 5652be8..e14b7ec 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -13,6 +13,7 @@ struct pglist_data *first_online_pgdat(void) { return NODE_DATA(first_online_node); } +EXPORT_SYMBOL_GPL(first_online_pgdat); =20 struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) { @@ -41,6 +42,7 @@ struct zone *next_zone(struct zone *zone) } return zone; } +EXPORT_SYMBOL_GPL(next_zone); =20 static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes) { --=20 2.7.4 From nobody Tue Apr 30 06:24:56 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zoho.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1492076917523687.5493473733485; Thu, 13 Apr 2017 02:48:37 -0700 (PDT) Received: from localhost ([::1]:48320 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cybMi-0006rH-6p for importer@patchew.org; Thu, 13 Apr 2017 05:48:36 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:40294) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cybF2-0000Sx-Ot for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:42 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1cybEz-0000Du-O0 for qemu-devel@nongnu.org; Thu, 13 Apr 2017 05:40:40 -0400 Received: from mga02.intel.com ([134.134.136.20]:22809) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1cybEz-0000DT-Ay for qemu-devel@nongnu.org; 
Thu, 13 Apr 2017 05:40:37 -0400 Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga101.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 13 Apr 2017 02:40:17 -0700 Received: from devel-ww.sh.intel.com ([10.239.48.105]) by orsmga004.jf.intel.com with ESMTP; 13 Apr 2017 02:40:14 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.37,194,1488873600"; d="scan'208";a="76894519" From: Wei Wang To: virtio-dev@lists.oasis-open.org, linux-kernel@vger.kernel.org, qemu-devel@nongnu.org, virtualization@lists.linux-foundation.org, kvm@vger.kernel.org, linux-mm@kvack.org, mst@redhat.com, david@redhat.com, dave.hansen@intel.com, cornelia.huck@de.ibm.com, akpm@linux-foundation.org, mgorman@techsingularity.net, aarcange@redhat.com, amit.shah@redhat.com, pbonzini@redhat.com, wei.w.wang@intel.com, liliang.opensource@gmail.com Date: Thu, 13 Apr 2017 17:35:08 +0800 Message-Id: <1492076108-117229-6-git-send-email-wei.w.wang@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1492076108-117229-1-git-send-email-wei.w.wang@intel.com> References: <1492076108-117229-1-git-send-email-wei.w.wang@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.20 Subject: [Qemu-devel] [PATCH v9 5/5] virtio-balloon: VIRTIO_BALLOON_F_MISC_VQ X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Add a new vq, miscq, to handle miscellaneous requests between the device and the driver. This patch implements the VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES request sent from the device. Upon receiving this request from the miscq, the driver offers to the device the guest unused pages. 
Tests have shown that skipping the transfer of unused pages of a 32G guest can get the live migration time reduced to 1/8. Signed-off-by: Wei Wang Signed-off-by: Liang Li --- drivers/virtio/virtio_balloon.c | 209 ++++++++++++++++++++++++++++++++= +--- include/uapi/linux/virtio_balloon.h | 8 ++ 2 files changed, 204 insertions(+), 13 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloo= n.c index 5e2e7cc..95c703e 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -56,11 +56,12 @@ static struct vfsmount *balloon_mnt; =20 /* Types of pages to chunk */ #define PAGE_CHUNK_TYPE_BALLOON 0 +#define PAGE_CHUNK_TYPE_UNUSED 1 =20 #define MAX_PAGE_CHUNKS 4096 struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *miscq; =20 /* The balloon servicing is delegated to a freezable workqueue. */ struct work_struct update_balloon_stats_work; @@ -94,6 +95,19 @@ struct virtio_balloon { struct virtio_balloon_page_chunk_hdr *balloon_page_chunk_hdr; struct virtio_balloon_page_chunk *balloon_page_chunk; =20 + /* + * Buffer for PAGE_CHUNK_TYPE_UNUSED: + * virtio_balloon_miscq_hdr + + * virtio_balloon_page_chunk_hdr + + * virtio_balloon_page_chunk * MAX_PAGE_CHUNKS + */ + struct virtio_balloon_miscq_hdr *miscq_out_hdr; + struct virtio_balloon_page_chunk_hdr *unused_page_chunk_hdr; + struct virtio_balloon_page_chunk *unused_page_chunk; + + /* Buffer for host to send cmd to miscq */ + struct virtio_balloon_miscq_hdr *miscq_in_hdr; + /* Bitmap used to record pages */ unsigned long *page_bmap[PAGE_BMAP_COUNT_MAX]; /* Number of the allocated page_bmap */ @@ -220,6 +234,10 @@ static void send_page_chunks(struct virtio_balloon *vb= , struct virtqueue *vq, hdr =3D vb->balloon_page_chunk_hdr; len =3D 0; break; + case PAGE_CHUNK_TYPE_UNUSED: + hdr =3D vb->unused_page_chunk_hdr; + len =3D sizeof(struct 
virtio_balloon_miscq_hdr); + break; default: dev_warn(&vb->vdev->dev, "%s: chunk %d of unknown pages\n", __func__, type); @@ -254,6 +272,10 @@ static void add_one_chunk(struct virtio_balloon *vb, s= truct virtqueue *vq, hdr =3D vb->balloon_page_chunk_hdr; chunk =3D vb->balloon_page_chunk; break; + case PAGE_CHUNK_TYPE_UNUSED: + hdr =3D vb->unused_page_chunk_hdr; + chunk =3D vb->unused_page_chunk; + break; default: dev_warn(&vb->vdev->dev, "%s: chunk %d of unknown pages\n", __func__, type); @@ -686,28 +708,139 @@ static void update_balloon_size_func(struct work_str= uct *work) queue_work(system_freezable_wq, work); } =20 +static void miscq_in_hdr_add(struct virtio_balloon *vb) +{ + struct scatterlist sg_in; + + sg_init_one(&sg_in, vb->miscq_in_hdr, + sizeof(struct virtio_balloon_miscq_hdr)); + if (virtqueue_add_inbuf(vb->miscq, &sg_in, 1, vb->miscq_in_hdr, + GFP_KERNEL) < 0) { + __virtio_clear_bit(vb->vdev, + VIRTIO_BALLOON_F_MISC_VQ); + dev_warn(&vb->vdev->dev, "%s: add miscq_in_hdr err\n", + __func__); + return; + } + virtqueue_kick(vb->miscq); +} + +static void miscq_send_unused_pages(struct virtio_balloon *vb) +{ + struct virtio_balloon_miscq_hdr *miscq_out_hdr =3D vb->miscq_out_hdr; + struct virtqueue *vq =3D vb->miscq; + int ret =3D 0; + unsigned int order =3D 0, migratetype =3D 0; + struct zone *zone =3D NULL; + struct page *page =3D NULL; + u64 pfn; + + miscq_out_hdr->cmd =3D VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES; + miscq_out_hdr->flags =3D 0; + + for_each_populated_zone(zone) { + for (order =3D MAX_ORDER - 1; order > 0; order--) { + for (migratetype =3D 0; migratetype < MIGRATE_TYPES; + migratetype++) { + do { + ret =3D inquire_unused_page_block(zone, + order, migratetype, &page); + if (!ret) { + pfn =3D (u64)page_to_pfn(page); + add_one_chunk(vb, vq, + PAGE_CHUNK_TYPE_UNUSED, + pfn, + (u64)(1 << order)); + } + } while (!ret); + } + } + } + miscq_out_hdr->flags |=3D VIRTIO_BALLOON_MISCQ_F_COMPLETE; + send_page_chunks(vb, vq, PAGE_CHUNK_TYPE_UNUSED, 
true); +} + +static void miscq_handle(struct virtqueue *vq) +{ + struct virtio_balloon *vb =3D vq->vdev->priv; + struct virtio_balloon_miscq_hdr *hdr; + unsigned int len; + + hdr =3D virtqueue_get_buf(vb->miscq, &len); + if (!hdr || len !=3D sizeof(struct virtio_balloon_miscq_hdr)) { + dev_warn(&vb->vdev->dev, "%s: invalid miscq hdr len\n", + __func__); + miscq_in_hdr_add(vb); + return; + } + switch (hdr->cmd) { + case VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES: + miscq_send_unused_pages(vb); + break; + default: + dev_warn(&vb->vdev->dev, "%s: miscq cmd %d not supported\n", + __func__, hdr->cmd); + } + miscq_in_hdr_add(vb); +} + static int init_vqs(struct virtio_balloon *vb) { - struct virtqueue *vqs[3]; - vq_callback_t *callbacks[] =3D { balloon_ack, balloon_ack, stats_request = }; - static const char * const names[] =3D { "inflate", "deflate", "stats" }; - int err, nvqs; + struct virtqueue **vqs; + vq_callback_t **callbacks; + const char **names; + int err =3D -ENOMEM; + int i, nvqs; + + /* Inflateq and deflateq are used unconditionally */ + nvqs =3D 2; + + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) + nvqs++; + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_MISC_VQ)) + nvqs++; + + /* Allocate space for find_vqs parameters */ + vqs =3D kcalloc(nvqs, sizeof(*vqs), GFP_KERNEL); + if (!vqs) + goto err_vq; + callbacks =3D kmalloc_array(nvqs, sizeof(*callbacks), GFP_KERNEL); + if (!callbacks) + goto err_callback; + names =3D kmalloc_array(nvqs, sizeof(*names), GFP_KERNEL); + if (!names) + goto err_names; + + callbacks[0] =3D balloon_ack; + names[0] =3D "inflate"; + callbacks[1] =3D balloon_ack; + names[1] =3D "deflate"; + + i =3D 2; + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { + callbacks[i] =3D stats_request; + names[i] =3D "stats"; + i++; + } =20 - /* - * We expect two virtqueues: inflate and deflate, and - * optionally stat. - */ - nvqs =3D virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 
3 : 2; - err =3D vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names); + if (virtio_has_feature(vb->vdev, + VIRTIO_BALLOON_F_MISC_VQ)) { + callbacks[i] =3D miscq_handle; + names[i] =3D "miscq"; + } + + err =3D vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, + names); if (err) - return err; + goto err_find; =20 vb->inflate_vq =3D vqs[0]; vb->deflate_vq =3D vqs[1]; + i =3D 2; if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { struct scatterlist sg; - vb->stats_vq =3D vqs[2]; =20 + vb->stats_vq =3D vqs[i++]; /* * Prime this virtqueue with one buffer so the hypervisor can * use it to signal us later (it can't be broken yet!). @@ -718,7 +851,25 @@ static int init_vqs(struct virtio_balloon *vb) BUG(); virtqueue_kick(vb->stats_vq); } + + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_MISC_VQ)) { + vb->miscq =3D vqs[i]; + miscq_in_hdr_add(vb); + } + + kfree(names); + kfree(callbacks); + kfree(vqs); return 0; + +err_find: + kfree(names); +err_names: + kfree(callbacks); +err_callback: + kfree(vqs); +err_vq: + return err; } =20 #ifdef CONFIG_BALLOON_COMPACTION @@ -843,6 +994,32 @@ static void balloon_page_chunk_init(struct virtio_ball= oon *vb) } } =20 +static void miscq_init(struct virtio_balloon *vb) +{ + void *buf; + + vb->miscq_in_hdr =3D kmalloc(sizeof(struct virtio_balloon_miscq_hdr), + GFP_KERNEL); + buf =3D kmalloc(sizeof(struct virtio_balloon_miscq_hdr) + + sizeof(struct virtio_balloon_page_chunk_hdr) + + sizeof(struct virtio_balloon_page_chunk) * + MAX_PAGE_CHUNKS, GFP_KERNEL); + if (!vb->miscq_in_hdr || !buf) { + kfree(buf); + kfree(vb->miscq_in_hdr); + __virtio_clear_bit(vb->vdev, VIRTIO_BALLOON_F_MISC_VQ); + dev_warn(&vb->vdev->dev, "%s: failed\n", __func__); + } else { + vb->miscq_out_hdr =3D buf; + vb->unused_page_chunk_hdr =3D buf + + sizeof(struct virtio_balloon_miscq_hdr); + vb->unused_page_chunk_hdr->chunks =3D 0; + vb->unused_page_chunk =3D buf + + sizeof(struct virtio_balloon_miscq_hdr) + + sizeof(struct 
virtio_balloon_page_chunk_hdr); + } +} + static int virtballoon_probe(struct virtio_device *vdev) { struct virtio_balloon *vb; @@ -869,6 +1046,9 @@ static int virtballoon_probe(struct virtio_device *vde= v) if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_BALLOON_CHUNKS)) balloon_page_chunk_init(vb); =20 + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_MISC_VQ)) + miscq_init(vb); + mutex_init(&vb->balloon_lock); init_waitqueue_head(&vb->acked); vb->vdev =3D vdev; @@ -946,6 +1126,8 @@ static void virtballoon_remove(struct virtio_device *v= dev) =20 remove_common(vb); free_page_bmap(vb); + kfree(vb->miscq_out_hdr); + kfree(vb->miscq_in_hdr); if (vb->vb_dev_info.inode) iput(vb->vb_dev_info.inode); kfree(vb); @@ -987,6 +1169,7 @@ static unsigned int features[] =3D { VIRTIO_BALLOON_F_STATS_VQ, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, VIRTIO_BALLOON_F_BALLOON_CHUNKS, + VIRTIO_BALLOON_F_MISC_VQ, }; =20 static struct virtio_driver virtio_balloon_driver =3D { diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virti= o_balloon.h index be317b7..96bdc86 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -35,6 +35,7 @@ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ #define VIRTIO_BALLOON_F_BALLOON_CHUNKS 3 /* Inflate/Deflate pages in chun= ks */ +#define VIRTIO_BALLOON_F_MISC_VQ 4 /* Virtqueue for misc. requests */ =20 /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 @@ -95,4 +96,11 @@ struct virtio_balloon_page_chunk { __le64 size; }; =20 +#define VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES 0 +#define VIRTIO_BALLOON_MISCQ_F_COMPLETE 0x1 +struct virtio_balloon_miscq_hdr { + __le16 cmd; + __le16 flags; +}; + #endif /* _LINUX_VIRTIO_BALLOON_H */ --=20 2.7.4