From nobody Mon Apr 6 11:53:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 3888AC433FE for ; Fri, 30 Sep 2022 06:02:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230108AbiI3GCH (ORCPT ); Fri, 30 Sep 2022 02:02:07 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50060 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230088AbiI3GCB (ORCPT ); Fri, 30 Sep 2022 02:02:01 -0400 Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id 7A5391FD899; Thu, 29 Sep 2022 23:02:00 -0700 (PDT) Received: by linux.microsoft.com (Postfix, from userid 1134) id 551B720E0A4E; Thu, 29 Sep 2022 23:02:00 -0700 (PDT) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com 551B720E0A4E DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1664517720; bh=mWyDCK4sgwNQboz2oMaLFnl6zMEOPYFWjgXwMVYGH4A=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=ryXghZb3oeeWN0GT3g2MNq1x0AXB2gJ+2y70rfAV8NTeIIFiWpW4Muu6lKYiC/Mq+ tAxYeujr67OOQkWyWCKD8S4McdGmXYpQxugelV0r8jTwfTVllcfFcV2TkzZnLCUAvA D9X0DHgaSnRp0hpiTbKTxMK13DbHuC1Gou+i1oNM= From: Shradha Gupta To: Andrew Morton , linux-mm@kvack.org, linux-kernel@vger.kernel.org, linux-hyperv@vger.kernel.org Cc: Shradha Gupta , "K. Y. 
Srinivasan" , Haiyang Zhang , Stephen Hemminger , Wei Liu , Dexuan Cui , Michael Kelley Subject: [PATCH v2 1/2] mm/page_reporting: Add checks for page_reporting_order param Date: Thu, 29 Sep 2022 23:01:38 -0700 Message-Id: <1664517699-1085-2-git-send-email-shradhagupta@linux.microsoft.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1664517699-1085-1-git-send-email-shradhagupta@linux.microsoft.com> References: <1664447081-14744-1-git-send-email-shradhagupta@linux.microsoft.com> <1664517699-1085-1-git-send-email-shradhagupta@linux.microsoft.com> Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Current code allows the page_reporting_order parameter to be changed via sysfs to any integer value. The new value is used immediately in page reporting code with no validation, which could cause incorrect behavior. Fix this by adding validation of the new value. Export this parameter for use in the driver that is calling page_reporting_register(). This is needed by drivers like hv_balloon to know the order of the pages reported. Traditionally, values provided on the kernel boot line or subsequently changed via sysfs take priority. Therefore, if the page_reporting_order parameter is set, it takes precedence over the order passed by the driver when it registers.
Signed-off-by: Shradha Gupta Reviewed-by: Michael Kelley --- mm/page_reporting.c | 50 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/mm/page_reporting.c b/mm/page_reporting.c index 382958eef8a9..29d67c824fd2 100644 --- a/mm/page_reporting.c +++ b/mm/page_reporting.c @@ -11,10 +11,42 @@ #include "page_reporting.h" #include "internal.h" =20 -unsigned int page_reporting_order =3D MAX_ORDER; -module_param(page_reporting_order, uint, 0644); +/* Initialize to an unsupported value */ +unsigned int page_reporting_order =3D -1; + +int page_order_update_notify(const char *val, const struct kernel_param *k= p) +{ + /* + * If param is set beyond this limit, order is set to default + * pageblock_order value + */ + return param_set_uint_minmax(val, kp, 0, MAX_ORDER-1); +} + +const struct kernel_param_ops page_reporting_param_ops =3D { + .set =3D &page_order_update_notify, + /* + * For the get op, use param_get_int instead of param_get_uint. + * This is to make sure that when unset the initialized value of + * -1 is shown correctly + */ + .get =3D ¶m_get_int, +}; + +module_param_cb(page_reporting_order, &page_reporting_param_ops, + &page_reporting_order, 0644); MODULE_PARM_DESC(page_reporting_order, "Set page reporting order"); =20 +/* + * This symbol is also a kernel parameter. Export the page_reporting_order + * symbol so that other drivers can access it to control order values with= out + * having to introduce another configurable parameter. 
Only one driver can + * register with the page_reporting driver for the service, so we have just + * one control parameter for the use case(which can be accessed in both + * drivers) + */ +EXPORT_SYMBOL_GPL(page_reporting_order); + #define PAGE_REPORTING_DELAY (2 * HZ) static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly; =20 @@ -330,10 +362,18 @@ int page_reporting_register(struct page_reporting_dev= _info *prdev) } =20 /* - * Update the page reporting order if it's specified by driver. - * Otherwise, it falls back to @pageblock_order. + * If the page_reporting_order value is not set, we check if + * an order is provided from the driver that is performing the + * registration. If that is not provided either, we default to + * pageblock_order. */ - page_reporting_order =3D prdev->order ? : pageblock_order; + + if (page_reporting_order =3D=3D -1) { + if (prdev->order > 0 && prdev->order <=3D MAX_ORDER) + page_reporting_order =3D prdev->order; + else + page_reporting_order =3D pageblock_order; + } =20 /* initialize state and work structures */ atomic_set(&prdev->state, PAGE_REPORTING_IDLE); --=20 2.37.2 From nobody Mon Apr 6 11:53:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id E7207C433FE for ; Fri, 30 Sep 2022 06:02:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230173AbiI3GCW (ORCPT ); Fri, 30 Sep 2022 02:02:22 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50524 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230088AbiI3GCO (ORCPT ); Fri, 30 Sep 2022 02:02:14 -0400 Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id D8455200B2A; Thu, 29 Sep 2022 23:02:10 -0700 (PDT) Received: by 
linux.microsoft.com (Postfix, from userid 1134) id 194AC20E0A4E; Thu, 29 Sep 2022 23:02:10 -0700 (PDT) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com 194AC20E0A4E DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1664517730; bh=977Jsx8ikA7Oogcgw8EBooG0nQxyqKCRquP1jxo5VVs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=RN1WjVfUaFCMNKHKm5HFi8xmaF02pFqlHssywkVyemZuOI7i2K43GLe2jJhKoQZZA 5RgPGVkmOZaw0CrmwMbit4gPpcCjlvgOt6zhfaxjFHljjAMTA/M88V0526VriNYhNd 5iFq8GcqSqfRCRE+jdsfUB0EycQtrdE/gpG2BCZ4= From: Shradha Gupta To: Andrew Morton , linux-mm@kvack.org, linux-kernel@vger.kernel.org, linux-hyperv@vger.kernel.org Cc: Shradha Gupta , "K. Y. Srinivasan" , Haiyang Zhang , Stephen Hemminger , Wei Liu , Dexuan Cui , Michael Kelley Subject: [PATCH v2 2/2] hv_balloon: Add support for configurable order free page reporting Date: Thu, 29 Sep 2022 23:01:39 -0700 Message-Id: <1664517699-1085-3-git-send-email-shradhagupta@linux.microsoft.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1664517699-1085-1-git-send-email-shradhagupta@linux.microsoft.com> References: <1664447081-14744-1-git-send-email-shradhagupta@linux.microsoft.com> <1664517699-1085-1-git-send-email-shradhagupta@linux.microsoft.com> Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Newer versions of Hyper-V allow reporting unused guest pages in chunks smaller than 2 Mbytes. Using smaller chunks allows reporting more unused guest pages, but with increased overhead in finding the small chunks. To make this tradeoff configurable, use the existing page_reporting_order module parameter to control the reporting order. Drop and refine checks that restricted the minimum page reporting order to 2 Mbyte pages.
Add appropriate checks to make sure the underlying Hyper-V versions support cold discard hints of any order (and not just starting from 9) Signed-off-by: Shradha Gupta Reviewed-by: Michael Kelley --- drivers/hv/hv_balloon.c | 94 ++++++++++++++++++++++++++++++++--------- 1 file changed, 73 insertions(+), 21 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index fdf6decacf06..7088ed056e50 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -469,12 +469,16 @@ static bool do_hot_add; * the specified number of seconds. */ static uint pressure_report_delay =3D 45; +extern unsigned int page_reporting_order; +#define HV_MAX_FAILURES 2 =20 /* * The last time we posted a pressure report to host. */ static unsigned long last_post_time; =20 +static int hv_hypercall_multi_failure; + module_param(hot_add, bool, (S_IRUGO | S_IWUSR)); MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add"); =20 @@ -579,6 +583,10 @@ static struct hv_dynmem_device dm_device; =20 static void post_status(struct hv_dynmem_device *dm); =20 +static void enable_page_reporting(void); + +static void disable_page_reporting(void); + #ifdef CONFIG_MEMORY_HOTPLUG static inline bool has_pfn_is_backed(struct hv_hotadd_state *has, unsigned long pfn) @@ -1418,6 +1426,18 @@ static int dm_thread_func(void *dm_dev) */ reinit_completion(&dm_device.config_event); post_status(dm); + /* + * disable free page reporting if multiple hypercall + * failure flag set. 
It is not done in the page_reporting + * callback context as that causes a deadlock between + * page_reporting_process() and page_reporting_unregister() + */ + if (hv_hypercall_multi_failure >=3D HV_MAX_FAILURES) { + pr_err("Multiple failures in cold memory discard hypercall, disabling p= age reporting\n"); + disable_page_reporting(); + /* Reset the flag after disabling reporting */ + hv_hypercall_multi_failure =3D 0; + } } =20 return 0; @@ -1593,20 +1613,20 @@ static void balloon_onchannelcallback(void *context) =20 } =20 -/* Hyper-V only supports reporting 2MB pages or higher */ -#define HV_MIN_PAGE_REPORTING_ORDER 9 -#define HV_MIN_PAGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << HV_MIN_PAGE_REPORTI= NG_ORDER) +#define HV_LARGE_REPORTING_ORDER 9 +#define HV_LARGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << \ + HV_LARGE_REPORTING_ORDER) static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, struct scatterlist *sgl, unsigned int nents) { unsigned long flags; struct hv_memory_hint *hint; - int i; + int i, order; u64 status; struct scatterlist *sg; =20 WARN_ON_ONCE(nents > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES); - WARN_ON_ONCE(sgl->length < HV_MIN_PAGE_REPORTING_LEN); + WARN_ON_ONCE(sgl->length < (HV_HYP_PAGE_SIZE << page_reporting_order)); local_irq_save(flags); hint =3D *(struct hv_memory_hint **)this_cpu_ptr(hyperv_pcpu_input_arg); if (!hint) { @@ -1621,21 +1641,53 @@ static int hv_free_page_report(struct page_reportin= g_dev_info *pr_dev_info, =20 range =3D &hint->ranges[i]; range->address_space =3D 0; - /* page reporting only reports 2MB pages or higher */ - range->page.largepage =3D 1; - range->page.additional_pages =3D - (sg->length / HV_MIN_PAGE_REPORTING_LEN) - 1; - range->page_size =3D HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB; - range->base_large_pfn =3D - page_to_hvpfn(sg_page(sg)) >> HV_MIN_PAGE_REPORTING_ORDER; + order =3D get_order(sg->length); + /* + * Hyper-V expects the additional_pages field in the units + * of one of these 3 sizes, 4Kbytes, 2Mbytes or 
1Gbytes. + * This is dictated by the values of the fields page.largesize + * and page_size. + * This code however, only uses 4Kbytes and 2Mbytes units + * and not 1Gbytes unit. + */ + + /* page reporting for pages 2MB or higher */ + if (order >=3D HV_LARGE_REPORTING_ORDER ) { + range->page.largepage =3D 1; + range->page_size =3D HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB; + range->base_large_pfn =3D page_to_hvpfn( + sg_page(sg)) >> HV_LARGE_REPORTING_ORDER; + range->page.additional_pages =3D + (sg->length / HV_LARGE_REPORTING_LEN) - 1; + } else { + /* Page reporting for pages below 2MB */ + range->page.basepfn =3D page_to_hvpfn(sg_page(sg)); + range->page.largepage =3D false; + range->page.additional_pages =3D + (sg->length / HV_HYP_PAGE_SIZE) - 1; + } + } =20 status =3D hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0, hint, NULL); local_irq_restore(flags); - if ((status & HV_HYPERCALL_RESULT_MASK) !=3D HV_STATUS_SUCCESS) { + if (!hv_result_success(status)) { + pr_err("Cold memory discard hypercall failed with status %llx\n", - status); + status); + if (hv_hypercall_multi_failure > 0) + hv_hypercall_multi_failure++; + + if (hv_result(status) =3D=3D HV_STATUS_INVALID_PARAMETER) { + pr_err("Underlying Hyper-V does not support order less than 9. Hypercal= l failed\n"); + pr_err("Defaulting to page_reporting_order %d\n", + pageblock_order); + page_reporting_order =3D pageblock_order; + hv_hypercall_multi_failure++; + return -EINVAL; + } + return -EINVAL; } =20 @@ -1646,12 +1698,6 @@ static void enable_page_reporting(void) { int ret; =20 - /* Essentially, validating 'PAGE_REPORTING_MIN_ORDER' is big enough. 
*/ - if (pageblock_order < HV_MIN_PAGE_REPORTING_ORDER) { - pr_debug("Cold memory discard is only supported on 2MB pages and above\n= "); - return; - } - if (!hv_query_ext_cap(HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT)) { pr_debug("Cold memory discard hint not supported by Hyper-V\n"); return; @@ -1659,12 +1705,18 @@ static void enable_page_reporting(void) =20 BUILD_BUG_ON(PAGE_REPORTING_CAPACITY > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES= ); dm_device.pr_dev_info.report =3D hv_free_page_report; + /* + * We let the page_reporting_order parameter decide the order + * in the page_reporting code + */ + dm_device.pr_dev_info.order =3D 0; ret =3D page_reporting_register(&dm_device.pr_dev_info); if (ret < 0) { dm_device.pr_dev_info.report =3D NULL; pr_err("Failed to enable cold memory discard: %d\n", ret); } else { - pr_info("Cold memory discard hint enabled\n"); + pr_info("Cold memory discard hint enabled with order %d\n", + page_reporting_order); } } =20 --=20 2.37.2