From: Gang Li <ligang.bdlg@bytedance.com>
To: John Hubbard, Jonathan Corbet, Ingo Molnar, Peter Zijlstra, Juri Lelli,
    Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
    Mel Gorman, Daniel Bristot de Oliveira, Valentin Schneider
Cc: linux-api@vger.kernel.org, linux-kernel@vger.kernel.org,
    linux-fsdevel@vger.kernel.org, linux-doc@vger.kernel.org, Gang Li
Subject: [PATCH v6 1/2] sched/numa: use static_branch_inc/dec for sched_numa_balancing
Date: Wed, 12 Apr 2023 22:10:52 +0800
Message-Id: <20230412141053.59498-1-ligang.bdlg@bytedance.com>
In-Reply-To: <20230412140701.58337-1-ligang.bdlg@bytedance.com>
References: <20230412140701.58337-1-ligang.bdlg@bytedance.com>

The next patch adds per-process NUMA balancing, which uses
static_branch_inc/dec() to count how many users have enabled
sched_numa_balancing. The sysctl handler must therefore be converted
to static_branch_inc/dec() as well, so that its updates and the
per-process references stay balanced.

Cc: linux-api@vger.kernel.org
Signed-off-by: Gang Li <ligang.bdlg@bytedance.com>
Acked-by: John Hubbard
---
 kernel/sched/core.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 94be4eebfa53..99cc1d5821a1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4501,21 +4501,15 @@ DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
 
 int sysctl_numa_balancing_mode;
 
-static void __set_numabalancing_state(bool enabled)
-{
-	if (enabled)
-		static_branch_enable(&sched_numa_balancing);
-	else
-		static_branch_disable(&sched_numa_balancing);
-}
-
 void set_numabalancing_state(bool enabled)
 {
-	if (enabled)
+	if (enabled) {
 		sysctl_numa_balancing_mode = NUMA_BALANCING_NORMAL;
-	else
+		static_branch_enable(&sched_numa_balancing);
+	} else {
 		sysctl_numa_balancing_mode = NUMA_BALANCING_DISABLED;
-	__set_numabalancing_state(enabled);
+		static_branch_disable(&sched_numa_balancing);
+	}
 }
 
 #ifdef CONFIG_PROC_SYSCTL
@@ -4549,8 +4543,14 @@ static int sysctl_numa_balancing(struct ctl_table *table, int write,
 		if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
 		    (state & NUMA_BALANCING_MEMORY_TIERING))
 			reset_memory_tiering();
-		sysctl_numa_balancing_mode = state;
-		__set_numabalancing_state(state);
+		if (sysctl_numa_balancing_mode != state) {
+			if (state == NUMA_BALANCING_DISABLED)
+				static_branch_dec(&sched_numa_balancing);
+			else if (sysctl_numa_balancing_mode == NUMA_BALANCING_DISABLED)
+				static_branch_inc(&sched_numa_balancing);
+
+			sysctl_numa_balancing_mode = state;
+		}
 	}
 	return err;
 }
-- 
2.20.1
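A note on the two static-key APIs used by the patch above:
static_branch_enable()/disable() force the key into a given state, while
static_branch_inc()/dec() maintain a reference count and keep the branch
enabled as long as any user holds a reference; mixing the two styles on
one key corrupts the count. A minimal userspace model of the counting
semantics (hypothetical names, not kernel code):

	#include <stdio.h>

	/* Toy model of a refcounted static key (hypothetical). */
	struct key_model { int refs; };

	static void key_inc(struct key_model *k) { k->refs++; }
	static void key_dec(struct key_model *k) { k->refs--; }

	/* The fast path stays patched in while any reference is held. */
	static int key_enabled(const struct key_model *k) { return k->refs > 0; }

	int main(void)
	{
		struct key_model numab = { 0 };

		key_inc(&numab);	/* global sysctl turns balancing on */
		key_inc(&numab);	/* one process opts in via prctl()  */
		key_dec(&numab);	/* sysctl turned off again...       */
		/* ...but the prctl() reference keeps the key enabled: */
		printf("enabled: %d\n", key_enabled(&numab));
		return 0;
	}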
From: Gang Li <ligang.bdlg@bytedance.com>
To: John Hubbard, Jonathan Corbet, Ingo Molnar, Peter Zijlstra, Juri Lelli,
    Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
    Mel Gorman, Daniel Bristot de Oliveira, Valentin Schneider
Cc: linux-api@vger.kernel.org, linux-kernel@vger.kernel.org,
    linux-fsdevel@vger.kernel.org, linux-doc@vger.kernel.org, Gang Li
Subject: [PATCH v6 2/2] sched/numa: add per-process numa_balancing
Date: Wed, 12 Apr 2023 22:11:26 +0800
Message-Id: <20230412141127.59741-1-ligang.bdlg@bytedance.com>
In-Reply-To: <20230412140701.58337-1-ligang.bdlg@bytedance.com>
References: <20230412140701.58337-1-ligang.bdlg@bytedance.com>

Add PR_NUMA_BALANCING to prctl().

While NUMA balancing runs, the page faults it triggers cost
performance, so processes that care about worst-case latency want NUMA
balancing disabled. Others accept a temporary performance loss in
exchange for higher average performance and prefer it enabled.

NUMA balancing can currently only be controlled globally through
/proc/sys/kernel/numa_balancing. For the cases above, we want to
enable or disable it per process instead.

Set the per-process NUMA balancing mode (see the sketch below):

	prctl(PR_NUMA_BALANCING, PR_SET_NUMA_BALANCING_DISABLED); // disable
	prctl(PR_NUMA_BALANCING, PR_SET_NUMA_BALANCING_ENABLED);  // enable
	prctl(PR_NUMA_BALANCING, PR_SET_NUMA_BALANCING_DEFAULT);  // follow global

Get the per-process numa_balancing state:

	prctl(PR_NUMA_BALANCING, PR_GET_NUMA_BALANCING, &ret);
	cat /proc/<pid>/status | grep NumaB_mode
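A minimal userspace sketch of the interface; the PR_* values come from
this patch's additions to include/uapi/linux/prctl.h and are defined
locally in case the installed headers predate them:

	#include <stdio.h>
	#include <sys/prctl.h>

	/* Values from this patch's uapi header; fallback for old headers. */
	#ifndef PR_NUMA_BALANCING
	#define PR_NUMA_BALANCING		69
	#define PR_SET_NUMA_BALANCING_DISABLED	0
	#define PR_SET_NUMA_BALANCING_ENABLED	1
	#define PR_SET_NUMA_BALANCING_DEFAULT	2
	#define PR_GET_NUMA_BALANCING		3
	#endif

	int main(void)
	{
		int mode;

		/* Opt this process out of NUMA balancing. */
		if (prctl(PR_NUMA_BALANCING, PR_SET_NUMA_BALANCING_DISABLED))
			perror("PR_SET_NUMA_BALANCING_DISABLED");

		/* Read the mode back; the kernel stores it through arg3. */
		if (prctl(PR_NUMA_BALANCING, PR_GET_NUMA_BALANCING, &mode))
			perror("PR_GET_NUMA_BALANCING");
		else
			printf("NumaB_mode: %d\n", mode);
		return 0;
	}

On a kernel without this patch, both calls fail with EINVAL.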
Cc: linux-api@vger.kernel.org
Signed-off-by: Gang Li <ligang.bdlg@bytedance.com>
Acked-by: John Hubbard
---
 Documentation/filesystems/proc.rst   |  2 ++
 fs/proc/task_mmu.c                   | 20 ++++++++++++
 include/linux/mm_types.h             |  3 ++
 include/linux/sched/numa_balancing.h | 45 ++++++++++++++++++++++++++
 include/uapi/linux/prctl.h           |  8 +++++
 kernel/fork.c                        |  4 +++
 kernel/sched/fair.c                  |  9 +++---
 kernel/sys.c                         | 47 ++++++++++++++++++++++++++++
 mm/mprotect.c                        |  6 ++--
 9 files changed, 138 insertions(+), 6 deletions(-)

diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index bfefcbb8f82b..c9897674fc5e 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -193,6 +193,7 @@ read the file /proc/PID/status::
   VmLib:      1412 kB
   VmPTE:        20 kb
   VmSwap:        0 kB
+  NumaB_mode:  default
   HugetlbPages:  0 kB
   CoreDumping:   0
   THP_enabled:   1
@@ -275,6 +276,7 @@ It's slow but very precise.
  VmPTE                       size of page table entries
  VmSwap                      amount of swap used by anonymous private data
                              (shmem swap usage is not included)
+ NumaB_mode                  numa balancing mode, set by prctl(PR_NUMA_BALANCING, ...)
  HugetlbPages                size of hugetlb memory portions
  CoreDumping                 process's memory is currently being dumped
                              (killing the process may lead to a corrupted core)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 38b19a757281..3f7263226645 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -19,6 +19,8 @@
 #include <...>
 #include <...>
 #include <...>
+#include <linux/prctl.h>
+#include <...>
 
 #include <...>
 #include <...>
@@ -75,6 +77,24 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		    " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8);
 	SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
 	seq_puts(m, " kB\n");
+#ifdef CONFIG_NUMA_BALANCING
+	seq_puts(m, "NumaB_mode:\t");
+	switch (mm->numa_balancing_mode) {
+	case PR_SET_NUMA_BALANCING_DEFAULT:
+		seq_puts(m, "default");
+		break;
+	case PR_SET_NUMA_BALANCING_DISABLED:
+		seq_puts(m, "disabled");
+		break;
+	case PR_SET_NUMA_BALANCING_ENABLED:
+		seq_puts(m, "enabled");
+		break;
+	default:
+		seq_puts(m, "unknown");
+		break;
+	}
+	seq_putc(m, '\n');
+#endif
 	hugetlb_report_usage(m, mm);
 }
 #undef SEQ_PUT_DEC
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3fc9e680f174..bd539d8c1103 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -740,6 +740,9 @@ struct mm_struct {
 
 		/* numa_scan_seq prevents two threads remapping PTEs. */
 		int numa_scan_seq;
+
+		/* Controls whether NUMA balancing is active for this mm. */
+		int numa_balancing_mode;
 #endif
 		/*
 		 * An operation with batched TLB flushing is going on. Anything
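The new status field can also be read programmatically. A short reader,
assuming the patch is applied (the line is only emitted when
CONFIG_NUMA_BALANCING is set):

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/self/status", "r");

		if (!f) {
			perror("fopen");
			return 1;
		}
		/* Print the NumaB_mode line added by this patch. */
		while (fgets(line, sizeof(line), f))
			if (!strncmp(line, "NumaB_mode:", 11))
				fputs(line, stdout);
		fclose(f);
		return 0;
	}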
diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
index 3988762efe15..fa360d17f52e 100644
--- a/include/linux/sched/numa_balancing.h
+++ b/include/linux/sched/numa_balancing.h
@@ -8,6 +8,8 @@
  */
 
 #include <linux/sched.h>
+#include <linux/prctl.h>
+#include <linux/sched/sysctl.h>
 
 #define TNF_MIGRATED		0x01
 #define TNF_NO_GROUP		0x02
@@ -16,12 +18,47 @@
 #define TNF_MIGRATE_FAIL	0x10
 
 #ifdef CONFIG_NUMA_BALANCING
+DECLARE_STATIC_KEY_FALSE(sched_numa_balancing);
 extern void task_numa_fault(int last_node, int node, int pages, int flags);
 extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
 extern void task_numa_free(struct task_struct *p, bool final);
 extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
 					int src_nid, int dst_cpu);
+static inline bool numa_balancing_enabled(struct task_struct *p)
+{
+	if (!static_branch_unlikely(&sched_numa_balancing))
+		return false;
+
+	if (p->mm) switch (p->mm->numa_balancing_mode) {
+	case PR_SET_NUMA_BALANCING_ENABLED:
+		return true;
+	case PR_SET_NUMA_BALANCING_DISABLED:
+		return false;
+	default:
+		break;
+	}
+
+	return sysctl_numa_balancing_mode;
+}
+static inline int numa_balancing_mode(struct mm_struct *mm)
+{
+	if (!static_branch_unlikely(&sched_numa_balancing))
+		return NUMA_BALANCING_DISABLED;
+
+	if (mm) switch (mm->numa_balancing_mode) {
+	case PR_SET_NUMA_BALANCING_ENABLED:
+		return sysctl_numa_balancing_mode == NUMA_BALANCING_DISABLED ?
+		       NUMA_BALANCING_NORMAL : sysctl_numa_balancing_mode;
+	case PR_SET_NUMA_BALANCING_DISABLED:
+		return NUMA_BALANCING_DISABLED;
+	case PR_SET_NUMA_BALANCING_DEFAULT:
+	default:
+		break;
+	}
+
+	return sysctl_numa_balancing_mode;
+}
 #else
 static inline void task_numa_fault(int last_node, int node, int pages,
 				   int flags)
@@ -42,6 +79,14 @@ static inline bool should_numa_migrate_memory(struct task_struct *p,
 {
 	return true;
 }
+static inline int numa_balancing_mode(struct mm_struct *mm)
+{
+	return 0;
+}
+static inline bool numa_balancing_enabled(struct task_struct *p)
+{
+	return false;
+}
 #endif
 
 #endif /* _LINUX_SCHED_NUMA_BALANCING_H */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index f23d9a16507f..7f452f677c61 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -294,4 +294,12 @@ struct prctl_mm_map {
 
 #define PR_SET_MEMORY_MERGE		67
 #define PR_GET_MEMORY_MERGE		68
+
+/* Set/get per-process numa_balancing mode */
+#define PR_NUMA_BALANCING		69
+# define PR_SET_NUMA_BALANCING_DISABLED	0
+# define PR_SET_NUMA_BALANCING_ENABLED	1
+# define PR_SET_NUMA_BALANCING_DEFAULT	2
+# define PR_GET_NUMA_BALANCING		3
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 80dca376a536..534ba3566ac0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -99,6 +99,7 @@
 #include <...>
 #include <...>
 #include <...>
+#include <linux/prctl.h>
 
 #include <...>
 #include <...>
@@ -1281,6 +1282,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	mm->pmd_huge_pte = NULL;
+#endif
+#ifdef CONFIG_NUMA_BALANCING
+	mm->numa_balancing_mode = PR_SET_NUMA_BALANCING_DEFAULT;
 #endif
 	mm_init_uprobes_state(mm);
 	hugetlb_count_init(mm);
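The precedence implemented by numa_balancing_enabled() above: the per-mm
mode wins, and PR_SET_NUMA_BALANCING_DEFAULT falls back to the global
sysctl. A userspace model of just that resolution (hypothetical names;
the real helper additionally gates on the sched_numa_balancing static
key):

	#include <stdbool.h>
	#include <stdio.h>

	/* Same values as the patch's PR_SET_NUMA_BALANCING_* constants. */
	enum { NB_DISABLED = 0, NB_ENABLED = 1, NB_DEFAULT = 2 };

	/* Per-mm mode wins; NB_DEFAULT follows the global sysctl. */
	static bool nb_enabled(int mm_mode, bool global_on)
	{
		if (mm_mode == NB_ENABLED)
			return true;
		if (mm_mode == NB_DISABLED)
			return false;
		return global_on;	/* NB_DEFAULT */
	}

	int main(void)
	{
		printf("%d\n", nb_enabled(NB_DEFAULT, true));	/* 1: follows global */
		printf("%d\n", nb_enabled(NB_DISABLED, true));	/* 0: opt-out wins   */
		printf("%d\n", nb_enabled(NB_ENABLED, false));	/* 1: opt-in wins    */
		return 0;
	}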
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a29ca11bead2..50edc4d89c64 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -47,6 +47,7 @@
 #include <...>
 #include <...>
 #include <...>
+#include <linux/sched/numa_balancing.h>
 
 #include <...>
 
@@ -2842,7 +2843,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 	struct numa_group *ng;
 	int priv;
 
-	if (!static_branch_likely(&sched_numa_balancing))
+	if (!numa_balancing_enabled(p))
 		return;
 
 	/* for example, ksmd faulting in a user's mm */
@@ -3220,7 +3221,7 @@ static void update_scan_period(struct task_struct *p, int new_cpu)
 	int src_nid = cpu_to_node(task_cpu(p));
 	int dst_nid = cpu_to_node(new_cpu);
 
-	if (!static_branch_likely(&sched_numa_balancing))
+	if (!numa_balancing_enabled(p))
 		return;
 
 	if (!p->mm || !p->numa_faults || (p->flags & PF_EXITING))
@@ -8455,7 +8456,7 @@ static int migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 	unsigned long src_weight, dst_weight;
 	int src_nid, dst_nid, dist;
 
-	if (!static_branch_likely(&sched_numa_balancing))
+	if (!numa_balancing_enabled(p))
 		return -1;
 
 	if (!p->numa_faults || !(env->sd->flags & SD_NUMA))
@@ -12061,7 +12062,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 		entity_tick(cfs_rq, se, queued);
 	}
 
-	if (static_branch_unlikely(&sched_numa_balancing))
+	if (numa_balancing_enabled(curr))
 		task_tick_numa(rq, curr);
 
 	update_misfit_status(curr, rq);
diff --git a/kernel/sys.c b/kernel/sys.c
index a2bd2b9f5683..d3df9fab1858 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -61,6 +61,7 @@
 #include <...>
 #include <...>
 #include <...>
+#include <linux/sched/numa_balancing.h>
 #include <...>
 #include <...>
 #include <...>
@@ -2118,6 +2119,35 @@ static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
 	return 0;
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static int prctl_pid_numa_balancing_write(int numa_balancing)
+{
+	int old_numa_balancing;
+
+	if (numa_balancing != PR_SET_NUMA_BALANCING_DEFAULT &&
+	    numa_balancing != PR_SET_NUMA_BALANCING_DISABLED &&
+	    numa_balancing != PR_SET_NUMA_BALANCING_ENABLED)
+		return -EINVAL;
+
+	old_numa_balancing = xchg(&current->mm->numa_balancing_mode, numa_balancing);
+
+	if (numa_balancing == old_numa_balancing)
+		return 0;
+
+	if (numa_balancing == PR_SET_NUMA_BALANCING_ENABLED)
+		static_branch_inc(&sched_numa_balancing);
+	else if (old_numa_balancing == PR_SET_NUMA_BALANCING_ENABLED)
+		static_branch_dec(&sched_numa_balancing);
+
+	return 0;
+}
+
+static int prctl_pid_numa_balancing_read(void)
+{
+	return current->mm->numa_balancing_mode;
+}
+#endif
+
 static int prctl_set_mm(int opt, unsigned long addr,
 			unsigned long arg4, unsigned long arg5)
 {
@@ -2674,6 +2704,23 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		error = set_syscall_user_dispatch(arg2, arg3, arg4,
 						  (char __user *) arg5);
 		break;
+#ifdef CONFIG_NUMA_BALANCING
+	case PR_NUMA_BALANCING:
+		switch (arg2) {
+		case PR_SET_NUMA_BALANCING_DEFAULT:
+		case PR_SET_NUMA_BALANCING_DISABLED:
+		case PR_SET_NUMA_BALANCING_ENABLED:
+			error = prctl_pid_numa_balancing_write((int)arg2);
+			break;
+		case PR_GET_NUMA_BALANCING:
+			error = put_user(prctl_pid_numa_balancing_read(), (int __user *)arg3);
+			break;
+		default:
+			error = -EINVAL;
+			break;
+		}
+		break;
+#endif
 #ifdef CONFIG_SCHED_CORE
 	case PR_SCHED_CORE:
 		error = sched_core_share_pid(arg2, arg3, arg4, arg5);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index afdb6723782e..eb1098f790f2 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -30,6 +30,7 @@
 #include <...>
 #include <...>
 #include <...>
+#include <linux/sched/numa_balancing.h>
 #include <...>
 #include <...>
 #include <...>
@@ -165,10 +166,11 @@ static long change_pte_range(struct mmu_gather *tlb,
 				 * Skip scanning top tier node if normal numa
 				 * balancing is disabled
 				 */
-				if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
+				if (!(numa_balancing_mode(vma->vm_mm) & NUMA_BALANCING_NORMAL) &&
 				    toptier)
 					continue;
-				if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING &&
+				if (numa_balancing_mode(vma->vm_mm) &
+				    NUMA_BALANCING_MEMORY_TIERING &&
 				    !toptier)
 					xchg_page_access_time(page,
 						jiffies_to_msecs(jiffies));
-- 
2.20.1